mirror of
https://github.com/tiennm99/goclaw.git
synced 2026-06-10 14:10:52 +00:00
9468aae422
Remove ModelThinkingCapable interface and ChatRequest.ModelSupportsThinking hint field — DashScope handles per-model checks internally via its own whitelist. Fix double applyThinkingGuard on ChatStream→Chat tool fallback by calling OpenAIProvider.Chat directly.
144 lines
5.7 KiB
Go
144 lines
5.7 KiB
Go
package providers
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
)
|
|
|
|
// Options keys used in ChatRequest.Options across providers.
|
|
const (
|
|
OptMaxTokens = "max_tokens"
|
|
OptTemperature = "temperature"
|
|
OptThinkingLevel = "thinking_level"
|
|
OptReasoningEffort = "reasoning_effort"
|
|
OptEnableThinking = "enable_thinking"
|
|
OptThinkingBudget = "thinking_budget"
|
|
)
|
|
|
|
// TokenSource provides an OAuth access token (with auto-refresh).
|
|
type TokenSource interface {
|
|
Token() (string, error)
|
|
}
|
|
|
|
// Provider is the interface all LLM providers must implement.
|
|
type Provider interface {
|
|
// Chat sends messages to the LLM and returns a response.
|
|
// tools defines available tool schemas; model overrides the default.
|
|
Chat(ctx context.Context, req ChatRequest) (*ChatResponse, error)
|
|
|
|
// ChatStream sends messages and streams response chunks via callback.
|
|
// Returns the final complete response after streaming ends.
|
|
ChatStream(ctx context.Context, req ChatRequest, onChunk func(StreamChunk)) (*ChatResponse, error)
|
|
|
|
// DefaultModel returns the provider's default model name.
|
|
DefaultModel() string
|
|
|
|
// Name returns the provider identifier (e.g. "anthropic", "openai").
|
|
Name() string
|
|
}
|
|
|
|
// ThinkingCapable is optionally implemented by providers that support extended thinking.
|
|
// Used to gate thinking_level injection so it's not sent to providers that ignore it.
|
|
type ThinkingCapable interface {
|
|
SupportsThinking() bool
|
|
}
|
|
|
|
// ChatRequest contains the input for a Chat/ChatStream call.
|
|
type ChatRequest struct {
|
|
Messages []Message `json:"messages"`
|
|
Tools []ToolDefinition `json:"tools,omitempty"`
|
|
Model string `json:"model,omitempty"`
|
|
Options map[string]any `json:"options,omitempty"`
|
|
}
|
|
|
|
// ChatResponse is the result from an LLM call.
|
|
type ChatResponse struct {
|
|
Content string `json:"content"`
|
|
Thinking string `json:"thinking,omitempty"`
|
|
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
|
FinishReason string `json:"finish_reason"` // "stop", "tool_calls", "length"
|
|
Usage *Usage `json:"usage,omitempty"`
|
|
|
|
// Phase is Codex-specific (gpt-5.3-codex): "commentary" or "final_answer".
|
|
// Agent loop must persist this on assistant messages for Codex performance.
|
|
Phase string `json:"phase,omitempty"`
|
|
|
|
// RawAssistantContent preserves the raw content blocks array from the provider response.
|
|
// Used by Anthropic to pass thinking blocks back in tool use loops (required by API).
|
|
RawAssistantContent json.RawMessage `json:"-"`
|
|
}
|
|
|
|
// StreamChunk is a piece of a streaming response.
|
|
type StreamChunk struct {
|
|
Content string `json:"content,omitempty"`
|
|
Thinking string `json:"thinking,omitempty"`
|
|
Done bool `json:"done,omitempty"`
|
|
}
|
|
|
|
// ImageContent represents a base64-encoded image for vision-capable models.
|
|
type ImageContent struct {
|
|
MimeType string `json:"mime_type"` // e.g. "image/jpeg"
|
|
Data string `json:"data"` // base64-encoded image bytes
|
|
}
|
|
|
|
// MediaRef is a lightweight reference to a persistently stored media file.
|
|
// Stored in session JSONB (~60 bytes each) instead of megabytes for base64.
|
|
// On reload, MediaRefs are resolved to file paths and loaded into Images (for images).
|
|
type MediaRef struct {
|
|
ID string `json:"id"` // unique media ID (uuid)
|
|
MimeType string `json:"mime_type"` // e.g. "image/jpeg", "application/pdf"
|
|
Kind string `json:"kind"` // "image", "video", "audio", "document"
|
|
}
|
|
|
|
// Message represents a conversation message.
|
|
type Message struct {
|
|
Role string `json:"role"` // "system", "user", "assistant", "tool"
|
|
Content string `json:"content"`
|
|
Thinking string `json:"thinking,omitempty"` // reasoning_content for thinking models (Kimi, DeepSeek, etc.)
|
|
Images []ImageContent `json:"images,omitempty"` // vision: base64 images (runtime only, not persisted)
|
|
MediaRefs []MediaRef `json:"media_refs,omitempty"` // persistent media file references
|
|
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
|
ToolCallID string `json:"tool_call_id,omitempty"` // for role="tool" responses
|
|
|
|
// Phase is a Codex-specific field (gpt-5.3-codex) indicating message purpose.
|
|
// Values: "commentary" (intermediate), "final_answer" (closeout), or "" (unset).
|
|
// Must be persisted and passed back in subsequent requests for Codex performance.
|
|
// Other providers ignore this field.
|
|
Phase string `json:"phase,omitempty"`
|
|
|
|
// RawAssistantContent carries raw provider content blocks through tool loop iterations.
|
|
// Anthropic requires thinking blocks to be passed back exactly as received.
|
|
RawAssistantContent json.RawMessage `json:"-"`
|
|
}
|
|
|
|
// ToolCall represents a tool invocation requested by the LLM.
|
|
type ToolCall struct {
|
|
ID string `json:"id"`
|
|
Name string `json:"name"`
|
|
Arguments map[string]any `json:"arguments"`
|
|
Metadata map[string]string `json:"metadata,omitempty"` // provider-specific (e.g. Gemini thought_signature)
|
|
}
|
|
|
|
// ToolDefinition describes a tool available to the LLM.
|
|
type ToolDefinition struct {
|
|
Type string `json:"type"` // "function"
|
|
Function ToolFunctionSchema `json:"function"`
|
|
}
|
|
|
|
// ToolFunctionSchema is the schema for a function tool.
|
|
type ToolFunctionSchema struct {
|
|
Name string `json:"name"`
|
|
Description string `json:"description"`
|
|
Parameters map[string]any `json:"parameters"`
|
|
}
|
|
|
|
// Usage tracks token consumption.
|
|
type Usage struct {
|
|
PromptTokens int `json:"prompt_tokens"`
|
|
CompletionTokens int `json:"completion_tokens"`
|
|
TotalTokens int `json:"total_tokens"`
|
|
CacheCreationTokens int `json:"cache_creation_input_tokens,omitempty"`
|
|
CacheReadTokens int `json:"cache_read_input_tokens,omitempty"`
|
|
ThinkingTokens int `json:"thinking_tokens,omitempty"`
|
|
}
|