Files
goclaw/internal/providers/types.go
T
viettranx 9468aae422 refactor(providers): simplify DashScope per-model thinking guard
Remove ModelThinkingCapable interface and ChatRequest.ModelSupportsThinking
hint field — DashScope handles per-model checks internally via its own
whitelist. Fix double applyThinkingGuard on ChatStream→Chat tool fallback
by calling OpenAIProvider.Chat directly.
2026-03-16 07:55:25 +07:00

144 lines
5.7 KiB
Go

package providers
import (
"context"
"encoding/json"
)
// Options keys used in ChatRequest.Options across providers.
const (
OptMaxTokens = "max_tokens"
OptTemperature = "temperature"
OptThinkingLevel = "thinking_level"
OptReasoningEffort = "reasoning_effort"
OptEnableThinking = "enable_thinking"
OptThinkingBudget = "thinking_budget"
)
// TokenSource provides an OAuth access token (with auto-refresh).
type TokenSource interface {
Token() (string, error)
}
// Provider is the interface all LLM providers must implement.
type Provider interface {
// Chat sends messages to the LLM and returns a response.
// tools defines available tool schemas; model overrides the default.
Chat(ctx context.Context, req ChatRequest) (*ChatResponse, error)
// ChatStream sends messages and streams response chunks via callback.
// Returns the final complete response after streaming ends.
ChatStream(ctx context.Context, req ChatRequest, onChunk func(StreamChunk)) (*ChatResponse, error)
// DefaultModel returns the provider's default model name.
DefaultModel() string
// Name returns the provider identifier (e.g. "anthropic", "openai").
Name() string
}
// ThinkingCapable is optionally implemented by providers that support extended thinking.
// Used to gate thinking_level injection so it's not sent to providers that ignore it.
type ThinkingCapable interface {
SupportsThinking() bool
}
// ChatRequest contains the input for a Chat/ChatStream call.
type ChatRequest struct {
Messages []Message `json:"messages"`
Tools []ToolDefinition `json:"tools,omitempty"`
Model string `json:"model,omitempty"`
Options map[string]any `json:"options,omitempty"`
}
// ChatResponse is the result from an LLM call.
type ChatResponse struct {
Content string `json:"content"`
Thinking string `json:"thinking,omitempty"`
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
FinishReason string `json:"finish_reason"` // "stop", "tool_calls", "length"
Usage *Usage `json:"usage,omitempty"`
// Phase is Codex-specific (gpt-5.3-codex): "commentary" or "final_answer".
// Agent loop must persist this on assistant messages for Codex performance.
Phase string `json:"phase,omitempty"`
// RawAssistantContent preserves the raw content blocks array from the provider response.
// Used by Anthropic to pass thinking blocks back in tool use loops (required by API).
RawAssistantContent json.RawMessage `json:"-"`
}
// StreamChunk is a piece of a streaming response.
type StreamChunk struct {
Content string `json:"content,omitempty"`
Thinking string `json:"thinking,omitempty"`
Done bool `json:"done,omitempty"`
}
// ImageContent represents a base64-encoded image for vision-capable models.
type ImageContent struct {
MimeType string `json:"mime_type"` // e.g. "image/jpeg"
Data string `json:"data"` // base64-encoded image bytes
}
// MediaRef is a lightweight reference to a persistently stored media file.
// Stored in session JSONB (~60 bytes each) instead of megabytes for base64.
// On reload, MediaRefs are resolved to file paths and loaded into Images (for images).
type MediaRef struct {
ID string `json:"id"` // unique media ID (uuid)
MimeType string `json:"mime_type"` // e.g. "image/jpeg", "application/pdf"
Kind string `json:"kind"` // "image", "video", "audio", "document"
}
// Message represents a conversation message.
type Message struct {
Role string `json:"role"` // "system", "user", "assistant", "tool"
Content string `json:"content"`
Thinking string `json:"thinking,omitempty"` // reasoning_content for thinking models (Kimi, DeepSeek, etc.)
Images []ImageContent `json:"images,omitempty"` // vision: base64 images (runtime only, not persisted)
MediaRefs []MediaRef `json:"media_refs,omitempty"` // persistent media file references
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
ToolCallID string `json:"tool_call_id,omitempty"` // for role="tool" responses
// Phase is a Codex-specific field (gpt-5.3-codex) indicating message purpose.
// Values: "commentary" (intermediate), "final_answer" (closeout), or "" (unset).
// Must be persisted and passed back in subsequent requests for Codex performance.
// Other providers ignore this field.
Phase string `json:"phase,omitempty"`
// RawAssistantContent carries raw provider content blocks through tool loop iterations.
// Anthropic requires thinking blocks to be passed back exactly as received.
RawAssistantContent json.RawMessage `json:"-"`
}
// ToolCall represents a tool invocation requested by the LLM.
type ToolCall struct {
ID string `json:"id"`
Name string `json:"name"`
Arguments map[string]any `json:"arguments"`
Metadata map[string]string `json:"metadata,omitempty"` // provider-specific (e.g. Gemini thought_signature)
}
// ToolDefinition describes a tool available to the LLM.
type ToolDefinition struct {
Type string `json:"type"` // "function"
Function ToolFunctionSchema `json:"function"`
}
// ToolFunctionSchema is the schema for a function tool.
type ToolFunctionSchema struct {
Name string `json:"name"`
Description string `json:"description"`
Parameters map[string]any `json:"parameters"`
}
// Usage tracks token consumption.
type Usage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
CacheCreationTokens int `json:"cache_creation_input_tokens,omitempty"`
CacheReadTokens int `json:"cache_read_input_tokens,omitempty"`
ThinkingTokens int `json:"thinking_tokens,omitempty"`
}