mirror of
https://github.com/tiennm99/goclaw.git
synced 2026-06-10 04:10:26 +00:00
bdb60de7ae
- Update go.mod and Dockerfile to Go 1.26 - Apply `go fix ./...` stdlib modernizations across 170+ files - Add `go fix` to post-implementation checklist in CLAUDE.md - Fix go fix misapplied rewrite in loop_history.go
120 lines
3.1 KiB
Go
120 lines
3.1 KiB
Go
package tools
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"time"
|
|
|
|
"github.com/nextlevelbuilder/goclaw/internal/providers"
|
|
)
|
|
|
|
// geminiNativeDocumentCall sends a document to Gemini's native generateContent API.
|
|
// Uses inlineData with the document's MIME type (e.g. application/pdf) which Gemini
|
|
// supports natively, unlike the OpenAI-compat endpoint that only handles images.
|
|
func geminiNativeDocumentCall(ctx context.Context, apiKey, model, prompt string, docData []byte, docMime string) (*providers.ChatResponse, error) {
|
|
b64 := base64.StdEncoding.EncodeToString(docData)
|
|
|
|
// Gemini generateContent request body.
|
|
body := map[string]any{
|
|
"contents": []map[string]any{
|
|
{
|
|
"parts": []map[string]any{
|
|
{
|
|
"inline_data": map[string]any{
|
|
"mime_type": docMime,
|
|
"data": b64,
|
|
},
|
|
},
|
|
{
|
|
"text": prompt,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"generationConfig": map[string]any{
|
|
"maxOutputTokens": 16384,
|
|
"temperature": 0.2,
|
|
},
|
|
}
|
|
|
|
bodyJSON, err := json.Marshal(body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
url := fmt.Sprintf("https://generativelanguage.googleapis.com/v1beta/models/%s:generateContent?key=%s", model, apiKey)
|
|
|
|
httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(bodyJSON))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("create request: %w", err)
|
|
}
|
|
httpReq.Header.Set("Content-Type", "application/json")
|
|
|
|
client := &http.Client{Timeout: 120 * time.Second}
|
|
httpResp, err := client.Do(httpReq)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("HTTP request: %w", err)
|
|
}
|
|
defer httpResp.Body.Close()
|
|
|
|
respBody, err := io.ReadAll(httpResp.Body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
if httpResp.StatusCode != 200 {
|
|
return nil, fmt.Errorf("HTTP %d: %s", httpResp.StatusCode, truncateStr(string(respBody), 500))
|
|
}
|
|
|
|
// Parse Gemini native response format.
|
|
var geminiResp struct {
|
|
Candidates []struct {
|
|
Content struct {
|
|
Parts []struct {
|
|
Text string `json:"text"`
|
|
} `json:"parts"`
|
|
} `json:"content"`
|
|
} `json:"candidates"`
|
|
UsageMetadata struct {
|
|
PromptTokenCount int `json:"promptTokenCount"`
|
|
CandidatesTokenCount int `json:"candidatesTokenCount"`
|
|
TotalTokenCount int `json:"totalTokenCount"`
|
|
} `json:"usageMetadata"`
|
|
}
|
|
|
|
if err := json.Unmarshal(respBody, &geminiResp); err != nil {
|
|
return nil, fmt.Errorf("parse response: %w", err)
|
|
}
|
|
|
|
// Extract text from response.
|
|
var content string
|
|
if len(geminiResp.Candidates) > 0 {
|
|
for _, part := range geminiResp.Candidates[0].Content.Parts {
|
|
if part.Text != "" {
|
|
if content != "" {
|
|
content += "\n"
|
|
}
|
|
content += part.Text
|
|
}
|
|
}
|
|
}
|
|
if content == "" {
|
|
return nil, fmt.Errorf("empty response from Gemini")
|
|
}
|
|
|
|
return &providers.ChatResponse{
|
|
Content: content,
|
|
FinishReason: "stop",
|
|
Usage: &providers.Usage{
|
|
PromptTokens: geminiResp.UsageMetadata.PromptTokenCount,
|
|
CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
|
|
TotalTokens: geminiResp.UsageMetadata.TotalTokenCount,
|
|
},
|
|
}, nil
|
|
}
|