Files
goclaw/internal/tts/openai.go
T
viettranx bdb60de7ae chore: upgrade Go 1.25 → 1.26 and apply go fix modernizations
- Update go.mod and Dockerfile to Go 1.26
- Apply `go fix ./...` stdlib modernizations across 170+ files
- Add `go fix` to post-implementation checklist in CLAUDE.md
- Fix go fix misapplied rewrite in loop_history.go
2026-03-10 00:09:15 +07:00

127 lines
2.8 KiB
Go

package tts
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)
// OpenAIProvider is the TTS provider backed by the OpenAI audio/speech API.
// Matching TS openaiTTS() in src/tts/tts-core.ts.
//
// Construct it with NewOpenAIProvider, which fills in the defaults noted on
// each field when the corresponding config value is left empty.
type OpenAIProvider struct {
	// apiKey is sent as the bearer token in the Authorization header.
	apiKey string

	// apiBase is the URL prefix that "/audio/speech" is appended to.
	// Default: "https://api.openai.com/v1".
	apiBase string

	// model is used when a request does not name one.
	// Default: "gpt-4o-mini-tts".
	model string

	// voice is used when a request does not name one.
	// Default: "alloy".
	voice string

	// timeoutMs is the HTTP client timeout, in milliseconds.
	// Default: 30000.
	timeoutMs int
}
// OpenAIConfig carries the caller-supplied settings for NewOpenAIProvider.
//
// Only APIKey has no fallback; every other field may be left at its zero
// value, in which case NewOpenAIProvider substitutes a built-in default.
type OpenAIConfig struct {
	// APIKey is the credential sent as the bearer token.
	APIKey string

	// APIBase is the API URL prefix; empty selects "https://api.openai.com/v1".
	APIBase string

	// Model is the default TTS model; empty selects "gpt-4o-mini-tts".
	Model string

	// Voice is the default voice; empty selects "alloy".
	Voice string

	// TimeoutMs is the request timeout in milliseconds; zero or negative
	// selects 30000.
	TimeoutMs int
}
// NewOpenAIProvider builds an OpenAI TTS provider from cfg.
//
// The API key is taken as-is. Every other setting starts at its built-in
// default and is replaced only when cfg supplies a usable value: a non-empty
// string for APIBase, Model and Voice, and a positive number for TimeoutMs.
func NewOpenAIProvider(cfg OpenAIConfig) *OpenAIProvider {
	provider := &OpenAIProvider{
		apiKey:    cfg.APIKey,
		apiBase:   "https://api.openai.com/v1",
		model:     "gpt-4o-mini-tts",
		voice:     "alloy",
		timeoutMs: 30000,
	}

	if cfg.APIBase != "" {
		provider.apiBase = cfg.APIBase
	}
	if cfg.Model != "" {
		provider.model = cfg.Model
	}
	if cfg.Voice != "" {
		provider.voice = cfg.Voice
	}
	if cfg.TimeoutMs > 0 {
		provider.timeoutMs = cfg.TimeoutMs
	}

	return provider
}
// Name returns the fixed identifier of this provider, "openai".
func (p *OpenAIProvider) Name() string {
	return "openai"
}
// Synthesize converts text to speech via the OpenAI audio/speech endpoint.
// Matching TS: POST {apiBase}/audio/speech with {model, input, voice, response_format}.
//
// opts.Voice and opts.Model override the provider's defaults when non-empty,
// and opts.Format falls back to "mp3". The call is bounded by ctx as well as
// by the provider's configured timeout.
//
// On success it returns the raw audio bytes plus a file extension and MIME
// type derived from the requested format. Any non-200 response is returned as
// an error carrying the status code and a bounded prefix of the response body.
func (p *OpenAIProvider) Synthesize(ctx context.Context, text string, opts Options) (*SynthResult, error) {
	// Upper bound on how much of an error response is copied into the
	// returned error, so a misbehaving endpoint cannot balloon the message.
	const maxErrBodyBytes = 64 << 10

	voice := opts.Voice
	if voice == "" {
		voice = p.voice
	}
	model := opts.Model
	if model == "" {
		model = p.model
	}
	format := opts.Format
	if format == "" {
		format = "mp3"
	}

	bodyJSON, err := json.Marshal(map[string]any{
		"model":           model,
		"input":           text,
		"voice":           voice,
		"response_format": format,
	})
	if err != nil {
		return nil, fmt.Errorf("marshal openai tts request: %w", err)
	}

	endpoint := p.apiBase + "/audio/speech"
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(bodyJSON))
	if err != nil {
		return nil, fmt.Errorf("create openai tts request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+p.apiKey)

	client := &http.Client{Timeout: time.Duration(p.timeoutMs) * time.Millisecond}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("openai tts request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the status code is the primary signal, so a failure
		// while reading the error body is deliberately not reported.
		errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrBodyBytes))
		return nil, fmt.Errorf("openai tts error %d: %s", resp.StatusCode, string(errBody))
	}

	audio, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read openai tts response: %w", err)
	}

	// The extension defaults to the format name and the MIME type to MP3;
	// formats not listed below keep that fallback.
	ext, mime := format, "audio/mpeg"
	switch format {
	case "opus":
		// Opus output is labelled as an Ogg file.
		ext, mime = "ogg", "audio/ogg"
	case "aac":
		mime = "audio/aac"
	case "flac":
		mime = "audio/flac"
	case "wav":
		mime = "audio/wav"
	case "pcm":
		// NOTE(review): "audio/pcm" is the conventional, unregistered label
		// for headerless PCM samples — confirm consumers accept it.
		mime = "audio/pcm"
	}

	return &SynthResult{
		Audio:     audio,
		Extension: ext,
		MimeType:  mime,
	}, nil
}