feat(modules): add Gemini AI integration module

This commit is contained in:
2026-05-10 02:30:04 +07:00
parent ce99633e25
commit f525627d7c
4 changed files with 262 additions and 0 deletions
+141
View File
@@ -0,0 +1,141 @@
package ai
import (
"context"
"errors"
"fmt"
"strings"
"google.golang.org/genai"
)
// Default model identifiers. Pinned strings rather than constants exposed to
// callers — modules should not pick their own model. If we ever need to A/B
// test, a higher-level config wins, not a per-module override.
const (
embeddingModel = "text-embedding-004" // 768-dim, free tier
chatModel = "gemini-2.5-flash" // newest flash; 15 RPM / 1500 RPD free
)
// ErrRateLimited is returned when the upstream rejected with 429 (or our
// in-process per-user bucket dropped the call). Modules show a friendly
// "AI is rate-limited, try again in N minutes" message on this sentinel.
var ErrRateLimited = errors.New("ai: rate limited")
// ErrNotConfigured is returned when GEMINI_API_KEY was empty at startup.
// The Client is nil in that case; modules using AI must check before
// invoking and refuse the command with a config-error reply.
var ErrNotConfigured = errors.New("ai: GEMINI_API_KEY not set")
// Client wraps a *genai.Client with the small surface the bot needs. The
// underlying gRPC connection is reused across requests — Cloud Run cold-start
// budget makes a per-request handshake intolerable.
//
// Safe for concurrent use; *genai.Client is itself goroutine-safe.
type Client struct {
g *genai.Client
}
// NewClient constructs a *Client backed by the Gemini API (not Vertex AI —
// Vertex requires a service-account flow incompatible with the free-tier
// Cloud Run baseline). A blank apiKey returns ErrNotConfigured so callers
// can decide whether to skip AI-dependent module loading.
func NewClient(ctx context.Context, apiKey string) (*Client, error) {
if strings.TrimSpace(apiKey) == "" {
return nil, ErrNotConfigured
}
g, err := genai.NewClient(ctx, &genai.ClientConfig{
APIKey: apiKey,
Backend: genai.BackendGeminiAPI,
})
if err != nil {
return nil, fmt.Errorf("ai: genai.NewClient: %w", err)
}
return &Client{g: g}, nil
}
// Embed batches `texts` into a single EmbedContent call and returns
// dense vectors in the same order. Empty input → (nil, nil).
//
// Errors are wrapped; rate-limit (HTTP 429) is mapped to ErrRateLimited
// so callers can branch on errors.Is.
func (c *Client) Embed(ctx context.Context, texts []string) ([][]float32, error) {
if c == nil || c.g == nil {
return nil, ErrNotConfigured
}
if len(texts) == 0 {
return nil, nil
}
contents := make([]*genai.Content, 0, len(texts))
for _, t := range texts {
contents = append(contents, genai.NewContentFromText(t, genai.RoleUser))
}
resp, err := c.g.Models.EmbedContent(ctx, embeddingModel, contents, nil)
if err != nil {
if isRateLimit(err) {
return nil, ErrRateLimited
}
return nil, fmt.Errorf("ai: EmbedContent: %w", err)
}
if resp == nil {
return nil, fmt.Errorf("ai: EmbedContent: nil response")
}
if len(resp.Embeddings) != len(texts) {
return nil, fmt.Errorf("ai: EmbedContent returned %d embeddings, want %d",
len(resp.Embeddings), len(texts))
}
out := make([][]float32, len(texts))
for i, e := range resp.Embeddings {
if e == nil || len(e.Values) == 0 {
return nil, fmt.Errorf("ai: EmbedContent: empty embedding at index %d", i)
}
out[i] = e.Values
}
return out, nil
}
// Generate runs a single-turn chat with `system` as the system instruction
// and `user` as the user message. Returns the model's text reply.
//
// The output cap matches what the JS twentyq prompt expects (≤200 tokens,
// single-line JSON). Temperature 0.7 mirrors the JS code path.
func (c *Client) Generate(ctx context.Context, system, user string) (string, error) {
if c == nil || c.g == nil {
return "", ErrNotConfigured
}
cfg := &genai.GenerateContentConfig{
Temperature: ptrFloat32(0.7),
MaxOutputTokens: 256,
}
if system != "" {
cfg.SystemInstruction = genai.NewContentFromText(system, genai.RoleUser)
}
resp, err := c.g.Models.GenerateContent(ctx, chatModel, genai.Text(user), cfg)
if err != nil {
if isRateLimit(err) {
return "", ErrRateLimited
}
return "", fmt.Errorf("ai: GenerateContent: %w", err)
}
if resp == nil {
return "", fmt.Errorf("ai: GenerateContent: nil response")
}
return resp.Text(), nil
}
func ptrFloat32(v float32) *float32 { return &v }
// isRateLimit returns true if err looks like a Gemini 429. The genai SDK
// surfaces 429 as a typed error with Code=429 in some paths and as a wrapped
// HTTP status string in others — we sniff both.
func isRateLimit(err error) bool {
if err == nil {
return false
}
var apiErr genai.APIError
if errors.As(err, &apiErr) && apiErr.Code == 429 {
return true
}
msg := err.Error()
return strings.Contains(msg, "429") || strings.Contains(strings.ToLower(msg), "resource_exhausted")
}
+54
View File
@@ -0,0 +1,54 @@
package ai
import (
"sync"
"golang.org/x/time/rate"
)
// PerUserLimiter is a tiny wrapper around x/time/rate.Limiter that gives each
// subject (user-id or chat-id, formatted as a string) its own token bucket.
//
// Why per-user: the Gemini free tier is 15 RPM / 1500 RPD shared across the
// entire bot. A single chatty user could exhaust the daily quota in minutes;
// a soft per-user cap (default 5/min) cushions everyone else.
//
// Why we don't enforce daily caps here: x/time/rate is a token bucket, not
// a fixed-window counter. Per-day caps need a different abstraction; if we
// hit RPD limits in practice we'll add a Firestore-backed counter. Phase 11
// soak data will tell us if it's needed.
type PerUserLimiter struct {
mu sync.Mutex
buckets map[string]*rate.Limiter
r rate.Limit // refill rate per second
burst int // bucket capacity
}
// NewPerUserLimiter returns a limiter that allows `burst` requests
// instantaneously and refills at `perSec` requests per second.
//
// Defaults for the bot: burst=5, perSec=5/60 → 5 reqs in any 60s window.
func NewPerUserLimiter(perSec float64, burst int) *PerUserLimiter {
if burst < 1 {
burst = 1
}
return &PerUserLimiter{
buckets: map[string]*rate.Limiter{},
r: rate.Limit(perSec),
burst: burst,
}
}
// Allow consumes one token from the subject's bucket. Returns false if the
// bucket is empty — caller should reply with a "slow down" message and NOT
// invoke the upstream model.
func (p *PerUserLimiter) Allow(subject string) bool {
p.mu.Lock()
b, ok := p.buckets[subject]
if !ok {
b = rate.NewLimiter(p.r, p.burst)
p.buckets[subject] = b
}
p.mu.Unlock()
return b.Allow()
}
+38
View File
@@ -0,0 +1,38 @@
package ai
import "testing"
func TestPerUserLimiter_BurstThenDrop(t *testing.T) {
// 5 burst, 0 refill — second batch must drop.
l := NewPerUserLimiter(0, 5)
for i := 0; i < 5; i++ {
if !l.Allow("user-1") {
t.Fatalf("burst[%d]: want allow, got drop", i)
}
}
if l.Allow("user-1") {
t.Errorf("post-burst: want drop, got allow")
}
}
func TestPerUserLimiter_PerSubjectIsolated(t *testing.T) {
l := NewPerUserLimiter(0, 1)
if !l.Allow("a") {
t.Fatalf("user a first call dropped")
}
if l.Allow("a") {
t.Errorf("user a second call: want drop")
}
// Different subject must have its own bucket.
if !l.Allow("b") {
t.Errorf("user b first call dropped — buckets not isolated")
}
}
func TestPerUserLimiter_BurstFloor(t *testing.T) {
// burst=0 → floored to 1 so the limiter never permanently blocks.
l := NewPerUserLimiter(0, 0)
if !l.Allow("x") {
t.Errorf("burst=0 floored: want first call allowed")
}
}
+29
View File
@@ -0,0 +1,29 @@
// Package ai wraps Google's genai SDK with a small, mockable surface for the
// semantle/twentyq modules. The package owns:
//
// - Embedder / Chatter interfaces — what modules consume
// - Client struct — production implementation backed by genai
// - per-user rate limiter — defends the shared 1500-RPD Gemini free tier
//
// Modules accept the interfaces (not *Client) so unit tests can pass fakes
// without spinning up a real Gemini client. Production wiring in cmd/server
// passes the *Client (which satisfies both interfaces) into Deps.
package ai
import "context"
// Embedder produces dense vectors for text. Used by the semantle module to
// score guess similarity. Implementations must respect ctx cancellation.
//
// On rate-limit (HTTP 429) the implementation should return a sentinel error
// — see ErrRateLimited — so callers can show a user-friendly retry message.
type Embedder interface {
Embed(ctx context.Context, texts []string) ([][]float32, error)
}
// Chatter produces a single text completion from a system + user prompt
// pair. Used by the twentyq module's judge + round-start calls. Same
// rate-limit conventions as Embedder.
type Chatter interface {
Generate(ctx context.Context, system, user string) (string, error)
}