mirror of
https://github.com/tiennm99/miti99bot.git
synced 2026-06-08 08:16:03 +00:00
feat(modules): add Gemini AI integration module
This commit is contained in:
@@ -0,0 +1,141 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"google.golang.org/genai"
|
||||
)
|
||||
|
||||
// Default model identifiers. Pinned strings rather than constants exposed to
|
||||
// callers — modules should not pick their own model. If we ever need to A/B
|
||||
// test, a higher-level config wins, not a per-module override.
|
||||
const (
|
||||
embeddingModel = "text-embedding-004" // 768-dim, free tier
|
||||
chatModel = "gemini-2.5-flash" // newest flash; 15 RPM / 1500 RPD free
|
||||
)
|
||||
|
||||
// ErrRateLimited is returned when the upstream rejected with 429 (or our
|
||||
// in-process per-user bucket dropped the call). Modules show a friendly
|
||||
// "AI is rate-limited, try again in N minutes" message on this sentinel.
|
||||
var ErrRateLimited = errors.New("ai: rate limited")
|
||||
|
||||
// ErrNotConfigured is returned when GEMINI_API_KEY was empty at startup.
|
||||
// The Client is nil in that case; modules using AI must check before
|
||||
// invoking and refuse the command with a config-error reply.
|
||||
var ErrNotConfigured = errors.New("ai: GEMINI_API_KEY not set")
|
||||
|
||||
// Client wraps a *genai.Client with the small surface the bot needs. The
|
||||
// underlying gRPC connection is reused across requests — Cloud Run cold-start
|
||||
// budget makes a per-request handshake intolerable.
|
||||
//
|
||||
// Safe for concurrent use; *genai.Client is itself goroutine-safe.
|
||||
type Client struct {
|
||||
g *genai.Client
|
||||
}
|
||||
|
||||
// NewClient constructs a *Client backed by the Gemini API (not Vertex AI —
|
||||
// Vertex requires a service-account flow incompatible with the free-tier
|
||||
// Cloud Run baseline). A blank apiKey returns ErrNotConfigured so callers
|
||||
// can decide whether to skip AI-dependent module loading.
|
||||
func NewClient(ctx context.Context, apiKey string) (*Client, error) {
|
||||
if strings.TrimSpace(apiKey) == "" {
|
||||
return nil, ErrNotConfigured
|
||||
}
|
||||
g, err := genai.NewClient(ctx, &genai.ClientConfig{
|
||||
APIKey: apiKey,
|
||||
Backend: genai.BackendGeminiAPI,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ai: genai.NewClient: %w", err)
|
||||
}
|
||||
return &Client{g: g}, nil
|
||||
}
|
||||
|
||||
// Embed batches `texts` into a single EmbedContent call and returns
|
||||
// dense vectors in the same order. Empty input → (nil, nil).
|
||||
//
|
||||
// Errors are wrapped; rate-limit (HTTP 429) is mapped to ErrRateLimited
|
||||
// so callers can branch on errors.Is.
|
||||
func (c *Client) Embed(ctx context.Context, texts []string) ([][]float32, error) {
|
||||
if c == nil || c.g == nil {
|
||||
return nil, ErrNotConfigured
|
||||
}
|
||||
if len(texts) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
contents := make([]*genai.Content, 0, len(texts))
|
||||
for _, t := range texts {
|
||||
contents = append(contents, genai.NewContentFromText(t, genai.RoleUser))
|
||||
}
|
||||
resp, err := c.g.Models.EmbedContent(ctx, embeddingModel, contents, nil)
|
||||
if err != nil {
|
||||
if isRateLimit(err) {
|
||||
return nil, ErrRateLimited
|
||||
}
|
||||
return nil, fmt.Errorf("ai: EmbedContent: %w", err)
|
||||
}
|
||||
if resp == nil {
|
||||
return nil, fmt.Errorf("ai: EmbedContent: nil response")
|
||||
}
|
||||
if len(resp.Embeddings) != len(texts) {
|
||||
return nil, fmt.Errorf("ai: EmbedContent returned %d embeddings, want %d",
|
||||
len(resp.Embeddings), len(texts))
|
||||
}
|
||||
out := make([][]float32, len(texts))
|
||||
for i, e := range resp.Embeddings {
|
||||
if e == nil || len(e.Values) == 0 {
|
||||
return nil, fmt.Errorf("ai: EmbedContent: empty embedding at index %d", i)
|
||||
}
|
||||
out[i] = e.Values
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Generate runs a single-turn chat with `system` as the system instruction
|
||||
// and `user` as the user message. Returns the model's text reply.
|
||||
//
|
||||
// The output cap matches what the JS twentyq prompt expects (≤200 tokens,
|
||||
// single-line JSON). Temperature 0.7 mirrors the JS code path.
|
||||
func (c *Client) Generate(ctx context.Context, system, user string) (string, error) {
|
||||
if c == nil || c.g == nil {
|
||||
return "", ErrNotConfigured
|
||||
}
|
||||
cfg := &genai.GenerateContentConfig{
|
||||
Temperature: ptrFloat32(0.7),
|
||||
MaxOutputTokens: 256,
|
||||
}
|
||||
if system != "" {
|
||||
cfg.SystemInstruction = genai.NewContentFromText(system, genai.RoleUser)
|
||||
}
|
||||
resp, err := c.g.Models.GenerateContent(ctx, chatModel, genai.Text(user), cfg)
|
||||
if err != nil {
|
||||
if isRateLimit(err) {
|
||||
return "", ErrRateLimited
|
||||
}
|
||||
return "", fmt.Errorf("ai: GenerateContent: %w", err)
|
||||
}
|
||||
if resp == nil {
|
||||
return "", fmt.Errorf("ai: GenerateContent: nil response")
|
||||
}
|
||||
return resp.Text(), nil
|
||||
}
|
||||
|
||||
func ptrFloat32(v float32) *float32 { return &v }
|
||||
|
||||
// isRateLimit returns true if err looks like a Gemini 429. The genai SDK
|
||||
// surfaces 429 as a typed error with Code=429 in some paths and as a wrapped
|
||||
// HTTP status string in others — we sniff both.
|
||||
func isRateLimit(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
var apiErr genai.APIError
|
||||
if errors.As(err, &apiErr) && apiErr.Code == 429 {
|
||||
return true
|
||||
}
|
||||
msg := err.Error()
|
||||
return strings.Contains(msg, "429") || strings.Contains(strings.ToLower(msg), "resource_exhausted")
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
// PerUserLimiter is a tiny wrapper around x/time/rate.Limiter that gives each
|
||||
// subject (user-id or chat-id, formatted as a string) its own token bucket.
|
||||
//
|
||||
// Why per-user: the Gemini free tier is 15 RPM / 1500 RPD shared across the
|
||||
// entire bot. A single chatty user could exhaust the daily quota in minutes;
|
||||
// a soft per-user cap (default 5/min) cushions everyone else.
|
||||
//
|
||||
// Why we don't enforce daily caps here: x/time/rate is a token bucket, not
|
||||
// a fixed-window counter. Per-day caps need a different abstraction; if we
|
||||
// hit RPD limits in practice we'll add a Firestore-backed counter. Phase 11
|
||||
// soak data will tell us if it's needed.
|
||||
type PerUserLimiter struct {
|
||||
mu sync.Mutex
|
||||
buckets map[string]*rate.Limiter
|
||||
r rate.Limit // refill rate per second
|
||||
burst int // bucket capacity
|
||||
}
|
||||
|
||||
// NewPerUserLimiter returns a limiter that allows `burst` requests
|
||||
// instantaneously and refills at `perSec` requests per second.
|
||||
//
|
||||
// Defaults for the bot: burst=5, perSec=5/60 → 5 reqs in any 60s window.
|
||||
func NewPerUserLimiter(perSec float64, burst int) *PerUserLimiter {
|
||||
if burst < 1 {
|
||||
burst = 1
|
||||
}
|
||||
return &PerUserLimiter{
|
||||
buckets: map[string]*rate.Limiter{},
|
||||
r: rate.Limit(perSec),
|
||||
burst: burst,
|
||||
}
|
||||
}
|
||||
|
||||
// Allow consumes one token from the subject's bucket. Returns false if the
|
||||
// bucket is empty — caller should reply with a "slow down" message and NOT
|
||||
// invoke the upstream model.
|
||||
func (p *PerUserLimiter) Allow(subject string) bool {
|
||||
p.mu.Lock()
|
||||
b, ok := p.buckets[subject]
|
||||
if !ok {
|
||||
b = rate.NewLimiter(p.r, p.burst)
|
||||
p.buckets[subject] = b
|
||||
}
|
||||
p.mu.Unlock()
|
||||
return b.Allow()
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
package ai
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestPerUserLimiter_BurstThenDrop(t *testing.T) {
|
||||
// 5 burst, 0 refill — second batch must drop.
|
||||
l := NewPerUserLimiter(0, 5)
|
||||
for i := 0; i < 5; i++ {
|
||||
if !l.Allow("user-1") {
|
||||
t.Fatalf("burst[%d]: want allow, got drop", i)
|
||||
}
|
||||
}
|
||||
if l.Allow("user-1") {
|
||||
t.Errorf("post-burst: want drop, got allow")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPerUserLimiter_PerSubjectIsolated(t *testing.T) {
|
||||
l := NewPerUserLimiter(0, 1)
|
||||
if !l.Allow("a") {
|
||||
t.Fatalf("user a first call dropped")
|
||||
}
|
||||
if l.Allow("a") {
|
||||
t.Errorf("user a second call: want drop")
|
||||
}
|
||||
// Different subject must have its own bucket.
|
||||
if !l.Allow("b") {
|
||||
t.Errorf("user b first call dropped — buckets not isolated")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPerUserLimiter_BurstFloor(t *testing.T) {
|
||||
// burst=0 → floored to 1 so the limiter never permanently blocks.
|
||||
l := NewPerUserLimiter(0, 0)
|
||||
if !l.Allow("x") {
|
||||
t.Errorf("burst=0 floored: want first call allowed")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
// Package ai wraps Google's genai SDK with a small, mockable surface for the
|
||||
// semantle/twentyq modules. The package owns:
|
||||
//
|
||||
// - Embedder / Chatter interfaces — what modules consume
|
||||
// - Client struct — production implementation backed by genai
|
||||
// - per-user rate limiter — defends the shared 1500-RPD Gemini free tier
|
||||
//
|
||||
// Modules accept the interfaces (not *Client) so unit tests can pass fakes
|
||||
// without spinning up a real Gemini client. Production wiring in cmd/server
|
||||
// passes the *Client (which satisfies both interfaces) into Deps.
|
||||
package ai
|
||||
|
||||
import "context"
|
||||
|
||||
// Embedder produces dense vectors for text. Used by the semantle module to
|
||||
// score guess similarity. Implementations must respect ctx cancellation.
|
||||
//
|
||||
// On rate-limit (HTTP 429) the implementation should return a sentinel error
|
||||
// — see ErrRateLimited — so callers can show a user-friendly retry message.
|
||||
type Embedder interface {
|
||||
Embed(ctx context.Context, texts []string) ([][]float32, error)
|
||||
}
|
||||
|
||||
// Chatter produces a single text completion from a system + user prompt
|
||||
// pair. Used by the twentyq module's judge + round-start calls. Same
|
||||
// rate-limit conventions as Embedder.
|
||||
type Chatter interface {
|
||||
Generate(ctx context.Context, system, user string) (string, error)
|
||||
}
|
||||
Reference in New Issue
Block a user