diff --git a/src/modules/semantle/README.md b/src/modules/semantle/README.md index 9755494..f6b8eca 100644 --- a/src/modules/semantle/README.md +++ b/src/modules/semantle/README.md @@ -28,11 +28,12 @@ the target pool and the vocabulary** — so every legal guess could itself have been the answer, and OOV detection is an O(1) `Set.has()` with no upstream round-trip. Regenerate with `node scripts/build-semantle-words.js`. -**Similarity:** `@cf/baai/bge-small-en-v1.5` text embeddings via the +**Similarity:** `@cf/baai/bge-m3` multilingual text embeddings via the `env.AI` binding. Each in-vocab guess runs one inference call batching -target + guess (384-dim vectors) and the module scores them with local -cosine similarity. At ~0.0037 Neurons per guess, the Workers Free plan -cap of 10k Neurons/day covers ~2.7M guesses/day. +target + guess (1024-dim vectors) and the module scores them with local +cosine similarity. At 1075 Neurons per M input tokens (~0.002 N/guess +for short words), the Workers Free plan cap of 10k Neurons/day covers +~4.6M guesses/day. Same model as `doantu` so both share the binding. OOV guesses short-circuit before inference — the player sees "isn't in the vocabulary" instead of a noisy subword-based score. @@ -70,8 +71,8 @@ Each `guesses[]` entry is `{ word, canonical, similarity }`. ## Config -No env vars. Model defaults to `@cf/baai/bge-small-en-v1.5`; override with -`createClient(env.AI, { model: "@cf/baai/bge-base-en-v1.5" })` in a test +No env vars. Model defaults to `@cf/baai/bge-m3`; override with +`createClient(env.AI, { model: "@cf/baai/bge-small-en-v1.5" })` in a test or alternative deploy. ## Why unlimited guesses? @@ -82,6 +83,6 @@ tracked via `bestGuessCount` — fewest guesses to solve across all rounds. ## Credits -- Embeddings: [`@cf/baai/bge-small-en-v1.5`](https://developers.cloudflare.com/workers-ai/models/bge-small-en-v1.5/) on Cloudflare Workers AI. +- Embeddings: [`@cf/baai/bge-m3`](https://developers.cloudflare.com/workers-ai/models/bge-m3/) on Cloudflare Workers AI (multilingual). - Target dictionary: [google-10000-english](https://github.com/first20hours/google-10000-english) by Josh Kaufman, derived from Peter Norvig's Google Ngram analysis. - Game concept: [Semantle](https://semantle.com/) by David Turner. diff --git a/src/modules/semantle/api-client.js b/src/modules/semantle/api-client.js index 32cc80b..8682f11 100644 --- a/src/modules/semantle/api-client.js +++ b/src/modules/semantle/api-client.js @@ -1,9 +1,9 @@ /** * @file Cloudflare Workers AI client for the semantle module. * - * Runs the `@cf/baai/bge-small-en-v1.5` text-embedding model via the `env.AI` - * binding, then scores guesses by computing cosine similarity between the - * target and guess vectors locally (no extra round-trip). + * Runs the `@cf/baai/bge-m3` text-embedding model via the `env.AI` binding, + * then scores guesses by computing cosine similarity between the target + * and guess vectors locally (no extra round-trip). * * Vocabulary: the curated `words-data.js` list (google-10k) doubles as our * in/out-of-vocabulary set — anything outside it is treated as OOV so players @@ -13,7 +13,9 @@ import { randomLine } from "./wordlist.js"; import WORDS from "./words-data.js"; -const DEFAULT_MODEL = "@cf/baai/bge-small-en-v1.5"; +// BGE-M3: multilingual, 1024 dimensions, 1075 Neurons per M input tokens — +// cheaper than bge-small-en-v1.5 (1841 N/M) and matches the doantu sibling. +const DEFAULT_MODEL = "@cf/baai/bge-m3"; // O(1) membership lookup for OOV detection. Built once per isolate. const VOCAB = new Set(WORDS); diff --git a/src/modules/semantle/index.js b/src/modules/semantle/index.js index c0508fb..3349796 100644 --- a/src/modules/semantle/index.js +++ b/src/modules/semantle/index.js @@ -4,7 +4,7 @@ * Targets come from a curated local wordlist (same list doubles as the * vocabulary for OOV detection, so no upstream check is needed to pick or * validate a word). Similarity scores come from cosine distance between - * `@cf/baai/bge-small-en-v1.5` embeddings produced by the `env.AI` binding. + * `@cf/baai/bge-m3` multilingual embeddings produced by the `env.AI` binding. */ import { createClient } from "./api-client.js"; diff --git a/tests/modules/semantle/api-client.test.js b/tests/modules/semantle/api-client.test.js index 9acc464..a429846 100644 --- a/tests/modules/semantle/api-client.test.js +++ b/tests/modules/semantle/api-client.test.js @@ -2,10 +2,10 @@ import { describe, expect, it, vi } from "vitest"; import { UpstreamError, createClient } from "../../../src/modules/semantle/api-client.js"; /** - * Build a deterministic 384-dim vector (bge-small output size) from a seed - * so cosine scores are reproducible without hardcoding 384 floats. + * Build a deterministic 1024-dim vector (bge-m3 output size) from a seed + * so cosine scores are reproducible without hardcoding 1024 floats. */ -function fakeVector(seed, dim = 384) { +function fakeVector(seed, dim = 1024) { const out = new Array(dim); for (let i = 0; i < dim; i++) out[i] = Math.sin(seed * (i + 1)); return out; @@ -45,14 +45,14 @@ describe("semantle/api-client", () => { it("similarity batches target + guess in a single run() call", async () => { const ai = fakeAi(async (_model, { text }) => ({ - shape: [text.length, 384], + shape: [text.length, 1024], data: text.map((_, i) => fakeVector(i + 1)), })); const client = createClient(ai); await client.similarity("apple", "orange"); expect(ai.run).toHaveBeenCalledTimes(1); const [model, input] = ai.run.mock.calls[0]; - expect(model).toBe("@cf/baai/bge-small-en-v1.5"); + expect(model).toBe("@cf/baai/bge-m3"); expect(input).toEqual({ text: ["apple", "orange"] }); }); @@ -107,7 +107,7 @@ describe("semantle/api-client", () => { }); it("similarity returns null score when a vector norm is zero", async () => { - const zero = new Array(384).fill(0); + const zero = new Array(1024).fill(0); const ai = fakeAi(async () => ({ data: [zero, fakeVector(1)] })); const client = createClient(ai); const res = await client.similarity("apple", "orange"); diff --git a/wrangler.toml b/wrangler.toml index 21b2c5d..5c44305 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -25,11 +25,12 @@ binding = "DB" database_name = "miti99bot-db" database_id = "261b54e7-0fdb-4fe7-8ed9-2e8a8bcf459c" -# Workers AI — inference binding used by the semantle module for -# @cf/baai/bge-small-en-v1.5 text embeddings (replaces ConceptNet upstream). -# Accessed as `env.AI` in handlers. Included on the Workers Free plan: -# 10,000 Neurons/day at no charge (hard-stops — no billing on Free plan). -# bge-small is ~0.0037 Neurons/guess → ~2.7M guesses/day within the cap. +# Workers AI — inference binding used by semantle + doantu for +# @cf/baai/bge-m3 multilingual text embeddings. Accessed as `env.AI` +# in handlers. Included on the Workers Free plan: 10,000 Neurons/day at +# no charge (hard-stops — no billing on Free plan). +# bge-m3 is 1075 Neurons per M input tokens → ~0.002 N/guess (2 short +# words), ~4.6M guesses/day within the cap. # Pricing: https://developers.cloudflare.com/workers-ai/platform/pricing/ [ai] binding = "AI"