refactor(doantu): swap Workers AI bge-m3 for hosted phow2sim HTTP API

Doantu now mirrors semantle's pre-Workers-AI shape: a thin fetch wrapper
around /random + /similarity on https://phow2sim.sg.miti99.com (overridable
via PHOW2SIM_API_URL). Drops the local Viet22K wordlist + build script —
the service owns vocabulary now. Promotes commands from protected to
public so they show up in Telegram's native / menu.
This commit is contained in:
2026-04-23 11:35:32 +07:00
parent fd5a1d2903
commit 4acc471f6f
9 changed files with 239 additions and 22429 deletions
+137 -103
View File
@@ -1,26 +1,11 @@
import { describe, expect, it, vi } from "vitest";
import { afterEach, describe, expect, it, vi } from "vitest";
import { UpstreamError, createClient } from "../../../src/modules/doantu/api-client.js";
/**
 * Produce a reproducible pseudo-embedding for tests.
 *
 * Component `i` (zero-based) is `Math.sin(seed * (i + 1))`, so the same seed
 * always yields the same vector and cosine scores can be asserted without
 * hardcoding floats. The default width of 1024 matches the bge-m3 embedding
 * size these tests emulate.
 *
 * @param {number} seed - Multiplier that distinguishes one vector from another.
 * @param {number} [dim=1024] - Number of components to generate.
 * @returns {number[]} Array holding `dim` sine values.
 */
function fakeVector(seed, dim = 1024) {
  const components = new Array(dim);
  let index = 0;
  while (index < dim) {
    components[index] = Math.sin(seed * (index + 1));
    index += 1;
  }
  return components;
}
/**
 * Tiny stand-in for the Workers AI binding used by the client under test.
 *
 * @param {Function} impl - Invoked as `impl(model, input)`; whatever it returns
 *   is what the mocked `run` returns (the value `env.AI.run()` would resolve to).
 * @returns {{ run: Function }} Object whose `run` is a `vi.fn` wrapping `impl`,
 *   so calls can be inspected through `run.mock`.
 */
function fakeAi(impl) {
  const run = vi.fn(impl);
  return { run };
}
describe("doantu/api-client", () => {
// Reset Vitest mock state after every test in this suite.
// NOTE(review): the tests below install their fake by assigning `global.fetch`
// directly rather than via `vi.spyOn`/`vi.stubGlobal`; restoreAllMocks
// presumably does not put the real `fetch` back in that case, so the last fake
// may leak into later tests — confirm against the Vitest docs.
afterEach(() => {
vi.restoreAllMocks();
});
describe("UpstreamError", () => {
it("stores status and body metadata", () => {
const err = new UpstreamError("test", { status: 404, body: "not found" });
@@ -38,107 +23,156 @@ describe("doantu/api-client", () => {
});
describe("createClient", () => {
it("throws without a valid AI binding", () => {
expect(() => createClient(null)).toThrow(TypeError);
expect(() => createClient({})).toThrow(TypeError);
expect(() => createClient({ run: "not a function" })).toThrow(TypeError);
it("randomWord builds correct URL with filters", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });
  // Canned successful upstream reply carrying a single word.
  const okResponse = {
    ok: true,
    text: () => Promise.resolve('{"word":"chó"}'),
  };
  // The fake fetch checks the requested URL before replying.
  global.fetch = vi.fn((requestUrl) => {
    expect(requestUrl).toContain("/random");
    expect(requestUrl).toContain("min_len=2");
    return Promise.resolve(okResponse);
  });
  const picked = await api.randomWord({ min_len: 2 });
  expect(picked.word).toBe("chó");
});
it("similarity batches target + guess in a single run() call with bge-m3", async () => {
  // Answer with one fake embedding per input string.
  const binding = fakeAi(async (_model, { text }) => {
    const embeddings = text.map((_, position) => fakeVector(position + 1));
    return { shape: [text.length, 1024], data: embeddings };
  });
  const api = createClient(binding);
  await api.similarity("chó", "mèo");
  // Both words must travel in one request to the expected model.
  expect(binding.run).toHaveBeenCalledTimes(1);
  const firstCall = binding.run.mock.calls[0];
  expect(firstCall[0]).toBe("@cf/baai/bge-m3");
  expect(firstCall[1]).toEqual({ text: ["chó", "mèo"] });
});
it("similarity returns cosine score for an in-vocab Vietnamese guess", async () => {
const ai = fakeAi(async (_model, { text }) => ({
data: text.map((_, i) => fakeVector(i + 1)),
}));
const client = createClient(ai);
it("similarity builds URL with both words", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn((url) => {
expect(url).toContain("/similarity");
expect(url).toMatch(/a=ch%C3%B3/);
expect(url).toMatch(/b=m%C3%A8o/);
return Promise.resolve({
ok: true,
text: () =>
Promise.resolve(
'{"a":"chó","b":"mèo","in_vocab_a":true,"in_vocab_b":true,"canonical_a":"chó","canonical_b":"mèo","similarity":0.42}',
),
});
});
const res = await client.similarity("chó", "mèo");
expect(res.in_vocab_a).toBe(true);
expect(res.in_vocab_b).toBe(true);
expect(res.canonical_a).toBe("chó");
expect(res.similarity).toBe(0.42);
expect(res.canonical_b).toBe("mèo");
expect(typeof res.similarity).toBe("number");
expect(res.similarity).toBeGreaterThan(-1);
expect(res.similarity).toBeLessThanOrEqual(1);
});
it('similarity accepts multi-syllable Vietnamese words in vocab ("a dua")', async () => {
  const binding = fakeAi(async () => {
    const pair = [fakeVector(1), fakeVector(2)];
    return { data: pair };
  });
  const api = createClient(binding);
  // A guess containing a space is still treated as a vocabulary word.
  const outcome = await api.similarity("chó", "a dua");
  expect(outcome.in_vocab_b).toBe(true);
  expect(outcome.similarity).not.toBeNull();
});
it("similarity returns 1 for identical vectors", async () => {
  // Both words map to the very same embedding.
  const shared = fakeVector(7);
  const binding = fakeAi(async () => {
    return { data: [shared, shared] };
  });
  const api = createClient(binding);
  const outcome = await api.similarity("chó", "mèo");
  expect(outcome.similarity).toBeCloseTo(1, 10);
});
it("similarity skips AI call for OOV guess and flags in_vocab_b:false", async () => {
  const binding = fakeAi(async () => {
    const pair = [fakeVector(1), fakeVector(2)];
    return { data: pair };
  });
  const api = createClient(binding);
  const outcome = await api.similarity("chó", "zzzkhôngcótrongtừđiển");
  // Out-of-vocabulary guess: flagged, unscored, and the binding is never hit.
  expect(outcome.in_vocab_b).toBe(false);
  expect(outcome.similarity).toBe(null);
  expect(binding.run).not.toHaveBeenCalled();
});
it("similarity wraps AI.run rejection as UpstreamError", async () => {
const ai = fakeAi(async () => {
throw new Error("boom");
it("URL-encodes multi-syllable Vietnamese guesses", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn((url) => {
expect(url).toMatch(/b=con\+ch%C3%B3|b=con%20ch%C3%B3/);
return Promise.resolve({
ok: true,
text: () =>
Promise.resolve(
'{"a":"mèo","b":"con chó","in_vocab_a":true,"in_vocab_b":true,"similarity":0.3}',
),
});
});
const client = createClient(ai);
await expect(client.similarity("chó", "mèo")).rejects.toMatchObject({
await client.similarity("mèo", "con chó");
expect(global.fetch).toHaveBeenCalled();
});
it("throws UpstreamError on non-2xx response", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });
  // Upstream answers with a server error and a plain-text body.
  global.fetch = vi.fn(() => {
    const failure = {
      ok: false,
      status: 500,
      text: () => Promise.resolve("Internal Server Error"),
    };
    return Promise.resolve(failure);
  });
  // Status and body must be carried on the thrown error.
  const expectedError = {
    name: "UpstreamError",
    status: 500,
    body: "Internal Server Error",
  };
  await expect(api.randomWord()).rejects.toMatchObject(expectedError);
});
it("throws UpstreamError when response is not valid JSON", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });
  // Successful status, but the body is not JSON.
  global.fetch = vi.fn(() => {
    const garbled = {
      ok: true,
      text: () => Promise.resolve("not json at all"),
    };
    return Promise.resolve(garbled);
  });
  const expectedError = { name: "UpstreamError" };
  await expect(api.randomWord()).rejects.toMatchObject(expectedError);
});
it("similarity throws UpstreamError on malformed payload", async () => {
const ai = fakeAi(async () => ({ data: [fakeVector(1)] }));
const client = createClient(ai);
await expect(client.similarity("chó", "mèo")).rejects.toMatchObject({
name: "UpstreamError",
it("throws UpstreamError on fetch failure", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });
  // Simulate a transport-level failure: fetch itself rejects.
  global.fetch = vi.fn(async () => {
    throw new Error("network error");
  });
  const pending = api.randomWord();
  await expect(pending).rejects.toThrow("phow2sim fetch failed");
});
it("truncates response body to 500 chars on non-OK", async () => {
  const client = createClient("https://api.test", { timeoutMs: 50 });
  // 600-character body: longer than the 500-character cap being verified.
  const longBody = "x".repeat(600);
  global.fetch = vi.fn(() =>
    Promise.resolve({
      ok: false,
      status: 400,
      text: () => Promise.resolve(longBody),
    }),
  );
  // Capture the rejection explicitly. Asserting only inside `catch` would let
  // the test pass vacuously if randomWord() ever stopped rejecting.
  let caught;
  try {
    await client.randomWord();
  } catch (err) {
    caught = err;
  }
  expect(caught).toMatchObject({ name: "UpstreamError", status: 400 });
  expect(caught.body.length).toBe(500);
});
it("includes User-Agent header identifying doantu", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });
  const okResponse = {
    ok: true,
    text: () => Promise.resolve('{"word":"chó"}'),
  };
  // The fake fetch checks the request options (second argument) before replying.
  global.fetch = vi.fn((_url, init) => {
    expect(init.headers["User-Agent"]).toContain("miti99bot");
    expect(init.headers["User-Agent"]).toContain("doantu");
    return Promise.resolve(okResponse);
  });
  await api.randomWord();
});
it("similarity returns null score when a vector norm is zero", async () => {
const zero = new Array(1024).fill(0);
const ai = fakeAi(async () => ({ data: [zero, fakeVector(1)] }));
const client = createClient(ai);
const res = await client.similarity("chó", "mèo");
expect(res.in_vocab_b).toBe(true);
expect(res.similarity).toBe(null);
it("includes Accept header", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });
  const okResponse = {
    ok: true,
    text: () => Promise.resolve('{"word":"chó"}'),
  };
  // The request must ask for a JSON response.
  global.fetch = vi.fn((_url, init) => {
    const { headers } = init;
    expect(headers.Accept).toBe("application/json");
    return Promise.resolve(okResponse);
  });
  await api.randomWord();
});
it("randomWord returns a verified pick from the local pool", async () => {
const ai = fakeAi(async () => ({ data: [] }));
const client = createClient(ai);
const res = await client.randomWord();
expect(typeof res.word).toBe("string");
expect(res.word.length).toBeGreaterThan(0);
expect(res.verified).toBe(true);
expect(ai.run).not.toHaveBeenCalled();
it("handles trailing slashes in API base URL", async () => {
  // Base URL deliberately ends with three slashes.
  const api = createClient("https://api.test///", { timeoutMs: 100 });
  const okResponse = {
    ok: true,
    text: () => Promise.resolve('{"word":"chó"}'),
  };
  global.fetch = vi.fn((requestUrl) => {
    const hasSingleSlashPrefix = requestUrl.startsWith("https://api.test/");
    const hasQuadSlashPrefix = requestUrl.startsWith("https://api.test////");
    expect(hasSingleSlashPrefix).toBe(true);
    // NOTE(review): this only rules out four consecutive slashes; a URL with
    // two or three would still pass — confirm whether that is intended.
    expect(hasQuadSlashPrefix).toBe(false);
    return Promise.resolve(okResponse);
  });
  await api.randomWord();
});
it("supports model override via options", async () => {
const ai = fakeAi(async () => ({ data: [fakeVector(1), fakeVector(2)] }));
const client = createClient(ai, { model: "@cf/baai/bge-large-en-v1.5" });
await client.similarity("chó", "mèo");
expect(ai.run.mock.calls[0][0]).toBe("@cf/baai/bge-large-en-v1.5");
it("filters out undefined/null params", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });
  const okResponse = {
    ok: true,
    text: () => Promise.resolve('{"word":"chó"}'),
  };
  // Neither nullish filter may appear in the query string.
  global.fetch = vi.fn((requestUrl) => {
    expect(requestUrl).not.toContain("min_len=");
    expect(requestUrl).not.toContain("max_len=");
    return Promise.resolve(okResponse);
  });
  const filters = { min_len: undefined, max_len: null };
  await api.randomWord(filters);
});
});
});