refactor(semantle): swap word2sim backend for ConceptNet

ConceptNet provides a free public /relatedness endpoint (it returns a
cosine-like score in [-1, 1]) and /c/en/{term} for vocabulary checks. It has
no random-word endpoint, so we ship a curated local target pool in wordlist.js
(~250 words) and verify each pick via the concept endpoint, falling back to an
unverified pick when verification does not succeed.

Each guess now makes two parallel ConceptNet calls (concept + relatedness)
instead of a single word2sim call. Latency is slightly higher, but there is
zero hosting cost and no dependency on the self-hosted word2sim instance.

- api-client.js rewritten; UpstreamError replaces Word2SimError (aliased
  for backwards compat with older imports).
- wordlist.js added (curated target pool + pickFromPool).
- handlers.js: drops RANDOM_FILTERS (no filtering needed; pool is curated).
- index.js: drops WORD2SIM_API_URL env var; ConceptNet base hardcoded.
- wrangler.toml + .dev.vars.example: drop WORD2SIM_API_URL.
- api-client tests rewritten for ConceptNet shape; total tests 336 → 341.
This commit is contained in:
2026-04-22 23:07:54 +07:00
parent 51d36272c7
commit fca6d733c9
8 changed files with 337 additions and 164 deletions
+137 -91
View File
@@ -1,76 +1,142 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { Word2SimError, createClient } from "../../../src/modules/semantle/api-client.js";
import {
UpstreamError,
Word2SimError,
createClient,
} from "../../../src/modules/semantle/api-client.js";
/**
 * ConceptNet stub: a fake successful response for the concept lookup
 * (`/c/en/{term}`), carrying only the minimal shape the client cares about.
 *
 * @param {number} [edgeCount=5] - number of placeholder edges listed in the body.
 * @returns {{ ok: boolean, text: () => Promise<string> }} response-like object
 *   whose `text()` resolves to JSON of the form `{"edges":[{"id":"e0"}, ...]}`.
 */
function conceptResp(edgeCount = 5) {
  // The body is built on demand, each time text() is invoked.
  const serializeBody = () => {
    const edges = Array.from({ length: edgeCount }).map((_, index) => ({ id: `e${index}` }));
    return JSON.stringify({ edges });
  };

  return {
    ok: true,
    text: () => Promise.resolve(serializeBody()),
  };
}
/**
 * ConceptNet stub: a fake successful `/relatedness` response.
 *
 * @param {*} value - score to embed under the `value` key of the JSON body.
 * @returns {{ ok: boolean, text: () => Promise<string> }} response-like object
 *   whose `text()` resolves to the serialized `{ value }` payload.
 */
function relatednessResp(value) {
  const stub = { ok: true };
  // Serialization happens per text() call, not when the stub is created.
  stub.text = () => {
    const body = JSON.stringify({ value });
    return Promise.resolve(body);
  };
  return stub;
}
describe("semantle/api-client", () => {
// Captured once so the per-test `global.fetch = vi.fn(...)` stubs can be undone.
const originalFetch = global.fetch;

/**
 * Per-test cleanup: restore spies and put the real fetch back.
 */
afterEach(() => {
  vi.restoreAllMocks();
  // restoreAllMocks() does not revert a plain assignment to global.fetch, so
  // without this the stub from one test would still be installed in the next.
  global.fetch = originalFetch;
});
describe("Word2SimError", () => {
describe("UpstreamError", () => {
it("stores status and body metadata", () => {
const err = new Word2SimError("test", { status: 404, body: "not found" });
const err = new UpstreamError("test", { status: 404, body: "not found" });
expect(err.message).toBe("test");
expect(err.status).toBe(404);
expect(err.body).toBe("not found");
expect(err.name).toBe("Word2SimError");
expect(err.name).toBe("UpstreamError");
});
it("stores cause when provided", () => {
const cause = new Error("underlying");
const err = new Word2SimError("wrapper", { cause });
const err = new UpstreamError("wrapper", { cause });
expect(err.cause).toBe(cause);
});
// Backwards-compat check: the legacy export name must be the very same class
// object as UpstreamError (toBe compares by identity, not structure).
it("is re-exported as Word2SimError alias for legacy callers", () => {
expect(Word2SimError).toBe(UpstreamError);
});
});
describe("createClient", () => {
it("randomWord builds correct URL with filters", async () => {
it("similarity runs concept + relatedness in parallel", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
const calls = [];
global.fetch = vi.fn((url) => {
expect(url).toContain("/random");
expect(url).toContain("min_rank=5");
expect(url).toContain("alpha=true");
return Promise.resolve({
ok: true,
text: () => Promise.resolve('{"word":"apple","rank":1234}'),
});
});
const res = await client.randomWord({ min_rank: 5, alpha: true });
expect(res.word).toBe("apple");
expect(res.rank).toBe(1234);
});
it("similarity builds URL with both words", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn((url) => {
expect(url).toContain("/similarity");
expect(url).toContain("a=apple");
expect(url).toContain("b=orange");
return Promise.resolve({
ok: true,
text: () =>
Promise.resolve(
'{"a":"apple","b":"orange","in_vocab_a":true,"in_vocab_b":true,"similarity":0.45}',
),
});
calls.push(String(url));
if (url.includes("/relatedness")) return Promise.resolve(relatednessResp(0.45));
return Promise.resolve(conceptResp(3));
});
const res = await client.similarity("apple", "orange");
expect(res.similarity).toBe(0.45);
expect(res.in_vocab_b).toBe(true);
expect(res.canonical_b).toBe("orange");
expect(global.fetch).toHaveBeenCalledTimes(2);
expect(calls.some((u) => u.includes("/c/en/orange"))).toBe(true);
expect(calls.some((u) => u.includes("node1=%2Fc%2Fen%2Fapple"))).toBe(true);
expect(calls.some((u) => u.includes("node2=%2Fc%2Fen%2Forange"))).toBe(true);
});
it("URL-encodes special characters in params", async () => {
it("similarity flags OOV when the concept endpoint returns no edges", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn((url) => {
expect(url).toMatch(/search=hello/);
return Promise.resolve({
ok: true,
text: () => Promise.resolve('{"word":"test"}'),
});
if (url.includes("/relatedness")) return Promise.resolve(relatednessResp(0.02));
return Promise.resolve(conceptResp(0));
});
await client.randomWord({ search: "hello world" });
expect(global.fetch).toHaveBeenCalled();
const res = await client.similarity("apple", "zzzfoo");
expect(res.in_vocab_b).toBe(false);
expect(res.similarity).toBe(null);
});
it("throws Word2SimError on non-2xx response", async () => {
it("similarity returns null when relatedness payload lacks a numeric value", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });

  // Relatedness answers with an empty JSON object (no `value` key); the
  // concept lookup still reports edges.
  global.fetch = vi.fn((target) => {
    const isRelatedness = target.includes("/relatedness");
    const reply = isRelatedness
      ? { ok: true, text: () => Promise.resolve("{}") }
      : conceptResp(5);
    return Promise.resolve(reply);
  });

  const outcome = await api.similarity("apple", "orange");
  expect(outcome.similarity).toBe(null);
});
it("similarity distinguishes 0 from null", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });

  // A real score of exactly 0 must come back as 0, not as "no score".
  global.fetch = vi.fn((target) => {
    const reply = target.includes("/relatedness") ? relatednessResp(0) : conceptResp(5);
    return Promise.resolve(reply);
  });

  const outcome = await api.similarity("apple", "orange");
  expect(outcome.similarity).toBe(0);
  expect(outcome.in_vocab_b).toBe(true);
});
it("randomWord returns a verified pick when edges present", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });

  // Every lookup reports five edges.
  const fetchStub = vi.fn(() => Promise.resolve(conceptResp(5)));
  global.fetch = fetchStub;

  const pick = await api.randomWord();
  expect(typeof pick.word).toBe("string");
  expect(pick.word.length).toBeGreaterThan(0);
  expect(pick.verified).toBe(true);
});
it("randomWord falls back to unverified pick after max attempts", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });

  // Zero edges on every concept lookup, so no candidate can be verified and
  // the retries run out.
  const fetchStub = vi.fn(() => Promise.resolve(conceptResp(0)));
  global.fetch = fetchStub;

  const pick = await api.randomWord();
  expect(pick.verified).toBe(false);
  expect(typeof pick.word).toBe("string");
});
it("randomWord swallows transient fetch errors during verification", async () => {
  const api = createClient("https://api.test", { timeoutMs: 100 });

  // The first two fetch calls reject; every later call returns a concept
  // response with edges.
  let attempts = 0;
  global.fetch = vi.fn(() => {
    attempts += 1;
    return attempts <= 2
      ? Promise.reject(new Error("transient"))
      : Promise.resolve(conceptResp(3));
  });

  const pick = await api.randomWord();
  expect(pick.verified).toBe(true);
});
it("concept throws UpstreamError on non-2xx response", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn(() =>
Promise.resolve({
@@ -79,95 +145,75 @@ describe("semantle/api-client", () => {
text: () => Promise.resolve("Internal Server Error"),
}),
);
await expect(client.randomWord()).rejects.toMatchObject({
name: "Word2SimError",
await expect(client.concept("apple")).rejects.toMatchObject({
name: "UpstreamError",
status: 500,
body: "Internal Server Error",
});
});
it("throws Word2SimError when response is not valid JSON", async () => {
it("concept throws UpstreamError when response is not valid JSON", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve("not json at all"),
}),
Promise.resolve({ ok: true, text: () => Promise.resolve("not json") }),
);
await expect(client.randomWord()).rejects.toMatchObject({
name: "Word2SimError",
});
await expect(client.concept("apple")).rejects.toMatchObject({ name: "UpstreamError" });
});
it("throws Word2SimError on fetch failure", async () => {
it("concept throws UpstreamError on fetch failure", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn(() => Promise.reject(new Error("network error")));
await expect(client.randomWord()).rejects.toThrow("word2sim fetch failed");
await expect(client.concept("apple")).rejects.toThrow("conceptnet fetch failed");
});
it("uses custom timeout and truncates response body to 500 chars", async () => {
it("truncates response body to 500 chars in UpstreamError", async () => {
const client = createClient("https://api.test", { timeoutMs: 50 });
const longBody = "x".repeat(600);
global.fetch = vi.fn(() =>
Promise.resolve({
ok: false,
status: 400,
text: () => Promise.resolve(longBody),
}),
Promise.resolve({ ok: false, status: 400, text: () => Promise.resolve(longBody) }),
);
try {
await client.randomWord();
await client.concept("apple");
} catch (err) {
expect(err.body.length).toBe(500);
}
});
it("includes User-Agent header", async () => {
it("sends User-Agent and Accept headers", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn((_, opts) => {
expect(opts.headers["User-Agent"]).toContain("miti99bot");
return Promise.resolve({
ok: true,
text: () => Promise.resolve('{"word":"test"}'),
});
});
await client.randomWord();
});
it("includes Accept header", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn((_, opts) => {
expect(opts.headers.Accept).toBe("application/json");
return Promise.resolve({
ok: true,
text: () => Promise.resolve('{"word":"test"}'),
});
return Promise.resolve(conceptResp(1));
});
await client.randomWord();
await client.concept("apple");
});
it("handles trailing slashes in API base URL", async () => {
it("strips trailing slashes from the API base URL", async () => {
const client = createClient("https://api.test///", { timeoutMs: 100 });
global.fetch = vi.fn((url) => {
expect(url.startsWith("https://api.test/")).toBe(true);
return Promise.resolve({
ok: true,
text: () => Promise.resolve('{"word":"test"}'),
});
expect(url.startsWith("https://api.test/c/en/")).toBe(true);
return Promise.resolve(conceptResp(1));
});
await client.randomWord();
await client.concept("apple");
});
it("filters out undefined/null params", async () => {
it("URL-encodes the term path segment", async () => {
const client = createClient("https://api.test", { timeoutMs: 100 });
global.fetch = vi.fn((url) => {
expect(url).not.toContain("min_rank=");
return Promise.resolve({
ok: true,
text: () => Promise.resolve('{"word":"test"}'),
});
expect(url).toContain("/c/en/hello%20world");
return Promise.resolve(conceptResp(1));
});
await client.randomWord({ min_rank: undefined, max_rank: null });
await client.concept("hello world");
});
it("defaults to the public ConceptNet base URL when none provided", async () => {
  // No arguments: the client has to supply its own base URL.
  const api = createClient();

  global.fetch = vi.fn((target) => {
    const hitsPublicHost = target.startsWith("https://api.conceptnet.io/");
    expect(hitsPublicHost).toBe(true);
    return Promise.resolve(conceptResp(1));
  });

  await api.concept("apple");
});
});
});