mirror of
https://github.com/tiennm99/miti99bot.git
synced 2026-04-28 10:20:35 +00:00
c0315574c0
Near-clone of the semantle module, adapted for Vietnamese: - Targets from duyet/vietnamese-wordlist Viet22K (~22k entries, GPL). Regenerate via scripts/build-doantu-words.js; chained into npm run build. - ConceptNet client uses /c/vi/<term> URIs; multi-word guesses (e.g. "con chó") are space-to-underscore converted at URL build time so the board keeps the natural display. - lookup.js permits Unicode letters + combining marks + single internal spaces; rejects digits/punctuation. - All three commands (/doantu, /doantu_giveup, /doantu_stats) are visibility=protected — shown in /help, hidden from Telegram's native / autocomplete menu while the module is still experimental. Wired into src/modules/index.js, wrangler.toml MODULES, .env.deploy(.example), and package.json build chain. Separate module rather than a shared base with semantle — matches the repo's one-module-per-game convention (see loldle vs wordle); factor later if a third language appears.
63 lines
2.3 KiB
JavaScript
63 lines
2.3 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* @file build-doantu-words — fetches the middle-sized Vietnamese wordlist
|
|
* (Viet22K) from duyet/vietnamese-wordlist and writes it to
|
|
* src/modules/doantu/words-data.js as a static ES-module array.
|
|
*
|
|
* Source is an alphabetically-sorted Unicode dictionary, one word or phrase
|
|
* per line. We normalize only (trim, lowercase, dedupe); NO length/char
|
|
* filtering — ConceptNet's verify-and-fallback in api-client handles
|
|
* unusable picks at round start.
|
|
*
|
|
* Source: https://github.com/duyet/vietnamese-wordlist
|
|
* Credits: Ho Ngoc Duc — Vietnamese word list (GPL).
|
|
*
|
|
* Usage:
|
|
* node scripts/build-doantu-words.js
|
|
*/
|
|
|
|
import { writeFileSync } from "node:fs";
|
|
import { resolve } from "node:path";
|
|
|
|
// Size options from the upstream repo: Viet11K / Viet22K / Viet39K / Viet74K.
// Viet22K is a good balance — enough variety, not overwhelmed by archaic terms.
const SOURCE_URL = "https://raw.githubusercontent.com/duyet/vietnamese-wordlist/master/Viet22K.txt";

// Output path is resolved against the parent of the directory holding this script.
const root = resolve(import.meta.dirname, "..");
const dst = resolve(root, "src/modules/doantu/words-data.js");

/**
 * Canonicalizes one raw line of the wordlist.
 *
 * Steps: lowercase, Unicode NFC, collapse every whitespace run to a single
 * space, trim. No length or character filtering is applied.
 *
 * @param {string} line - One line of the downloaded text (no line terminator).
 * @returns {string} The canonical entry; empty when the line was blank.
 */
function normalizeEntry(line) {
  return (
    line
      .toLowerCase()
      // Vietnamese letters can be encoded precomposed or as a base letter plus
      // combining marks. NFC makes canonically-equivalent spellings identical
      // strings, so the Set below dedupes them instead of keeping both.
      .normalize("NFC")
      // Multi-word entries keep their spaces, but exactly one between words,
      // so variants that differ only in spacing collapse to the same entry.
      .replace(/\s+/g, " ")
      .trim()
  );
}

/**
 * Splits the downloaded wordlist into unique canonical entries.
 *
 * @param {string} text - Full wordlist body, one word or phrase per line.
 * @returns {string[]} Unique non-empty entries, in first-seen source order.
 */
function parseWordlist(text) {
  // A Set iterates in insertion order, which preserves the source ordering.
  const unique = new Set();
  for (const line of text.split(/\r?\n/)) {
    const entry = normalizeEntry(line);
    if (entry.length > 0) unique.add(entry);
  }
  return [...unique];
}

/**
 * Renders the entries as the source text of an ES module whose default export
 * is the word array.
 *
 * @param {string[]} words - Canonical entries to emit, in output order.
 * @returns {string} Complete file contents, ending with a newline.
 */
function renderModule(words) {
  // JSON.stringify each word — safer than manual quoting for Vietnamese
  // diacritics and the occasional character that needs escaping.
  const body = words.map((w) => ` ${JSON.stringify(w)},`).join("\n");
  return [
    "// Auto-generated from https://github.com/duyet/vietnamese-wordlist (Viet22K.txt)",
    "// Credits: Ho Ngoc Duc — Vietnamese word list (GPL).",
    "// Normalized (lowercased + deduped) but otherwise unfiltered.",
    "// Regenerate with: node scripts/build-doantu-words.js",
    "export default [",
    body,
    "];",
    "",
  ].join("\n");
}

const res = await fetch(SOURCE_URL);
if (!res.ok) throw new Error(`fetch failed: ${res.status} ${res.statusText}`);
const text = await res.text();

// Multi-word entries keep their spaces — the api-client converts to underscore
// only at URL-build time, so the board still displays them naturally.
const words = parseWordlist(text);

// Refuse to overwrite the existing data file with an empty list.
if (words.length === 0) throw new Error("no words parsed from source");

const out = renderModule(words);

writeFileSync(dst, out);
console.log(`wrote ${dst} (${words.length} words)`);
|