Files
miti99bot/scripts/scrape-loldle-data.js
T
tiennm99 615dc8174c refactor(loldle): import champions.json directly, drop ESM wrapper
Node 24 + wrangler 4.x both accept `import ... with { type: "json" }`,
so the generated champions-data.js wrapper is no longer needed.

Drop scripts/build-loldle-data.js and the build:loldle-data npm script.
Scraper writes champions.json only.
2026-04-22 13:24:24 +07:00

104 lines
3.5 KiB
JavaScript

#!/usr/bin/env node
/**
* @file scrape-loldle-data — rebuilds src/modules/loldle/champions.json from
* loldle.net's JS bundle, the canonical source for the classic-mode axes:
* gender, species, resource, attackType, region, lane, releaseDate.
*
* loldle.net embeds the full champion array in plaintext inside its JS bundle
* at `<script src="js/index.<hash>.js">`, one record per champion with the
* exact shape the bot needs. No CryptoJS decoding, no ddragon merge.
*
* Writes src/modules/loldle/champions.json. The bot imports this JSON
* directly via `with { type: "json" }` (Node 24 + wrangler 4.x).
*
* Usage:
* node scripts/scrape-loldle-data.js
*
* Schedule: weekly via .github/workflows/scrape-loldle-data.yml
*/
import { writeFileSync } from "node:fs";
import { resolve } from "node:path";
// Page whose HTML is fetched first; scrapeLoldle searches it for the
// `js/index.<hash>.js` script tag that points at the bundle.
const LOLDLE_CLASSIC = "https://loldle.net/classic";
// Lowercased position name from the bundle -> lane value written to the output.
// scrapeLoldle drops any position that has no entry here.
const LANE_MAP = {
top: "top",
jungle: "jungle",
middle: "mid",
bottom: "bottom",
support: "support",
};
// Lowercased gender string from the bundle -> value written to the `gender`
// field. scrapeLoldle falls back to "divers" for any key not listed here.
const GENDER_MAP = { male: "male", female: "female", other: "divers" };
// Matches one champion object literal inside the bundle source. Capture groups:
//   1 championName        2 gender
//   3 positions (text between the brackets)
//   4 species (text between the brackets)
//   5 resource
//   6 range_type (text between the brackets)
//   7 regions (text between the brackets)
//   8 release year (only the four-digit year of release_date is captured)
// The `g` flag is required because the pattern is consumed via String.prototype.matchAll.
const CHAMPION_RECORD_RX =
/\{_id:"[a-f0-9]+",championId:"[^"]+",championName:"([^"]+)",gender:"([^"]+)",positions:\[([^\]]+)\],species:\[([^\]]+)\],resource:"([^"]+)",range_type:\[([^\]]+)\],regions:\[([^\]]+)\],release_date:"(\d{4})-\d{2}-\d{2}"\}/g;
/**
 * Downloads a URL and returns its body as text.
 *
 * @param {string} url - Address to request with the global `fetch`.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the response's `ok` flag is false; the message carries
 *   the URL, status code and status text.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (response.ok) {
    return response.text();
  }
  const { status, statusText } = response;
  throw new Error(`fetch ${url}: ${status} ${statusText}`);
}
/**
 * Extracts every non-empty double-quoted string from the text between the
 * brackets of a JS array literal.
 *
 * @param {string} inner - Array contents without the surrounding brackets.
 * @returns {string[]} The quoted values, in order of appearance, without quotes.
 */
function parseJsArrayStrings(inner) {
  const values = [];
  for (const [, value] of inner.matchAll(/"([^"]+)"/g)) {
    values.push(value);
  }
  return values;
}
/**
 * Turns a region display name into a lowercase, hyphen-separated slug.
 * Each run of whitespace becomes a single "-".
 *
 * @param {string} name - Region name as it appears in the bundle.
 * @returns {string} The slugged region.
 */
function normalizeRegion(name) {
  const lowered = name.toLowerCase();
  return lowered.split(/\s+/).join("-");
}
/**
 * Fetches the loldle.net classic page, follows its `js/index.<hash>.js` script
 * tag to the bundle, and converts every champion record matched by
 * CHAMPION_RECORD_RX into the flat shape stored in champions.json.
 *
 * Only the first record seen for a given champion name is kept. Multi-valued
 * fields (species, region, lane) are emitted as comma-joined strings.
 *
 * @returns {Promise<Array<{
 *   id: string, name: string, gender: string, species: string, resource: string,
 *   attackType: string, region: string, lane: string, releaseDate: number
 * }>>} Champion records sorted by name via `localeCompare`.
 * @throws {Error} When the script tag is not found in the HTML, when no record
 *   matches the bundle, or when either fetch fails.
 */
async function scrapeLoldle() {
  const pageHtml = await fetchText(LOLDLE_CLASSIC);
  const bundleTag = pageHtml.match(/<script\s+src="(js\/index\.[^"]+\.js)"/);
  if (bundleTag === null) {
    throw new Error("loldle.net: could not locate index.js script tag in HTML");
  }
  const bundleSource = await fetchText(`https://loldle.net/${bundleTag[1]}`);

  // A Map keyed by champion name both de-duplicates and keeps first-seen order.
  const championsByName = new Map();
  for (const match of bundleSource.matchAll(CHAMPION_RECORD_RX)) {
    const name = match[1];
    if (championsByName.has(name)) {
      continue;
    }
    const genderRaw = match[2];
    const positionsRaw = match[3];
    const speciesRaw = match[4];
    const resource = match[5];
    const rangeTypeRaw = match[6];
    const regionsRaw = match[7];
    const year = match[8];

    // Positions without a LANE_MAP entry are dropped.
    const lanes = [];
    for (const position of parseJsArrayStrings(positionsRaw)) {
      const lane = LANE_MAP[position.toLowerCase()];
      if (lane) {
        lanes.push(lane);
      }
    }
    const regions = parseJsArrayStrings(regionsRaw).map((region) => normalizeRegion(region));
    const species = parseJsArrayStrings(speciesRaw).map((entry) => entry.toLowerCase());

    // Only the first range_type entry decides the attack type.
    const [firstRangeType] = parseJsArrayStrings(rangeTypeRaw);
    const isMelee = firstRangeType?.toLowerCase() === "melee";

    championsByName.set(name, {
      id: name,
      name,
      gender: GENDER_MAP[genderRaw.toLowerCase()] ?? "divers",
      species: species.join(","),
      resource,
      attackType: isMelee ? "close" : "range",
      region: regions.join(","),
      lane: lanes.join(","),
      releaseDate: Number(year),
    });
  }

  const champions = [...championsByName.values()];
  if (champions.length === 0) {
    throw new Error(
      "loldle.net: zero champion records parsed — bundle format changed, update CHAMPION_RECORD_RX",
    );
  }
  champions.sort((left, right) => left.name.localeCompare(right.name));
  return champions;
}
// Output location: <repo root>/src/modules/loldle/champions.json, where the
// repo root is the parent of this script's directory.
const root = resolve(import.meta.dirname, "..");
const jsonPath = resolve(root, "src/modules/loldle/champions.json");

console.log("scraping loldle.net…");
const records = await scrapeLoldle();
console.log(` parsed ${records.length} champions`);

// Pretty-print with 4-space indentation and end the file with a newline.
const serialized = JSON.stringify(records, null, 4);
writeFileSync(jsonPath, `${serialized}\n`);
console.log(`wrote ${jsonPath}`);