Files
miti99bot/scripts/scrape-loldle-data.js
T
tiennm99 85c43109b6 fix(loldle): recover newer champions skipped by the scraper
loldle.net's classic-mode bundle has two record shapes — older champions
carry _id/championId, newer ones (Bel'Veth, K'Sante, Nilah, …) don't.
The regex required those leading fields, silently dropping anyone added
since 2022.

Make _id/championId optional and non-capturing, and drop them from the
output record (the bot never read them anyway). Champion count:
169 → 172; guessing /loldle k'sante, /loldle bel'veth, /loldle nilah
now resolve correctly.
2026-04-22 14:07:11 +07:00

81 lines
2.9 KiB
JavaScript

#!/usr/bin/env node
/**
* @file Rebuilds src/modules/loldle/champions.json from loldle.net's JS
* bundle. The bundle embeds the full champion array in plaintext. Records
* have two shapes (older champions carry _id/championId, newer ones don't);
* both shapes share the gameplay fields: championName, gender, positions,
* species, resource, range_type, regions, release_date — the only fields
* the bot consumes, so we keep just those.
*
* The bot imports the resulting JSON directly via `with { type: "json" }`.
*
* Usage: node scripts/scrape-loldle-data.js
* Schedule: weekly via .github/workflows/scrape-loldle-data.yml
*/
import { writeFileSync } from "node:fs";
import { resolve } from "node:path";
// Page whose HTML references the JS bundle that embeds the champion array.
const LOLDLE_CLASSIC = "https://loldle.net/classic";
// Matches one champion object literal inside the bundle text.
//
// _id and championId are only present on older records — the leading
// non-capturing group makes them optional and discards them (they're not
// used downstream).
//
// Capture groups, in order:
//   1 championName   2 gender       3 positions (raw array contents)
//   4 species (raw)  5 resource     6 range_type (raw)
//   7 regions (raw)  8 release_date (four digits-two digits-two digits)
//
// The pattern is deliberately strict: keys must appear in exactly this order,
// with no whitespace between tokens, and release_date must be the last key
// before the closing brace. A record that deviates simply does not match.
// The `g` flag is required because the pattern is consumed via matchAll().
const CHAMPION_RECORD_RX =
/\{(?:_id:"[a-f0-9]+",championId:"[^"]+",)?championName:"([^"]+)",gender:"([^"]+)",positions:\[([^\]]+)\],species:\[([^\]]+)\],resource:"([^"]+)",range_type:\[([^\]]+)\],regions:\[([^\]]+)\],release_date:"(\d{4}-\d{2}-\d{2})"\}/g;
/**
 * Downloads a URL and returns its body as text.
 *
 * @param {string} url - Address to request via the global `fetch`.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the response reports a non-OK status; the message
 *   carries the URL, status code and status text.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (response.ok) return response.text();

  const { status, statusText } = response;
  throw new Error(`fetch ${url}: ${status} ${statusText}`);
}
/**
 * Extracts the double-quoted string literals from the raw contents of a JS
 * array literal (the text between `[` and `]`).
 *
 * Empty literals (`""`) are kept as empty strings. Matching them matters for
 * correctness of the neighbours too: if `""` cannot be consumed as a literal,
 * its closing quote pairs up with the next literal's opening quote and the
 * separator between them (`,`) is returned as if it were a value.
 *
 * NOTE(review): backslash-escaped quotes inside a literal are not handled;
 * the values this script scrapes are assumed not to contain them.
 *
 * @param {string} inner - Raw array contents, e.g. `"Top","Jungle"`.
 * @returns {string[]} The literal values in source order, without quotes.
 */
function parseJsArrayStrings(inner) {
  return Array.from(inner.matchAll(/"([^"]*)"/g), (match) => match[1]);
}
/**
 * Downloads loldle.net's classic-mode page, follows its `js/index.*.js`
 * script tag to the bundle, and extracts every champion record that matches
 * CHAMPION_RECORD_RX.
 *
 * Duplicate champion names are collapsed (first occurrence wins). If the
 * bundle mentions `championName:"…"` values that produced no record, a
 * warning listing them is printed so that a partial format drift is visible
 * instead of silently shrinking the output.
 *
 * @returns {Promise<Array<{
 *   championName: string, gender: string, positions: string[],
 *   species: string[], resource: string, range_type: string[],
 *   regions: string[], release_date: string
 * }>>} Records sorted by championName.
 * @throws {Error} If a fetch fails, the script tag cannot be found, or no
 *   record at all matches.
 */
async function scrapeLoldle() {
  const html = await fetchText(LOLDLE_CLASSIC);
  const scriptMatch = html.match(/<script\s+src="(js\/index\.[^"]+\.js)"/);
  if (!scriptMatch) throw new Error("loldle.net: could not locate index.js script tag in HTML");

  const bundleUrl = `https://loldle.net/${scriptMatch[1]}`;
  const bundle = await fetchText(bundleUrl);

  const seen = new Set();
  const records = [];
  for (const m of bundle.matchAll(CHAMPION_RECORD_RX)) {
    const [, championName, gender, positions, species, resource, rangeType, regions, releaseDate] =
      m;
    // Keep only the first record per champion name.
    if (seen.has(championName)) continue;
    seen.add(championName);
    records.push({
      championName,
      gender,
      positions: parseJsArrayStrings(positions),
      species: parseJsArrayStrings(species),
      resource,
      range_type: parseJsArrayStrings(rangeType),
      regions: parseJsArrayStrings(regions),
      release_date: releaseDate,
    });
  }

  if (records.length === 0) {
    throw new Error(
      "loldle.net: zero champion records parsed — bundle format changed, update CHAMPION_RECORD_RX",
    );
  }

  // Loose second pass: any championName value present in the bundle that the
  // strict pattern did not turn into a record was skipped. Report it rather
  // than dropping it silently; this is a diagnostic only and never fails the run.
  const mentioned = new Set(
    Array.from(bundle.matchAll(/championName:"([^"]+)"/g), (hit) => hit[1]),
  );
  const skipped = [...mentioned].filter((name) => !seen.has(name));
  if (skipped.length > 0) {
    console.warn(
      `loldle.net: ${skipped.length} championName value(s) did not match CHAMPION_RECORD_RX and were skipped: ${skipped.join(", ")}`,
    );
  }

  // Pin the collation locale so the written order does not depend on the
  // default locale of whichever machine runs the scraper.
  records.sort((a, b) => a.championName.localeCompare(b.championName, "en"));
  return records;
}
// Script entry point: scrape, then overwrite champions.json (located relative
// to the repository root, one level above this script's directory).
const jsonPath = resolve(import.meta.dirname, "..", "src/modules/loldle/champions.json");

console.log("scraping loldle.net…");
const records = await scrapeLoldle();
console.log(` parsed ${records.length} champions`);

// 4-space indentation plus a trailing newline.
const serialized = `${JSON.stringify(records, null, 4)}\n`;
writeFileSync(jsonPath, serialized);
console.log(`wrote ${jsonPath}`);