Files
miti99bot-js/scripts/check-secret-leaks.js
tiennm99 0859356ec7 feat(scripts): phase 05 — backfill + verify + wipe (local node, no admin routes)
Operator-run migration scripts for KV→Mongo and D1→trading_trades, plus a
parity verifier and a rollback wiper. Pure local Node — no Worker code,
no /__admin/* routes, no new Worker secrets. Complies with
docs/architecture.md §10.

Scripts
- backfill-kv-to-mongo.js: paginates CF KV REST API per module, fetches
  values, $setOnInsert upsert into per-module Mongo collection. Resumes
  from .backfill-cursor-<module>.json on restart. Throttles 50 ops/sec.
  expiresAt derived from KV metadata.expiration (debugger #10). --dry-run
  and --module flags for incremental work.
- backfill-d1-to-mongo.js: wrangler d1 execute --remote --json → parse →
  insertMany batches into trading_trades, preserving original integer id
  as legacy_id (code-reviewer #13). Pre-flight aborts if collection
  non-empty unless --force.
- verify-mongo-parity.js: count parity ±1%, SHA256 value compare,
  expiresAt ±5min bucket. Full-scan when <10K docs, sqrt-sample
  capped at 500 otherwise (code-reviewer #21). Trading: full-scan
  on legacy_id/ts/user_id/symbol/qty.
- wipe-mongo.js: rollback helper. deleteMany across all collections
  with readline confirm. --yes for CI.
- lib/migration-helpers.js: shared sleep, sha256, checkpoint I/O,
  cfKvList/cfKvGet, MongoClient singleton, sample strategy.

Surface updates
- .env.deploy.example: CF account/token/namespace placeholders.
- package.json: backfill:kv[:dry], backfill:d1[:dry], verify:mongo,
  wipe:mongo scripts.
- check-secret-leaks.js: SECRETS array gains CLOUDFLARE_API_TOKEN +
  CLOUDFLARE_ACCOUNT_ID for defense-in-depth.
- .gitignore: .backfill-cursor-*.json excluded.

Tests: 638 → 667 (+29 pure-logic tests for sha256, checkpoint round-trip,
count-diff, sample-size, fetch-mocked CF REST). Lint clean.

Operator-run sequence (after Phase 06 deploy):
  npm run backfill:kv:dry   # preview
  npm run backfill:kv
  npm run backfill:d1:dry
  npm run backfill:d1
  npm run verify:mongo      # exit 0 = parity ok
2026-04-26 09:13:00 +07:00

125 lines
3.9 KiB
JavaScript

#!/usr/bin/env node
/**
* @file check-secret-leaks — fails build if any source file logs a known secret.
*
* Catches the common foot-gun where a developer prints `env.MONGODB_URI` or
* similar during debugging and forgets to remove the line before commit. We
* are NOT trying to be a full SAST — we just block obvious `console.log` /
* `console.error` sites that interpolate a secret env var.
*
* Wired into `npm run lint` so every PR / pre-deploy run catches it.
*
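* Patterns checked (add more as new secrets are introduced):
* - MONGODB_URI — Atlas connection string (Phase 01)
* - TELEGRAM_BOT_TOKEN — bot token from BotFather
* - TELEGRAM_WEBHOOK_SECRET — gates incoming webhook traffic
* - ADMIN_TOKEN — kept for defense-in-depth even though Phase 05
* redesign removed admin routes; cheap to leave in.
* - CLOUDFLARE_API_TOKEN — Cloudflare API credential used by the Phase 05
* backfill scripts; listed for defense-in-depth.
* - CLOUDFLARE_ACCOUNT_ID — Cloudflare API credential used by the Phase 05
* backfill scripts; listed for defense-in-depth.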
*
* Detection scope: any of these tokens appearing on the SAME line as a
* `console.<level>(...)`, a `JSON.stringify(env...)`, or any
* `throw new <Name>Error(...)` (whether or not the throw interpolates `env`).
*
* Exit codes:
* 0 — no leaks found
* 1 — at least one leak detected (prints file:line + offending line)
*/
import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
import { extname, join, relative, resolve } from "node:path";
const PROJECT_ROOT = resolve(import.meta.dirname, "..");
const SCAN_DIRS = ["src", "scripts"];
const SCAN_EXTS = new Set([".js", ".mjs", ".ts"]);
const SECRETS = [
"MONGODB_URI",
"TELEGRAM_BOT_TOKEN",
"TELEGRAM_WEBHOOK_SECRET",
"ADMIN_TOKEN",
// Phase 05: CF API creds used by backfill scripts — defense-in-depth.
"CLOUDFLARE_API_TOKEN",
"CLOUDFLARE_ACCOUNT_ID",
];
// A line is suspicious if it both names a secret AND looks like it's emitting
// the value (console.*, JSON.stringify(env...), throw with interpolation).
const EMIT_PATTERNS = [
/\bconsole\.(log|info|warn|error|debug|trace)\b/,
/\bJSON\.stringify\s*\(\s*env\b/,
/\bthrow\s+new\s+\w*Error\b/,
];
/**
 * Recursively collect the files under `dir` whose extension is in SCAN_EXTS.
 *
 * Directories named `node_modules` or `.wrangler` are not descended into.
 * Entries that cannot be stat'ed because they do not resolve to anything —
 * a dangling symlink, or a file removed while the scan is running — are
 * skipped instead of aborting the whole check with ENOENT.
 *
 * @param {string} dir directory to scan; a non-existent directory yields `[]`
 * @returns {string[]} matching file paths, each formed by joining onto `dir`
 *   (so they are absolute whenever `dir` is absolute)
 */
function walk(dir) {
  if (!existsSync(dir)) return [];
  const out = [];
  for (const entry of readdirSync(dir)) {
    const full = join(dir, entry);
    // With `throwIfNoEntry: false`, statSync returns undefined instead of
    // throwing when the path does not exist; other errors still propagate.
    const st = statSync(full, { throwIfNoEntry: false });
    if (st === undefined) continue;
    if (st.isDirectory()) {
      if (entry === "node_modules" || entry === ".wrangler") continue;
      out.push(...walk(full));
    } else if (SCAN_EXTS.has(extname(entry))) {
      out.push(full);
    }
  }
  return out;
}
/**
 * Inspect one file line by line and record every secret name that shares a
 * line with an emit site (any EMIT_PATTERNS match).
 *
 * One finding is appended per (line, secret) pair, so a line that names two
 * secrets yields two findings. Reported line numbers are 1-based and the
 * snippet is the offending line with surrounding whitespace trimmed.
 *
 * @param {string} file path of the file to read (decoded as UTF-8)
 * @param {Array<{file: string, line: number, secret: string, snippet: string}>} findings
 *   accumulator that hits are pushed onto (mutated in place)
 */
function scanFile(file, findings) {
  // This script spells out the secret names itself, so it is never scanned.
  if (file.endsWith("check-secret-leaks.js")) return;

  const rows = readFileSync(file, "utf8").split("\n");
  rows.forEach((text, idx) => {
    const emits = EMIT_PATTERNS.some((pattern) => pattern.test(text));
    if (!emits) return;

    const named = SECRETS.filter((secret) => text.includes(secret));
    for (const secret of named) {
      findings.push({ file, line: idx + 1, secret, snippet: text.trim() });
    }
  });
}
/**
 * Entry point: scan every directory in SCAN_DIRS and report the findings.
 *
 * When nothing is found, prints a one-line summary to stdout and returns.
 * Otherwise prints a count plus one `path:line [SECRET] snippet` row per
 * finding to stderr and terminates the process with exit code 1.
 */
function main() {
  /** @type {Array<{file: string, line: number, secret: string, snippet: string}>} */
  const findings = [];
  for (const dir of SCAN_DIRS) {
    const abs = join(PROJECT_ROOT, dir);
    for (const file of walk(abs)) scanFile(file, findings);
  }
  if (findings.length === 0) {
    console.log(`secret-leak check: 0 findings across ${SCAN_DIRS.join(", ")}`);
    return;
  }
  console.error(`secret-leak check: ${findings.length} finding(s)`);
  for (const f of findings) {
    // path.relative works with the platform's own separator; stripping a
    // hard-coded `${PROJECT_ROOT}/` prefix never matches on Windows, where
    // joined paths use backslashes.
    const rel = relative(PROJECT_ROOT, f.file);
    console.error(` ${rel}:${f.line} [${f.secret}] ${f.snippet}`);
  }
  process.exit(1);
}

main();