From d78e761731d36cb196e9e7e7f8566ff25ddd9415 Mon Sep 17 00:00:00 2001 From: tiennm99 Date: Thu, 9 Apr 2026 00:48:42 +0700 Subject: [PATCH] feat: add metrics tracking and remove unsupported HMAC verification - Add metrics.js tracking webhooks, messages, cron, commands - Add GET /metrics/:secret endpoint (text + JSON format) - Remove HMAC verification (Statuspage doesn't support it) - Extract validateSecret helper in index.js - Integrate trackMetrics across webhook, queue, cron, bot handlers --- CLAUDE.md | 7 ++-- README.md | 6 +-- src/bot-commands.js | 7 ++++ src/cron-status-check.js | 8 ++++ src/index.js | 29 +++++++++++++-- src/metrics.js | 77 +++++++++++++++++++++++++++++++++++++++ src/queue-consumer.js | 19 ++++++++-- src/statuspage-webhook.js | 50 ++++++------------------- wrangler.jsonc | 1 - 9 files changed, 148 insertions(+), 56 deletions(-) create mode 100644 src/metrics.js diff --git a/CLAUDE.md b/CLAUDE.md index df0c114..1f83c4a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -19,7 +19,6 @@ No test framework configured yet. No linter configured. - `BOT_TOKEN` — Telegram bot token - `WEBHOOK_SECRET` — Secret token in Statuspage webhook URL path -- `STATUSPAGE_HMAC_KEY` — HMAC key from Statuspage webhook settings (optional, for signature verification) ## Architecture @@ -34,12 +33,13 @@ Cloudflare Workers with three entry points exported from `src/index.js`: |--------|------|---------|---------| | GET | `/` | inline | Health check | | POST | `/webhook/telegram` | `bot-commands.js` | grammY `webhookCallback("cloudflare-mod")` | -| POST | `/webhook/status/:secret` | `statuspage-webhook.js` | Receives Statuspage webhooks (HMAC + URL secret) | +| POST | `/webhook/status/:secret` | `statuspage-webhook.js` | Receives Statuspage webhooks (URL secret) | +| GET | `/metrics/:secret` | inline | Bot statistics (text or `?format=json`) | | GET | `/migrate/:secret` | inline | One-time KV migration (remove after use) | ### Data Flow -1. 
**Statuspage → Worker**: Webhook POST → verify HMAC signature (fallback: URL secret) → parse incident/component event → filter subscribers by type + component → `sendBatch` to CF Queue +1. **Statuspage → Worker**: Webhook POST → verify URL secret (timing-safe) → parse incident/component event → filter subscribers by type + component → `sendBatch` to CF Queue 2. **Cron → Worker**: Every 5 min → fetch summary → compare with `last-status` KV → notify on changes → update stored state 3. **Queue → Telegram**: Consumer processes batches of 30 → `sendMessage` via `telegram-api.js` helper → auto-removes blocked subscribers (403/400), retries on 429 4. **User → Bot**: Telegram webhook → grammY handles `/help`, `/start`, `/stop`, `/status`, `/subscribe`, `/history`, `/uptime` commands → reads/writes KV @@ -52,6 +52,7 @@ Per-subscriber keys (no read-modify-write races): Special keys: - `last-status` — JSON snapshot of component statuses for cron comparison +- `metrics` — Single JSON object of counters and last-seen timestamps for webhooks, messages, cron checks, and commands (updated via read-modify-write, so counts may be lost under concurrent updates) `kv-store.js` handles key building/parsing with `kv.list({ prefix: "sub:" })` pagination. `threadId` can be `0` (General topic), so null checks use `!= null`. 
diff --git a/README.md b/README.md index b7f09c8..76112e3 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Hosted on [Cloudflare Workers](https://workers.cloudflare.com/) with KV for stor - **Supergroup topic support** — send `/start` in a specific topic and notifications go to that topic - **On-demand status check** — `/status` fetches live data from status.claude.com - **Automatic status monitoring** — cron checks every 5 minutes as a safety net -- **HMAC webhook verification** — validates Statuspage webhook signatures +- **Metrics endpoint** — track webhooks, messages, cron checks, and bot commands via `GET /metrics/:secret` (plain text, or JSON with `?format=json`) - **Self-healing** — automatically removes subscribers who block the bot ## Bot Commands @@ -79,10 +79,6 @@ npx wrangler secret put BOT_TOKEN npx wrangler secret put WEBHOOK_SECRET # Choose a random secret string for the Statuspage webhook URL - -# Optional: HMAC verification for Statuspage webhooks -npx wrangler secret put STATUSPAGE_HMAC_KEY -# Paste the HMAC key from Statuspage webhook settings ``` ### 5. 
Deploy diff --git a/src/bot-commands.js b/src/bot-commands.js index 183ff9f..141c960 100644 --- a/src/bot-commands.js +++ b/src/bot-commands.js @@ -8,6 +8,7 @@ import { } from "./kv-store.js"; import { fetchComponentByName, escapeHtml } from "./status-fetcher.js"; import { registerInfoCommands } from "./bot-info-commands.js"; +import { trackMetrics } from "./metrics.js"; /** * Extract chatId and threadId from grammY context @@ -26,6 +27,12 @@ export async function handleTelegramWebhook(c) { const bot = new Bot(c.env.BOT_TOKEN); const kv = c.env.claude_status; + // Track command usage + bot.use(async (ctx, next) => { + await trackMetrics(kv, { commandsProcessed: 1 }); + await next(); + }); + bot.command("start", async (ctx) => { const { chatId, threadId } = getChatTarget(ctx); await addSubscriber(kv, chatId, threadId); diff --git a/src/cron-status-check.js b/src/cron-status-check.js index 21376bb..88a3a72 100644 --- a/src/cron-status-check.js +++ b/src/cron-status-check.js @@ -1,5 +1,6 @@ import { fetchSummary, humanizeStatus, escapeHtml } from "./status-fetcher.js"; import { getSubscribersByType } from "./kv-store.js"; +import { trackMetrics } from "./metrics.js"; const LAST_STATUS_KEY = "last-status"; @@ -58,6 +59,11 @@ export async function handleScheduled(env) { timestamp: new Date().toISOString(), })); + await trackMetrics(kv, { + cronChecks: 1, + lastCronAt: new Date().toISOString(), + }); + if (changes.length === 0) return; console.log(`Cron: ${changes.length} component change(s) detected`); @@ -74,4 +80,6 @@ export async function handleScheduled(env) { } console.log(`Cron: enqueued ${messages.length} messages for ${name} change`); } + + await trackMetrics(kv, { cronChangesDetected: changes.length }); } diff --git a/src/index.js b/src/index.js index 288203b..6097eb6 100644 --- a/src/index.js +++ b/src/index.js @@ -4,20 +4,41 @@ import { handleStatuspageWebhook } from "./statuspage-webhook.js"; import { handleQueue } from "./queue-consumer.js"; import { 
handleScheduled } from "./cron-status-check.js"; import { migrateFromSingleKey } from "./kv-store.js"; +import { getMetrics, formatMetricsText } from "./metrics.js"; const app = new Hono(); +/** + * Timing-safe secret validation helper + */ +async function validateSecret(secret, expected) { + const encoder = new TextEncoder(); + const a = encoder.encode(secret); + const b = encoder.encode(expected); + if (a.byteLength !== b.byteLength) return false; + return crypto.subtle.timingSafeEqual(a, b); +} + app.get("/", (c) => c.text("Claude Status Bot is running")); app.post("/webhook/telegram", (c) => handleTelegramWebhook(c)); app.post("/webhook/status/:secret", (c) => handleStatuspageWebhook(c)); +// Metrics endpoint — view bot statistics +app.get("/metrics/:secret", async (c) => { + const secret = c.req.param("secret"); + if (!await validateSecret(secret, c.env.WEBHOOK_SECRET)) { + return c.text("Unauthorized", 401); + } + const metrics = await getMetrics(c.env.claude_status); + const format = c.req.query("format"); + if (format === "json") return c.json(metrics); + return c.text(formatMetricsText(metrics)); +}); + // One-time migration route — remove after migration is confirmed app.get("/migrate/:secret", async (c) => { const secret = c.req.param("secret"); - const encoder = new TextEncoder(); - const a = encoder.encode(secret); - const b = encoder.encode(c.env.WEBHOOK_SECRET); - if (a.byteLength !== b.byteLength || !crypto.subtle.timingSafeEqual(a, b)) { + if (!await validateSecret(secret, c.env.WEBHOOK_SECRET)) { return c.text("Unauthorized", 401); } const count = await migrateFromSingleKey(c.env.claude_status); diff --git a/src/metrics.js b/src/metrics.js new file mode 100644 index 0000000..ccbe9a4 --- /dev/null +++ b/src/metrics.js @@ -0,0 +1,77 @@ +const METRICS_KEY = "metrics"; + +const DEFAULT_METRICS = { + webhooksReceived: 0, + messagesEnqueued: 0, + messagesSent: 0, + messagesFailedPermanent: 0, + messagesRetried: 0, + subscribersRemoved: 0, + cronChecks: 
0, + cronChangesDetected: 0, + commandsProcessed: 0, + lastWebhookAt: null, + lastCronAt: null, + startedAt: new Date().toISOString(), +}; + +/** + * Get current metrics from KV + */ +export async function getMetrics(kv) { + const data = await kv.get(METRICS_KEY, "json"); + return data || { ...DEFAULT_METRICS }; +} + +/** + * Increment one or more metric counters and optionally set timestamp fields + */ +export async function trackMetrics(kv, updates) { + const metrics = await getMetrics(kv); + for (const [key, value] of Object.entries(updates)) { + if (typeof value === "number") { + metrics[key] = (metrics[key] || 0) + value; + } else { + metrics[key] = value; + } + } + await kv.put(METRICS_KEY, JSON.stringify(metrics)); +} + +/** + * Format metrics as HTML for Telegram or plain text for API + */ +export function formatMetricsText(metrics) { + const uptime = metrics.startedAt + ? timeSince(new Date(metrics.startedAt)) + : "unknown"; + + return [ + `Webhooks received: ${metrics.webhooksReceived}`, + `Messages enqueued: ${metrics.messagesEnqueued}`, + `Messages sent: ${metrics.messagesSent}`, + `Messages failed: ${metrics.messagesFailedPermanent}`, + `Messages retried: ${metrics.messagesRetried}`, + `Subscribers auto-removed: ${metrics.subscribersRemoved}`, + `Cron checks: ${metrics.cronChecks}`, + `Cron changes detected: ${metrics.cronChangesDetected}`, + `Commands processed: ${metrics.commandsProcessed}`, + `Last webhook: ${metrics.lastWebhookAt || "never"}`, + `Last cron: ${metrics.lastCronAt || "never"}`, + `Tracking since: ${uptime}`, + ].join("\n"); +} + +/** + * Human-readable time duration since a given date + */ +function timeSince(date) { + const seconds = Math.floor((Date.now() - date.getTime()) / 1000); + if (seconds < 60) return `${seconds}s ago`; + const minutes = Math.floor(seconds / 60); + if (minutes < 60) return `${minutes}m ago`; + const hours = Math.floor(minutes / 60); + if (hours < 24) return `${hours}h ${minutes % 60}m ago`; + const days = 
Math.floor(hours / 24); + return `${days}d ${hours % 24}h ago`; +} diff --git a/src/queue-consumer.js b/src/queue-consumer.js index b6eca0c..b8ff549 100644 --- a/src/queue-consumer.js +++ b/src/queue-consumer.js @@ -1,11 +1,14 @@ import { removeSubscriber } from "./kv-store.js"; import { telegramUrl } from "./telegram-api.js"; +import { trackMetrics } from "./metrics.js"; /** * Process a batch of queued messages, sending each to Telegram. * Handles rate limits (429 → retry), blocked bots (403/400 → remove subscriber). */ export async function handleQueue(batch, env) { + let sent = 0, failed = 0, retried = 0, removed = 0; + for (const msg of batch.messages) { const { chatId, threadId, html } = msg.body; @@ -33,25 +36,33 @@ export async function handleQueue(batch, env) { }); if (res.ok) { + sent++; msg.ack(); } else if (res.status === 403 || res.status === 400) { - // Bot blocked or chat not found — auto-remove subscriber console.log(`Queue: removing subscriber ${chatId}:${threadId} (HTTP ${res.status})`); await removeSubscriber(env.claude_status, chatId, threadId); + removed++; msg.ack(); } else if (res.status === 429) { - // Rate limited — let queue retry later console.log("Queue: rate limited, retrying"); + retried++; msg.retry(); } else { - // Unknown error — ack to avoid infinite retry console.error(`Queue: unexpected HTTP ${res.status} for ${chatId}`); + failed++; msg.ack(); } } catch (err) { - // Network error — retry console.error("Queue: network error, retrying", err); + retried++; msg.retry(); } } + + await trackMetrics(env.claude_status, { + messagesSent: sent, + messagesFailedPermanent: failed, + messagesRetried: retried, + subscribersRemoved: removed, + }); } diff --git a/src/statuspage-webhook.js b/src/statuspage-webhook.js index 2999837..ede53b7 100644 --- a/src/statuspage-webhook.js +++ b/src/statuspage-webhook.js @@ -1,37 +1,6 @@ import { getSubscribersByType } from "./kv-store.js"; import { humanizeStatus, escapeHtml } from "./status-fetcher.js"; - 
-/** - * Convert hex string to Uint8Array - */ -function hexToBytes(hex) { - const bytes = new Uint8Array(hex.length / 2); - for (let i = 0; i < hex.length; i += 2) { - bytes[i / 2] = parseInt(hex.substr(i, 2), 16); - } - return bytes; -} - -/** - * Verify Statuspage HMAC-SHA256 signature - */ -async function verifyHmacSignature(request, hmacKey) { - if (!hmacKey) return false; - const signature = request.headers.get("X-Statuspage-Signature"); - if (!signature) return false; - - const body = await request.clone().arrayBuffer(); - const key = await crypto.subtle.importKey( - "raw", - new TextEncoder().encode(hmacKey), - { name: "HMAC", hash: "SHA-256" }, - false, - ["verify"] - ); - - const sigBytes = hexToBytes(signature); - return crypto.subtle.verify("HMAC", key, sigBytes, body); -} +import { trackMetrics } from "./metrics.js"; /** * Timing-safe string comparison @@ -79,13 +48,10 @@ function formatComponentMessage(component, update) { * Handle incoming Statuspage webhook */ export async function handleStatuspageWebhook(c) { - // Try HMAC verification first, fall back to URL secret - const hmacValid = await verifyHmacSignature(c.req.raw, c.env.STATUSPAGE_HMAC_KEY); - if (!hmacValid) { - const secret = c.req.param("secret"); - if (!await timingSafeEqual(secret, c.env.WEBHOOK_SECRET)) { - return c.text("Unauthorized", 401); - } + // Validate URL secret (timing-safe) + const secret = c.req.param("secret"); + if (!await timingSafeEqual(secret, c.env.WEBHOOK_SECRET)) { + return c.text("Unauthorized", 401); } // Parse body @@ -127,5 +93,11 @@ export async function handleStatuspageWebhook(c) { console.log(`Enqueued ${messages.length} messages for ${category}${componentName ? 
`:${componentName}` : ""}`); + await trackMetrics(c.env.claude_status, { + webhooksReceived: 1, + messagesEnqueued: messages.length, + lastWebhookAt: new Date().toISOString(), + }); + return c.text("OK", 200); } diff --git a/wrangler.jsonc b/wrangler.jsonc index cfe66ad..f5189a7 100644 --- a/wrangler.jsonc +++ b/wrangler.jsonc @@ -30,5 +30,4 @@ // Secrets (set via `wrangler secret put`): // BOT_TOKEN - Telegram bot token // WEBHOOK_SECRET - Statuspage webhook URL secret - // STATUSPAGE_HMAC_KEY - HMAC key from Statuspage webhook settings (optional) }