Files
awesome-coding-agents/github.go
T
tiennm99 62cbdd7a4a fix: harden GitHub fetcher and history I/O, add canonical keying
GitHub fetcher (github.go):
- add 30s HTTP client timeout (was http.DefaultClient with no bound)
- chunk GraphQL alias requests at 50 repos to stay clear of abuse detection
- abort the run on any partial GraphQL error or missing repo rather than
  silently shrinking the README and poisoning the next delta
- retry transient failures (network, 5xx, 429) with 2s/4s/8s backoff

History layer (history.go):
- key snapshots by canonical owner/repo from agents.yml instead of the
  rename-resolved NameWithOwner returned by the API; carry a lazy
  migration map so existing aaif-goose/goose entries fold into block/goose
  on next read with no manual data edit
- tighten the 7d delta window to (cutoff-3d, cutoff] so a missed cron week
  no longer mislabels a 90d-old comparison as Delta7d
- replace the snapshots[:0] aliased filter loop with slices.DeleteFunc
- log malformed JSONL lines to stderr with line numbers instead of
  silently skipping them
- write history.jsonl atomically via tmp file + rename so a crash
  mid-write can no longer truncate accumulated history

Plus collapse a few redundant fmt.Errorf wraps, drop a named Config type
that was used once, inline the single-call sortByStars helper with a
deterministic tiebreaker on canonical key, and use filepath.Base instead
of hand-rolling a basename.

Includes unit tests covering the 7d window edges, canonical-key
migration, atomic write path, malformed-line tolerance, YAML validation,
and markdown cell escaping.
2026-05-14 15:49:05 +07:00

216 lines
5.8 KiB
Go

package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"sort"
"strings"
"time"
)
// Stat pairs one agents.yml entry with the repository data fetched for it
// from the GitHub GraphQL API. fetchStats produces one Stat per agent.
type Stat struct {
	// CanonicalKey is "owner/repo" exactly as written in agents.yml, so it
	// stays stable even when the repository is renamed upstream.
	CanonicalKey string

	// Copied verbatim from the agents.yml entry.
	Owner, Repo     string
	Category, Notes string

	// Populated from the GraphQL response.
	Description string
	Stars       int
	// Language is the primary language name, or "" when the API reports none.
	Language string
	PushedAt time.Time
	URL      string
	// NameWithOwner is the API's nameWithOwner value for the repository.
	NameWithOwner string
}
// repoNode is the per-repository object decoded from a GraphQL response.
// Its JSON tags correspond to the field names selected in repoFields.
type repoNode struct {
StargazerCount int `json:"stargazerCount"`
Description string `json:"description"`
// PrimaryLanguage is a pointer so it can be nil; fetchStats checks for nil
// before reading Name and falls back to an empty language.
PrimaryLanguage *struct {
Name string `json:"name"`
} `json:"primaryLanguage"`
PushedAt time.Time `json:"pushedAt"`
URL string `json:"url"`
NameWithOwner string `json:"nameWithOwner"`
}
// graphQLResponse is the top-level JSON envelope that fetchChunk decodes.
type graphQLResponse struct {
// Data maps each query alias (r0, r1, …) to the repository node returned
// for it.
Data map[string]*repoNode `json:"data"`
// Errors lists GraphQL-level errors; fetchChunk treats any entry as fatal.
Errors []struct {
Message string `json:"message"`
// Path is kept untyped; it is only formatted with %v into error messages.
Path []any `json:"path"`
} `json:"errors"`
}
// repoFields is the GraphQL selection set that fetchChunk splices into every
// aliased repository(...) block. The field names listed here correspond to
// the JSON tags on repoNode.
const repoFields = `
stargazerCount
description
primaryLanguage { name }
pushedAt
url
nameWithOwner
`
const (
	// chunkSize caps the number of repository aliases placed in a single
	// GraphQL request (a safety margin under GitHub's node limit).
	chunkSize = 50

	// maxRetries bounds the retry loop in doWithRetry for transient
	// HTTP/network errors.
	maxRetries = 3
)

var (
	// httpClient is shared by every GraphQL call; its 30s timeout keeps a
	// stalled connection from hanging the workflow job.
	httpClient = &http.Client{Timeout: 30 * time.Second}

	// retryBackoff holds the delays applied before successive retries.
	retryBackoff = []time.Duration{2 * time.Second, 4 * time.Second, 8 * time.Second}
)
// fetchStats resolves every agent against the GitHub GraphQL API, batching
// the lookups into requests of at most chunkSize repositories.
//
// It fails loudly rather than returning partial data: any error from
// fetchChunk aborts the run, and so does a requested repository that is
// absent from the combined response. A shorter-than-expected result would
// otherwise silently shrink the published README.
//
// The returned slice is ordered by star count, highest first; equal star
// counts are ordered by CanonicalKey so the output does not depend on map
// iteration or on the order of agents.yml.
func fetchStats(token string, agents []Agent) ([]Stat, error) {
	total := len(agents)
	byAlias := make(map[string]*repoNode, total)

	// Walk the agent list one window at a time. The window's start index is
	// passed through as the alias offset so aliases stay globally unique.
	for lo := 0; lo < total; lo += chunkSize {
		hi := lo + chunkSize
		if hi > total {
			hi = total
		}
		batch, err := fetchChunk(token, agents[lo:hi], lo)
		if err != nil {
			return nil, err
		}
		for alias, node := range batch {
			byAlias[alias] = node
		}
	}

	result := make([]Stat, 0, total)
	for idx := range agents {
		agent := agents[idx]
		node := byAlias[fmt.Sprintf("r%d", idx)]
		if node == nil {
			return nil, fmt.Errorf("repo %s/%s missing from GraphQL response", agent.Owner, agent.Repo)
		}

		// primaryLanguage can be absent, in which case the language stays "".
		var language string
		if pl := node.PrimaryLanguage; pl != nil {
			language = pl.Name
		}

		result = append(result, Stat{
			CanonicalKey:  agent.Owner + "/" + agent.Repo,
			Owner:         agent.Owner,
			Repo:          agent.Repo,
			Category:      agent.Category,
			Notes:         agent.Notes,
			Description:   node.Description,
			Stars:         node.StargazerCount,
			Language:      language,
			PushedAt:      node.PushedAt,
			URL:           node.URL,
			NameWithOwner: node.NameWithOwner,
		})
	}

	// Most-starred first; CanonicalKey breaks ties deterministically.
	sort.Slice(result, func(a, b int) bool {
		left, right := &result[a], &result[b]
		if left.Stars == right.Stars {
			return left.CanonicalKey < right.CanonicalKey
		}
		return left.Stars > right.Stars
	})
	return result, nil
}
// graphQLString renders s as a double-quoted GraphQL string literal.
//
// It uses JSON string encoding because every escape encoding/json emits
// (\", \\, \n, \r, \t, \uXXXX, …) is also a valid GraphQL escape. Go's %q
// verb is not a safe substitute: for control bytes, invalid UTF-8 and
// non-printable runes it produces Go-only escapes such as \x00, \a, \v and
// \U0001f600 that a GraphQL parser rejects.
func graphQLString(s string) (string, error) {
	quoted, err := json.Marshal(s)
	if err != nil {
		return "", err
	}
	return string(quoted), nil
}

// fetchChunk sends one GraphQL request covering the given slice of agents and
// returns the decoded nodes keyed by alias.
//
// Each repository is requested under the alias r<aliasOffset+i>, so callers
// that issue several chunks can pass the chunk's start index as aliasOffset
// to keep aliases unique across the whole run.
//
// An empty agents slice returns an empty map without contacting the API
// (an empty "query {}" document is not valid GraphQL). Otherwise an error is
// returned when the request fails, the HTTP status is not 200, the body does
// not decode, or the response carries any GraphQL-level error.
func fetchChunk(token string, agents []Agent, aliasOffset int) (map[string]*repoNode, error) {
	if len(agents) == 0 {
		return map[string]*repoNode{}, nil
	}

	var query strings.Builder
	query.WriteString("query {\n")
	for i, a := range agents {
		owner, err := graphQLString(a.Owner)
		if err != nil {
			return nil, fmt.Errorf("encode owner %q: %w", a.Owner, err)
		}
		name, err := graphQLString(a.Repo)
		if err != nil {
			return nil, fmt.Errorf("encode repo %q: %w", a.Repo, err)
		}
		fmt.Fprintf(&query, " r%d: repository(owner: %s, name: %s) {%s}\n", aliasOffset+i, owner, name, repoFields)
	}
	query.WriteString("}\n")

	body, err := json.Marshal(map[string]string{"query": query.String()})
	if err != nil {
		return nil, err
	}

	raw, statusCode, err := doWithRetry(token, body)
	if err != nil {
		return nil, err
	}
	if statusCode != http.StatusOK {
		return nil, fmt.Errorf("graphql HTTP %d: %s", statusCode, raw)
	}

	var out graphQLResponse
	if err := json.Unmarshal(raw, &out); err != nil {
		return nil, fmt.Errorf("decode response: %w (body=%s)", err, raw)
	}

	// Treat any GraphQL-level error as fatal — partial data silently shrinks
	// the README and corrupts future delta math.
	if len(out.Errors) > 0 {
		msgs := make([]string, len(out.Errors))
		for i, e := range out.Errors {
			msgs[i] = fmt.Sprintf("%s (path=%v)", e.Message, e.Path)
		}
		return nil, fmt.Errorf("graphql errors: %s", strings.Join(msgs, "; "))
	}
	return out.Data, nil
}
// doWithRetry POSTs body to the GitHub GraphQL endpoint and retries transient
// failures: transport errors, failures while reading the response body,
// HTTP 429 and HTTP 5xx.
//
// One initial attempt is made immediately. It is followed by at most
// maxRetries retries, and retry k (1-based) first sleeps for
// retryBackoff[k-1]. If retryBackoff has fewer entries than there are
// retries, its last entry is reused; if it is empty, retries are immediate.
//
// Returns:
//   - (body, 200, nil) on success;
//   - (body, status, nil) for any other non-retryable status (for example a
//     4xx other than 429), leaving interpretation to the caller;
//   - (nil, 0, err) when the request cannot be built or every attempt failed,
//     with the last underlying failure wrapped in err.
func doWithRetry(token string, body []byte) ([]byte, int, error) {
	var lastErr error
	// attempt 0 is the initial request; attempts 1..maxRetries are retries.
	for attempt := 0; attempt <= maxRetries; attempt++ {
		if attempt > 0 {
			// Clamp the index so a mismatch between maxRetries and
			// len(retryBackoff) can never panic.
			var wait time.Duration
			if n := len(retryBackoff); n > 0 {
				idx := attempt - 1
				if idx >= n {
					idx = n - 1
				}
				wait = retryBackoff[idx]
			}
			log.Printf("retry attempt %d after %v: %v", attempt, wait, lastErr)
			time.Sleep(wait)
		}

		// A fresh request (and body reader) is needed for every attempt.
		req, err := http.NewRequest(http.MethodPost, "https://api.github.com/graphql", bytes.NewReader(body))
		if err != nil {
			return nil, 0, err
		}
		req.Header.Set("Authorization", "Bearer "+token)
		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("User-Agent", "awesome-coding-agents-updater")

		resp, err := httpClient.Do(req)
		if err != nil {
			lastErr = err
			continue // network error — retry
		}

		// Read fully and close right away (no defer inside the loop) so the
		// connection can be reused by the next attempt.
		raw, err := io.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			lastErr = fmt.Errorf("read body: %w", err)
			continue
		}

		sc := resp.StatusCode
		if sc == http.StatusOK {
			return raw, sc, nil
		}
		if sc == http.StatusTooManyRequests || sc >= 500 {
			lastErr = fmt.Errorf("HTTP %d: %s", sc, raw)
			continue // retryable
		}
		// 4xx (except 429) — not retryable
		return raw, sc, nil
	}
	return nil, 0, fmt.Errorf("all %d attempts failed; last error: %w", maxRetries+1, lastErr)
}