mirror of
https://github.com/tiennm99/awesome-coding-agents.git
synced 2026-05-25 05:38:59 +00:00
62cbdd7a4a
GitHub fetcher (github.go): - add 30s HTTP client timeout (was http.DefaultClient with no bound) - chunk GraphQL alias requests at 50 repos to stay clear of abuse detection - abort the run on any partial GraphQL error or missing repo rather than silently shrinking the README and poisoning the next delta - retry transient failures (network, 5xx, 429) with 2s/4s/8s backoff History layer (history.go): - key snapshots by canonical owner/repo from agents.yml instead of the rename-resolved NameWithOwner returned by the API; carry a lazy migration map so existing aaif-goose/goose entries fold into block/goose on next read with no manual data edit - tighten the 7d delta window to (cutoff-3d, cutoff] so a missed cron week no longer mislabels a 90d-old comparison as Delta7d - replace the snapshots[:0] aliased filter loop with slices.DeleteFunc - log malformed JSONL lines to stderr with line numbers instead of silently skipping them - write history.jsonl atomically via tmp file + rename so a crash mid-write can no longer truncate accumulated history Plus collapse a few redundant fmt.Errorf wraps, drop a named Config type that was used once, inline the single-call sortByStars helper with a deterministic tiebreaker on canonical key, and use filepath.Base instead of hand-rolling a basename. Includes unit tests covering the 7d window edges, canonical-key migration, atomic write path, malformed-line tolerance, YAML validation, and markdown cell escaping.
216 lines
5.8 KiB
Go
216 lines
5.8 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
type Stat struct {
|
|
// CanonicalKey is owner/repo from agents.yml — stable across renames.
|
|
CanonicalKey string
|
|
Owner string
|
|
Repo string
|
|
Category string
|
|
Notes string
|
|
Description string
|
|
Stars int
|
|
Language string
|
|
PushedAt time.Time
|
|
URL string
|
|
NameWithOwner string
|
|
}
|
|
|
|
type repoNode struct {
|
|
StargazerCount int `json:"stargazerCount"`
|
|
Description string `json:"description"`
|
|
PrimaryLanguage *struct {
|
|
Name string `json:"name"`
|
|
} `json:"primaryLanguage"`
|
|
PushedAt time.Time `json:"pushedAt"`
|
|
URL string `json:"url"`
|
|
NameWithOwner string `json:"nameWithOwner"`
|
|
}
|
|
|
|
type graphQLResponse struct {
|
|
Data map[string]*repoNode `json:"data"`
|
|
Errors []struct {
|
|
Message string `json:"message"`
|
|
Path []any `json:"path"`
|
|
} `json:"errors"`
|
|
}
|
|
|
|
const repoFields = `
|
|
stargazerCount
|
|
description
|
|
primaryLanguage { name }
|
|
pushedAt
|
|
url
|
|
nameWithOwner
|
|
`
|
|
|
|
// httpClient has a timeout to prevent hung workflow jobs.
|
|
var httpClient = &http.Client{Timeout: 30 * time.Second}
|
|
|
|
// chunkSize is the max aliases per GraphQL request (GitHub node-limit safety margin).
|
|
const chunkSize = 50
|
|
|
|
// maxRetries and retry backoff for transient HTTP/network errors.
|
|
const maxRetries = 3
|
|
|
|
var retryBackoff = []time.Duration{2 * time.Second, 4 * time.Second, 8 * time.Second}
|
|
|
|
// fetchStats queries GitHub GraphQL in chunks of up to chunkSize repos per
|
|
// request. Returns an error if any GraphQL errors are present OR if any
|
|
// requested repo is missing from the response — better to fail loud than
|
|
// silently publish a shorter README.
|
|
func fetchStats(token string, agents []Agent) ([]Stat, error) {
|
|
collected := make(map[string]*repoNode, len(agents))
|
|
|
|
for start := 0; start < len(agents); start += chunkSize {
|
|
end := start + chunkSize
|
|
if end > len(agents) {
|
|
end = len(agents)
|
|
}
|
|
chunk := agents[start:end]
|
|
|
|
nodes, err := fetchChunk(token, chunk, start)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for k, v := range nodes {
|
|
collected[k] = v
|
|
}
|
|
}
|
|
|
|
stats := make([]Stat, 0, len(agents))
|
|
for i, a := range agents {
|
|
alias := fmt.Sprintf("r%d", i)
|
|
node := collected[alias]
|
|
if node == nil {
|
|
return nil, fmt.Errorf("repo %s/%s missing from GraphQL response", a.Owner, a.Repo)
|
|
}
|
|
lang := ""
|
|
if node.PrimaryLanguage != nil {
|
|
lang = node.PrimaryLanguage.Name
|
|
}
|
|
stats = append(stats, Stat{
|
|
CanonicalKey: a.Owner + "/" + a.Repo,
|
|
Owner: a.Owner,
|
|
Repo: a.Repo,
|
|
Category: a.Category,
|
|
Notes: a.Notes,
|
|
Description: node.Description,
|
|
Stars: node.StargazerCount,
|
|
Language: lang,
|
|
PushedAt: node.PushedAt,
|
|
URL: node.URL,
|
|
NameWithOwner: node.NameWithOwner,
|
|
})
|
|
}
|
|
|
|
// Sort by stars descending. Ties are ordered by CanonicalKey for determinism
|
|
// regardless of map-iteration or agents.yml order.
|
|
sort.Slice(stats, func(i, j int) bool {
|
|
if stats[i].Stars != stats[j].Stars {
|
|
return stats[i].Stars > stats[j].Stars
|
|
}
|
|
return stats[i].CanonicalKey < stats[j].CanonicalKey
|
|
})
|
|
|
|
return stats, nil
|
|
}
|
|
|
|
// fetchChunk sends one GraphQL request for a slice of agents. aliasOffset
|
|
// ensures alias names (r0, r1, …) are globally unique across chunks.
|
|
func fetchChunk(token string, agents []Agent, aliasOffset int) (map[string]*repoNode, error) {
|
|
var b strings.Builder
|
|
b.WriteString("query {\n")
|
|
for i, a := range agents {
|
|
fmt.Fprintf(&b, " r%d: repository(owner: %q, name: %q) {%s}\n", aliasOffset+i, a.Owner, a.Repo, repoFields)
|
|
}
|
|
b.WriteString("}\n")
|
|
|
|
body, err := json.Marshal(map[string]string{"query": b.String()})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
raw, statusCode, err := doWithRetry(token, body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if statusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("graphql HTTP %d: %s", statusCode, raw)
|
|
}
|
|
|
|
var out graphQLResponse
|
|
if err := json.Unmarshal(raw, &out); err != nil {
|
|
return nil, fmt.Errorf("decode response: %w (body=%s)", err, raw)
|
|
}
|
|
|
|
// Treat any GraphQL-level error as fatal — partial data silently shrinks
|
|
// the README and corrupts future delta math.
|
|
if len(out.Errors) > 0 {
|
|
msgs := make([]string, len(out.Errors))
|
|
for i, e := range out.Errors {
|
|
msgs[i] = fmt.Sprintf("%s (path=%v)", e.Message, e.Path)
|
|
}
|
|
return nil, fmt.Errorf("graphql errors: %s", strings.Join(msgs, "; "))
|
|
}
|
|
|
|
return out.Data, nil
|
|
}
|
|
|
|
// doWithRetry executes the GraphQL POST with exponential backoff on transient
|
|
// errors (network failures, HTTP 5xx, HTTP 429). 4xx other than 429 are not
|
|
// retried.
|
|
func doWithRetry(token string, body []byte) ([]byte, int, error) {
|
|
var lastErr error
|
|
for attempt := 0; attempt < maxRetries; attempt++ {
|
|
if attempt > 0 {
|
|
log.Printf("retry attempt %d after %v: %v", attempt, retryBackoff[attempt-1], lastErr)
|
|
time.Sleep(retryBackoff[attempt-1])
|
|
}
|
|
|
|
req, err := http.NewRequest("POST", "https://api.github.com/graphql", bytes.NewReader(body))
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+token)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("User-Agent", "awesome-coding-agents-updater")
|
|
|
|
resp, err := httpClient.Do(req)
|
|
if err != nil {
|
|
lastErr = err
|
|
continue // network error — retry
|
|
}
|
|
|
|
raw, err := io.ReadAll(resp.Body)
|
|
resp.Body.Close()
|
|
if err != nil {
|
|
lastErr = fmt.Errorf("read body: %w", err)
|
|
continue
|
|
}
|
|
|
|
sc := resp.StatusCode
|
|
if sc == http.StatusOK {
|
|
return raw, sc, nil
|
|
}
|
|
if sc == http.StatusTooManyRequests || sc >= 500 {
|
|
lastErr = fmt.Errorf("HTTP %d: %s", sc, raw)
|
|
continue // retryable
|
|
}
|
|
// 4xx (except 429) — not retryable
|
|
return raw, sc, nil
|
|
}
|
|
return nil, 0, fmt.Errorf("all %d attempts failed; last error: %w", maxRetries, lastErr)
|
|
}
|