mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 16:48:54 +00:00
231 lines
6.9 KiB
Python
Executable File
231 lines
6.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Detect and close duplicate GitHub issues using title similarity.

Modes:
    --scan              Compare all open issues against each other (batch)
    --issue-number N    Check a single issue against older open issues

Requires the `gh` CLI to be authenticated.
|
|
"""
|
|
|
|
import argparse
|
|
import difflib
|
|
import json
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
|
|
|
|
def normalize_title(title: str) -> str:
    """Return a canonical form of an issue title for similarity comparison.

    A leading category tag is removed, then the text is lowercased and every
    run of whitespace is collapsed to a single space.  Recognised tags are
    ``bug``, ``feature request``, ``enhancement``, ``question`` and ``docs``
    (case-insensitive).  The tag may be wrapped in square brackets and/or
    followed by a colon, so ``"[Bug]: x"``, ``"[Bug] x"``, ``"Bug: x"`` and
    ``"bug x"`` all normalize to ``"x"``.

    Args:
        title: Raw issue title.

    Returns:
        The normalized title.  It is empty when the title consisted of a tag
        only.
    """
    without_tag = re.sub(
        # ^\s*  : tolerate leading whitespace before the tag.
        # \b    : only strip whole words, so "Bugfix" or "Docstring" survive.
        # \]? and :? are matched independently so the combined "[Bug]:" form
        # does not leave a stray ":" at the start of the result.
        r"^\s*\[?\s*(?:bug|feature request|enhancement|question|docs)\b"
        r"\s*\]?\s*:?\s*",
        "",
        title,
        flags=re.IGNORECASE,
    )
    return " ".join(without_tag.lower().split())
|
|
|
|
|
|
def gh(*args: str) -> str:
    """Invoke the ``gh`` CLI with ``args`` and return what it wrote to stdout.

    Both output streams are captured as text.  Because ``check=True`` is
    passed, a non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    command = ["gh"]
    command.extend(args)
    completed = subprocess.run(command, check=True, text=True, capture_output=True)
    return completed.stdout
|
|
|
|
|
|
def fetch_open_issues(repo: str | None) -> list[dict]:
    """Fetch all open issues (excluding pull requests) via ``gh api --paginate``.

    The request asks for open issues, 100 per page, sorted by creation date
    in ascending order.

    Args:
        repo: Repository as ``owner/repo``.  When falsy, the literal
            ``{owner}/{repo}`` placeholders are sent to ``gh api`` unchanged
            (presumably resolved by ``gh`` from the current repository; the
            CLI help describes this as auto-detection).

    Returns:
        The decoded issue objects, with every entry that has a
        ``pull_request`` key removed (the issues endpoint also lists PRs).

    Raises:
        subprocess.CalledProcessError: If the ``gh`` command fails.
        json.JSONDecodeError: If the output is not valid JSON.
    """
    slug = repo if repo else "{owner}/{repo}"
    endpoint = (
        f"repos/{slug}/issues?state=open&per_page=100&sort=created&direction=asc"
    )
    raw = gh("api", "--paginate", endpoint)

    # The paginated output may hold one JSON document or several back to back.
    # Decode it as a stream of documents instead of assuming one document per
    # line, so neither "[...][...]" on a single line nor pretty-printed
    # multi-line JSON breaks parsing.
    decoder = json.JSONDecoder()
    issues: list[dict] = []
    pos, end = 0, len(raw)
    while pos < end:
        # raw_decode() does not skip leading whitespace, so do it here.
        if raw[pos].isspace():
            pos += 1
            continue
        parsed, pos = decoder.raw_decode(raw, pos)
        if isinstance(parsed, list):
            issues.extend(parsed)
        else:
            issues.append(parsed)

    # Filter out pull requests (they also appear in the issues endpoint).
    return [issue for issue in issues if "pull_request" not in issue]
|
|
|
|
|
|
def close_as_duplicate(
|
|
issue_number: int, duplicate_of: int, repo: str | None, dry_run: bool
|
|
) -> None:
|
|
"""Close an issue as duplicate of another, adding a comment and label."""
|
|
repo_args = ["--repo", repo] if repo else []
|
|
|
|
if dry_run:
|
|
print(
|
|
f" [DRY RUN] Would close #{issue_number} as duplicate of #{duplicate_of}"
|
|
)
|
|
return
|
|
|
|
# Add comment
|
|
comment_body = (
|
|
f"Closing as duplicate of #{duplicate_of}.\n\n"
|
|
"If you believe this is not a duplicate, please reopen and add context "
|
|
"explaining how this differs."
|
|
)
|
|
gh("issue", "comment", str(issue_number), "--body", comment_body, *repo_args)
|
|
|
|
# Add label
|
|
gh("issue", "edit", str(issue_number), "--add-label", "duplicate", *repo_args)
|
|
|
|
# Close with not_planned reason
|
|
gh(
|
|
"api",
|
|
f"repos/{repo or '{owner}/{repo}'}/issues/{issue_number}",
|
|
"-X",
|
|
"PATCH",
|
|
"-f",
|
|
"state=closed",
|
|
"-f",
|
|
"state_reason=not_planned",
|
|
)
|
|
|
|
print(f" Closed #{issue_number} as duplicate of #{duplicate_of}")
|
|
|
|
|
|
def find_duplicate(
    issue: dict, candidates: list[dict], threshold: float
) -> dict | None:
    """Find the first candidate whose title is similar enough to ``issue``.

    Titles are normalized with ``normalize_title`` and compared with
    ``difflib.SequenceMatcher``.  Candidates are examined in the order
    given; a candidate with the same ``number`` as ``issue`` is ignored.

    Returns:
        The first candidate whose similarity ratio is ``>= threshold``, or
        ``None`` when there is none.
    """
    wanted = normalize_title(issue["title"])
    # Lazy generator: evaluation stops at the first hit, like an early return.
    matches = (
        other
        for other in candidates
        if other["number"] != issue["number"]
        and difflib.SequenceMatcher(
            None, wanted, normalize_title(other["title"])
        ).ratio()
        >= threshold
    )
    return next(matches, None)
|
|
|
|
|
|
def scan_all(
|
|
issues: list[dict], threshold: float, repo: str | None, dry_run: bool
|
|
) -> int:
|
|
"""Compare every issue against all older issues. Returns count of duplicates found."""
|
|
# Sort oldest first
|
|
issues.sort(key=lambda i: i["number"])
|
|
closed_count = 0
|
|
|
|
for idx, issue in enumerate(issues):
|
|
older = issues[:idx]
|
|
if not older:
|
|
continue
|
|
dup = find_duplicate(issue, older, threshold)
|
|
if dup:
|
|
ratio = difflib.SequenceMatcher(
|
|
None,
|
|
normalize_title(issue["title"]),
|
|
normalize_title(dup["title"]),
|
|
).ratio()
|
|
print(
|
|
f"#{issue['number']}: \"{issue['title']}\"\n"
|
|
f" -> duplicate of #{dup['number']}: \"{dup['title']}\" "
|
|
f"({ratio:.0%} similar)"
|
|
)
|
|
close_as_duplicate(issue["number"], dup["number"], repo, dry_run)
|
|
closed_count += 1
|
|
|
|
return closed_count
|
|
|
|
|
|
def check_single(
|
|
issue_number: int,
|
|
issues: list[dict],
|
|
threshold: float,
|
|
repo: str | None,
|
|
dry_run: bool,
|
|
) -> bool:
|
|
"""Check a single issue against all older open issues. Returns True if duplicate found."""
|
|
target = None
|
|
for i in issues:
|
|
if i["number"] == issue_number:
|
|
target = i
|
|
break
|
|
|
|
if target is None:
|
|
print(f"Issue #{issue_number} not found among open issues.")
|
|
return False
|
|
|
|
older = [i for i in issues if i["number"] < issue_number]
|
|
dup = find_duplicate(target, older, threshold)
|
|
if dup:
|
|
ratio = difflib.SequenceMatcher(
|
|
None,
|
|
normalize_title(target["title"]),
|
|
normalize_title(dup["title"]),
|
|
).ratio()
|
|
print(
|
|
f"#{target['number']}: \"{target['title']}\"\n"
|
|
f" -> duplicate of #{dup['number']}: \"{dup['title']}\" "
|
|
f"({ratio:.0%} similar)"
|
|
)
|
|
close_as_duplicate(issue_number, dup["number"], repo, dry_run)
|
|
return True
|
|
|
|
print(f"#{issue_number}: no duplicate found above threshold {threshold}")
|
|
return False
|
|
|
|
|
|
def main() -> None:
    """Parse command-line arguments and run the selected mode.

    Exactly one of ``--scan`` or ``--issue-number N`` is required.  Nothing
    is modified unless ``--close`` is given; the default is a dry run.

    Exit status:
        0 in single-issue mode whether or not a duplicate was found (finding
        none is not an error); scan mode returns normally.  2 on invalid
        arguments (argparse convention), including an out-of-range
        ``--threshold``.  1 when a ``gh`` command fails.
    """
    parser = argparse.ArgumentParser(
        description="Detect and close duplicate GitHub issues"
    )
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument("--scan", action="store_true", help="Scan all open issues")
    mode.add_argument("--issue-number", type=int, help="Check a single issue number")
    parser.add_argument(
        "--threshold", type=float, default=0.85, help="Similarity threshold (0-1)"
    )
    parser.add_argument(
        "--close",
        action="store_true",
        help="Actually close duplicates (default is dry-run)",
    )
    parser.add_argument(
        "--repo", type=str, help="Repository (owner/repo). Auto-detected if omitted."
    )
    args = parser.parse_args()

    # A threshold <= 0 makes every pair of titles "similar", which together
    # with --close would close almost every issue; a value > 1 can never
    # match.  The chained comparison also rejects NaN.
    if not 0.0 < args.threshold <= 1.0:
        parser.error("--threshold must be greater than 0 and at most 1")

    dry_run = not args.close

    if dry_run:
        print("=== DRY RUN MODE (pass --close to actually close issues) ===\n")

    try:
        print("Fetching open issues...")
        issues = fetch_open_issues(args.repo)
        print(f"Found {len(issues)} open issues.\n")

        if args.scan:
            count = scan_all(issues, args.threshold, args.repo, dry_run)
            print(f"\nTotal duplicates {'found' if dry_run else 'closed'}: {count}")
            return

        check_single(args.issue_number, issues, args.threshold, args.repo, dry_run)
    except subprocess.CalledProcessError as exc:
        # gh's stderr is captured by the wrapper; surface it instead of a
        # bare traceback so the failure reason is visible.
        detail = (exc.stderr or "").strip()
        print(
            f"gh command failed with exit status {exc.returncode}: {detail}",
            file=sys.stderr,
        )
        sys.exit(1)

    sys.exit(0)  # Always exit 0; finding no dup is not an error
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|