mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 14:48:44 +00:00
f38c16c71e
* test(proxy): add harness for proxy_server.py behavior-pinning Creates tests/test_litellm/proxy/proxy_server/ with: - conftest.py: 11 shared fixtures (app, client, mock_prisma, auth_as, mock_router with parametrized response builders, normalize, etc.) - _coverage_check.py: per-PR coverage gate (line + branch) against a baseline, self-selects target by inspecting which placeholder files have been filled - _pin_check.py: AST-based gate that verifies every pin-list item has >=1 happy + >=1 error test with a real assertion (no status-only) - test_harness_smoke.py: 19 smoke tests covering every fixture + both scripts end-to-end - 26 placeholder test files (one docstring each) reserved for follow-up PRs per the directory ownership in the Notion plan - .coverage_baseline pinned at 0% so future PRs measure deltas against new-tests-only and aren't entangled with the broader scattered test suite Adds a dedicated proxy-server job to test-unit-proxy-endpoints.yml so this directory's runtime + coverage are tracked independently. Plan: https://www.notion.so/36c43b8acdab81ee845fd5365128a2fc * ci(proxy-endpoints): allow workflow_dispatch Lets the workflow be triggered manually on a branch via `gh workflow run`, which is needed for the verify-first flow on workflow changes before opening a PR. * test(proxy): address review feedback on proxy_server harness - conftest.py: anchor sys.path insert to __file__ (Path(__file__).resolve().parents[4]) instead of CWD-relative os.path.abspath("../../../../") which resolved to the wrong directory when pytest is launched from the repo root. - _coverage_check.py: actually read .coverage_baseline and use it as the floor (line_min = max(target, baseline)). Closes the gap between the PR description's "delta semantics" and what the script was doing. With baseline=0.0 today this is a no-op; future PRs that update the baseline cause regressions (test deletions etc.) to trip the gate even if the static PR target is still met. 
- _pin_check.py: drop unreachable startswith("_") guard (test_*.py glob never yields underscore-prefixed names) and read each test file once instead of twice.
250 lines
8.0 KiB
Python
250 lines
8.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Pin-list gate for the proxy_server.py behavior-pinning project.
|
|
|
|
For each identifier in a pin list, asserts that the test directory contains:
|
|
1. At least one happy-path test that references the identifier and uses
|
|
a real assertion (normalize(response.json()) == {...}, .model_validate,
|
|
or a dict-equality with >= 3 keys).
|
|
2. At least one error-path test (name hints at error OR asserts a 4xx/5xx
|
|
status OR uses pytest.raises).
|
|
3. No test that is "status-only" (its sole assert is on response.status_code).
|
|
|
|
``test_harness_smoke.py`` is ignored (harness self-tests don't count toward
|
|
behavior pinning).
|
|
|
|
Exits 0 on PASS, non-zero on FAIL.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import ast
|
|
import re
|
|
import sys
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Set, Tuple
|
|
|
|
# Directory that contains this script; main() uses it as the default
# value of --test-dir.
HERE = Path(__file__).resolve().parent

# A pin-list entry is a markdown bullet holding exactly one backtick-quoted
# identifier, e.g. "- `update_cache`". Only trailing whitespace is tolerated.
PIN_LINE_RE = re.compile(r"^- `([^`]+)`\s*$")

# Substrings that mark a test as an error-path test when they occur in its
# lower-cased function name.
ERROR_NAME_HINTS = (
    "error",
    "fail",
    "invalid",
    "unauthorized",
    "forbidden",
    "missing",
    "denied",
    "rejected",
    "bad",
    "raises",
    "exception",
    "404",
    "401",
    "403",
    "422",
    "500",
)

# Every 4xx and 5xx status counts as an error response, matching the module
# docstring ("asserts a 4xx/5xx status"). A hand-picked subset would miss
# codes such as 408, 429 and 504, so the whole range is used instead.
ERROR_STATUS_CODES = frozenset(range(400, 600))
|
|
|
|
|
|
@dataclass
class TestFunction:
    """Facts recorded about one ``test_*`` function found in a test file."""

    # The ``Test`` prefix would make pytest try to collect this dataclass as
    # a test class whenever a test module imports it. It is a plain record,
    # so opt out of collection. Unannotated, so it is not a dataclass field.
    __test__ = False

    # Name of the test function as written in its ``def``.
    name: str
    # Path of the file the function was found in.
    file: Path
    # Text handed in by the collector (it passes the whole file's contents).
    source: str
    # Every ``assert`` statement found inside the function.
    asserts: List[ast.Assert] = field(default_factory=list)
    # Number of ``with`` items recognised as a ``raises(...)`` call.
    raises_calls: int = 0
    # Integer literals taken from asserts that compare ``.status_code``.
    status_code_asserts: List[int] = field(default_factory=list)
    # True once any assert uses normalize(), .model_validate() or a large
    # dict equality.
    has_strong_assertion: bool = False
|
|
|
|
|
|
def parse_pin_list(path: Path) -> List[str]:
    """Return the identifiers listed in the pin-list file at *path*.

    A line contributes an item only when it matches ``PIN_LINE_RE``; every
    other line (headings, prose, blank lines) is ignored. Items are returned
    in file order with surrounding whitespace stripped.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # locale's default encoding of the machine running the gate.
    text = path.read_text(encoding="utf-8")
    matches = (PIN_LINE_RE.match(line) for line in text.splitlines())
    return [m.group(1).strip() for m in matches if m]
|
|
|
|
|
|
def _has_strong_assertion(node: ast.AST) -> bool:
|
|
"""True if an assert subtree contains normalize(), .model_validate(), or dict-eq with >=3 keys."""
|
|
for sub in ast.walk(node):
|
|
if isinstance(sub, ast.Call):
|
|
func = sub.func
|
|
if isinstance(func, ast.Name) and func.id == "normalize":
|
|
return True
|
|
if isinstance(func, ast.Attribute) and func.attr == "model_validate":
|
|
return True
|
|
if (
|
|
isinstance(sub, ast.Compare)
|
|
and len(sub.ops) == 1
|
|
and isinstance(sub.ops[0], ast.Eq)
|
|
):
|
|
# response.json() == {<dict literal with >= 3 keys>}
|
|
rhs = sub.comparators[0]
|
|
if isinstance(rhs, ast.Dict) and len(rhs.keys) >= 3:
|
|
return True
|
|
return False
|
|
|
|
|
|
def _extract_status_code(node: ast.Assert) -> Optional[int]:
|
|
"""If this assert is exactly ``X.status_code == <int>``, return the int."""
|
|
test = node.test
|
|
if not isinstance(test, ast.Compare):
|
|
return None
|
|
if len(test.ops) != 1 or not isinstance(test.ops[0], ast.Eq):
|
|
return None
|
|
left = test.left
|
|
if not (isinstance(left, ast.Attribute) and left.attr == "status_code"):
|
|
return None
|
|
right = test.comparators[0]
|
|
if isinstance(right, ast.Constant) and isinstance(right.value, int):
|
|
return right.value
|
|
return None
|
|
|
|
|
|
def _is_raises_call(expr: ast.AST) -> bool:
    """True if *expr* is a call spelled ``<obj>.raises(...)`` or bare ``raises(...)``."""
    if not isinstance(expr, ast.Call):
        return False
    callee = expr.func
    if isinstance(callee, ast.Attribute):
        return callee.attr == "raises"
    # Covers ``from pytest import raises`` followed by ``with raises(...)``.
    return isinstance(callee, ast.Name) and callee.id == "raises"


def collect_test_functions(test_dir: Path) -> List[TestFunction]:
    """Parse the ``test_*.py`` files in *test_dir* and describe their tests.

    Only files directly inside *test_dir* are scanned, in sorted order, and
    ``test_harness_smoke.py`` is skipped. Every sync or async function whose
    name starts with ``test_`` yields one ``TestFunction``, at any nesting
    depth, so test methods inside classes are included.

    For each function the record holds its assert statements, the status
    codes those asserts compare against, whether any assert is "strong", and
    how many ``with`` items are ``raises(...)`` calls. ``source`` is set to
    the text of the whole file, not just the function.

    A file that fails to parse is skipped with a warning on stderr.
    """
    funcs: List[TestFunction] = []
    for path in sorted(test_dir.glob("test_*.py")):
        # Skip the harness's own smoke tests — they don't count toward
        # behavior pinning.
        if path.name == "test_harness_smoke.py":
            continue
        # Decode as UTF-8 (Python's default source encoding) rather than
        # whatever the locale default happens to be.
        source = path.read_text(encoding="utf-8")
        try:
            tree = ast.parse(source, filename=str(path))
        except SyntaxError as exc:
            # Keep going, but say so: a silently dropped file would make
            # later "no tests reference pin" failures hard to diagnose.
            print(
                f"WARNING: skipping unparseable test file {path}: {exc}",
                file=sys.stderr,
            )
            continue
        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            if not node.name.startswith("test_"):
                continue
            tf = TestFunction(name=node.name, file=path, source=source)
            for sub in ast.walk(node):
                if isinstance(sub, ast.Assert):
                    tf.asserts.append(sub)
                    sc = _extract_status_code(sub)
                    if sc is not None:
                        tf.status_code_asserts.append(sc)
                    if _has_strong_assertion(sub):
                        tf.has_strong_assertion = True
                elif isinstance(sub, ast.With):
                    tf.raises_calls += sum(
                        1 for item in sub.items if _is_raises_call(item.context_expr)
                    )
            funcs.append(tf)
    return funcs
|
|
|
|
|
|
def _is_status_only(tf: TestFunction) -> bool:
|
|
"""A test that has >=1 status_code assert and ALL its asserts are status_code."""
|
|
return len(tf.asserts) >= 1 and len(tf.status_code_asserts) == len(tf.asserts)
|
|
|
|
|
|
def _looks_like_error_test(tf: TestFunction) -> bool:
    """Heuristically decide whether *tf* exercises an error path.

    Any one signal is enough: a hint from ``ERROR_NAME_HINTS`` appears in the
    lower-cased test name, the test has at least one recorded ``raises``
    call, or one of its status_code asserts is in ``ERROR_STATUS_CODES``.
    """
    lowered_name = tf.name.lower()
    return (
        any(hint in lowered_name for hint in ERROR_NAME_HINTS)
        or tf.raises_calls > 0
        or any(code in ERROR_STATUS_CODES for code in tf.status_code_asserts)
    )
|
|
|
|
|
|
def _references_pin(tf: TestFunction, pin: str) -> bool:
|
|
"""Cheap string-contains check against the test function's source.
|
|
|
|
This is intentionally permissive — if the pin identifier (e.g.
|
|
``update_cache`` or ``POST /chat/completions``) appears anywhere in
|
|
the test file we count it. Aliased route paths or parametrize
|
|
cases trigger the same reference.
|
|
"""
|
|
return pin in tf.source
|
|
|
|
|
|
def check(pin_list: List[str], funcs: List[TestFunction]) -> Tuple[bool, List[str]]:
    """Evaluate the gate and return ``(passed, failure_messages)``.

    Failures are reported in this order: first every status-only test in
    *funcs*, then one pass over the pins. A pin with no referencing test gets
    a single message; otherwise it gets a message for a missing happy-path
    test (strong assertion and not error-like) and/or a missing error-path
    test. ``passed`` is True exactly when no message was produced.
    """
    failures: List[str] = [
        f"status-only test (only asserts response.status_code): "
        f"{tf.file.name}::{tf.name}"
        for tf in funcs
        if _is_status_only(tf)
    ]

    by_pin: Dict[str, List[TestFunction]] = {
        pin: [tf for tf in funcs if _references_pin(tf, pin)] for pin in pin_list
    }

    for pin, matches in by_pin.items():
        if not matches:
            failures.append(f"no tests reference pin: {pin}")
            continue

        # Classify each matching test once and reuse the verdict for both
        # the happy-path and the error-path requirement.
        error_flags = [_looks_like_error_test(tf) for tf in matches]
        found_error = any(error_flags)
        found_happy = any(
            tf.has_strong_assertion and not is_error
            for tf, is_error in zip(matches, error_flags)
        )

        if not found_happy:
            failures.append(
                f"no happy-path test with strong assertion (normalize/model_validate/dict-eq>=3) "
                f"for pin: {pin}"
            )
        if not found_error:
            failures.append(f"no error-path test for pin: {pin}")

    return not failures, failures
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point; returns the process exit status.

    Returns:
        0 when the gate passes, 1 when it reports failures, and 2 when the
        pin list file is missing or contains no items.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--list",
        required=True,
        help="Path to pin list file (markdown bullets in `- ` + backtick + symbol + backtick format)",
    )
    parser.add_argument(
        "--test-dir",
        default=str(HERE),
        help="Test directory to scan (default: this directory)",
    )
    options = parser.parse_args()

    # Problems with the pin list itself are reported on stderr with a
    # distinct exit status, before any test file is read.
    pin_path = Path(options.list)
    if not pin_path.is_file():
        print(f"FAIL: pin list not found at {pin_path}", file=sys.stderr)
        return 2

    pin_list = parse_pin_list(pin_path)
    if not pin_list:
        print(f"FAIL: pin list at {pin_path} contained zero items", file=sys.stderr)
        return 2

    funcs = collect_test_functions(Path(options.test_dir))
    ok, failures = check(pin_list, funcs)

    print(f"pins: {len(pin_list)}")
    print(f"tests: {len(funcs)}")
    for message in failures:
        print(f" - {message}")

    if ok:
        verdict, exit_status = "PASS", 0
    else:
        verdict, exit_status = "FAIL", 1
    print(verdict)
    return exit_status
|
|
|
|
|
|
# Run the gate only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|