test(vcr): classify cache verdicts, detect live calls, surface cost leaks

Convert the per-test VCR verdict line from a single 'NOOP / HIT / MISS /
PARTIAL' tag into a classified outcome that distinguishes the cases that
silently bill the live API on every CI run from the ones that don't:

  HIT                         pure replay
  PARTIAL                     mixed replay + new recordings
  MISS:RECORDED               new cassette saved to Redis (cached next run)
  MISS:OVERFLOW               cassette > MAX_EPISODES_PER_CASSETTE; persister
                              refused to save; re-bills every run
  MISS:NOT_PERSISTED          test failed; save_cassette skipped; re-bills
  NOOP                        VCR-marked but no HTTP traffic (mocked elsewhere)
  UNMARKED:LIVE_CALL          test bypassed VCR AND opened a TCP connection
                              to a known LLM provider host -> wasted spend
  UNMARKED:NO_TRAFFIC         test bypassed VCR but didn't call out

The UNMARKED:LIVE_CALL signal is what converts 'this test probably hits
live' into 'this test connected to api.openai.com'. We install a
socket.connect / socket.create_connection wrapper for the duration of
each non-VCR-marked test and record any outbound TCP to a known LLM
provider hostname. The probe sits below the httpx layer so vcrpy and
respx (which both patch above the socket) are unaffected.

Replace the file-level _RESPX_CONFLICTING_FILES blacklists in the
llm_translation and local_testing conftests with per-item respx
detection in apply_vcr_auto_marker_to_items. A test now skips VCR when
it actually carries @pytest.mark.respx or has respx_mock in its fixture
chain - not just because some other test in the same file imports
MockRouter. Items skipped by skip_files are split into
respx_conflict_module (real conflict, the module wires up respx) vs
file_opt_out (dead skip-list entry whose module never touches respx) so
the session summary makes pruning obvious.

Stabilize the AWS SigV4 fingerprint: the Authorization header on
Bedrock requests rotates its Credential date and Signature on every
call, which previously pushed every Bedrock test past the 50-episode
overflow threshold. Extract the access-key id only
('aws-sigv4:AKIA...') so two requests with the same identity match.

Always emit verdict logging when VCR is active (set
LITELLM_VCR_VERBOSE=0 to opt back into the legacy quiet mode). Add a
session-end classification summary that lists overflow tests, unmarked
live-call tests, and the skip-reason breakdown.

Wire the live-call probe + summary hook into every test directory that
already uses the Redis-backed VCR cache (audio_tests, guardrails_tests,
image_gen_tests, litellm_utils_tests, llm_responses_api_testing,
llm_translation, local_testing, logging_callback_tests, ocr_tests,
pass_through_unit_tests, router_unit_tests, search_tests,
unified_google_tests).

Add tests/llm_translation/test_vcr_classification.py covering the
verdict classifier, skip-reason tagging, AWS SigV4 fingerprint stability,
live-host classification, and session summary rendering.

Co-authored-by: Mateo Wang <mateo-berri@users.noreply.github.com>
This commit is contained in:
Cursor Agent
2026-05-13 00:31:47 +00:00
parent 0deffd3618
commit b637d9f64a
15 changed files with 1135 additions and 39 deletions
+507 -13
View File
@@ -10,20 +10,22 @@ import hashlib
import json
import os
import re
import socket
import sys
from collections import defaultdict
from typing import Iterable
import pytest
from tests._vcr_redis_persister import (
MAX_EPISODES_PER_CASSETTE,
VCR_VERBOSE_ENV,
cassette_cache_capacity_snapshot,
cassette_cache_health,
filter_non_2xx_response,
format_vcr_verdict,
make_redis_persister,
mark_test_outcome_for_cassette,
patch_vcrpy_aiohttp_record_path,
vcr_verbose_enabled,
)
CASSETTE_CACHE_HIGH_WATER_FRACTION = 0.85
@@ -231,6 +233,29 @@ def _iter_header_values(headers, name: str):
yield value
_AWS_SIGV4_CREDENTIAL_RE = re.compile(
    r"AWS4-HMAC-SHA256\s+Credential=([^/\s,]+)/", re.IGNORECASE
)


def _stable_key_value(header_name: str, raw: str) -> str:
    """Reduce a credential header value to the part that identifies the caller.

    Any header other than ``Authorization`` is returned untouched, as is an
    ``Authorization`` value that is not AWS SigV4 (e.g. ``Bearer <key>``).
    For a SigV4 value only the access-key id captured from
    ``Credential=<id>/...`` is kept, because the rest of the header (date,
    signature, ...) changes from request to request.

    Args:
        header_name: Name of the header; compared case-insensitively.
        raw: The header value as sent.

    Returns:
        ``"aws-sigv4:<access-key-id>"`` for SigV4 values, otherwise ``raw``.
    """
    if header_name.lower() == "authorization":
        sigv4 = _AWS_SIGV4_CREDENTIAL_RE.search(raw)
        if sigv4 is not None:
            return f"aws-sigv4:{sigv4.group(1)}"
    return raw
def _compute_key_fingerprint(request) -> str:
headers = getattr(request, "headers", None)
parts: list[str] = []
@@ -242,7 +267,8 @@ def _compute_key_fingerprint(request) -> str:
text = text.strip()
if not text:
continue
parts.append(f"{header_name}={text}")
stable = _stable_key_value(header_name, text)
parts.append(f"{header_name}={stable}")
if not parts:
return "no-key"
digest = hashlib.sha256("\n".join(parts).encode("utf-8")).hexdigest()
@@ -470,6 +496,114 @@ def register_persister_if_enabled(vcr) -> None:
_atexit_banner_registered = True
# Attribute name set on each collected item to record why it was (or was
# not) given the VCR marker.
VCR_SKIP_REASON_USER_ATTR = "vcr_skip_reason"

# Marker reasons recorded per-item / per-test for the session summary.
SKIP_REASON_RESPX = "respx_conflict"
SKIP_REASON_RESPX_MODULE = "respx_conflict_module"
SKIP_REASON_INCOMPATIBLE = "incompatible"
SKIP_REASON_FILE_OPT_OUT = "file_opt_out"
SKIP_REASON_DISABLED = "disabled"
SKIP_REASON_PRE_MARKED = "already_marked"

# Hostnames we consider an "expensive live call" if a non-VCR-marked test
# happens to hit them. Localhost/redis/databases are explicitly excluded.
# Entries are matched as *suffixes* of the lower-cased host.
_LIVE_CALL_HOST_SUFFIXES = (
    ".openai.com",
    ".anthropic.com",
    ".vertexai.googleapis.com",
    ".aiplatform.googleapis.com",
    # Broad: already covers the two googleapis.com entries above.
    ".googleapis.com",
    # NOTE(review): regional Bedrock endpoints look like
    # ``bedrock-runtime.<region>.amazonaws.com``, which does not end with
    # this suffix — confirm whether this entry can ever match.
    ".bedrock-runtime.amazonaws.com",
    ".x.ai",
    ".cohere.ai",
    ".cohere.com",
    ".voyageai.com",
    ".perplexity.ai",
    ".mistral.ai",
    ".groq.com",
    ".huggingface.co",
    ".azure.com",
    ".tavily.com",
    ".serper.dev",
    ".searchapi.io",
    ".firecrawl.dev",
    ".exa.ai",
)

# Hosts starting with any of these prefixes are never reported as live calls.
# NOTE(review): only 172.16.-172.20. are listed; the private 172.16.0.0/12
# block extends to 172.31. — confirm whether the omission is intentional.
_LIVE_CALL_LOCAL_PREFIXES = (
    "127.",
    "localhost",
    "::1",
    "0.0.0.0",
    "10.",
    "172.16.",
    "172.17.",
    "172.18.",
    "172.19.",
    "172.20.",
    "192.168.",
)
def _module_uses_respx(item) -> bool:
"""Return True if the test's *module* actually wires up respx.
A bare ``from respx import MockRouter`` import (with no actual usage)
does not patch the httpx transport, so it does not conflict with vcrpy.
We confirm by checking the module's source for any of:
- ``@pytest.mark.respx``
- ``@respx.mock`` / ``with respx.mock``
- ``respx_mock`` fixture name
"""
module = getattr(item, "module", None)
src_file = getattr(module, "__file__", None)
if not src_file or not os.path.isfile(src_file):
return False
try:
with open(src_file, encoding="utf-8") as f:
src = f.read()
except OSError:
return False
if "respx_mock" in src:
return True
if "@pytest.mark.respx" in src or "@respx.mock" in src:
return True
if "respx.mock" in src or "with respx" in src:
return True
return False
def _item_uses_respx(item) -> bool:
    """Report whether this particular test item will activate respx.

    An item counts as respx-using when it carries the ``respx`` marker or
    when ``respx_mock`` is among its ``fixturenames``. A missing or empty
    ``fixturenames`` attribute is treated as "no fixtures".
    """
    has_marker = item.get_closest_marker("respx") is not None
    return has_marker or "respx_mock" in (getattr(item, "fixturenames", None) or ())
# Memoised source-scan results, keyed by the item's path string, so a module
# is read from disk at most once however many items it contains.
_RESPX_MODULE_CACHE: dict[str, bool] = {}


def _module_path_uses_respx(item) -> bool:
    """Cached wrapper around ``_module_uses_respx`` keyed on ``item.path``.

    Items without a usable ``path`` are reported as not using respx and are
    never cached.
    """
    cache_key = str(getattr(item, "path", "") or "")
    if not cache_key:
        return False
    if (known := _RESPX_MODULE_CACHE.get(cache_key)) is not None:
        return known
    _RESPX_MODULE_CACHE[cache_key] = scanned = _module_uses_respx(item)
    return scanned
def apply_vcr_auto_marker_to_items(
items,
*,
@@ -478,26 +612,232 @@ def apply_vcr_auto_marker_to_items(
) -> None:
"""Auto-apply ``pytest.mark.vcr`` to collected items.
``skip_files`` are basenames to leave un-marked (e.g. respx-using
files, since respx and vcrpy both patch the httpx transport).
``skip_nodeid_suffixes`` are node-id suffixes for individual tests
that depend on live cross-call provider state.
Skip semantics (in priority order):
1. ``vcr_disabled()`` — global env-var off-switch (``LITELLM_VCR_DISABLE=1``
or no ``CASSETTE_REDIS_URL``).
2. Item already carries ``@pytest.mark.vcr`` — leave it alone.
3. Item triggers respx (per-item marker / fixture) — vcrpy and respx
both patch the httpx transport so applying both makes one silently
no-op. We tag the item ``vcr_skip_reason=respx_conflict``.
4. Module wires up respx anywhere — even tests in the file that don't
themselves use respx still inherit the patched transport when
respx fixtures activate at session level. Tagged
``respx_conflict_module``.
5. ``skip_files`` / ``skip_nodeid_suffixes`` opt-out lists from the
caller — used for tests that observe live cross-call provider state
(e.g. prompt-cache warmup) which deterministic replay can't model.
Tagged ``incompatible``.
Each skipped item gets a ``vcr_skip_reason`` attribute so the
session-end summary can show why it isn't cached.
"""
if vcr_disabled():
for item in items:
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_DISABLED)
return
skip_files = frozenset(skip_files)
skip_nodeid_suffixes = tuple(skip_nodeid_suffixes)
for item in items:
if item.get_closest_marker("vcr") is not None:
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_PRE_MARKED)
continue
if _item_uses_respx(item):
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_RESPX)
continue
filename = os.path.basename(str(item.path))
if filename in skip_files:
# Trust the caller's opt-out, but split by reason: if the
# module actually uses respx, label the conflict precisely so
# the summary surfaces dead respx imports vs. real conflicts.
if _module_path_uses_respx(item):
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_RESPX_MODULE)
else:
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_FILE_OPT_OUT)
continue
if any(item.nodeid.endswith(suffix) for suffix in skip_nodeid_suffixes):
continue
if item.get_closest_marker("vcr") is not None:
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_INCOMPATIBLE)
continue
item.add_marker(pytest.mark.vcr)
# ---------------------------------------------------------------------------
# Per-test stats accumulator + verdict classification.
#
# The session-end summary needs richer signal than the line-level verdict:
# - which tests overflowed ``MAX_EPISODES_PER_CASSETTE`` (cassette refused
# to save → live calls every CI run);
# - which tests fired live HTTP at a real LLM endpoint while VCR was not
# active for them (genuine wasted spend, not just "test mocked elsewhere");
# - skip-reason buckets so we can tell respx-conflict from
# incompatible-by-design from "module imports respx but never uses it".
# ---------------------------------------------------------------------------
# Verdict tags used in the per-test logline AND in the session summary
# breakdown.
VERDICT_HIT = "VCR HIT"  # replayed only, nothing newly recorded
VERDICT_MISS_RECORDED = "VCR MISS:RECORDED"  # nothing replayed, new recordings made
VERDICT_MISS_OVERFLOW = "VCR MISS:OVERFLOW"  # more than MAX_EPISODES_PER_CASSETTE entries
VERDICT_MISS_NOT_PERSISTED = "VCR MISS:NOT_PERSISTED"  # recorded, but the test failed
VERDICT_PARTIAL = "VCR PARTIAL"  # mixed replay + new recordings
VERDICT_NOOP_NO_TRAFFIC = "VCR NOOP"  # VCR-marked, no replay and no recording
VERDICT_UNMARKED_LIVE_CALL = "VCR UNMARKED:LIVE_CALL"  # not VCR-marked, probe saw an LLM host
VERDICT_UNMARKED_NO_TRAFFIC = "VCR UNMARKED:NO_TRAFFIC"  # not VCR-marked, probe saw nothing
# NOTE(review): no use of VERDICT_DISABLED is visible in this chunk — confirm
# where it is emitted.
VERDICT_DISABLED = "VCR DISABLED"
# Per-session stats. Cleared by ``_reset_session_stats`` for unit tests.
_session_stats = {
    "verdict_counts": defaultdict(int),
    "overflow_tests": [],  # list of nodeids
    "unmarked_live_call_tests": [],  # list of (nodeid, hosts)
    "skip_reason_counts": defaultdict(int),
    "skip_reason_examples": defaultdict(list),
}


def _reset_session_stats() -> None:
    """Empty every accumulator in place, keeping the container objects."""
    for bucket in _session_stats.values():
        bucket.clear()


def session_stats_snapshot() -> dict:
    """Return a detached copy of the per-session VCR stats for the summary.

    Counters become plain dicts and every list is copied, so mutating the
    snapshot's containers does not touch the live accumulators.
    """
    stats = _session_stats
    examples_copy = {
        reason: list(nodeids)
        for reason, nodeids in stats["skip_reason_examples"].items()
    }
    return {
        "verdict_counts": dict(stats["verdict_counts"]),
        "overflow_tests": list(stats["overflow_tests"]),
        "unmarked_live_call_tests": list(stats["unmarked_live_call_tests"]),
        "skip_reason_counts": dict(stats["skip_reason_counts"]),
        "skip_reason_examples": examples_copy,
    }
def _classify_marked_test(cassette) -> str:
    """Translate a cassette's end-of-test state into a verdict tag.

    Reads ``play_count`` (missing or falsy counts as 0), ``dirty`` (missing
    counts as ``False``) and the cassette length (0 when it has no
    ``__len__``). The overflow check runs first and wins over every other
    outcome.
    """
    replayed = getattr(cassette, "play_count", 0) or 0
    recorded_new = getattr(cassette, "dirty", False)
    episodes = len(cassette) if hasattr(cassette, "__len__") else 0

    # Same ``> MAX_EPISODES_PER_CASSETTE`` comparison the persister's
    # ``save_cassette`` guard is described as using: such a cassette is
    # refused for save, so the test re-records live on every run.
    if episodes > MAX_EPISODES_PER_CASSETTE:
        return VERDICT_MISS_OVERFLOW

    if not recorded_new:
        if replayed == 0:
            return VERDICT_NOOP_NO_TRAFFIC
        if replayed > 0:
            return VERDICT_HIT
    elif replayed == 0:
        return VERDICT_MISS_RECORDED
    return VERDICT_PARTIAL
def _format_verdict_line(verdict: str, cassette, extra: str = "") -> str:
    """Build the one-line verdict string attached to a test.

    The line always starts with ``[<verdict>]``. When a cassette is given,
    ``played=<n> entries=<m>`` follows; a non-empty ``extra`` is appended
    last. Parts are separated by a single space.
    """
    parts = [f"[{verdict}]"]
    if cassette is not None:
        replayed = getattr(cassette, "play_count", 0) or 0
        entries = len(cassette) if hasattr(cassette, "__len__") else 0
        parts.append(f"played={replayed} entries={entries}")
    if extra:
        parts.append(extra)
    return " ".join(parts)
# ---------------------------------------------------------------------------
# Live-call detection for tests that bypass VCR.
#
# When a test isn't VCR-marked (respx_conflict, incompatible, or just
# plain unmarked), we wrap its socket calls inside the autouse
# ``_vcr_outcome_gate`` fixture so we can flag any outbound TCP connection
# to a known LLM provider. This converts "likely live call" into
# "confirmed: this test connected to host X".
# ---------------------------------------------------------------------------
_LIVE_CALL_PROBE_INSTALLED = False
_LIVE_CALL_BUFFER_KEY = "vcr_live_call_hosts"
# Regional AWS Bedrock runtime endpoints have the region *between* the
# service name and ``amazonaws.com`` (``bedrock-runtime.us-east-1.amazonaws.com``),
# so they can never be caught by a plain suffix entry and need a pattern.
_BEDROCK_RUNTIME_HOST_RE = re.compile(
    r"^bedrock(?:-agent)?-runtime(?:-fips)?\.[a-z0-9-]+\.amazonaws\.com$"
)


def _is_live_call_host(host: str) -> bool:
    """Decide whether *host* is a known paid LLM / search provider endpoint.

    Args:
        host: Hostname or IP literal taken from a socket address. Compared
            case-insensitively; one trailing-dot FQDN form is tolerated.

    Returns:
        ``False`` for empty input and for anything starting with one of
        ``_LIVE_CALL_LOCAL_PREFIXES``. Otherwise ``True`` when the host ends
        with one of ``_LIVE_CALL_HOST_SUFFIXES`` or is a regional Bedrock
        runtime endpoint.
    """
    if not host:
        return False
    # "api.openai.com." is the same host as "api.openai.com".
    host = host.lower().rstrip(".")
    if host.startswith(_LIVE_CALL_LOCAL_PREFIXES):
        return False
    if host.endswith(_LIVE_CALL_HOST_SUFFIXES):
        return True
    return _BEDROCK_RUNTIME_HOST_RE.match(host) is not None
class _LiveCallProbe:
    """Context manager that watches outbound TCP connects during one test.

    While active, ``socket.create_connection`` and ``socket.socket.connect``
    are replaced by thin wrappers that note the target host (when
    ``_is_live_call_host`` accepts it) in ``self.hosts`` and then delegate
    to the saved originals. The originals are put back on exit.

    The probe only observes: any error raised while inspecting an address
    is swallowed so the real connection attempt always goes ahead, and
    exceptions from the ``with`` body are not suppressed.
    """

    def __init__(self) -> None:
        self.hosts: list[str] = []  # distinct matching hosts, in first-seen order
        self._orig_create_connection = None
        self._orig_socket_connect = None

    def _note(self, address) -> None:
        """Remember the host part of a tuple *address* if it is an LLM host."""
        try:
            host = address[0] if isinstance(address, tuple) else None
            if host and _is_live_call_host(host) and host not in self.hosts:
                self.hosts.append(host)
        except Exception:
            # Observability must never break the connection being made.
            pass

    def __enter__(self):
        self._orig_create_connection = socket.create_connection
        self._orig_socket_connect = socket.socket.connect

        def _wrapped_create_connection(address, *args, **kwargs):
            self._note(address)
            return self._orig_create_connection(address, *args, **kwargs)

        def _wrapped_socket_connect(sock_self, address):
            self._note(address)
            return self._orig_socket_connect(sock_self, address)

        socket.create_connection = _wrapped_create_connection
        socket.socket.connect = _wrapped_socket_connect
        return self

    def __exit__(self, *exc):
        saved_create = self._orig_create_connection
        saved_connect = self._orig_socket_connect
        if saved_create is not None:
            socket.create_connection = saved_create
        if saved_connect is not None:
            socket.socket.connect = saved_connect
        return False
def vcr_outcome_logging_enabled() -> bool:
    """Tell whether per-test verdict lines should be emitted.

    Logging follows VCR itself: it is on whenever ``vcr_disabled()`` is
    false, unless the ``VCR_VERBOSE_ENV`` environment variable is set to
    exactly ``"0"``, which restores the legacy quiet mode. Keeping verdicts
    on by default is what lets CI logs show whether a paid test ran against
    a real provider.
    """
    return not vcr_disabled() and os.environ.get(VCR_VERBOSE_ENV) != "0"
def record_vcr_outcome(request, vcr) -> None:
"""Call from the post-yield section of an autouse fixture per test."""
cassette = vcr
@@ -507,10 +847,71 @@ def record_vcr_outcome(request, vcr) -> None:
if cassette_path:
mark_test_outcome_for_cassette(cassette_path, test_passed)
if not vcr_verbose_enabled():
nodeid = request.node.nodeid
if cassette is not None:
verdict = _classify_marked_test(cassette)
# Track overflow tests even when verbose logging is off — the
# session summary shows them either way.
if verdict == VERDICT_MISS_OVERFLOW:
_session_stats["overflow_tests"].append(nodeid)
if not test_passed and verdict == VERDICT_MISS_RECORDED:
verdict = VERDICT_MISS_NOT_PERSISTED
_session_stats["verdict_counts"][verdict] += 1
if vcr_outcome_logging_enabled():
line = _format_verdict_line(verdict, cassette)
request.node.user_properties.append(("vcr_verdict", line))
return
verdict = format_vcr_verdict(cassette)
request.node.user_properties.append(("vcr_verdict", verdict))
# Cassette is None ⇒ test wasn't VCR-marked. Honor the skip reason
# we tagged at collection time, and pull live-call hosts captured by
# the socket probe (if any).
skip_reason = getattr(
request.node, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_FILE_OPT_OUT
)
_session_stats["skip_reason_counts"][skip_reason] += 1
hosts = getattr(request.node, _LIVE_CALL_BUFFER_KEY, []) or []
if hosts:
verdict = VERDICT_UNMARKED_LIVE_CALL
_session_stats["unmarked_live_call_tests"].append((nodeid, list(hosts)))
extra = f"reason={skip_reason} hosts={','.join(hosts)}"
else:
verdict = VERDICT_UNMARKED_NO_TRAFFIC
extra = f"reason={skip_reason}"
_session_stats["verdict_counts"][verdict] += 1
examples = _session_stats["skip_reason_examples"][skip_reason]
if len(examples) < 5:
examples.append(nodeid)
if vcr_outcome_logging_enabled():
request.node.user_properties.append(
("vcr_verdict", _format_verdict_line(verdict, None, extra))
)
def install_live_call_probe(request, vcr) -> "_LiveCallProbe | None":
    """Activate the live-call socket probe for non-VCR-marked tests.

    Call this from inside the per-test autouse ``_vcr_outcome_gate``
    fixture *before* the ``yield``. When ``vcr`` is ``None`` (test isn't
    VCR-marked) the socket connect functions are patched for the duration
    of the test, and the probe's host list is stashed on ``request.node``
    so ``record_vcr_outcome`` can include it in the verdict line.

    Tests that *are* VCR-marked don't get the probe — vcrpy itself
    intercepts above the socket layer, so any "outbound" socket would be
    a recording cycle, not real spend.

    Args:
        request: The pytest ``request`` fixture of the running test.
        vcr: The test's cassette, or ``None`` when it is not VCR-marked.

    Returns:
        The active probe, or ``None`` when no probe was installed (the test
        is VCR-marked or VCR is disabled).
    """
    if vcr is not None or vcr_disabled():
        return None
    probe = _LiveCallProbe()
    probe.__enter__()
    # Register the restore step first, so the socket patches are undone even
    # if anything after this line raises.
    request.addfinalizer(lambda: probe.__exit__(None, None, None))
    # Share the list object itself: hosts appended later by the probe are
    # visible through the node attribute.
    setattr(request.node, _LIVE_CALL_BUFFER_KEY, probe.hosts)
    return probe
def _format_capacity_line(snapshot: dict) -> str:
@@ -525,6 +926,99 @@ def _format_capacity_line(snapshot: dict) -> str:
)
def emit_vcr_classification_summary(terminalreporter) -> None:
    """Render the per-classification summary at session end.

    Nothing is written when VCR is disabled, when running inside an xdist
    worker (``PYTEST_XDIST_WORKER`` set), or when no verdict was recorded.

    Output sections (only included when non-empty):

    * **Verdict counts** — breakdown of HIT / PARTIAL / MISS:RECORDED /
      MISS:OVERFLOW / MISS:NOT_PERSISTED / NOOP / UNMARKED:NO_TRAFFIC /
      UNMARKED:LIVE_CALL. The OVERFLOW and UNMARKED:LIVE_CALL counts are
      the cost-leak signals.
    * **Cassette overflow** (>``MAX_EPISODES_PER_CASSETTE``) — tests whose
      cassette the persister refuses to save, so they fire live every run.
    * **Unmarked tests with live API calls** — tests that connected to a
      known LLM host while VCR was *not* active for them, each listed with
      the hosts it reached.
    * **Skip-reason breakdown** — how many tests opted out of VCR and why,
      most frequent reason first, with the stored example node ids.

    Args:
        terminalreporter: pytest's terminal reporter; only ``write_sep``
            and ``write_line`` are used.
    """
    if vcr_disabled():
        return
    if os.environ.get("PYTEST_XDIST_WORKER"):
        return
    snapshot = session_stats_snapshot()
    counts = snapshot["verdict_counts"]
    if not counts:
        return

    terminalreporter.write_sep("=", "VCR CACHE CLASSIFICATION SUMMARY", bold=True)
    for verdict in (
        VERDICT_HIT,
        VERDICT_PARTIAL,
        VERDICT_MISS_RECORDED,
        VERDICT_MISS_OVERFLOW,
        VERDICT_MISS_NOT_PERSISTED,
        VERDICT_NOOP_NO_TRAFFIC,
        VERDICT_UNMARKED_NO_TRAFFIC,
        VERDICT_UNMARKED_LIVE_CALL,
    ):
        n = counts.get(verdict, 0)
        if not n:
            continue
        terminalreporter.write_line(f" [{verdict}] {n}")

    overflow = snapshot["overflow_tests"]
    if overflow:
        terminalreporter.write_sep(
            "-",
            f"CASSETTE OVERFLOW (>{MAX_EPISODES_PER_CASSETTE} episodes, save refused)",
            red=True,
            bold=True,
        )
        terminalreporter.write_line(
            " These tests will hit the live provider on every CI run "
            "because the persister won't save cassettes that grew past "
            "the limit. Stabilize the request body (file handle consumed, "
            "SigV4 timestamp, UUID, or boundary leak)."
        )
        for nodeid in overflow:
            terminalreporter.write_line(f" - {nodeid}")

    live_calls = snapshot["unmarked_live_call_tests"]
    if live_calls:
        terminalreporter.write_sep(
            "-",
            "UNMARKED TESTS WITH LIVE API CALLS",
            red=True,
            bold=True,
        )
        terminalreporter.write_line(
            " These tests connected to a real LLM provider host while "
            "they were NOT VCR-marked. Either add @pytest.mark.vcr "
            "explicitly, mock with respx, or move them off the "
            "respx_conflict / incompatible skip list."
        )
        for nodeid, hosts in live_calls:
            # Keep the node id and the host list visibly separate; the
            # ``hosts=`` label matches the per-test verdict line.
            terminalreporter.write_line(f" - {nodeid} hosts={','.join(hosts)}")

    reasons = snapshot["skip_reason_counts"]
    if reasons:
        terminalreporter.write_sep("-", "SKIP-REASON BREAKDOWN", bold=True)
        for reason, n in sorted(reasons.items(), key=lambda kv: -kv[1]):
            examples = snapshot["skip_reason_examples"].get(reason, [])
            terminalreporter.write_line(f" {reason}: {n}")
            for ex in examples:
                terminalreporter.write_line(f" - {ex}")

    terminalreporter.write_sep("=", bold=True)
def emit_cassette_cache_session_banner(terminalreporter) -> None:
"""Call from ``pytest_terminal_summary``. No-op on xdist workers."""
if vcr_disabled():
@@ -600,7 +1094,7 @@ class VerboseReporterState:
return
if os.environ.get("PYTEST_XDIST_WORKER"):
return
if not vcr_verbose_enabled():
if not vcr_outcome_logging_enabled():
return
reporter = self.resolve_terminal_reporter()
if reporter is None:
+9
View File
@@ -8,6 +8,9 @@ sys.path.insert(0, os.path.abspath("../.."))
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -34,6 +37,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Wrap every test: install the live-call probe, then record the VCR outcome."""
    # Must run before the test body so connects made by the test are seen.
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
@@ -48,3 +52,8 @@ def pytest_runtest_logreport(report):
def pytest_collection_modifyitems(config, items):
    """Pytest hook: run the shared VCR auto-marker over the collected items."""
    apply_vcr_auto_marker_to_items(items)
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: print the cassette cache banner, then the VCR classification summary."""
    emit_cassette_cache_session_banner(terminalreporter)
    emit_vcr_classification_summary(terminalreporter)
+9
View File
@@ -19,6 +19,9 @@ import litellm
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -45,6 +48,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Wrap every test: install the live-call probe, then record the VCR outcome."""
    # Must run before the test body so connects made by the test are seen.
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
@@ -151,3 +155,8 @@ def pytest_collection_modifyitems(config, items):
# Reorder the items list
items[:] = custom_logger_tests + other_tests
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: print the cassette cache banner, then the VCR classification summary."""
    emit_cassette_cache_session_banner(terminalreporter)
    emit_vcr_classification_summary(terminalreporter)
+9
View File
@@ -12,6 +12,9 @@ import litellm # noqa: E402,F401
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -48,6 +51,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Wrap every test: install the live-call probe, then record the VCR outcome."""
    # Must run before the test body so connects made by the test are seen.
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
@@ -62,3 +66,8 @@ def pytest_runtest_logreport(report):
def pytest_collection_modifyitems(config, items):
    """Pytest hook: run the shared VCR auto-marker over the collected items."""
    apply_vcr_auto_marker_to_items(items)
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: print the cassette cache banner, then the VCR classification summary."""
    emit_cassette_cache_session_banner(terminalreporter)
    emit_vcr_classification_summary(terminalreporter)
+9
View File
@@ -15,6 +15,9 @@ import litellm # noqa: E402,F401
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -76,6 +79,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Wrap every test: install the live-call probe, then record the VCR outcome."""
    # Must run before the test body so connects made by the test are seen.
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
@@ -107,3 +111,8 @@ def pytest_collection_modifyitems(config, items):
# Reorder the items list
items[:] = custom_logger_tests + other_tests
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: print the cassette cache banner, then the VCR classification summary."""
    emit_cassette_cache_session_banner(terminalreporter)
    emit_vcr_classification_summary(terminalreporter)
@@ -16,6 +16,9 @@ import litellm # noqa: E402
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -42,6 +45,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Wrap every test: install the live-call probe, then record the VCR outcome."""
    # Must run before the test body so connects made by the test are seen.
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
@@ -107,3 +111,8 @@ def pytest_collection_modifyitems(config, items):
other_tests.sort(key=lambda x: x.name)
items[:] = custom_logger_tests + other_tests
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: print the cassette cache banner, then the VCR classification summary."""
    emit_cassette_cache_session_banner(terminalreporter)
    emit_vcr_classification_summary(terminalreporter)
+15 -16
View File
@@ -21,27 +21,20 @@ import litellm # noqa: E402
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
)
# vcrpy and respx both patch the httpx transport — applying both makes one
# silently win, so respx-using files opt out of the auto-marker.
_RESPX_CONFLICTING_FILES = frozenset(
{
"test_gpt4o_audio.py",
"test_nvidia_nim.py",
"test_openai.py",
"test_openai_o1.py",
"test_prompt_caching.py",
"test_text_completion_unit_tests.py",
"test_xai.py",
}
)
_VCR_AUTO_MARKER_SKIP_FILES = _RESPX_CONFLICTING_FILES | frozenset(
{"test_vcr_redis_persister.py"}
)
# Per-item respx detection (``apply_vcr_auto_marker_to_items``) handles
# the vast majority of respx-vs-vcrpy conflicts automatically. The only
# entry below is the persister's own unit-test file, which exercises
# ``save_cassette`` / ``load_cassette`` against fakeredis and must not
# itself run under a live cassette context.
_VCR_AUTO_MARKER_SKIP_FILES = frozenset({"test_vcr_redis_persister.py"})
# Tests that observe live cross-call provider state (e.g. prompt-cache
# warm-up between two consecutive calls); replay can't reproduce that state.
@@ -73,6 +66,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Wrap every test: install the live-call probe, then record the VCR outcome."""
    # Must run before the test body so connects made by the test are seen.
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
@@ -85,6 +79,11 @@ def pytest_runtest_logreport(report):
_verbose_state.maybe_emit_verdict(report)
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: print the cassette cache banner, then the VCR classification summary."""
    emit_cassette_cache_session_banner(terminalreporter)
    emit_vcr_classification_summary(terminalreporter)
# ---------------------------------------------------------------------------
# Capture TRUE defaults at conftest import time (before test modules pollute).
# ---------------------------------------------------------------------------
@@ -0,0 +1,497 @@
"""Unit tests for the VCR classification + observability layer.
Covers:
- per-item respx detection (module scan, marker, fixture)
- skip-reason tagging in ``apply_vcr_auto_marker_to_items``
- verdict classification (HIT / MISS:RECORDED / MISS:OVERFLOW / MISS:NOT_PERSISTED /
PARTIAL / NOOP / UNMARKED:LIVE_CALL / UNMARKED:NO_TRAFFIC)
- AWS SigV4 fingerprint stability
- session-end summary rendering
- live-call host classification
"""
from __future__ import annotations
import os
import sys
from types import SimpleNamespace
from typing import Optional
import pytest
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from tests._vcr_conftest_common import ( # noqa: E402
SKIP_REASON_FILE_OPT_OUT,
SKIP_REASON_INCOMPATIBLE,
SKIP_REASON_PRE_MARKED,
SKIP_REASON_RESPX,
SKIP_REASON_RESPX_MODULE,
VCR_SKIP_REASON_USER_ATTR,
VERDICT_HIT,
VERDICT_MISS_NOT_PERSISTED,
VERDICT_MISS_OVERFLOW,
VERDICT_MISS_RECORDED,
VERDICT_NOOP_NO_TRAFFIC,
VERDICT_PARTIAL,
VERDICT_UNMARKED_LIVE_CALL,
VERDICT_UNMARKED_NO_TRAFFIC,
_RESPX_MODULE_CACHE,
_classify_marked_test,
_compute_key_fingerprint,
_is_live_call_host,
_reset_session_stats,
_stable_key_value,
apply_vcr_auto_marker_to_items,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
session_stats_snapshot,
)
# ---------------------------------------------------------------------------
# Test doubles
# ---------------------------------------------------------------------------
class _StubItem:
    """Minimal stand-in for a pytest item, just enough for the auto-marker.

    Markers are tracked as plain names: ``get_closest_marker`` hands back the
    name itself (truthy) when present and ``None`` otherwise.
    """

    def __init__(
        self,
        nodeid: str,
        path: str,
        *,
        markers: Optional[list[str]] = None,
        fixturenames: Optional[list[str]] = None,
        module=None,
    ) -> None:
        self.nodeid = nodeid
        self.path = path
        self.module = module
        self.user_properties: list = []
        # Copy the inputs so the caller's lists are never mutated.
        self._markers = list(markers) if markers else []
        self.fixturenames = list(fixturenames) if fixturenames else []

    def get_closest_marker(self, name: str):
        if name in self._markers:
            return name
        return None

    def add_marker(self, marker):
        # ``pytest.mark.vcr`` is a MarkDecorator exposing ``name``; anything
        # without that attribute is stored by its string form.
        self._markers.append(getattr(marker, "name", str(marker)))
@pytest.fixture
def vcr_enabled(monkeypatch):
    """Set up the environment for a test that needs VCR switched on.

    Sets a stub ``CASSETTE_REDIS_URL`` and removes ``LITELLM_VCR_DISABLE``
    and ``PYTEST_XDIST_WORKER`` if they are present.
    """
    monkeypatch.setenv("CASSETTE_REDIS_URL", "redis://stub")
    monkeypatch.delenv("LITELLM_VCR_DISABLE", raising=False)
    monkeypatch.delenv("PYTEST_XDIST_WORKER", raising=False)
@pytest.fixture(autouse=True)
def _reset_module_caches():
    """Clear the session stats and the respx module cache around every test."""
    # Reset before the test so it starts from a clean slate ...
    _reset_session_stats()
    _RESPX_MODULE_CACHE.clear()
    yield
    # ... and again afterwards so nothing leaks into the next test.
    _reset_session_stats()
    _RESPX_MODULE_CACHE.clear()
# ---------------------------------------------------------------------------
# AWS SigV4 fingerprint stability — the Bedrock cassette overflow root cause
# ---------------------------------------------------------------------------
def test_should_extract_only_aws_access_key_from_sigv4_authorization():
    """SigV4 headers that share an access key but differ in date and
    signature must collapse to the same stable value; otherwise every CI
    run would push a new episode into the cassette."""
    sigv4_headers = (
        (
            "AWS4-HMAC-SHA256 Credential=AKIAEXAMPLE12345/20260512/us-east-1/"
            "bedrock/aws4_request, SignedHeaders=host;x-amz-date, "
            "Signature=AAAAAAAA"
        ),
        (
            "AWS4-HMAC-SHA256 Credential=AKIAEXAMPLE12345/20260513/us-east-1/"
            "bedrock/aws4_request, SignedHeaders=host;x-amz-date, "
            "Signature=BBBBBBBB"
        ),
    )
    stabilized = [
        _stable_key_value("Authorization", header) for header in sigv4_headers
    ]
    # Both days must reduce to the access-key-only form.
    assert stabilized == ["aws-sigv4:AKIAEXAMPLE12345"] * 2
def test_should_keep_bearer_authorization_unchanged():
    """A ``Bearer <key>`` Authorization value must pass through untouched."""
    bearer = "Bearer sk-1234"
    assert _stable_key_value("Authorization", bearer) == bearer
def test_should_produce_stable_fingerprint_across_sigv4_signatures():
    """The key fingerprint must stay the same when only the SigV4 date,
    signed-header list, or signature changes between two requests."""
    earlier_auth = (
        "AWS4-HMAC-SHA256 Credential=AKIA1/20260101/us-east-1/"
        "bedrock/aws4_request, SignedHeaders=host, Signature=AAA"
    )
    later_auth = (
        "AWS4-HMAC-SHA256 Credential=AKIA1/20260512/us-east-1/"
        "bedrock/aws4_request, SignedHeaders=host;x-amz-date, "
        "Signature=ZZZ"
    )
    fingerprints = [
        _compute_key_fingerprint(SimpleNamespace(headers={"authorization": auth}))
        for auth in (earlier_auth, later_auth)
    ]
    assert fingerprints[0] == fingerprints[1]
def test_should_distinguish_different_aws_access_keys():
    """Requests signed with different access keys must fingerprint
    differently, so a cassette recorded under one identity never serves
    another."""
    auth_one = "AWS4-HMAC-SHA256 Credential=AKIAONE/x/y/z/aws4_request, Signature=A"
    auth_two = "AWS4-HMAC-SHA256 Credential=AKIATWO/x/y/z/aws4_request, Signature=A"
    fp_one = _compute_key_fingerprint(
        SimpleNamespace(headers={"authorization": auth_one})
    )
    fp_two = _compute_key_fingerprint(
        SimpleNamespace(headers={"authorization": auth_two})
    )
    assert fp_one != fp_two
# ---------------------------------------------------------------------------
# Live-call host classification
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "host,expected",
    [
        ("api.openai.com", True),
        ("api.anthropic.com", True),
        # Canonical regional Bedrock runtime endpoint: a real LLM provider
        # host, so it must be classified as a live call.
        ("bedrock-runtime.us-east-1.amazonaws.com", True),
        ("api.us-east-1.bedrock-runtime.amazonaws.com", True),
        ("foo.bar.openai.com", True),
        ("127.0.0.1", False),
        ("localhost", False),
        ("10.0.0.1", False),
        ("172.16.0.1", False),
        ("redis.example.com", False),
        ("", False),
    ],
)
def test_should_classify_live_call_hosts(host, expected):
    """Provider hosts (including nested subdomains) count as live-call
    hosts; loopback, private-range, unrelated, and empty hosts do not."""
    assert _is_live_call_host(host) is expected
# ---------------------------------------------------------------------------
# Verdict classification
# ---------------------------------------------------------------------------
def _cassette(played: int, dirty: bool, total: int):
class _Sized:
def __init__(self, n):
self.n = n
self.play_count = played
self.dirty = dirty
def __len__(self):
return self.n
return _Sized(total)
def test_should_classify_pure_replay_as_hit():
    """A clean cassette whose three episodes were all played is a HIT."""
    replayed = _cassette(played=3, dirty=False, total=3)
    assert _classify_marked_test(replayed) == VERDICT_HIT
def test_should_classify_no_traffic_as_noop():
    """An empty, clean cassette with nothing played is a no-traffic NOOP."""
    untouched = _cassette(played=0, dirty=False, total=0)
    assert _classify_marked_test(untouched) == VERDICT_NOOP_NO_TRAFFIC
def test_should_classify_pure_record_as_miss_recorded():
    """A dirty cassette with no replays and one episode is MISS:RECORDED."""
    freshly_recorded = _cassette(played=0, dirty=True, total=1)
    assert _classify_marked_test(freshly_recorded) == VERDICT_MISS_RECORDED
def test_should_classify_mixed_replay_and_record_as_partial():
    """A dirty cassette that also replayed some episodes is PARTIAL."""
    mixed = _cassette(played=2, dirty=True, total=4)
    assert _classify_marked_test(mixed) == VERDICT_PARTIAL
def test_should_classify_overflow_as_miss_overflow_regardless_of_play_state():
    """Cassettes larger than ``MAX_EPISODES_PER_CASSETTE`` (50) are refused
    on save, so they hit live on every CI run; the overflow verdict must
    win whether or not any episode was replayed."""
    oversized = (
        _cassette(played=0, dirty=True, total=51),
        _cassette(played=10, dirty=True, total=52),
    )
    for cassette in oversized:
        assert _classify_marked_test(cassette) == VERDICT_MISS_OVERFLOW
# ---------------------------------------------------------------------------
# apply_vcr_auto_marker_to_items: skip-reason tagging
# ---------------------------------------------------------------------------
def _make_module_with_source(tmp_path, src: str, name: str):
p = tmp_path / f"{name}.py"
p.write_text(src)
mod = SimpleNamespace(__file__=str(p))
return mod, str(p)
def test_should_apply_vcr_marker_to_clean_test(vcr_enabled, tmp_path):
    """An item with no respx usage and no skip entry receives the vcr marker."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "clean"
    )
    candidate = _StubItem("clean.py::test_x", module_path, module=module)
    apply_vcr_auto_marker_to_items([candidate])
    assert candidate.get_closest_marker("vcr") == "vcr"
def test_should_skip_per_item_when_respx_marker_present(vcr_enabled, tmp_path):
    """An item carrying the ``respx`` marker is left unmarked and tagged
    with the respx skip reason."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "respx_marker"
    )
    candidate = _StubItem(
        "respx_marker.py::test_x", module_path, markers=["respx"], module=module
    )
    apply_vcr_auto_marker_to_items([candidate])
    assert candidate.get_closest_marker("vcr") is None
    assert getattr(candidate, VCR_SKIP_REASON_USER_ATTR) == SKIP_REASON_RESPX
def test_should_skip_per_item_when_respx_mock_fixture_present(vcr_enabled, tmp_path):
    """An item requesting the ``respx_mock`` fixture is left unmarked and
    tagged with the respx skip reason."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "respx_fixture"
    )
    candidate = _StubItem(
        "respx_fixture.py::test_x",
        module_path,
        fixturenames=["respx_mock"],
        module=module,
    )
    apply_vcr_auto_marker_to_items([candidate])
    assert candidate.get_closest_marker("vcr") is None
    assert getattr(candidate, VCR_SKIP_REASON_USER_ATTR) == SKIP_REASON_RESPX
def test_should_tag_pre_marked_items_so_summary_can_show_them(vcr_enabled, tmp_path):
    """An item that already carries ``vcr`` is tagged as pre-marked."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "premarked"
    )
    candidate = _StubItem(
        "premarked.py::test_x", module_path, markers=["vcr"], module=module
    )
    apply_vcr_auto_marker_to_items([candidate])
    assert getattr(candidate, VCR_SKIP_REASON_USER_ATTR) == SKIP_REASON_PRE_MARKED
def test_should_tag_skip_files_with_respx_module_when_module_actually_uses_respx(
    vcr_enabled, tmp_path
):
    """A ``skip_files`` entry whose module source really uses respx must be
    labeled as a genuine module-level respx conflict, not a dead opt-out."""
    respx_source = "import respx\n@pytest.mark.respx\ndef test_x(): pass\n"
    module, module_path = _make_module_with_source(
        tmp_path, respx_source, "real_respx"
    )
    candidate = _StubItem("real_respx.py::test_x", module_path, module=module)
    apply_vcr_auto_marker_to_items([candidate], skip_files={"real_respx.py"})
    assert getattr(candidate, VCR_SKIP_REASON_USER_ATTR) == SKIP_REASON_RESPX_MODULE
def test_should_tag_skip_files_with_file_opt_out_when_module_does_not_use_respx(
    vcr_enabled, tmp_path
):
    """A ``skip_files`` entry whose module only has an unused respx import
    is a dead skip-list entry and must be tagged as a plain file opt-out so
    it can be pruned."""
    dead_import_source = (
        "from respx import MockRouter # dead import\ndef test_x(): pass\n"
    )
    module, module_path = _make_module_with_source(
        tmp_path, dead_import_source, "dead_skip"
    )
    candidate = _StubItem("dead_skip.py::test_x", module_path, module=module)
    apply_vcr_auto_marker_to_items([candidate], skip_files={"dead_skip.py"})
    assert getattr(candidate, VCR_SKIP_REASON_USER_ATTR) == SKIP_REASON_FILE_OPT_OUT
def test_should_tag_nodeid_suffix_skips_as_incompatible(vcr_enabled, tmp_path):
    """An item whose nodeid ends with a listed suffix is tagged incompatible."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "incompat"
    )
    candidate = _StubItem(
        "incompat.py::test_prompt_caching", module_path, module=module
    )
    suffixes = ("::test_prompt_caching",)
    apply_vcr_auto_marker_to_items([candidate], skip_nodeid_suffixes=suffixes)
    assert getattr(candidate, VCR_SKIP_REASON_USER_ATTR) == SKIP_REASON_INCOMPATIBLE
# ---------------------------------------------------------------------------
# Session-end summary
# ---------------------------------------------------------------------------
class _FakeReporter:
def __init__(self):
self.lines: list[str] = []
def write_sep(self, sep, title="", **kwargs):
self.lines.append(f"=== {title}" if title else "===")
def write_line(self, line):
self.lines.append(line)
@property
def output(self):
return "\n".join(self.lines)
def test_should_render_overflow_section_when_any_test_overflowed(vcr_enabled):
    """The OVERFLOW section is the cost-leak signal: an overflowing cassette
    must surface in the summary together with the offending test id, because
    such tests re-bill on every run."""
    node = SimpleNamespace(
        nodeid="t::overflow",
        user_properties=[],
        rep_call=SimpleNamespace(passed=True),
    )
    overflowing = _cassette(played=0, dirty=True, total=51)
    overflowing._path = None  # avoid mark_test_outcome side-effects
    record_vcr_outcome(SimpleNamespace(node=node), overflowing)

    reporter = _FakeReporter()
    emit_vcr_classification_summary(reporter)
    rendered = reporter.output
    for fragment in (
        "VCR CACHE CLASSIFICATION SUMMARY",
        "VCR MISS:OVERFLOW",
        "CASSETTE OVERFLOW",
        "t::overflow",
    ):
        assert fragment in rendered
def test_should_render_unmarked_live_call_section_with_hosts(vcr_enabled):
    """An unmarked test with recorded live-call hosts is counted as
    UNMARKED:LIVE_CALL and listed, with its hosts, in the summary."""
    node = SimpleNamespace(
        nodeid="t::leak",
        user_properties=[],
        rep_call=SimpleNamespace(passed=True),
    )
    setattr(node, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_RESPX)
    setattr(node, "vcr_live_call_hosts", ["api.openai.com"])
    record_vcr_outcome(SimpleNamespace(node=node), None)

    stats = session_stats_snapshot()
    assert stats["unmarked_live_call_tests"] == [("t::leak", ["api.openai.com"])]
    assert stats["verdict_counts"][VERDICT_UNMARKED_LIVE_CALL] == 1

    reporter = _FakeReporter()
    emit_vcr_classification_summary(reporter)
    rendered = reporter.output
    for fragment in (
        "UNMARKED TESTS WITH LIVE API CALLS",
        "api.openai.com",
        "t::leak",
    ):
        assert fragment in rendered
def test_should_record_unmarked_no_traffic_when_test_skipped_vcr_but_did_not_call_out(
    vcr_enabled,
):
    """An unmarked test with no recorded live-call hosts is counted as
    UNMARKED:NO_TRAFFIC, and its skip reason is tallied."""
    node = SimpleNamespace(
        nodeid="t::clean_skip",
        user_properties=[],
        rep_call=SimpleNamespace(passed=True),
    )
    setattr(node, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_INCOMPATIBLE)
    record_vcr_outcome(SimpleNamespace(node=node), None)

    stats = session_stats_snapshot()
    assert stats["verdict_counts"][VERDICT_UNMARKED_NO_TRAFFIC] == 1
    assert stats["skip_reason_counts"][SKIP_REASON_INCOMPATIBLE] == 1
def test_should_demote_miss_recorded_to_not_persisted_when_test_failed(vcr_enabled):
    """When the test failed, ``save_cassette`` skips persisting, so the next
    CI run hits live again; a would-be MISS:RECORDED must therefore be
    reported as MISS:NOT_PERSISTED."""
    failed_node = SimpleNamespace(
        nodeid="t::failed",
        user_properties=[],
        rep_call=SimpleNamespace(passed=False),
    )
    recorded = _cassette(played=0, dirty=True, total=1)
    recorded._path = None
    record_vcr_outcome(SimpleNamespace(node=failed_node), recorded)

    verdict_counts = session_stats_snapshot()["verdict_counts"]
    assert verdict_counts.get(VERDICT_MISS_NOT_PERSISTED) == 1
def test_should_emit_no_summary_when_no_tests_observed(vcr_enabled):
    """With no outcomes recorded, the summary writes nothing at all."""
    silent_reporter = _FakeReporter()
    emit_vcr_classification_summary(silent_reporter)
    rendered = silent_reporter.output
    assert rendered == ""
# ---------------------------------------------------------------------------
# Live-call probe
# ---------------------------------------------------------------------------
def test_should_skip_live_probe_when_vcr_active(vcr_enabled):
    """When the test *is* VCR-marked (cassette truthy), no probe is
    installed: vcrpy intercepts above the socket layer, so any 'connection'
    would be vcrpy's own bookkeeping rather than real spend."""
    active_cassette = SimpleNamespace(play_count=0, dirty=False)
    request = SimpleNamespace(
        node=SimpleNamespace(),
        addfinalizer=lambda fn: None,
    )
    assert install_live_call_probe(request, active_cassette) is None
def test_live_call_probe_records_known_llm_hosts(vcr_enabled, monkeypatch):
    """The probe should record outbound TCP connections to known LLM
    provider hosts (and ignore localhost / RFC1918 / unknown hosts)."""
    import socket

    finalizers = []

    class _Node:
        pass

    request = SimpleNamespace(
        node=_Node(), addfinalizer=lambda fn: finalizers.append(fn)
    )
    probe = install_live_call_probe(request, None)
    try:
        assert probe is not None
        # Invoke the patched function directly. The probe records at the
        # *call site* before delegating, so the attempt only needs to be
        # made; whether the underlying connection succeeds is irrelevant.
        for address in (("api.openai.com", 443), ("127.0.0.1", 6379)):
            try:
                conn = socket.create_connection(address, timeout=0.001)
            except Exception:
                # Deliberately broad: the delegate may fail with a timeout,
                # a DNS error, or a sandbox-specific error type.
                continue
            # The connection unexpectedly succeeded (e.g. a local service is
            # listening on that port); close it so the socket is not leaked.
            conn.close()
    finally:
        # Always restore the socket functions, even if something above
        # raised, so the rest of the test session is unaffected.
        for fn in finalizers:
            fn()
    hosts = getattr(request.node, "vcr_live_call_hosts", [])
    assert "api.openai.com" in hosts
    assert "127.0.0.1" not in hosts
+17 -10
View File
@@ -25,20 +25,21 @@ import litellm
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
)
# vcrpy and respx both patch the httpx transport — applying both makes one
# silently win, so respx-using files opt out of the auto-marker.
_RESPX_CONFLICTING_FILES = frozenset(
{
"test_router.py",
"test_amazing_vertex_completion.py",
"test_azure_openai.py",
}
)
# Per-item respx detection (``apply_vcr_auto_marker_to_items``) auto-skips
# tests whose ``@pytest.mark.respx`` marker or ``respx_mock`` fixture
# would conflict with vcrpy's transport patch. We no longer maintain a
# file-level ``_RESPX_CONFLICTING_FILES`` list here — the previous
# entries (``test_router.py``) had only a stale ``from respx import
# MockRouter`` import with no actual respx wiring, so file-level
# blacklisting was masking valid cache opportunities.
# Files where VCR replay breaks the test:
# - ``test_assistants.py``: polls fresh per-session run IDs that no cassette
@@ -76,6 +77,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
install_live_call_probe(request, vcr)
yield
record_vcr_outcome(request, vcr)
@@ -88,6 +90,11 @@ def pytest_runtest_logreport(report):
_verbose_state.maybe_emit_verdict(report)
def pytest_terminal_summary(terminalreporter, exitstatus, config):
emit_cassette_cache_session_banner(terminalreporter)
emit_vcr_classification_summary(terminalreporter)
# ---------------------------------------------------------------------------
# Capture TRUE defaults at conftest import time. This runs before any test
# module's top-level code (e.g. `litellm.num_retries = 3`) executes, so
@@ -215,7 +222,7 @@ def setup_and_teardown():
def pytest_collection_modifyitems(config, items):
apply_vcr_auto_marker_to_items(
items,
skip_files=_RESPX_CONFLICTING_FILES | _VCR_INCOMPATIBLE_FILES,
skip_files=_VCR_INCOMPATIBLE_FILES,
skip_nodeid_suffixes=_VCR_INCOMPATIBLE_NODEID_SUFFIXES,
)
+9
View File
@@ -22,6 +22,9 @@ import litellm
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -69,6 +72,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
install_live_call_probe(request, vcr)
yield
record_vcr_outcome(request, vcr)
@@ -220,3 +224,8 @@ def pytest_collection_modifyitems(config, items):
# Reorder the items list
items[:] = custom_logger_tests + other_tests
def pytest_terminal_summary(terminalreporter, exitstatus, config):
emit_cassette_cache_session_banner(terminalreporter)
emit_vcr_classification_summary(terminalreporter)
+9
View File
@@ -15,6 +15,9 @@ sys.path.insert(0, os.path.abspath("../.."))
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -41,6 +44,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
install_live_call_probe(request, vcr)
yield
record_vcr_outcome(request, vcr)
@@ -55,3 +59,8 @@ def pytest_runtest_logreport(report):
def pytest_collection_modifyitems(config, items):
apply_vcr_auto_marker_to_items(items)
def pytest_terminal_summary(terminalreporter, exitstatus, config):
emit_cassette_cache_session_banner(terminalreporter)
emit_vcr_classification_summary(terminalreporter)
@@ -8,6 +8,9 @@ sys.path.insert(0, os.path.abspath("../.."))
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -34,6 +37,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
install_live_call_probe(request, vcr)
yield
record_vcr_outcome(request, vcr)
@@ -48,3 +52,8 @@ def pytest_runtest_logreport(report):
def pytest_collection_modifyitems(config, items):
apply_vcr_auto_marker_to_items(items)
def pytest_terminal_summary(terminalreporter, exitstatus, config):
emit_cassette_cache_session_banner(terminalreporter)
emit_vcr_classification_summary(terminalreporter)
+9
View File
@@ -15,6 +15,9 @@ import litellm # noqa: E402,F401
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -87,6 +90,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
install_live_call_probe(request, vcr)
yield
record_vcr_outcome(request, vcr)
@@ -114,3 +118,8 @@ def pytest_collection_modifyitems(config, items):
# Reorder the items list
items[:] = custom_logger_tests + other_tests
def pytest_terminal_summary(terminalreporter, exitstatus, config):
emit_cassette_cache_session_banner(terminalreporter)
emit_vcr_classification_summary(terminalreporter)
+9
View File
@@ -16,6 +16,9 @@ sys.path.insert(0, os.path.abspath("../.."))
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -42,6 +45,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
install_live_call_probe(request, vcr)
yield
record_vcr_outcome(request, vcr)
@@ -56,3 +60,8 @@ def pytest_runtest_logreport(report):
def pytest_collection_modifyitems(config, items):
apply_vcr_auto_marker_to_items(items)
def pytest_terminal_summary(terminalreporter, exitstatus, config):
emit_cassette_cache_session_banner(terminalreporter)
emit_vcr_classification_summary(terminalreporter)
+9
View File
@@ -15,6 +15,9 @@ import litellm # noqa: E402,F401
from tests._vcr_conftest_common import ( # noqa: E402
VerboseReporterState,
apply_vcr_auto_marker_to_items,
emit_cassette_cache_session_banner,
emit_vcr_classification_summary,
install_live_call_probe,
record_vcr_outcome,
register_persister_if_enabled,
vcr_config_dict,
@@ -74,6 +77,7 @@ def pytest_runtest_makereport(item, call):
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
install_live_call_probe(request, vcr)
yield
record_vcr_outcome(request, vcr)
@@ -101,3 +105,8 @@ def pytest_collection_modifyitems(config, items):
# Reorder the items list
items[:] = custom_logger_tests + other_tests
def pytest_terminal_summary(terminalreporter, exitstatus, config):
emit_cassette_cache_session_banner(terminalreporter)
emit_vcr_classification_summary(terminalreporter)