mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-18 00:48:01 +00:00
test(vcr): classify cache verdicts, detect live calls, surface cost leaks
Convert the per-test VCR verdict line from a single 'NOOP / HIT / MISS /
PARTIAL' tag into a classified outcome that distinguishes the cases that
silently bill the live API on every CI run from the ones that don't:
HIT pure replay
PARTIAL mixed replay + new recordings
MISS:RECORDED new cassette saved to Redis (cached next run)
MISS:OVERFLOW cassette > MAX_EPISODES_PER_CASSETTE; persister
refused to save; re-bills every run
MISS:NOT_PERSISTED test failed; save_cassette skipped; re-bills
NOOP VCR-marked but no HTTP traffic (mocked elsewhere)
UNMARKED:LIVE_CALL test bypassed VCR AND opened a TCP connection
to a known LLM provider host -> wasted spend
UNMARKED:NO_TRAFFIC test bypassed VCR but didn't call out
The UNMARKED:LIVE_CALL signal is what converts 'this test probably hits
live' into 'this test connected to api.openai.com'. We install a
socket.connect / socket.create_connection wrapper for the duration of
each non-VCR-marked test and record any outbound TCP to a known LLM
provider hostname. The probe sits below the httpx layer so vcrpy and
respx (which both patch above the socket) are unaffected.
Replace the file-level _RESPX_CONFLICTING_FILES blacklists in the
llm_translation and local_testing conftests with per-item respx
detection in apply_vcr_auto_marker_to_items. A test now skips VCR when
it actually carries @pytest.mark.respx or has respx_mock in its fixture
chain - not just because some other test in the same file imports
MockRouter. Items skipped by skip_files are split into respx_conflict
(real conflict, the module wires up respx) vs file_opt_out (dead skip-
list entry whose module never touches respx) so the session summary
makes pruning obvious.
Stabilize the AWS SigV4 fingerprint: the Authorization header on
Bedrock requests rotates its Credential date and Signature on every
call, which previously pushed every Bedrock test past the 50-episode
overflow threshold. Extract the access-key id only
('aws-sigv4:AKIA...') so two requests with the same identity match.
Always emit verdict logging when VCR is active (set
LITELLM_VCR_VERBOSE=0 to opt back into the legacy quiet mode). Add a
session-end classification summary that lists overflow tests, unmarked
live-call tests, and the skip-reason breakdown.
Wire the live-call probe + summary hook into every test directory that
already uses the Redis-backed VCR cache (audio_tests, guardrails_tests,
image_gen_tests, litellm_utils_tests, llm_responses_api_testing,
llm_translation, local_testing, logging_callback_tests, ocr_tests,
pass_through_unit_tests, router_unit_tests, search_tests,
unified_google_tests).
Add tests/llm_translation/test_vcr_classification.py covering the
verdict classifier, skip-reason tagging, AWS SigV4 fingerprint stability,
live-host classification, and session summary rendering.
Co-authored-by: Mateo Wang <mateo-berri@users.noreply.github.com>
This commit is contained in:
+507
-13
@@ -10,20 +10,22 @@ import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from typing import Iterable
|
||||
|
||||
import pytest
|
||||
|
||||
from tests._vcr_redis_persister import (
|
||||
MAX_EPISODES_PER_CASSETTE,
|
||||
VCR_VERBOSE_ENV,
|
||||
cassette_cache_capacity_snapshot,
|
||||
cassette_cache_health,
|
||||
filter_non_2xx_response,
|
||||
format_vcr_verdict,
|
||||
make_redis_persister,
|
||||
mark_test_outcome_for_cassette,
|
||||
patch_vcrpy_aiohttp_record_path,
|
||||
vcr_verbose_enabled,
|
||||
)
|
||||
|
||||
CASSETTE_CACHE_HIGH_WATER_FRACTION = 0.85
|
||||
@@ -231,6 +233,29 @@ def _iter_header_values(headers, name: str):
|
||||
yield value
|
||||
|
||||
|
||||
_AWS_SIGV4_CREDENTIAL_RE = re.compile(
|
||||
r"AWS4-HMAC-SHA256\s+Credential=([^/\s,]+)/", re.IGNORECASE
|
||||
)
|
||||
|
||||
|
||||
def _stable_key_value(header_name: str, raw: str) -> str:
|
||||
"""Return a *stable* identifier for a credential header.
|
||||
|
||||
For Bearer / API-key headers the entire value is stable across calls,
|
||||
so we hash it as-is. For AWS SigV4 ``Authorization`` headers, only
|
||||
the access-key portion of ``Credential=AKIA.../<DATE>/...`` is stable
|
||||
— date, region, signed headers, and signature all rotate per request,
|
||||
so hashing the full value would push every Bedrock request into a new
|
||||
cassette episode. Extract just the access-key id when present.
|
||||
"""
|
||||
if header_name.lower() != "authorization":
|
||||
return raw
|
||||
match = _AWS_SIGV4_CREDENTIAL_RE.search(raw)
|
||||
if match:
|
||||
return f"aws-sigv4:{match.group(1)}"
|
||||
return raw
|
||||
|
||||
|
||||
def _compute_key_fingerprint(request) -> str:
|
||||
headers = getattr(request, "headers", None)
|
||||
parts: list[str] = []
|
||||
@@ -242,7 +267,8 @@ def _compute_key_fingerprint(request) -> str:
|
||||
text = text.strip()
|
||||
if not text:
|
||||
continue
|
||||
parts.append(f"{header_name}={text}")
|
||||
stable = _stable_key_value(header_name, text)
|
||||
parts.append(f"{header_name}={stable}")
|
||||
if not parts:
|
||||
return "no-key"
|
||||
digest = hashlib.sha256("\n".join(parts).encode("utf-8")).hexdigest()
|
||||
@@ -470,6 +496,114 @@ def register_persister_if_enabled(vcr) -> None:
|
||||
_atexit_banner_registered = True
|
||||
|
||||
|
||||
VCR_SKIP_REASON_USER_ATTR = "vcr_skip_reason"
|
||||
|
||||
# Marker reasons recorded per-item / per-test for the session summary.
|
||||
SKIP_REASON_RESPX = "respx_conflict"
|
||||
SKIP_REASON_RESPX_MODULE = "respx_conflict_module"
|
||||
SKIP_REASON_INCOMPATIBLE = "incompatible"
|
||||
SKIP_REASON_FILE_OPT_OUT = "file_opt_out"
|
||||
SKIP_REASON_DISABLED = "disabled"
|
||||
SKIP_REASON_PRE_MARKED = "already_marked"
|
||||
|
||||
# Hostnames we consider an "expensive live call" if a non-VCR-marked test
|
||||
# happens to hit them. Localhost/redis/databases are explicitly excluded.
|
||||
_LIVE_CALL_HOST_SUFFIXES = (
|
||||
".openai.com",
|
||||
".anthropic.com",
|
||||
".vertexai.googleapis.com",
|
||||
".aiplatform.googleapis.com",
|
||||
".googleapis.com",
|
||||
".bedrock-runtime.amazonaws.com",
|
||||
".x.ai",
|
||||
".cohere.ai",
|
||||
".cohere.com",
|
||||
".voyageai.com",
|
||||
".perplexity.ai",
|
||||
".mistral.ai",
|
||||
".groq.com",
|
||||
".huggingface.co",
|
||||
".azure.com",
|
||||
".tavily.com",
|
||||
".serper.dev",
|
||||
".searchapi.io",
|
||||
".firecrawl.dev",
|
||||
".exa.ai",
|
||||
)
|
||||
_LIVE_CALL_LOCAL_PREFIXES = (
|
||||
"127.",
|
||||
"localhost",
|
||||
"::1",
|
||||
"0.0.0.0",
|
||||
"10.",
|
||||
"172.16.",
|
||||
"172.17.",
|
||||
"172.18.",
|
||||
"172.19.",
|
||||
"172.20.",
|
||||
"192.168.",
|
||||
)
|
||||
|
||||
|
||||
def _module_uses_respx(item) -> bool:
|
||||
"""Return True if the test's *module* actually wires up respx.
|
||||
|
||||
A bare ``from respx import MockRouter`` import (with no actual usage)
|
||||
does not patch the httpx transport, so it does not conflict with vcrpy.
|
||||
We confirm by checking the module's source for any of:
|
||||
- ``@pytest.mark.respx``
|
||||
- ``@respx.mock`` / ``with respx.mock``
|
||||
- ``respx_mock`` fixture name
|
||||
"""
|
||||
module = getattr(item, "module", None)
|
||||
src_file = getattr(module, "__file__", None)
|
||||
if not src_file or not os.path.isfile(src_file):
|
||||
return False
|
||||
try:
|
||||
with open(src_file, encoding="utf-8") as f:
|
||||
src = f.read()
|
||||
except OSError:
|
||||
return False
|
||||
if "respx_mock" in src:
|
||||
return True
|
||||
if "@pytest.mark.respx" in src or "@respx.mock" in src:
|
||||
return True
|
||||
if "respx.mock" in src or "with respx" in src:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _item_uses_respx(item) -> bool:
|
||||
"""Return True if *this specific item* will trigger respx.
|
||||
|
||||
Two signals: the ``respx`` pytest marker, and the ``respx_mock``
|
||||
fixture appearing in the item's resolved fixture chain. Either alone
|
||||
causes vcrpy + respx to fight over the httpx transport.
|
||||
"""
|
||||
if item.get_closest_marker("respx") is not None:
|
||||
return True
|
||||
fixturenames = getattr(item, "fixturenames", None) or ()
|
||||
if "respx_mock" in fixturenames:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
# Cache the source-scan result so we don't reread each module per item.
|
||||
_RESPX_MODULE_CACHE: dict[str, bool] = {}
|
||||
|
||||
|
||||
def _module_path_uses_respx(item) -> bool:
|
||||
src_file = str(getattr(item, "path", "") or "")
|
||||
if not src_file:
|
||||
return False
|
||||
cached = _RESPX_MODULE_CACHE.get(src_file)
|
||||
if cached is not None:
|
||||
return cached
|
||||
result = _module_uses_respx(item)
|
||||
_RESPX_MODULE_CACHE[src_file] = result
|
||||
return result
|
||||
|
||||
|
||||
def apply_vcr_auto_marker_to_items(
|
||||
items,
|
||||
*,
|
||||
@@ -478,26 +612,232 @@ def apply_vcr_auto_marker_to_items(
|
||||
) -> None:
|
||||
"""Auto-apply ``pytest.mark.vcr`` to collected items.
|
||||
|
||||
``skip_files`` are basenames to leave un-marked (e.g. respx-using
|
||||
files, since respx and vcrpy both patch the httpx transport).
|
||||
``skip_nodeid_suffixes`` are node-id suffixes for individual tests
|
||||
that depend on live cross-call provider state.
|
||||
Skip semantics (in priority order):
|
||||
|
||||
1. ``vcr_disabled()`` — global env-var off-switch (``LITELLM_VCR_DISABLE=1``
|
||||
or no ``CASSETTE_REDIS_URL``).
|
||||
2. Item already carries ``@pytest.mark.vcr`` — leave it alone.
|
||||
3. Item triggers respx (per-item marker / fixture) — vcrpy and respx
|
||||
both patch the httpx transport so applying both makes one silently
|
||||
no-op. We tag the item ``vcr_skip_reason=respx_conflict``.
|
||||
4. Module wires up respx anywhere — even tests in the file that don't
|
||||
themselves use respx still inherit the patched transport when
|
||||
respx fixtures activate at session level. Tagged
|
||||
``respx_conflict_module``.
|
||||
5. ``skip_files`` / ``skip_nodeid_suffixes`` opt-out lists from the
|
||||
caller — used for tests that observe live cross-call provider state
|
||||
(e.g. prompt-cache warmup) which deterministic replay can't model.
|
||||
Tagged ``incompatible``.
|
||||
|
||||
Each skipped item gets a ``vcr_skip_reason`` attribute so the
|
||||
session-end summary can show why it isn't cached.
|
||||
"""
|
||||
if vcr_disabled():
|
||||
for item in items:
|
||||
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_DISABLED)
|
||||
return
|
||||
skip_files = frozenset(skip_files)
|
||||
skip_nodeid_suffixes = tuple(skip_nodeid_suffixes)
|
||||
for item in items:
|
||||
if item.get_closest_marker("vcr") is not None:
|
||||
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_PRE_MARKED)
|
||||
continue
|
||||
if _item_uses_respx(item):
|
||||
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_RESPX)
|
||||
continue
|
||||
filename = os.path.basename(str(item.path))
|
||||
if filename in skip_files:
|
||||
# Trust the caller's opt-out, but split by reason: if the
|
||||
# module actually uses respx, label the conflict precisely so
|
||||
# the summary surfaces dead respx imports vs. real conflicts.
|
||||
if _module_path_uses_respx(item):
|
||||
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_RESPX_MODULE)
|
||||
else:
|
||||
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_FILE_OPT_OUT)
|
||||
continue
|
||||
if any(item.nodeid.endswith(suffix) for suffix in skip_nodeid_suffixes):
|
||||
continue
|
||||
if item.get_closest_marker("vcr") is not None:
|
||||
setattr(item, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_INCOMPATIBLE)
|
||||
continue
|
||||
item.add_marker(pytest.mark.vcr)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-test stats accumulator + verdict classification.
|
||||
#
|
||||
# The session-end summary needs richer signal than the line-level verdict:
|
||||
# - which tests overflowed ``MAX_EPISODES_PER_CASSETTE`` (cassette refused
|
||||
# to save → live calls every CI run);
|
||||
# - which tests fired live HTTP at a real LLM endpoint while VCR was not
|
||||
# active for them (genuine wasted spend, not just "test mocked elsewhere");
|
||||
# - skip-reason buckets so we can tell respx-conflict from
|
||||
# incompatible-by-design from "module imports respx but never uses it".
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Verdict tags used in the per-test logline AND in the session summary
|
||||
# breakdown.
|
||||
VERDICT_HIT = "VCR HIT"
|
||||
VERDICT_MISS_RECORDED = "VCR MISS:RECORDED"
|
||||
VERDICT_MISS_OVERFLOW = "VCR MISS:OVERFLOW"
|
||||
VERDICT_MISS_NOT_PERSISTED = "VCR MISS:NOT_PERSISTED"
|
||||
VERDICT_PARTIAL = "VCR PARTIAL"
|
||||
VERDICT_NOOP_NO_TRAFFIC = "VCR NOOP"
|
||||
VERDICT_UNMARKED_LIVE_CALL = "VCR UNMARKED:LIVE_CALL"
|
||||
VERDICT_UNMARKED_NO_TRAFFIC = "VCR UNMARKED:NO_TRAFFIC"
|
||||
VERDICT_DISABLED = "VCR DISABLED"
|
||||
|
||||
# Per-session stats. Cleared by ``_reset_session_stats`` for unit tests.
|
||||
_session_stats = {
|
||||
"verdict_counts": defaultdict(int),
|
||||
"overflow_tests": [], # list of nodeids
|
||||
"unmarked_live_call_tests": [], # list of (nodeid, hosts)
|
||||
"skip_reason_counts": defaultdict(int),
|
||||
"skip_reason_examples": defaultdict(list),
|
||||
}
|
||||
|
||||
|
||||
def _reset_session_stats() -> None:
|
||||
_session_stats["verdict_counts"].clear()
|
||||
_session_stats["overflow_tests"].clear()
|
||||
_session_stats["unmarked_live_call_tests"].clear()
|
||||
_session_stats["skip_reason_counts"].clear()
|
||||
_session_stats["skip_reason_examples"].clear()
|
||||
|
||||
|
||||
def session_stats_snapshot() -> dict:
|
||||
"""Read-only copy of the per-session VCR stats. Used by the summary."""
|
||||
return {
|
||||
"verdict_counts": dict(_session_stats["verdict_counts"]),
|
||||
"overflow_tests": list(_session_stats["overflow_tests"]),
|
||||
"unmarked_live_call_tests": list(_session_stats["unmarked_live_call_tests"]),
|
||||
"skip_reason_counts": dict(_session_stats["skip_reason_counts"]),
|
||||
"skip_reason_examples": {
|
||||
k: list(v) for k, v in _session_stats["skip_reason_examples"].items()
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _classify_marked_test(cassette) -> str:
    """Translate a cassette's end-of-test state into a verdict tag.

    Reads ``play_count`` (replayed interactions), ``dirty`` (something
    new was recorded) and ``len(cassette)`` (total episodes), each with a
    zero/False fallback when the attribute is absent.

    Decision order:

    1. more than ``MAX_EPISODES_PER_CASSETTE`` episodes -> overflow,
       regardless of the other two values;
    2. nothing replayed, nothing recorded -> no-op;
    3. replayed only -> hit;
    4. recorded only -> miss (recorded);
    5. anything else -> partial.
    """
    replayed = getattr(cassette, "play_count", 0) or 0
    recorded_new = getattr(cassette, "dirty", False)
    episode_count = len(cassette) if hasattr(cassette, "__len__") else 0

    # Same ``>`` comparison the comment in the original attributes to the
    # persister's save guard: an oversized cassette is reported as
    # overflow before anything else is considered.
    if episode_count > MAX_EPISODES_PER_CASSETTE:
        return VERDICT_MISS_OVERFLOW

    if not recorded_new:
        if replayed == 0:
            return VERDICT_NOOP_NO_TRAFFIC
        if replayed > 0:
            return VERDICT_HIT
    elif replayed == 0:
        return VERDICT_MISS_RECORDED
    return VERDICT_PARTIAL
|
||||
|
||||
|
||||
def _format_verdict_line(verdict: str, cassette, extra: str = "") -> str:
|
||||
if cassette is None:
|
||||
return f"[{verdict}]{(' ' + extra) if extra else ''}"
|
||||
played = getattr(cassette, "play_count", 0) or 0
|
||||
total = len(cassette) if hasattr(cassette, "__len__") else 0
|
||||
base = f"[{verdict}] played={played} entries={total}"
|
||||
if extra:
|
||||
base = f"{base} {extra}"
|
||||
return base
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Live-call detection for tests that bypass VCR.
|
||||
#
|
||||
# When a test isn't VCR-marked (respx_conflict, incompatible, or just
|
||||
# plain unmarked), we wrap its socket calls inside the autouse
|
||||
# ``_vcr_outcome_gate`` fixture so we can flag any outbound TCP connection
|
||||
# to a known LLM provider. This converts "likely live call" into
|
||||
# "confirmed: this test connected to host X".
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_LIVE_CALL_PROBE_INSTALLED = False
|
||||
_LIVE_CALL_BUFFER_KEY = "vcr_live_call_hosts"
|
||||
|
||||
|
||||
# Bedrock runtime endpoints carry the region between the service label
# and ``amazonaws.com`` (``bedrock-runtime.<region>.amazonaws.com``), so
# a fixed hostname suffix cannot match them.
_BEDROCK_RUNTIME_HOST_RE = re.compile(
    r"(?:^|\.)bedrock-(?:agent-)?runtime(?:-fips)?\.[a-z0-9-]+\.amazonaws\.com$"
)


def _is_live_call_host(host: str) -> bool:
    """Return True if ``host`` names a known paid provider endpoint.

    The comparison is case-insensitive and ignores a trailing root dot
    (``api.openai.com.``). Hosts that start with one of
    ``_LIVE_CALL_LOCAL_PREFIXES`` are never live. Otherwise a host is
    live when it ends with one of ``_LIVE_CALL_HOST_SUFFIXES`` or is a
    regional AWS Bedrock runtime endpoint. Empty or missing hosts return
    False.
    """
    if not host:
        return False
    # Normalize the fully-qualified form so suffix matching still works.
    host = host.lower().rstrip(".")
    if any(host.startswith(p) for p in _LIVE_CALL_LOCAL_PREFIXES):
        return False
    if any(host.endswith(suffix) for suffix in _LIVE_CALL_HOST_SUFFIXES):
        return True
    return _BEDROCK_RUNTIME_HOST_RE.search(host) is not None
|
||||
|
||||
|
||||
class _LiveCallProbe:
|
||||
"""Context manager that monkeypatches ``socket.create_connection`` and
|
||||
``socket.socket.connect`` for the lifetime of a test, recording any
|
||||
outbound TCP connection to a known LLM host.
|
||||
|
||||
We don't intercept HTTP at the application layer because that would
|
||||
fight with vcrpy/respx in tests that *do* mock httpx — the socket
|
||||
layer is below both, so this probe is safe regardless of what's
|
||||
patched above it. We also don't raise: the goal is observability, not
|
||||
a hard gate.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.hosts: list[str] = []
|
||||
self._orig_create_connection = None
|
||||
self._orig_socket_connect = None
|
||||
|
||||
def __enter__(self):
|
||||
self._orig_create_connection = socket.create_connection
|
||||
self._orig_socket_connect = socket.socket.connect
|
||||
|
||||
def _wrapped_create_connection(address, *args, **kwargs):
|
||||
try:
|
||||
host = address[0] if isinstance(address, tuple) else None
|
||||
if host and _is_live_call_host(host) and host not in self.hosts:
|
||||
self.hosts.append(host)
|
||||
except Exception:
|
||||
pass
|
||||
return self._orig_create_connection(address, *args, **kwargs)
|
||||
|
||||
def _wrapped_socket_connect(sock_self, address):
|
||||
try:
|
||||
host = address[0] if isinstance(address, tuple) else None
|
||||
if host and _is_live_call_host(host) and host not in self.hosts:
|
||||
self.hosts.append(host)
|
||||
except Exception:
|
||||
pass
|
||||
return self._orig_socket_connect(sock_self, address)
|
||||
|
||||
socket.create_connection = _wrapped_create_connection
|
||||
socket.socket.connect = _wrapped_socket_connect
|
||||
return self
|
||||
|
||||
def __exit__(self, *exc):
|
||||
if self._orig_create_connection is not None:
|
||||
socket.create_connection = self._orig_create_connection
|
||||
if self._orig_socket_connect is not None:
|
||||
socket.socket.connect = self._orig_socket_connect
|
||||
return False
|
||||
|
||||
|
||||
def vcr_outcome_logging_enabled() -> bool:
    """Report whether per-test verdict lines should be emitted.

    Logging follows VCR itself: it is off when ``vcr_disabled()`` is
    true and on otherwise, unless the environment variable named by
    ``VCR_VERBOSE_ENV`` is set to exactly ``"0"``, which opts back into
    the quiet mode.
    """
    if vcr_disabled():
        return False
    explicitly_quiet = os.environ.get(VCR_VERBOSE_ENV) == "0"
    return not explicitly_quiet
|
||||
|
||||
|
||||
def record_vcr_outcome(request, vcr) -> None:
|
||||
"""Call from the post-yield section of an autouse fixture per test."""
|
||||
cassette = vcr
|
||||
@@ -507,10 +847,71 @@ def record_vcr_outcome(request, vcr) -> None:
|
||||
if cassette_path:
|
||||
mark_test_outcome_for_cassette(cassette_path, test_passed)
|
||||
|
||||
if not vcr_verbose_enabled():
|
||||
nodeid = request.node.nodeid
|
||||
|
||||
if cassette is not None:
|
||||
verdict = _classify_marked_test(cassette)
|
||||
# Track overflow tests even when verbose logging is off — the
|
||||
# session summary shows them either way.
|
||||
if verdict == VERDICT_MISS_OVERFLOW:
|
||||
_session_stats["overflow_tests"].append(nodeid)
|
||||
if not test_passed and verdict == VERDICT_MISS_RECORDED:
|
||||
verdict = VERDICT_MISS_NOT_PERSISTED
|
||||
_session_stats["verdict_counts"][verdict] += 1
|
||||
if vcr_outcome_logging_enabled():
|
||||
line = _format_verdict_line(verdict, cassette)
|
||||
request.node.user_properties.append(("vcr_verdict", line))
|
||||
return
|
||||
verdict = format_vcr_verdict(cassette)
|
||||
request.node.user_properties.append(("vcr_verdict", verdict))
|
||||
|
||||
# Cassette is None ⇒ test wasn't VCR-marked. Honor the skip reason
|
||||
# we tagged at collection time, and pull live-call hosts captured by
|
||||
# the socket probe (if any).
|
||||
skip_reason = getattr(
|
||||
request.node, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_FILE_OPT_OUT
|
||||
)
|
||||
_session_stats["skip_reason_counts"][skip_reason] += 1
|
||||
|
||||
hosts = getattr(request.node, _LIVE_CALL_BUFFER_KEY, []) or []
|
||||
if hosts:
|
||||
verdict = VERDICT_UNMARKED_LIVE_CALL
|
||||
_session_stats["unmarked_live_call_tests"].append((nodeid, list(hosts)))
|
||||
extra = f"reason={skip_reason} hosts={','.join(hosts)}"
|
||||
else:
|
||||
verdict = VERDICT_UNMARKED_NO_TRAFFIC
|
||||
extra = f"reason={skip_reason}"
|
||||
|
||||
_session_stats["verdict_counts"][verdict] += 1
|
||||
|
||||
examples = _session_stats["skip_reason_examples"][skip_reason]
|
||||
if len(examples) < 5:
|
||||
examples.append(nodeid)
|
||||
|
||||
if vcr_outcome_logging_enabled():
|
||||
request.node.user_properties.append(
|
||||
("vcr_verdict", _format_verdict_line(verdict, None, extra))
|
||||
)
|
||||
|
||||
|
||||
def install_live_call_probe(request, vcr) -> "_LiveCallProbe | None":
    """Activate the live-call socket probe for a test that has no cassette.

    Intended to be called from the setup half of the per-test autouse
    fixture (before its ``yield``).

    Args:
        request: the pytest ``request`` object of the calling fixture.
        vcr: the test's cassette, or ``None`` when the test is not
            VCR-marked.

    Returns:
        The active ``_LiveCallProbe``, or ``None`` when no probe was
        installed (the test has a cassette, or VCR is disabled).

    When a probe is installed, its ``hosts`` list is stored on
    ``request.node`` under ``_LIVE_CALL_BUFFER_KEY`` so the outcome
    recorder can read the hosts captured during the test, and a
    finalizer restores the socket functions at teardown.
    """
    if vcr is not None or vcr_disabled():
        return None
    probe = _LiveCallProbe()
    probe.__enter__()
    # Register the undo step right after patching so the socket module
    # is restored even if a later setup statement raises.
    request.addfinalizer(lambda: probe.__exit__(None, None, None))
    setattr(request.node, _LIVE_CALL_BUFFER_KEY, probe.hosts)
    return probe
|
||||
|
||||
|
||||
def _format_capacity_line(snapshot: dict) -> str:
|
||||
@@ -525,6 +926,99 @@ def _format_capacity_line(snapshot: dict) -> str:
|
||||
)
|
||||
|
||||
|
||||
def emit_vcr_classification_summary(terminalreporter) -> None:
    """Write the session-end VCR classification report.

    Nothing is written when VCR is disabled, when running inside an
    xdist worker (``PYTEST_XDIST_WORKER`` set), or when no verdict was
    recorded during the session.

    Sections, each printed only when it has content:

    * verdict counts, in a fixed display order, skipping zero counts;
    * cassette overflow: node ids of tests whose cassette exceeded
      ``MAX_EPISODES_PER_CASSETTE``;
    * unmarked tests with live API calls: node id and the provider
      hosts each one connected to;
    * skip-reason breakdown: count per reason (largest first) with the
      stored example node ids.

    A closing separator line ends the report.
    """
    if vcr_disabled() or os.environ.get("PYTEST_XDIST_WORKER"):
        return

    stats = session_stats_snapshot()
    verdict_totals = stats["verdict_counts"]
    if not verdict_totals:
        return

    terminalreporter.write_sep("=", "VCR CACHE CLASSIFICATION SUMMARY", bold=True)
    display_order = (
        VERDICT_HIT,
        VERDICT_PARTIAL,
        VERDICT_MISS_RECORDED,
        VERDICT_MISS_OVERFLOW,
        VERDICT_MISS_NOT_PERSISTED,
        VERDICT_NOOP_NO_TRAFFIC,
        VERDICT_UNMARKED_NO_TRAFFIC,
        VERDICT_UNMARKED_LIVE_CALL,
    )
    for tag in display_order:
        seen = verdict_totals.get(tag, 0)
        if seen:
            terminalreporter.write_line(f" [{tag}] {seen}")

    overflowed = stats["overflow_tests"]
    if overflowed:
        overflow_title = (
            f"CASSETTE OVERFLOW (>{MAX_EPISODES_PER_CASSETTE} episodes, save refused)"
        )
        terminalreporter.write_sep("-", overflow_title, red=True, bold=True)
        overflow_hint = (
            " These tests will hit the live provider on every CI run "
            "because the persister won't save cassettes that grew past "
            "the limit. Stabilize the request body (file handle consumed, "
            "SigV4 timestamp, UUID, or boundary leak)."
        )
        terminalreporter.write_line(overflow_hint)
        for test_id in overflowed:
            terminalreporter.write_line(f" - {test_id}")

    live_hits = stats["unmarked_live_call_tests"]
    if live_hits:
        terminalreporter.write_sep(
            "-", "UNMARKED TESTS WITH LIVE API CALLS", red=True, bold=True
        )
        live_hint = (
            " These tests connected to a real LLM provider host while "
            "they were NOT VCR-marked. Either add @pytest.mark.vcr "
            "explicitly, mock with respx, or move them off the "
            "respx_conflict / incompatible skip list."
        )
        terminalreporter.write_line(live_hint)
        for test_id, seen_hosts in live_hits:
            terminalreporter.write_line(f" - {test_id} → {','.join(seen_hosts)}")

    reason_totals = stats["skip_reason_counts"]
    if reason_totals:
        terminalreporter.write_sep("-", "SKIP-REASON BREAKDOWN", bold=True)
        # Largest bucket first; ties keep their recorded order.
        by_size = sorted(reason_totals.items(), key=lambda pair: -pair[1])
        for why, how_many in by_size:
            samples = stats["skip_reason_examples"].get(why, [])
            terminalreporter.write_line(f" {why}: {how_many}")
            for sample in samples:
                terminalreporter.write_line(f" - {sample}")

    terminalreporter.write_sep("=", bold=True)
|
||||
|
||||
|
||||
def emit_cassette_cache_session_banner(terminalreporter) -> None:
|
||||
"""Call from ``pytest_terminal_summary``. No-op on xdist workers."""
|
||||
if vcr_disabled():
|
||||
@@ -600,7 +1094,7 @@ class VerboseReporterState:
|
||||
return
|
||||
if os.environ.get("PYTEST_XDIST_WORKER"):
|
||||
return
|
||||
if not vcr_verbose_enabled():
|
||||
if not vcr_outcome_logging_enabled():
|
||||
return
|
||||
reporter = self.resolve_terminal_reporter()
|
||||
if reporter is None:
|
||||
|
||||
@@ -8,6 +8,9 @@ sys.path.insert(0, os.path.abspath("../.."))
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -34,6 +37,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _vcr_outcome_gate(request, vcr):
|
||||
install_live_call_probe(request, vcr)
|
||||
yield
|
||||
record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -48,3 +52,8 @@ def pytest_runtest_logreport(report):
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
|
||||
apply_vcr_auto_marker_to_items(items)
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
|
||||
emit_cassette_cache_session_banner(terminalreporter)
|
||||
emit_vcr_classification_summary(terminalreporter)
|
||||
|
||||
@@ -19,6 +19,9 @@ import litellm
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -45,6 +48,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _vcr_outcome_gate(request, vcr):
|
||||
install_live_call_probe(request, vcr)
|
||||
yield
|
||||
record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -151,3 +155,8 @@ def pytest_collection_modifyitems(config, items):
|
||||
|
||||
# Reorder the items list
|
||||
items[:] = custom_logger_tests + other_tests
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
|
||||
emit_cassette_cache_session_banner(terminalreporter)
|
||||
emit_vcr_classification_summary(terminalreporter)
|
||||
|
||||
@@ -12,6 +12,9 @@ import litellm # noqa: E402,F401
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -48,6 +51,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _vcr_outcome_gate(request, vcr):
|
||||
install_live_call_probe(request, vcr)
|
||||
yield
|
||||
record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -62,3 +66,8 @@ def pytest_runtest_logreport(report):
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
|
||||
apply_vcr_auto_marker_to_items(items)
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
|
||||
emit_cassette_cache_session_banner(terminalreporter)
|
||||
emit_vcr_classification_summary(terminalreporter)
|
||||
|
||||
@@ -15,6 +15,9 @@ import litellm # noqa: E402,F401
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -76,6 +79,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _vcr_outcome_gate(request, vcr):
|
||||
install_live_call_probe(request, vcr)
|
||||
yield
|
||||
record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -107,3 +111,8 @@ def pytest_collection_modifyitems(config, items):
|
||||
|
||||
# Reorder the items list
|
||||
items[:] = custom_logger_tests + other_tests
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
|
||||
emit_cassette_cache_session_banner(terminalreporter)
|
||||
emit_vcr_classification_summary(terminalreporter)
|
||||
|
||||
@@ -16,6 +16,9 @@ import litellm # noqa: E402
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -42,6 +45,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _vcr_outcome_gate(request, vcr):
|
||||
install_live_call_probe(request, vcr)
|
||||
yield
|
||||
record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -107,3 +111,8 @@ def pytest_collection_modifyitems(config, items):
|
||||
other_tests.sort(key=lambda x: x.name)
|
||||
|
||||
items[:] = custom_logger_tests + other_tests
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: write the cassette-cache banner, then the VCR
    classification summary, to the terminal reporter."""
    for emit in (
        emit_cassette_cache_session_banner,
        emit_vcr_classification_summary,
    ):
        emit(terminalreporter)
|
||||
|
||||
@@ -21,27 +21,20 @@ import litellm # noqa: E402
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
)
|
||||
|
||||
# vcrpy and respx both patch the httpx transport — applying both makes one
|
||||
# silently win, so respx-using files opt out of the auto-marker.
|
||||
_RESPX_CONFLICTING_FILES = frozenset(
|
||||
{
|
||||
"test_gpt4o_audio.py",
|
||||
"test_nvidia_nim.py",
|
||||
"test_openai.py",
|
||||
"test_openai_o1.py",
|
||||
"test_prompt_caching.py",
|
||||
"test_text_completion_unit_tests.py",
|
||||
"test_xai.py",
|
||||
}
|
||||
)
|
||||
_VCR_AUTO_MARKER_SKIP_FILES = _RESPX_CONFLICTING_FILES | frozenset(
|
||||
{"test_vcr_redis_persister.py"}
|
||||
)
|
||||
# Per-item respx detection (``apply_vcr_auto_marker_to_items``) handles
|
||||
# the vast majority of respx-vs-vcrpy conflicts automatically. The only
|
||||
# entry below is the persister's own unit-test file, which exercises
|
||||
# ``save_cassette`` / ``load_cassette`` against fakeredis and must not
|
||||
# itself run under a live cassette context.
|
||||
_VCR_AUTO_MARKER_SKIP_FILES = frozenset({"test_vcr_redis_persister.py"})
|
||||
|
||||
# Tests that observe live cross-call provider state (e.g. prompt-cache
|
||||
# warm-up between two consecutive calls); replay can't reproduce that state.
|
||||
@@ -73,6 +66,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Autouse wrapper around each test.

    Calls ``install_live_call_probe`` before the test body runs and
    ``record_vcr_outcome`` once the test has yielded control back.
    """
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -85,6 +79,11 @@ def pytest_runtest_logreport(report):
|
||||
_verbose_state.maybe_emit_verdict(report)
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: write the cassette-cache banner, then the VCR
    classification summary, to the terminal reporter."""
    for emit in (
        emit_cassette_cache_session_banner,
        emit_vcr_classification_summary,
    ):
        emit(terminalreporter)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Capture TRUE defaults at conftest import time (before test modules pollute).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,497 @@
|
||||
"""Unit tests for the VCR classification + observability layer.
|
||||
|
||||
Covers:
|
||||
- per-item respx detection (module scan, marker, fixture)
|
||||
- skip-reason tagging in ``apply_vcr_auto_marker_to_items``
|
||||
- verdict classification (HIT / MISS:RECORDED / MISS:OVERFLOW / MISS:NOT_PERSISTED /
|
||||
PARTIAL / NOOP / UNMARKED:LIVE_CALL / UNMARKED:NO_TRAFFIC)
|
||||
- AWS SigV4 fingerprint stability
|
||||
- session-end summary rendering
|
||||
- live-call host classification
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
|
||||
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
SKIP_REASON_FILE_OPT_OUT,
|
||||
SKIP_REASON_INCOMPATIBLE,
|
||||
SKIP_REASON_PRE_MARKED,
|
||||
SKIP_REASON_RESPX,
|
||||
SKIP_REASON_RESPX_MODULE,
|
||||
VCR_SKIP_REASON_USER_ATTR,
|
||||
VERDICT_HIT,
|
||||
VERDICT_MISS_NOT_PERSISTED,
|
||||
VERDICT_MISS_OVERFLOW,
|
||||
VERDICT_MISS_RECORDED,
|
||||
VERDICT_NOOP_NO_TRAFFIC,
|
||||
VERDICT_PARTIAL,
|
||||
VERDICT_UNMARKED_LIVE_CALL,
|
||||
VERDICT_UNMARKED_NO_TRAFFIC,
|
||||
_RESPX_MODULE_CACHE,
|
||||
_classify_marked_test,
|
||||
_compute_key_fingerprint,
|
||||
_is_live_call_host,
|
||||
_reset_session_stats,
|
||||
_stable_key_value,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
session_stats_snapshot,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test doubles
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _StubItem:
    """Minimal stand-in for a pytest item.

    Exposes only what these tests hand to the auto-marker: ``nodeid``,
    ``path``, ``fixturenames``, ``module``, ``user_properties`` and the
    marker lookup / registration methods. Markers are tracked by name only.
    """

    def __init__(
        self,
        nodeid: str,
        path: str,
        *,
        markers: Optional[list[str]] = None,
        fixturenames: Optional[list[str]] = None,
        module=None,
    ) -> None:
        self.nodeid = nodeid
        self.path = path
        self.module = module
        self.user_properties: list = []
        # Copy the inputs so later ``add_marker`` calls never mutate the
        # caller's lists.
        self._markers = [*markers] if markers else []
        self.fixturenames = [*fixturenames] if fixturenames else []

    def get_closest_marker(self, name: str):
        """Return ``name`` when a marker with that name is present, else ``None``."""
        if name in self._markers:
            return name
        return None

    def add_marker(self, marker):
        """Record ``marker`` by its ``name`` attribute, or by ``str(marker)``
        when it has none (``pytest.mark.vcr`` is a MarkDecorator with a name)."""
        self._markers.append(getattr(marker, "name", str(marker)))
|
||||
|
||||
|
||||
@pytest.fixture
def vcr_enabled(monkeypatch):
    """Prepare the environment these tests treat as "VCR enabled".

    Sets ``CASSETTE_REDIS_URL`` to a stub URL and removes
    ``LITELLM_VCR_DISABLE`` and ``PYTEST_XDIST_WORKER`` if they are set.
    """
    for unwanted in ("LITELLM_VCR_DISABLE", "PYTEST_XDIST_WORKER"):
        monkeypatch.delenv(unwanted, raising=False)
    monkeypatch.setenv("CASSETTE_REDIS_URL", "redis://stub")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_module_caches():
    """Reset session stats and the respx module cache before and after
    every test so state cannot leak between tests."""

    def _wipe():
        _reset_session_stats()
        _RESPX_MODULE_CACHE.clear()

    _wipe()
    yield
    _wipe()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AWS SigV4 fingerprint stability — the Bedrock cassette overflow root cause
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_should_extract_only_aws_access_key_from_sigv4_authorization():
    """SigV4 headers that share an access key but differ in credential date
    and signature must map to the same stable value; otherwise every CI run
    would push a new episode into the cassette."""
    template = (
        "AWS4-HMAC-SHA256 Credential=AKIAEXAMPLE12345/{day}/us-east-1/"
        "bedrock/aws4_request, SignedHeaders=host;x-amz-date, "
        "Signature={signature}"
    )
    rotations = (("20260512", "AAAAAAAA"), ("20260513", "BBBBBBBB"))
    stable_values = [
        _stable_key_value(
            "Authorization", template.format(day=day, signature=signature)
        )
        for day, signature in rotations
    ]
    assert stable_values == ["aws-sigv4:AKIAEXAMPLE12345"] * 2
|
||||
|
||||
|
||||
def test_should_keep_bearer_authorization_unchanged():
    """``Bearer <key>`` headers are already stable, so they pass through
    untouched."""
    bearer = "Bearer sk-1234"
    assert _stable_key_value("Authorization", bearer) == "Bearer sk-1234"
|
||||
|
||||
|
||||
def test_should_produce_stable_fingerprint_across_sigv4_signatures():
    """Rotating only the SigV4 credential date, signed-header list and
    signature must leave ``_compute_key_fingerprint`` unchanged."""

    def _request(authorization):
        return SimpleNamespace(headers={"authorization": authorization})

    first = _request(
        "AWS4-HMAC-SHA256 Credential=AKIA1/20260101/us-east-1/"
        "bedrock/aws4_request, SignedHeaders=host, Signature=AAA"
    )
    second = _request(
        "AWS4-HMAC-SHA256 Credential=AKIA1/20260512/us-east-1/"
        "bedrock/aws4_request, SignedHeaders=host;x-amz-date, "
        "Signature=ZZZ"
    )
    assert _compute_key_fingerprint(first) == _compute_key_fingerprint(second)
|
||||
|
||||
|
||||
def test_should_distinguish_different_aws_access_keys():
    """Requests signed with different access keys must fingerprint
    differently, so a cassette recorded under one identity never serves
    another."""

    def _signed_with(access_key):
        header = (
            f"AWS4-HMAC-SHA256 Credential={access_key}/x/y/z/aws4_request, "
            "Signature=A"
        )
        return SimpleNamespace(headers={"authorization": header})

    fingerprint_one = _compute_key_fingerprint(_signed_with("AKIAONE"))
    fingerprint_two = _compute_key_fingerprint(_signed_with("AKIATWO"))
    assert fingerprint_one != fingerprint_two
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Live-call host classification
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "host,expected",
    [
        # Known LLM provider hosts, including deeper subdomains.
        ("api.openai.com", True),
        ("api.anthropic.com", True),
        # The regional Bedrock runtime endpoint is the host Bedrock calls
        # actually connect to, so it must count as a live call; expecting
        # ``False`` here would let Bedrock spend go unreported.
        ("bedrock-runtime.us-east-1.amazonaws.com", True),
        ("api.us-east-1.bedrock-runtime.amazonaws.com", True),
        ("foo.bar.openai.com", True),
        # Loopback, private-range, unrelated and empty hosts are not spend.
        ("127.0.0.1", False),
        ("localhost", False),
        ("10.0.0.1", False),
        ("172.16.0.1", False),
        ("redis.example.com", False),
        ("", False),
    ],
)
def test_should_classify_live_call_hosts(host, expected):
    """``_is_live_call_host`` must return exactly ``True`` for LLM provider
    hosts and exactly ``False`` for local, private, unrelated or empty
    hosts."""
    assert _is_live_call_host(host) is expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Verdict classification
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _cassette(played: int, dirty: bool, total: int):
    """Build a cassette double.

    The returned object reports ``total`` from ``len()`` (also stored as
    ``n``) and carries ``play_count`` and ``dirty`` attributes.
    """

    class _Sized:
        def __init__(self, size):
            self.n = size
            self.play_count = played
            self.dirty = dirty

        def __len__(self):
            return self.n

    fake = _Sized(total)
    return fake
|
||||
|
||||
|
||||
def test_should_classify_pure_replay_as_hit():
    """Everything played back and nothing newly recorded is a HIT."""
    replayed = _cassette(played=3, dirty=False, total=3)
    assert _classify_marked_test(replayed) == VERDICT_HIT
|
||||
|
||||
|
||||
def test_should_classify_no_traffic_as_noop():
    """An empty, clean cassette with no plays is the NOOP verdict."""
    untouched = _cassette(played=0, dirty=False, total=0)
    assert _classify_marked_test(untouched) == VERDICT_NOOP_NO_TRAFFIC
|
||||
|
||||
|
||||
def test_should_classify_pure_record_as_miss_recorded():
    """A dirty cassette with no plays is MISS:RECORDED."""
    freshly_recorded = _cassette(played=0, dirty=True, total=1)
    assert _classify_marked_test(freshly_recorded) == VERDICT_MISS_RECORDED
|
||||
|
||||
|
||||
def test_should_classify_mixed_replay_and_record_as_partial():
    """A dirty cassette that also replayed episodes is PARTIAL."""
    mixed = _cassette(played=2, dirty=True, total=4)
    assert _classify_marked_test(mixed) == VERDICT_PARTIAL
|
||||
|
||||
|
||||
def test_should_classify_overflow_as_miss_overflow_regardless_of_play_state():
    """Cassettes above ``MAX_EPISODES_PER_CASSETTE`` (50) are refused for
    save and so hit live on every CI run; the overflow verdict must win
    whether or not any episodes were replayed."""
    for played, total in ((0, 51), (10, 52)):
        oversized = _cassette(played=played, dirty=True, total=total)
        assert _classify_marked_test(oversized) == VERDICT_MISS_OVERFLOW
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# apply_vcr_auto_marker_to_items: skip-reason tagging
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_module_with_source(tmp_path, src: str, name: str):
    """Write ``src`` to ``<tmp_path>/<name>.py``.

    Returns a ``(module, path)`` pair: a namespace whose ``__file__`` is
    the written file's path, and that same path as a string.
    """
    source_file = tmp_path / f"{name}.py"
    source_file.write_text(src)
    location = str(source_file)
    return SimpleNamespace(__file__=location), location
|
||||
|
||||
|
||||
def test_should_apply_vcr_marker_to_clean_test(vcr_enabled, tmp_path):
    """An item with no markers, fixtures or skip entries gets the ``vcr``
    marker."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "clean"
    )
    clean_item = _StubItem("clean.py::test_x", module_path, module=module)
    apply_vcr_auto_marker_to_items([clean_item])
    assert clean_item.get_closest_marker("vcr") == "vcr"
|
||||
|
||||
|
||||
def test_should_skip_per_item_when_respx_marker_present(vcr_enabled, tmp_path):
    """An item carrying the ``respx`` marker is left without ``vcr`` and is
    tagged with the respx skip reason."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "respx_marker"
    )
    marked_item = _StubItem(
        "respx_marker.py::test_x", module_path, markers=["respx"], module=module
    )
    apply_vcr_auto_marker_to_items([marked_item])
    assert marked_item.get_closest_marker("vcr") is None
    skip_reason = getattr(marked_item, VCR_SKIP_REASON_USER_ATTR)
    assert skip_reason == SKIP_REASON_RESPX
|
||||
|
||||
|
||||
def test_should_skip_per_item_when_respx_mock_fixture_present(vcr_enabled, tmp_path):
    """An item whose fixture names include ``respx_mock`` is left without
    ``vcr`` and is tagged with the respx skip reason."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "respx_fixture"
    )
    fixture_item = _StubItem(
        "respx_fixture.py::test_x",
        module_path,
        fixturenames=["respx_mock"],
        module=module,
    )
    apply_vcr_auto_marker_to_items([fixture_item])
    assert fixture_item.get_closest_marker("vcr") is None
    skip_reason = getattr(fixture_item, VCR_SKIP_REASON_USER_ATTR)
    assert skip_reason == SKIP_REASON_RESPX
|
||||
|
||||
|
||||
def test_should_tag_pre_marked_items_so_summary_can_show_them(vcr_enabled, tmp_path):
    """An item that already carries ``vcr`` is tagged as pre-marked."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "premarked"
    )
    premarked_item = _StubItem(
        "premarked.py::test_x", module_path, markers=["vcr"], module=module
    )
    apply_vcr_auto_marker_to_items([premarked_item])
    skip_reason = getattr(premarked_item, VCR_SKIP_REASON_USER_ATTR)
    assert skip_reason == SKIP_REASON_PRE_MARKED
|
||||
|
||||
|
||||
def test_should_tag_skip_files_with_respx_module_when_module_actually_uses_respx(
    vcr_enabled, tmp_path
):
    """A ``skip_files`` entry whose module source really wires up respx is
    labelled as a genuine module-level respx conflict, not a dead opt-out."""
    respx_source = "import respx\n@pytest.mark.respx\ndef test_x(): pass\n"
    module, module_path = _make_module_with_source(
        tmp_path, respx_source, "real_respx"
    )
    skipped_item = _StubItem("real_respx.py::test_x", module_path, module=module)
    apply_vcr_auto_marker_to_items([skipped_item], skip_files={"real_respx.py"})
    skip_reason = getattr(skipped_item, VCR_SKIP_REASON_USER_ATTR)
    assert skip_reason == SKIP_REASON_RESPX_MODULE
|
||||
|
||||
|
||||
def test_should_tag_skip_files_with_file_opt_out_when_module_does_not_use_respx(
    vcr_enabled, tmp_path
):
    """A ``skip_files`` entry whose module only has a dead respx import is
    a stale skip-list entry and is tagged as a plain file opt-out so it can
    be pruned."""
    dead_import_source = (
        "from respx import MockRouter  # dead import\ndef test_x(): pass\n"
    )
    module, module_path = _make_module_with_source(
        tmp_path, dead_import_source, "dead_skip"
    )
    skipped_item = _StubItem("dead_skip.py::test_x", module_path, module=module)
    apply_vcr_auto_marker_to_items([skipped_item], skip_files={"dead_skip.py"})
    skip_reason = getattr(skipped_item, VCR_SKIP_REASON_USER_ATTR)
    assert skip_reason == SKIP_REASON_FILE_OPT_OUT
|
||||
|
||||
|
||||
def test_should_tag_nodeid_suffix_skips_as_incompatible(vcr_enabled, tmp_path):
    """An item whose node id ends with a listed suffix is tagged as
    incompatible."""
    module, module_path = _make_module_with_source(
        tmp_path, "def test_x(): pass\n", "incompat"
    )
    suffix_item = _StubItem(
        "incompat.py::test_prompt_caching", module_path, module=module
    )
    suffixes = ("::test_prompt_caching",)
    apply_vcr_auto_marker_to_items([suffix_item], skip_nodeid_suffixes=suffixes)
    skip_reason = getattr(suffix_item, VCR_SKIP_REASON_USER_ATTR)
    assert skip_reason == SKIP_REASON_INCOMPATIBLE
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Session-end summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeReporter:
    """Terminal-reporter double that collects everything written to it."""

    def __init__(self):
        self.lines: list[str] = []

    def write_sep(self, sep, title="", **kwargs):
        """Record a separator as ``"=== <title>"``, or ``"==="`` when there
        is no title; ``sep`` and extra keyword arguments are ignored."""
        banner = "==="
        if title:
            banner = f"=== {title}"
        self.lines.append(banner)

    def write_line(self, line):
        """Record ``line`` verbatim."""
        self.lines.append(line)

    @property
    def output(self):
        """All recorded lines joined with newlines."""
        return "\n".join(self.lines)
|
||||
|
||||
|
||||
def test_should_render_overflow_section_when_any_test_overflowed(vcr_enabled):
    """The overflow section is the cost-leak signal: once a test has
    recorded an overflowing cassette, the summary must name the verdict,
    the section heading and the offending test."""
    node = SimpleNamespace(
        nodeid="t::overflow",
        user_properties=[],
        rep_call=SimpleNamespace(passed=True),
    )
    oversized = _cassette(played=0, dirty=True, total=51)
    oversized._path = None  # avoid mark_test_outcome side-effects
    record_vcr_outcome(SimpleNamespace(node=node), oversized)

    reporter = _FakeReporter()
    emit_vcr_classification_summary(reporter)
    rendered = reporter.output
    for fragment in (
        "VCR CACHE CLASSIFICATION SUMMARY",
        "VCR MISS:OVERFLOW",
        "CASSETTE OVERFLOW",
        "t::overflow",
    ):
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_should_render_unmarked_live_call_section_with_hosts(vcr_enabled):
    """A VCR-skipped test with recorded live-call hosts is counted as
    UNMARKED:LIVE_CALL and listed, with its hosts, in the summary."""
    leaking_node = SimpleNamespace(
        nodeid="t::leak",
        user_properties=[],
        rep_call=SimpleNamespace(passed=True),
    )
    setattr(leaking_node, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_RESPX)
    leaking_node.vcr_live_call_hosts = ["api.openai.com"]

    record_vcr_outcome(SimpleNamespace(node=leaking_node), None)

    stats = session_stats_snapshot()
    assert stats["unmarked_live_call_tests"] == [("t::leak", ["api.openai.com"])]
    assert stats["verdict_counts"][VERDICT_UNMARKED_LIVE_CALL] == 1

    reporter = _FakeReporter()
    emit_vcr_classification_summary(reporter)
    rendered = reporter.output
    for fragment in (
        "UNMARKED TESTS WITH LIVE API CALLS",
        "api.openai.com",
        "t::leak",
    ):
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_should_record_unmarked_no_traffic_when_test_skipped_vcr_but_did_not_call_out(
    vcr_enabled,
):
    """A VCR-skipped test with no recorded live-call hosts is counted as
    UNMARKED:NO_TRAFFIC, and its skip reason is tallied."""
    quiet_node = SimpleNamespace(
        nodeid="t::clean_skip",
        user_properties=[],
        rep_call=SimpleNamespace(passed=True),
    )
    setattr(quiet_node, VCR_SKIP_REASON_USER_ATTR, SKIP_REASON_INCOMPATIBLE)

    record_vcr_outcome(SimpleNamespace(node=quiet_node), None)

    stats = session_stats_snapshot()
    assert stats["verdict_counts"][VERDICT_UNMARKED_NO_TRAFFIC] == 1
    assert stats["skip_reason_counts"][SKIP_REASON_INCOMPATIBLE] == 1
|
||||
|
||||
|
||||
def test_should_demote_miss_recorded_to_not_persisted_when_test_failed(vcr_enabled):
    """When the test failed, ``save_cassette`` skips persisting, so the
    next CI run hits live again; the verdict must be MISS:NOT_PERSISTED
    rather than MISS:RECORDED."""
    failed_node = SimpleNamespace(
        nodeid="t::failed",
        user_properties=[],
        rep_call=SimpleNamespace(passed=False),
    )
    recorded = _cassette(played=0, dirty=True, total=1)
    recorded._path = None
    record_vcr_outcome(SimpleNamespace(node=failed_node), recorded)

    verdict_counts = session_stats_snapshot()["verdict_counts"]
    assert verdict_counts.get(VERDICT_MISS_NOT_PERSISTED) == 1
|
||||
|
||||
|
||||
def test_should_emit_no_summary_when_no_tests_observed(vcr_enabled):
    """With no recorded outcomes the summary writes nothing at all."""
    silent_reporter = _FakeReporter()
    emit_vcr_classification_summary(silent_reporter)
    assert silent_reporter.output == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Live-call probe
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_should_skip_live_probe_when_vcr_active(vcr_enabled):
    """For a VCR-marked test (truthy cassette) no probe is installed:
    vcrpy intercepts above the socket layer, so any 'connection' would be
    vcrpy's own bookkeeping and not real spend."""
    active_cassette = SimpleNamespace(play_count=0, dirty=False)
    request = SimpleNamespace(
        node=SimpleNamespace(),
        addfinalizer=lambda fn: None,
    )
    assert install_live_call_probe(request, active_cassette) is None
|
||||
|
||||
|
||||
def test_live_call_probe_records_known_llm_hosts(vcr_enabled, monkeypatch):
    """The probe records outbound TCP connection attempts to known LLM
    provider hosts and ignores loopback addresses."""
    import socket

    class _Node:
        pass

    cleanup_callbacks = []
    request = SimpleNamespace(
        node=_Node(),
        addfinalizer=lambda fn: cleanup_callbacks.append(fn),
    )
    probe = install_live_call_probe(request, None)
    assert probe is not None

    # Call the patched function directly. The probe records at the call
    # site before delegating, so whatever the underlying
    # ``socket.create_connection`` raises afterwards is swallowed here.
    for address in (("api.openai.com", 443), ("127.0.0.1", 6379)):
        try:
            socket.create_connection(address, timeout=0.001)
        except Exception:
            pass

    # Run the registered finalizers before asserting so the socket patches
    # are undone for the rest of the test session.
    for callback in cleanup_callbacks:
        callback()

    recorded_hosts = getattr(request.node, "vcr_live_call_hosts", [])
    assert "api.openai.com" in recorded_hosts
    assert "127.0.0.1" not in recorded_hosts
|
||||
@@ -25,20 +25,21 @@ import litellm
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
)
|
||||
|
||||
# vcrpy and respx both patch the httpx transport — applying both makes one
|
||||
# silently win, so respx-using files opt out of the auto-marker.
|
||||
_RESPX_CONFLICTING_FILES = frozenset(
|
||||
{
|
||||
"test_router.py",
|
||||
"test_amazing_vertex_completion.py",
|
||||
"test_azure_openai.py",
|
||||
}
|
||||
)
|
||||
# Per-item respx detection (``apply_vcr_auto_marker_to_items``) auto-skips
|
||||
# tests whose ``@pytest.mark.respx`` marker or ``respx_mock`` fixture
|
||||
# would conflict with vcrpy's transport patch. We no longer maintain a
|
||||
# file-level ``_RESPX_CONFLICTING_FILES`` list here — the previous
|
||||
# entries (``test_router.py``) had only a stale ``from respx import
|
||||
# MockRouter`` import with no actual respx wiring, so file-level
|
||||
# blacklisting was masking valid cache opportunities.
|
||||
|
||||
# Files where VCR replay breaks the test:
|
||||
# - ``test_assistants.py``: polls fresh per-session run IDs that no cassette
|
||||
@@ -76,6 +77,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Autouse wrapper around each test.

    Calls ``install_live_call_probe`` before the test body runs and
    ``record_vcr_outcome`` once the test has yielded control back.
    """
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -88,6 +90,11 @@ def pytest_runtest_logreport(report):
|
||||
_verbose_state.maybe_emit_verdict(report)
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: write the cassette-cache banner, then the VCR
    classification summary, to the terminal reporter."""
    for emit in (
        emit_cassette_cache_session_banner,
        emit_vcr_classification_summary,
    ):
        emit(terminalreporter)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Capture TRUE defaults at conftest import time. This runs before any test
|
||||
# module's top-level code (e.g. `litellm.num_retries = 3`) executes, so
|
||||
@@ -215,7 +222,7 @@ def setup_and_teardown():
|
||||
def pytest_collection_modifyitems(config, items):
|
||||
apply_vcr_auto_marker_to_items(
|
||||
items,
|
||||
skip_files=_RESPX_CONFLICTING_FILES | _VCR_INCOMPATIBLE_FILES,
|
||||
skip_files=_VCR_INCOMPATIBLE_FILES,
|
||||
skip_nodeid_suffixes=_VCR_INCOMPATIBLE_NODEID_SUFFIXES,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,6 +22,9 @@ import litellm
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -69,6 +72,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Autouse wrapper around each test.

    Calls ``install_live_call_probe`` before the test body runs and
    ``record_vcr_outcome`` once the test has yielded control back.
    """
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -220,3 +224,8 @@ def pytest_collection_modifyitems(config, items):
|
||||
|
||||
# Reorder the items list
|
||||
items[:] = custom_logger_tests + other_tests
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: write the cassette-cache banner, then the VCR
    classification summary, to the terminal reporter."""
    for emit in (
        emit_cassette_cache_session_banner,
        emit_vcr_classification_summary,
    ):
        emit(terminalreporter)
|
||||
|
||||
@@ -15,6 +15,9 @@ sys.path.insert(0, os.path.abspath("../.."))
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -41,6 +44,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Autouse wrapper around each test.

    Calls ``install_live_call_probe`` before the test body runs and
    ``record_vcr_outcome`` once the test has yielded control back.
    """
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -55,3 +59,8 @@ def pytest_runtest_logreport(report):
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
    """Pytest hook: hand the collected items to the shared VCR auto-marker
    with no skip lists."""
    apply_vcr_auto_marker_to_items(items)
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: write the cassette-cache banner, then the VCR
    classification summary, to the terminal reporter."""
    for emit in (
        emit_cassette_cache_session_banner,
        emit_vcr_classification_summary,
    ):
        emit(terminalreporter)
|
||||
|
||||
@@ -8,6 +8,9 @@ sys.path.insert(0, os.path.abspath("../.."))
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -34,6 +37,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Autouse wrapper around each test.

    Calls ``install_live_call_probe`` before the test body runs and
    ``record_vcr_outcome`` once the test has yielded control back.
    """
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -48,3 +52,8 @@ def pytest_runtest_logreport(report):
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
    """Pytest hook: hand the collected items to the shared VCR auto-marker
    with no skip lists."""
    apply_vcr_auto_marker_to_items(items)
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: write the cassette-cache banner, then the VCR
    classification summary, to the terminal reporter."""
    for emit in (
        emit_cassette_cache_session_banner,
        emit_vcr_classification_summary,
    ):
        emit(terminalreporter)
|
||||
|
||||
@@ -15,6 +15,9 @@ import litellm # noqa: E402,F401
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -87,6 +90,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Autouse wrapper around each test.

    Calls ``install_live_call_probe`` before the test body runs and
    ``record_vcr_outcome`` once the test has yielded control back.
    """
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -114,3 +118,8 @@ def pytest_collection_modifyitems(config, items):
|
||||
|
||||
# Reorder the items list
|
||||
items[:] = custom_logger_tests + other_tests
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: write the cassette-cache banner, then the VCR
    classification summary, to the terminal reporter."""
    for emit in (
        emit_cassette_cache_session_banner,
        emit_vcr_classification_summary,
    ):
        emit(terminalreporter)
|
||||
|
||||
@@ -16,6 +16,9 @@ sys.path.insert(0, os.path.abspath("../.."))
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -42,6 +45,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Autouse wrapper around each test.

    Calls ``install_live_call_probe`` before the test body runs and
    ``record_vcr_outcome`` once the test has yielded control back.
    """
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -56,3 +60,8 @@ def pytest_runtest_logreport(report):
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
    """Pytest hook: hand the collected items to the shared VCR auto-marker
    with no skip lists."""
    apply_vcr_auto_marker_to_items(items)
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: write the cassette-cache banner, then the VCR
    classification summary, to the terminal reporter."""
    for emit in (
        emit_cassette_cache_session_banner,
        emit_vcr_classification_summary,
    ):
        emit(terminalreporter)
|
||||
|
||||
@@ -15,6 +15,9 @@ import litellm # noqa: E402,F401
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VerboseReporterState,
|
||||
apply_vcr_auto_marker_to_items,
|
||||
emit_cassette_cache_session_banner,
|
||||
emit_vcr_classification_summary,
|
||||
install_live_call_probe,
|
||||
record_vcr_outcome,
|
||||
register_persister_if_enabled,
|
||||
vcr_config_dict,
|
||||
@@ -74,6 +77,7 @@ def pytest_runtest_makereport(item, call):
|
||||
|
||||
@pytest.fixture(autouse=True)
def _vcr_outcome_gate(request, vcr):
    """Autouse wrapper around each test.

    Calls ``install_live_call_probe`` before the test body runs and
    ``record_vcr_outcome`` once the test has yielded control back.
    """
    install_live_call_probe(request, vcr)
    yield
    record_vcr_outcome(request, vcr)
|
||||
|
||||
@@ -101,3 +105,8 @@ def pytest_collection_modifyitems(config, items):
|
||||
|
||||
# Reorder the items list
|
||||
items[:] = custom_logger_tests + other_tests
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Pytest hook: write the cassette-cache banner, then the VCR
    classification summary, to the terminal reporter."""
    for emit in (
        emit_cassette_cache_session_banner,
        emit_vcr_classification_summary,
    ):
        emit(terminalreporter)
|
||||
|
||||
Reference in New Issue
Block a user