Files
litellm/tests/test_litellm/caching/test_dual_cache.py
T
Ishaan Jaff 2ea9e207bd Litellm ishaan march 20 (#24303)
* feat(redis): add circuit breaker to RedisCache to fast-fail when Redis is down (#24181)

* feat(redis): add circuit breaker env var constants

* feat(redis): add RedisCircuitBreaker and apply guard decorator to all async ops

* fix(dual_cache): fall back to L1 instead of re-raising on Redis increment failures

* test(caching): add circuit breaker unit tests

* fix(redis): fast-fail concurrent HALF_OPEN probes — only one probe at a time

* fix(dual_cache): return None fallback when in_memory_cache is absent and Redis fails

* test(caching): add regression tests for HALF_OPEN concurrency and None fallback

* Fix blocking sync next in __anext__ (#24177)

* Fix blocking sync next

* Update tests/test_litellm/litellm_core_utils/test_streaming_handler.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix PEP 479 regression in __anext__ sync iterator exhaustion

asyncio.to_thread re-raises thread exceptions inside a coroutine, where
PEP 479 converts StopIteration to RuntimeError before any except clause
can catch it. Add _next_sync_or_exhausted() module-level helper that
catches StopIteration in the thread and returns a sentinel instead, then
raise StopAsyncIteration in the coroutine.

Also rewrites the non-blocking test to use asyncio.gather() instead of
asyncio.create_task() (which returned None on Python 3.9 / pytest-asyncio
in CI), and adds an exhaustion regression test that drains the wrapper
fully and asserts no RuntimeError leaks out.

---------

Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* feat: add git-subdir source type to claude-code/plugins API (#24223)

Support a third plugin source type `git-subdir` alongside the existing
`github` and `url` types, as documented in the official Claude Code
plugin marketplaces spec.

New format: {"source": "git-subdir", "url": "...", "path": "subdir/path"}

- Validates url and path fields are present and non-empty
- Rejects absolute paths, '..' segments, backslashes, and percent-encoded
  traversal sequences (including double-encoded variants via regex check)
- Extracts path validation into _validate_git_subdir_path() helper
- Updates Pydantic field description to document all three source types
- Adds isValidUrl() check for url/git-subdir source types in the UI form
- Adds "Git Subdir" option to the UI form with a required Path field
- Adds unit tests covering success, update, missing/empty fields,
  path traversal variants, and unknown source type

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>

* [FEAT] add extract_header and extract_footer to Mistral OCR supported params (#24213)

* docs: add git-subdir source type to claude-code plugin marketplace docs (#24289)

* fix(ui): swap J/K keyboard navigation in log details drawer (#24279) (#24286)

J should navigate down (next) and K should navigate up (previous),
matching vim/standard conventions.

* fix: use async_set_cache in user_api_key_auth hot path (#24302)

* fix: use async_set_cache in auth hot path to avoid blocking event loop

* test: assert no blocking set_cache call in _user_api_key_auth_builder

* test: broaden blocking call check to all sync DualCache methods

* test: fix regression test to actually catch blocking cache calls

* fix: ruff lint unused variable + UI build MessageManager error

- litellm/caching/redis_cache.py: remove unused variable 'e' in circuit
  breaker exception handler (F841)
- add_plugin_form.tsx: use MessageManager.error() instead of undefined
  message.error() for git URL validation

Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>

* docs: add REDIS_CIRCUIT_BREAKER env vars to config_settings reference

Add REDIS_CIRCUIT_BREAKER_FAILURE_THRESHOLD and
REDIS_CIRCUIT_BREAKER_RECOVERY_TIMEOUT to the environment variables
reference table so test_env_keys.py passes.

Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>

---------

Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Vincenzo Barrea <manamana88@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Robert Kirscht <rkirscht242@gmail.com>
Co-authored-by: Imgyu Kim <kimimgo@gmail.com>
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>
2026-03-21 12:40:11 -07:00

261 lines
9.0 KiB
Python

import asyncio
import time
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from litellm.caching.dual_cache import DualCache
from litellm.caching.in_memory_cache import InMemoryCache
from litellm.caching.redis_cache import RedisCache
@pytest.mark.asyncio
async def test_dual_cache_async_batch_get_cache_coalesces_concurrent_redis_reads():
    """
    Fifty concurrent batch reads of the same keys must produce one Redis call.

    Each worker waits on a shared event, so all of them call
    ``async_batch_get_cache`` only after the event has been set.
    """
    cache = DualCache(
        redis_cache=MagicMock(spec=RedisCache), default_redis_batch_cache_expiry=10
    )
    shared_keys = ["shared_a", "shared_b"]
    release = asyncio.Event()

    async def _slow_redis_miss(key_list, parent_otel_span=None):
        # Stand-in for Redis: a short delay, then a miss for every requested key.
        await asyncio.sleep(0.05)
        return dict.fromkeys(key_list)

    async def _read_once_released():
        await release.wait()
        return await cache.async_batch_get_cache(keys=shared_keys)

    redis_stub = AsyncMock(side_effect=_slow_redis_miss)
    with patch.object(cache.redis_cache, "async_batch_get_cache", new=redis_stub):
        pending = [asyncio.create_task(_read_once_released()) for _ in range(50)]
        release.set()
        await asyncio.gather(*pending)

    assert redis_stub.call_count == 1
@pytest.mark.asyncio
async def test_dual_cache_async_batch_get_cache_rolls_back_redis_reservation_on_error():
    """
    A failing Redis batch read must not leave per-key access entries behind.

    Two back-to-back reads both reach the (raising) Redis mock and both
    return ``None``; afterwards neither key appears in
    ``last_redis_batch_access_time``.
    """
    cache = DualCache(
        redis_cache=MagicMock(spec=RedisCache), default_redis_batch_cache_expiry=10
    )
    shared_keys = ["shared_a", "shared_b"]
    failing_redis = AsyncMock(side_effect=RuntimeError("redis unavailable"))

    with patch.object(cache.redis_cache, "async_batch_get_cache", new=failing_redis):
        first_outcome = await cache.async_batch_get_cache(keys=shared_keys)
        second_outcome = await cache.async_batch_get_cache(keys=shared_keys)

    assert first_outcome is None
    assert second_outcome is None
    # The second read hit Redis again, so the first failure did not block it.
    assert failing_redis.call_count == 2
    for key in shared_keys:
        assert key not in cache.last_redis_batch_access_time
@pytest.mark.asyncio
async def test_dual_cache_async_set_cache_injects_default_in_memory_ttl():
    """
    async_set_cache must apply DualCache's default_in_memory_ttl when the
    caller passes no ttl, the same way the sync set_cache does.

    Regression test: async_set_cache used to skip that TTL injection, so
    InMemoryCache fell back to its own default_ttl (600s) instead of the
    DualCache-level default_in_memory_ttl.
    """
    memory = InMemoryCache(default_ttl=600)
    cache = DualCache(in_memory_cache=memory, default_in_memory_ttl=60)

    earliest_expiry = time.time() + 60
    await cache.async_set_cache(key="test_key", value="test_value")
    latest_expiry = time.time() + 60

    # The stored expiry must sit in the 60s window, not the 600s one.
    stored_expiry = memory.ttl_dict["test_key"]
    assert stored_expiry >= earliest_expiry
    assert stored_expiry <= latest_expiry
@pytest.mark.asyncio
async def test_dual_cache_async_set_cache_respects_explicit_ttl():
    """
    An explicitly passed ttl must win over default_in_memory_ttl in async_set_cache.
    """
    memory = InMemoryCache(default_ttl=600)
    cache = DualCache(in_memory_cache=memory, default_in_memory_ttl=60)

    earliest_expiry = time.time() + 30
    await cache.async_set_cache(key="test_key", value="test_value", ttl=30)
    latest_expiry = time.time() + 30

    # Expiry tracks the caller's ttl=30 rather than the 60s DualCache default.
    stored_expiry = memory.ttl_dict["test_key"]
    assert stored_expiry >= earliest_expiry
    assert stored_expiry <= latest_expiry
@pytest.mark.asyncio
async def test_dual_cache_async_set_cache_pipeline_injects_default_in_memory_ttl():
    """
    async_set_cache_pipeline must apply default_in_memory_ttl to every entry
    when the caller passes no ttl.
    """
    memory = InMemoryCache(default_ttl=600)
    cache = DualCache(in_memory_cache=memory, default_in_memory_ttl=60)
    entries = [("key_a", "value_a"), ("key_b", "value_b")]

    earliest_expiry = time.time() + 60
    await cache.async_set_cache_pipeline(cache_list=entries)
    latest_expiry = time.time() + 60

    for key, _value in entries:
        stored_expiry = memory.ttl_dict[key]
        assert stored_expiry >= earliest_expiry
        assert stored_expiry <= latest_expiry
@pytest.mark.asyncio
async def test_dual_cache_sync_and_async_set_cache_use_same_ttl():
    """
    With no explicit ttl, set_cache and async_set_cache must store expiries
    that agree with each other, i.e. the sync and async paths stay in parity.
    """

    def _fresh_pair():
        # Independent in-memory store plus the DualCache wrapping it.
        memory = InMemoryCache(default_ttl=600)
        return memory, DualCache(in_memory_cache=memory, default_in_memory_ttl=60)

    sync_memory, sync_cache = _fresh_pair()
    async_memory, async_cache = _fresh_pair()

    sync_cache.set_cache(key="test_key", value="test_value")
    await async_cache.async_set_cache(key="test_key", value="test_value")

    # The two writes happen moments apart, so equal TTLs give near-equal expiries.
    drift = abs(sync_memory.ttl_dict["test_key"] - async_memory.ttl_dict["test_key"])
    assert drift < 1.0
def test_circuit_breaker_opens_after_threshold():
    """Recording failure_threshold failures in a row leaves the breaker open."""
    from litellm.caching.redis_cache import RedisCircuitBreaker

    threshold = 3
    breaker = RedisCircuitBreaker(failure_threshold=threshold, recovery_timeout=60)

    recorded = 0
    while recorded < threshold:
        breaker.record_failure()
        recorded += 1

    assert breaker._state == "open"
@pytest.mark.asyncio
async def test_circuit_breaker_open_skips_redis():
    """With the breaker open, a guarded method raises before its body runs."""
    from litellm.caching.redis_cache import (
        RedisCircuitBreaker,
        _redis_circuit_breaker_guard,
    )

    class FakeRedis:
        def __init__(self):
            breaker = RedisCircuitBreaker(failure_threshold=3, recovery_timeout=60)
            # Force the breaker open, stamped as opened just now.
            breaker._state = "open"
            breaker._opened_at = time.time()
            self._circuit_breaker = breaker
            self.call_count = 0

        @_redis_circuit_breaker_guard
        async def do_thing(self):
            self.call_count += 1
            return "result"

    fake_redis = FakeRedis()

    with pytest.raises(Exception, match="circuit breaker is open"):
        await fake_redis.do_thing()

    # A zero counter proves the decorated body was never entered.
    assert fake_redis.call_count == 0
def test_circuit_breaker_closes_on_recovery():
    """Once recovery_timeout has passed, one probe is let through and its success closes the breaker."""
    from litellm.caching.redis_cache import RedisCircuitBreaker

    breaker = RedisCircuitBreaker(failure_threshold=3, recovery_timeout=60)
    breaker._state = "open"
    # Back-date the open timestamp far beyond the 60s recovery timeout.
    breaker._opened_at = time.time() - 9999

    # The check lets the probe through (False) and moves the state to half_open.
    probe_rejected = breaker.is_open()
    assert probe_rejected is False
    assert breaker._state == "half_open"

    # Reporting the probe as successful returns the breaker to closed.
    breaker.record_success()
    assert breaker._state == "closed"
def test_circuit_breaker_half_open_concurrent_calls_are_fast_failed():
    """
    Regression test: the OPEN → HALF_OPEN transition admits exactly one probe.

    Any caller that consults is_open() while the state is already HALF_OPEN
    must be fast-failed (is_open() returns True) instead of being let
    through as an extra probe.
    """
    from litellm.caching.redis_cache import RedisCircuitBreaker

    breaker = RedisCircuitBreaker(failure_threshold=3, recovery_timeout=60)
    breaker._state = "open"
    # Back-date the open timestamp far beyond the 60s recovery timeout.
    breaker._opened_at = time.time() - 9999

    # First check: expired OPEN state becomes HALF_OPEN and the probe is admitted.
    first_check = breaker.is_open()
    assert first_check is False
    assert breaker._state == "half_open"

    # Every later check sees HALF_OPEN and must report the circuit as open.
    remaining = 10
    while remaining:
        later_check = breaker.is_open()
        assert later_check is True, "concurrent callers should be fast-failed in HALF_OPEN"
        remaining -= 1
@pytest.mark.asyncio
async def test_async_increment_cache_returns_none_when_no_in_memory_cache_and_redis_fails():
    """
    Regression test: with no in_memory_cache and a failing Redis,
    async_increment_cache must return None.

    Returning the raw increment delta instead would silently miscalculate
    rate-limit counters.
    """
    broken_redis = MagicMock()
    broken_redis.async_increment = AsyncMock(side_effect=Exception("redis down"))

    cache = DualCache()
    # The constructor always builds an InMemoryCache, so it is removed by hand.
    cache.in_memory_cache = None  # type: ignore[assignment]
    cache.redis_cache = broken_redis

    outcome = await cache.async_increment_cache("rpm:model:14-05", 1.0, ttl=60)

    assert outcome is None, (
        f"Expected None when in_memory_cache is absent and Redis fails, got {outcome!r}. "
        "Returning the delta (1.0) would silently miscalculate rate-limit counters."
    )