mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-24 11:40:20 +00:00
2ea9e207bd
* feat(redis): add circuit breaker to RedisCache to fast-fail when Redis is down (#24181) * feat(redis): add circuit breaker env var constants * feat(redis): add RedisCircuitBreaker and apply guard decorator to all async ops * fix(dual_cache): fall back to L1 instead of re-raising on Redis increment failures * test(caching): add circuit breaker unit tests * fix(redis): fast-fail concurrent HALF_OPEN probes — only one probe at a time * fix(dual_cache): return None fallback when in_memory_cache is absent and Redis fails * test(caching): add regression tests for HALF_OPEN concurrency and None fallback * Fix blocking sync next in __anext__ (#24177) * Fix blocking sync next * Update tests/test_litellm/litellm_core_utils/test_streaming_handler.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix PEP 479 regression in __anext__ sync iterator exhaustion asyncio.to_thread re-raises thread exceptions inside a coroutine, where PEP 479 converts StopIteration to RuntimeError before any except clause can catch it. Add _next_sync_or_exhausted() module-level helper that catches StopIteration in the thread and returns a sentinel instead, then raise StopAsyncIteration in the coroutine. Also rewrites the non-blocking test to use asyncio.gather() instead of asyncio.create_task() (which returned None on Python 3.9 / pytest-asyncio in CI), and adds an exhaustion regression test that drains the wrapper fully and asserts no RuntimeError leaks out. --------- Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat: add git-subdir source type to claude-code/plugins API (#24223) Support a third plugin source type `git-subdir` alongside the existing `github` and `url` types, as documented in the official Claude Code plugin marketplaces spec. 
New format: {"source": "git-subdir", "url": "...", "path": "subdir/path"} - Validates url and path fields are present and non-empty - Rejects absolute paths, '..' segments, backslashes, and percent-encoded traversal sequences (including double-encoded variants via regex check) - Extracts path validation into _validate_git_subdir_path() helper - Updates Pydantic field description to document all three source types - Adds isValidUrl() check for url/git-subdir source types in the UI form - Adds "Git Subdir" option to the UI form with a required Path field - Adds unit tests covering success, update, missing/empty fields, path traversal variants, and unknown source type Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> * [FEAT] add extract_header and extract_footer to Mistral OCR supported params (#24213) * docs: add git-subdir source type to claude-code plugin marketplace docs (#24289) * fix(ui): swap J/K keyboard navigation in log details drawer (#24279) (#24286) J should navigate down (next) and K should navigate up (previous), matching vim/standard conventions. 
* fix: use async_set_cache in user_api_key_auth hot path (#24302) * fix: use async_set_cache in auth hot path to avoid blocking event loop * test: assert no blocking set_cache call in _user_api_key_auth_builder * test: broaden blocking call check to all sync DualCache methods * test: fix regression test to actually catch blocking cache calls * fix: ruff lint unused variable + UI build MessageManager error - litellm/caching/redis_cache.py: remove unused variable 'e' in circuit breaker exception handler (F841) - add_plugin_form.tsx: use MessageManager.error() instead of undefined message.error() for git URL validation Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com> * docs: add REDIS_CIRCUIT_BREAKER env vars to config_settings reference Add REDIS_CIRCUIT_BREAKER_FAILURE_THRESHOLD and REDIS_CIRCUIT_BREAKER_RECOVERY_TIMEOUT to the environment variables reference table so test_env_keys.py passes. Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com> --------- Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Vincenzo Barrea <manamana88@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: Robert Kirscht <rkirscht242@gmail.com> Co-authored-by: Imgyu Kim <kimimgo@gmail.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com> Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>
261 lines
9.0 KiB
Python
261 lines
9.0 KiB
Python
import asyncio
|
|
import time
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from litellm.caching.dual_cache import DualCache
|
|
from litellm.caching.in_memory_cache import InMemoryCache
|
|
from litellm.caching.redis_cache import RedisCache
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_dual_cache_async_batch_get_cache_coalesces_concurrent_redis_reads():
    """
    Launch 50 concurrent batch reads for the same two keys and verify that
    the Redis batch getter is awaited exactly once.
    """
    shared_keys = ["shared_a", "shared_b"]
    release = asyncio.Event()
    dual_cache = DualCache(
        redis_cache=MagicMock(spec=RedisCache), default_redis_batch_cache_expiry=10
    )

    async def _slow_redis_batch_get(key_list, parent_otel_span=None):
        # Simulated Redis lookup: suspend for 50 ms, then return None for
        # every requested key.
        await asyncio.sleep(0.05)
        return dict.fromkeys(key_list)

    async def _reader():
        # Every reader parks on the gate so all of them start together.
        await release.wait()
        return await dual_cache.async_batch_get_cache(keys=shared_keys)

    redis_batch_get = AsyncMock(side_effect=_slow_redis_batch_get)
    with patch.object(
        dual_cache.redis_cache,
        "async_batch_get_cache",
        new=redis_batch_get,
    ):
        readers = [asyncio.create_task(_reader()) for _ in range(50)]
        release.set()
        await asyncio.gather(*readers)

    assert redis_batch_get.call_count == 1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_dual_cache_async_batch_get_cache_rolls_back_redis_reservation_on_error():
    """
    When the Redis batch getter raises, two back-to-back reads must both
    return None, both reach Redis, and leave no entry for the requested keys
    in last_redis_batch_access_time.
    """
    shared_keys = ["shared_a", "shared_b"]
    dual_cache = DualCache(
        redis_cache=MagicMock(spec=RedisCache), default_redis_batch_cache_expiry=10
    )
    failing_batch_get = AsyncMock(side_effect=RuntimeError("redis unavailable"))

    with patch.object(
        dual_cache.redis_cache,
        "async_batch_get_cache",
        new=failing_batch_get,
    ):
        first_attempt = await dual_cache.async_batch_get_cache(keys=shared_keys)
        second_attempt = await dual_cache.async_batch_get_cache(keys=shared_keys)

    assert first_attempt is None
    assert second_attempt is None
    # Redis was attempted on both reads.
    assert failing_batch_get.call_count == 2
    for key in shared_keys:
        assert key not in dual_cache.last_redis_batch_access_time
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_dual_cache_async_set_cache_injects_default_in_memory_ttl():
    """
    async_set_cache called without a ttl must store the entry using
    DualCache's default_in_memory_ttl, matching the sync set_cache behavior.

    Regression test: async_set_cache used to skip the TTL injection that
    sync set_cache performs, so InMemoryCache fell back to its own
    default_ttl (600s) instead of DualCache's default_in_memory_ttl.
    """
    expected_ttl = 60
    memory_layer = InMemoryCache(default_ttl=600)
    cache = DualCache(
        in_memory_cache=memory_layer,
        default_in_memory_ttl=expected_ttl,
    )

    earliest = time.time() + expected_ttl
    await cache.async_set_cache(key="test_key", value="test_value")
    latest = time.time() + expected_ttl

    # The stored expiry must sit in the 60s window, not the 600s one that
    # InMemoryCache's own default_ttl would produce.
    assert earliest <= memory_layer.ttl_dict["test_key"] <= latest
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_dual_cache_async_set_cache_respects_explicit_ttl():
    """
    An explicitly supplied ttl must win over default_in_memory_ttl in
    async_set_cache.
    """
    explicit_ttl = 30
    memory_layer = InMemoryCache(default_ttl=600)
    cache = DualCache(in_memory_cache=memory_layer, default_in_memory_ttl=60)

    earliest = time.time() + explicit_ttl
    await cache.async_set_cache(
        key="test_key", value="test_value", ttl=explicit_ttl
    )
    latest = time.time() + explicit_ttl

    # Expiry reflects ttl=30, not the DualCache default of 60.
    assert earliest <= memory_layer.ttl_dict["test_key"] <= latest
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_dual_cache_async_set_cache_pipeline_injects_default_in_memory_ttl():
    """
    async_set_cache_pipeline called without a ttl must give every entry an
    in-memory expiry derived from default_in_memory_ttl.
    """
    expected_ttl = 60
    memory_layer = InMemoryCache(default_ttl=600)
    cache = DualCache(
        in_memory_cache=memory_layer,
        default_in_memory_ttl=expected_ttl,
    )
    entries = [("key_a", "value_a"), ("key_b", "value_b")]

    earliest = time.time() + expected_ttl
    await cache.async_set_cache_pipeline(cache_list=entries)
    latest = time.time() + expected_ttl

    for stored_key in ("key_a", "key_b"):
        assert earliest <= memory_layer.ttl_dict[stored_key] <= latest
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_dual_cache_sync_and_async_set_cache_use_same_ttl():
    """
    With no explicit ttl, set_cache and async_set_cache must store the same
    key with expiry times that agree to within one second.
    """

    def _fresh_pair():
        # Independent in-memory layer plus the DualCache wrapping it.
        memory = InMemoryCache(default_ttl=600)
        return memory, DualCache(in_memory_cache=memory, default_in_memory_ttl=60)

    sync_memory, sync_cache = _fresh_pair()
    async_memory, async_cache = _fresh_pair()

    sync_cache.set_cache(key="test_key", value="test_value")
    await async_cache.async_set_cache(key="test_key", value="test_value")

    # The two writes happen back to back, so equal TTLs give expiries that
    # differ only by the time elapsed between the two calls.
    drift = sync_memory.ttl_dict["test_key"] - async_memory.ttl_dict["test_key"]
    assert -1.0 < drift < 1.0
|
|
|
|
|
|
def test_circuit_breaker_opens_after_threshold():
    """Recording failure_threshold failures in a row moves the breaker to "open"."""
    from litellm.caching.redis_cache import RedisCircuitBreaker

    threshold = 3
    breaker = RedisCircuitBreaker(failure_threshold=threshold, recovery_timeout=60)

    for _ in range(threshold):
        breaker.record_failure()

    assert breaker._state == "open"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_circuit_breaker_open_skips_redis():
    """With the breaker open, a guarded coroutine raises before its body runs."""
    from litellm.caching.redis_cache import (
        RedisCircuitBreaker,
        _redis_circuit_breaker_guard,
    )

    class _GuardedStub:
        """Stand-in object exposing a breaker that is already open."""

        def __init__(self):
            breaker = RedisCircuitBreaker(failure_threshold=3, recovery_timeout=60)
            # Force the open state with a fresh timestamp, so the 60s
            # recovery timeout has not elapsed when the guard runs.
            breaker._state = "open"
            breaker._opened_at = time.time()
            self._circuit_breaker = breaker
            self.invocations = 0

        @_redis_circuit_breaker_guard
        async def do_thing(self):
            self.invocations += 1
            return "result"

    stub = _GuardedStub()
    with pytest.raises(Exception, match="circuit breaker is open"):
        await stub.do_thing()

    # The decorated body must never have been entered.
    assert stub.invocations == 0
|
|
|
|
|
|
def test_circuit_breaker_closes_on_recovery():
    """Once recovery_timeout has elapsed a probe is let through, and a success closes the circuit."""
    from litellm.caching.redis_cache import RedisCircuitBreaker

    breaker = RedisCircuitBreaker(failure_threshold=3, recovery_timeout=60)
    # Simulate a breaker that tripped long ago, far beyond the 60s timeout.
    breaker._opened_at = time.time() - 9999
    breaker._state = "open"

    # The first check admits a probe (returns False) and moves to half-open.
    probe_blocked = breaker.is_open()
    assert probe_blocked is False
    assert breaker._state == "half_open"

    # A successful probe closes the circuit.
    breaker.record_success()
    assert breaker._state == "closed"
|
|
|
|
|
|
def test_circuit_breaker_half_open_concurrent_calls_are_fast_failed():
    """
    Regression test: exactly one probe is admitted when the circuit moves
    from OPEN to HALF_OPEN. Any caller that checks is_open() while the state
    is already HALF_OPEN must be fast-failed (is_open() returns True) rather
    than admitted as another probe.
    """
    from litellm.caching.redis_cache import RedisCircuitBreaker

    breaker = RedisCircuitBreaker(failure_threshold=3, recovery_timeout=60)
    # Tripped long ago: the recovery timeout has expired.
    breaker._opened_at = time.time() - 9999
    breaker._state = "open"

    # First caller: OPEN + expired, so it becomes the probe.
    first_check = breaker.is_open()
    assert first_check is False
    assert breaker._state == "half_open"

    # Every later caller sees HALF_OPEN and must be rejected.
    for _attempt in range(10):
        assert breaker.is_open() is True, "concurrent callers should be fast-failed in HALF_OPEN"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_async_increment_cache_returns_none_when_no_in_memory_cache_and_redis_fails():
    """
    Regression test: with no in-memory layer and a failing Redis increment,
    async_increment_cache must return None and not the raw increment delta,
    which would silently miscalculate rate-limit counters.
    """
    redis_stub = MagicMock()
    redis_stub.async_increment = AsyncMock(side_effect=Exception("redis down"))

    dc = DualCache()
    # The constructor always creates an InMemoryCache, so clear it by hand.
    dc.in_memory_cache = None  # type: ignore[assignment]
    dc.redis_cache = redis_stub

    result = await dc.async_increment_cache("rpm:model:14-05", 1.0, ttl=60)

    assert result is None, (
        f"Expected None when in_memory_cache is absent and Redis fails, got {result!r}. "
        "Returning the delta (1.0) would silently miscalculate rate-limit counters."
    )
|