Files
litellm/tests/test_litellm/caching/test_llm_caching_handler.py
T
Ishaan Jaff 503eb2fd4c fix: don't close HTTP/SDK clients on LLMClientCache eviction (#22925)
* fix: don't close HTTP/SDK clients on LLMClientCache eviction

Removing the _remove_key override that eagerly called aclose()/close()
on evicted clients. Evicted clients may still be held by in-flight
streaming requests; closing them causes:

  RuntimeError: Cannot send a request, as the client has been closed.

This is a regression from commit fb72979432. Clients that are no longer
referenced will be garbage-collected naturally. Explicit shutdown cleanup
happens via close_litellm_async_clients().

Fixes production crashes after the 1-hour cache TTL expires.

* test: update LLMClientCache unit tests for no-close-on-eviction behavior

Flip the assertions: evicted clients must NOT be closed. Replace
test_remove_key_closes_async_client → test_remove_key_does_not_close_async_client
and equivalents for sync/eviction paths.

Add test_remove_key_removes_plain_values for non-client cache entries.
Remove test_background_tasks_cleaned_up_after_completion (no more _background_tasks).
Remove test_remove_key_no_event_loop variant that depended on old behavior.

* test: add e2e tests for OpenAI SDK client surviving cache eviction

Add two new e2e tests using real AsyncOpenAI clients:
- test_evicted_openai_sdk_client_stays_usable: verifies size-based eviction
  doesn't close the client
- test_ttl_expired_openai_sdk_client_stays_usable: verifies TTL expiry
  eviction doesn't close the client

Both tests sleep after eviction so any create_task()-based close would
have time to run, making the regression detectable.

Also expand the module docstring to explain why the sleep is required.

* docs(AGENTS.md): add rule — never close HTTP/SDK clients on cache eviction

* docs(CLAUDE.md): add HTTP client cache safety guideline
2026-03-05 12:00:38 -08:00

175 lines
4.9 KiB
Python

"""
Tests for LLMClientCache.
The cache intentionally does NOT close clients on eviction because evicted
clients may still be referenced by in-flight requests. Closing them eagerly
causes ``RuntimeError: Cannot send a request, as the client has been closed.``
See: https://github.com/BerriAI/litellm/pull/22247
"""
import asyncio
import os
import sys
import warnings
import pytest
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
from litellm.caching.llm_caching_handler import LLMClientCache
class MockAsyncClient:
"""Mock async HTTP client with an async close method."""
def __init__(self):
self.closed = False
async def close(self):
self.closed = True
class MockSyncClient:
"""Mock sync HTTP client with a sync close method."""
def __init__(self):
self.closed = False
def close(self):
self.closed = True
@pytest.mark.asyncio
async def test_remove_key_does_not_close_async_client():
"""
Evicting an async client from LLMClientCache must NOT close it because
an in-flight request may still hold a reference to the client.
Regression test for production 'client has been closed' crashes.
"""
cache = LLMClientCache(max_size_in_memory=2)
mock_client = MockAsyncClient()
cache.cache_dict["test-key"] = mock_client
cache.ttl_dict["test-key"] = 0 # expired
cache._remove_key("test-key")
# Give the event loop a chance to run any background tasks
await asyncio.sleep(0.1)
# Client must NOT be closed — it may still be in use
assert mock_client.closed is False
assert "test-key" not in cache.cache_dict
assert "test-key" not in cache.ttl_dict
def test_remove_key_does_not_close_sync_client():
"""
Evicting a sync client from the cache must NOT close it.
"""
cache = LLMClientCache(max_size_in_memory=2)
mock_client = MockSyncClient()
cache.cache_dict["test-key"] = mock_client
cache.ttl_dict["test-key"] = 0
cache._remove_key("test-key")
assert mock_client.closed is False
assert "test-key" not in cache.cache_dict
@pytest.mark.asyncio
async def test_eviction_does_not_close_async_clients():
"""
When the cache is full and an entry is evicted, the evicted async client
must remain open and must not produce 'coroutine was never awaited' warnings.
"""
cache = LLMClientCache(max_size_in_memory=2, default_ttl=1)
clients = []
for i in range(2):
client = MockAsyncClient()
clients.append(client)
cache.set_cache(f"key-{i}", client)
with warnings.catch_warnings(record=True) as caught_warnings:
warnings.simplefilter("always")
# This should trigger eviction of one of the existing entries
cache.set_cache("key-new", "new-value")
await asyncio.sleep(0.1)
coroutine_warnings = [
w for w in caught_warnings if "coroutine" in str(w.message).lower()
]
assert (
len(coroutine_warnings) == 0
), f"Got unawaited coroutine warnings: {coroutine_warnings}"
# Evicted clients must NOT be closed
for client in clients:
assert client.closed is False
@pytest.mark.asyncio
async def test_eviction_no_unawaited_coroutine_warning():
"""
Evicting an async client from LLMClientCache must not produce
'coroutine was never awaited' warnings.
Regression test for https://github.com/BerriAI/litellm/issues/22128
"""
cache = LLMClientCache(max_size_in_memory=2)
mock_client = MockAsyncClient()
cache.cache_dict["test-key"] = mock_client
cache.ttl_dict["test-key"] = 0 # expired
with warnings.catch_warnings(record=True) as caught_warnings:
warnings.simplefilter("always")
cache._remove_key("test-key")
await asyncio.sleep(0.1)
coroutine_warnings = [
w for w in caught_warnings if "coroutine" in str(w.message).lower()
]
assert (
len(coroutine_warnings) == 0
), f"Got unawaited coroutine warnings: {coroutine_warnings}"
def test_remove_key_no_event_loop():
"""
_remove_key works correctly even when there's no running event loop.
"""
cache = LLMClientCache(max_size_in_memory=2)
mock_client = MockAsyncClient()
cache.cache_dict["test-key"] = mock_client
cache.ttl_dict["test-key"] = 0
# Should not raise even though there's no running event loop
cache._remove_key("test-key")
assert "test-key" not in cache.cache_dict
def test_remove_key_removes_plain_values():
"""
_remove_key correctly removes non-client values (strings, dicts, etc.).
"""
cache = LLMClientCache(max_size_in_memory=5)
cache.cache_dict["str-key"] = "hello"
cache.ttl_dict["str-key"] = 0
cache.cache_dict["dict-key"] = {"foo": "bar"}
cache.ttl_dict["dict-key"] = 0
cache._remove_key("str-key")
cache._remove_key("dict-key")
assert "str-key" not in cache.cache_dict
assert "dict-key" not in cache.cache_dict