Files
litellm/tests/test_litellm/caching/test_in_memory_cache.py
T
malags 68189d1c04 [Performance] Reduce complexity of InMemoryCache.evict_cache from O(n*log(n)) to O(log(n)) (#15000)
* Improved performance by reducing complexity

* Improved logic to prevent memory from increasing too much, added test

* Restore indent

* Restore indent

* Added type annotation

* Updated test to correctly initialize the expiration_heap
2025-09-30 16:49:35 -07:00

202 lines
6.9 KiB
Python

import asyncio
import json
import os
import sys
import time
from unittest.mock import MagicMock, patch
import httpx
import pytest
import respx
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../../..")
)  # Adds the directory three levels above the current working directory to the system path
from unittest.mock import AsyncMock
from litellm.caching.in_memory_cache import InMemoryCache
def test_in_memory_openai_obj_cache():
    """
    Store an OpenAI client object in the cache and check that looking it up
    by the same key yields a non-None value equal to the stored object.
    """
    from openai import OpenAI

    cache_key = "my-fake-key"
    client = OpenAI(api_key="my-fake-key")

    cache = InMemoryCache()
    cache.set_cache(key=cache_key, value=client)

    fetched = cache.get_cache(key=cache_key)
    assert fetched is not None
    assert fetched == client
def test_in_memory_cache_max_size_per_item():
    """
    Check that check_value_size reports False for a value far larger than
    the configured max_size_per_item.
    """
    oversized_value = "a" * 100000000
    cache = InMemoryCache(max_size_per_item=100)
    assert cache.check_value_size(oversized_value) is False
def test_in_memory_cache_ttl():
    """
    Check that
    - re-setting a key whose ttl has not yet elapsed leaves the stored ttl unchanged
    - once an entry has expired, reading it also removes its ttl entry
    """
    in_memory_cache = InMemoryCache()

    in_memory_cache.set_cache(key="my-fake-key", value="my-fake-value", ttl=10)
    initial_ttl_time = in_memory_cache.ttl_dict["my-fake-key"]
    assert initial_ttl_time is not None

    in_memory_cache.set_cache(key="my-fake-key", value="my-fake-value-2", ttl=10)
    new_ttl_time = in_memory_cache.ttl_dict["my-fake-key"]
    assert new_ttl_time == initial_ttl_time  # ttl should not be updated

    ## On object expiration, the ttl should be removed
    in_memory_cache.set_cache(key="new-fake-key", value="new-fake-value", ttl=1)
    assert in_memory_cache.ttl_dict["new-fake-key"] is not None

    # NOTE(review): the sleep equals the ttl exactly; confirm the cache's expiry
    # comparison cannot make this flaky on clocks with coarse resolution.
    time.sleep(1)

    # The returned value is not inspected; the lookup is performed only so the
    # cache processes the expired key before the ttl entry is checked.
    in_memory_cache.get_cache(key="new-fake-key")
    assert in_memory_cache.ttl_dict.get("new-fake-key") is None
def test_in_memory_cache_ttl_allow_override():
    """
    Check that a key whose ttl has already elapsed, and which was never read
    in the meantime, receives a different ttl when it is set again.
    """
    key = "new-fake-key"
    cache = InMemoryCache()

    ## On object expiration, but no get_cache, the override should be allowed
    cache.set_cache(key=key, value="new-fake-value", ttl=1)
    first_expiry = cache.ttl_dict[key]
    assert first_expiry is not None

    time.sleep(1)

    cache.set_cache(key=key, value="new-fake-value-2", ttl=1)
    second_expiry = cache.ttl_dict[key]
    assert second_expiry is not None
    assert second_expiry != first_expiry
def test_in_memory_cache_max_size_with_ttl():
    """
    Check that the cache never holds more than max_size_in_memory entries,
    even when every entry carries a long TTL (regression test for unbounded growth).
    """
    capacity = 3
    one_day = 86400  # seconds
    cache = InMemoryCache(max_size_in_memory=capacity)

    # Bring the cache up to its capacity; the short pause gives each entry
    # a distinct timestamp.
    for idx in range(capacity):
        cache.set_cache(key=f"key_{idx}", value=f"value_{idx}", ttl=one_day)
        time.sleep(0.01)

    assert len(cache.cache_dict) == 3
    assert len(cache.ttl_dict) == 3

    # One entry beyond capacity: the earliest entry is expected to make room.
    cache.set_cache(key="key_3", value="value_3", ttl=one_day)

    # Size is unchanged after the extra insert.
    assert len(cache.cache_dict) == 3
    assert len(cache.ttl_dict) == 3

    # The first key added is gone from both structures.
    assert "key_0" not in cache.cache_dict
    assert "key_0" not in cache.ttl_dict

    # Everything added later is still cached.
    for survivor in ("key_1", "key_2", "key_3"):
        assert survivor in cache.cache_dict
def test_in_memory_cache_expired_items_evicted_first():
    """
    Check that, when a full cache receives a new entry, already-expired entries
    are removed while an unexpired long-lived entry is kept.
    """
    cache = InMemoryCache(max_size_in_memory=3)

    # Two short-lived entries that will have expired by the time the cache is full.
    for short_key, short_value in (("expired_1", "value_1"), ("expired_2", "value_2")):
        cache.set_cache(key=short_key, value=short_value, ttl=1)

    # One entry that stays valid for the whole test.
    cache.set_cache(key="long_lived", value="value_long", ttl=86400)

    assert len(cache.cache_dict) == 3

    # Let the short TTLs elapse.
    time.sleep(2)

    # Inserting into the full cache should push out the expired entries,
    # not the long-lived one.
    cache.set_cache(key="new_item", value="new_value", ttl=86400)

    for kept in ("long_lived", "new_item"):
        assert kept in cache.cache_dict

    # Expired keys must be absent from the value store and from the ttl store.
    for store in (cache.cache_dict, cache.ttl_dict):
        for gone in ("expired_1", "expired_2"):
            assert gone not in store
def test_in_memory_cache_eviction_order():
    """
    Test that when non-expired items need to be evicted, those with earliest expiration times are evicted first.
    """
    in_memory_cache = InMemoryCache(max_size_in_memory=2)

    # Add items with different TTLs
    in_memory_cache.set_cache(key="early_expire", value="value_1", ttl=100)  # expires in 100 seconds
    time.sleep(0.01)
    in_memory_cache.set_cache(key="late_expire", value="value_2", ttl=200)  # expires in 200 seconds

    # Verify TTL order
    early_ttl = in_memory_cache.ttl_dict["early_expire"]
    late_ttl = in_memory_cache.ttl_dict["late_expire"]
    assert early_ttl < late_ttl, "early_expire should have earlier expiration time"
    assert len(in_memory_cache.cache_dict) == 2

    # Add third item - should evict the one with earliest expiration time
    in_memory_cache.set_cache(key="new_item", value="value_3", ttl=300)
    assert len(in_memory_cache.cache_dict) == 2

    # Item with earliest expiration should be evicted
    assert "early_expire" not in in_memory_cache.cache_dict
    assert "early_expire" not in in_memory_cache.ttl_dict

    # Items with later expiration should remain
    assert "late_expire" in in_memory_cache.cache_dict
    assert "new_item" in in_memory_cache.cache_dict
def test_in_memory_cache_heap_size_staus_bounded():
    """
    Check that the expiration_heap does not grow unbounded when the same key is updated repeatedly.
    """
    cache = InMemoryCache(max_size_in_memory=10)

    update_count = 1_000
    for attempt in range(update_count):
        cache.set_cache(key="hot_key", value=f"value_{attempt}", ttl=60)

    # Expiration heap should only have 1 entry
    heap_entries = len(cache.expiration_heap)
    assert heap_entries == 1