mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-18 00:48:01 +00:00
68189d1c04
* Improved performance by reducing complexity * Improved logic to prevent memory from increasing too much, added test * Restore indent * Restore indent * Added type annotation * Updated test to correctly initialize the expiration_heap
202 lines
6.9 KiB
Python
202 lines
6.9 KiB
Python
import asyncio
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import httpx
|
|
import pytest
|
|
import respx
|
|
from fastapi.testclient import TestClient
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../../..")
|
|
) # Adds the parent directory to the system path
|
|
from unittest.mock import AsyncMock
|
|
|
|
from litellm.caching.in_memory_cache import InMemoryCache
|
|
|
|
|
|
def test_in_memory_openai_obj_cache():
    """
    An OpenAI client object stored in the cache is handed back by get_cache.
    """
    from openai import OpenAI

    client = OpenAI(api_key="my-fake-key")
    cache = InMemoryCache()
    cache_key = "my-fake-key"

    cache.set_cache(key=cache_key, value=client)
    fetched = cache.get_cache(key=cache_key)

    # The lookup must hit, and the hit must compare equal to what was stored.
    assert fetched is not None
    assert fetched == client
|
|
|
|
|
|
def test_in_memory_cache_max_size_per_item():
    """
    A value exceeding max_size_per_item must make check_value_size return False.
    """
    oversized_value = "a" * 100000000
    cache = InMemoryCache(max_size_per_item=100)

    assert cache.check_value_size(oversized_value) is False
|
|
|
|
|
|
def test_in_memory_cache_ttl():
    """
    Check that
    - re-setting a key that already has a ttl leaves the stored ttl unchanged
    - once an object has expired and is looked up, its ttl entry is removed
    """
    in_memory_cache = InMemoryCache()

    in_memory_cache.set_cache(key="my-fake-key", value="my-fake-value", ttl=10)
    initial_ttl_time = in_memory_cache.ttl_dict["my-fake-key"]
    assert initial_ttl_time is not None

    in_memory_cache.set_cache(key="my-fake-key", value="my-fake-value-2", ttl=10)
    new_ttl_time = in_memory_cache.ttl_dict["my-fake-key"]
    assert new_ttl_time == initial_ttl_time  # ttl should not be updated

    ## On object expiration, the ttl should be removed
    in_memory_cache.set_cache(key="new-fake-key", value="new-fake-value", ttl=1)
    new_ttl_time = in_memory_cache.ttl_dict["new-fake-key"]
    assert new_ttl_time is not None

    # Wait out the 1 second ttl before looking the key up again.
    time.sleep(1)

    # get_cache is called only for its effect on ttl_dict, which is checked
    # below; its return value is not used by this test.
    in_memory_cache.get_cache(key="new-fake-key")
    new_ttl_time = in_memory_cache.ttl_dict.get("new-fake-key")
    assert new_ttl_time is None
|
|
|
|
|
|
def test_in_memory_cache_ttl_allow_override():
    """
    Check that a key whose ttl has elapsed can be set again with a new ttl,
    even though get_cache was never called on it in between.
    """
    cache = InMemoryCache()
    cache_key = "new-fake-key"

    ## On object expiration, but no get_cache, the override should be allowed
    cache.set_cache(key=cache_key, value="new-fake-value", ttl=1)
    first_expiry = cache.ttl_dict[cache_key]
    assert first_expiry is not None

    # Let the first ttl run out without reading the key.
    time.sleep(1)

    cache.set_cache(key=cache_key, value="new-fake-value-2", ttl=1)
    second_expiry = cache.ttl_dict[cache_key]
    assert second_expiry is not None
    assert second_expiry != first_expiry
|
|
|
|
|
|
def test_in_memory_cache_max_size_with_ttl():
    """
    max_size_in_memory must hold even when every entry carries a long ttl.

    Regression test for the unbounded growth issue.
    """
    cache = InMemoryCache(max_size_in_memory=3)
    one_day = 86400

    # Bring the cache up to its limit, spacing the writes so that each
    # entry gets a distinct timestamp.
    for idx in range(3):
        cache.set_cache(key=f"key_{idx}", value=f"value_{idx}", ttl=one_day)
        time.sleep(0.01)

    assert len(cache.cache_dict) == 3
    assert len(cache.ttl_dict) == 3

    # One write past the limit: the earliest entry is expected to make room.
    cache.set_cache(key="key_3", value="value_3", ttl=one_day)

    # Size stays pinned at the limit in both dictionaries.
    assert len(cache.cache_dict) == 3
    assert len(cache.ttl_dict) == 3

    # key_0 was written first, so it is the one that must be gone.
    assert "key_0" not in cache.cache_dict
    assert "key_0" not in cache.ttl_dict

    # Everything written after key_0 survives.
    for surviving_key in ("key_1", "key_2", "key_3"):
        assert surviving_key in cache.cache_dict
|
|
|
|
|
|
def test_in_memory_cache_expired_items_evicted_first():
    """
    When the cache is full, already-expired entries must be evicted ahead of
    entries that are still live.
    """
    cache = InMemoryCache(max_size_in_memory=3)
    short_lived_keys = ("expired_1", "expired_2")

    # Two entries that will expire after one second ...
    for suffix in ("1", "2"):
        cache.set_cache(key=f"expired_{suffix}", value=f"value_{suffix}", ttl=1)

    # ... and one that stays valid for a day.
    cache.set_cache(key="long_lived", value="value_long", ttl=86400)

    assert len(cache.cache_dict) == 3

    # Give the short-lived entries time to expire.
    time.sleep(2)

    # The cache is full, so this write forces an eviction; the expired
    # entries should go, not the long-lived one.
    cache.set_cache(key="new_item", value="new_value", ttl=86400)

    # Both live entries are present.
    for live_key in ("long_lived", "new_item"):
        assert live_key in cache.cache_dict

    # The expired entries have been dropped from both dictionaries.
    for store in (cache.cache_dict, cache.ttl_dict):
        for stale_key in short_lived_keys:
            assert stale_key not in store
|
|
|
|
|
|
def test_in_memory_cache_eviction_order():
    """
    Test that when non-expired items need to be evicted, those with earliest expiration times are evicted first.
    """
    in_memory_cache = InMemoryCache(max_size_in_memory=2)

    # Add items with different TTLs; the short pause keeps the two writes
    # from sharing a timestamp.
    in_memory_cache.set_cache(key="early_expire", value="value_1", ttl=100)  # expires in 100 seconds
    time.sleep(0.01)
    in_memory_cache.set_cache(key="late_expire", value="value_2", ttl=200)  # expires in 200 seconds

    # Verify TTL order
    early_ttl = in_memory_cache.ttl_dict["early_expire"]
    late_ttl = in_memory_cache.ttl_dict["late_expire"]
    assert early_ttl < late_ttl, "early_expire should have earlier expiration time"

    assert len(in_memory_cache.cache_dict) == 2

    # Add third item - should evict the one with earliest expiration time
    in_memory_cache.set_cache(key="new_item", value="value_3", ttl=300)

    assert len(in_memory_cache.cache_dict) == 2

    # Item with earliest expiration should be evicted
    assert "early_expire" not in in_memory_cache.cache_dict
    assert "early_expire" not in in_memory_cache.ttl_dict

    # Items with later expiration should remain
    assert "late_expire" in in_memory_cache.cache_dict
    assert "new_item" in in_memory_cache.cache_dict
|
|
|
|
|
|
def test_in_memory_cache_heap_size_staus_bounded():
    """
    Test that the expiration_heap does not grow unbounded when the same key is updated repeatedly.
    """
    cache = InMemoryCache(max_size_in_memory=10)
    update_count = 1_000

    # Overwrite one key many times, with a different value on each write.
    for attempt in range(update_count):
        cache.set_cache(key="hot_key", value=f"value_{attempt}", ttl=60)

    # All of those writes must leave exactly one entry in the heap.
    heap_entries = len(cache.expiration_heap)
    assert heap_entries == 1
|