litellm/tests/test_litellm/caching/test_in_memory_cache.py

import asyncio
import json
import os
import sys
import time
from unittest.mock import MagicMock, patch

import httpx
import pytest
import respx
from fastapi.testclient import TestClient

sys.path.insert(
    0, os.path.abspath("../../..")
)  # Adds the parent directory to the system path
from unittest.mock import AsyncMock

from litellm.caching.in_memory_cache import InMemoryCache


def test_in_memory_openai_obj_cache():
    from openai import OpenAI

    openai_obj = OpenAI(api_key="my-fake-key")

    in_memory_cache = InMemoryCache()

    in_memory_cache.set_cache(key="my-fake-key", value=openai_obj)

    cached_obj = in_memory_cache.get_cache(key="my-fake-key")

    assert cached_obj is not None

    assert cached_obj == openai_obj


def test_in_memory_cache_max_size_per_item():
    """
    Test that the cache will not store items larger than the max size per item
    """
    in_memory_cache = InMemoryCache(max_size_per_item=100)

    result = in_memory_cache.check_value_size("a" * 100000000)

    assert result is False


def test_in_memory_cache_ttl():
    """
    Check that
    - if ttl is not set, it will be set to default ttl
    - if object expires, the ttl is also removed
    """
    in_memory_cache = InMemoryCache()

    in_memory_cache.set_cache(key="my-fake-key", value="my-fake-value", ttl=10)
    initial_ttl_time = in_memory_cache.ttl_dict["my-fake-key"]
    assert initial_ttl_time is not None

    in_memory_cache.set_cache(key="my-fake-key", value="my-fake-value-2", ttl=10)
    new_ttl_time = in_memory_cache.ttl_dict["my-fake-key"]
    assert new_ttl_time == initial_ttl_time  # ttl should not be updated

    ## On object expiration, the ttl should be removed
    in_memory_cache.set_cache(key="new-fake-key", value="new-fake-value", ttl=1)
    new_ttl_time = in_memory_cache.ttl_dict["new-fake-key"]
    assert new_ttl_time is not None
    time.sleep(1)
    cached_obj = in_memory_cache.get_cache(key="new-fake-key")
    new_ttl_time = in_memory_cache.ttl_dict.get("new-fake-key")
    assert new_ttl_time is None


def test_in_memory_cache_ttl_allow_override():
    """
    Check that
    - if ttl is not set, it will be set to default ttl
    - if object expires, the ttl is also removed
    """
    in_memory_cache = InMemoryCache()
    ## On object expiration, but no get_cache, the override should be allowed
    in_memory_cache.set_cache(key="new-fake-key", value="new-fake-value", ttl=1)
    initial_ttl_time = in_memory_cache.ttl_dict["new-fake-key"]
    assert initial_ttl_time is not None
    time.sleep(1)

    in_memory_cache.set_cache(key="new-fake-key", value="new-fake-value-2", ttl=1)
    new_ttl_time = in_memory_cache.ttl_dict["new-fake-key"]
    assert new_ttl_time is not None
    assert new_ttl_time != initial_ttl_time


def test_in_memory_cache_max_size_with_ttl():
    """
    Test that max_size_in_memory is respected even when all items have long TTLs.
    This tests the fix for the unbounded growth issue.
    """
    in_memory_cache = InMemoryCache(max_size_in_memory=3)
    long_ttl = 86400  # 1 day

    # Fill the cache to max capacity
    for i in range(3):
        in_memory_cache.set_cache(key=f"key_{i}", value=f"value_{i}", ttl=long_ttl)
        time.sleep(0.01)  # Small delay to ensure different timestamps

    assert len(in_memory_cache.cache_dict) == 3
    assert len(in_memory_cache.ttl_dict) == 3

    # Add another item - should evict the earliest item
    in_memory_cache.set_cache(key="key_3", value="value_3", ttl=long_ttl)

    # Cache should still be at max size, not larger
    assert len(in_memory_cache.cache_dict) == 3
    assert len(in_memory_cache.ttl_dict) == 3

    # key_0 should have been evicted (it was added first)
    assert "key_0" not in in_memory_cache.cache_dict
    assert "key_0" not in in_memory_cache.ttl_dict

    # Other keys should still be present
    assert "key_1" in in_memory_cache.cache_dict
    assert "key_2" in in_memory_cache.cache_dict
    assert "key_3" in in_memory_cache.cache_dict


def test_in_memory_cache_expired_items_evicted_first():
    """
    Test that expired items are evicted before non-expired items when cache is full.
    """
    in_memory_cache = InMemoryCache(max_size_in_memory=3)

    # Add items with short TTL that will expire
    in_memory_cache.set_cache(key="expired_1", value="value_1", ttl=1)
    in_memory_cache.set_cache(key="expired_2", value="value_2", ttl=1)

    # Add item with long TTL
    in_memory_cache.set_cache(key="long_lived", value="value_long", ttl=86400)

    assert len(in_memory_cache.cache_dict) == 3

    # Wait for short TTL items to expire
    time.sleep(2)

    # Add new item - should evict expired items first, not the long-lived one
    in_memory_cache.set_cache(key="new_item", value="new_value", ttl=86400)

    # Long-lived item should still be present
    assert "long_lived" in in_memory_cache.cache_dict
    assert "new_item" in in_memory_cache.cache_dict

    # Expired items should be gone
    assert "expired_1" not in in_memory_cache.cache_dict
    assert "expired_2" not in in_memory_cache.cache_dict
    assert "expired_1" not in in_memory_cache.ttl_dict
    assert "expired_2" not in in_memory_cache.ttl_dict


def test_in_memory_cache_eviction_order():
    """
    Test that when non-expired items need to be evicted, those with earliest expiration times are evicted first.
    """
    in_memory_cache = InMemoryCache(max_size_in_memory=2)

    # Add items with different TTLs
    now = time.time()
    in_memory_cache.set_cache(key="early_expire", value="value_1", ttl=100)  # expires in 100 seconds
    time.sleep(0.01)
    in_memory_cache.set_cache(key="late_expire", value="value_2", ttl=200)   # expires in 200 seconds

    # Verify TTL order
    early_ttl = in_memory_cache.ttl_dict["early_expire"]
    late_ttl = in_memory_cache.ttl_dict["late_expire"]
    assert early_ttl < late_ttl, "early_expire should have earlier expiration time"

    assert len(in_memory_cache.cache_dict) == 2

    # Add third item - should evict the one with earliest expiration time
    in_memory_cache.set_cache(key="new_item", value="value_3", ttl=300)

    assert len(in_memory_cache.cache_dict) == 2

    # Item with earliest expiration should be evicted
    assert "early_expire" not in in_memory_cache.cache_dict
    assert "early_expire" not in in_memory_cache.ttl_dict

    # Items with later expiration should remain
    assert "late_expire" in in_memory_cache.cache_dict
    assert "new_item" in in_memory_cache.cache_dict


def test_in_memory_cache_heap_size_staus_bounded():
    """
    Test that the expiration_heap does not grow unbounded when the same key is updated repeaatedly.
    """
    in_memory_cache = InMemoryCache(max_size_in_memory=10)

    for i in range(1_000):
        in_memory_cache.set_cache(key="hot_key", value=f"value_{i}", ttl=60)

    # Expiration heap should only have 1 entry
    assert len(in_memory_cache.expiration_heap) == 1