mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 22:48:35 +00:00
8947a74e13
* fix(cache): persist and replay streamed Responses API requests * Add focused coverage for streamed responses cache * Cover streamed responses cache helper branches * Exercise streamed responses cache edge branches
142 lines
4.4 KiB
Python
142 lines
4.4 KiB
Python
from datetime import datetime
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
import litellm
|
|
from litellm import aresponses
|
|
from litellm._uuid import uuid
|
|
from litellm.caching.caching_handler import LLMCachingHandler
|
|
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
|
|
from litellm.types.llms import openai as openai_types
|
|
from litellm.types.utils import CallTypes
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_async_get_cache_reuses_preset_cache_key_for_responses():
    """Check the async cache lookup for a streamed ``aresponses`` call.

    The global ``litellm.cache`` is replaced with a mock that reports async
    support and returns a fixed cache key. After ``_async_get_cache`` runs,
    the handler's ``preset_cache_key`` must equal that key and the same key
    must have been passed to the single awaited ``async_get_cache`` call.
    """
    caching_handler = LLMCachingHandler(
        original_function=aresponses,
        request_kwargs={},
        start_time=datetime.now(),
    )
    logging_obj = LiteLLMLogging(
        litellm_call_id=str(datetime.now()),
        call_type=CallTypes.aresponses.value,
        model="gpt-4.1-mini",
        messages=[],
        function_id=str(uuid.uuid4()),
        stream=True,
        start_time=datetime.now(),
    )

    mock_cache = MagicMock()
    mock_cache.supported_call_types = [CallTypes.aresponses.value]
    mock_cache._supports_async.return_value = True
    mock_cache.get_cache_key.return_value = "responses-stream-cache-key"
    # A cache miss: the lookup is awaited but yields nothing.
    mock_cache.async_get_cache = AsyncMock(return_value=None)

    kwargs = {
        "model": "gpt-4.1-mini",
        "input": "hello",
        "stream": True,
        "litellm_params": {},
    }

    original_cache = litellm.cache
    litellm.cache = mock_cache
    try:
        await caching_handler._async_get_cache(
            model="gpt-4.1-mini",
            original_function=aresponses,
            logging_obj=logging_obj,
            start_time=datetime.now(),
            call_type=CallTypes.aresponses.value,
            kwargs=kwargs,
        )

        assert caching_handler.preset_cache_key == "responses-stream-cache-key"
        mock_cache.async_get_cache.assert_awaited_once()
        assert (
            mock_cache.async_get_cache.call_args.kwargs["cache_key"]
            == "responses-stream-cache-key"
        )
    finally:
        # Restore the global even when the call or an assertion fails, so the
        # mock cannot leak into tests that run afterwards.
        litellm.cache = original_cache
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_async_get_cache_falls_back_to_sync_cache_for_responses():
    """Check the sync-cache fallback for a streamed ``aresponses`` call.

    The global ``litellm.cache`` is replaced with a mock whose
    ``_supports_async`` returns ``False``. After ``_async_get_cache`` runs,
    the handler's ``preset_cache_key`` must equal the mock's key and the same
    key must have been passed to the single ``get_cache`` call.
    """
    caching_handler = LLMCachingHandler(
        original_function=aresponses,
        request_kwargs={},
        start_time=datetime.now(),
    )
    logging_obj = LiteLLMLogging(
        litellm_call_id=str(datetime.now()),
        call_type=CallTypes.aresponses.value,
        model="gpt-4.1-mini",
        messages=[],
        function_id=str(uuid.uuid4()),
        stream=True,
        start_time=datetime.now(),
    )

    mock_cache = MagicMock()
    mock_cache.supported_call_types = [CallTypes.aresponses.value]
    mock_cache._supports_async.return_value = False
    mock_cache.get_cache_key.return_value = "responses-stream-cache-key"
    # A cache miss on the synchronous lookup.
    mock_cache.get_cache.return_value = None

    kwargs = {
        "model": "gpt-4.1-mini",
        "input": "hello",
        "stream": True,
        "litellm_params": {},
    }

    original_cache = litellm.cache
    litellm.cache = mock_cache
    try:
        await caching_handler._async_get_cache(
            model="gpt-4.1-mini",
            original_function=aresponses,
            logging_obj=logging_obj,
            start_time=datetime.now(),
            call_type=CallTypes.aresponses.value,
            kwargs=kwargs,
        )

        assert caching_handler.preset_cache_key == "responses-stream-cache-key"
        mock_cache.get_cache.assert_called_once()
        assert mock_cache.get_cache.call_args.kwargs["cache_key"] == (
            "responses-stream-cache-key"
        )
    finally:
        # Restore the global even when the call or an assertion fails, so the
        # mock cannot leak into tests that run afterwards.
        litellm.cache = original_cache
|
|
|
|
|
|
def test_reasoning_summary_events_default_summary_index():
    """Reasoning-summary events built without ``summary_index`` report 0.

    A text-delta, a text-done and a part-done event are constructed without
    passing ``summary_index``; each is expected to expose the value ``0``.
    """
    event_types = openai_types.ResponsesAPIStreamEvents
    # Fields that every event in this test is built with.
    common_fields = {"item_id": "rs_1", "output_index": 0}

    summary_part = openai_types.BaseLiteLLMOpenAIResponseObject(
        type="summary_text",
        text="abc",
    )
    events = [
        openai_types.ReasoningSummaryTextDeltaEvent(
            type=event_types.REASONING_SUMMARY_TEXT_DELTA,
            delta="abc",
            **common_fields,
        ),
        openai_types.ReasoningSummaryTextDoneEvent(
            type=event_types.REASONING_SUMMARY_TEXT_DONE,
            sequence_number=1,
            text="abc",
            **common_fields,
        ),
        openai_types.ReasoningSummaryPartDoneEvent(
            type=event_types.REASONING_SUMMARY_PART_DONE,
            sequence_number=2,
            part=summary_part,
            **common_fields,
        ),
    ]

    for event in events:
        assert event.summary_index == 0
|