Files
litellm/tests/llm_responses_api_testing/test_openai_responses_api.py
T
Ishaan Jaff 736daf0a7d [Feat] Adds Shell tool support for the OpenAI Responses API (#21063)
* test_responses_api_context_management_server_side_compaction

* Server-side compaction

* docs fix

* test_responses_api_shell_tool

* add SHELL tool

* test_responses_api_shell_tool

* add SHELL_CALL_IN_PROGRESS

* add SHELL_CALL_IN_PROGRESS events

* TestOpenAIResponsesAPITest

* transform_streaming_response

* test_responses_api_shell_tool_streaming_sees_shell_output

* test_responses_api_shell_tool_streaming_sees_shell_output

* test_responses_api_shell_tool

* docs fix
2026-02-12 13:04:29 -08:00

1866 lines
64 KiB
Python

import os
import sys
import pytest
import asyncio
from typing import Optional, cast
from unittest.mock import patch, AsyncMock
import httpx
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
import time
import json
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
ResponseCompletedEvent,
ResponsesAPIResponse,
ResponseAPIUsage,
IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest, validate_responses_api_response
class TestOpenAIResponsesAPITest(BaseResponsesAPITest):
    """OpenAI-specific settings for the tests inherited from BaseResponsesAPITest."""

    def get_base_completion_call_args(self):
        """Return the base call kwargs: the default OpenAI model."""
        return dict(model="openai/gpt-4o")

    def get_base_completion_reasoning_call_args(self):
        """Return the base call kwargs for the reasoning variant of the tests."""
        return dict(model="openai/gpt-5-mini")

    def get_advanced_model_for_shell_tool(self):
        """Return the model name used for the shell-tool tests."""
        shell_tool_model = "openai/gpt-5.2"
        return shell_tool_model
class TestCustomLogger(CustomLogger):
    """
    Callback that keeps the standard logging payload of the latest successful call.

    Only the async success hook is implemented, so the payload is captured
    only when that hook is invoked.
    """

    def __init__(
        self,
    ):
        # Let the base class set up its own state before adding ours
        super().__init__()
        # Populated by async_log_success_event; None until a success is logged
        self.standard_logging_object: Optional[StandardLoggingPayload] = None

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        """
        Store kwargs["standard_logging_object"] on the instance.

        Args:
            kwargs: Logging kwargs; must contain "standard_logging_object"
            response_obj: Unused
            start_time: Unused
            end_time: Unused

        Raises:
            KeyError: If "standard_logging_object" is missing from kwargs
        """
        print("in async_log_success_event")
        print("kwargs=", json.dumps(kwargs, indent=4, default=str))
        self.standard_logging_object = kwargs["standard_logging_object"]
def validate_standard_logging_payload(
    slp: StandardLoggingPayload, response: ResponsesAPIResponse, request_model: str
):
    """
    Check a StandardLoggingPayload against the response it was logged for.

    Args:
        slp (StandardLoggingPayload): The logged payload under test
        response (ResponsesAPIResponse): The litellm response to compare against
        request_model (str): The model name that was requested

    Raises:
        AssertionError: On the first field that does not match
    """
    assert slp is not None, "Standard logging payload should not be None"

    print("VALIDATING STANDARD LOGGING PAYLOAD. response=", json.dumps(response, indent=4, default=str))
    print("FIELDS IN SLP=", json.dumps(slp, indent=4, default=str))
    print("SLP PROMPT TOKENS=", slp["prompt_tokens"])
    usage = response["usage"]
    print("RESPONSE PROMPT TOKENS=", usage["input_tokens"])

    # Token counts: payload key -> response usage key
    token_checks = (
        ("prompt_tokens", "input_tokens", "Prompt tokens mismatch"),
        ("completion_tokens", "output_tokens", "Completion tokens mismatch"),
    )
    for slp_key, usage_key, failure in token_checks:
        assert slp[slp_key] == usage[usage_key], failure

    # The total is checked against the sum of the parts, not usage["total_tokens"]
    expected_total = usage["input_tokens"] + usage["output_tokens"]
    assert slp["total_tokens"] == expected_total, "Total tokens mismatch"

    # Spend and response metadata
    assert slp["response_cost"] > 0, "Response cost should be greater than 0"
    assert slp["id"] == response["id"], "Response ID mismatch"
    assert slp["model"] == request_model, "Model name mismatch"

    # The callers in this file always send the single user message "hi"
    expected_messages = [{"content": "hi", "role": "user"}]
    assert slp["messages"] == expected_messages, "Messages mismatch"

    # Finally compare the logged response body field by field
    validate_responses_match(slp["response"], response)
@pytest.mark.asyncio
async def test_basic_openai_responses_api_streaming_with_logging():
    """
    Stream a response and check the standard logging payload recorded for it.

    The test is a coroutine and uses the async API: TestCustomLogger in this
    file captures the payload only through async_log_success_event, and the
    asyncio marker expects an async test function.
    """
    litellm._turn_on_debug()
    litellm.set_verbose = True
    test_custom_logger = TestCustomLogger()
    litellm.callbacks = [test_custom_logger]
    request_model = "gpt-4o"
    response = await litellm.aresponses(
        model=request_model,
        input="hi",
        stream=True,
    )
    final_response: Optional[ResponseCompletedEvent] = None
    async for event in response:
        # Keep the completed event; it carries the final response object
        if event.type == "response.completed":
            final_response = event
        print("litellm response=", json.dumps(event, indent=4, default=str))

    # Give the logging callback time to run, without blocking the event loop
    print("sleeping for 2 seconds...")
    await asyncio.sleep(2)
    print(
        "standard logging payload=",
        json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
    )
    assert final_response is not None
    assert test_custom_logger.standard_logging_object is not None
    validate_standard_logging_payload(
        slp=test_custom_logger.standard_logging_object,
        response=final_response.response,
        request_model=request_model,
    )
def validate_responses_match(slp_response, litellm_response):
    """
    Validate that the response stored in the standard logging payload matches the litellm response.

    Args:
        slp_response: Logged response; read with key access throughout
        litellm_response: litellm response; top-level fields are read with key
            access, output items with attribute access

    Raises:
        AssertionError: On the first field that does not match
    """
    # Core fields
    core_checks = (
        ("id", "ID mismatch"),
        ("model", "Model mismatch"),
        ("created_at", "Created at mismatch"),
    )
    for field, failure in core_checks:
        assert slp_response[field] == litellm_response[field], failure

    # Usage: logged key -> litellm key
    # NOTE(review): the logged side is read with prompt_tokens/completion_tokens
    # while the litellm side uses input_tokens/output_tokens - confirm this is
    # the shape the logged response really has.
    logged_usage = slp_response["usage"]
    actual_usage = litellm_response["usage"]
    usage_checks = (
        ("prompt_tokens", "input_tokens", "Input tokens mismatch"),
        ("completion_tokens", "output_tokens", "Output tokens mismatch"),
        ("total_tokens", "total_tokens", "Total tokens mismatch"),
    )
    for logged_key, actual_key, failure in usage_checks:
        assert logged_usage[logged_key] == actual_usage[actual_key], failure

    # Output items
    logged_items = slp_response["output"]
    actual_items = litellm_response["output"]
    assert len(logged_items) == len(actual_items), "Output length mismatch"

    for logged_item, actual_item in zip(logged_items, actual_items):
        assert logged_item["role"] == actual_item.role, "Message role mismatch"

        # Only the first content part is compared; empty content counts as ""
        if actual_item.content:
            litellm_content = actual_item.content[0].text
        else:
            litellm_content = ""
        assert (
            logged_item["content"][0]["text"] == litellm_content
        ), f"Message content mismatch. Expected {litellm_content}, Got {logged_item['content']}"

        assert logged_item["status"] == actual_item.status, "Message status mismatch"
@pytest.mark.asyncio
async def test_basic_openai_responses_api_non_streaming_with_logging():
    """
    Make a non-streaming async call and check the standard logging payload recorded for it.
    """
    litellm._turn_on_debug()
    litellm.set_verbose = True
    test_custom_logger = TestCustomLogger()
    litellm.callbacks = [test_custom_logger]
    request_model = "gpt-4o"
    response = await litellm.aresponses(
        model=request_model,
        input="hi",
    )
    print("litellm response=", json.dumps(response, indent=4, default=str))
    print("response hidden params=", response._hidden_params)

    # Give the logging callback time to run. The message is built from the
    # delay so the two cannot drift apart (it used to say 2 while sleeping 5).
    settle_seconds = 5
    print(f"sleeping for {settle_seconds} seconds...")
    await asyncio.sleep(settle_seconds)
    print(
        "standard logging payload=",
        json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
    )
    print("response usage=", response.usage)
    assert response is not None
    assert test_custom_logger.standard_logging_object is not None
    validate_standard_logging_payload(
        test_custom_logger.standard_logging_object, response, request_model
    )
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_responses_api_returns_headers(sync_mode):
    """
    Check that a Responses API call exposes provider headers via `_hidden_params`.

    The response must carry a non-empty `additional_headers` dict containing at
    least one rate-limit header (bare or `llm_provider-` prefixed), plus a
    `headers` entry.
    """
    litellm._turn_on_debug()
    litellm.set_verbose = True

    call_kwargs = dict(model="gpt-4o", input="Say hello", max_output_tokens=20)
    if sync_mode:
        response = litellm.responses(**call_kwargs)
    else:
        response = await litellm.aresponses(**call_kwargs)

    # The response itself must be valid
    assert response is not None
    assert isinstance(response, ResponsesAPIResponse)

    # _hidden_params must exist and be populated
    assert hasattr(
        response, "_hidden_params"
    ), "Response should have _hidden_params attribute"
    hidden_params = response._hidden_params
    assert hidden_params is not None, "_hidden_params should not be None"

    # additional_headers must be a non-empty dict
    assert (
        "additional_headers" in hidden_params
    ), "_hidden_params should contain 'additional_headers' key"
    additional_headers = hidden_params["additional_headers"]
    assert isinstance(
        additional_headers, dict
    ), "additional_headers should be a dictionary"
    assert len(additional_headers) > 0, "additional_headers should not be empty"

    # Each rate-limit header may appear bare (x-ratelimit-*) or prefixed
    # (llm_provider-x-ratelimit-*); the bare spelling wins when both exist.
    rate_limit_headers = (
        "x-ratelimit-remaining-tokens",
        "x-ratelimit-limit-tokens",
        "x-ratelimit-remaining-requests",
        "x-ratelimit-limit-requests",
    )
    found_headers = []
    for header_name in rate_limit_headers:
        candidates = (header_name, f"llm_provider-{header_name}")
        present = next((c for c in candidates if c in additional_headers), None)
        if present is not None:
            found_headers.append(present)
    assert (
        len(found_headers) > 0
    ), f"Should find at least one OpenAI rate limit header. Headers found: {list(additional_headers.keys())}"

    # The raw headers entry must be present as well
    assert (
        "headers" in response._hidden_params
    ), "_hidden_params should contain 'headers' key with raw response headers"

    mode_label = "sync" if sync_mode else "async"
    print(f"✓ Successfully validated OpenAI headers in {mode_label} mode")
    print(f" Found {len(additional_headers)} headers total")
    print(f" Rate limit headers found: {found_headers}")
# Ordered rules: (event types, attributes every such event must expose).
# Attributes are checked in the order listed; types matching no rule (other
# than "error", handled separately) get no attribute checks.
_STREAM_EVENT_ATTRIBUTE_RULES = (
    (
        (
            "response.created",
            "response.in_progress",
            "response.completed",
            "response.failed",
            "response.incomplete",
        ),
        ("response",),
    ),
    (
        ("response.output_item.added", "response.output_item.done"),
        ("output_index", "item"),
    ),
    (
        ("response.content_part.added", "response.content_part.done"),
        ("item_id", "output_index", "content_index", "part"),
    ),
    (
        ("response.output_text.delta",),
        ("item_id", "output_index", "content_index", "delta"),
    ),
    (
        ("response.output_text.annotation.added",),
        ("item_id", "output_index", "content_index", "annotation_index", "annotation"),
    ),
    (
        ("response.output_text.done",),
        ("item_id", "output_index", "content_index", "text"),
    ),
    (
        ("response.refusal.delta",),
        ("item_id", "output_index", "content_index", "delta"),
    ),
    (
        ("response.refusal.done",),
        ("item_id", "output_index", "content_index", "refusal"),
    ),
    (
        ("response.function_call_arguments.delta",),
        ("item_id", "output_index", "delta"),
    ),
    (
        ("response.function_call_arguments.done",),
        ("item_id", "output_index", "arguments"),
    ),
    (
        (
            "response.file_search_call.in_progress",
            "response.file_search_call.searching",
            "response.file_search_call.completed",
            "response.web_search_call.in_progress",
            "response.web_search_call.searching",
            "response.web_search_call.completed",
        ),
        ("output_index", "item_id"),
    ),
)


def validate_stream_event(event):
    """
    Validate one streaming event produced by litellm.responses() / litellm.aresponses()
    with stream=True against the structure expected for its event type.

    Args:
        event: The streaming event object to validate

    Returns:
        True when every check passes. Event types with no rule are accepted.

    Raises:
        AssertionError: If the event lacks an attribute required for its type
    """
    assert hasattr(event, "type"), "Event should have a 'type' attribute"
    event_type = event.type

    # The error event uses its own wording, so it is not part of the rule table
    if event_type == "error":
        assert hasattr(
            event, "message"
        ), "Error event should have a 'message' attribute"
        return True

    # Attribute-presence checks for the first rule that lists this type
    for type_names, required_attrs in _STREAM_EVENT_ATTRIBUTE_RULES:
        if event_type not in type_names:
            continue
        for attr in required_attrs:
            article = "an" if attr[0] in "aeiou" else "a"
            assert hasattr(
                event, attr
            ), f"{event_type} event should have {article} '{attr}' attribute"
        break

    # Deeper validation of the embedded response for lifecycle events
    if event_type == "response.created" or event_type == "response.in_progress":
        validate_responses_api_response(event.response, final_chunk=False)
    elif event_type == "response.completed":
        validate_responses_api_response(event.response, final_chunk=True)
        # Usage is only required on the completed event
        assert (
            "usage" in event.response
        ), "response.completed event should have usage information"
        print("Usage in event.response=", event.response["usage"])
        assert isinstance(event.response["usage"], ResponseAPIUsage)

    return True  # Every applicable check passed
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_responses_api_streaming_validation(sync_mode):
    """Stream a response (sync or async) and validate every event that arrives"""
    litellm._turn_on_debug()
    event_types_seen = set()
    stream_kwargs = dict(
        model="gpt-4o",
        input="Tell me about artificial intelligence in 3 sentences.",
        stream=True,
    )

    def _validate_and_record(event):
        # Shared per-event work for the sync and async loops
        print(f"Validating event type: {event.type}")
        validate_stream_event(event)
        event_types_seen.add(event.type)

    if sync_mode:
        response = litellm.responses(**stream_kwargs)
        for event in response:
            _validate_and_record(event)
    else:
        response = await litellm.aresponses(**stream_kwargs)
        async for event in response:
            _validate_and_record(event)

    # The stream must at least open and close the response
    required_events = {"response.created", "response.completed"}
    missing_events = required_events - event_types_seen
    assert not missing_events, f"Missing required event types: {missing_events}"
    print(f"Successfully validated all event types: {event_types_seen}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_responses_litellm_router(sync_mode):
    """
    Test the OpenAI responses API with LiteLLM Router in both sync and async modes

    The test returns nothing: pytest expects test functions to return None.
    """
    litellm._turn_on_debug()
    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt4o-special-alias",
                "litellm_params": {
                    "model": "gpt-4o",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
            }
        ]
    )
    # Call through the router alias
    if sync_mode:
        response = router.responses(
            model="gpt4o-special-alias",
            input="Hello, can you tell me a short joke?",
            max_output_tokens=100,
        )
        print("SYNC MODE RESPONSE=", response)
    else:
        response = await router.aresponses(
            model="gpt4o-special-alias",
            input="Hello, can you tell me a short joke?",
            max_output_tokens=100,
        )
    print(
        f"Router {'sync' if sync_mode else 'async'} response=",
        json.dumps(response, indent=4, default=str),
    )
    # Use the shared helper to validate the response
    validate_responses_api_response(response, final_chunk=True)
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_responses_litellm_router_streaming(sync_mode):
    """
    Stream a response through LiteLLM Router (sync or async) and validate every event
    """
    litellm._turn_on_debug()
    deployment = {
        "model_name": "gpt4o-special-alias",
        "litellm_params": {
            "model": "gpt-4o",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    }
    router = litellm.Router(model_list=[deployment])
    event_types_seen = set()
    stream_kwargs = dict(
        model="gpt4o-special-alias",
        input="Tell me about artificial intelligence in 2 sentences.",
        stream=True,
    )

    def _validate_and_record(event):
        # Shared per-event work for the sync and async loops
        print(f"Validating event type: {event.type}")
        validate_stream_event(event)
        event_types_seen.add(event.type)

    if sync_mode:
        response = router.responses(**stream_kwargs)
        for event in response:
            _validate_and_record(event)
    else:
        response = await router.aresponses(**stream_kwargs)
        async for event in response:
            _validate_and_record(event)

    # The stream must at least open and close the response
    required_events = {"response.created", "response.completed"}
    missing_events = required_events - event_types_seen
    assert not missing_events, f"Missing required event types: {missing_events}"
    print(f"Successfully validated all event types: {event_types_seen}")
@pytest.mark.asyncio
async def test_openai_responses_litellm_router_no_metadata():
    """
    Test that the Router sends no `metadata` in the request body when the caller passes none
    """
    assistant_message = {
        "type": "message",
        "id": "msg_123",
        "status": "completed",
        "role": "assistant",
        "content": [
            {"type": "output_text", "text": "Hello world!", "annotations": []}
        ],
    }
    token_usage = {
        "input_tokens": 10,
        "output_tokens": 20,
        "total_tokens": 30,
        "output_tokens_details": {"reasoning_tokens": 0},
    }
    canned_payload = {
        "id": "resp_123",
        "object": "response",
        "created_at": 1741476542,
        "status": "completed",
        "model": "gpt-4o",
        "output": [assistant_message],
        "parallel_tool_calls": True,
        "usage": token_usage,
        "text": {"format": {"type": "text"}},
        # Remaining fields of the response object
        "error": None,
        "incomplete_details": None,
        "instructions": None,
        "metadata": {},
        "temperature": 1.0,
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "max_output_tokens": None,
        "previous_response_id": None,
        "reasoning": {"effort": None, "summary": None},
        "truncation": "disabled",
        "user": None,
    }

    class _StubHTTPResponse:
        """Stand-in for the HTTP response returned by the patched post call."""

        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = str(json_data)
            self.headers = httpx.Headers({})

        def json(self):  # plain method, not a coroutine
            return self._json_data

    post_target = "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post"
    with patch(post_target, new_callable=AsyncMock) as mock_post:
        # Every post call returns the canned response
        mock_post.return_value = _StubHTTPResponse(canned_payload, 200)
        litellm._turn_on_debug()
        deployment = {
            "model_name": "gpt4o-special-alias",
            "litellm_params": {
                "model": "gpt-4o",
                "api_key": "fake-key",
            },
        }
        router = litellm.Router(model_list=[deployment])
        # Call the router WITHOUT a metadata argument
        await router.aresponses(
            model="gpt4o-special-alias",
            input="Hello, can you tell me a short joke?",
        )
        # Inspect the JSON body handed to the patched post call
        request_body = mock_post.call_args.kwargs["json"]
        print("Request body:", json.dumps(request_body, indent=4))
        assert (
            "metadata" not in request_body
        ), "metadata should not be in the request body"
        mock_post.assert_called_once()
@pytest.mark.asyncio
async def test_openai_responses_litellm_router_with_metadata():
    """
    Test that metadata passed explicitly to the Router reaches the request body unchanged
    """
    test_metadata = {
        "user_id": "123",
        "conversation_id": "abc",
        "custom_field": "test_value",
    }
    assistant_message = {
        "type": "message",
        "id": "msg_123",
        "status": "completed",
        "role": "assistant",
        "content": [
            {"type": "output_text", "text": "Hello world!", "annotations": []}
        ],
    }
    token_usage = {
        "input_tokens": 10,
        "output_tokens": 20,
        "total_tokens": 30,
        "output_tokens_details": {"reasoning_tokens": 0},
    }
    canned_payload = {
        "id": "resp_123",
        "object": "response",
        "created_at": 1741476542,
        "status": "completed",
        "model": "gpt-4o",
        "output": [assistant_message],
        "parallel_tool_calls": True,
        "usage": token_usage,
        "text": {"format": {"type": "text"}},
        "error": None,
        "incomplete_details": None,
        "instructions": None,
        "metadata": test_metadata,  # the canned response carries the same metadata
        "temperature": 1.0,
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "max_output_tokens": None,
        "previous_response_id": None,
        "reasoning": {"effort": None, "summary": None},
        "truncation": "disabled",
        "user": None,
    }

    class _StubHTTPResponse:
        """Stand-in for the HTTP response returned by the patched post call."""

        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = str(json_data)
            self.headers = httpx.Headers({})

        def json(self):
            return self._json_data

    post_target = "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post"
    with patch(post_target, new_callable=AsyncMock) as mock_post:
        # Every post call returns the canned response
        mock_post.return_value = _StubHTTPResponse(canned_payload, 200)
        litellm._turn_on_debug()
        deployment = {
            "model_name": "gpt4o-special-alias",
            "litellm_params": {
                "model": "gpt-4o",
                "api_key": "fake-key",
            },
        }
        router = litellm.Router(model_list=[deployment])
        # Call the router with an explicit metadata argument
        await router.aresponses(
            model="gpt4o-special-alias",
            input="Hello, can you tell me a short joke?",
            metadata=test_metadata,
        )
        # Inspect the JSON body handed to the patched post call
        request_body = mock_post.call_args.kwargs["json"]
        print("Request body:", json.dumps(request_body, indent=4))
        assert (
            request_body["metadata"] == test_metadata
        ), "metadata in request body should match what was passed"
        mock_post.assert_called_once()
@pytest.mark.asyncio
async def test_openai_responses_litellm_router_with_prompt():
    """Test that a prompt object given to the Router reaches the request body unchanged"""
    prompt_obj = {
        "id": "pmpt_abc123",
        "version": "2",
        "variables": {"random_variable": "ishaan_from_litellm"},
    }
    empty_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
    canned_payload = {
        "id": "resp_123",
        "object": "response",
        "created_at": 1741476542,
        "status": "completed",
        "model": "gpt-4o",
        "output": [],
        "parallel_tool_calls": True,
        "usage": empty_usage,
        "text": {"format": {"type": "text"}},
        "error": None,
        "incomplete_details": None,
        "instructions": None,
        "metadata": {},
        "temperature": 1.0,
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "max_output_tokens": None,
        "previous_response_id": None,
        "reasoning": {"effort": None, "summary": None},
        "truncation": "disabled",
        "user": None,
    }

    class _StubHTTPResponse:
        """Stand-in for the HTTP response returned by the patched post call."""

        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = str(json_data)
            self.headers = httpx.Headers({})

        def json(self):
            return self._json_data

    post_target = "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post"
    with patch(post_target, new_callable=AsyncMock) as mock_post:
        mock_post.return_value = _StubHTTPResponse(canned_payload, 200)
        litellm._turn_on_debug()
        deployment = {
            "model_name": "gpt4o-special-alias",
            "litellm_params": {
                "model": "gpt-4o",
                "api_key": "fake-key",
            },
        }
        router = litellm.Router(model_list=[deployment])
        await router.aresponses(
            model="gpt4o-special-alias",
            input="Hello",
            prompt=prompt_obj,
        )
        # The prompt object must appear verbatim in the outgoing JSON body
        sent_body = mock_post.call_args.kwargs["json"]
        assert sent_body["prompt"] == prompt_obj
        mock_post.assert_called_once()
def test_bad_request_bad_param_error():
    """Expect litellm.BadRequestError from a sync call with an invalid parameter value"""
    try:
        litellm.responses(model="gpt-4o", input="This should fail", temperature=2000)
    except litellm.BadRequestError as err:
        # Expected outcome: dump the error details for the test log
        print(f"Exception raised: {err}")
        print(f"Exception type: {type(err)}")
        print(f"Exception args: {err.args}")
        print(f"Exception details: {err.__dict__}")
    except Exception as err:
        pytest.fail(f"Unexpected exception raised: {err}")
    else:
        # The call returned normally, which is a failure for this test
        pytest.fail("Expected BadRequestError but no exception was raised")
@pytest.mark.asyncio()
async def test_async_bad_request_bad_param_error():
    """Expect litellm.BadRequestError from an async call with an invalid parameter value"""
    try:
        await litellm.aresponses(
            model="gpt-4o", input="This should fail", temperature=2000
        )
    except litellm.BadRequestError as err:
        # Expected outcome: dump the error details for the test log
        print(f"Exception raised: {err}")
        print(f"Exception type: {type(err)}")
        print(f"Exception args: {err.args}")
        print(f"Exception details: {err.__dict__}")
    except Exception as err:
        pytest.fail(f"Unexpected exception raised: {err}")
    else:
        # The call returned normally, which is a failure for this test
        pytest.fail("Expected BadRequestError but no exception was raised")
@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [True, False])
async def test_openai_o1_pro_response_api(sync_mode):
    """
    Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model
    due to reaching max_output_tokens limit.

    `sync_mode` selects which client is exercised: True patches the sync HTTP
    handler and calls litellm.responses, False patches the async handler and
    awaits litellm.aresponses. The parameter used to be ignored, so both
    parametrized cases ran the same async path.
    """
    # Mock response from o1-pro
    mock_response = {
        "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88",
        "object": "response",
        "created_at": 1742486999,
        "status": "incomplete",
        "error": None,
        "incomplete_details": {"reason": "max_output_tokens"},
        "instructions": None,
        "max_output_tokens": 20,
        "model": "o1-pro-2025-03-19",
        "output": [
            {
                "type": "reasoning",
                "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88",
                "summary": [],
            }
        ],
        "parallel_tool_calls": True,
        "previous_response_id": None,
        "reasoning": {"effort": "medium", "generate_summary": None},
        "store": True,
        "temperature": 1.0,
        "text": {"format": {"type": "text"}},
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "truncation": "disabled",
        "usage": {
            "input_tokens": 73,
            "input_tokens_details": {"cached_tokens": 0},
            "output_tokens": 20,
            "output_tokens_details": {"reasoning_tokens": 0},
            "total_tokens": 93,
        },
        "user": None,
        "metadata": {},
    }

    class MockResponse:
        """Stand-in for the HTTP response returned by the patched post call."""

        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)
            self.headers = httpx.Headers({})

        def json(self):  # plain method, not a coroutine
            return self._json_data

    litellm._turn_on_debug()
    litellm.set_verbose = True

    # Call o1-pro with max_output_tokens=20 through the selected client
    if sync_mode:
        with patch(
            "litellm.llms.custom_httpx.http_handler.HTTPHandler.post",
            return_value=MockResponse(mock_response, 200),
        ) as mock_post:
            response = litellm.responses(
                model="openai/o1-pro",
                input="Write a detailed essay about artificial intelligence and its impact on society",
                max_output_tokens=20,
            )
    else:
        with patch(
            "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
            new_callable=AsyncMock,
        ) as mock_post:
            # Configure the mock to return our response
            mock_post.return_value = MockResponse(mock_response, 200)
            response = await litellm.aresponses(
                model="openai/o1-pro",
                input="Write a detailed essay about artificial intelligence and its impact on society",
                max_output_tokens=20,
            )

    # Verify the request was made correctly; the mock keeps its call record
    # after the patch context has exited
    mock_post.assert_called_once()
    request_body = mock_post.call_args.kwargs["json"]
    assert request_body["model"] == "o1-pro"
    assert request_body["max_output_tokens"] == 20

    # Validate the response
    print("Response:", json.dumps(response, indent=4, default=str))
    # Check that the response has the expected structure
    assert response["id"] is not None
    assert response["status"] == "incomplete"
    assert response["incomplete_details"].reason == "max_output_tokens"
    assert response["max_output_tokens"] == 20
    # Validate usage information
    assert response["usage"]["input_tokens"] == 73
    assert response["usage"]["output_tokens"] == 20
    assert response["usage"]["total_tokens"] == 93
    # Run the shared structural validation on the final response
    validate_responses_api_response(response, final_chunk=True)
@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [True, False])
async def test_openai_o1_pro_response_api_streaming(sync_mode):
    """
    Request a streamed o1-pro response (sync or async) against a canned, incomplete
    (max_output_tokens) HTTP reply and check the request body that was sent.

    The outgoing body is expected to carry the bare model name and the token
    limit, and to omit `stream`.
    NOTE(review): presumably litellm emulates streaming for this model on top of
    a non-streaming request - confirm.
    """
    reasoning_item = {
        "type": "reasoning",
        "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88",
        "summary": [],
    }
    token_usage = {
        "input_tokens": 73,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens": 20,
        "output_tokens_details": {"reasoning_tokens": 0},
        "total_tokens": 93,
    }
    # Canned reply standing in for o1-pro
    canned_payload = {
        "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88",
        "object": "response",
        "created_at": 1742486999,
        "status": "incomplete",
        "error": None,
        "incomplete_details": {"reason": "max_output_tokens"},
        "instructions": None,
        "max_output_tokens": 20,
        "model": "o1-pro-2025-03-19",
        "output": [reasoning_item],
        "parallel_tool_calls": True,
        "previous_response_id": None,
        "reasoning": {"effort": "medium", "generate_summary": None},
        "store": True,
        "temperature": 1.0,
        "text": {"format": {"type": "text"}},
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "truncation": "disabled",
        "usage": token_usage,
        "user": None,
        "metadata": {},
    }

    class _StubHTTPResponse:
        """Stand-in for the HTTP response returned by the patched post calls."""

        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)
            self.headers = httpx.Headers({})

        def json(self):
            return self._json_data

    stream_kwargs = dict(
        model="openai/o1-pro",
        input="Write a detailed essay about artificial intelligence and its impact on society",
        max_output_tokens=20,
        stream=True,
    )

    def _assert_request_body(post_mock):
        # Exactly one HTTP call, carrying the expected non-streaming body
        post_mock.assert_called_once()
        sent_body = post_mock.call_args.kwargs["json"]
        assert sent_body["model"] == "o1-pro"
        assert sent_body["max_output_tokens"] == 20
        assert "stream" not in sent_body

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        # The async handler returns the canned reply
        mock_post.return_value = _StubHTTPResponse(canned_payload, 200)
        litellm._turn_on_debug()
        litellm.set_verbose = True

        if not sync_mode:
            # Async mode: the outer patch is the one under test
            response = await litellm.aresponses(**stream_kwargs)
            event_count = 0
            async for event in response:
                print(
                    f"Async litellm response #{event_count}:",
                    json.dumps(event, indent=4, default=str),
                )
                event_count += 1
            _assert_request_body(mock_post)
        else:
            # Sync mode: additionally patch the sync HTTP handler and check that one
            with patch(
                "litellm.llms.custom_httpx.http_handler.HTTPHandler.post",
                return_value=_StubHTTPResponse(canned_payload, 200),
            ) as mock_sync_post:
                response = litellm.responses(**stream_kwargs)
                for event_count, event in enumerate(response):
                    print(
                        f"Sync litellm response #{event_count}:",
                        json.dumps(event, indent=4, default=str),
                    )
                _assert_request_body(mock_sync_post)
def test_basic_computer_use_preview_tool_call():
    """
    Verify that a ``computer_use_preview`` tool definition reaches the OpenAI request body intact.

    The tool is sent with ``environment="linux"``. linux is an unsupported environment for the
    computer_use_preview tool, but litellm users should still be able to pass it to openai.
    The sync HTTP handler's ``post`` is patched, so the canned payload below is what comes back.
    """
    prompt = "Check the latest OpenAI news on bing.com."

    # Canned OpenAI payload returned by the patched HTTP handler
    reasoning_output_item = {
        "type": "reasoning",
        "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88",
        "summary": [],
    }
    usage_block = {
        "input_tokens": 73,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens": 20,
        "output_tokens_details": {"reasoning_tokens": 0},
        "total_tokens": 93,
    }
    mock_response = {
        "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88",
        "object": "response",
        "created_at": 1742486999,
        "status": "incomplete",
        "error": None,
        "incomplete_details": {"reason": "max_output_tokens"},
        "instructions": None,
        "max_output_tokens": 20,
        "model": "o1-pro-2025-03-19",
        "output": [reasoning_output_item],
        "parallel_tool_calls": True,
        "previous_response_id": None,
        "reasoning": {"effort": "medium", "generate_summary": None},
        "store": True,
        "temperature": 1.0,
        "text": {"format": {"type": "text"}},
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "truncation": "disabled",
        "usage": usage_block,
        "user": None,
        "metadata": {},
    }

    class MockResponse:
        """Stand-in response object: status code, JSON text, empty headers and a ``json()`` accessor."""

        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)
            self.headers = httpx.Headers({})

        def json(self):
            return self._json_data

    canned_reply = MockResponse(mock_response, 200)
    with patch(
        "litellm.llms.custom_httpx.http_handler.HTTPHandler.post",
        return_value=canned_reply,
    ) as mock_post:
        litellm._turn_on_debug()
        litellm.set_verbose = True

        # Issue the Responses API call carrying the computer_use_preview tool
        response = litellm.responses(
            model="openai/computer-use-preview",
            tools=[
                {
                    "type": "computer_use_preview",
                    "display_width": 1024,
                    "display_height": 768,
                    "environment": "linux",  # other possible values: "mac", "windows", "ubuntu"
                }
            ],
            input=prompt,
            reasoning={"summary": "concise"},
            truncation="auto",
        )

        # Exactly one HTTP request must have been sent
        mock_post.assert_called_once()
        request_body = mock_post.call_args.kwargs["json"]

        # The provider prefix is stripped from the model name in the outgoing body
        assert request_body["model"] == "computer-use-preview"

        # The single tool definition must come through field by field
        sent_tools = request_body["tools"]
        assert len(sent_tools) == 1
        expected_tool_fields = {
            "type": "computer_use_preview",
            "display_width": 1024,
            "display_height": 768,
            "environment": "linux",
        }
        for field_name, expected_value in expected_tool_fields.items():
            assert sent_tools[0][field_name] == expected_value

        # Reasoning and truncation settings are forwarded as given
        assert request_body["reasoning"]["summary"] == "concise"
        assert request_body["truncation"] == "auto"

        # The input stays a plain string
        sent_input = request_body["input"]
        assert isinstance(sent_input, str)
        assert sent_input == "Check the latest OpenAI news on bing.com."
def test_mcp_tools_with_responses_api():
    """
    Exercise the two-step MCP approval flow through ``litellm.responses``.

    Step 1 sends the user query with an MCP tool attached and looks for an
    ``mcp_approval_request`` item in the output. Step 2, only when such an item was
    found, replies with an ``mcp_approval_response`` chained to the first response.

    The test is skipped (not failed) when the error text points at the external MCP
    server or an OpenAI server error; any other error propagates.
    """
    litellm._turn_on_debug()

    zapier_token = os.getenv("ZAPIER_CI_CD_MCP_TOKEN")
    mcp_tools = [
        {
            "type": "mcp",
            "server_label": "zapier",
            "server_url": "https://mcp.zapier.com/api/mcp/mcp",
            "headers": {"Authorization": f"Bearer {zapier_token}"},
        }
    ]
    model_name = "openai/gpt-4.1"
    user_query = "how does tiktoken work?"

    try:
        #########################################################
        # Step 1: OpenAI will use MCP LIST, and return a list of MCP calls for our approval
        response = litellm.responses(model=model_name, tools=mcp_tools, input=user_query)
        print(response)
        response = cast(ResponsesAPIResponse, response)

        # First approval request in the output, if any
        mcp_approval_id: Optional[str] = next(
            (item.id for item in response.output if item.type == "mcp_approval_request"),
            None,
        )

        # Step 2: Send followup with approval for the MCP call
        if mcp_approval_id:
            approval_message = {
                "type": "mcp_approval_response",
                "approve": True,
                "approval_request_id": mcp_approval_id,
            }
            response_with_mcp_call = litellm.responses(
                model=model_name,
                tools=mcp_tools,
                input=[approval_message],
                previous_response_id=response.id,
            )
            print(response_with_mcp_call)
    except litellm.APIError as e:
        error_text = str(e)
        external_failure_markers = ("424", "Failed Dependency", "external_connector_error")
        if not any(marker in error_text for marker in external_failure_markers):
            raise
        pytest.skip(f"Skipping test due to external MCP server error: {e}")
    except litellm.InternalServerError as e:
        error_text = str(e)
        if "500" not in error_text and "server_error" not in error_text:
            raise
        pytest.skip(
            f"Skipping test due to OpenAI server error (likely MCP server unavailable): {e}"
        )
@pytest.mark.asyncio
async def test_openai_responses_api_field_types():
    """Test that specific fields in the response have the correct types.

    Two calls are made to ``gpt-4o``:

    1. With ``store=True`` passed explicitly: ``created_at`` must be an ``int`` and
       ``store`` must come back as ``True``.
    2. Without a ``store`` argument: ``created_at`` must be an ``int`` and the
       ``store`` attribute must exist (its value is not asserted).
    """
    litellm._turn_on_debug()
    litellm.set_verbose = True

    # Test with store=True
    # `store` is passed explicitly so the "should match input value" assertion below
    # really compares against an input value rather than a provider default.
    response = await litellm.aresponses(
        model="gpt-4o",
        input="hi",
        store=True,
    )

    # Verify created_at is an integer
    assert isinstance(response.created_at, int), "created_at should be an integer"

    # Verify store field is present and matches input
    assert hasattr(response, "store"), "store field should be present"
    assert response.store is True, "store field should match input value"

    # Test without store parameter
    response_without_store = await litellm.aresponses(model="gpt-4o", input="hi")

    # Verify created_at is still an integer
    assert isinstance(
        response_without_store.created_at, int
    ), "created_at should be an integer"

    # Verify store field is present; its value is intentionally not asserted here
    assert hasattr(response_without_store, "store"), "store field should be present"
@pytest.mark.asyncio
async def test_store_field_transformation():
    """
    Check how ``OpenAIResponsesAPIConfig.transform_response_api_response`` reports ``store``.

    Four raw HTTP responses are built from one base payload: ``store`` set to ``True``,
    ``False``, ``null``, and left out. Each is transformed and the resulting
    ``response.store`` is compared with the expected value. The last transformed
    response is also used to check that ``created_at`` is an ``int`` with the original value.
    """
    config = OpenAIResponsesAPIConfig()

    # Logging object with the parameters its constructor is given in this test
    logging_obj = LiteLLMLoggingObj(
        model="gpt-4o",
        messages=[],
        stream=False,
        call_type="aresponses",
        start_time=time.time(),
        litellm_call_id="test-call-id",
        function_id="test-function-id",
    )

    # Shared payload; each case below adds (or omits) the "store" key
    assistant_message = {
        "type": "message",
        "id": "msg_1",
        "status": "completed",
        "role": "assistant",
        "content": [{"type": "output_text", "text": "Hello", "annotations": []}],
    }
    base_response = {
        "id": "test_id",
        "created_at": 1751443898,
        "model": "gpt-4o",
        "object": "response",
        "output": [assistant_message],
        "parallel_tool_calls": True,
        "tool_choice": "auto",
        "tools": [],
        "error": None,
        "incomplete_details": None,
        "instructions": "test instructions",
        "metadata": {},
        "temperature": 0.7,
        "top_p": 1.0,
        "max_output_tokens": 100,
        "previous_response_id": None,
        "reasoning": None,
        "status": "completed",
        "text": None,
        "truncation": "auto",
        "usage": {"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
        "user": "test_user",
    }

    def _as_http_response(payload):
        # Wrap a payload dict in a 200 httpx.Response with a JSON-encoded body
        return httpx.Response(status_code=200, content=json.dumps(payload).encode())

    def _transform(raw_response):
        # Run the transformation under test with the shared logging object
        return config.transform_response_api_response(
            model="gpt-4o", raw_response=raw_response, logging_obj=logging_obj
        )

    # Test case 1: API returns store=True
    mock_response_store_true = _as_http_response(dict(base_response, store=True))
    # Test case 2: API returns store=False
    mock_response_store_false = _as_http_response(dict(base_response, store=False))
    # Test case 3: API returns store=null
    mock_response_store_null = _as_http_response(dict(base_response, store=None))
    # Test case 4: API omits store field
    mock_response_no_store = _as_http_response(base_response)

    # Test when store=True in request
    logging_obj.optional_params = {"store": True}
    response = _transform(mock_response_store_true)
    assert (
        response.store is True
    ), "store should be True when specified in request and API returns True"

    # Test when store=False in request
    logging_obj.optional_params = {"store": False}
    response = _transform(mock_response_store_false)
    assert (
        response.store is False
    ), "store should be False when specified in request and API returns False"

    # NOTE(review): optional_params is not reset after the previous case, so it still
    # holds {"store": False} for the two "not in request" cases below - confirm intended.

    # Test when store not in request but API returns null
    response = _transform(mock_response_store_null)
    assert (
        response.store is None
    ), "store should be None when not specified in request and API returns null"

    # Test when store not in request and API omits store field
    response = _transform(mock_response_no_store)
    assert (
        response.store is None
    ), "store should be None when not specified in request and API omits store"

    # Verify created_at is always converted to integer
    created_at = response.created_at
    assert isinstance(
        created_at, int
    ), "created_at should always be converted to integer"
    assert (
        created_at == 1751443898
    ), "created_at should maintain the same value after conversion"
@pytest.mark.asyncio
async def test_aresponses_service_tier_and_safety_identifier():
    """
    Confirm that ``service_tier`` and ``safety_identifier`` end up in the request body
    sent by ``litellm.aresponses``.

    The async HTTP handler's ``post`` is replaced by an ``AsyncMock`` returning the
    canned payload below, and the JSON body of the recorded call is inspected.
    """
    text_output_item = {
        "type": "text",
        "id": "out_01234567890abcdef",
        "text": "This is a test response with service tier and safety identifier.",
    }
    usage_block = {
        "input_tokens": 15,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens": 25,
        "output_tokens_details": {"reasoning_tokens": 0},
        "total_tokens": 40,
    }
    mock_response = {
        "id": "resp_01234567890abcdef",
        "object": "response",
        "created_at": 1753060947,
        "status": "completed",
        "error": None,
        "incomplete_details": None,
        "instructions": None,
        "max_output_tokens": None,
        "model": "gpt-4o-2024-05-13",
        "output": [text_output_item],
        "parallel_tool_calls": True,
        "previous_response_id": None,
        "reasoning": None,
        "store": True,
        "temperature": 1.0,
        "text": {"format": {"type": "text"}},
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "truncation": "disabled",
        "usage": usage_block,
        "user": None,
        "metadata": {},
    }

    class MockResponse:
        """Stand-in response object: status code, JSON text, empty headers and a ``json()`` accessor."""

        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)
            self.headers = httpx.Headers({})

        def json(self):
            return self._json_data

    user_input = "Test with service tier and safety identifier"

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        # The awaited post() resolves to the canned response
        mock_post.return_value = MockResponse(mock_response, 200)

        litellm._turn_on_debug()
        litellm.set_verbose = True

        # Call under test: both extra parameters supplied
        response = await litellm.aresponses(
            model="openai/gpt-4o",
            input=user_input,
            service_tier="flex",
            safety_identifier="123",
        )

        # Exactly one HTTP request must have been sent
        mock_post.assert_called_once()
        request_body = mock_post.call_args.kwargs["json"]
        print("request_body=", json.dumps(request_body, indent=4, default=str))

        # Both parameters must appear in the outgoing body with the given values
        sent_service_tier = request_body["service_tier"]
        assert (
            sent_service_tier == "flex"
        ), "service_tier should be 'flex' in request body"
        sent_safety_identifier = request_body["safety_identifier"]
        assert (
            sent_safety_identifier == "123"
        ), "safety_identifier should be '123' in request body"
        assert request_body["model"] == "gpt-4o"
        assert request_body["input"] == "Test with service tier and safety identifier"

        # Dump the parsed response for debugging
        print("Response:", json.dumps(response, indent=4, default=str))
@pytest.mark.asyncio
async def test_openai_gpt5_reasoning_effort_parameter():
    """Check that the ``reasoning`` effort given to ``litellm.aresponses`` is sent in the HTTP request for a GPT-5 model.

    The async HTTP handler's ``post`` is replaced by an ``AsyncMock`` returning the
    canned payload below, and the JSON body of the recorded call is inspected.
    """
    question = "What is the capital of France?"

    # Canned Responses API payload returned by the patched handler
    answer_message = {
        "type": "message",
        "id": "msg_123",
        "status": "completed",
        "role": "assistant",
        "content": [
            {
                "type": "output_text",
                "text": "The capital of France is Paris.",
                "annotations": [],
            }
        ],
    }
    usage_block = {
        "input_tokens": 15,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens": 8,
        "output_tokens_details": {"reasoning_tokens": 0},
        "total_tokens": 23,
    }
    mock_response = {
        "id": "resp_01ABC123",
        "object": "response",
        "created_at": 1729621667,
        "status": "completed",
        "model": "gpt-5-mini",
        "output": [answer_message],
        "parallel_tool_calls": True,
        "usage": usage_block,
        "text": {"format": {"type": "text"}},
        "error": None,
        "incomplete_details": None,
        "instructions": None,
        "metadata": {},
        "temperature": 1.0,
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "max_output_tokens": None,
        "previous_response_id": None,
        "reasoning": {"effort": "low", "summary": None},
        "truncation": "disabled",
        "user": None,
    }

    class MockResponse:
        """Stand-in response object: status code, JSON text, empty headers and a ``json()`` accessor."""

        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)
            self.headers = httpx.Headers({})

        def json(self):
            return self._json_data

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        # The awaited post() resolves to the canned response
        mock_post.return_value = MockResponse(mock_response, 200)

        litellm._turn_on_debug()
        litellm.set_verbose = True

        # Call under test: reasoning effort supplied by the caller
        response = await litellm.aresponses(
            model="openai/gpt-5-mini",
            input=question,
            reasoning={"effort": "minimal"},
        )

        # Exactly one HTTP request must have been sent
        mock_post.assert_called_once()
        request_body = mock_post.call_args.kwargs["json"]
        print("request_body=", json.dumps(request_body, indent=4, default=str))
        print("reasoning=", request_body["reasoning"])

        # The reasoning block must be present and carry the caller's effort value
        assert (
            "reasoning" in request_body
        ), "reasoning should be present in request body"
        sent_effort = request_body["reasoning"]["effort"]
        assert (
            sent_effort == "minimal"
        ), "reasoning_effort should be 'minimal' in request body"
        assert request_body["model"] == "gpt-5-mini"
        assert request_body["input"] == "What is the capital of France?"

        # Dump the parsed response for debugging
        print("Response:", json.dumps(response, indent=4, default=str))
@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [True, False])
async def test_basic_openai_responses_with_websearch(stream):
    """Call ``litellm.aresponses`` on ``gpt-4o`` with a ``web_search`` tool, streaming and non-streaming.

    The test has no assertions; it passes when the call (and, for streaming, the
    iteration over all chunks) completes without raising.
    """
    litellm._turn_on_debug()

    web_search_tool = {"type": "web_search", "search_context_size": "low"}
    response = await litellm.aresponses(
        model="gpt-4o",
        stream=stream,
        input="hi",
        tools=[web_search_tool],
    )

    if not stream:
        print("response=", json.dumps(response, indent=4, default=str))
        return

    # Streaming: drain the iterator, dumping every chunk
    async for chunk in response:
        print("chunk=", json.dumps(chunk, indent=4, default=str))
@pytest.mark.asyncio
async def test_openai_responses_api_token_limit_error():
    """
    Stream a request with a deliberately oversized input through ``litellm.aresponses``.

    Relevant issue: https://github.com/BerriAI/litellm/issues/15785

    When this fails you'll see:
    "pydantic_core._pydantic_core.ValidationError: 3 validation errors for ErrorEvent"
    in the console.
    """
    litellm._turn_on_debug()

    # Repeat one sentence 50000 times (~400k tokens) to trigger the token limit error
    sentence = "This is a test sentence. "
    oversized_text = sentence * 50000

    # In the failure mode from the linked issue, a ValidationError is raised
    # instead of the real error being shown
    response = await litellm.aresponses(
        model="gpt-5-mini",
        input=oversized_text,
        stream=True,
    )

    # In that failure mode the loop body is never reached
    async for event in response:
        print(event)
@pytest.mark.asyncio
async def test_openai_streaming_logging():
    """Test that OpenAI Responses API streaming logging is working correctly.

    A custom logger is registered in ``litellm.callbacks``, a streaming response is
    requested from ``gpt-5-mini`` and fully consumed, and after a short wait the test
    asserts that the logger's success hook ran its usage checks.

    The ``@pytest.mark.asyncio`` marker matches every other coroutine test in this
    module; without it the coroutine is not awaited when pytest-asyncio runs in
    strict mode.
    """
    litellm._turn_on_debug()
    from litellm.integrations.custom_logger import CustomLogger
    from litellm.types.utils import Usage

    class TestCustomLogger(CustomLogger):
        # Flipped to True once the success hook has checked the usage object
        validate_usage = False

        def __init__(self):
            self.standard_logging_object: Optional[StandardLoggingPayload] = None

        async def async_log_success_event(
            self, kwargs, response_obj, start_time, end_time
        ):
            print(f"response_obj: {response_obj.usage}")
            assert isinstance(
                response_obj.usage, (Usage, dict)
            ), f"Expected response_obj.usage to be of type Usage or dict, but got {type(response_obj.usage)}"
            # Verify it has the chat completion format fields
            if isinstance(response_obj.usage, dict):
                assert "prompt_tokens" in response_obj.usage, "Usage dict should have prompt_tokens"
                assert "completion_tokens" in response_obj.usage, "Usage dict should have completion_tokens"
            print("\n\nVALIDATED USAGE\n\n")
            self.validate_usage = True

    tcl = TestCustomLogger()
    litellm.callbacks = [tcl]

    request_model = "gpt-5-mini"
    response = await litellm.aresponses(
        model=request_model,
        input="What is the capital of France?",
        stream=True,
    )
    print("response=", json.dumps(response, indent=4, default=str))

    # Drain the stream so the success callback can be triggered
    async for event in response:
        print("litellm response=", json.dumps(event, indent=4, default=str))

    # Give the logging callback time to run before checking its flag
    await asyncio.sleep(2)
    assert tcl.validate_usage, "Usage should be validated"
# Tests for extra_body parameter passing
class MockResponse:
    """Stand-in for the HTTP response returned by the patched ``post`` calls in the extra_body tests.

    Attributes set by the constructor:
        status_code: the status code passed in.
        text: ``json_data`` serialized as JSON text.
        headers: an empty ``httpx.Headers`` instance.

    Args:
        json_data: payload returned as-is by :meth:`json`.
        status_code: HTTP status code to report.
    """

    def __init__(self, json_data, status_code):
        self._json_data = json_data
        self.status_code = status_code
        # Use real JSON, like the other MockResponse helpers in this module;
        # str() would produce a Python repr (single quotes, None/True) that is not
        # parseable as JSON. default=str keeps construction from failing on values
        # json cannot encode natively.
        self.text = json.dumps(json_data, default=str)
        self.headers = httpx.Headers({})

    def json(self):
        """Return the payload object given to the constructor."""
        return self._json_data
@pytest.fixture
def extra_body_mock_response_data():
    """Provide the canned Responses API payload used by the extra_body tests.

    Returns:
        A new dict on each call describing a completed ``gpt-4o`` response with a
        single assistant message ("Hello!") and a small usage block.
    """
    assistant_message = {
        "type": "message",
        "id": "msg_123",
        "status": "completed",
        "role": "assistant",
        "content": [{"type": "output_text", "text": "Hello!", "annotations": []}],
    }
    token_usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}

    payload = {
        "id": "resp_test123",
        "object": "response",
        "created_at": 1234567890,
        "status": "completed",
        "model": "gpt-4o",
        "output": [assistant_message],
        "usage": token_usage,
        "parallel_tool_calls": True,
        "text": {"format": {"type": "text"}},
        "error": None,
        "metadata": {},
        "temperature": 1.0,
        "reasoning": {"effort": None, "summary": None},
    }
    return payload
@pytest.mark.asyncio
async def test_aresponses_extra_body_params_passed(extra_body_mock_response_data):
    """Check that ``extra_body`` entries appear in the request body of an async ``aresponses`` call."""
    canned_reply = MockResponse(extra_body_mock_response_data, 200)

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        mock_post.return_value = canned_reply

        custom_fields = {
            "custom_param_1": "value1",
            "custom_param_2": {"nested": "value2"},
            "experimental_feature": True,
        }
        response = await litellm.aresponses(
            model="gpt-4o",
            input="Test input",
            max_output_tokens=20,
            extra_body=custom_fields,
        )

        # A parsed response with an id came back
        assert response is not None
        assert response.id is not None

        request_body = mock_post.call_args.kwargs["json"]

        # Every custom key must be present in the outgoing body
        for custom_key in ("custom_param_1", "custom_param_2", "experimental_feature"):
            assert custom_key in request_body

        # ...with the values supplied by the caller
        assert request_body["custom_param_1"] == "value1"
        assert request_body["custom_param_2"]["nested"] == "value2"
        assert request_body["experimental_feature"] is True

        # The regular parameters are still sent alongside them
        assert request_body["model"] == "gpt-4o"
        assert request_body["input"] == "Test input"
def test_responses_extra_body_params_passed_sync(extra_body_mock_response_data):
    """Check that ``extra_body`` entries appear in the request body of a sync ``responses`` call."""
    canned_reply = MockResponse(extra_body_mock_response_data, 200)

    with patch(
        "litellm.llms.custom_httpx.http_handler.HTTPHandler.post",
        return_value=canned_reply,
    ) as mock_post:
        custom_fields = {
            "sync_custom_param": "sync_value",
            "another_param": 42,
        }
        response = litellm.responses(
            model="gpt-4o",
            input="Sync test",
            max_output_tokens=20,
            extra_body=custom_fields,
        )

        # A parsed response with an id came back
        assert response is not None
        assert response.id is not None

        request_body = mock_post.call_args.kwargs["json"]

        # Both custom keys must be present in the outgoing body
        for custom_key in ("sync_custom_param", "another_param"):
            assert custom_key in request_body

        # ...with the values supplied by the caller
        assert request_body["sync_custom_param"] == "sync_value"
        assert request_body["another_param"] == 42

        # The regular model parameter is still sent
        assert request_body["model"] == "gpt-4o"
@pytest.mark.asyncio
async def test_extra_body_merges_with_request_data(extra_body_mock_response_data):
    """Check that an ``extra_body`` field is sent alongside a regular parameter (``temperature``) in one request body."""
    canned_reply = MockResponse(extra_body_mock_response_data, 200)

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        mock_post.return_value = canned_reply

        await litellm.aresponses(
            model="gpt-4o",
            input="Test",
            temperature=0.7,
            max_output_tokens=20,
            extra_body={"custom_field": "custom_value"},
        )

        request_body = mock_post.call_args.kwargs["json"]

        # The standard parameter and the extra field share the same body
        for expected_key in ("temperature", "custom_field"):
            assert expected_key in request_body
        assert request_body["custom_field"] == "custom_value"
@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [True, False])
async def test_openai_compact_responses_api(sync_mode):
    """
    Exercise the compact_responses API for OpenAI in sync and async mode.

    A short three-message conversation is sent to ``compact_responses`` /
    ``acompact_responses`` and the result is checked for an ``id`` field and a list
    ``output`` field. The test is skipped when the call raises
    ``InternalServerError`` or ``BadRequestError``.
    """
    litellm._turn_on_debug()
    litellm.set_verbose = True

    input_messages = [
        {"role": "user", "content": "Hello, how are you?"},
        {"role": "assistant", "content": "I'm doing well, thank you for asking!"},
        {"role": "user", "content": "What is the weather like today?"},
    ]

    # Same arguments for both the sync and the async entry point
    compact_kwargs = {
        "model": "openai/gpt-4o",
        "input": input_messages,
        "instructions": "Be helpful and concise",
    }

    try:
        if not sync_mode:
            response = await litellm.acompact_responses(**compact_kwargs)
        else:
            response = litellm.compact_responses(**compact_kwargs)
    except litellm.InternalServerError:
        pytest.skip("Skipping test due to InternalServerError")
    except litellm.BadRequestError as e:
        # compact_responses may not be available for all models/accounts
        pytest.skip(f"Skipping test due to BadRequestError: {e}")

    print("compact_responses response=", json.dumps(response, indent=4, default=str))

    # Validate response structure
    assert response is not None
    assert "id" in response, "Response should have an 'id' field"
    assert "output" in response, "Response should have an 'output' field"
    assert isinstance(response["output"], list), "Output should be a list"