Files
litellm/tests/test_litellm/proxy/response_api_endpoints/test_endpoints.py
T
2026-04-17 13:02:59 -07:00

199 lines
7.0 KiB
Python

"""
Test for response_api_endpoints/endpoints.py
"""
import unittest
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from fastapi.testclient import TestClient
from litellm.proxy.proxy_server import app
class TestResponsesAPIEndpoints(unittest.TestCase):
    """Route-level tests for the proxy's Responses API endpoints.

    Every test is a plain synchronous method on purpose:

    * ``unittest.TestCase`` does not await ``async def`` test methods; the
      coroutine object is created and discarded, so such a test reports
      success without running any of its assertions.
    * ``TestClient`` is a blocking client, so none of the test bodies need
      ``await``.

    Each test patches ``llm_router`` and ``user_api_key_auth`` on
    ``litellm.proxy.proxy_server``. ``@patch`` decorators are applied
    bottom-up, which is why ``mock_auth`` precedes ``mock_router`` in the
    method signatures.

    NOTE(review): patching the module attribute may not replace a dependency
    that FastAPI already captured through ``Depends`` at route-registration
    time. If a test that expects HTTP 200 starts failing now that the tests
    really execute, consider ``app.dependency_overrides`` instead - confirm
    against the proxy's auth wiring.
    """

    @patch("litellm.proxy.proxy_server.llm_router")
    @patch("litellm.proxy.proxy_server.user_api_key_auth")
    def test_openai_v1_responses_route(self, mock_auth, mock_router):
        """
        Test that /openai/v1/responses endpoint is correctly registered and accessible.
        """
        mock_auth.return_value = MagicMock(
            token="test_token",
            user_id="test_user",
            team_id=None,
        )
        # NOTE(review): the OpenAI Responses API reports object == "response";
        # "realtime.response" looks like a copy/paste from the Realtime API.
        # Left unchanged because the assertion below does not inspect it.
        mock_router.aresponses = AsyncMock(
            return_value={
                "id": "resp_abc123",
                "object": "realtime.response",
                "status": "completed",
                "output": [
                    {
                        "type": "message",
                        "role": "assistant",
                        "content": [{"type": "text", "text": "Test response"}],
                    }
                ],
            }
        )

        client = TestClient(app)
        test_data = {"model": "gpt-4o", "input": "Tell me about AI"}
        response = client.post(
            "/openai/v1/responses",
            json=test_data,
            headers={"Authorization": "Bearer sk-1234"},
        )

        # Only route registration is being checked: any of these codes means
        # the request reached a handler (an unregistered path would be 404).
        assert response.status_code in [200, 401, 500]

    @patch("litellm.proxy.proxy_server.llm_router")
    @patch("litellm.proxy.proxy_server.user_api_key_auth")
    def test_cursor_chat_completions_route(self, mock_auth, mock_router):
        """
        Test that /cursor/chat/completions endpoint:
        1. Accepts Responses API input format
        2. Returns chat completions format response

        NOTE(review): streaming transformation is not exercised here; the
        request below is non-streaming.
        """
        from litellm.types.llms.openai import ResponsesAPIResponse
        from litellm.types.utils import ResponseOutputMessage, ResponseOutputText

        mock_auth.return_value = MagicMock(
            token="test_token",
            user_id="test_user",
            team_id=None,
        )

        # Mock a Responses API response
        mock_responses_response = ResponsesAPIResponse(
            id="resp_cursor123",
            created_at=1234567890,
            model="gpt-4o",
            object="response",
            output=[
                ResponseOutputMessage(
                    type="message",
                    role="assistant",
                    content=[
                        ResponseOutputText(
                            type="output_text", text="Hello from Cursor!"
                        )
                    ],
                )
            ],
        )
        mock_router.aresponses = AsyncMock(return_value=mock_responses_response)

        client = TestClient(app)
        # Test with Responses API input format (what Cursor sends)
        test_data = {
            "model": "gpt-4o",
            "input": [{"role": "user", "content": "Hello"}],
        }
        response = client.post(
            "/cursor/chat/completions",
            json=test_data,
            headers={"Authorization": "Bearer sk-1234"},
        )

        # Should return 200 (or 401/500 if auth fails)
        assert response.status_code in [200, 401, 500]

        # If successful, verify it returns chat completions format
        if response.status_code == 200:
            response_data = response.json()
            # Should have chat completion structure.
            # NOTE(review): "id" is also present on a Responses API payload,
            # so this check cannot tell the two formats apart on its own;
            # consider requiring "choices" once the endpoint contract is
            # confirmed.
            assert "choices" in response_data or "id" in response_data
            # Should not have Responses API structure (i.e. not both keys).
            assert "output" not in response_data or "status" not in response_data

    @patch("litellm.proxy.proxy_server.llm_router")
    @patch("litellm.proxy.proxy_server.user_api_key_auth")
    def test_responses_api_key_spend_header_includes_response_cost(
        self, mock_auth, mock_router
    ):
        """
        Test that x-litellm-key-spend header includes the current request's response_cost
        for /v1/responses endpoint.

        This ensures the spend header reflects updated spend including the current request,
        even though spend tracking updates happen asynchronously after the response.
        """
        from litellm.types.llms.openai import ResponsesAPIResponse
        from litellm.types.utils import ResponseOutputMessage, ResponseOutputText

        # Create mock user API key with initial spend
        mock_user_api_key_dict = MagicMock()
        mock_user_api_key_dict.token = "test_token"
        mock_user_api_key_dict.user_id = "test_user"
        mock_user_api_key_dict.team_id = None
        mock_user_api_key_dict.spend = 0.001  # Initial spend: $0.001
        mock_user_api_key_dict.tpm_limit = None
        mock_user_api_key_dict.rpm_limit = None
        mock_user_api_key_dict.max_budget = None
        mock_user_api_key_dict.allowed_model_region = None
        mock_user_api_key_dict.api_key = "sk-test-key"
        mock_user_api_key_dict.metadata = {}
        mock_auth.return_value = mock_user_api_key_dict

        # Mock response with hidden_params containing response_cost
        mock_response = ResponsesAPIResponse(
            id="resp_test123",
            created_at=1234567890,
            model="gpt-4o",
            object="response",
            output=[
                ResponseOutputMessage(
                    type="message",
                    role="assistant",
                    content=[
                        ResponseOutputText(type="output_text", text="Test response")
                    ],
                )
            ],
        )
        # Add hidden_params with response_cost to the mock response
        mock_response._hidden_params = {
            "response_cost": 0.0005,  # Current request cost: $0.0005
            "model_id": "test-model-id",
        }
        mock_router.aresponses = AsyncMock(return_value=mock_response)

        client = TestClient(app)
        test_data = {"model": "gpt-4o", "input": "Tell me about AI"}
        response = client.post(
            "/v1/responses",
            json=test_data,
            headers={"Authorization": "Bearer sk-test-key"},
        )

        # Verify the response was successful
        assert response.status_code == 200

        # Verify x-litellm-key-spend header includes current request cost
        assert "x-litellm-key-spend" in response.headers
        key_spend_value = float(response.headers["x-litellm-key-spend"])
        expected_spend = 0.001 + 0.0005  # Initial spend + current request cost
        assert key_spend_value == pytest.approx(expected_spend, abs=1e-10)

        # Verify x-litellm-response-cost header is present
        assert "x-litellm-response-cost" in response.headers
        response_cost_value = float(response.headers["x-litellm-response-cost"])
        assert response_cost_value == pytest.approx(0.0005, abs=1e-10)