mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-18 09:32:08 +00:00
197 lines
7.0 KiB
Python
197 lines
7.0 KiB
Python
"""
|
|
Tests for response_api_endpoints/endpoints.py
|
|
"""
|
|
import unittest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
from litellm.proxy.proxy_server import app
|
|
|
|
|
|
class TestResponsesAPIEndpoints(unittest.TestCase):
    """Route-level tests for the Responses API endpoints exposed by the proxy app.

    Every test patches ``litellm.proxy.proxy_server.llm_router`` and
    ``litellm.proxy.proxy_server.user_api_key_auth`` and then drives the app
    through FastAPI's ``TestClient``.

    The test methods are deliberately plain (synchronous) functions:
    ``unittest.TestCase`` does not await coroutine test methods and
    pytest-asyncio does not support ``TestCase`` subclasses, so an
    ``async def`` test here would create a coroutine that is never run and
    the test would pass without executing a single assertion. Nothing in
    these tests needs to be awaited because ``TestClient`` is synchronous.
    """

    @staticmethod
    def _build_responses_api_response(response_id, text):
        """Return a ``ResponsesAPIResponse`` with one assistant text message.

        Args:
            response_id: Value for the response's ``id`` field.
            text: Text of the single ``output_text`` content part.
        """
        from litellm.types.llms.openai import ResponsesAPIResponse
        from litellm.types.utils import ResponseOutputMessage, ResponseOutputText

        return ResponsesAPIResponse(
            id=response_id,
            created_at=1234567890,
            model="gpt-4o",
            object="response",
            output=[
                ResponseOutputMessage(
                    type="message",
                    role="assistant",
                    content=[ResponseOutputText(type="output_text", text=text)],
                )
            ],
        )

    @patch("litellm.proxy.proxy_server.llm_router")
    @patch("litellm.proxy.proxy_server.user_api_key_auth")
    def test_openai_v1_responses_route(self, mock_auth, mock_router):
        """
        Test that /openai/v1/responses endpoint is correctly registered and accessible.
        """
        mock_auth.return_value = MagicMock(
            token="test_token",
            user_id="test_user",
            team_id=None,
        )

        mock_router.aresponses = AsyncMock(
            return_value={
                "id": "resp_abc123",
                "object": "realtime.response",
                "status": "completed",
                "output": [
                    {
                        "type": "message",
                        "role": "assistant",
                        "content": [{"type": "text", "text": "Test response"}],
                    }
                ],
            }
        )

        client = TestClient(app)

        test_data = {"model": "gpt-4o", "input": "Tell me about AI"}

        response = client.post(
            "/openai/v1/responses",
            json=test_data,
            headers={"Authorization": "Bearer sk-1234"},
        )

        # 401 and 500 are tolerated on purpose: this test only checks that the
        # route exists (i.e. the request is not answered with 404/405).
        assert response.status_code in [200, 401, 500]

    @patch("litellm.proxy.proxy_server.llm_router")
    @patch("litellm.proxy.proxy_server.user_api_key_auth")
    def test_cursor_chat_completions_route(self, mock_auth, mock_router):
        """
        Test that /cursor/chat/completions endpoint:
        1. Accepts Responses API input format
        2. Returns chat completions format response

        Only a non-streaming request is sent; streaming transformation is not
        exercised by this test.
        """
        mock_auth.return_value = MagicMock(
            token="test_token",
            user_id="test_user",
            team_id=None,
        )

        # Mock a Responses API response
        mock_responses_response = self._build_responses_api_response(
            "resp_cursor123", "Hello from Cursor!"
        )
        mock_router.aresponses = AsyncMock(return_value=mock_responses_response)

        client = TestClient(app)

        # Test with Responses API input format (what Cursor sends)
        test_data = {
            "model": "gpt-4o",
            "input": [{"role": "user", "content": "Hello"}],
        }

        response = client.post(
            "/cursor/chat/completions",
            json=test_data,
            headers={"Authorization": "Bearer sk-1234"},
        )

        # Should return 200 (or 401/500 if auth fails)
        assert response.status_code in [200, 401, 500]

        # If successful, verify it returns chat completions format
        if response.status_code == 200:
            response_data = response.json()
            # Should have chat completion structure
            assert "choices" in response_data or "id" in response_data
            # Should not have Responses API structure
            assert "output" not in response_data or "status" not in response_data

    @patch("litellm.proxy.proxy_server.llm_router")
    @patch("litellm.proxy.proxy_server.user_api_key_auth")
    def test_responses_api_key_spend_header_includes_response_cost(
        self, mock_auth, mock_router
    ):
        """
        Test that x-litellm-key-spend header includes the current request's response_cost
        for /v1/responses endpoint.

        This ensures the spend header reflects updated spend including the current request,
        even though spend tracking updates happen asynchronously after the response.
        """
        # Create mock user API key with initial spend
        mock_user_api_key_dict = MagicMock()
        mock_user_api_key_dict.token = "test_token"
        mock_user_api_key_dict.user_id = "test_user"
        mock_user_api_key_dict.team_id = None
        mock_user_api_key_dict.spend = 0.001  # Initial spend: $0.001
        mock_user_api_key_dict.tpm_limit = None
        mock_user_api_key_dict.rpm_limit = None
        mock_user_api_key_dict.max_budget = None
        mock_user_api_key_dict.allowed_model_region = None
        mock_user_api_key_dict.api_key = "sk-test-key"
        mock_user_api_key_dict.metadata = {}

        mock_auth.return_value = mock_user_api_key_dict

        # Mock response with hidden_params containing response_cost
        mock_response = self._build_responses_api_response(
            "resp_test123", "Test response"
        )

        # Add hidden_params with response_cost to the mock response
        mock_response._hidden_params = {
            "response_cost": 0.0005,  # Current request cost: $0.0005
            "model_id": "test-model-id",
        }

        mock_router.aresponses = AsyncMock(return_value=mock_response)

        client = TestClient(app)

        test_data = {"model": "gpt-4o", "input": "Tell me about AI"}

        response = client.post(
            "/v1/responses",
            json=test_data,
            headers={"Authorization": "Bearer sk-test-key"},
        )

        # Verify the response was successful
        assert response.status_code == 200

        # Verify x-litellm-key-spend header includes current request cost
        assert "x-litellm-key-spend" in response.headers
        key_spend_value = float(response.headers["x-litellm-key-spend"])
        expected_spend = 0.001 + 0.0005  # Initial spend + current request cost
        assert key_spend_value == pytest.approx(expected_spend, abs=1e-10)

        # Verify x-litellm-response-cost header is present
        assert "x-litellm-response-cost" in response.headers
        response_cost_value = float(response.headers["x-litellm-response-cost"])
        assert response_cost_value == pytest.approx(0.0005, abs=1e-10)
|
|
|