# Mirror of https://github.com/tiennm99/litellm.git
# Synced 2026-06-17 22:48:35 +00:00
# 283 lines, 9.6 KiB, Python
import os
|
|
import sys
|
|
import pytest
|
|
import asyncio
|
|
from typing import Optional
|
|
from unittest.mock import patch, AsyncMock
|
|
|
|
sys.path.insert(0, os.path.abspath("../.."))
|
|
import litellm
|
|
from litellm.integrations.custom_logger import CustomLogger
|
|
import json
|
|
from litellm.types.utils import StandardLoggingPayload
|
|
from litellm.types.llms.openai import (
|
|
ResponseCompletedEvent,
|
|
ResponsesAPIResponse,
|
|
ResponseAPIUsage,
|
|
IncompleteDetails,
|
|
)
|
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
|
from base_responses_api import BaseResponsesAPITest
|
|
|
|
|
|
class TestAzureResponsesAPITest(BaseResponsesAPITest):
    """Azure-specific configuration for the tests inherited from BaseResponsesAPITest."""

    def get_base_completion_call_args(self):
        """Return the keyword arguments used for Azure Responses API calls.

        The endpoint and key are read from the AZURE_AI_API_BASE and
        AZURE_AI_API_KEY environment variables; each is None when unset.
        """
        azure_endpoint = os.getenv("AZURE_AI_API_BASE")
        azure_key = os.getenv("AZURE_AI_API_KEY")
        return dict(
            model="azure/gpt-4.1-mini",
            truncation="auto",
            api_base=azure_endpoint,
            api_key=azure_key,
            api_version="2025-03-01-preview",
        )

    def get_advanced_model_for_shell_tool(self) -> Optional[str]:
        """Name the model to use for the shell-tool streaming test.

        If specified, overrides the model used by
        test_responses_api_shell_tool_streaming_sees_shell_output
        (e.g. openai/gpt-5.2 for shell support).
        """
        shell_tool_model = "azure/gpt-5-mini"
        return shell_tool_model
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_azure_responses_api_preview_api_version():
    """
    Smoke-test a Responses API call that uses the Azure "preview" api_version.

    The test passes when the call completes without raising; the returned
    response is not inspected.
    """
    litellm._turn_on_debug()

    # Credentials come from the environment; each value is None when unset.
    call_kwargs = {
        "model": "azure/gpt-5-mini",
        "truncation": "auto",
        "api_version": "preview",
        "api_base": os.getenv("AZURE_AI_API_BASE"),
        "api_key": os.getenv("AZURE_AI_API_KEY"),
        "input": "Hello, can you tell me a short joke?",
    }
    response = await litellm.aresponses(**call_kwargs)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_azure_responses_api_status_error():
    """
    Test that 'status' field is not sent in the final request body to Azure API.

    ``AsyncHTTPHandler.post`` is replaced by a stub that records the request
    body and returns a canned successful response. The recorded ``input`` must
    equal the original items minus their ``status`` keys and minus the
    reasoning item's ``None``-valued ``content`` / ``encrypted_content`` keys.
    """
    # Local import: httpx is not imported at module level in this file.
    import httpx

    # Only request_data["input"] is forwarded to litellm below; the remaining
    # keys are kept as context for the request shape being reproduced.
    request_data = {
        "model": "computer-use-preview",
        "input": [
            {"content": "tell me an interesting fact", "role": "user"},
            {
                "id": "rs_0ab687487834d9df0068e462a1b2d88197aabbc832c9ba5316",
                "summary": [],
                "type": "reasoning",
                "content": None,
                "encrypted_content": None,
                "status": "completed",
            },
            {
                "id": "msg_0ab687487834d9df0068e462a1df188197b74b1eef05102c18",
                "content": [
                    {
                        "annotations": [],
                        "text": "very good morning",
                        "type": "output_text",
                        "logprobs": [],
                    }
                ],
                "role": "assistant",
                "status": "completed",
                "type": "message",
            },
            {"role": "user", "content": "tell me another"},
        ],
        "include": [],
        "instructions": "You are a helpful assistant.",
        "reasoning": {"effort": "minimal"},
        "stream": False,
        "tools": [],
    }

    # Mock response
    mock_response_data = {
        "id": "resp_123",
        "object": "response",
        "created_at": 1234567890,
        "model": "computer-use-preview",
        "status": "completed",
        "output": [
            {
                "id": "msg_123",
                "role": "assistant",
                "type": "message",
                "status": "completed",
                "content": [
                    {"type": "output_text", "text": "Here's an interesting fact."}
                ],
            }
        ],
    }

    captured_request_body = {}

    async def mock_post(*args, **kwargs):
        """Record the outgoing request body and answer with a canned 200 response."""
        nonlocal captured_request_body
        # The body may arrive as a dict ("json") or pre-serialized ("data").
        if "json" in kwargs:
            captured_request_body = kwargs["json"]
        elif "data" in kwargs:
            captured_request_body = json.loads(kwargs["data"])

        # Create a proper httpx Response object
        return httpx.Response(
            status_code=200,
            headers={"content-type": "application/json"},
            content=json.dumps(mock_response_data).encode("utf-8"),
            request=httpx.Request(method="POST", url="https://test.openai.azure.com"),
        )

    with patch.object(AsyncHTTPHandler, "post", new=mock_post):
        # The return value is irrelevant here; only the captured request matters.
        await litellm.aresponses(
            model="azure/computer-use-preview",
            truncation="auto",
            api_version="preview",
            api_base="https://test.openai.azure.com",
            api_key="test-key",
            input=request_data["input"],
        )

    # Verify that 'status' field is not present in any of the input messages
    print(
        "Final request body:", json.dumps(captured_request_body, indent=4, default=str)
    )
    assert "input" in captured_request_body, "Request body should contain 'input' field"

    expected_input = [
        {"content": "tell me an interesting fact", "role": "user"},
        {
            "id": "rs_0ab687487834d9df0068e462a1b2d88197aabbc832c9ba5316",
            "summary": [],
            "type": "reasoning",
        },
        {
            "id": "msg_0ab687487834d9df0068e462a1df188197b74b1eef05102c18",
            "content": [
                {
                    "annotations": [],
                    "text": "very good morning",
                    "type": "output_text",
                    "logprobs": [],
                }
            ],
            "role": "assistant",
            "type": "message",
        },
        {"role": "user", "content": "tell me another"},
    ]

    # default=str mirrors the debug print above, so a non-JSON-serializable
    # value in the captured body cannot turn this assertion failure into a
    # TypeError raised while building the message.
    assert captured_request_body["input"] == expected_input, (
        f"Request body input should match expected format without 'status' field.\n"
        f"Expected: {json.dumps(expected_input, indent=2, default=str)}\n"
        f"Got: {json.dumps(captured_request_body['input'], indent=2, default=str)}"
    )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_azure_responses_api_headers_with_llm_provider_prefix():
    """
    Check that Azure-specific response headers such as 'x-request-id' and
    'apim-request-id' are exposed with an 'llm_provider-' prefix in
    response._hidden_params["additional_headers"].

    Issue: https://github.com/BerriAI/litellm/issues/16538

    The HTTP call is stubbed out: AsyncHTTPHandler.post is patched to return a
    canned 200 response carrying the headers below, and the test then inspects
    the headers attached to the returned response object.
    """
    import json
    import httpx

    # Mock headers that Azure returns - exactly like in the issue
    mock_headers = {
        "date": "Wed, 12 Nov 2025 15:31:28 GMT",
        "server": "uvicorn",
        "content-type": "application/json",
        "x-ratelimit-remaining-tokens": "5010000",
        "x-ratelimit-limit-tokens": "5010000",
        # These are the Azure-specific headers that should be forwarded with llm_provider- prefix
        "x-request-id": "12086715-aca3-4006-a29f-2f1e1d552043",
        "apim-request-id": "25664b0d-cf4b-4e10-8d27-c7272e7efd49",
        "x-ms-region": "Sweden Central",
    }

    mock_response_data = {
        "id": "resp_123",
        "object": "response",
        "created_at": 1234567890,
        "model": "gpt-5-codex",
        "status": "completed",
        "output": [
            {
                "id": "msg_123",
                "role": "assistant",
                "type": "message",
                "content": [{"type": "output_text", "text": "Hello!"}],
            }
        ],
    }

    async def mock_post(*args, **kwargs):
        """Stand-in for AsyncHTTPHandler.post that returns the canned response."""
        return httpx.Response(
            status_code=200,
            headers=mock_headers,
            content=json.dumps(mock_response_data).encode("utf-8"),
            request=httpx.Request(method="POST", url="https://test.openai.azure.com"),
        )

    with patch.object(AsyncHTTPHandler, "post", new=mock_post):
        response = await litellm.aresponses(
            model="azure/gpt-5-codex",
            api_version="2025-03-01-preview",
            api_base="https://test.openai.azure.com",
            api_key="test-key",
            input="Hello, can you tell me a short joke?",
        )

    # Check that the response has the expected headers structure
    assert hasattr(response, "_hidden_params"), "Response should have _hidden_params"
    assert (
        "additional_headers" in response._hidden_params
    ), "Response _hidden_params should contain 'additional_headers' with the LLM provider headers"

    provider_headers = response._hidden_params["additional_headers"]

    # Verify that Azure-specific headers are present with llm_provider- prefix
    for azure_name in ("x-request-id", "apim-request-id"):
        prefixed_name = f"llm_provider-{azure_name}"
        assert prefixed_name in provider_headers, (
            f"Response should contain '{prefixed_name}' header. "
            f"Headers: {list(provider_headers.keys())}"
        )

    # Verify the header values match what the stubbed response sent
    for azure_name in ("x-request-id", "apim-request-id", "x-ms-region"):
        assert provider_headers[f"llm_provider-{azure_name}"] == mock_headers[azure_name]

    # Also verify openai-compatible headers are included
    for ratelimit_name in ("x-ratelimit-limit-tokens", "x-ratelimit-remaining-tokens"):
        assert ratelimit_name in provider_headers
|