Files
litellm/tests/llm_responses_api_testing/test_azure_responses_api.py
T
Krrish Dholakia bc829d51f2 test: test
2026-03-28 19:17:38 -07:00

283 lines
9.6 KiB
Python

import os
import sys
import pytest
import asyncio
from typing import Optional
from unittest.mock import patch, AsyncMock
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
ResponseCompletedEvent,
ResponsesAPIResponse,
ResponseAPIUsage,
IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest
class TestAzureResponsesAPITest(BaseResponsesAPITest):
def get_base_completion_call_args(self):
return {
"model": "azure/gpt-4.1-mini",
"truncation": "auto",
"api_base": os.getenv("AZURE_AI_API_BASE"),
"api_key": os.getenv("AZURE_AI_API_KEY"),
"api_version": "2025-03-01-preview",
}
def get_advanced_model_for_shell_tool(self) -> Optional[str]:
"""If specified, overrides the model used by test_responses_api_shell_tool_streaming_sees_shell_output (e.g. openai/gpt-5.2 for shell support)."""
return "azure/gpt-5-mini"
@pytest.mark.asyncio
async def test_azure_responses_api_preview_api_version():
"""
Ensure new azure preview api version is working
"""
litellm._turn_on_debug()
response = await litellm.aresponses(
model="azure/gpt-5-mini",
truncation="auto",
api_version="preview",
api_base=os.getenv("AZURE_AI_API_BASE"),
api_key=os.getenv("AZURE_AI_API_KEY"),
input="Hello, can you tell me a short joke?",
)
@pytest.mark.asyncio
async def test_azure_responses_api_status_error():
"""
Test that 'status' field is not sent in the final request body to Azure API.
The status field should be filtered out from input messages before making the API call.
"""
from unittest.mock import AsyncMock, MagicMock
import json
request_data = {
"model": "computer-use-preview",
"input": [
{"content": "tell me an interesting fact", "role": "user"},
{
"id": "rs_0ab687487834d9df0068e462a1b2d88197aabbc832c9ba5316",
"summary": [],
"type": "reasoning",
"content": None,
"encrypted_content": None,
"status": "completed",
},
{
"id": "msg_0ab687487834d9df0068e462a1df188197b74b1eef05102c18",
"content": [
{
"annotations": [],
"text": "very good morning",
"type": "output_text",
"logprobs": [],
}
],
"role": "assistant",
"status": "completed",
"type": "message",
},
{"role": "user", "content": "tell me another"},
],
"include": [],
"instructions": "You are a helpful assistant.",
"reasoning": {"effort": "minimal"},
"stream": False,
"tools": [],
}
# Mock response
mock_response_data = {
"id": "resp_123",
"object": "response",
"created_at": 1234567890,
"model": "computer-use-preview",
"status": "completed",
"output": [
{
"id": "msg_123",
"role": "assistant",
"type": "message",
"status": "completed",
"content": [
{"type": "output_text", "text": "Here's an interesting fact."}
],
}
],
}
captured_request_body = {}
async def mock_post(*args, **kwargs):
# Capture the request body
nonlocal captured_request_body
if "json" in kwargs:
captured_request_body = kwargs["json"]
elif "data" in kwargs:
captured_request_body = json.loads(kwargs["data"])
import httpx
# Create a proper httpx Response object
response_content = json.dumps(mock_response_data).encode("utf-8")
response = httpx.Response(
status_code=200,
headers={"content-type": "application/json"},
content=response_content,
request=httpx.Request(method="POST", url="https://test.openai.azure.com"),
)
return response
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from unittest.mock import patch
with patch.object(AsyncHTTPHandler, "post", new=mock_post):
response = await litellm.aresponses(
model="azure/computer-use-preview",
truncation="auto",
api_version="preview",
api_base="https://test.openai.azure.com",
api_key="test-key",
input=request_data["input"],
)
# Verify that 'status' field is not present in any of the input messages
print(
"Final request body:", json.dumps(captured_request_body, indent=4, default=str)
)
assert "input" in captured_request_body, "Request body should contain 'input' field"
expected_input = [
{"content": "tell me an interesting fact", "role": "user"},
{
"id": "rs_0ab687487834d9df0068e462a1b2d88197aabbc832c9ba5316",
"summary": [],
"type": "reasoning",
},
{
"id": "msg_0ab687487834d9df0068e462a1df188197b74b1eef05102c18",
"content": [
{
"annotations": [],
"text": "very good morning",
"type": "output_text",
"logprobs": [],
}
],
"role": "assistant",
"type": "message",
},
{"role": "user", "content": "tell me another"},
]
assert captured_request_body["input"] == expected_input, (
f"Request body input should match expected format without 'status' field.\n"
f"Expected: {json.dumps(expected_input, indent=2)}\n"
f"Got: {json.dumps(captured_request_body['input'], indent=2)}"
)
@pytest.mark.asyncio
async def test_azure_responses_api_headers_with_llm_provider_prefix():
"""
Test that Azure-specific headers like 'x-request-id' and 'apim-request-id'
are properly forwarded with 'llm_provider-' prefix in response._hidden_params["headers"].
Issue: https://github.com/BerriAI/litellm/issues/16538
The fix ensures that processed headers (with llm_provider- prefix) are stored
in response._hidden_params["headers"] instead of additional_headers, making them
accessible via completion.headers in the same way as the completion API.
"""
import json
import httpx
mock_response_data = {
"id": "resp_123",
"object": "response",
"created_at": 1234567890,
"model": "gpt-5-codex",
"status": "completed",
"output": [
{
"id": "msg_123",
"role": "assistant",
"type": "message",
"content": [{"type": "output_text", "text": "Hello!"}],
}
],
}
# Mock headers that Azure returns - exactly like in the issue
mock_headers = {
"date": "Wed, 12 Nov 2025 15:31:28 GMT",
"server": "uvicorn",
"content-type": "application/json",
"x-ratelimit-remaining-tokens": "5010000",
"x-ratelimit-limit-tokens": "5010000",
# These are the Azure-specific headers that should be forwarded with llm_provider- prefix
"x-request-id": "12086715-aca3-4006-a29f-2f1e1d552043",
"apim-request-id": "25664b0d-cf4b-4e10-8d27-c7272e7efd49",
"x-ms-region": "Sweden Central",
}
async def mock_post(*args, **kwargs):
response_content = json.dumps(mock_response_data).encode("utf-8")
response = httpx.Response(
status_code=200,
headers=mock_headers,
content=response_content,
request=httpx.Request(method="POST", url="https://test.openai.azure.com"),
)
return response
with patch.object(AsyncHTTPHandler, "post", new=mock_post):
response = await litellm.aresponses(
model="azure/gpt-5-codex",
api_version="2025-03-01-preview",
api_base="https://test.openai.azure.com",
api_key="test-key",
input="Hello, can you tell me a short joke?",
)
# Check that the response has the expected headers structure
assert hasattr(response, "_hidden_params"), "Response should have _hidden_params"
assert (
"additional_headers" in response._hidden_params
), "Response _hidden_params should contain 'additional_headers' with the LLM provider headers"
headers = response._hidden_params["additional_headers"]
# Verify that Azure-specific headers are present with llm_provider- prefix
assert "llm_provider-x-request-id" in headers, (
f"Response should contain 'llm_provider-x-request-id' header. "
f"Headers: {list(headers.keys())}"
)
assert "llm_provider-apim-request-id" in headers, (
f"Response should contain 'llm_provider-apim-request-id' header. "
f"Headers: {list(headers.keys())}"
)
# Verify the header values match
assert (
headers["llm_provider-x-request-id"] == "12086715-aca3-4006-a29f-2f1e1d552043"
)
assert (
headers["llm_provider-apim-request-id"]
== "25664b0d-cf4b-4e10-8d27-c7272e7efd49"
)
assert headers["llm_provider-x-ms-region"] == "Sweden Central"
# Also verify openai-compatible headers are included
assert "x-ratelimit-limit-tokens" in headers
assert "x-ratelimit-remaining-tokens" in headers