From 825ea65b96817d0cf8bac3114792eb8a06b5c8fa Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 9 Aug 2025 11:20:34 -0700
Subject: [PATCH] [Bug Fix] Responses API - Responses API failed if input containing ResponseReasoningItem (#13465)

* add test_responses_api_multi_turn_with_reasoning_and_structured_output

* fix transform_responses_api_request
---
 .../llms/openai/responses/transformation.py   | 26 ++++-
 .../base_responses_api.py                     | 94 +++++++++++++++++++
 .../test_openai_responses_api.py              |  4 +
 3 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py
index 12814286f6..501941fdc5 100644
--- a/litellm/llms/openai/responses/transformation.py
+++ b/litellm/llms/openai/responses/transformation.py
@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast
 
 import httpx
+from pydantic import BaseModel
 
 import litellm
 from litellm._logging import verbose_logger
@@ -75,12 +76,35 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
         headers: dict,
     ) -> Dict:
         """No transform applied since inputs are in OpenAI spec already"""
-        return dict(
+
+        input = self._validate_input_param(input)
+        final_request_params = dict(
             ResponsesAPIRequestParams(
                 model=model, input=input, **response_api_optional_request_params
             )
         )
 
+        return final_request_params
+
+    def _validate_input_param(self, input: Union[str, ResponseInputParam]) -> Union[str, ResponseInputParam]:
+        """
+        Convert every pydantic item in a list ``input`` to a plain dict.
+
+        The request fails to JSON-serialize when ``input`` still holds pydantic
+        objects (e.g. a ``ResponseReasoningItem``); non-list input is returned as-is.
+        """
+        if isinstance(input, list):
+            validated_input = []
+            for item in input:
+                # if it's pydantic, convert to dict
+                if isinstance(item, BaseModel):
+                    validated_input.append(item.model_dump(exclude_none=True))
+                else:
+                    validated_input.append(item)
+            return validated_input
+        # Input is expected to be either str or List, no single BaseModel expected
+        return input
+
     def transform_response_api_response(
         self,
         model: str,
diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py
index a4ee9a8835..bf15e44879 100644
--- a/tests/llm_responses_api_testing/base_responses_api.py
+++ b/tests/llm_responses_api_testing/base_responses_api.py
@@ -112,6 +112,10 @@ class BaseResponsesAPITest(ABC):
         """Must return the base completion call args"""
         pass
 
+    def get_base_completion_reasoning_call_args(self) -> dict:
+        """Return the base completion reasoning call args, or None to skip the reasoning test"""
+        return None
+
 
     @pytest.mark.parametrize("sync_mode", [True, False])
     @pytest.mark.asyncio
@@ -440,3 +444,93 @@ class BaseResponsesAPITest(ABC):
         assert response is not None
         assert "output" in response
         assert len(response["output"]) > 0
+
+    @pytest.mark.asyncio
+    async def test_responses_api_multi_turn_with_reasoning_and_structured_output(self):
+        """
+        Test multi-turn conversation with reasoning, structured output, and tool calls.
+
+        This test validates:
+        - First call: Model uses reasoning to process a question and makes a tool call
+        - Tool call handling: Function call output is properly processed
+        - Second call: Model produces structured output incorporating tool results
+        - Structured output: Response conforms to defined Pydantic model schema
+        """
+        from pydantic import BaseModel
+
+        litellm._turn_on_debug()
+        litellm.set_verbose = True
+        base_completion_call_args = self.get_base_completion_reasoning_call_args()
+        if base_completion_call_args is None:
+            pytest.skip("Skipping test due to no base completion reasoning call args")
+
+        # Define tools for the conversation
+        tools = [{"type": "function", "name": "get_today"}]
+
+        # Define structured output schema
+        class Output(BaseModel):
+            today: str
+            number_of_r: str
+
+        # Initial conversation input
+        input_messages = [
+            {
+                "role": "user",
+                "content": "How many r in strrawberrry? While you're thinking, you should call tool get_today. Then you output the today and number of r",
+            }
+        ]
+
+
+        # First call - should trigger reasoning and tool call
+        response = await litellm.aresponses(
+            input=input_messages,
+            tools=tools,
+            reasoning={"effort": "low", "summary": "detailed"},
+            text_format=Output,
+            **base_completion_call_args
+        )
+
+        print("First call output:")
+        print(json.dumps(response.output, indent=4, default=str))
+
+        # Validate first response structure
+        validate_responses_api_response(response, final_chunk=True)
+        assert response.output is not None
+        assert len(response.output) > 0
+
+        # Extend input with first response output
+        input_messages.extend(response.output)
+
+        # Process any tool calls and add function outputs
+        function_outputs = []
+        for item in response.output:
+            if hasattr(item, 'type') and item.type in ["function_call", "custom_tool_call"]:
+                if hasattr(item, 'name') and item.name == "get_today":
+                    function_outputs.append({
+                        "type": "function_call_output",
+                        "call_id": item.call_id,
+                        "output": "2025-01-15"
+                    })
+
+        # Add function outputs to conversation
+        input_messages.extend(function_outputs)
+
+        print("Second call input:")
+        print(json.dumps(input_messages, indent=4, default=str))
+
+        # Second call - should produce structured output
+        final_response = await litellm.aresponses(
+            input=input_messages,
+            tools=tools,
+            reasoning={"effort": "low", "summary": "detailed"},
+            text_format=Output,
+            **base_completion_call_args
+        )
+
+        print("Second call output:")
+        print(json.dumps(final_response.output, indent=4, default=str))
+
+        # Validate final response structure
+        validate_responses_api_response(final_response, final_chunk=True)
+        assert final_response.output is not None
+        assert len(final_response.output) > 0
diff --git a/tests/llm_responses_api_testing/test_openai_responses_api.py b/tests/llm_responses_api_testing/test_openai_responses_api.py
index 5cd515be23..427f779cb0 100644
--- a/tests/llm_responses_api_testing/test_openai_responses_api.py
+++ b/tests/llm_responses_api_testing/test_openai_responses_api.py
@@ -30,6 +30,10 @@ class TestOpenAIResponsesAPITest(BaseResponsesAPITest):
         return {
             "model": "openai/gpt-4o",
         }
+    def get_base_completion_reasoning_call_args(self):
+        return {
+            "model": "openai/gpt-5-mini",
+        }
 
 
 class TestCustomLogger(CustomLogger):