[Bug Fix] Responses API - Responses API failed if input contained a ResponseReasoningItem (#13465)

* add test_responses_api_multi_turn_with_reasoning_and_structured_output * fix transform_responses_api_request
2026-08-03 02:20:15 +00:00 · 2025-08-09 11:20:34 -07:00
parent ee40db7b31
commit 825ea65b96
3 changed files with 123 additions and 1 deletions
@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast

 import httpx
+from pydantic import BaseModel

 import litellm
 from litellm._logging import verbose_logger
@@ -75,12 +76,35 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
        headers: dict,
    ) -> Dict:
        """No transform applied since inputs are in OpenAI spec already"""
-        return dict(
+
+        input = self._validate_input_param(input)
+        final_request_params = dict(
            ResponsesAPIRequestParams(
                model=model, input=input, **response_api_optional_request_params
            )
        )

+        return final_request_params
+    
+    def _validate_input_param(self, input: Union[str, ResponseInputParam]) -> Union[str, ResponseInputParam]:
+        """
+        Ensure all input fields if pydantic are converted to dict
+
+        OpenAI API Fails when we try to JSON dumps specific input pydantic fields.
+        This function ensures all input fields are converted to dict.
+        """
+        if isinstance(input, list):
+            validated_input = []
+            for item in input:
+                # if it's pydantic, convert to dict
+                if isinstance(item, BaseModel):
+                    validated_input.append(item.model_dump(exclude_none=True))
+                else:
+                    validated_input.append(item)
+            return validated_input
+        # Input is expected to be either str or List, no single BaseModel expected
+        return input
+
    def transform_response_api_response(
        self,
        model: str,
@@ -112,6 +112,10 @@ class BaseResponsesAPITest(ABC):
        """Must return the base completion call args"""
        pass

+    def get_base_completion_reasoning_call_args(self) -> dict:
+        """Return the base completion reasoning call args, or None (the default) to skip the reasoning test"""
+        return None  # NOTE(review): annotated `-> dict` but None is returned here; Optional[dict] would be accurate
+

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
@@ -440,3 +444,93 @@ class BaseResponsesAPITest(ABC):
        assert response is not None
        assert "output" in response
        assert len(response["output"]) > 0
+    
+    @pytest.mark.asyncio
+    async def test_responses_api_multi_turn_with_reasoning_and_structured_output(self):
+        """
+        Test multi-turn conversation with reasoning, structured output, and tool calls.
+        
+        This test validates:
+        - First call: Model uses reasoning to process a question and makes a tool call
+        - Tool call handling: Function call output is properly processed 
+        - Second call: Model produces structured output incorporating tool results
+        - Structured output: Response conforms to defined Pydantic model schema
+        """
+        from pydantic import BaseModel
+        
+        litellm._turn_on_debug()
+        litellm.set_verbose = True
+        base_completion_call_args = self.get_base_completion_reasoning_call_args()
+        if base_completion_call_args is None:
+            pytest.skip("Skipping test due to no base completion reasoning call args")
+        
+        # Define tools for the conversation
+        tools = [{"type": "function", "name": "get_today"}]
+        
+        # Define structured output schema
+        class Output(BaseModel):
+            today: str
+            number_of_r: str
+        
+        # Initial conversation input
+        input_messages = [
+            {
+                "role": "user", 
+                "content": "How many r in strrawberrry? While you're thinking, you should call tool get_today. Then you output the today and number of r",
+            }
+        ]
+        
+
+        # First call - should trigger reasoning and tool call
+        response = await litellm.aresponses(
+            input=input_messages,
+            tools=tools,
+            reasoning={"effort": "low", "summary": "detailed"},
+            text_format=Output,
+            **base_completion_call_args
+        )
+
+        print("First call output:")
+        print(json.dumps(response.output, indent=4, default=str))
+        
+        # Validate first response structure
+        validate_responses_api_response(response, final_chunk=True)
+        assert response.output is not None
+        assert len(response.output) > 0
+        
+        # Extend input with first response output
+        input_messages.extend(response.output)
+        
+        # Process any tool calls and add function outputs
+        function_outputs = []
+        for item in response.output:
+            if hasattr(item, 'type') and item.type in ["function_call", "custom_tool_call"]:
+                if hasattr(item, 'name') and item.name == "get_today":
+                    function_outputs.append({
+                        "type": "function_call_output", 
+                        "call_id": item.call_id, 
+                        "output": "2025-01-15"
+                    })
+        
+        # Add function outputs to conversation
+        input_messages.extend(function_outputs)
+        
+        print("Second call input:")
+        print(json.dumps(input_messages, indent=4, default=str))
+        
+        # Second call - should produce structured output
+        final_response = await litellm.aresponses(
+            input=input_messages,
+            tools=tools,
+            reasoning={"effort": "low", "summary": "detailed"},
+            text_format=Output,
+            **base_completion_call_args
+        )
+        
+        print("Second call output:")
+        print(json.dumps(final_response.output, indent=4, default=str))
+        
+        # Validate final response structure
+        validate_responses_api_response(final_response, final_chunk=True)
+        assert final_response.output is not None
+        assert len(final_response.output) > 0
@@ -30,6 +30,10 @@ class TestOpenAIResponsesAPITest(BaseResponsesAPITest):
        return {
            "model": "openai/gpt-4o",
        }
+    def get_base_completion_reasoning_call_args(self):
+        return {
+            "model": "openai/gpt-5-mini",
+        }


 class TestCustomLogger(CustomLogger):