From 825ea65b96817d0cf8bac3114792eb8a06b5c8fa Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 9 Aug 2025 11:20:34 -0700
Subject: [PATCH] [Bug Fix] Responses API - Responses API failed if input
 contains ResponseReasoningItem (#13465)

* add test_responses_api_multi_turn_with_reasoning_and_structured_output

* fix transform_responses_api_request
---
 .../llms/openai/responses/transformation.py   | 26 ++++-
 .../base_responses_api.py                     | 94 +++++++++++++++++++
 .../test_openai_responses_api.py              |  4 +
 3 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py
index 12814286f6..501941fdc5 100644
--- a/litellm/llms/openai/responses/transformation.py
+++ b/litellm/llms/openai/responses/transformation.py
@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast
 
 import httpx
+from pydantic import BaseModel
 
 import litellm
 from litellm._logging import verbose_logger
@@ -75,12 +76,35 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
         headers: dict,
     ) -> Dict:
         """No transform applied since inputs are in OpenAI spec already"""
-        return dict(
+
+        input = self._validate_input_param(input)
+        final_request_params = dict(
             ResponsesAPIRequestParams(
                 model=model, input=input, **response_api_optional_request_params
             )
         )
 
+        return final_request_params
+    
+    def _validate_input_param(self, input: Union[str, ResponseInputParam]) -> Union[str, ResponseInputParam]:
+        """
+        Convert top-level pydantic items of a list `input` to dicts (None fields dropped).
+
+        The OpenAI API call fails when some pydantic input items are JSON-dumped as-is.
+        Non-pydantic items and non-list input (e.g. a str) are returned unchanged.
+        """
+        if isinstance(input, list):
+            validated_input = []
+            for item in input:
+                # pydantic item -> plain dict, omitting fields whose value is None
+                if isinstance(item, BaseModel):
+                    validated_input.append(item.model_dump(exclude_none=True))
+                else:
+                    validated_input.append(item)
+            return validated_input
+        # Not a list: expected to be a str (a bare BaseModel is not expected); pass through
+        return input
+
     def transform_response_api_response(
         self,
         model: str,
diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py
index a4ee9a8835..bf15e44879 100644
--- a/tests/llm_responses_api_testing/base_responses_api.py
+++ b/tests/llm_responses_api_testing/base_responses_api.py
@@ -112,6 +112,10 @@ class BaseResponsesAPITest(ABC):
         """Must return the base completion call args"""
         pass
 
+    def get_base_completion_reasoning_call_args(self) -> dict:
+        """Return call args for the reasoning multi-turn test; the default None makes it skip."""
+        return None
+
 
     @pytest.mark.parametrize("sync_mode", [True, False])
     @pytest.mark.asyncio
@@ -440,3 +444,93 @@ class BaseResponsesAPITest(ABC):
         assert response is not None
         assert "output" in response
         assert len(response["output"]) > 0
+    
+    @pytest.mark.asyncio
+    async def test_responses_api_multi_turn_with_reasoning_and_structured_output(self):
+        """
+        Test multi-turn conversation with reasoning, structured output, and tool calls.
+
+        Flow exercised (asserted: validate_responses_api_response + non-empty output):
+        - First call: reasoning enabled, `get_today` tool offered, `Output` text format
+        - The first call's output items are appended to the input exactly as returned
+        - A function_call_output is added for each `get_today` call found in the output
+        - Second call: the combined input is sent back with the same settings
+        """
+        from pydantic import BaseModel
+
+        litellm._turn_on_debug()
+        litellm.set_verbose = True
+        base_completion_call_args = self.get_base_completion_reasoning_call_args()
+        if base_completion_call_args is None:
+            pytest.skip("Skipping test due to no base completion reasoning call args")
+
+        # Define tools for the conversation
+        tools = [{"type": "function", "name": "get_today"}]
+
+        # Define structured output schema
+        class Output(BaseModel):
+            today: str
+            number_of_r: str
+
+        # Initial conversation input
+        input_messages = [
+            {
+                "role": "user",
+                "content": "How many r in strrawberrry? While you're thinking, you should call tool get_today. Then you output the today and number of r",
+            }
+        ]
+
+        # First call - should trigger reasoning and tool call (the prompt asks for
+        # get_today; whether a tool call actually happened is not asserted here)
+        response = await litellm.aresponses(
+            input=input_messages,
+            tools=tools,
+            reasoning={"effort": "low", "summary": "detailed"},
+            text_format=Output,
+            **base_completion_call_args
+        )
+
+        print("First call output:")
+        print(json.dumps(response.output, indent=4, default=str))
+
+        # Validate first response structure
+        validate_responses_api_response(response, final_chunk=True)
+        assert response.output is not None
+        assert len(response.output) > 0
+
+        # Extend input with first response output (items passed back as returned, unconverted)
+        input_messages.extend(response.output)
+
+        # Process any tool calls and add function outputs
+        function_outputs = []
+        for item in response.output:
+            if hasattr(item, 'type') and item.type in ["function_call", "custom_tool_call"]:
+                if hasattr(item, 'name') and item.name == "get_today":
+                    function_outputs.append({
+                        "type": "function_call_output",
+                        "call_id": item.call_id,
+                        "output": "2025-01-15"
+                    })
+
+        # Add function outputs to conversation
+        input_messages.extend(function_outputs)
+
+        print("Second call input:")
+        print(json.dumps(input_messages, indent=4, default=str))
+
+        # Second call - should produce structured output
+        final_response = await litellm.aresponses(
+            input=input_messages,
+            tools=tools,
+            reasoning={"effort": "low", "summary": "detailed"},
+            text_format=Output,
+            **base_completion_call_args
+        )
+
+        print("Second call output:")
+        print(json.dumps(final_response.output, indent=4, default=str))
+
+        # Validate final response structure
+        validate_responses_api_response(final_response, final_chunk=True)
+        assert final_response.output is not None
+        assert len(final_response.output) > 0
diff --git a/tests/llm_responses_api_testing/test_openai_responses_api.py b/tests/llm_responses_api_testing/test_openai_responses_api.py
index 5cd515be23..427f779cb0 100644
--- a/tests/llm_responses_api_testing/test_openai_responses_api.py
+++ b/tests/llm_responses_api_testing/test_openai_responses_api.py
@@ -30,6 +30,10 @@ class TestOpenAIResponsesAPITest(BaseResponsesAPITest):
         return {
             "model": "openai/gpt-4o",
         }
+    def get_base_completion_reasoning_call_args(self):
+        return {
+            "model": "openai/gpt-5-mini",  # model the reasoning multi-turn test is run against
+        }
 
 
 class TestCustomLogger(CustomLogger):