feat(openai): round-trip Responses API reasoning_items in chat completions

Made-with: Cursor
2026-08-02 04:21:34 +00:00 · 2026-03-27 20:25:08 +05:30
parent b20cff8673
commit 00a810e92d
6 changed files with 474 additions and 13 deletions
@@ -581,6 +581,90 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \

 See [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning) for more details on organization verification requirements.

+### Multi-turn Conversations with `reasoning_items`
+
+To carry reasoning state across turns, pass the previous assistant message's `reasoning_items` back in the conversation history. These are structured blocks that include the `encrypted_content` token OpenAI uses to restore reasoning state on the next request. Pass `include=["reasoning.encrypted_content"]` on every call where you want that token returned.
+
+<Tabs>
+<TabItem value="non-streaming" label="Non-Streaming">
+
+```python showLineNumbers title="Non-streaming: round-trip reasoning_items"
+import litellm
+
+messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}]
+
+# Turn 1 — request reasoning_items (with encrypted_content)
+response = litellm.completion(
+    model="openai/responses/gpt-5-mini",
+    messages=messages,
+    reasoning_effort="low",
+    include=["reasoning.encrypted_content"],
+)
+
+assistant_msg = response.choices[0].message
+
+# Turn 2 — pass reasoning_items back; LiteLLM converts to the correct Responses API format
+messages.append({
+    "role": "assistant",
+    "content": assistant_msg.content,
+    "reasoning_items": assistant_msg.reasoning_items,
+})
+messages.append({"role": "user", "content": "Now summarize your reasoning."})
+
+response2 = litellm.completion(
+    model="openai/responses/gpt-5-mini",
+    messages=messages,
+    reasoning_effort="low",
+    include=["reasoning.encrypted_content"],
+)
+```
+
+</TabItem>
+<TabItem value="streaming" label="Streaming">
+
+`reasoning_items` (with `encrypted_content`) arrive on the final chunk when the full response completes:
+
+```python showLineNumbers title="Streaming: collect and round-trip reasoning_items"
+import litellm
+
+messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}]
+
+collected_content = []
+collected_reasoning_items = []
+
+stream = litellm.completion(
+    model="openai/responses/gpt-5-mini",
+    messages=messages,
+    stream=True,
+    reasoning_effort="low",
+    include=["reasoning.encrypted_content"],
+)
+
+for chunk in stream:
+    delta = chunk.choices[0].delta
+    if delta.content:
+        collected_content.append(delta.content)
+    if getattr(delta, "reasoning_items", None):
+        collected_reasoning_items.extend(delta.reasoning_items)
+
+messages.append({
+    "role": "assistant",
+    "content": "".join(collected_content),
+    "reasoning_items": collected_reasoning_items or None,
+})
+messages.append({"role": "user", "content": "Continue the conversation."})
+
+response2 = litellm.completion(
+    model="openai/responses/gpt-5-mini",
+    messages=messages,
+    reasoning_effort="low",
+    include=["reasoning.encrypted_content"],
+)
+```
+
+</TabItem>
+</Tabs>
+
 ### Verbosity Control for GPT-5 Models

 The `verbosity` parameter controls the length and detail of responses from GPT-5 family models. It accepts three values: `"low"`, `"medium"`, or `"high"`.
@@ -32,6 +32,7 @@ from litellm.llms.base_llm.bridges.completion_transformation import (
 )
 from litellm.types.llms.openai import (
    ChatCompletionAnnotation,
+    ChatCompletionReasoningItem,
    ChatCompletionToolParamFunctionChunk,
    Reasoning,
    ResponsesAPIOptionalRequestParams,
@@ -55,6 +56,49 @@ if TYPE_CHECKING:
    )


+def _build_reasoning_item(
+    item_id: str,
+    encrypted_content: Optional[str],
+    summary_raw: Any,
+) -> Dict[str, Any]:
+    """Build a ChatCompletionReasoningItem-shaped dict from raw response data.
+
+    Summary entries may be plain dicts or attribute-style objects (e.g. pydantic).
+    """
+    summary: List[Dict[str, Any]] = []
+    for s in summary_raw or []:
+        if isinstance(s, dict):
+            summary.append(
+                {"type": s.get("type", "summary_text"), "text": s.get("text", "")}
+            )
+        else:
+            summary.append(
+                {
+                    "type": getattr(s, "type", "summary_text"),
+                    "text": getattr(s, "text", ""),
+                }
+            )
+    return {
+        "id": item_id,
+        "type": "reasoning",
+        "encrypted_content": encrypted_content,
+        "summary": summary,
+    }
+
+
+def _reasoning_item_to_response_input(r_item: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert a stored ChatCompletionReasoningItem back to a Responses API input item."""
+    r_input: Dict[str, Any] = {
+        "type": "reasoning",
+        "id": r_item.get("id") or f"rs_{id(r_item)}",
+        # summary is always required by the Responses API, even when empty
+        "summary": r_item.get("summary") or [],
+    }
+    if r_item.get("encrypted_content"):
+        r_input["encrypted_content"] = r_item["encrypted_content"]
+    return r_input
+
+
 class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
    """
    Handler for transforming /chat/completions api requests to litellm.responses requests
@@ -202,10 +246,12 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
                    }
                )
            elif role == "assistant" and tool_calls and isinstance(tool_calls, list):
+                for r_item in msg.get("reasoning_items") or []:
+                    input_items.append(_reasoning_item_to_response_input(r_item))
                for tool_call in tool_calls:
                    function = tool_call.get("function")
                    if function:
-                        input_tool_call = {
+                        input_tool_call: Dict[str, Any] = {
                            "type": "function_call",
                            "call_id": tool_call["id"],
                        }
@@ -217,7 +263,9 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
                    else:
                        raise ValueError(f"tool call not supported: {tool_call}")
            elif content is not None:
-                # Regular user/assistant message
+                if role == "assistant":
+                    for r_item in msg.get("reasoning_items") or []:
+                        input_items.append(_reasoning_item_to_response_input(r_item))
                input_items.append(
                    {
                        "type": "message",
@@ -411,6 +459,7 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
        choices: List[Choices] = []
        index = 0
        reasoning_content: Optional[str] = None
+        pending_reasoning_item: Optional[Dict[str, Any]] = None

        # Collect all tool calls to put them in a single choice
        # (Chat Completions API expects all tool calls in one message)
@@ -419,9 +468,16 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):

        for item in output_items:
            if isinstance(item, ResponseReasoningItem):
-                for summary_item in item.summary:
-                    response_text = getattr(summary_item, "text", "")
-                    reasoning_content = response_text if response_text else ""
+                pending_reasoning_item = _build_reasoning_item(
+                    item_id=item.id,
+                    encrypted_content=getattr(item, "encrypted_content", None),
+                    summary_raw=item.summary,
+                )
+                reasoning_content = " ".join(
+                    s["text"]
+                    for s in pending_reasoning_item["summary"]
+                    if s.get("text")
+                )

            elif isinstance(item, ResponseOutputMessage):
                for content in item.content:
@@ -436,6 +492,12 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
                        content=response_text if response_text else "",
                        reasoning_content=reasoning_content,
                        annotations=annotations,
+                        reasoning_items=cast(
+                            Optional[List[ChatCompletionReasoningItem]],
+                            [pending_reasoning_item]
+                            if pending_reasoning_item is not None
+                            else None,
+                        ),
                    )

                    choices.append(
@@ -446,7 +508,8 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
                        )
                    )

-                    reasoning_content = None  # flush reasoning content
+                    reasoning_content = None  # flush
+                    pending_reasoning_item = None  # flush
                    index += 1

            elif isinstance(item, ResponseFunctionToolCall):
@@ -489,11 +552,18 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
                content=None,
                tool_calls=accumulated_tool_calls,
                reasoning_content=reasoning_content,
+                reasoning_items=cast(
+                    Optional[List[ChatCompletionReasoningItem]],
+                    [pending_reasoning_item]
+                    if pending_reasoning_item is not None
+                    else None,
+                ),
            )
            choices.append(
                Choices(message=msg, finish_reason="tool_calls", index=index)
            )
            reasoning_content = None
+            pending_reasoning_item = None

        return choices

@@ -1232,6 +1302,25 @@ class OpenAiResponsesToChatCompletionStreamIterator(BaseModelResponseIterator):

            finish_reason = "tool_calls" if has_function_calls else "stop"

+            # Extract reasoning items (and any encrypted_content) for round-tripping
+            completed_reasoning_items: Optional[List[Dict[str, Any]]] = None
+            for item in output_items:
+                if not isinstance(item, dict) or item.get("type") != "reasoning":
+                    continue
+                if completed_reasoning_items is None:
+                    completed_reasoning_items = []
+                completed_reasoning_items.append(
+                    _build_reasoning_item(
+                        item_id=item.get("id", ""),
+                        encrypted_content=item.get("encrypted_content"),
+                        summary_raw=item.get("summary"),
+                    )
+                )
+            completed_reasoning_items_typed = cast(
+                Optional[List[ChatCompletionReasoningItem]],
+                completed_reasoning_items,
+            )
+
            usage = None
            if response_data.get("usage"):
                from litellm.responses.utils import ResponseAPILoggingUtils
@@ -1245,7 +1334,10 @@ class OpenAiResponsesToChatCompletionStreamIterator(BaseModelResponseIterator):
                choices=[
                    StreamingChoices(
                        index=0,
-                        delta=Delta(content=""),
+                        delta=Delta(
+                            content="",
+                            reasoning_items=completed_reasoning_items_typed,
+                        ),
                        finish_reason=finish_reason,
                    )
                ],
@@ -831,6 +831,10 @@ class CustomStreamWrapper:
                "annotations" in model_response.choices[0].delta
                and model_response.choices[0].delta.annotations is not None
            )
+            or (
+                getattr(model_response.choices[0].delta, "reasoning_items", None)
+                is not None
+            )
        ):
            return True
        else:
@@ -536,6 +536,20 @@ class ChatCompletionRedactedThinkingBlock(TypedDict, total=False):
    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


+class ChatCompletionReasoningSummaryTextBlock(TypedDict, total=False):
+    type: Required[Literal["summary_text"]]
+    text: str
+
+
+class ChatCompletionReasoningItem(TypedDict, total=False):
+    """Represents an OpenAI Responses API reasoning item for round-tripping in conversation history."""
+
+    type: Required[Literal["reasoning"]]
+    id: str
+    encrypted_content: Optional[str]
+    summary: List["ChatCompletionReasoningSummaryTextBlock"]
+
+
 class WebSearchOptionsUserLocationApproximate(TypedDict, total=False):
    city: str
    """Free text input for the city of the user, e.g. `San Francisco`."""
@@ -58,6 +58,7 @@ from .llms.openai import (
    AllMessageValues,
    Batch,
    ChatCompletionAnnotation,
+    ChatCompletionReasoningItem,
    ChatCompletionRedactedThinkingBlock,
    ChatCompletionThinkingBlock,
    ChatCompletionToolCallChunk,
@@ -1132,6 +1133,7 @@ class Message(SafeAttributeModel, OpenAIObject):
    thinking_blocks: Optional[
        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
    ] = None
+    reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None
    provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
    annotations: Optional[List[ChatCompletionAnnotation]] = None

@@ -1150,6 +1152,7 @@ class Message(SafeAttributeModel, OpenAIObject):
                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
            ]
        ] = None,
+        reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None,
        annotations: Optional[List[ChatCompletionAnnotation]] = None,
        **params,
    ):
@@ -1182,6 +1185,9 @@ class Message(SafeAttributeModel, OpenAIObject):
        if thinking_blocks is not None:
            init_values["thinking_blocks"] = thinking_blocks

+        if reasoning_items is not None:
+            init_values["reasoning_items"] = reasoning_items
+
        if annotations is not None:
            init_values["annotations"] = annotations

@@ -1219,6 +1225,11 @@ class Message(SafeAttributeModel, OpenAIObject):
            if hasattr(self, "thinking_blocks"):
                del self.thinking_blocks

+        if reasoning_items is None:
+            # ensure default response matches OpenAI spec
+            if hasattr(self, "reasoning_items"):
+                del self.reasoning_items
+
        add_provider_specific_fields(self, provider_specific_fields)

    def get(self, key, default=None):
@@ -1246,6 +1257,7 @@ class Delta(SafeAttributeModel, OpenAIObject):
    thinking_blocks: Optional[
        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
    ] = None
+    reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None
    provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)

    def __init__(
@@ -1262,6 +1274,7 @@ class Delta(SafeAttributeModel, OpenAIObject):
                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
            ]
        ] = None,
+        reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None,
        annotations: Optional[List[ChatCompletionAnnotation]] = None,
        **params,
    ):
@@ -1295,6 +1308,13 @@ class Delta(SafeAttributeModel, OpenAIObject):
            # ensure default response matches OpenAI spec
            del self.thinking_blocks

+        if reasoning_items is not None:
+            self.reasoning_items = reasoning_items
+        else:
+            # ensure default response matches OpenAI spec
+            if hasattr(self, "reasoning_items"):
+                del self.reasoning_items
+
        # Add annotations to the delta, ensure they are only on Delta if they exist (Match OpenAI spec)
        if annotations is not None:
            self.annotations = annotations
@@ -2127,9 +2127,10 @@ def test_convert_chat_completion_file_type_to_input_file():
        }
    ]

-    input_items, instructions = (
-        handler.convert_chat_completion_messages_to_responses_api(messages)
-    )
+    (
+        input_items,
+        instructions,
+    ) = handler.convert_chat_completion_messages_to_responses_api(messages)

    assert len(input_items) == 1
    msg = input_items[0]
@@ -2176,11 +2177,257 @@ def test_convert_chat_completion_file_type_with_file_id():
        }
    ]

-    input_items, instructions = (
-        handler.convert_chat_completion_messages_to_responses_api(messages)
-    )
+    (
+        input_items,
+        instructions,
+    ) = handler.convert_chat_completion_messages_to_responses_api(messages)

    content = input_items[0]["content"]
    assert content[1]["type"] == "input_file"
    assert content[1]["file_id"] == "file-abc123"
    assert "file_data" not in content[1]
+
+
+# =============================================================================
+# Tests for reasoning_items round-trip (encrypted_content preservation)
+# =============================================================================
+
+
+def test_reasoning_items_non_streaming_round_trip():
+    """
+    Non-streaming: verify that reasoning_items (with encrypted_content) are:
+      1. Extracted from ResponseReasoningItem and attached to the Message.
+      2. Emitted as a 'reasoning' input item when the assistant message is
+         passed back to convert_chat_completion_messages_to_responses_api.
+    """
+    from unittest.mock import Mock
+
+    from openai.types.responses import ResponseOutputMessage, ResponseOutputText
+    from openai.types.responses.response_reasoning_item import (
+        ResponseReasoningItem,
+        Summary,
+    )
+
+    from litellm.completion_extras.litellm_responses_transformation.transformation import (
+        LiteLLMResponsesTransformationHandler,
+    )
+    from litellm.types.llms.openai import (
+        InputTokensDetails,
+        OutputTokensDetails,
+        ResponseAPIUsage,
+        ResponsesAPIResponse,
+    )
+    from litellm.types.utils import ModelResponse, Usage
+
+    handler = LiteLLMResponsesTransformationHandler()
+
+    encrypted = "gAAAAABpw5abc123FAKE=="
+    summary_text = "**Thinking about it**\n\nSome reasoning here."
+
+    reasoning_item = ResponseReasoningItem(
+        id="rs_test001",
+        summary=[Summary(text=summary_text, type="summary_text")],
+        type="reasoning",
+        content=None,
+        encrypted_content=encrypted,
+        status=None,
+    )
+    output_message = ResponseOutputMessage(
+        id="msg_test001",
+        content=[
+            ResponseOutputText(
+                annotations=[],
+                text="The answer is 42.",
+                type="output_text",
+                logprobs=[],
+            )
+        ],
+        role="assistant",
+        status="completed",
+        type="message",
+    )
+    usage = ResponseAPIUsage(
+        input_tokens=10,
+        input_tokens_details=InputTokensDetails(
+            audio_tokens=None, cached_tokens=0, text_tokens=None
+        ),
+        output_tokens=20,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=0, text_tokens=None),
+        total_tokens=30,
+        cost=None,
+    )
+    raw_response = ResponsesAPIResponse(
+        id="resp_test001",
+        created_at=1234567890,
+        error=None,
+        incomplete_details=None,
+        instructions=None,
+        metadata={},
+        model="gpt-5-mini",
+        object="response",
+        output=[reasoning_item, output_message],
+        parallel_tool_calls=True,
+        temperature=1.0,
+        tool_choice="auto",
+        tools=[],
+        top_p=1.0,
+        max_output_tokens=None,
+        previous_response_id=None,
+        reasoning={"effort": "low", "summary": "detailed"},
+        status="completed",
+        text={"format": {"type": "text"}, "verbosity": "medium"},
+        truncation="disabled",
+        usage=usage,
+        user=None,
+        store=True,
+        background=False,
+        billing={"payer": "developer"},
+        max_tool_calls=None,
+        prompt_cache_key=None,
+        safety_identifier=None,
+        service_tier="default",
+        top_logprobs=0,
+    )
+    model_response = ModelResponse(
+        id="chatcmpl-test001",
+        created=1234567890,
+        model=None,
+        object="chat.completion",
+        system_fingerprint=None,
+        choices=[],
+        usage=Usage(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+    )
+
+    result = handler.transform_response(
+        model="gpt-5-mini",
+        raw_response=raw_response,
+        model_response=model_response,
+        logging_obj=Mock(),
+        request_data={"model": "gpt-5-mini"},
+        messages=[{"role": "user", "content": "What is the answer?"}],
+        optional_params={},
+        litellm_params={},
+        encoding=Mock(),
+    )
+
+    # ── Part 1: reasoning_items on the response message ──────────────────────
+    assert len(result.choices) == 1
+    msg = result.choices[0].message
+
+    assert (
+        msg.reasoning_content == summary_text
+    ), "reasoning_content should equal summary text"
+
+    assert msg.reasoning_items is not None, "reasoning_items should be set"
+    assert len(msg.reasoning_items) == 1
+    ri = msg.reasoning_items[0]
+    assert ri["type"] == "reasoning"
+    assert ri["id"] == "rs_test001"
+    assert ri["encrypted_content"] == encrypted, "encrypted_content must be preserved"
+    assert len(ri["summary"]) == 1
+    assert ri["summary"][0]["text"] == summary_text
+
+    # ── Part 2: reasoning item round-trips through message history ────────────
+    history = [
+        {"role": "user", "content": "What is the answer?"},
+        {
+            "role": "assistant",
+            "content": msg.content,
+            "reasoning_items": msg.reasoning_items,
+        },
+        {"role": "user", "content": "Can you elaborate?"},
+    ]
+    input_items, _ = handler.convert_chat_completion_messages_to_responses_api(history)
+
+    # A reasoning input item must be emitted for the assistant turn
+    types = [item.get("type") for item in input_items]
+    assert (
+        "reasoning" in types
+    ), "reasoning input item must be emitted for the assistant turn"
+
+    reasoning_input = next(
+        item for item in input_items if item.get("type") == "reasoning"
+    )
+    assert reasoning_input["id"] == "rs_test001"
+    assert reasoning_input["encrypted_content"] == encrypted
+    assert reasoning_input["summary"][0]["text"] == summary_text
+
+    # reasoning item must come before the assistant message item
+    reasoning_idx = types.index("reasoning")
+    assistant_msg_idx = next(
+        i
+        for i, item in enumerate(input_items)
+        if item.get("type") == "message" and item.get("role") == "assistant"
+    )
+    assert (
+        reasoning_idx < assistant_msg_idx
+    ), "reasoning input item must precede the assistant message item"
+
+
+def test_reasoning_items_streaming_emitted_on_response_completed():
+    """
+    Streaming: verify that reasoning_items (with encrypted_content) are emitted
+    on the delta of the response.completed chunk, enabling the caller to
+    round-trip them in subsequent requests.
+    """
+    from litellm.completion_extras.litellm_responses_transformation.transformation import (
+        OpenAiResponsesToChatCompletionStreamIterator,
+    )
+
+    iterator = OpenAiResponsesToChatCompletionStreamIterator(
+        streaming_response=None, sync_stream=True
+    )
+
+    encrypted = "gAAAAABpw5xyz987FAKE=="
+    summary_text = "**Reasoning summary**\n\nModel thought about this carefully."
+
+    chunk = {
+        "type": "response.completed",
+        "response": {
+            "id": "resp_stream001",
+            "status": "completed",
+            "output": [
+                {
+                    "type": "reasoning",
+                    "id": "rs_stream001",
+                    "encrypted_content": encrypted,
+                    "summary": [{"type": "summary_text", "text": summary_text}],
+                },
+                {
+                    "type": "message",
+                    "id": "msg_stream001",
+                    "role": "assistant",
+                    "content": [{"type": "output_text", "text": "The answer."}],
+                    "status": "completed",
+                },
+            ],
+            "usage": {
+                "input_tokens": 10,
+                "output_tokens": 5,
+                "total_tokens": 15,
+                "input_tokens_details": {"cached_tokens": 0},
+                "output_tokens_details": {"reasoning_tokens": 0},
+            },
+        },
+    }
+
+    result = iterator.chunk_parser(chunk)
+
+    assert len(result.choices) == 1
+    delta = result.choices[0].delta
+
+    # finish_reason must be set (response is complete)
+    assert result.choices[0].finish_reason == "stop"
+
+    # reasoning_items must be on the delta
+    assert (
+        getattr(delta, "reasoning_items", None) is not None
+    ), "reasoning_items must be present on the response.completed delta"
+    assert len(delta.reasoning_items) == 1
+    ri = delta.reasoning_items[0]
+    assert ri["type"] == "reasoning"
+    assert ri["id"] == "rs_stream001"
+    assert (
+        ri["encrypted_content"] == encrypted
+    ), "encrypted_content must be preserved in streaming"
+    assert ri["summary"][0]["text"] == summary_text