diff --git a/docs/my-website/docs/providers/openai.md b/docs/my-website/docs/providers/openai.md index 2907cdf9f4..1f4a1687e8 100644 --- a/docs/my-website/docs/providers/openai.md +++ b/docs/my-website/docs/providers/openai.md @@ -581,6 +581,90 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \ See [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning) for more details on organization verification requirements. +### Multi-turn Conversations with `reasoning_items` + +For multi-turn conversations you need `reasoning_items`: structured blocks that include the `encrypted_content` token OpenAI uses to restore reasoning state on the next request. Pass `include=["reasoning.encrypted_content"]` on every call where you want that token returned. + + + + +```python showLineNumbers title="Non-streaming: round-trip reasoning_items" +import litellm + +messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}] + +# Turn 1 — get reasoning_items (encrypted_content) +response = litellm.completion( + model="openai/responses/gpt-5-mini", + messages=messages, + reasoning_effort="low", + include=["reasoning.encrypted_content"], +) + +assistant_msg = response.choices[0].message + +# Turn 2 — pass reasoning_items back; LiteLLM converts to the correct Responses API format +messages.append({ + "role": "assistant", + "content": assistant_msg.content, + "reasoning_items": assistant_msg.reasoning_items, +}) +messages.append({"role": "user", "content": "Now summarize your reasoning."}) + +response2 = litellm.completion( + model="openai/responses/gpt-5-mini", + messages=messages, + reasoning_effort="low", + include=["reasoning.encrypted_content"], +) +``` + + + + +`reasoning_items` (with `encrypted_content`) arrive on the final chunk when the full response completes: + +```python showLineNumbers title="Streaming: collect and round-trip reasoning_items" +import litellm + +messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}] 
+ +collected_content = [] +collected_reasoning_items = [] + +stream = litellm.completion( + model="openai/responses/gpt-5-mini", + messages=messages, + stream=True, + reasoning_effort="low", + include=["reasoning.encrypted_content"], +) + +for chunk in stream: + delta = chunk.choices[0].delta + if delta.content: + collected_content.append(delta.content) + if getattr(delta, "reasoning_items", None): + collected_reasoning_items.extend(delta.reasoning_items) + +messages.append({ + "role": "assistant", + "content": "".join(collected_content), + "reasoning_items": collected_reasoning_items or None, +}) +messages.append({"role": "user", "content": "Continue the conversation."}) + +response2 = litellm.completion( + model="openai/responses/gpt-5-mini", + messages=messages, + reasoning_effort="low", + include=["reasoning.encrypted_content"], +) +``` + + + + ### Verbosity Control for GPT-5 Models The `verbosity` parameter controls the length and detail of responses from GPT-5 family models. It accepts three values: `"low"`, `"medium"`, or `"high"`. diff --git a/litellm/completion_extras/litellm_responses_transformation/transformation.py b/litellm/completion_extras/litellm_responses_transformation/transformation.py index ee4cdbcdf3..6e6070f7f2 100644 --- a/litellm/completion_extras/litellm_responses_transformation/transformation.py +++ b/litellm/completion_extras/litellm_responses_transformation/transformation.py @@ -32,6 +32,7 @@ from litellm.llms.base_llm.bridges.completion_transformation import ( ) from litellm.types.llms.openai import ( ChatCompletionAnnotation, + ChatCompletionReasoningItem, ChatCompletionToolParamFunctionChunk, Reasoning, ResponsesAPIOptionalRequestParams, @@ -55,6 +56,49 @@ if TYPE_CHECKING: ) +def _build_reasoning_item( + item_id: str, + encrypted_content: Optional[str], + summary_raw: Any, +) -> Dict[str, Any]: + """Build a ChatCompletionReasoningItem-shaped dict from raw response data. 
+ + Handles both pydantic objects (attribute access) and plain dicts. + """ + summary: List[Dict[str, Any]] = [] + for s in summary_raw or []: + if isinstance(s, dict): + summary.append( + {"type": s.get("type", "summary_text"), "text": s.get("text", "")} + ) + else: + summary.append( + { + "type": getattr(s, "type", "summary_text"), + "text": getattr(s, "text", ""), + } + ) + return { + "id": item_id, + "type": "reasoning", + "encrypted_content": encrypted_content, + "summary": summary, + } + + +def _reasoning_item_to_response_input(r_item: Dict[str, Any]) -> Dict[str, Any]: + """Convert a stored ChatCompletionReasoningItem back to a Responses API input item.""" + r_input: Dict[str, Any] = { + "type": "reasoning", + "id": r_item.get("id") or f"rs_{id(r_item)}", + # summary is always required by the Responses API, even when empty + "summary": r_item.get("summary") or [], + } + if r_item.get("encrypted_content"): + r_input["encrypted_content"] = r_item["encrypted_content"] + return r_input + + class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge): """ Handler for transforming /chat/completions api requests to litellm.responses requests @@ -202,10 +246,12 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge): } ) elif role == "assistant" and tool_calls and isinstance(tool_calls, list): + for r_item in msg.get("reasoning_items") or []: + input_items.append(_reasoning_item_to_response_input(r_item)) for tool_call in tool_calls: function = tool_call.get("function") if function: - input_tool_call = { + input_tool_call: Dict[str, Any] = { "type": "function_call", "call_id": tool_call["id"], } @@ -217,7 +263,9 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge): else: raise ValueError(f"tool call not supported: {tool_call}") elif content is not None: - # Regular user/assistant message + if role == "assistant": + for r_item in msg.get("reasoning_items") or []: + 
input_items.append(_reasoning_item_to_response_input(r_item)) input_items.append( { "type": "message", @@ -411,6 +459,7 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge): choices: List[Choices] = [] index = 0 reasoning_content: Optional[str] = None + pending_reasoning_item: Optional[Dict[str, Any]] = None # Collect all tool calls to put them in a single choice # (Chat Completions API expects all tool calls in one message) @@ -419,9 +468,16 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge): for item in output_items: if isinstance(item, ResponseReasoningItem): - for summary_item in item.summary: - response_text = getattr(summary_item, "text", "") - reasoning_content = response_text if response_text else "" + pending_reasoning_item = _build_reasoning_item( + item_id=item.id, + encrypted_content=getattr(item, "encrypted_content", None), + summary_raw=item.summary, + ) + reasoning_content = " ".join( + s["text"] + for s in pending_reasoning_item["summary"] + if s.get("text") + ) elif isinstance(item, ResponseOutputMessage): for content in item.content: @@ -436,6 +492,12 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge): content=response_text if response_text else "", reasoning_content=reasoning_content, annotations=annotations, + reasoning_items=cast( + Optional[List[ChatCompletionReasoningItem]], + [pending_reasoning_item] + if pending_reasoning_item is not None + else None, + ), ) choices.append( @@ -446,7 +508,8 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge): ) ) - reasoning_content = None # flush reasoning content + reasoning_content = None # flush + pending_reasoning_item = None # flush index += 1 elif isinstance(item, ResponseFunctionToolCall): @@ -489,11 +552,18 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge): content=None, tool_calls=accumulated_tool_calls, reasoning_content=reasoning_content, + 
reasoning_items=cast( + Optional[List[ChatCompletionReasoningItem]], + [pending_reasoning_item] + if pending_reasoning_item is not None + else None, + ), ) choices.append( Choices(message=msg, finish_reason="tool_calls", index=index) ) reasoning_content = None + pending_reasoning_item = None return choices @@ -1232,6 +1302,25 @@ class OpenAiResponsesToChatCompletionStreamIterator(BaseModelResponseIterator): finish_reason = "tool_calls" if has_function_calls else "stop" + # Extract reasoning items with encrypted_content for round-tripping + completed_reasoning_items: Optional[List[Dict[str, Any]]] = None + for item in output_items: + if not isinstance(item, dict) or item.get("type") != "reasoning": + continue + if completed_reasoning_items is None: + completed_reasoning_items = [] + completed_reasoning_items.append( + _build_reasoning_item( + item_id=item.get("id", ""), + encrypted_content=item.get("encrypted_content"), + summary_raw=item.get("summary"), + ) + ) + completed_reasoning_items_typed = cast( + Optional[List[ChatCompletionReasoningItem]], + completed_reasoning_items, + ) + usage = None if response_data.get("usage"): from litellm.responses.utils import ResponseAPILoggingUtils @@ -1245,7 +1334,10 @@ class OpenAiResponsesToChatCompletionStreamIterator(BaseModelResponseIterator): choices=[ StreamingChoices( index=0, - delta=Delta(content=""), + delta=Delta( + content="", + reasoning_items=completed_reasoning_items_typed, + ), finish_reason=finish_reason, ) ], diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index 67e4fadf63..1bb2b99c01 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -831,6 +831,10 @@ class CustomStreamWrapper: "annotations" in model_response.choices[0].delta and model_response.choices[0].delta.annotations is not None ) + or ( + getattr(model_response.choices[0].delta, "reasoning_items", None) + is not None + ) ): 
return True else: diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 5a80b40d61..aa5719bc7a 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -536,6 +536,20 @@ class ChatCompletionRedactedThinkingBlock(TypedDict, total=False): cache_control: Optional[Union[dict, ChatCompletionCachedContent]] +class ChatCompletionReasoningSummaryTextBlock(TypedDict, total=False): + type: Required[Literal["summary_text"]] + text: str + + +class ChatCompletionReasoningItem(TypedDict, total=False): + """Represents an OpenAI Responses API reasoning item for round-tripping in conversation history.""" + + type: Required[Literal["reasoning"]] + id: str + encrypted_content: Optional[str] + summary: List["ChatCompletionReasoningSummaryTextBlock"] + + class WebSearchOptionsUserLocationApproximate(TypedDict, total=False): city: str """Free text input for the city of the user, e.g. `San Francisco`.""" diff --git a/litellm/types/utils.py b/litellm/types/utils.py index bd673da8be..8b94fbdad6 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -58,6 +58,7 @@ from .llms.openai import ( AllMessageValues, Batch, ChatCompletionAnnotation, + ChatCompletionReasoningItem, ChatCompletionRedactedThinkingBlock, ChatCompletionThinkingBlock, ChatCompletionToolCallChunk, @@ -1132,6 +1133,7 @@ class Message(SafeAttributeModel, OpenAIObject): thinking_blocks: Optional[ List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]] ] = None + reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None) annotations: Optional[List[ChatCompletionAnnotation]] = None @@ -1150,6 +1152,7 @@ class Message(SafeAttributeModel, OpenAIObject): Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock] ] ] = None, + reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None, annotations: Optional[List[ChatCompletionAnnotation]] = None, 
**params, ): @@ -1182,6 +1185,9 @@ class Message(SafeAttributeModel, OpenAIObject): if thinking_blocks is not None: init_values["thinking_blocks"] = thinking_blocks + if reasoning_items is not None: + init_values["reasoning_items"] = reasoning_items + if annotations is not None: init_values["annotations"] = annotations @@ -1219,6 +1225,11 @@ class Message(SafeAttributeModel, OpenAIObject): if hasattr(self, "thinking_blocks"): del self.thinking_blocks + if reasoning_items is None: + # ensure default response matches OpenAI spec + if hasattr(self, "reasoning_items"): + del self.reasoning_items + add_provider_specific_fields(self, provider_specific_fields) def get(self, key, default=None): @@ -1246,6 +1257,7 @@ class Delta(SafeAttributeModel, OpenAIObject): thinking_blocks: Optional[ List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]] ] = None + reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None) def __init__( @@ -1262,6 +1274,7 @@ class Delta(SafeAttributeModel, OpenAIObject): Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock] ] ] = None, + reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None, annotations: Optional[List[ChatCompletionAnnotation]] = None, **params, ): @@ -1295,6 +1308,13 @@ class Delta(SafeAttributeModel, OpenAIObject): # ensure default response matches OpenAI spec del self.thinking_blocks + if reasoning_items is not None: + self.reasoning_items = reasoning_items + else: + # ensure default response matches OpenAI spec + if hasattr(self, "reasoning_items"): + del self.reasoning_items + # Add annotations to the delta, ensure they are only on Delta if they exist (Match OpenAI spec) if annotations is not None: self.annotations = annotations diff --git a/tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py 
b/tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py index 8bc6ffc050..e40543e01a 100644 --- a/tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py +++ b/tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py @@ -2127,9 +2127,10 @@ def test_convert_chat_completion_file_type_to_input_file(): } ] - input_items, instructions = ( - handler.convert_chat_completion_messages_to_responses_api(messages) - ) + ( + input_items, + instructions, + ) = handler.convert_chat_completion_messages_to_responses_api(messages) assert len(input_items) == 1 msg = input_items[0] @@ -2176,11 +2177,257 @@ def test_convert_chat_completion_file_type_with_file_id(): } ] - input_items, instructions = ( - handler.convert_chat_completion_messages_to_responses_api(messages) - ) + ( + input_items, + instructions, + ) = handler.convert_chat_completion_messages_to_responses_api(messages) content = input_items[0]["content"] assert content[1]["type"] == "input_file" assert content[1]["file_id"] == "file-abc123" assert "file_data" not in content[1] + + +# ============================================================================= +# Tests for reasoning_items round-trip (encrypted_content preservation) +# ============================================================================= + + +def test_reasoning_items_non_streaming_round_trip(): + """ + Non-streaming: verify that reasoning_items (with encrypted_content) are: + 1. Extracted from ResponseReasoningItem and attached to the Message. + 2. Emitted as a 'reasoning' input item when the assistant message is + passed back to convert_chat_completion_messages_to_responses_api. 
+ """ + from unittest.mock import Mock + + from openai.types.responses import ResponseOutputMessage, ResponseOutputText + from openai.types.responses.response_reasoning_item import ( + ResponseReasoningItem, + Summary, + ) + + from litellm.completion_extras.litellm_responses_transformation.transformation import ( + LiteLLMResponsesTransformationHandler, + ) + from litellm.types.llms.openai import ( + InputTokensDetails, + OutputTokensDetails, + ResponseAPIUsage, + ResponsesAPIResponse, + ) + from litellm.types.utils import ModelResponse, Usage + + handler = LiteLLMResponsesTransformationHandler() + + encrypted = "gAAAAABpw5abc123FAKE==" + summary_text = "**Thinking about it**\n\nSome reasoning here." + + reasoning_item = ResponseReasoningItem( + id="rs_test001", + summary=[Summary(text=summary_text, type="summary_text")], + type="reasoning", + content=None, + encrypted_content=encrypted, + status=None, + ) + output_message = ResponseOutputMessage( + id="msg_test001", + content=[ + ResponseOutputText( + annotations=[], + text="The answer is 42.", + type="output_text", + logprobs=[], + ) + ], + role="assistant", + status="completed", + type="message", + ) + usage = ResponseAPIUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + audio_tokens=None, cached_tokens=0, text_tokens=None + ), + output_tokens=20, + output_tokens_details=OutputTokensDetails(reasoning_tokens=0, text_tokens=None), + total_tokens=30, + cost=None, + ) + raw_response = ResponsesAPIResponse( + id="resp_test001", + created_at=1234567890, + error=None, + incomplete_details=None, + instructions=None, + metadata={}, + model="gpt-5-mini", + object="response", + output=[reasoning_item, output_message], + parallel_tool_calls=True, + temperature=1.0, + tool_choice="auto", + tools=[], + top_p=1.0, + max_output_tokens=None, + previous_response_id=None, + reasoning={"effort": "low", "summary": "detailed"}, + status="completed", + text={"format": {"type": "text"}, "verbosity": "medium"}, + 
truncation="disabled", + usage=usage, + user=None, + store=True, + background=False, + billing={"payer": "developer"}, + max_tool_calls=None, + prompt_cache_key=None, + safety_identifier=None, + service_tier="default", + top_logprobs=0, + ) + model_response = ModelResponse( + id="chatcmpl-test001", + created=1234567890, + model=None, + object="chat.completion", + system_fingerprint=None, + choices=[], + usage=Usage(completion_tokens=0, prompt_tokens=0, total_tokens=0), + ) + + result = handler.transform_response( + model="gpt-5-mini", + raw_response=raw_response, + model_response=model_response, + logging_obj=Mock(), + request_data={"model": "gpt-5-mini"}, + messages=[{"role": "user", "content": "What is the answer?"}], + optional_params={}, + litellm_params={}, + encoding=Mock(), + ) + + # ── Part 1: reasoning_items on the response message ────────────────────── + assert len(result.choices) == 1 + msg = result.choices[0].message + + assert ( + msg.reasoning_content == summary_text + ), "reasoning_content should equal summary text" + + assert msg.reasoning_items is not None, "reasoning_items should be set" + assert len(msg.reasoning_items) == 1 + ri = msg.reasoning_items[0] + assert ri["type"] == "reasoning" + assert ri["id"] == "rs_test001" + assert ri["encrypted_content"] == encrypted, "encrypted_content must be preserved" + assert len(ri["summary"]) == 1 + assert ri["summary"][0]["text"] == summary_text + + # ── Part 2: reasoning item round-trips through message history ──────────── + history = [ + {"role": "user", "content": "What is the answer?"}, + { + "role": "assistant", + "content": msg.content, + "reasoning_items": msg.reasoning_items, + }, + {"role": "user", "content": "Can you elaborate?"}, + ] + input_items, _ = handler.convert_chat_completion_messages_to_responses_api(history) + + # The reasoning input item must appear before the assistant message item + types = [item.get("type") for item in input_items] + assert ( + "reasoning" in types + ), 
"reasoning input item must be emitted for the assistant turn" + + reasoning_input = next( + item for item in input_items if item.get("type") == "reasoning" + ) + assert reasoning_input["id"] == "rs_test001" + assert reasoning_input["encrypted_content"] == encrypted + assert reasoning_input["summary"][0]["text"] == summary_text + + # reasoning item must come before the assistant message item + reasoning_idx = types.index("reasoning") + assistant_msg_idx = next( + i + for i, item in enumerate(input_items) + if item.get("type") == "message" and item.get("role") == "assistant" + ) + assert ( + reasoning_idx < assistant_msg_idx + ), "reasoning input item must precede the assistant message item" + + +def test_reasoning_items_streaming_emitted_on_response_completed(): + """ + Streaming: verify that reasoning_items (with encrypted_content) are emitted + on the delta of the response.completed chunk, enabling the caller to + round-trip them in subsequent requests. + """ + from litellm.completion_extras.litellm_responses_transformation.transformation import ( + OpenAiResponsesToChatCompletionStreamIterator, + ) + + iterator = OpenAiResponsesToChatCompletionStreamIterator( + streaming_response=None, sync_stream=True + ) + + encrypted = "gAAAAABpw5xyz987FAKE==" + summary_text = "**Reasoning summary**\n\nModel thought about this carefully." 
+ + chunk = { + "type": "response.completed", + "response": { + "id": "resp_stream001", + "status": "completed", + "output": [ + { + "type": "reasoning", + "id": "rs_stream001", + "encrypted_content": encrypted, + "summary": [{"type": "summary_text", "text": summary_text}], + }, + { + "type": "message", + "id": "msg_stream001", + "role": "assistant", + "content": [{"type": "output_text", "text": "The answer."}], + "status": "completed", + }, + ], + "usage": { + "input_tokens": 10, + "output_tokens": 5, + "total_tokens": 15, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + }, + } + + result = iterator.chunk_parser(chunk) + + assert len(result.choices) == 1 + delta = result.choices[0].delta + + # finish_reason must be set (response is complete) + assert result.choices[0].finish_reason == "stop" + + # reasoning_items must be on the delta + assert ( + getattr(delta, "reasoning_items", None) is not None + ), "reasoning_items must be present on the response.completed delta" + assert len(delta.reasoning_items) == 1 + ri = delta.reasoning_items[0] + assert ri["type"] == "reasoning" + assert ri["id"] == "rs_stream001" + assert ( + ri["encrypted_content"] == encrypted + ), "encrypted_content must be preserved in streaming" + assert ri["summary"][0]["text"] == summary_text