mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 18:48:36 +00:00
feat(openai): round-trip Responses API reasoning_items in chat completions
Made-with: Cursor
This commit is contained in:
@@ -581,6 +581,90 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||
|
||||
See [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning) for more details on organization verification requirements.
|
||||
|
||||
### Multi-turn Conversations with `reasoning_items`
|
||||
|
||||
For multi-turn conversations you need `reasoning_items`: structured blocks that include the `encrypted_content` token OpenAI uses to restore reasoning state on the next request. Pass `include=["reasoning.encrypted_content"]` on every call where you want that token returned.
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="non-streaming" label="Non-Streaming">
|
||||
|
||||
```python showLineNumbers title="Non-streaming: round-trip reasoning_items"
|
||||
import litellm
|
||||
|
||||
messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}]
|
||||
|
||||
# Turn 1 — request reasoning_items (each one carries encrypted_content)
|
||||
response = litellm.completion(
|
||||
model="openai/responses/gpt-5-mini",
|
||||
messages=messages,
|
||||
reasoning_effort="low",
|
||||
include=["reasoning.encrypted_content"],
|
||||
)
|
||||
|
||||
assistant_msg = response.choices[0].message
|
||||
|
||||
# Turn 2 — pass reasoning_items back; LiteLLM converts to the correct Responses API format
|
||||
messages.append({
|
||||
"role": "assistant",
|
||||
"content": assistant_msg.content,
|
||||
"reasoning_items": assistant_msg.reasoning_items,
|
||||
})
|
||||
messages.append({"role": "user", "content": "Now summarize your reasoning."})
|
||||
|
||||
response2 = litellm.completion(
|
||||
model="openai/responses/gpt-5-mini",
|
||||
messages=messages,
|
||||
reasoning_effort="low",
|
||||
include=["reasoning.encrypted_content"],
|
||||
)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="streaming" label="Streaming">
|
||||
|
||||
`reasoning_items` (with `encrypted_content`) arrive on the final chunk when the full response completes:
|
||||
|
||||
```python showLineNumbers title="Streaming: collect and round-trip reasoning_items"
|
||||
import litellm
|
||||
|
||||
messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}]
|
||||
|
||||
collected_content = []
|
||||
collected_reasoning_items = []
|
||||
|
||||
stream = litellm.completion(
|
||||
model="openai/responses/gpt-5-mini",
|
||||
messages=messages,
|
||||
stream=True,
|
||||
reasoning_effort="low",
|
||||
include=["reasoning.encrypted_content"],
|
||||
)
|
||||
|
||||
for chunk in stream:
|
||||
delta = chunk.choices[0].delta
|
||||
if delta.content:
|
||||
collected_content.append(delta.content)
|
||||
if getattr(delta, "reasoning_items", None):
|
||||
collected_reasoning_items.extend(delta.reasoning_items)
|
||||
|
||||
messages.append({
|
||||
"role": "assistant",
|
||||
"content": "".join(collected_content),
|
||||
"reasoning_items": collected_reasoning_items or None,
|
||||
})
|
||||
messages.append({"role": "user", "content": "Continue the conversation."})
|
||||
|
||||
response2 = litellm.completion(
|
||||
model="openai/responses/gpt-5-mini",
|
||||
messages=messages,
|
||||
reasoning_effort="low",
|
||||
include=["reasoning.encrypted_content"],
|
||||
)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
### Verbosity Control for GPT-5 Models
|
||||
|
||||
The `verbosity` parameter controls the length and detail of responses from GPT-5 family models. It accepts three values: `"low"`, `"medium"`, or `"high"`.
|
||||
|
||||
@@ -32,6 +32,7 @@ from litellm.llms.base_llm.bridges.completion_transformation import (
|
||||
)
|
||||
from litellm.types.llms.openai import (
|
||||
ChatCompletionAnnotation,
|
||||
ChatCompletionReasoningItem,
|
||||
ChatCompletionToolParamFunctionChunk,
|
||||
Reasoning,
|
||||
ResponsesAPIOptionalRequestParams,
|
||||
@@ -55,6 +56,49 @@ if TYPE_CHECKING:
|
||||
)
|
||||
|
||||
|
||||
def _build_reasoning_item(
    item_id: str,
    encrypted_content: Optional[str],
    summary_raw: Any,
) -> Dict[str, Any]:
    """Build a ChatCompletionReasoningItem-shaped dict from raw response data.

    Handles both pydantic objects (attribute access) and plain dicts.

    Args:
        item_id: Identifier of the reasoning item; copied to ``"id"``.
        encrypted_content: Encrypted reasoning payload, or ``None`` when the
            response carried none; copied through unchanged.
        summary_raw: Iterable of summary entries (dicts or attribute-style
            objects). ``None`` or any other falsy value yields an empty
            summary list.

    Returns:
        A dict with the keys ``"id"``, ``"type"`` (always ``"reasoning"``),
        ``"encrypted_content"`` and ``"summary"``, where every summary entry
        is normalized to ``{"type": ..., "text": ...}``.
    """

    def _read(entry: Any, field: str, fallback: str) -> Any:
        # Dicts are read by key, anything else by attribute. A field that is
        # missing *or* explicitly None falls back to the default so that
        # None never ends up in a summary entry.
        if isinstance(entry, dict):
            value = entry.get(field)
        else:
            value = getattr(entry, field, None)
        return fallback if value is None else value

    summary: List[Dict[str, Any]] = [
        {
            "type": _read(entry, "type", "summary_text"),
            "text": _read(entry, "text", ""),
        }
        for entry in summary_raw or []
    ]
    return {
        "id": item_id,
        "type": "reasoning",
        "encrypted_content": encrypted_content,
        "summary": summary,
    }
|
||||
|
||||
|
||||
def _reasoning_item_to_response_input(r_item: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a stored ChatCompletionReasoningItem back to a Responses API input item.

    Args:
        r_item: Reasoning item dict as stored on an assistant message; read
            via the keys ``"id"``, ``"summary"`` and ``"encrypted_content"``.

    Returns:
        A new dict with ``"type": "reasoning"``, an ``"id"``, a ``"summary"``
        list, and ``"encrypted_content"`` only when the stored value is
        truthy. The summary list is a shallow copy, so appending to or
        removing from it does not modify the caller's message history.
    """
    r_input: Dict[str, Any] = {
        "type": "reasoning",
        # Placeholder id when none was stored. It is derived from the object
        # identity, so it is not stable across runs.
        # NOTE(review): confirm the Responses API accepts a synthesized id.
        "id": r_item.get("id") or f"rs_{id(r_item)}",
        # summary is always required by the Responses API, even when empty
        "summary": list(r_item.get("summary") or []),
    }
    encrypted_content = r_item.get("encrypted_content")
    if encrypted_content:
        r_input["encrypted_content"] = encrypted_content
    return r_input
|
||||
|
||||
|
||||
class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
|
||||
"""
|
||||
Handler for transforming /chat/completions api requests to litellm.responses requests
|
||||
@@ -202,10 +246,12 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
|
||||
}
|
||||
)
|
||||
elif role == "assistant" and tool_calls and isinstance(tool_calls, list):
|
||||
for r_item in msg.get("reasoning_items") or []:
|
||||
input_items.append(_reasoning_item_to_response_input(r_item))
|
||||
for tool_call in tool_calls:
|
||||
function = tool_call.get("function")
|
||||
if function:
|
||||
input_tool_call = {
|
||||
input_tool_call: Dict[str, Any] = {
|
||||
"type": "function_call",
|
||||
"call_id": tool_call["id"],
|
||||
}
|
||||
@@ -217,7 +263,9 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
|
||||
else:
|
||||
raise ValueError(f"tool call not supported: {tool_call}")
|
||||
elif content is not None:
|
||||
# Regular user/assistant message
|
||||
if role == "assistant":
|
||||
for r_item in msg.get("reasoning_items") or []:
|
||||
input_items.append(_reasoning_item_to_response_input(r_item))
|
||||
input_items.append(
|
||||
{
|
||||
"type": "message",
|
||||
@@ -411,6 +459,7 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
|
||||
choices: List[Choices] = []
|
||||
index = 0
|
||||
reasoning_content: Optional[str] = None
|
||||
pending_reasoning_item: Optional[Dict[str, Any]] = None
|
||||
|
||||
# Collect all tool calls to put them in a single choice
|
||||
# (Chat Completions API expects all tool calls in one message)
|
||||
@@ -419,9 +468,16 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
|
||||
|
||||
for item in output_items:
|
||||
if isinstance(item, ResponseReasoningItem):
|
||||
for summary_item in item.summary:
|
||||
response_text = getattr(summary_item, "text", "")
|
||||
reasoning_content = response_text if response_text else ""
|
||||
pending_reasoning_item = _build_reasoning_item(
|
||||
item_id=item.id,
|
||||
encrypted_content=getattr(item, "encrypted_content", None),
|
||||
summary_raw=item.summary,
|
||||
)
|
||||
reasoning_content = " ".join(
|
||||
s["text"]
|
||||
for s in pending_reasoning_item["summary"]
|
||||
if s.get("text")
|
||||
)
|
||||
|
||||
elif isinstance(item, ResponseOutputMessage):
|
||||
for content in item.content:
|
||||
@@ -436,6 +492,12 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
|
||||
content=response_text if response_text else "",
|
||||
reasoning_content=reasoning_content,
|
||||
annotations=annotations,
|
||||
reasoning_items=cast(
|
||||
Optional[List[ChatCompletionReasoningItem]],
|
||||
[pending_reasoning_item]
|
||||
if pending_reasoning_item is not None
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
choices.append(
|
||||
@@ -446,7 +508,8 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
|
||||
)
|
||||
)
|
||||
|
||||
reasoning_content = None # flush reasoning content
|
||||
reasoning_content = None # flush
|
||||
pending_reasoning_item = None # flush
|
||||
index += 1
|
||||
|
||||
elif isinstance(item, ResponseFunctionToolCall):
|
||||
@@ -489,11 +552,18 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
|
||||
content=None,
|
||||
tool_calls=accumulated_tool_calls,
|
||||
reasoning_content=reasoning_content,
|
||||
reasoning_items=cast(
|
||||
Optional[List[ChatCompletionReasoningItem]],
|
||||
[pending_reasoning_item]
|
||||
if pending_reasoning_item is not None
|
||||
else None,
|
||||
),
|
||||
)
|
||||
choices.append(
|
||||
Choices(message=msg, finish_reason="tool_calls", index=index)
|
||||
)
|
||||
reasoning_content = None
|
||||
pending_reasoning_item = None
|
||||
|
||||
return choices
|
||||
|
||||
@@ -1232,6 +1302,25 @@ class OpenAiResponsesToChatCompletionStreamIterator(BaseModelResponseIterator):
|
||||
|
||||
finish_reason = "tool_calls" if has_function_calls else "stop"
|
||||
|
||||
# Extract reasoning items with encrypted_content for round-tripping
|
||||
completed_reasoning_items: Optional[List[Dict[str, Any]]] = None
|
||||
for item in output_items:
|
||||
if not isinstance(item, dict) or item.get("type") != "reasoning":
|
||||
continue
|
||||
if completed_reasoning_items is None:
|
||||
completed_reasoning_items = []
|
||||
completed_reasoning_items.append(
|
||||
_build_reasoning_item(
|
||||
item_id=item.get("id", ""),
|
||||
encrypted_content=item.get("encrypted_content"),
|
||||
summary_raw=item.get("summary"),
|
||||
)
|
||||
)
|
||||
completed_reasoning_items_typed = cast(
|
||||
Optional[List[ChatCompletionReasoningItem]],
|
||||
completed_reasoning_items,
|
||||
)
|
||||
|
||||
usage = None
|
||||
if response_data.get("usage"):
|
||||
from litellm.responses.utils import ResponseAPILoggingUtils
|
||||
@@ -1245,7 +1334,10 @@ class OpenAiResponsesToChatCompletionStreamIterator(BaseModelResponseIterator):
|
||||
choices=[
|
||||
StreamingChoices(
|
||||
index=0,
|
||||
delta=Delta(content=""),
|
||||
delta=Delta(
|
||||
content="",
|
||||
reasoning_items=completed_reasoning_items_typed,
|
||||
),
|
||||
finish_reason=finish_reason,
|
||||
)
|
||||
],
|
||||
|
||||
@@ -831,6 +831,10 @@ class CustomStreamWrapper:
|
||||
"annotations" in model_response.choices[0].delta
|
||||
and model_response.choices[0].delta.annotations is not None
|
||||
)
|
||||
or (
|
||||
getattr(model_response.choices[0].delta, "reasoning_items", None)
|
||||
is not None
|
||||
)
|
||||
):
|
||||
return True
|
||||
else:
|
||||
|
||||
@@ -536,6 +536,20 @@ class ChatCompletionRedactedThinkingBlock(TypedDict, total=False):
|
||||
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
|
||||
|
||||
|
||||
class ChatCompletionReasoningSummaryTextBlock(TypedDict, total=False):
|
||||
type: Required[Literal["summary_text"]]
|
||||
text: str
|
||||
|
||||
|
||||
class ChatCompletionReasoningItem(TypedDict, total=False):
|
||||
"""Represents an OpenAI Responses API reasoning item for round-tripping in conversation history."""
|
||||
|
||||
type: Required[Literal["reasoning"]]
|
||||
id: str
|
||||
encrypted_content: Optional[str]
|
||||
summary: List["ChatCompletionReasoningSummaryTextBlock"]
|
||||
|
||||
|
||||
class WebSearchOptionsUserLocationApproximate(TypedDict, total=False):
|
||||
city: str
|
||||
"""Free text input for the city of the user, e.g. `San Francisco`."""
|
||||
|
||||
@@ -58,6 +58,7 @@ from .llms.openai import (
|
||||
AllMessageValues,
|
||||
Batch,
|
||||
ChatCompletionAnnotation,
|
||||
ChatCompletionReasoningItem,
|
||||
ChatCompletionRedactedThinkingBlock,
|
||||
ChatCompletionThinkingBlock,
|
||||
ChatCompletionToolCallChunk,
|
||||
@@ -1132,6 +1133,7 @@ class Message(SafeAttributeModel, OpenAIObject):
|
||||
thinking_blocks: Optional[
|
||||
List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
|
||||
] = None
|
||||
reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None
|
||||
provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
|
||||
annotations: Optional[List[ChatCompletionAnnotation]] = None
|
||||
|
||||
@@ -1150,6 +1152,7 @@ class Message(SafeAttributeModel, OpenAIObject):
|
||||
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
|
||||
]
|
||||
] = None,
|
||||
reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None,
|
||||
annotations: Optional[List[ChatCompletionAnnotation]] = None,
|
||||
**params,
|
||||
):
|
||||
@@ -1182,6 +1185,9 @@ class Message(SafeAttributeModel, OpenAIObject):
|
||||
if thinking_blocks is not None:
|
||||
init_values["thinking_blocks"] = thinking_blocks
|
||||
|
||||
if reasoning_items is not None:
|
||||
init_values["reasoning_items"] = reasoning_items
|
||||
|
||||
if annotations is not None:
|
||||
init_values["annotations"] = annotations
|
||||
|
||||
@@ -1219,6 +1225,11 @@ class Message(SafeAttributeModel, OpenAIObject):
|
||||
if hasattr(self, "thinking_blocks"):
|
||||
del self.thinking_blocks
|
||||
|
||||
if reasoning_items is None:
|
||||
# ensure default response matches OpenAI spec
|
||||
if hasattr(self, "reasoning_items"):
|
||||
del self.reasoning_items
|
||||
|
||||
add_provider_specific_fields(self, provider_specific_fields)
|
||||
|
||||
def get(self, key, default=None):
|
||||
@@ -1246,6 +1257,7 @@ class Delta(SafeAttributeModel, OpenAIObject):
|
||||
thinking_blocks: Optional[
|
||||
List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
|
||||
] = None
|
||||
reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None
|
||||
provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
|
||||
|
||||
def __init__(
|
||||
@@ -1262,6 +1274,7 @@ class Delta(SafeAttributeModel, OpenAIObject):
|
||||
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
|
||||
]
|
||||
] = None,
|
||||
reasoning_items: Optional[List[ChatCompletionReasoningItem]] = None,
|
||||
annotations: Optional[List[ChatCompletionAnnotation]] = None,
|
||||
**params,
|
||||
):
|
||||
@@ -1295,6 +1308,13 @@ class Delta(SafeAttributeModel, OpenAIObject):
|
||||
# ensure default response matches OpenAI spec
|
||||
del self.thinking_blocks
|
||||
|
||||
if reasoning_items is not None:
|
||||
self.reasoning_items = reasoning_items
|
||||
else:
|
||||
# ensure default response matches OpenAI spec
|
||||
if hasattr(self, "reasoning_items"):
|
||||
del self.reasoning_items
|
||||
|
||||
# Add annotations to the delta, ensure they are only on Delta if they exist (Match OpenAI spec)
|
||||
if annotations is not None:
|
||||
self.annotations = annotations
|
||||
|
||||
+253
-6
@@ -2127,9 +2127,10 @@ def test_convert_chat_completion_file_type_to_input_file():
|
||||
}
|
||||
]
|
||||
|
||||
input_items, instructions = (
|
||||
handler.convert_chat_completion_messages_to_responses_api(messages)
|
||||
)
|
||||
(
|
||||
input_items,
|
||||
instructions,
|
||||
) = handler.convert_chat_completion_messages_to_responses_api(messages)
|
||||
|
||||
assert len(input_items) == 1
|
||||
msg = input_items[0]
|
||||
@@ -2176,11 +2177,257 @@ def test_convert_chat_completion_file_type_with_file_id():
|
||||
}
|
||||
]
|
||||
|
||||
input_items, instructions = (
|
||||
handler.convert_chat_completion_messages_to_responses_api(messages)
|
||||
)
|
||||
(
|
||||
input_items,
|
||||
instructions,
|
||||
) = handler.convert_chat_completion_messages_to_responses_api(messages)
|
||||
|
||||
content = input_items[0]["content"]
|
||||
assert content[1]["type"] == "input_file"
|
||||
assert content[1]["file_id"] == "file-abc123"
|
||||
assert "file_data" not in content[1]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests for reasoning_items round-trip (encrypted_content preservation)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_reasoning_items_non_streaming_round_trip():
    """
    Non-streaming round trip of reasoning_items (including encrypted_content).

    Checks that:
    1. transform_response copies the ResponseReasoningItem onto the Message
       as a reasoning_items entry.
    2. Passing that assistant message back through
       convert_chat_completion_messages_to_responses_api emits a 'reasoning'
       input item ahead of the assistant message item.
    """
    from unittest.mock import Mock

    from openai.types.responses import ResponseOutputMessage, ResponseOutputText
    from openai.types.responses.response_reasoning_item import (
        ResponseReasoningItem,
        Summary,
    )

    from litellm.completion_extras.litellm_responses_transformation.transformation import (
        LiteLLMResponsesTransformationHandler,
    )
    from litellm.types.llms.openai import (
        InputTokensDetails,
        OutputTokensDetails,
        ResponseAPIUsage,
        ResponsesAPIResponse,
    )
    from litellm.types.utils import ModelResponse, Usage

    transformer = LiteLLMResponsesTransformationHandler()

    encrypted_blob = "gAAAAABpw5abc123FAKE=="
    thought_summary = "**Thinking about it**\n\nSome reasoning here."

    # ── Fixtures: a Responses API payload with one reasoning item + one message
    reasoning_output = ResponseReasoningItem(
        id="rs_test001",
        summary=[Summary(text=thought_summary, type="summary_text")],
        type="reasoning",
        content=None,
        encrypted_content=encrypted_blob,
        status=None,
    )
    answer_text = ResponseOutputText(
        annotations=[],
        text="The answer is 42.",
        type="output_text",
        logprobs=[],
    )
    message_output = ResponseOutputMessage(
        id="msg_test001",
        content=[answer_text],
        role="assistant",
        status="completed",
        type="message",
    )
    token_usage = ResponseAPIUsage(
        input_tokens=10,
        input_tokens_details=InputTokensDetails(
            audio_tokens=None, cached_tokens=0, text_tokens=None
        ),
        output_tokens=20,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0, text_tokens=None),
        total_tokens=30,
        cost=None,
    )
    responses_payload = ResponsesAPIResponse(
        id="resp_test001",
        created_at=1234567890,
        error=None,
        incomplete_details=None,
        instructions=None,
        metadata={},
        model="gpt-5-mini",
        object="response",
        output=[reasoning_output, message_output],
        parallel_tool_calls=True,
        temperature=1.0,
        tool_choice="auto",
        tools=[],
        top_p=1.0,
        max_output_tokens=None,
        previous_response_id=None,
        reasoning={"effort": "low", "summary": "detailed"},
        status="completed",
        text={"format": {"type": "text"}, "verbosity": "medium"},
        truncation="disabled",
        usage=token_usage,
        user=None,
        store=True,
        background=False,
        billing={"payer": "developer"},
        max_tool_calls=None,
        prompt_cache_key=None,
        safety_identifier=None,
        service_tier="default",
        top_logprobs=0,
    )
    empty_completion = ModelResponse(
        id="chatcmpl-test001",
        created=1234567890,
        model=None,
        object="chat.completion",
        system_fingerprint=None,
        choices=[],
        usage=Usage(completion_tokens=0, prompt_tokens=0, total_tokens=0),
    )

    result = transformer.transform_response(
        model="gpt-5-mini",
        raw_response=responses_payload,
        model_response=empty_completion,
        logging_obj=Mock(),
        request_data={"model": "gpt-5-mini"},
        messages=[{"role": "user", "content": "What is the answer?"}],
        optional_params={},
        litellm_params={},
        encoding=Mock(),
    )

    # ── Part 1: reasoning_items on the response message ──────────────────────
    assert len(result.choices) == 1
    assistant_message = result.choices[0].message

    assert (
        assistant_message.reasoning_content == thought_summary
    ), "reasoning_content should equal summary text"

    assert assistant_message.reasoning_items is not None, "reasoning_items should be set"
    assert len(assistant_message.reasoning_items) == 1
    stored_item = assistant_message.reasoning_items[0]
    assert stored_item["type"] == "reasoning"
    assert stored_item["id"] == "rs_test001"
    assert (
        stored_item["encrypted_content"] == encrypted_blob
    ), "encrypted_content must be preserved"
    assert len(stored_item["summary"]) == 1
    assert stored_item["summary"][0]["text"] == thought_summary

    # ── Part 2: reasoning item round-trips through message history ────────────
    conversation = [
        {"role": "user", "content": "What is the answer?"},
        {
            "role": "assistant",
            "content": assistant_message.content,
            "reasoning_items": assistant_message.reasoning_items,
        },
        {"role": "user", "content": "Can you elaborate?"},
    ]
    input_items, _ = transformer.convert_chat_completion_messages_to_responses_api(
        conversation
    )

    # A reasoning input item has to be present at all ...
    item_types = [entry.get("type") for entry in input_items]
    assert (
        "reasoning" in item_types
    ), "reasoning input item must be emitted for the assistant turn"

    # ... and the first one must carry the original id, payload and summary.
    reasoning_position = item_types.index("reasoning")
    reasoning_input = input_items[reasoning_position]
    assert reasoning_input["id"] == "rs_test001"
    assert reasoning_input["encrypted_content"] == encrypted_blob
    assert reasoning_input["summary"][0]["text"] == thought_summary

    # reasoning item must come before the assistant message item
    assistant_position = next(
        position
        for position, entry in enumerate(input_items)
        if entry.get("type") == "message" and entry.get("role") == "assistant"
    )
    assert (
        reasoning_position < assistant_position
    ), "reasoning input item must precede the assistant message item"
|
||||
|
||||
|
||||
def test_reasoning_items_streaming_emitted_on_response_completed():
    """
    Streaming: the response.completed chunk must expose reasoning_items
    (including encrypted_content) on its delta, so the caller can send them
    back in subsequent requests.
    """
    from litellm.completion_extras.litellm_responses_transformation.transformation import (
        OpenAiResponsesToChatCompletionStreamIterator,
    )

    stream_iterator = OpenAiResponsesToChatCompletionStreamIterator(
        streaming_response=None, sync_stream=True
    )

    encrypted_blob = "gAAAAABpw5xyz987FAKE=="
    thought_summary = "**Reasoning summary**\n\nModel thought about this carefully."

    # Final event of a stream: one reasoning item followed by the answer message.
    reasoning_output = {
        "type": "reasoning",
        "id": "rs_stream001",
        "encrypted_content": encrypted_blob,
        "summary": [{"type": "summary_text", "text": thought_summary}],
    }
    message_output = {
        "type": "message",
        "id": "msg_stream001",
        "role": "assistant",
        "content": [{"type": "output_text", "text": "The answer."}],
        "status": "completed",
    }
    token_usage = {
        "input_tokens": 10,
        "output_tokens": 5,
        "total_tokens": 15,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens_details": {"reasoning_tokens": 0},
    }
    completed_event = {
        "type": "response.completed",
        "response": {
            "id": "resp_stream001",
            "status": "completed",
            "output": [reasoning_output, message_output],
            "usage": token_usage,
        },
    }

    parsed = stream_iterator.chunk_parser(completed_event)

    assert len(parsed.choices) == 1
    final_choice = parsed.choices[0]
    delta = final_choice.delta

    # finish_reason must be set (response is complete)
    assert final_choice.finish_reason == "stop"

    # reasoning_items must be on the delta
    assert (
        getattr(delta, "reasoning_items", None) is not None
    ), "reasoning_items must be present on the response.completed delta"
    assert len(delta.reasoning_items) == 1
    emitted_item = delta.reasoning_items[0]
    assert emitted_item["type"] == "reasoning"
    assert emitted_item["id"] == "rs_stream001"
    assert (
        emitted_item["encrypted_content"] == encrypted_blob
    ), "encrypted_content must be preserved in streaming"
    assert emitted_item["summary"][0]["text"] == thought_summary
|
||||
|
||||
Reference in New Issue
Block a user