mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-23 05:37:21 +00:00
Put reasoning summary behind feat flag
This commit is contained in:
@@ -591,3 +591,68 @@ Expected Response
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## OpenAI Responses API - Auto-Summary Control
|
||||
|
||||
When using OpenAI Responses API models (like `gpt-5`) via `/chat/completions` with `reasoning_effort`, you can control whether `summary="detailed"` is automatically added to the reasoning parameter.
|
||||
|
||||
### Enabling Auto-Summary
|
||||
|
||||
You can enable automatic `summary="detailed"` in three ways:
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
||||
```python
|
||||
import litellm
|
||||
|
||||
# Enable auto-summary globally
|
||||
litellm.reasoning_auto_summary = True
|
||||
|
||||
response = litellm.completion(
|
||||
model="openai/responses/gpt-5-mini",
|
||||
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||
reasoning_effort="low", # Will automatically add summary="detailed"
|
||||
)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="env" label="Environment Variable">
|
||||
|
||||
```bash
|
||||
# Set environment variable
|
||||
export LITELLM_REASONING_AUTO_SUMMARY=true
|
||||
|
||||
# Or in your .env file
|
||||
LITELLM_REASONING_AUTO_SUMMARY=true
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="proxy" label="Proxy Config">
|
||||
|
||||
```yaml
|
||||
litellm_settings:
|
||||
reasoning_auto_summary: true # Enable auto-summary for all requests
|
||||
|
||||
model_list:
|
||||
- model_name: gpt-5-mini
|
||||
litellm_params:
|
||||
model: openai/responses/gpt-5-mini
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
### Manual Control (Recommended)
|
||||
|
||||
For fine-grained control, pass `reasoning_effort` as a dictionary:
|
||||
|
||||
```python
|
||||
response = litellm.completion(
|
||||
model="openai/responses/gpt-5-mini",
|
||||
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||
reasoning_effort={"effort": "low", "summary": "detailed"}, # Explicit control
|
||||
)
|
||||
```
|
||||
|
||||
@@ -276,6 +276,7 @@ banned_keywords_list: Optional[Union[str, List]] = None
|
||||
llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
|
||||
guardrail_name_config_map: Dict[str, GuardrailItem] = {}
|
||||
include_cost_in_streaming_usage: bool = False
|
||||
reasoning_auto_summary: bool = False
|
||||
### PROMPTS ####
|
||||
from litellm.types.prompts.init_prompts import PromptSpec
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ Handler for transforming /chat/completions api requests to litellm.responses req
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
@@ -22,6 +23,7 @@ from typing import (
|
||||
from openai.types.responses.tool_param import FunctionToolParam
|
||||
from pydantic import BaseModel
|
||||
|
||||
import litellm
|
||||
from litellm import ModelResponse
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
|
||||
@@ -691,19 +693,26 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
|
||||
if isinstance(reasoning_effort, dict):
|
||||
return Reasoning(**reasoning_effort) # type: ignore[typeddict-item]
|
||||
|
||||
# If string is passed, map with summary="detailed"
|
||||
# Check if auto-summary is enabled via flag or environment variable
|
||||
# Enabled when EITHER litellm.reasoning_auto_summary is truthy OR LITELLM_REASONING_AUTO_SUMMARY == "true" (case-insensitive); neither setting can override the other to disable it
|
||||
auto_summary_enabled = (
|
||||
litellm.reasoning_auto_summary
|
||||
or os.getenv("LITELLM_REASONING_AUTO_SUMMARY", "false").lower() == "true"
|
||||
)
|
||||
|
||||
# If string is passed, map with optional summary based on flag/env var
|
||||
if reasoning_effort == "none":
|
||||
return Reasoning(effort="none", summary="detailed") # type: ignore
|
||||
return Reasoning(effort="none", summary="detailed") if auto_summary_enabled else Reasoning(effort="none") # type: ignore
|
||||
elif reasoning_effort == "high":
|
||||
return Reasoning(effort="high", summary="detailed")
|
||||
return Reasoning(effort="high", summary="detailed") if auto_summary_enabled else Reasoning(effort="high")
|
||||
elif reasoning_effort == "xhigh":
|
||||
return Reasoning(effort="xhigh", summary="detailed") # type: ignore[typeddict-item]
|
||||
return Reasoning(effort="xhigh", summary="detailed") if auto_summary_enabled else Reasoning(effort="xhigh") # type: ignore[typeddict-item]
|
||||
elif reasoning_effort == "medium":
|
||||
return Reasoning(effort="medium", summary="detailed")
|
||||
return Reasoning(effort="medium", summary="detailed") if auto_summary_enabled else Reasoning(effort="medium")
|
||||
elif reasoning_effort == "low":
|
||||
return Reasoning(effort="low", summary="detailed")
|
||||
return Reasoning(effort="low", summary="detailed") if auto_summary_enabled else Reasoning(effort="low")
|
||||
elif reasoning_effort == "minimal":
|
||||
return Reasoning(effort="minimal", summary="detailed")
|
||||
return Reasoning(effort="minimal", summary="detailed") if auto_summary_enabled else Reasoning(effort="minimal")
|
||||
return None
|
||||
|
||||
def _transform_response_format_to_text_format(
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
+71
-23
@@ -1011,39 +1011,87 @@ def test_multiple_tool_calls_in_single_choice():
|
||||
|
||||
def test_map_reasoning_effort_adds_summary_detailed():
|
||||
"""
|
||||
Test that _map_reasoning_effort adds summary="detailed" when user provides reasoning_effort as a string.
|
||||
Test the behavior of _map_reasoning_effort with the reasoning_auto_summary flag.
|
||||
|
||||
This ensures that when users pass reasoning_effort in the completions API for OpenAI responses/models,
|
||||
the transformation automatically includes summary="detailed" in the reasoning parameter.
|
||||
By default (flag=False), summary should NOT be added to avoid:
|
||||
1. Breaking for users without verified OpenAI orgs (400 errors)
|
||||
2. Making requests more expensive by including summary reasoning tokens
|
||||
|
||||
When flag is enabled (flag=True or env var), summary="detailed" is added.
|
||||
"""
|
||||
import os
|
||||
|
||||
import litellm
|
||||
from litellm.completion_extras.litellm_responses_transformation.transformation import (
|
||||
LiteLLMResponsesTransformationHandler,
|
||||
)
|
||||
|
||||
handler = LiteLLMResponsesTransformationHandler()
|
||||
|
||||
# Test all string effort levels
|
||||
# Test all string effort levels - DEFAULT BEHAVIOR (no summary)
|
||||
effort_levels = ["none", "low", "medium", "high", "xhigh", "minimal"]
|
||||
|
||||
for effort in effort_levels:
|
||||
result = handler._map_reasoning_effort(effort)
|
||||
# Save original flag value
|
||||
original_flag = litellm.reasoning_auto_summary
|
||||
original_env = os.environ.get("LITELLM_REASONING_AUTO_SUMMARY")
|
||||
|
||||
try:
|
||||
# Test 1: Default behavior (flag=False, no env var) - NO summary
|
||||
litellm.reasoning_auto_summary = False
|
||||
if "LITELLM_REASONING_AUTO_SUMMARY" in os.environ:
|
||||
del os.environ["LITELLM_REASONING_AUTO_SUMMARY"]
|
||||
|
||||
assert result is not None, f"Result should not be None for effort={effort}"
|
||||
assert result["effort"] == effort, f"Effort should be {effort}"
|
||||
assert result["summary"] == "detailed", f"Summary should be 'detailed' for effort={effort}"
|
||||
for effort in effort_levels:
|
||||
result = handler._map_reasoning_effort(effort)
|
||||
|
||||
assert result is not None, f"Result should not be None for effort={effort}"
|
||||
assert result["effort"] == effort, f"Effort should be {effort}"
|
||||
assert "summary" not in result, f"Summary should NOT be present by default for effort={effort}"
|
||||
|
||||
print(f"✓ reasoning_effort='{effort}' correctly maps to effort='{effort}' (no summary by default)")
|
||||
|
||||
print(f"✓ reasoning_effort='{effort}' correctly maps to effort='{effort}', summary='detailed'")
|
||||
# Test 2: With flag enabled - summary IS added
|
||||
litellm.reasoning_auto_summary = True
|
||||
|
||||
for effort in effort_levels:
|
||||
result = handler._map_reasoning_effort(effort)
|
||||
|
||||
assert result is not None, f"Result should not be None for effort={effort}"
|
||||
assert result["effort"] == effort, f"Effort should be {effort}"
|
||||
assert result["summary"] == "detailed", f"Summary should be 'detailed' when flag is enabled for effort={effort}"
|
||||
|
||||
print(f"✓ reasoning_effort='{effort}' correctly maps to effort='{effort}', summary='detailed' (flag enabled)")
|
||||
|
||||
# Test 3: With env var enabled (flag disabled) - summary IS added
|
||||
litellm.reasoning_auto_summary = False
|
||||
os.environ["LITELLM_REASONING_AUTO_SUMMARY"] = "true"
|
||||
|
||||
result = handler._map_reasoning_effort("high")
|
||||
assert result["summary"] == "detailed", "Summary should be 'detailed' when env var is enabled"
|
||||
print("✓ LITELLM_REASONING_AUTO_SUMMARY env var works correctly")
|
||||
|
||||
# Test 4: Dict input is passed through as-is (no modification)
|
||||
litellm.reasoning_auto_summary = False
|
||||
if "LITELLM_REASONING_AUTO_SUMMARY" in os.environ:
|
||||
del os.environ["LITELLM_REASONING_AUTO_SUMMARY"]
|
||||
|
||||
dict_input = {"effort": "high", "summary": "custom_summary"}
|
||||
result_dict = handler._map_reasoning_effort(dict_input)
|
||||
assert result_dict["effort"] == "high"
|
||||
assert result_dict["summary"] == "custom_summary"
|
||||
print("✓ Dict input is passed through without modification")
|
||||
|
||||
# Test 5: None/unknown values return None
|
||||
result_unknown = handler._map_reasoning_effort("unknown_value")
|
||||
assert result_unknown is None
|
||||
print("✓ Unknown reasoning_effort values return None")
|
||||
|
||||
print("✓ All reasoning_effort behaviors work correctly with flag/env var control")
|
||||
|
||||
# Test that dict input is passed through as-is (no modification)
|
||||
dict_input = {"effort": "high", "summary": "custom_summary"}
|
||||
result_dict = handler._map_reasoning_effort(dict_input)
|
||||
assert result_dict["effort"] == "high"
|
||||
assert result_dict["summary"] == "custom_summary"
|
||||
print("✓ Dict input is passed through without modification")
|
||||
|
||||
# Test that None/unknown values return None
|
||||
result_unknown = handler._map_reasoning_effort("unknown_value")
|
||||
assert result_unknown is None
|
||||
print("✓ Unknown reasoning_effort values return None")
|
||||
|
||||
print("✓ All reasoning_effort string values correctly map to summary='detailed'")
|
||||
finally:
|
||||
# Restore original values
|
||||
litellm.reasoning_auto_summary = original_flag
|
||||
if original_env is not None:
|
||||
os.environ["LITELLM_REASONING_AUTO_SUMMARY"] = original_env
|
||||
elif "LITELLM_REASONING_AUTO_SUMMARY" in os.environ:
|
||||
del os.environ["LITELLM_REASONING_AUTO_SUMMARY"]
|
||||
|
||||
Reference in New Issue
Block a user