Put reasoning summary behind feat flag

This commit is contained in:
Sameer Kankute
2026-01-06 11:36:20 +05:30
parent 694e44551f
commit bb00a53786
5 changed files with 5721 additions and 1399 deletions
+65
View File
@@ -591,3 +591,68 @@ Expected Response
</TabItem>
</Tabs>
## OpenAI Responses API - Auto-Summary Control
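When using OpenAI Responses API models (like `gpt-5`) via `/chat/completions` with `reasoning_effort`, you can control whether `summary="detailed"` is automatically added to the reasoning parameter. Auto-summary is disabled by default: a string `reasoning_effort` (for example `"low"`) is sent without a `summary` field unless you opt in.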
### Enabling Auto-Summary
You can enable automatic `summary="detailed"` in three ways:
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import litellm
# Enable auto-summary globally
litellm.reasoning_auto_summary = True
response = litellm.completion(
model="openai/responses/gpt-5-mini",
messages=[{"role": "user", "content": "What is the capital of France?"}],
reasoning_effort="low", # Will automatically add summary="detailed"
)
```
</TabItem>
<TabItem value="env" label="Environment Variable">
```bash
# Set environment variable (only the value "true", case-insensitive, enables auto-summary)
export LITELLM_REASONING_AUTO_SUMMARY=true
# Or in your .env file
LITELLM_REASONING_AUTO_SUMMARY=true
```
</TabItem>
<TabItem value="proxy" label="Proxy Config">
```yaml
litellm_settings:
reasoning_auto_summary: true # Enable auto-summary for all requests
model_list:
- model_name: gpt-5-mini
litellm_params:
model: openai/responses/gpt-5-mini
```
</TabItem>
</Tabs>
### Manual Control (Recommended)
For fine-grained control, pass `reasoning_effort` as a dictionary:
```python
response = litellm.completion(
model="openai/responses/gpt-5-mini",
messages=[{"role": "user", "content": "What is the capital of France?"}],
reasoning_effort={"effort": "low", "summary": "detailed"}, # Explicit control
)
```
+1
View File
@@ -276,6 +276,7 @@ banned_keywords_list: Optional[Union[str, List]] = None
llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
guardrail_name_config_map: Dict[str, GuardrailItem] = {}
include_cost_in_streaming_usage: bool = False
reasoning_auto_summary: bool = False
### PROMPTS ####
from litellm.types.prompts.init_prompts import PromptSpec
@@ -3,6 +3,7 @@ Handler for transforming /chat/completions api requests to litellm.responses req
"""
import json
import os
from typing import (
TYPE_CHECKING,
Any,
@@ -22,6 +23,7 @@ from typing import (
from openai.types.responses.tool_param import FunctionToolParam
from pydantic import BaseModel
import litellm
from litellm import ModelResponse
from litellm._logging import verbose_logger
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
@@ -691,19 +693,26 @@ class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
if isinstance(reasoning_effort, dict):
return Reasoning(**reasoning_effort) # type: ignore[typeddict-item]
# If string is passed, map with summary="detailed"
# Check if auto-summary is enabled via flag or environment variable
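# Either source enables it (logical OR): litellm.reasoning_auto_summary=True, or
# LITELLM_REASONING_AUTO_SUMMARY set to "true" (case-insensitive). A False flag does not override the env var.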
auto_summary_enabled = (
litellm.reasoning_auto_summary
or os.getenv("LITELLM_REASONING_AUTO_SUMMARY", "false").lower() == "true"
)
# If string is passed, map with optional summary based on flag/env var
if reasoning_effort == "none":
return Reasoning(effort="none", summary="detailed") # type: ignore
return Reasoning(effort="none", summary="detailed") if auto_summary_enabled else Reasoning(effort="none") # type: ignore
elif reasoning_effort == "high":
return Reasoning(effort="high", summary="detailed")
return Reasoning(effort="high", summary="detailed") if auto_summary_enabled else Reasoning(effort="high")
elif reasoning_effort == "xhigh":
return Reasoning(effort="xhigh", summary="detailed") # type: ignore[typeddict-item]
return Reasoning(effort="xhigh", summary="detailed") if auto_summary_enabled else Reasoning(effort="xhigh") # type: ignore[typeddict-item]
elif reasoning_effort == "medium":
return Reasoning(effort="medium", summary="detailed")
return Reasoning(effort="medium", summary="detailed") if auto_summary_enabled else Reasoning(effort="medium")
elif reasoning_effort == "low":
return Reasoning(effort="low", summary="detailed")
return Reasoning(effort="low", summary="detailed") if auto_summary_enabled else Reasoning(effort="low")
elif reasoning_effort == "minimal":
return Reasoning(effort="minimal", summary="detailed")
return Reasoning(effort="minimal", summary="detailed") if auto_summary_enabled else Reasoning(effort="minimal")
return None
def _transform_response_format_to_text_format(
File diff suppressed because it is too large Load Diff
@@ -1011,39 +1011,87 @@ def test_multiple_tool_calls_in_single_choice():
def test_map_reasoning_effort_adds_summary_detailed():
"""
Test that _map_reasoning_effort adds summary="detailed" when user provides reasoning_effort as a string.
Test _map_reasoning_effort behavior with the reasoning_auto_summary flag.
This ensures that when users pass reasoning_effort in the completions API for OpenAI responses/models,
the transformation automatically includes summary="detailed" in the reasoning parameter.
By default (flag=False), summary should NOT be added to avoid:
1. Breaking for users without verified OpenAI orgs (400 errors)
2. Making requests more expensive by including summary reasoning tokens
When flag is enabled (flag=True or env var), summary="detailed" is added.
"""
import os
import litellm
from litellm.completion_extras.litellm_responses_transformation.transformation import (
LiteLLMResponsesTransformationHandler,
)
handler = LiteLLMResponsesTransformationHandler()
# Test all string effort levels
# Test all string effort levels - DEFAULT BEHAVIOR (no summary)
effort_levels = ["none", "low", "medium", "high", "xhigh", "minimal"]
for effort in effort_levels:
result = handler._map_reasoning_effort(effort)
# Save original flag value
original_flag = litellm.reasoning_auto_summary
original_env = os.environ.get("LITELLM_REASONING_AUTO_SUMMARY")
try:
# Test 1: Default behavior (flag=False, no env var) - NO summary
litellm.reasoning_auto_summary = False
if "LITELLM_REASONING_AUTO_SUMMARY" in os.environ:
del os.environ["LITELLM_REASONING_AUTO_SUMMARY"]
assert result is not None, f"Result should not be None for effort={effort}"
assert result["effort"] == effort, f"Effort should be {effort}"
assert result["summary"] == "detailed", f"Summary should be 'detailed' for effort={effort}"
for effort in effort_levels:
result = handler._map_reasoning_effort(effort)
assert result is not None, f"Result should not be None for effort={effort}"
assert result["effort"] == effort, f"Effort should be {effort}"
assert "summary" not in result, f"Summary should NOT be present by default for effort={effort}"
print(f"✓ reasoning_effort='{effort}' correctly maps to effort='{effort}' (no summary by default)")
print(f"✓ reasoning_effort='{effort}' correctly maps to effort='{effort}', summary='detailed'")
# Test 2: With flag enabled - summary IS added
litellm.reasoning_auto_summary = True
for effort in effort_levels:
result = handler._map_reasoning_effort(effort)
assert result is not None, f"Result should not be None for effort={effort}"
assert result["effort"] == effort, f"Effort should be {effort}"
assert result["summary"] == "detailed", f"Summary should be 'detailed' when flag is enabled for effort={effort}"
print(f"✓ reasoning_effort='{effort}' correctly maps to effort='{effort}', summary='detailed' (flag enabled)")
# Test 3: With env var enabled (flag disabled) - summary IS added
litellm.reasoning_auto_summary = False
os.environ["LITELLM_REASONING_AUTO_SUMMARY"] = "true"
result = handler._map_reasoning_effort("high")
assert result["summary"] == "detailed", "Summary should be 'detailed' when env var is enabled"
print("✓ LITELLM_REASONING_AUTO_SUMMARY env var works correctly")
# Test 4: Dict input is passed through as-is (no modification)
litellm.reasoning_auto_summary = False
if "LITELLM_REASONING_AUTO_SUMMARY" in os.environ:
del os.environ["LITELLM_REASONING_AUTO_SUMMARY"]
dict_input = {"effort": "high", "summary": "custom_summary"}
result_dict = handler._map_reasoning_effort(dict_input)
assert result_dict["effort"] == "high"
assert result_dict["summary"] == "custom_summary"
print("✓ Dict input is passed through without modification")
# Test 5: None/unknown values return None
result_unknown = handler._map_reasoning_effort("unknown_value")
assert result_unknown is None
print("✓ Unknown reasoning_effort values return None")
print("✓ All reasoning_effort behaviors work correctly with flag/env var control")
# Test that dict input is passed through as-is (no modification)
dict_input = {"effort": "high", "summary": "custom_summary"}
result_dict = handler._map_reasoning_effort(dict_input)
assert result_dict["effort"] == "high"
assert result_dict["summary"] == "custom_summary"
print("✓ Dict input is passed through without modification")
# Test that None/unknown values return None
result_unknown = handler._map_reasoning_effort("unknown_value")
assert result_unknown is None
print("✓ Unknown reasoning_effort values return None")
print("✓ All reasoning_effort string values correctly map to summary='detailed'")
finally:
# Restore original values
litellm.reasoning_auto_summary = original_flag
if original_env is not None:
os.environ["LITELLM_REASONING_AUTO_SUMMARY"] = original_env
elif "LITELLM_REASONING_AUTO_SUMMARY" in os.environ:
del os.environ["LITELLM_REASONING_AUTO_SUMMARY"]