mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 10:47:56 +00:00
feat(anthropic,bedrock): strip output_config under drop_params for non-effort models
When a proxy fronts Claude Code (which always sends `output_config.effort`) at a pre-4.5 Anthropic model — haiku-3, sonnet-3.5, opus-3, sonnet-4 — the forwarded knob causes a forced 400 the client can't fix. Gating a strip behind the existing `drop_params` flag lets operators opt into silent fixup once and stop worrying about per-model param hygiene. Default (`drop_params=False`) still forwards and surfaces the provider's error, preserving the strict, debuggable contract from #27074. Per https://platform.claude.com/docs/en/build-with-claude/effort the supporting set is Opus 4.5+, Sonnet 4.6+, and Mythos Preview; everything else is dropped (with a verbose_logger warning so the strip is visible). Recognition uses model-name patterns plus a fallback to any `supports_*_reasoning_effort` flag in the model map for forward compatibility with new entries. https://claude.ai/code/session_01WjHq31rvXT6xYNdVmSJvRp
This commit is contained in:
@@ -1,7 +1,17 @@
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
ClassVar,
|
||||
Dict,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -227,6 +237,52 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
# Per https://platform.claude.com/docs/en/build-with-claude/effort the
|
||||
# ``output_config.effort`` parameter is supported on Opus 4.5+, Sonnet 4.6+
|
||||
# and Mythos Preview. Older Claude models (haiku-3, sonnet-3.5, opus-3,
|
||||
# sonnet-4, ...) reject it with a 400. The patterns below let us recognize
|
||||
# the supporting families regardless of route prefix (``anthropic.``,
|
||||
# ``us.anthropic.``, ``vertex_ai/``, ``azure_ai/``, ...).
|
||||
_EFFORT_SUPPORTING_MODEL_PATTERNS: ClassVar[Tuple[str, ...]] = (
|
||||
"opus-4-5",
|
||||
"opus_4_5",
|
||||
"opus-4.5",
|
||||
"opus_4.5",
|
||||
"opus-4-6",
|
||||
"opus_4_6",
|
||||
"opus-4.6",
|
||||
"opus_4.6",
|
||||
"opus-4-7",
|
||||
"opus_4_7",
|
||||
"opus-4.7",
|
||||
"opus_4.7",
|
||||
"sonnet-4-6",
|
||||
"sonnet_4_6",
|
||||
"sonnet-4.6",
|
||||
"sonnet_4.6",
|
||||
"mythos",
|
||||
)
|
||||
|
||||
@staticmethod
def _model_supports_effort_param(model: str) -> bool:
    """Report whether ``model`` accepts ``output_config.effort`` at all.

    Callers use the answer to decide whether ``output_config`` should be
    stripped for known-incompatible models when ``drop_params`` is set.

    Two checks run in order:

    1. The lowercased model name is searched for any fragment listed in
       ``_EFFORT_SUPPORTING_MODEL_PATTERNS`` (the documented families).
    2. Otherwise every known effort level is probed through
       ``_supports_effort_level``, so models that land in
       ``model_prices_and_context_window.json`` with a
       ``supports_*_reasoning_effort`` flag are recognized automatically.

    Args:
        model: Model identifier, with or without a route prefix.

    Returns:
        ``True`` when either check succeeds, ``False`` otherwise.
    """
    normalized_name = model.lower()
    for family_fragment in AnthropicConfig._EFFORT_SUPPORTING_MODEL_PATTERNS:
        if family_fragment in normalized_name:
            return True

    # The per-level probe receives the model name as given (not lowercased).
    known_levels = ("low", "minimal", "medium", "high", "xhigh", "max")
    return any(
        AnthropicConfig._supports_effort_level(model, effort_level)
        for effort_level in known_levels
    )
|
||||
|
||||
def get_supported_openai_params(self, model: str):
|
||||
params = [
|
||||
"stream",
|
||||
@@ -1563,6 +1619,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
output_config = optional_params.get("output_config")
|
||||
if not output_config or not isinstance(output_config, dict):
|
||||
return
|
||||
# When ``drop_params`` is set, strip ``output_config`` for models that
|
||||
# cannot accept it (e.g. proxy fronting Claude Code at haiku-3, where
|
||||
# the client always sends effort but the model rejects it). The user
|
||||
# opted into silent fixup via the global flag — log a warning so the
|
||||
# strip is still visible in logs.
|
||||
if litellm.drop_params is True and not self._model_supports_effort_param(model):
|
||||
litellm.verbose_logger.warning(
|
||||
"Dropping unsupported `output_config` for model=%s "
|
||||
"(drop_params=True). Effort is only supported on Opus 4.5+, "
|
||||
"Sonnet 4.6+, and Mythos Preview.",
|
||||
model,
|
||||
)
|
||||
optional_params.pop("output_config", None)
|
||||
data.pop("output_config", None)
|
||||
return
|
||||
effort = output_config.get("effort")
|
||||
# ``effort=""`` (empty string) and unmapped strings should be treated
|
||||
# as invalid, not silently passed through. We use ``effort is not None``
|
||||
|
||||
@@ -1371,10 +1371,26 @@ class AmazonConverseConfig(BaseConfig):
|
||||
):
|
||||
base_model = BedrockModelInfo.get_base_model(model)
|
||||
if base_model.startswith("anthropic"):
|
||||
effort = anthropic_output_config.get("effort")
|
||||
if effort is not None:
|
||||
self._validate_anthropic_adaptive_effort(model=model, effort=effort)
|
||||
additional_request_params["output_config"] = anthropic_output_config
|
||||
# When ``drop_params`` is set, strip for models that don't
|
||||
# accept effort (e.g. proxy routing Claude Code at haiku-3).
|
||||
# Otherwise forward and let Bedrock surface the model's error.
|
||||
if (
|
||||
litellm.drop_params is True
|
||||
and not AnthropicConfig._model_supports_effort_param(model)
|
||||
):
|
||||
litellm.verbose_logger.warning(
|
||||
"Dropping unsupported `output_config` for model=%s "
|
||||
"(drop_params=True). Effort is only supported on "
|
||||
"Opus 4.5+, Sonnet 4.6+, and Mythos Preview.",
|
||||
model,
|
||||
)
|
||||
else:
|
||||
effort = anthropic_output_config.get("effort")
|
||||
if effort is not None:
|
||||
self._validate_anthropic_adaptive_effort(
|
||||
model=model, effort=effort
|
||||
)
|
||||
additional_request_params["output_config"] = anthropic_output_config
|
||||
|
||||
return (
|
||||
inference_params,
|
||||
|
||||
+19
@@ -12,9 +12,11 @@ from typing import (
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm.anthropic_beta_headers_manager import filter_and_transform_beta_headers
|
||||
from litellm.constants import BEDROCK_MIN_THINKING_BUDGET_TOKENS
|
||||
from litellm.litellm_core_utils.litellm_logging import verbose_logger
|
||||
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
|
||||
from litellm.llms.anthropic.common_utils import AnthropicModelInfo
|
||||
from litellm.llms.anthropic.experimental_pass_through.messages.transformation import (
|
||||
AnthropicMessagesConfig,
|
||||
@@ -580,6 +582,23 @@ class AmazonAnthropicClaudeMessagesConfig(
|
||||
if filtered_betas:
|
||||
anthropic_messages_request["anthropic_beta"] = filtered_betas
|
||||
|
||||
# 6a. When ``drop_params`` is set, strip ``output_config`` for models
|
||||
# that don't accept it (e.g. proxy fronting Claude Code at haiku-3).
|
||||
# Without this, every Claude Code request to a pre-4.5 Anthropic model
|
||||
# routes a forced 400 from Bedrock that the client can't fix.
|
||||
if (
|
||||
litellm.drop_params is True
|
||||
and "output_config" in anthropic_messages_request
|
||||
and not AnthropicConfig._model_supports_effort_param(model)
|
||||
):
|
||||
verbose_logger.warning(
|
||||
"Dropping unsupported `output_config` for model=%s "
|
||||
"(drop_params=True). Effort is only supported on Opus 4.5+, "
|
||||
"Sonnet 4.6+, and Mythos Preview.",
|
||||
model,
|
||||
)
|
||||
anthropic_messages_request.pop("output_config", None)
|
||||
|
||||
# 7. Final safety net: filter top-level fields to the Bedrock Invoke allowlist.
|
||||
# Catches Anthropic-only extensions (context_management, output_config, speed,
|
||||
# mcp_servers, ...) and any future additions Claude Code may start sending.
|
||||
|
||||
@@ -1748,6 +1748,104 @@ def test_effort_with_other_features():
|
||||
assert "thinking" in result
|
||||
|
||||
|
||||
def test_anthropic_drop_params_strips_output_config_for_pre_4_5_models():
    """
    With ``drop_params=True`` the Anthropic transform must remove
    ``output_config`` for a pre-4.5 model.

    Scenario: a proxy fronts Claude Code, which injects ``output_config`` into
    every request. Without ``drop_params`` the provider answers with a 400;
    with the flag on, the parameter is stripped (and logged) so the request
    can succeed.
    """
    anthropic_config = AnthropicConfig()

    saved_drop_params = litellm.drop_params
    litellm.drop_params = True
    try:
        request_body = anthropic_config.transform_request(
            model="claude-3-haiku-20240307",
            messages=[{"role": "user", "content": "Hello"}],
            optional_params={"output_config": {"effort": "low"}},
            litellm_params={},
            headers={},
        )
    finally:
        # Always restore the global flag so other tests are unaffected.
        litellm.drop_params = saved_drop_params

    assert "output_config" not in request_body
|
||||
|
||||
|
||||
def test_anthropic_drop_params_keeps_output_config_for_supporting_models():
    """Effort-capable models keep ``output_config`` even with ``drop_params=True``."""
    anthropic_config = AnthropicConfig()

    saved_drop_params = litellm.drop_params
    litellm.drop_params = True
    try:
        request_body = anthropic_config.transform_request(
            model="claude-opus-4-7",
            messages=[{"role": "user", "content": "Hello"}],
            optional_params={"output_config": {"effort": "high"}},
            litellm_params={},
            headers={},
        )
    finally:
        # Always restore the global flag so other tests are unaffected.
        litellm.drop_params = saved_drop_params

    forwarded_config = request_body.get("output_config")
    assert forwarded_config == {"effort": "high"}
|
||||
|
||||
|
||||
def test_anthropic_drop_params_false_forwards_to_unsupported_model():
    """
    With ``drop_params=False`` (the default behavior) ``output_config`` is
    forwarded untouched, leaving it to the provider to return the 400.

    This is the contract for users who prefer strict, debuggable failures over
    silent fixups.
    """
    anthropic_config = AnthropicConfig()

    saved_drop_params = litellm.drop_params
    litellm.drop_params = False
    try:
        request_body = anthropic_config.transform_request(
            model="claude-3-haiku-20240307",
            messages=[{"role": "user", "content": "Hello"}],
            optional_params={"output_config": {"effort": "low"}},
            litellm_params={},
            headers={},
        )
    finally:
        # Always restore the global flag so other tests are unaffected.
        litellm.drop_params = saved_drop_params

    forwarded_config = request_body.get("output_config")
    assert forwarded_config == {"effort": "low"}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "model",
    [
        "claude-opus-4-5-20251101",
        "claude-opus-4-6",
        "claude-opus-4-7",
        "claude-sonnet-4-6",
        "claude-mythos-preview",
    ],
)
def test_anthropic_model_supports_effort_param_recognizes_supporting_models(model):
    """Each listed effort-capable model id must be reported as supporting effort."""
    supports_effort = AnthropicConfig._model_supports_effort_param(model)
    assert supports_effort is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "model",
    [
        "claude-3-haiku-20240307",
        "claude-3-5-sonnet-20241022",
        "claude-3-opus-20240229",
        "claude-sonnet-4-20250514",
    ],
)
def test_anthropic_model_supports_effort_param_rejects_non_supporting_models(model):
    """Each listed older model id must be reported as not supporting effort."""
    supports_effort = AnthropicConfig._model_supports_effort_param(model)
    assert supports_effort is False
|
||||
|
||||
|
||||
def test_translate_system_message_skips_empty_string_content():
|
||||
"""
|
||||
Test that translate_system_message skips system messages with empty string content.
|
||||
|
||||
@@ -3434,6 +3434,59 @@ def test_transform_request_strips_anthropic_output_config():
|
||||
assert "output_config" not in additional_fields
|
||||
|
||||
|
||||
def test_converse_drop_params_strips_output_config_for_pre_4_5_anthropic():
    """On Bedrock Converse, ``drop_params=True`` removes ``output_config`` for a
    pre-4.5 Anthropic model, so a proxy fronting Claude Code at haiku does not
    force a 400 on every request."""
    converse_config = AmazonConverseConfig()

    saved_drop_params = litellm.drop_params
    litellm.drop_params = True
    try:
        converse_request = converse_config._transform_request(
            model="bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            messages=[{"role": "user", "content": "hi"}],
            optional_params={
                "maxTokens": 256,
                "output_config": {"effort": "low"},
            },
            litellm_params={},
            headers={},
        )
    finally:
        # Always restore the global flag so other tests are unaffected.
        litellm.drop_params = saved_drop_params

    extra_model_fields = converse_request.get("additionalModelRequestFields", {})
    assert "output_config" not in extra_model_fields
|
||||
|
||||
|
||||
def test_converse_drop_params_keeps_output_config_for_supporting_anthropic():
    """On Bedrock Converse, effort-capable models keep ``output_config`` even
    with ``drop_params=True``."""
    converse_config = AmazonConverseConfig()

    saved_drop_params = litellm.drop_params
    litellm.drop_params = True
    try:
        converse_request = converse_config._transform_request(
            model="bedrock/converse/us.anthropic.claude-opus-4-7",
            messages=[{"role": "user", "content": "hi"}],
            optional_params={
                "maxTokens": 256,
                "thinking": {"type": "adaptive"},
                "output_config": {"effort": "high"},
            },
            litellm_params={},
            headers={},
        )
    finally:
        # Always restore the global flag so other tests are unaffected.
        litellm.drop_params = saved_drop_params

    extra_model_fields = converse_request.get("additionalModelRequestFields", {})
    assert extra_model_fields.get("output_config") == {"effort": "high"}
|
||||
|
||||
|
||||
def test_transform_response_native_structured_output():
|
||||
"""Test response handling when model returns JSON as text content (native structured output)."""
|
||||
response_json = {
|
||||
|
||||
+64
@@ -662,6 +662,70 @@ def test_bedrock_messages_forwards_output_config_with_output_format():
|
||||
assert "output_format" not in result
|
||||
|
||||
|
||||
def test_bedrock_messages_drop_params_strips_output_config_for_pre_4_5():
    """
    On the Bedrock /v1/messages route, ``drop_params=True`` removes
    ``output_config`` for a pre-4.5 Anthropic model.

    ``drop_params=True`` is the operator opt-in for "silently fix up"
    behavior. A proxy fronting Claude Code at a pre-4.5 model (haiku-3,
    sonnet-3.5, ...) always receives ``output_config.effort`` from the client,
    and the model rejects it. Stripping under ``drop_params`` lets those
    requests succeed; without the flag the parameter is forwarded and the
    model's 400 surfaces as designed.
    """
    import litellm
    from litellm.types.router import GenericLiteLLMParams

    messages_config = AmazonAnthropicClaudeMessagesConfig()
    request_params = {
        "max_tokens": 4096,
        "output_config": {"effort": "low"},
    }

    saved_drop_params = litellm.drop_params
    litellm.drop_params = True
    try:
        bedrock_request = messages_config.transform_anthropic_messages_request(
            model="anthropic.claude-3-haiku-20240307-v1:0",
            messages=[
                {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
            ],
            anthropic_messages_optional_request_params=request_params,
            litellm_params=GenericLiteLLMParams(),
            headers={},
        )
    finally:
        # Always restore the global flag so other tests are unaffected.
        litellm.drop_params = saved_drop_params

    assert "output_config" not in bedrock_request
|
||||
|
||||
|
||||
def test_bedrock_messages_drop_params_keeps_output_config_for_4_7():
    """On the Bedrock /v1/messages route, ``drop_params=True`` leaves
    ``output_config`` in place for opus-4-7, which accepts effort, so the
    client's tier knob still lands on the wire."""
    import litellm
    from litellm.types.router import GenericLiteLLMParams

    messages_config = AmazonAnthropicClaudeMessagesConfig()
    request_params = {
        "max_tokens": 4096,
        "output_config": {"effort": "high"},
    }

    saved_drop_params = litellm.drop_params
    litellm.drop_params = True
    try:
        bedrock_request = messages_config.transform_anthropic_messages_request(
            model="anthropic.claude-opus-4-7",
            messages=[
                {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
            ],
            anthropic_messages_optional_request_params=request_params,
            litellm_params=GenericLiteLLMParams(),
            headers={},
        )
    finally:
        # Always restore the global flag so other tests are unaffected.
        litellm.drop_params = saved_drop_params

    forwarded_config = bedrock_request.get("output_config")
    assert forwarded_config == {"effort": "high"}
|
||||
|
||||
|
||||
def test_bedrock_messages_strips_context_management():
|
||||
"""
|
||||
Ensure context_management is stripped from the request before sending to
|
||||
|
||||
Reference in New Issue
Block a user