From 68d67212cdc11aed3bbfcffc4dddeffd95001aa6 Mon Sep 17 00:00:00 2001 From: Yassin Kortam Date: Sat, 6 Jun 2026 14:57:41 -0700 Subject: [PATCH] fix: 400 on Anthropic context overflow; seed identity on failed auth (#29848) --- litellm/integrations/otel/logger.py | 9 +- .../exception_mapping_utils.py | 6 +- litellm/proxy/auth/auth_exception_handler.py | 30 ++++- litellm/proxy/auth/user_api_key_auth.py | 2 + .../test_exception_mapping_utils.py | 27 ++++ .../proxy/auth/test_auth_exception_handler.py | 117 +++++++++++++++++- 6 files changed, 182 insertions(+), 9 deletions(-) diff --git a/litellm/integrations/otel/logger.py b/litellm/integrations/otel/logger.py index 57738c356f..5e683ce7b9 100644 --- a/litellm/integrations/otel/logger.py +++ b/litellm/integrations/otel/logger.py @@ -15,6 +15,7 @@ from litellm.integrations.otel.model.baggage import promoted_baggage from litellm.integrations.otel.model.config import OpenTelemetryV2Config from litellm.integrations.otel.plumbing.context import ( is_recordable_span, + request_root_span, resolve_parent_context, resolve_request_span_context, set_request_baggage, @@ -435,8 +436,12 @@ class OpenTelemetryV2(CustomLogger): attach(set_request_baggage(bag, context=get_current())) # The server span was started by the instrumentor before this ran, # so the Baggage processor (which only fires at span start) won't - # backfill it — stamp identity on it directly. - server_span = get_current_span() + # backfill it — stamp identity on it directly. Prefer the anchored + # root span over the ambient one so identity still lands on the + # server span when seeding from inside the live ``auth`` phase span + # (the auth-failure path), where ``get_current_span`` is the phase + # span, not the request's root. 
+ server_span = request_root_span() or get_current_span() if is_recordable_span(server_span): # Re-capture the anchor here too: this runs post-auth with the # server span active and covers entrypoints that bypass diff --git a/litellm/litellm_core_utils/exception_mapping_utils.py b/litellm/litellm_core_utils/exception_mapping_utils.py index 95658d0876..ffaa514091 100644 --- a/litellm/litellm_core_utils/exception_mapping_utils.py +++ b/litellm/litellm_core_utils/exception_mapping_utils.py @@ -655,7 +655,11 @@ def exception_type( # type: ignore # noqa: PLR0915 custom_llm_provider == "anthropic" or custom_llm_provider == "anthropic_text" ): # one of the anthropics - if "prompt is too long" in error_str or "prompt: length" in error_str: + if ( + "prompt is too long" in error_str + or "prompt: length" in error_str + or ExceptionCheckers.is_error_str_context_window_exceeded(error_str) + ): exception_mapping_worked = True raise ContextWindowExceededError( message="AnthropicError - {}".format(error_str), diff --git a/litellm/proxy/auth/auth_exception_handler.py b/litellm/proxy/auth/auth_exception_handler.py index 431db4254e..e06ac76023 100644 --- a/litellm/proxy/auth/auth_exception_handler.py +++ b/litellm/proxy/auth/auth_exception_handler.py @@ -14,6 +14,7 @@ from litellm.proxy._types import ( ProxyException, UserAPIKeyAuth, ) +from litellm.integrations.otel.runtime import seed_request_identity from litellm.proxy.auth.auth_utils import _get_request_ip_address from litellm.proxy.db.exception_handler import PrismaDBExceptionHandler from litellm.types.services import ServiceTypes @@ -41,6 +42,7 @@ class UserAPIKeyAuthExceptionHandler: route: str, parent_otel_span: Optional[Span], api_key: str, + resolved_identity: Optional[UserAPIKeyAuth] = None, ) -> UserAPIKeyAuth: """ Handles Connection Errors when reading a Virtual Key from LiteLLM DB @@ -100,12 +102,30 @@ class UserAPIKeyAuthExceptionHandler: extra={"requester_ip": requester_ip}, ) - # Log this exception to OTEL, Datadog 
etc - user_api_key_dict = UserAPIKeyAuth( - parent_otel_span=parent_otel_span, - api_key=api_key, - request_route=route, + # Log this exception to OTEL, Datadog etc. Reuse the identity resolved + # before the failure (team alias/id, metadata, user) so the failed span + # is labeled — a fresh UserAPIKeyAuth here would drop everything auth had + # already looked up (e.g. an expired key whose team/user is known). Copy + # so the handler is side-effect-free for the caller's identity object. + user_api_key_dict = ( + resolved_identity.model_copy() + if resolved_identity is not None + else UserAPIKeyAuth() ) + user_api_key_dict.parent_otel_span = parent_otel_span + user_api_key_dict.request_route = route + user_api_key_dict.api_key = ( + user_api_key_dict.api_key or UserAPIKeyAuth(api_key=api_key).api_key + ) + + # Stamp identity onto the request's server span now, before the request + # is rejected; the OTEL failure hooks don't touch the server span, so + # without this the failed trace would carry no team/key attributes. 
+ seed_request_identity( + user_api_key_dict, + model=request_data.get("model"), + ) + # Allow callbacks to transform the error response transformed_exception = await proxy_logging_obj.post_call_failure_hook( request_data=request_data, diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index 92765fc8ea..a5501fefa4 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -2070,6 +2070,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 route=route, parent_otel_span=parent_otel_span, api_key=api_key, + resolved_identity=valid_token, ) @@ -2558,6 +2559,7 @@ async def user_api_key_auth( route=route, parent_otel_span=user_api_key_auth_obj.parent_otel_span, api_key=api_key, + resolved_identity=user_api_key_auth_obj, ) # Defense-in-depth: ``_user_api_key_auth_builder`` has multiple early-return diff --git a/tests/test_litellm/litellm_core_utils/test_exception_mapping_utils.py b/tests/test_litellm/litellm_core_utils/test_exception_mapping_utils.py index c768e8b6b1..7dab0e0262 100644 --- a/tests/test_litellm/litellm_core_utils/test_exception_mapping_utils.py +++ b/tests/test_litellm/litellm_core_utils/test_exception_mapping_utils.py @@ -286,6 +286,33 @@ def test_lemonade_context_window_error_mapping(): assert excinfo.value.model == model +@pytest.mark.parametrize( + "error_message", + [ + "AnthropicException - prompt is too long: 250000 tokens > 200000 maximum", + "AnthropicException - input length and max_tokens exceed context limit: " + "200000 + 8000 > 200000, decrease input length or max_tokens and try again", + ], +) +def test_anthropic_context_window_error_mapping(error_message): + """Anthropic context-window overflows (input too long, or input + max_tokens + over the context limit) must map to ContextWindowExceededError (400) even when + the upstream exception carries no ``status_code`` attribute. 
Previously only + "prompt is too long" and "prompt: length" were special-cased, so the "exceed + context limit" phrasing fell through to a generic APIConnectionError (500).""" + original_exception = Exception(error_message) + + with pytest.raises(litellm.ContextWindowExceededError) as excinfo: + exception_type( + model="claude-sonnet-4-5", + original_exception=original_exception, + custom_llm_provider="anthropic", + ) + + assert excinfo.value.status_code == 400 + assert excinfo.value.llm_provider == "anthropic" + + # Test cases for Vertex AI RateLimitError mapping # As per https://github.com/BerriAI/litellm/issues/16189 vertex_rate_limit_test_cases = [ diff --git a/tests/test_litellm/proxy/auth/test_auth_exception_handler.py b/tests/test_litellm/proxy/auth/test_auth_exception_handler.py index 4ccde85dae..27f6015e6f 100644 --- a/tests/test_litellm/proxy/auth/test_auth_exception_handler.py +++ b/tests/test_litellm/proxy/auth/test_auth_exception_handler.py @@ -25,7 +25,7 @@ sys.path.insert( ) # Adds the parent directory to the system path from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import ProxyErrorTypes, ProxyException +from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth from litellm.proxy.auth.auth_exception_handler import UserAPIKeyAuthExceptionHandler @@ -183,3 +183,118 @@ async def test_route_passed_to_post_call_failure_hook(): mock_post_call_failure_hook.assert_called_once() call_args = mock_post_call_failure_hook.call_args[1] assert call_args["user_api_key_dict"].request_route == test_route + + +@pytest.mark.asyncio +async def test_resolved_identity_exported_on_auth_failure(): + """Regression: when auth fails AFTER the key/team/user identity is resolved + (e.g. an expired key), that identity must still reach the failure logging / + span instead of being dropped for a blank UserAPIKeyAuth. 
Before the fix the + handler built a fresh empty object, so the failed trace showed no team alias, + team id, or metadata.""" + handler = UserAPIKeyAuthExceptionHandler() + + resolved_identity = UserAPIKeyAuth( + token="hashed-token", + team_id="team-123", + team_alias="acme-team", + user_id="user-456", + metadata={"foo": "bar"}, + team_metadata={"baz": "qux"}, + ) + + expired_key_error = ProxyException( + message="Authentication Error - Expired Key.", + type=ProxyErrorTypes.expired_key, + param="sk-...", + code=status.HTTP_401_UNAUTHORIZED, + ) + + seeded = {} + + def _capture_seed(user_api_key_dict, model=None): + seeded["dict"] = user_api_key_dict + seeded["model"] = model + + with ( + patch( + "litellm.proxy.auth.auth_exception_handler.seed_request_identity", + side_effect=_capture_seed, + ) as mock_seed, + patch( + "litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook", + new_callable=AsyncMock, + ) as mock_hook, + patch( + "litellm.proxy.proxy_server.general_settings", + {"allow_requests_on_db_unavailable": False}, + ), + ): + with pytest.raises(ProxyException): + await handler._handle_authentication_error( + expired_key_error, + MagicMock(), + {"model": "gpt-4o"}, + "/v1/chat/completions", + None, + "sk-raw-key", + resolved_identity=resolved_identity, + ) + + # The identity that auth already resolved is what gets logged on failure. + logged = mock_hook.call_args[1]["user_api_key_dict"] + assert logged.team_id == "team-123" + assert logged.team_alias == "acme-team" + assert logged.user_id == "user-456" + assert logged.metadata == {"foo": "bar"} + assert logged.team_metadata == {"baz": "qux"} + assert logged.request_route == "/v1/chat/completions" + + # And it is stamped onto the span eagerly, before the request is rejected. 
+ mock_seed.assert_called_once() + assert seeded["dict"] is logged + assert seeded["dict"].team_alias == "acme-team" + assert seeded["model"] == "gpt-4o" + + +@pytest.mark.asyncio +async def test_auth_failure_without_resolved_identity_still_logs(): + """When auth fails before any identity is resolved (e.g. an unknown key), + the handler must still log a usable object carrying the hashed (not raw) api + key and route, not crash on the missing identity.""" + handler = UserAPIKeyAuthExceptionHandler() + + with ( + patch( + "litellm.proxy.auth.auth_exception_handler.seed_request_identity", + ), + patch( + "litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook", + new_callable=AsyncMock, + ) as mock_hook, + patch( + "litellm.proxy.proxy_server.general_settings", + {"allow_requests_on_db_unavailable": False}, + ), + ): + with pytest.raises(ProxyException): + await handler._handle_authentication_error( + ProxyException( + message="Invalid API key", + type=ProxyErrorTypes.auth_error, + param=None, + code=status.HTTP_401_UNAUTHORIZED, + ), + MagicMock(), + {}, + "/v1/chat/completions", + None, + "sk-unknown", + ) + + logged = mock_hook.call_args[1]["user_api_key_dict"] + # Raw key must NOT land on the object — it would be promoted into telemetry + # as litellm.api_key.hash and leak a real sk-... to anyone reading the trace. + assert logged.api_key != "sk-unknown" + assert logged.api_key == UserAPIKeyAuth(api_key="sk-unknown").api_key + assert logged.request_route == "/v1/chat/completions"