From 68d67212cdc11aed3bbfcffc4dddeffd95001aa6 Mon Sep 17 00:00:00 2001
From: Yassin Kortam <yassin@berri.ai>
Date: Sat, 6 Jun 2026 14:57:41 -0700
Subject: [PATCH] fix: 400 on Anthropic context overflow; seed identity on
 failed auth (#29848)

---
 litellm/integrations/otel/logger.py           |   9 +-
 .../exception_mapping_utils.py                |   6 +-
 litellm/proxy/auth/auth_exception_handler.py  |  30 ++++-
 litellm/proxy/auth/user_api_key_auth.py       |   2 +
 .../test_exception_mapping_utils.py           |  27 ++++
 .../proxy/auth/test_auth_exception_handler.py | 117 +++++++++++++++++-
 6 files changed, 182 insertions(+), 9 deletions(-)

diff --git a/litellm/integrations/otel/logger.py b/litellm/integrations/otel/logger.py
index 57738c356f..5e683ce7b9 100644
--- a/litellm/integrations/otel/logger.py
+++ b/litellm/integrations/otel/logger.py
@@ -15,6 +15,7 @@ from litellm.integrations.otel.model.baggage import promoted_baggage
 from litellm.integrations.otel.model.config import OpenTelemetryV2Config
 from litellm.integrations.otel.plumbing.context import (
     is_recordable_span,
+    request_root_span,
     resolve_parent_context,
     resolve_request_span_context,
     set_request_baggage,
@@ -435,8 +436,12 @@ class OpenTelemetryV2(CustomLogger):
                 attach(set_request_baggage(bag, context=get_current()))
                 # The server span was started by the instrumentor before this ran,
                 # so the Baggage processor (which only fires at span start) won't
-                # backfill it — stamp identity on it directly.
-                server_span = get_current_span()
+                # backfill it — stamp identity on it directly. Prefer the anchored
+                # root span over the ambient one so identity still lands on the
+                # server span when seeding from inside the live ``auth`` phase span
+                # (the auth-failure path), where ``get_current_span`` is the phase
+                # span, not the request's root.
+                server_span = request_root_span() or get_current_span()
                 if is_recordable_span(server_span):
                     # Re-capture the anchor here too: this runs post-auth with the
                     # server span active and covers entrypoints that bypass
diff --git a/litellm/litellm_core_utils/exception_mapping_utils.py b/litellm/litellm_core_utils/exception_mapping_utils.py
index 95658d0876..ffaa514091 100644
--- a/litellm/litellm_core_utils/exception_mapping_utils.py
+++ b/litellm/litellm_core_utils/exception_mapping_utils.py
@@ -655,7 +655,11 @@ def exception_type(  # type: ignore  # noqa: PLR0915
                 custom_llm_provider == "anthropic"
                 or custom_llm_provider == "anthropic_text"
             ):  # one of the anthropics
-                if "prompt is too long" in error_str or "prompt: length" in error_str:
+                if (
+                    "prompt is too long" in error_str
+                    or "prompt: length" in error_str
+                    or ExceptionCheckers.is_error_str_context_window_exceeded(error_str)
+                ):
                     exception_mapping_worked = True
                     raise ContextWindowExceededError(
                         message="AnthropicError - {}".format(error_str),
diff --git a/litellm/proxy/auth/auth_exception_handler.py b/litellm/proxy/auth/auth_exception_handler.py
index 431db4254e..e06ac76023 100644
--- a/litellm/proxy/auth/auth_exception_handler.py
+++ b/litellm/proxy/auth/auth_exception_handler.py
@@ -14,6 +14,7 @@ from litellm.proxy._types import (
     ProxyException,
     UserAPIKeyAuth,
 )
+from litellm.integrations.otel.runtime import seed_request_identity
 from litellm.proxy.auth.auth_utils import _get_request_ip_address
 from litellm.proxy.db.exception_handler import PrismaDBExceptionHandler
 from litellm.types.services import ServiceTypes
@@ -41,6 +42,7 @@ class UserAPIKeyAuthExceptionHandler:
         route: str,
         parent_otel_span: Optional[Span],
         api_key: str,
+        resolved_identity: Optional[UserAPIKeyAuth] = None,
     ) -> UserAPIKeyAuth:
         """
         Handles Connection Errors when reading a Virtual Key from LiteLLM DB
@@ -100,12 +102,30 @@ class UserAPIKeyAuthExceptionHandler:
                 extra={"requester_ip": requester_ip},
             )
 
-            # Log this exception to OTEL, Datadog etc
-            user_api_key_dict = UserAPIKeyAuth(
-                parent_otel_span=parent_otel_span,
-                api_key=api_key,
-                request_route=route,
+            # Log this exception to OTEL, Datadog etc. Reuse the identity resolved
+            # before the failure (team alias/id, metadata, user) so the failed span
+            # is labeled — a fresh UserAPIKeyAuth here would drop everything auth had
+            # already looked up (e.g. an expired key whose team/user is known). Shallow
+            # copy: assignments below don't reach the caller; nested dicts stay shared.
+            user_api_key_dict = (
+                resolved_identity.model_copy()
+                if resolved_identity is not None
+                else UserAPIKeyAuth()
             )
+            user_api_key_dict.parent_otel_span = parent_otel_span
+            user_api_key_dict.request_route = route
+            user_api_key_dict.api_key = (
+                user_api_key_dict.api_key or UserAPIKeyAuth(api_key=api_key).api_key
+            )
+
+            # Stamp identity onto the request's server span now, before the request
+            # is rejected; the OTEL failure hooks don't touch the server span, so
+            # without this the failed trace would carry no team/key attributes.
+            seed_request_identity(
+                user_api_key_dict,
+                model=request_data.get("model"),
+            )
+
             # Allow callbacks to transform the error response
             transformed_exception = await proxy_logging_obj.post_call_failure_hook(
                 request_data=request_data,
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index 92765fc8ea..a5501fefa4 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -2070,6 +2070,7 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
             route=route,
             parent_otel_span=parent_otel_span,
             api_key=api_key,
+            resolved_identity=valid_token,
         )
 
 
@@ -2558,6 +2559,7 @@ async def user_api_key_auth(
                 route=route,
                 parent_otel_span=user_api_key_auth_obj.parent_otel_span,
                 api_key=api_key,
+                resolved_identity=user_api_key_auth_obj,
             )
 
         # Defense-in-depth: ``_user_api_key_auth_builder`` has multiple early-return
diff --git a/tests/test_litellm/litellm_core_utils/test_exception_mapping_utils.py b/tests/test_litellm/litellm_core_utils/test_exception_mapping_utils.py
index c768e8b6b1..7dab0e0262 100644
--- a/tests/test_litellm/litellm_core_utils/test_exception_mapping_utils.py
+++ b/tests/test_litellm/litellm_core_utils/test_exception_mapping_utils.py
@@ -286,6 +286,33 @@ def test_lemonade_context_window_error_mapping():
     assert excinfo.value.model == model
 
 
+@pytest.mark.parametrize(
+    "error_message",
+    [
+        "AnthropicException - prompt is too long: 250000 tokens > 200000 maximum",
+        "AnthropicException - input length and max_tokens exceed context limit: "
+        "200000 + 8000 > 200000, decrease input length or max_tokens and try again",
+    ],
+)
+def test_anthropic_context_window_error_mapping(error_message):
+    """Anthropic context-window overflows (input too long, or input + max_tokens
+    over the context limit) must map to ContextWindowExceededError (400) even when
+    the upstream exception carries no ``status_code`` attribute. Previously only
+    "prompt is too long" / "prompt: length" were matched, so the "exceed context
+    limit" phrasing fell through to a generic APIConnectionError (500)."""
+    original_exception = Exception(error_message)  # bare Exception: no status_code
+
+    with pytest.raises(litellm.ContextWindowExceededError) as excinfo:
+        exception_type(
+            model="claude-sonnet-4-5",
+            original_exception=original_exception,
+            custom_llm_provider="anthropic",
+        )
+
+    assert excinfo.value.status_code == 400
+    assert excinfo.value.llm_provider == "anthropic"
+
+
 # Test cases for Vertex AI RateLimitError mapping
 # As per https://github.com/BerriAI/litellm/issues/16189
 vertex_rate_limit_test_cases = [
diff --git a/tests/test_litellm/proxy/auth/test_auth_exception_handler.py b/tests/test_litellm/proxy/auth/test_auth_exception_handler.py
index 4ccde85dae..27f6015e6f 100644
--- a/tests/test_litellm/proxy/auth/test_auth_exception_handler.py
+++ b/tests/test_litellm/proxy/auth/test_auth_exception_handler.py
@@ -25,7 +25,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import ProxyErrorTypes, ProxyException
+from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth
 from litellm.proxy.auth.auth_exception_handler import UserAPIKeyAuthExceptionHandler
 
 
@@ -183,3 +183,118 @@ async def test_route_passed_to_post_call_failure_hook():
             mock_post_call_failure_hook.assert_called_once()
             call_args = mock_post_call_failure_hook.call_args[1]
             assert call_args["user_api_key_dict"].request_route == test_route
+
+
+@pytest.mark.asyncio
+async def test_resolved_identity_exported_on_auth_failure():
+    """Regression: when auth fails AFTER the key/team/user identity is resolved
+    (e.g. an expired key), that identity must still reach the failure logging /
+    span instead of being dropped for a blank UserAPIKeyAuth. Before the fix the
+    handler built a fresh empty object, so the failed trace showed no team alias,
+    team id, or metadata."""
+    handler = UserAPIKeyAuthExceptionHandler()
+
+    resolved_identity = UserAPIKeyAuth(
+        token="hashed-token",
+        team_id="team-123",
+        team_alias="acme-team",
+        user_id="user-456",
+        metadata={"foo": "bar"},
+        team_metadata={"baz": "qux"},
+    )
+
+    expired_key_error = ProxyException(
+        message="Authentication Error - Expired Key.",
+        type=ProxyErrorTypes.expired_key,
+        param="sk-...",
+        code=status.HTTP_401_UNAUTHORIZED,
+    )
+
+    seeded = {}  # filled by _capture_seed with what seed_request_identity received
+
+    def _capture_seed(user_api_key_dict, model=None):
+        seeded["dict"] = user_api_key_dict
+        seeded["model"] = model
+
+    with (
+        patch(
+            "litellm.proxy.auth.auth_exception_handler.seed_request_identity",
+            side_effect=_capture_seed,
+        ) as mock_seed,
+        patch(
+            "litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
+            new_callable=AsyncMock,
+        ) as mock_hook,
+        patch(
+            "litellm.proxy.proxy_server.general_settings",
+            {"allow_requests_on_db_unavailable": False},
+        ),
+    ):
+        with pytest.raises(ProxyException):
+            await handler._handle_authentication_error(
+                expired_key_error,
+                MagicMock(),
+                {"model": "gpt-4o"},  # request_data
+                "/v1/chat/completions",  # route
+                None,  # parent_otel_span
+                "sk-raw-key",  # api_key
+                resolved_identity=resolved_identity,
+            )
+
+    # The identity that auth already resolved is what gets logged on failure.
+    logged = mock_hook.call_args[1]["user_api_key_dict"]
+    assert logged.team_id == "team-123"
+    assert logged.team_alias == "acme-team"
+    assert logged.user_id == "user-456"
+    assert logged.metadata == {"foo": "bar"}
+    assert logged.team_metadata == {"baz": "qux"}
+    assert logged.request_route == "/v1/chat/completions"
+
+    # The very same object (plus the request's model) went to seed_request_identity.
+    mock_seed.assert_called_once()
+    assert seeded["dict"] is logged
+    assert seeded["dict"].team_alias == "acme-team"
+    assert seeded["model"] == "gpt-4o"
+
+
+@pytest.mark.asyncio
+async def test_auth_failure_without_resolved_identity_still_logs():
+    """When auth fails before any identity is resolved (e.g. an unknown key),
+    the handler must still log a usable object carrying the route and the api
+    key as UserAPIKeyAuth stores it (never the raw value), without crashing."""
+    handler = UserAPIKeyAuthExceptionHandler()
+
+    with (
+        patch(
+            "litellm.proxy.auth.auth_exception_handler.seed_request_identity",
+        ),
+        patch(
+            "litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
+            new_callable=AsyncMock,
+        ) as mock_hook,
+        patch(
+            "litellm.proxy.proxy_server.general_settings",
+            {"allow_requests_on_db_unavailable": False},
+        ),
+    ):
+        with pytest.raises(ProxyException):
+            await handler._handle_authentication_error(
+                ProxyException(
+                    message="Invalid API key",
+                    type=ProxyErrorTypes.auth_error,
+                    param=None,
+                    code=status.HTTP_401_UNAUTHORIZED,
+                ),
+                MagicMock(),
+                {},  # request_data: empty, so no model is available
+                "/v1/chat/completions",  # route
+                None,  # parent_otel_span
+                "sk-unknown",  # api_key; resolved_identity is left at its default
+            )
+
+    logged = mock_hook.call_args[1]["user_api_key_dict"]
+    # Raw key must NOT land on the object — it would be promoted into telemetry
+    # as litellm.api_key.hash and leak a real sk-... to anyone reading the trace.
+    assert logged.api_key != "sk-unknown"
+    assert logged.api_key == UserAPIKeyAuth(api_key="sk-unknown").api_key
+    assert logged.request_route == "/v1/chat/completions"