fix: 400 on Anthropic context overflow; seed identity on failed auth (#29848)

2026-08-02 04:21:34 +00:00 · 2026-06-06 14:57:41 -07:00
parent f1667b9137
commit 68d67212cd
6 changed files with 182 additions and 9 deletions
@@ -15,6 +15,7 @@ from litellm.integrations.otel.model.baggage import promoted_baggage
 from litellm.integrations.otel.model.config import OpenTelemetryV2Config
 from litellm.integrations.otel.plumbing.context import (
    is_recordable_span,
+    request_root_span,
    resolve_parent_context,
    resolve_request_span_context,
    set_request_baggage,
@@ -435,8 +436,12 @@ class OpenTelemetryV2(CustomLogger):
                attach(set_request_baggage(bag, context=get_current()))
                # The server span was started by the instrumentor before this ran,
                # so the Baggage processor (which only fires at span start) won't
-                # backfill it — stamp identity on it directly.
-                server_span = get_current_span()
+                # backfill it — stamp identity on it directly. Prefer the anchored
+                # root span over the ambient one so identity still lands on the
+                # server span when seeding from inside the live ``auth`` phase span
+                # (the auth-failure path), where ``get_current_span`` is the phase
+                # span, not the request's root.
+                server_span = request_root_span() or get_current_span()
                if is_recordable_span(server_span):
                    # Re-capture the anchor here too: this runs post-auth with the
                    # server span active and covers entrypoints that bypass
@@ -655,7 +655,11 @@ def exception_type(  # type: ignore  # noqa: PLR0915
                custom_llm_provider == "anthropic"
                or custom_llm_provider == "anthropic_text"
            ):  # one of the anthropics
-                if "prompt is too long" in error_str or "prompt: length" in error_str:
+                if (
+                    "prompt is too long" in error_str
+                    or "prompt: length" in error_str
+                    or ExceptionCheckers.is_error_str_context_window_exceeded(error_str)
+                ):
                    exception_mapping_worked = True
                    raise ContextWindowExceededError(
                        message="AnthropicError - {}".format(error_str),
@@ -14,6 +14,7 @@ from litellm.proxy._types import (
    ProxyException,
    UserAPIKeyAuth,
 )
+from litellm.integrations.otel.runtime import seed_request_identity
 from litellm.proxy.auth.auth_utils import _get_request_ip_address
 from litellm.proxy.db.exception_handler import PrismaDBExceptionHandler
 from litellm.types.services import ServiceTypes
@@ -41,6 +42,7 @@ class UserAPIKeyAuthExceptionHandler:
        route: str,
        parent_otel_span: Optional[Span],
        api_key: str,
+        resolved_identity: Optional[UserAPIKeyAuth] = None,
    ) -> UserAPIKeyAuth:
        """
        Handles Connection Errors when reading a Virtual Key from LiteLLM DB
@@ -100,12 +102,30 @@ class UserAPIKeyAuthExceptionHandler:
                extra={"requester_ip": requester_ip},
            )

-            # Log this exception to OTEL, Datadog etc
-            user_api_key_dict = UserAPIKeyAuth(
-                parent_otel_span=parent_otel_span,
-                api_key=api_key,
-                request_route=route,
+            # Log this exception to OTEL, Datadog etc. Reuse the identity resolved
+            # before the failure (team alias/id, metadata, user) so the failed span
+            # is labeled — a fresh UserAPIKeyAuth here would drop everything auth had
+            # already looked up (e.g. an expired key whose team/user is known). Copy
+            # so the handler is side-effect-free for the caller's identity object.
+            user_api_key_dict = (
+                resolved_identity.model_copy()
+                if resolved_identity is not None
+                else UserAPIKeyAuth()
            )
+            user_api_key_dict.parent_otel_span = parent_otel_span
+            user_api_key_dict.request_route = route
+            user_api_key_dict.api_key = (
+                user_api_key_dict.api_key or UserAPIKeyAuth(api_key=api_key).api_key
+            )
+
+            # Stamp identity onto the request's server span now, before the request
+            # is rejected; the OTEL failure hooks don't touch the server span, so
+            # without this the failed trace would carry no team/key attributes.
+            seed_request_identity(
+                user_api_key_dict,
+                model=request_data.get("model"),
+            )
+
            # Allow callbacks to transform the error response
            transformed_exception = await proxy_logging_obj.post_call_failure_hook(
                request_data=request_data,
@@ -2070,6 +2070,7 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
            route=route,
            parent_otel_span=parent_otel_span,
            api_key=api_key,
+            resolved_identity=valid_token,
        )


@@ -2558,6 +2559,7 @@ async def user_api_key_auth(
                route=route,
                parent_otel_span=user_api_key_auth_obj.parent_otel_span,
                api_key=api_key,
+                resolved_identity=user_api_key_auth_obj,
            )

        # Defense-in-depth: ``_user_api_key_auth_builder`` has multiple early-return
@@ -286,6 +286,33 @@ def test_lemonade_context_window_error_mapping():
    assert excinfo.value.model == model


+@pytest.mark.parametrize(
+    "error_message",
+    [
+        "AnthropicException - prompt is too long: 250000 tokens > 200000 maximum",  # case 1: "prompt is too long" phrasing
+        "AnthropicException - input length and max_tokens exceed context limit: "
+        "200000 + 8000 > 200000, decrease input length or max_tokens and try again",  # case 2: input + max_tokens phrasing
+    ],
+)
+def test_anthropic_context_window_error_mapping(error_message):
+    """Anthropic context-window overflows (input too long, or input + max_tokens
+    over the context limit) must map to ContextWindowExceededError (400) even when
+    the upstream exception carries no ``status_code`` attribute. Previously only
+    "prompt is too long" was special-cased, so the "exceed context limit" phrasing
+    fell through to a generic APIConnectionError (500)."""
+    original_exception = Exception(error_message)  # plain Exception: no status_code attribute, only the message text
+
+    with pytest.raises(litellm.ContextWindowExceededError) as excinfo:
+        exception_type(
+            model="claude-sonnet-4-5",
+            original_exception=original_exception,
+            custom_llm_provider="anthropic",  # selects the anthropic branch of exception_type
+        )
+
+    assert excinfo.value.status_code == 400  # the mapped error must surface as a 400, not a 500
+    assert excinfo.value.llm_provider == "anthropic"
+
+
 # Test cases for Vertex AI RateLimitError mapping
 # As per https://github.com/BerriAI/litellm/issues/16189
 vertex_rate_limit_test_cases = [
@@ -25,7 +25,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path

 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import ProxyErrorTypes, ProxyException
+from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth
 from litellm.proxy.auth.auth_exception_handler import UserAPIKeyAuthExceptionHandler


@@ -183,3 +183,118 @@ async def test_route_passed_to_post_call_failure_hook():
            mock_post_call_failure_hook.assert_called_once()
            call_args = mock_post_call_failure_hook.call_args[1]
            assert call_args["user_api_key_dict"].request_route == test_route
+
+
+@pytest.mark.asyncio
+async def test_resolved_identity_exported_on_auth_failure():
+    """Regression: when auth fails AFTER the key/team/user identity is resolved
+    (e.g. an expired key), that identity must still reach the failure logging /
+    span instead of being dropped for a blank UserAPIKeyAuth. Before the fix the
+    handler built a fresh empty object, so the failed trace showed no team alias,
+    team id, or metadata."""
+    handler = UserAPIKeyAuthExceptionHandler()
+
+    resolved_identity = UserAPIKeyAuth(  # stands in for the identity auth resolved before failing
+        token="hashed-token",
+        team_id="team-123",
+        team_alias="acme-team",
+        user_id="user-456",
+        metadata={"foo": "bar"},
+        team_metadata={"baz": "qux"},
+    )
+
+    expired_key_error = ProxyException(
+        message="Authentication Error - Expired Key.",
+        type=ProxyErrorTypes.expired_key,
+        param="sk-...",
+        code=status.HTTP_401_UNAUTHORIZED,
+    )
+
+    seeded = {}  # filled by _capture_seed with what seed_request_identity was called with
+
+    def _capture_seed(user_api_key_dict, model=None):
+        seeded["dict"] = user_api_key_dict
+        seeded["model"] = model
+
+    with (
+        patch(
+            "litellm.proxy.auth.auth_exception_handler.seed_request_identity",  # patched where the handler module imports it
+            side_effect=_capture_seed,
+        ) as mock_seed,
+        patch(
+            "litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
+            new_callable=AsyncMock,  # the handler awaits this hook
+        ) as mock_hook,
+        patch(
+            "litellm.proxy.proxy_server.general_settings",
+            {"allow_requests_on_db_unavailable": False},
+        ),
+    ):
+        with pytest.raises(ProxyException):  # the handler call itself must raise ProxyException
+            await handler._handle_authentication_error(
+                expired_key_error,
+                MagicMock(),
+                {"model": "gpt-4o"},  # request_data; its "model" is forwarded to seed_request_identity
+                "/v1/chat/completions",  # route
+                None,  # parent_otel_span
+                "sk-raw-key",  # api_key
+                resolved_identity=resolved_identity,
+            )
+
+    # The identity that auth already resolved is what gets logged on failure.
+    logged = mock_hook.call_args[1]["user_api_key_dict"]  # call_args[1] is the kwargs dict of the recorded call
+    assert logged.team_id == "team-123"
+    assert logged.team_alias == "acme-team"
+    assert logged.user_id == "user-456"
+    assert logged.metadata == {"foo": "bar"}
+    assert logged.team_metadata == {"baz": "qux"}
+    assert logged.request_route == "/v1/chat/completions"
+
+    # And it is stamped onto the span eagerly, before the request is rejected.
+    mock_seed.assert_called_once()
+    assert seeded["dict"] is logged  # the very same object is seeded and then logged
+    assert seeded["dict"].team_alias == "acme-team"
+    assert seeded["model"] == "gpt-4o"
+
+
+@pytest.mark.asyncio
+async def test_auth_failure_without_resolved_identity_still_logs():
+    """When auth fails before any identity is resolved (e.g. an unknown key),
+    the handler must still log a usable object carrying the raw api key and
+    route, not crash on the missing identity."""
+    handler = UserAPIKeyAuthExceptionHandler()
+
+    with (
+        patch(
+            "litellm.proxy.auth.auth_exception_handler.seed_request_identity",  # stubbed out; this test only inspects the failure hook
+        ),
+        patch(
+            "litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
+            new_callable=AsyncMock,  # the handler awaits this hook
+        ) as mock_hook,
+        patch(
+            "litellm.proxy.proxy_server.general_settings",
+            {"allow_requests_on_db_unavailable": False},
+        ),
+    ):
+        with pytest.raises(ProxyException):  # the handler call itself must raise ProxyException
+            await handler._handle_authentication_error(
+                ProxyException(
+                    message="Invalid API key",
+                    type=ProxyErrorTypes.auth_error,
+                    param=None,
+                    code=status.HTTP_401_UNAUTHORIZED,
+                ),
+                MagicMock(),
+                {},  # request_data with no "model" key
+                "/v1/chat/completions",  # route
+                None,  # parent_otel_span
+                "sk-unknown",  # api_key; resolved_identity is omitted, so its default (None) applies
+            )
+
+    logged = mock_hook.call_args[1]["user_api_key_dict"]  # call_args[1] is the kwargs dict of the recorded call
+    # Raw key must NOT land on the object — it would be promoted into telemetry
+    # as litellm.api_key.hash and leak a real sk-... to anyone reading the trace.
+    assert logged.api_key != "sk-unknown"
+    assert logged.api_key == UserAPIKeyAuth(api_key="sk-unknown").api_key  # expected value built the same way the handler derives api_key
+    assert logged.request_route == "/v1/chat/completions"