mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 12:48:57 +00:00
fix: 400 on Anthropic context overflow; seed identity on failed auth (#29848)
This commit is contained in:
@@ -15,6 +15,7 @@ from litellm.integrations.otel.model.baggage import promoted_baggage
|
||||
from litellm.integrations.otel.model.config import OpenTelemetryV2Config
|
||||
from litellm.integrations.otel.plumbing.context import (
|
||||
is_recordable_span,
|
||||
request_root_span,
|
||||
resolve_parent_context,
|
||||
resolve_request_span_context,
|
||||
set_request_baggage,
|
||||
@@ -435,8 +436,12 @@ class OpenTelemetryV2(CustomLogger):
|
||||
attach(set_request_baggage(bag, context=get_current()))
|
||||
# The server span was started by the instrumentor before this ran,
|
||||
# so the Baggage processor (which only fires at span start) won't
|
||||
# backfill it — stamp identity on it directly.
|
||||
server_span = get_current_span()
|
||||
# backfill it — stamp identity on it directly. Prefer the anchored
|
||||
# root span over the ambient one so identity still lands on the
|
||||
# server span when seeding from inside the live ``auth`` phase span
|
||||
# (the auth-failure path), where ``get_current_span`` is the phase
|
||||
# span, not the request's root.
|
||||
server_span = request_root_span() or get_current_span()
|
||||
if is_recordable_span(server_span):
|
||||
# Re-capture the anchor here too: this runs post-auth with the
|
||||
# server span active and covers entrypoints that bypass
|
||||
|
||||
@@ -655,7 +655,11 @@ def exception_type( # type: ignore # noqa: PLR0915
|
||||
custom_llm_provider == "anthropic"
|
||||
or custom_llm_provider == "anthropic_text"
|
||||
): # one of the anthropics
|
||||
if "prompt is too long" in error_str or "prompt: length" in error_str:
|
||||
if (
|
||||
"prompt is too long" in error_str
|
||||
or "prompt: length" in error_str
|
||||
or ExceptionCheckers.is_error_str_context_window_exceeded(error_str)
|
||||
):
|
||||
exception_mapping_worked = True
|
||||
raise ContextWindowExceededError(
|
||||
message="AnthropicError - {}".format(error_str),
|
||||
|
||||
@@ -14,6 +14,7 @@ from litellm.proxy._types import (
|
||||
ProxyException,
|
||||
UserAPIKeyAuth,
|
||||
)
|
||||
from litellm.integrations.otel.runtime import seed_request_identity
|
||||
from litellm.proxy.auth.auth_utils import _get_request_ip_address
|
||||
from litellm.proxy.db.exception_handler import PrismaDBExceptionHandler
|
||||
from litellm.types.services import ServiceTypes
|
||||
@@ -41,6 +42,7 @@ class UserAPIKeyAuthExceptionHandler:
|
||||
route: str,
|
||||
parent_otel_span: Optional[Span],
|
||||
api_key: str,
|
||||
resolved_identity: Optional[UserAPIKeyAuth] = None,
|
||||
) -> UserAPIKeyAuth:
|
||||
"""
|
||||
Handles Connection Errors when reading a Virtual Key from LiteLLM DB
|
||||
@@ -100,12 +102,30 @@ class UserAPIKeyAuthExceptionHandler:
|
||||
extra={"requester_ip": requester_ip},
|
||||
)
|
||||
|
||||
# Log this exception to OTEL, Datadog etc
|
||||
user_api_key_dict = UserAPIKeyAuth(
|
||||
parent_otel_span=parent_otel_span,
|
||||
api_key=api_key,
|
||||
request_route=route,
|
||||
# Log this exception to OTEL, Datadog etc. Reuse the identity resolved
|
||||
# before the failure (team alias/id, metadata, user) so the failed span
|
||||
# is labeled — a fresh UserAPIKeyAuth here would drop everything auth had
|
||||
# already looked up (e.g. an expired key whose team/user is known). Copy
|
||||
# so the handler is side-effect-free for the caller's identity object.
|
||||
user_api_key_dict = (
|
||||
resolved_identity.model_copy()
|
||||
if resolved_identity is not None
|
||||
else UserAPIKeyAuth()
|
||||
)
|
||||
user_api_key_dict.parent_otel_span = parent_otel_span
|
||||
user_api_key_dict.request_route = route
|
||||
user_api_key_dict.api_key = (
|
||||
user_api_key_dict.api_key or UserAPIKeyAuth(api_key=api_key).api_key
|
||||
)
|
||||
|
||||
# Stamp identity onto the request's server span now, before the request
|
||||
# is rejected; the OTEL failure hooks don't touch the server span, so
|
||||
# without this the failed trace would carry no team/key attributes.
|
||||
seed_request_identity(
|
||||
user_api_key_dict,
|
||||
model=request_data.get("model"),
|
||||
)
|
||||
|
||||
# Allow callbacks to transform the error response
|
||||
transformed_exception = await proxy_logging_obj.post_call_failure_hook(
|
||||
request_data=request_data,
|
||||
|
||||
@@ -2070,6 +2070,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
|
||||
route=route,
|
||||
parent_otel_span=parent_otel_span,
|
||||
api_key=api_key,
|
||||
resolved_identity=valid_token,
|
||||
)
|
||||
|
||||
|
||||
@@ -2558,6 +2559,7 @@ async def user_api_key_auth(
|
||||
route=route,
|
||||
parent_otel_span=user_api_key_auth_obj.parent_otel_span,
|
||||
api_key=api_key,
|
||||
resolved_identity=user_api_key_auth_obj,
|
||||
)
|
||||
|
||||
# Defense-in-depth: ``_user_api_key_auth_builder`` has multiple early-return
|
||||
|
||||
@@ -286,6 +286,33 @@ def test_lemonade_context_window_error_mapping():
|
||||
assert excinfo.value.model == model
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"error_message",
|
||||
[
|
||||
"AnthropicException - prompt is too long: 250000 tokens > 200000 maximum",
|
||||
"AnthropicException - input length and max_tokens exceed context limit: "
|
||||
"200000 + 8000 > 200000, decrease input length or max_tokens and try again",
|
||||
],
|
||||
)
|
||||
def test_anthropic_context_window_error_mapping(error_message):
|
||||
"""Anthropic context-window overflows (input too long, or input + max_tokens
|
||||
over the context limit) must map to ContextWindowExceededError (400) even when
|
||||
the upstream exception carries no ``status_code`` attribute. Previously only
|
||||
"prompt is too long" was special-cased, so the "exceed context limit" phrasing
|
||||
fell through to a generic APIConnectionError (500)."""
|
||||
original_exception = Exception(error_message)
|
||||
|
||||
with pytest.raises(litellm.ContextWindowExceededError) as excinfo:
|
||||
exception_type(
|
||||
model="claude-sonnet-4-5",
|
||||
original_exception=original_exception,
|
||||
custom_llm_provider="anthropic",
|
||||
)
|
||||
|
||||
assert excinfo.value.status_code == 400
|
||||
assert excinfo.value.llm_provider == "anthropic"
|
||||
|
||||
|
||||
# Test cases for Vertex AI RateLimitError mapping
|
||||
# As per https://github.com/BerriAI/litellm/issues/16189
|
||||
vertex_rate_limit_test_cases = [
|
||||
|
||||
@@ -25,7 +25,7 @@ sys.path.insert(
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy._types import ProxyErrorTypes, ProxyException
|
||||
from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth
|
||||
from litellm.proxy.auth.auth_exception_handler import UserAPIKeyAuthExceptionHandler
|
||||
|
||||
|
||||
@@ -183,3 +183,118 @@ async def test_route_passed_to_post_call_failure_hook():
|
||||
mock_post_call_failure_hook.assert_called_once()
|
||||
call_args = mock_post_call_failure_hook.call_args[1]
|
||||
assert call_args["user_api_key_dict"].request_route == test_route
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resolved_identity_exported_on_auth_failure():
|
||||
"""Regression: when auth fails AFTER the key/team/user identity is resolved
|
||||
(e.g. an expired key), that identity must still reach the failure logging /
|
||||
span instead of being dropped for a blank UserAPIKeyAuth. Before the fix the
|
||||
handler built a fresh empty object, so the failed trace showed no team alias,
|
||||
team id, or metadata."""
|
||||
handler = UserAPIKeyAuthExceptionHandler()
|
||||
|
||||
resolved_identity = UserAPIKeyAuth(
|
||||
token="hashed-token",
|
||||
team_id="team-123",
|
||||
team_alias="acme-team",
|
||||
user_id="user-456",
|
||||
metadata={"foo": "bar"},
|
||||
team_metadata={"baz": "qux"},
|
||||
)
|
||||
|
||||
expired_key_error = ProxyException(
|
||||
message="Authentication Error - Expired Key.",
|
||||
type=ProxyErrorTypes.expired_key,
|
||||
param="sk-...",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
)
|
||||
|
||||
seeded = {}
|
||||
|
||||
def _capture_seed(user_api_key_dict, model=None):
|
||||
seeded["dict"] = user_api_key_dict
|
||||
seeded["model"] = model
|
||||
|
||||
with (
|
||||
patch(
|
||||
"litellm.proxy.auth.auth_exception_handler.seed_request_identity",
|
||||
side_effect=_capture_seed,
|
||||
) as mock_seed,
|
||||
patch(
|
||||
"litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_hook,
|
||||
patch(
|
||||
"litellm.proxy.proxy_server.general_settings",
|
||||
{"allow_requests_on_db_unavailable": False},
|
||||
),
|
||||
):
|
||||
with pytest.raises(ProxyException):
|
||||
await handler._handle_authentication_error(
|
||||
expired_key_error,
|
||||
MagicMock(),
|
||||
{"model": "gpt-4o"},
|
||||
"/v1/chat/completions",
|
||||
None,
|
||||
"sk-raw-key",
|
||||
resolved_identity=resolved_identity,
|
||||
)
|
||||
|
||||
# The identity that auth already resolved is what gets logged on failure.
|
||||
logged = mock_hook.call_args[1]["user_api_key_dict"]
|
||||
assert logged.team_id == "team-123"
|
||||
assert logged.team_alias == "acme-team"
|
||||
assert logged.user_id == "user-456"
|
||||
assert logged.metadata == {"foo": "bar"}
|
||||
assert logged.team_metadata == {"baz": "qux"}
|
||||
assert logged.request_route == "/v1/chat/completions"
|
||||
|
||||
# And it is stamped onto the span eagerly, before the request is rejected.
|
||||
mock_seed.assert_called_once()
|
||||
assert seeded["dict"] is logged
|
||||
assert seeded["dict"].team_alias == "acme-team"
|
||||
assert seeded["model"] == "gpt-4o"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_auth_failure_without_resolved_identity_still_logs():
|
||||
"""When auth fails before any identity is resolved (e.g. an unknown key),
|
||||
the handler must still log a usable object carrying the hashed api key and
|
||||
route, not crash on the missing identity."""
|
||||
handler = UserAPIKeyAuthExceptionHandler()
|
||||
|
||||
with (
|
||||
patch(
|
||||
"litellm.proxy.auth.auth_exception_handler.seed_request_identity",
|
||||
),
|
||||
patch(
|
||||
"litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_hook,
|
||||
patch(
|
||||
"litellm.proxy.proxy_server.general_settings",
|
||||
{"allow_requests_on_db_unavailable": False},
|
||||
),
|
||||
):
|
||||
with pytest.raises(ProxyException):
|
||||
await handler._handle_authentication_error(
|
||||
ProxyException(
|
||||
message="Invalid API key",
|
||||
type=ProxyErrorTypes.auth_error,
|
||||
param=None,
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
),
|
||||
MagicMock(),
|
||||
{},
|
||||
"/v1/chat/completions",
|
||||
None,
|
||||
"sk-unknown",
|
||||
)
|
||||
|
||||
logged = mock_hook.call_args[1]["user_api_key_dict"]
|
||||
# Raw key must NOT land on the object — it would be promoted into telemetry
|
||||
# as litellm.api_key.hash and leak a real sk-... to anyone reading the trace.
|
||||
assert logged.api_key != "sk-unknown"
|
||||
assert logged.api_key == UserAPIKeyAuth(api_key="sk-unknown").api_key
|
||||
assert logged.request_route == "/v1/chat/completions"
|
||||
|
||||
Reference in New Issue
Block a user