Merge pull request #29862 from BerriAI/litellm_internal_staging

chore(ci): promote internal staging to main
This commit is contained in:
yuneng-jiang
2026-06-06 15:06:13 -07:00
committed by GitHub
6 changed files with 182 additions and 9 deletions
+7 -2
View File
@@ -15,6 +15,7 @@ from litellm.integrations.otel.model.baggage import promoted_baggage
from litellm.integrations.otel.model.config import OpenTelemetryV2Config
from litellm.integrations.otel.plumbing.context import (
is_recordable_span,
request_root_span,
resolve_parent_context,
resolve_request_span_context,
set_request_baggage,
@@ -435,8 +436,12 @@ class OpenTelemetryV2(CustomLogger):
attach(set_request_baggage(bag, context=get_current()))
# The server span was started by the instrumentor before this ran,
# so the Baggage processor (which only fires at span start) won't
# backfill it — stamp identity on it directly.
server_span = get_current_span()
# backfill it — stamp identity on it directly. Prefer the anchored
# root span over the ambient one so identity still lands on the
# server span when seeding from inside the live ``auth`` phase span
# (the auth-failure path), where ``get_current_span`` is the phase
# span, not the request's root.
server_span = request_root_span() or get_current_span()
if is_recordable_span(server_span):
# Re-capture the anchor here too: this runs post-auth with the
# server span active and covers entrypoints that bypass
@@ -655,7 +655,11 @@ def exception_type( # type: ignore # noqa: PLR0915
custom_llm_provider == "anthropic"
or custom_llm_provider == "anthropic_text"
): # one of the anthropics
if "prompt is too long" in error_str or "prompt: length" in error_str:
if (
"prompt is too long" in error_str
or "prompt: length" in error_str
or ExceptionCheckers.is_error_str_context_window_exceeded(error_str)
):
exception_mapping_worked = True
raise ContextWindowExceededError(
message="AnthropicError - {}".format(error_str),
+25 -5
View File
@@ -14,6 +14,7 @@ from litellm.proxy._types import (
ProxyException,
UserAPIKeyAuth,
)
from litellm.integrations.otel.runtime import seed_request_identity
from litellm.proxy.auth.auth_utils import _get_request_ip_address
from litellm.proxy.db.exception_handler import PrismaDBExceptionHandler
from litellm.types.services import ServiceTypes
@@ -41,6 +42,7 @@ class UserAPIKeyAuthExceptionHandler:
route: str,
parent_otel_span: Optional[Span],
api_key: str,
resolved_identity: Optional[UserAPIKeyAuth] = None,
) -> UserAPIKeyAuth:
"""
Handles Connection Errors when reading a Virtual Key from LiteLLM DB
@@ -100,12 +102,30 @@ class UserAPIKeyAuthExceptionHandler:
extra={"requester_ip": requester_ip},
)
# Log this exception to OTEL, Datadog etc
user_api_key_dict = UserAPIKeyAuth(
parent_otel_span=parent_otel_span,
api_key=api_key,
request_route=route,
# Log this exception to OTEL, Datadog etc. Reuse the identity resolved
# before the failure (team alias/id, metadata, user) so the failed span
# is labeled — a fresh UserAPIKeyAuth here would drop everything auth had
# already looked up (e.g. an expired key whose team/user is known). Copy
# so the handler is side-effect-free for the caller's identity object.
user_api_key_dict = (
resolved_identity.model_copy()
if resolved_identity is not None
else UserAPIKeyAuth()
)
user_api_key_dict.parent_otel_span = parent_otel_span
user_api_key_dict.request_route = route
user_api_key_dict.api_key = (
user_api_key_dict.api_key or UserAPIKeyAuth(api_key=api_key).api_key
)
# Stamp identity onto the request's server span now, before the request
# is rejected; the OTEL failure hooks don't touch the server span, so
# without this the failed trace would carry no team/key attributes.
seed_request_identity(
user_api_key_dict,
model=request_data.get("model"),
)
# Allow callbacks to transform the error response
transformed_exception = await proxy_logging_obj.post_call_failure_hook(
request_data=request_data,
+2
View File
@@ -2070,6 +2070,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
route=route,
parent_otel_span=parent_otel_span,
api_key=api_key,
resolved_identity=valid_token,
)
@@ -2558,6 +2559,7 @@ async def user_api_key_auth(
route=route,
parent_otel_span=user_api_key_auth_obj.parent_otel_span,
api_key=api_key,
resolved_identity=user_api_key_auth_obj,
)
# Defense-in-depth: ``_user_api_key_auth_builder`` has multiple early-return
@@ -286,6 +286,33 @@ def test_lemonade_context_window_error_mapping():
assert excinfo.value.model == model
@pytest.mark.parametrize(
    "error_message",
    [
        "AnthropicException - prompt is too long: 250000 tokens > 200000 maximum",
        "AnthropicException - input length and max_tokens exceed context limit: "
        "200000 + 8000 > 200000, decrease input length or max_tokens and try again",
    ],
)
def test_anthropic_context_window_error_mapping(error_message):
    """Anthropic context-window overflows map to ContextWindowExceededError.

    Two phrasings are covered: the prompt alone being too long, and
    input + ``max_tokens`` going over the context limit. The upstream
    exception is a plain ``Exception`` built from the message only, so the
    mapping is exercised without any ``status_code`` being passed in, and the
    mapped error is expected to report 400 for the ``anthropic`` provider.
    """
    # A bare Exception: only the message text is available to the mapper.
    upstream_error = Exception(error_message)

    with pytest.raises(litellm.ContextWindowExceededError) as raised:
        exception_type(
            model="claude-sonnet-4-5",
            original_exception=upstream_error,
            custom_llm_provider="anthropic",
        )

    mapped_error = raised.value
    assert mapped_error.status_code == 400
    assert mapped_error.llm_provider == "anthropic"
# Test cases for Vertex AI RateLimitError mapping
# As per https://github.com/BerriAI/litellm/issues/16189
vertex_rate_limit_test_cases = [
@@ -25,7 +25,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import ProxyErrorTypes, ProxyException
from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth
from litellm.proxy.auth.auth_exception_handler import UserAPIKeyAuthExceptionHandler
@@ -183,3 +183,118 @@ async def test_route_passed_to_post_call_failure_hook():
mock_post_call_failure_hook.assert_called_once()
call_args = mock_post_call_failure_hook.call_args[1]
assert call_args["user_api_key_dict"].request_route == test_route
@pytest.mark.asyncio
async def test_resolved_identity_exported_on_auth_failure():
    """Regression: identity resolved before an auth failure must be exported.

    When auth fails AFTER the key/team/user identity is resolved (e.g. an
    expired key), that identity must still reach the failure logging / span
    instead of being dropped for a blank UserAPIKeyAuth. Before the fix the
    handler built a fresh empty object, so the failed trace showed no team
    alias, team id, or metadata.

    The test also checks that the handler works on a copy: the object handed
    to the failure hook is not the caller's ``resolved_identity``, and the
    caller's object keeps the route it had before the call.
    """
    handler = UserAPIKeyAuthExceptionHandler()
    resolved_identity = UserAPIKeyAuth(
        token="hashed-token",
        team_id="team-123",
        team_alias="acme-team",
        user_id="user-456",
        metadata={"foo": "bar"},
        team_metadata={"baz": "qux"},
    )
    # Snapshot taken before the call so the no-mutation check below does not
    # depend on what the model's default route value is.
    route_before = resolved_identity.request_route
    expired_key_error = ProxyException(
        message="Authentication Error - Expired Key.",
        type=ProxyErrorTypes.expired_key,
        param="sk-...",
        code=status.HTTP_401_UNAUTHORIZED,
    )

    seeded = {}

    def _capture_seed(user_api_key_dict, model=None):
        # Record exactly what the handler passes to seed_request_identity.
        seeded["dict"] = user_api_key_dict
        seeded["model"] = model

    with (
        patch(
            "litellm.proxy.auth.auth_exception_handler.seed_request_identity",
            side_effect=_capture_seed,
        ) as mock_seed,
        patch(
            "litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
            new_callable=AsyncMock,
        ) as mock_hook,
        patch(
            "litellm.proxy.proxy_server.general_settings",
            {"allow_requests_on_db_unavailable": False},
        ),
    ):
        with pytest.raises(ProxyException):
            await handler._handle_authentication_error(
                expired_key_error,
                MagicMock(),
                {"model": "gpt-4o"},
                "/v1/chat/completions",
                None,
                "sk-raw-key",
                resolved_identity=resolved_identity,
            )

    # Fail with a clear message if the hook never ran, instead of a TypeError
    # from subscripting ``call_args`` when it is None.
    mock_hook.assert_called_once()

    # The identity that auth already resolved is what gets logged on failure.
    logged = mock_hook.call_args[1]["user_api_key_dict"]
    assert logged.team_id == "team-123"
    assert logged.team_alias == "acme-team"
    assert logged.user_id == "user-456"
    assert logged.metadata == {"foo": "bar"}
    assert logged.team_metadata == {"baz": "qux"}
    assert logged.request_route == "/v1/chat/completions"

    # The handler must work on a copy and leave the caller's object alone.
    assert logged is not resolved_identity
    assert resolved_identity.request_route == route_before

    # And it is stamped onto the span eagerly, before the request is rejected.
    mock_seed.assert_called_once()
    assert seeded["dict"] is logged
    assert seeded["dict"].team_alias == "acme-team"
    assert seeded["model"] == "gpt-4o"
@pytest.mark.asyncio
async def test_auth_failure_without_resolved_identity_still_logs():
    """Auth failure with no resolved identity must still log a usable object.

    When auth fails before any identity is resolved (e.g. an unknown key), the
    handler must not crash on the missing identity. The object it hands to the
    failure hook must carry the route and the api key in the same form
    ``UserAPIKeyAuth(api_key=...)`` produces, never the raw ``sk-...`` value
    the client sent.
    """
    handler = UserAPIKeyAuthExceptionHandler()

    with (
        patch(
            "litellm.proxy.auth.auth_exception_handler.seed_request_identity",
        ),
        patch(
            "litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
            new_callable=AsyncMock,
        ) as mock_hook,
        patch(
            "litellm.proxy.proxy_server.general_settings",
            {"allow_requests_on_db_unavailable": False},
        ),
    ):
        with pytest.raises(ProxyException):
            await handler._handle_authentication_error(
                ProxyException(
                    message="Invalid API key",
                    type=ProxyErrorTypes.auth_error,
                    param=None,
                    code=status.HTTP_401_UNAUTHORIZED,
                ),
                MagicMock(),
                {},
                "/v1/chat/completions",
                None,
                "sk-unknown",
            )

    # Fail with a clear message if the hook never ran, instead of a TypeError
    # from subscripting ``call_args`` when it is None.
    mock_hook.assert_called_once()

    logged = mock_hook.call_args[1]["user_api_key_dict"]
    # Raw key must NOT land on the object — it would be promoted into telemetry
    # as litellm.api_key.hash and leak a real sk-... to anyone reading the trace.
    assert logged.api_key != "sk-unknown"
    assert logged.api_key == UserAPIKeyAuth(api_key="sk-unknown").api_key
    assert logged.request_route == "/v1/chat/completions"