security: prevent API key leaks in error tracebacks, logs, and alerts

Gemini API keys embedded in URLs as ?key= query parameters leak through
httpx error tracebacks, which are then captured by traceback.format_exc()
and forwarded to logging callbacks, Slack/Teams alerts, and HTTP client
responses.

Short-term: all httpx.HTTPStatusError handlers now raise
MaskedHTTPStatusError(...) from None, which masks the URL and breaks
exception chaining so the original error never appears in tracebacks.

Long-term: moved all Gemini/Vertex URL constructions from ?key={api_key}
to x-goog-api-key header (Google's documented auth method), so the key
is never in the URL at all. WebSocket realtime is the only exception,
since not all WebSocket clients support custom headers.

Additionally hardened all outbound credential paths:
- WebSocket close reasons now pass through _redact_string()
- Callback pipeline (failure_handler) redacts traceback_exception and
  error_str before forwarding to integrations (Langfuse, Datadog, etc.)
- Slack/Teams alert messages redacted in send_llm_exception_alert,
  ProxyLogging.failure_handler, and post_call_failure_hook
- HTTP error responses in proxy SSE and health endpoints redacted
- Exception messages in exception_mapping_utils redacted
- print_verbose() stdout output redacted when set_verbose=True
- HTTPHandler.put() now catches httpx.HTTPStatusError and re-raises it as
  MaskedHTTPStatusError (this handler was previously missing)
This commit is contained in:
user
2026-04-03 22:33:06 +00:00
parent 7b36cfc0de
commit 25f93bed91
23 changed files with 217 additions and 219 deletions
+2
View File
@@ -86,6 +86,8 @@ _SECRET_RE = _build_secret_patterns()
def _redact_string(value: str) -> str:
if not _ENABLE_SECRET_REDACTION:
return value
return _SECRET_RE.sub(_REDACTED, value)
@@ -6,7 +6,7 @@ from typing import Any, Optional
import httpx
import litellm
from litellm._logging import verbose_logger
from litellm._logging import _redact_string, verbose_logger
from litellm.types.utils import LlmProviders
from ..exceptions import (
@@ -2304,7 +2304,7 @@ def exception_type( # type: ignore # noqa: PLR0915
else:
# if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
raise APIConnectionError(
message=f"{exception_provider} APIConnectionError - {message}\n{traceback.format_exc()}",
message=f"{exception_provider} APIConnectionError - {message}\n{_redact_string(traceback.format_exc())}",
llm_provider="azure",
model=model,
litellm_debug_info=extra_information,
@@ -2431,7 +2431,7 @@ def exception_type( # type: ignore # noqa: PLR0915
else:
raise APIConnectionError(
message="{}\n{}".format(
str(original_exception), traceback.format_exc()
str(original_exception), _redact_string(traceback.format_exc())
),
llm_provider=custom_llm_provider,
model=model,
@@ -2460,7 +2460,7 @@ def exception_type( # type: ignore # noqa: PLR0915
setattr(e, "litellm_response_headers", litellm_response_headers)
raise e # it's already mapped
raised_exc = APIConnectionError(
message="{}\n{}".format(original_exception, traceback.format_exc()),
message="{}\n{}".format(original_exception, _redact_string(traceback.format_exc())),
llm_provider="",
model="",
)
@@ -36,7 +36,7 @@ from litellm import (
log_raw_request_response,
turn_off_message_logging,
)
from litellm._logging import _is_debugging_on, verbose_logger
from litellm._logging import _is_debugging_on, _redact_string, verbose_logger
from litellm._uuid import uuid
from litellm.batches.batch_utils import _handle_completed_batch
from litellm.caching.caching import DualCache, InMemoryCache
@@ -2848,7 +2848,11 @@ class Logging(LiteLLMLoggingBaseClass):
self.model_call_details["log_event_type"] = "failed_api_call"
self.model_call_details["exception"] = exception
self.model_call_details["traceback_exception"] = traceback_exception
self.model_call_details["traceback_exception"] = (
_redact_string(traceback_exception)
if isinstance(traceback_exception, str)
else traceback_exception
)
self.model_call_details["end_time"] = end_time
self.model_call_details.setdefault("original_response", None)
self.model_call_details["response_cost"] = 0
@@ -2871,7 +2875,7 @@ class Logging(LiteLLMLoggingBaseClass):
end_time=end_time,
logging_obj=self,
status="failure",
error_str=str(exception),
error_str=_redact_string(str(exception)),
original_exception=exception,
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
+2 -2
View File
@@ -6,7 +6,7 @@ This requires websockets, and is currently only supported on LiteLLM Proxy.
from typing import Any, Optional, cast
from litellm._logging import verbose_proxy_logger
from litellm._logging import _redact_string, verbose_proxy_logger
from litellm.constants import REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES
from ....litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
@@ -118,7 +118,7 @@ class AzureOpenAIRealtime(AzureChatCompletion):
await realtime_streaming.bidirectional_forward()
except websockets.exceptions.InvalidStatusCode as e: # type: ignore
await websocket.close(code=e.status_code, reason=str(e))
await websocket.close(code=e.status_code, reason=_redact_string(str(e)))
except Exception:
verbose_proxy_logger.exception(
"Error in AzureOpenAIRealtime.async_realtime"
+2 -2
View File
@@ -8,7 +8,7 @@ import asyncio
import json
from typing import Any, Optional
from litellm._logging import verbose_proxy_logger
from litellm._logging import _redact_string, verbose_proxy_logger
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from ..base_aws_llm import BaseAWSLLM
@@ -152,7 +152,7 @@ class BedrockRealtime(BaseAWSLLM):
f"Error in BedrockRealtime.async_realtime: {e}"
)
try:
await websocket.close(code=1011, reason=f"Internal error: {str(e)}")
await websocket.close(code=1011, reason=_redact_string(f"Internal error: {str(e)}"))
except Exception:
pass
raise
+62 -63
View File
@@ -316,16 +316,65 @@ def mask_sensitive_info(error_message):
return error_message
def _safe_get_response_text(response: httpx.Response) -> str:
    """Return ``response.text``, or ``""`` if reading it raises.

    Any exception raised while accessing the text (for example a decoding
    failure) is swallowed on purpose so that this helper never raises itself.
    """
    try:
        text = response.text
    except Exception:
        # Best-effort: an unreadable body is reported as an empty string.
        text = ""
    return text
async def _safe_aread_response(response: httpx.Response) -> bytes:
    """Await ``response.aread()`` and return the body, or ``b""`` on failure.

    Any exception raised while reading is swallowed on purpose so that this
    helper never raises itself.
    """
    try:
        payload = await response.aread()
    except Exception:
        # Best-effort: an unreadable body is reported as empty bytes.
        payload = b""
    return payload
def _safe_read_response(response: httpx.Response) -> bytes:
    """Return ``response.read()``, or ``b""`` if reading the body fails.

    Any exception raised while reading is swallowed on purpose so that this
    helper never raises itself.
    """
    try:
        payload = response.read()
    except Exception:
        # Best-effort: an unreadable body is reported as empty bytes.
        payload = b""
    return payload
def _raise_masked_sync_error(e: httpx.HTTPStatusError, stream: bool) -> None:
    """Re-raise ``e`` as a ``MaskedHTTPStatusError`` from a sync HTTP handler.

    The response body is read with the raw-bytes reader when ``stream`` is
    truthy and with the text reader otherwise, passed through
    ``mask_sensitive_info``, and attached as both ``message`` and ``text``.

    Raises:
        MaskedHTTPStatusError: always. It is raised ``from None`` so the
            original exception is not chained into the traceback.
    """
    if stream:
        raw_payload = _safe_read_response(e.response)
    else:
        raw_payload = _safe_get_response_text(e.response)
    masked_payload = mask_sensitive_info(raw_payload)
    raise MaskedHTTPStatusError(
        e, message=masked_payload, text=masked_payload
    ) from None
async def _raise_masked_async_error(e: httpx.HTTPStatusError, stream: bool) -> None:
    """Re-raise ``e`` as a ``MaskedHTTPStatusError`` from an async HTTP handler.

    The response body is read with the async raw-bytes reader when ``stream``
    is truthy and with the text reader otherwise. In both cases the payload is
    passed through ``mask_sensitive_info`` before being attached as
    ``message`` and ``text``, matching the sync helper.

    Raises:
        MaskedHTTPStatusError: always. It is raised ``from None`` so the
            original exception is not chained into the traceback.
    """
    if stream:
        raw_payload = await _safe_aread_response(e.response)
    else:
        raw_payload = _safe_get_response_text(e.response)
    # Mask the streamed body too; previously only the non-streaming branch
    # was masked here.
    # NOTE(review): the sync helper already hands bytes to mask_sensitive_info,
    # so it is assumed to accept bytes — confirm against its implementation.
    masked_payload = mask_sensitive_info(raw_payload)
    raise MaskedHTTPStatusError(
        e, message=masked_payload, text=masked_payload
    ) from None
class MaskedHTTPStatusError(httpx.HTTPStatusError):
def __init__(
self, original_error, message: Optional[str] = None, text: Optional[str] = None
):
# Create a new error with the masked URL
masked_url = mask_sensitive_info(str(original_error.request.url))
# Create a new error that looks like the original, but with a masked URL
# Mask the original exception message too (it contains the full URL)
masked_original_message = mask_sensitive_info(str(original_error))
# Safely access response content — decompression can fail (e.g. zlib error)
try:
response_content = original_error.response.content
except Exception:
response_content = b""
super().__init__(
message=original_error.message,
message=masked_original_message,
request=httpx.Request(
method=original_error.request.method,
url=masked_url,
@@ -334,12 +383,13 @@ class MaskedHTTPStatusError(httpx.HTTPStatusError):
),
response=httpx.Response(
status_code=original_error.response.status_code,
content=original_error.response.content,
content=response_content,
headers=original_error.response.headers,
),
)
self.message = message
self.text = text
self.status_code = original_error.response.status_code
class AsyncHTTPHandler:
@@ -501,16 +551,7 @@ class AsyncHTTPHandler:
headers=headers,
)
except httpx.HTTPStatusError as e:
if stream is True:
setattr(e, "message", await e.response.aread())
setattr(e, "text", await e.response.aread())
else:
setattr(e, "message", mask_sensitive_info(e.response.text))
setattr(e, "text", mask_sensitive_info(e.response.text))
setattr(e, "status_code", e.response.status_code)
raise e
await _raise_masked_async_error(e, stream)
except Exception as e:
raise e
@@ -571,12 +612,7 @@ class AsyncHTTPHandler:
headers=headers,
)
except httpx.HTTPStatusError as e:
setattr(e, "status_code", e.response.status_code)
if stream is True:
setattr(e, "message", await e.response.aread())
else:
setattr(e, "message", e.response.text)
raise e
await _raise_masked_async_error(e, stream)
except Exception as e:
raise e
@@ -637,12 +673,7 @@ class AsyncHTTPHandler:
headers=headers,
)
except httpx.HTTPStatusError as e:
setattr(e, "status_code", e.response.status_code)
if stream is True:
setattr(e, "message", await e.response.aread())
else:
setattr(e, "message", e.response.text)
raise e
await _raise_masked_async_error(e, stream)
except Exception as e:
raise e
@@ -690,12 +721,7 @@ class AsyncHTTPHandler:
finally:
await new_client.aclose()
except httpx.HTTPStatusError as e:
setattr(e, "status_code", e.response.status_code)
if stream is True:
setattr(e, "message", await e.response.aread())
else:
setattr(e, "message", e.response.text)
raise e
await _raise_masked_async_error(e, stream)
except Exception as e:
raise e
@@ -1035,16 +1061,7 @@ class HTTPHandler:
llm_provider="litellm-httpx-handler",
)
except httpx.HTTPStatusError as e:
if stream is True:
setattr(e, "message", mask_sensitive_info(e.response.read()))
setattr(e, "text", mask_sensitive_info(e.response.read()))
else:
error_text = mask_sensitive_info(e.response.text)
setattr(e, "message", error_text)
setattr(e, "text", error_text)
setattr(e, "status_code", e.response.status_code)
raise e
_raise_masked_sync_error(e, stream)
except Exception as e:
raise e
@@ -1083,17 +1100,7 @@ class HTTPHandler:
llm_provider="litellm-httpx-handler",
)
except httpx.HTTPStatusError as e:
if stream is True:
setattr(e, "message", mask_sensitive_info(e.response.read()))
setattr(e, "text", mask_sensitive_info(e.response.read()))
else:
error_text = mask_sensitive_info(e.response.text)
setattr(e, "message", error_text)
setattr(e, "text", error_text)
setattr(e, "status_code", e.response.status_code)
raise e
_raise_masked_sync_error(e, stream)
except Exception as e:
raise e
@@ -1130,6 +1137,8 @@ class HTTPHandler:
model="default-model-name",
llm_provider="litellm-httpx-handler",
)
except httpx.HTTPStatusError as e:
_raise_masked_sync_error(e, stream)
except Exception as e:
raise e
@@ -1168,17 +1177,7 @@ class HTTPHandler:
llm_provider="litellm-httpx-handler",
)
except httpx.HTTPStatusError as e:
if stream is True:
setattr(e, "message", mask_sensitive_info(e.response.read()))
setattr(e, "text", mask_sensitive_info(e.response.read()))
else:
error_text = mask_sensitive_info(e.response.text)
setattr(e, "message", error_text)
setattr(e, "text", error_text)
setattr(e, "status_code", e.response.status_code)
raise e
_raise_masked_sync_error(e, stream)
except Exception as e:
raise e
@@ -22,7 +22,7 @@ import litellm
import litellm.litellm_core_utils
import litellm.types
import litellm.types.utils
from litellm._logging import verbose_logger
from litellm._logging import _redact_string, verbose_logger
from litellm.anthropic_beta_headers_manager import update_headers_with_filtered_beta
from litellm.constants import REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES
from litellm.litellm_core_utils.realtime_streaming import RealTimeStreaming
@@ -4789,12 +4789,12 @@ class BaseLLMHTTPHandler:
except websockets.exceptions.InvalidStatusCode as e: # type: ignore
verbose_logger.exception(f"Error connecting to backend: {e}")
await websocket.close(code=e.status_code, reason=str(e))
await websocket.close(code=e.status_code, reason=_redact_string(str(e)))
except Exception as e:
verbose_logger.exception(f"Error connecting to backend: {e}")
try:
await websocket.close(
code=1011, reason=f"Internal server error: {str(e)}"
code=1011, reason=_redact_string(f"Internal server error: {str(e)}")
)
except RuntimeError as close_error:
if "already completed" in str(close_error) or "websocket.close" in str(
@@ -5076,12 +5076,12 @@ class BaseLLMHTTPHandler:
except websockets.exceptions.InvalidStatusCode as e: # type: ignore
verbose_logger.exception(f"Error connecting to responses WS backend: {e}")
await websocket.close(code=e.status_code, reason=str(e))
await websocket.close(code=e.status_code, reason=_redact_string(str(e)))
except Exception as e:
verbose_logger.exception(f"Error in responses WS: {e}")
try:
await websocket.close(
code=1011, reason=f"Internal server error: {str(e)}"
code=1011, reason=_redact_string(f"Internal server error: {str(e)}")
)
except RuntimeError as close_error:
if "already completed" in str(close_error) or "websocket.close" in str(
+3 -2
View File
@@ -28,7 +28,7 @@ class GeminiModelInfo(BaseLLMModelInfo):
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
"""Google AI Studio sends api key in query params"""
"""Google AI Studio sends api key via x-goog-api-key header"""
return headers
@property
@@ -75,7 +75,8 @@ class GeminiModelInfo(BaseLLMModelInfo):
)
response = litellm.module_level_client.get(
url=f"{api_base}{endpoint}?key={api_key}",
url=f"{api_base}{endpoint}",
headers={"x-goog-api-key": api_key},
)
if response.status_code != 200:
+3 -3
View File
@@ -86,7 +86,7 @@ class GoogleAIStudioFilesHandler(GeminiModelInfo, BaseFilesConfig):
if not final_api_key:
raise ValueError("api_key is required")
url = "{}/{}?key={}".format(api_base, endpoint, final_api_key)
url = "{}/{}".format(api_base, endpoint)
return url
def get_supported_openai_params(
@@ -231,9 +231,9 @@ class GoogleAIStudioFilesHandler(GeminiModelInfo, BaseFilesConfig):
)
api_base = api_base.rstrip("/")
url = f"{api_base}/v1beta/{file_part}?key={api_key}"
url = f"{api_base}/v1beta/{file_part}"
# Return empty params dict - API key is already in URL, no query params needed
# API key is passed via x-goog-api-key header (set in validate_environment)
return url, {}
def _normalize_gemini_file_id(self, file_id: str) -> str:
@@ -75,9 +75,13 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
model: str,
litellm_params: Optional[GenericLiteLLMParams],
) -> dict:
"""Google AI Studio uses API key in query params, not headers."""
"""Google AI Studio uses x-goog-api-key header for authentication."""
headers = headers or {}
headers["Content-Type"] = "application/json"
if litellm_params:
api_key = GeminiModelInfo.get_api_key(litellm_params.get("api_key"))
if api_key:
headers["x-goog-api-key"] = api_key
return headers
def get_complete_url(
@@ -98,11 +102,10 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
"Google API key is required. Set GOOGLE_API_KEY or GEMINI_API_KEY environment variable."
)
query_params = f"key={api_key}"
if stream:
query_params += "&alt=sse"
return f"{api_base}/{self.api_version}/interactions?alt=sse"
return f"{api_base}/{self.api_version}/interactions?{query_params}"
return f"{api_base}/{self.api_version}/interactions"
def transform_request(
self,
@@ -200,11 +203,10 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
) -> Tuple[str, Dict]:
"""GET /{api_version}/interactions/{interaction_id}"""
resolved_api_base = GeminiModelInfo.get_api_base(api_base)
api_key = GeminiModelInfo.get_api_key(litellm_params.api_key)
if not api_key:
if not GeminiModelInfo.get_api_key(litellm_params.api_key):
raise ValueError("Google API key is required")
return (
f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}?key={api_key}",
f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}",
{},
)
@@ -234,11 +236,10 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
) -> Tuple[str, Dict]:
"""DELETE /{api_version}/interactions/{interaction_id}"""
resolved_api_base = GeminiModelInfo.get_api_base(api_base)
api_key = GeminiModelInfo.get_api_key(litellm_params.api_key)
if not api_key:
if not GeminiModelInfo.get_api_key(litellm_params.api_key):
raise ValueError("Google API key is required")
return (
f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}?key={api_key}",
f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}",
{},
)
@@ -265,11 +266,10 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
) -> Tuple[str, Dict]:
"""POST /{api_version}/interactions/{interaction_id}:cancel (if supported)"""
resolved_api_base = GeminiModelInfo.get_api_base(api_base)
api_key = GeminiModelInfo.get_api_key(litellm_params.api_key)
if not api_key:
if not GeminiModelInfo.get_api_key(litellm_params.api_key):
raise ValueError("Google API key is required")
return (
f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}:cancel?key={api_key}",
f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}:cancel",
{},
)
@@ -85,6 +85,10 @@ class GeminiRealtimeConfig(BaseRealtimeConfig):
raise ValueError("api_key is required for Gemini API calls")
api_base = api_base.replace("https://", "wss://")
api_base = api_base.replace("http://", "ws://")
# Not all WebSocket clients support custom HTTP headers, so the API key
# must remain a query parameter here. This is an accepted limitation:
# WebSocket connections do not go through httpx, so the httpx traceback
# leak that MaskedHTTPStatusError addresses does not apply to this URL.
return f"{api_base}/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key={api_key}"
def map_model_turn_event(
@@ -48,7 +48,7 @@ class GeminiVectorStoreConfig(BaseVectorStoreConfig):
def get_auth_credentials(
self, litellm_params: dict
) -> BaseVectorStoreAuthCredentials:
"""Gemini uses API key in query params, not headers."""
"""Gemini uses x-goog-api-key header for authentication."""
return {}
def get_vector_store_endpoints_by_type(self) -> VectorStoreIndexEndpoints:
@@ -79,6 +79,7 @@ class GeminiVectorStoreConfig(BaseVectorStoreConfig):
api_key = litellm_params.get("api_key") or get_api_key_from_env()
if api_key:
self._cached_api_key = api_key
headers["x-goog-api-key"] = api_key
return headers
@@ -133,13 +134,10 @@ class GeminiVectorStoreConfig(BaseVectorStoreConfig):
if model and model.startswith("gemini/"):
model = model.replace("gemini/", "")
# Get API key - Gemini requires it as a query parameter
api_key = litellm_params.get("api_key") or GeminiModelInfo.get_api_key()
if not api_key:
raise ValueError("GEMINI_API_KEY or GOOGLE_API_KEY is required")
# Build the URL for generateContent with API key
url = f"{api_base}/models/{model}:generateContent?key={api_key}"
url = f"{api_base}/models/{model}:generateContent"
# Build file_search tool configuration (using snake_case as per Gemini docs)
file_search_config: Dict[str, Any] = {
@@ -286,10 +284,7 @@ class GeminiVectorStoreConfig(BaseVectorStoreConfig):
"""
url = f"{api_base}/fileSearchStores"
# Append API key as query parameter (required by Gemini)
api_key = self._cached_api_key or get_api_key_from_env()
if api_key:
url = f"{url}?key={api_key}"
# API key is passed via x-goog-api-key header (set in validate_environment)
request_body: Dict[str, Any] = {}
+3 -2
View File
@@ -6,6 +6,7 @@ This requires websockets, and is currently only supported on LiteLLM Proxy.
from typing import Any, Optional, cast
from litellm._logging import _redact_string
from litellm.constants import REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES
from litellm.types.realtime import RealtimeQueryParams
@@ -148,11 +149,11 @@ class OpenAIRealtime(OpenAIChatCompletion):
await realtime_streaming.bidirectional_forward()
except websockets.exceptions.InvalidStatusCode as e: # type: ignore
await websocket.close(code=e.status_code, reason=str(e))
await websocket.close(code=e.status_code, reason=_redact_string(str(e)))
except Exception as e:
try:
await websocket.close(
code=1011, reason=f"Internal server error: {str(e)}"
code=1011, reason=_redact_string(f"Internal server error: {str(e)}")
)
except RuntimeError as close_error:
if "already completed" in str(close_error) or "websocket.close" in str(
+16 -11
View File
@@ -337,8 +337,13 @@ def _get_gemini_url(
mode: all_gemini_url_modes,
model: str,
stream: Optional[bool],
gemini_api_key: Optional[str],
) -> Tuple[str, str]:
"""Build the Gemini API URL for the given mode.
The API key is NOT included in the URL. Callers must pass it via the
``x-goog-api-key`` header instead to avoid leaking credentials in
error tracebacks.
"""
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig,
)
@@ -352,27 +357,27 @@ def _get_gemini_url(
endpoint = "generateContent"
if stream is True:
endpoint = "streamGenerateContent"
url = "https://generativelanguage.googleapis.com/{}/{}:{}?key={}&alt=sse".format(
api_version, _gemini_model_name, endpoint, gemini_api_key
url = "https://generativelanguage.googleapis.com/{}/{}:{}?alt=sse".format(
api_version, _gemini_model_name, endpoint
)
else:
url = "https://generativelanguage.googleapis.com/{}/{}:{}?key={}".format(
api_version, _gemini_model_name, endpoint, gemini_api_key
url = "https://generativelanguage.googleapis.com/{}/{}:{}".format(
api_version, _gemini_model_name, endpoint
)
elif mode == "embedding":
endpoint = "embedContent"
url = "https://generativelanguage.googleapis.com/v1beta/{}:{}?key={}".format(
_gemini_model_name, endpoint, gemini_api_key
url = "https://generativelanguage.googleapis.com/v1beta/{}:{}".format(
_gemini_model_name, endpoint
)
elif mode == "batch_embedding":
endpoint = "batchEmbedContents"
url = "https://generativelanguage.googleapis.com/v1beta/{}:{}?key={}".format(
_gemini_model_name, endpoint, gemini_api_key
url = "https://generativelanguage.googleapis.com/v1beta/{}:{}".format(
_gemini_model_name, endpoint
)
elif mode == "count_tokens":
endpoint = "countTokens"
url = "https://generativelanguage.googleapis.com/v1beta/{}:{}?key={}".format(
_gemini_model_name, endpoint, gemini_api_key
url = "https://generativelanguage.googleapis.com/v1beta/{}:{}".format(
_gemini_model_name, endpoint
)
elif mode == "image_generation":
raise ValueError(
@@ -62,10 +62,10 @@ class ContextCachingEndpoints(VertexBase):
token, url
"""
if custom_llm_provider == "gemini":
auth_header = None
auth_header = {"x-goog-api-key": gemini_api_key} # type: ignore[assignment]
endpoint = "cachedContents"
url = "https://generativelanguage.googleapis.com/v1beta/{}?key={}".format(
endpoint, gemini_api_key
url = "https://generativelanguage.googleapis.com/v1beta/{}".format(
endpoint
)
elif custom_llm_provider == "vertex_ai":
auth_header = vertex_auth_header
@@ -353,7 +353,9 @@ class ContextCachingEndpoints(VertexBase):
headers = {
"Content-Type": "application/json",
}
if token is not None:
if isinstance(token, dict):
headers.update(token)
elif token is not None:
headers["Authorization"] = f"Bearer {token}"
if extra_headers is not None:
headers.update(extra_headers)
@@ -501,7 +503,9 @@ class ContextCachingEndpoints(VertexBase):
headers = {
"Content-Type": "application/json",
}
if token is not None:
if isinstance(token, dict):
headers.update(token)
elif token is not None:
headers["Authorization"] = f"Bearer {token}"
if extra_headers is not None:
headers.update(extra_headers)
+1 -2
View File
@@ -473,9 +473,8 @@ class VertexBase:
mode=mode,
model=model,
stream=stream,
gemini_api_key=gemini_api_key,
)
auth_header = None # this field is not used for gemin
auth_header = {"x-goog-api-key": gemini_api_key} # type: ignore[assignment]
else:
vertex_location = self.get_vertex_region(
vertex_region=vertex_location,
+2 -1
View File
@@ -40,6 +40,7 @@ from typing import (
get_args,
)
from litellm._logging import _redact_string
from litellm._uuid import uuid
if TYPE_CHECKING:
@@ -7244,7 +7245,7 @@ async def ahealth_check(
f"Mode {mode} not supported. See modes here: https://docs.litellm.ai/docs/proxy/health"
)
except Exception as e:
stack_trace = traceback.format_exc()
stack_trace = _redact_string(traceback.format_exc())
if isinstance(stack_trace, str):
stack_trace = stack_trace[:1000]
+2 -2
View File
@@ -22,7 +22,7 @@ from fastapi import HTTPException, Request, status
from fastapi.responses import JSONResponse, Response, StreamingResponse
import litellm
from litellm._logging import verbose_proxy_logger
from litellm._logging import _redact_string, verbose_proxy_logger
from litellm._uuid import uuid
from litellm.constants import (
DD_TRACER_STREAMING_CHUNK_YIELD_RESOURCE,
@@ -1785,7 +1785,7 @@ class ProxyBaseLLMRequestProcessing:
if isinstance(e, HTTPException):
raise e
error_traceback = traceback.format_exc()
error_traceback = _redact_string(traceback.format_exc())
error_msg = f"{str(e)}\n\n{error_traceback}"
proxy_exception = ProxyException(
message=getattr(e, "message", error_msg),
+4 -3
View File
@@ -77,7 +77,7 @@ from litellm import (
ModelResponseStream,
Router,
)
from litellm._logging import verbose_proxy_logger
from litellm._logging import _redact_string, verbose_proxy_logger
from litellm._service_logger import ServiceLogging, ServiceTypes
from litellm.caching.caching import DualCache, RedisCache
from litellm.caching.dual_cache import LimitedSizeOrderedDict
@@ -155,7 +155,7 @@ def print_verbose(print_statement):
verbose_proxy_logger.debug("{}\n{}".format(print_statement, traceback.format_exc()))
if litellm.set_verbose:
print(f"LiteLLM Proxy: {print_statement}") # noqa
print(f"LiteLLM Proxy: {_redact_string(str(print_statement))}") # noqa
def _get_email_logger_class():
@@ -1721,6 +1721,7 @@ class ProxyLogging:
error_message = str(original_exception)
if isinstance(traceback_str, str):
error_message += traceback_str[:1000]
error_message = _redact_string(error_message)
asyncio.create_task(
self.alerting_handler(
message=f"DB read/write call failed: {error_message}",
@@ -1791,7 +1792,7 @@ class ProxyLogging:
asyncio.create_task(
self.alerting_handler(
message=f"LLM API call failed: `{exception_str}`",
message=_redact_string(f"LLM API call failed: `{exception_str}`"),
level="High",
alert_type=AlertType.llm_exceptions,
request_data=request_data,
+7 -3
View File
@@ -143,7 +143,7 @@ class GeminiRAGIngestion(BaseRAGIngestion):
Returns:
Store name (format: fileSearchStores/xxxxxxx)
"""
url = f"{base_url}/fileSearchStores?key={api_key}"
url = f"{base_url}/fileSearchStores"
request_body = {"displayName": display_name}
@@ -154,7 +154,10 @@ class GeminiRAGIngestion(BaseRAGIngestion):
response = await client.post(
url,
json=request_body,
headers={"Content-Type": "application/json"},
headers={
"Content-Type": "application/json",
"x-goog-api-key": api_key,
},
)
if response.status_code != 200:
@@ -228,7 +231,7 @@ class GeminiRAGIngestion(BaseRAGIngestion):
# base_url is like: https://generativelanguage.googleapis.com/v1beta
# We need: https://generativelanguage.googleapis.com/upload/v1beta/{store_id}:uploadToFileSearchStore
api_base = base_url.replace("/v1beta", "") # Get base without version
url = f"{api_base}/upload/v1beta/{vector_store_id}:uploadToFileSearchStore?key={api_key}"
url = f"{api_base}/upload/v1beta/{vector_store_id}:uploadToFileSearchStore"
# Build request body with chunking config and metadata if provided
request_body: Dict[str, Any] = {"displayName": filename}
@@ -263,6 +266,7 @@ class GeminiRAGIngestion(BaseRAGIngestion):
"X-Goog-Upload-Header-Content-Length": str(file_size),
"X-Goog-Upload-Header-Content-Type": content_type,
"Content-Type": "application/json",
"x-goog-api-key": api_key,
}
verbose_logger.debug(f"Initiating resumable upload: {url}")
@@ -310,7 +310,7 @@ def test_gemini_multimodal_embedding_e2e():
) as mock_get_token:
mock_get_token.return_value = (
{"x-goog-api-key": "test-key"},
"https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-2-preview:embedContent?key=test-key"
"https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-2-preview:embedContent"
)
mock_response = MagicMock()
@@ -1127,80 +1127,58 @@ async def test_google_generate_content_with_openai():
passed_fields = passed_fields - set(GenericLiteLLMParams.model_fields.keys())
# extra_headers is now explicitly passed through for providers that need custom headers
assert passed_fields == set(["model", "messages", "extra_headers"]), f"Expected model, messages, and extra_headers to be passed through, got {passed_fields}"
@pytest.mark.asyncio
async def test_agenerate_content_x_goog_api_key_header():
def test_validate_environment_sets_x_goog_api_key():
"""
Test that agenerate_content passes x-goog-api-key header correctly.
This test verifies that when calling agenerate_content with a Google GenAI model,
the HTTP request includes the x-goog-api-key header with the correct API key value.
"""
import os
import unittest.mock
Test that VertexGeminiConfig.validate_environment correctly merges an
x-goog-api-key dict into the request headers.
This is the mechanism by which Google AI Studio (Gemini) requests get
authenticated via header instead of a query-string ?key= parameter.
"""
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig,
)
import httpx
test_api_key = "test-gemini-api-key-123"
# Mock environment to ensure we use our test API key
with unittest.mock.patch.dict(os.environ, {"GEMINI_API_KEY": test_api_key}, clear=False):
# Mock the AsyncHTTPHandler's post method to capture headers
with unittest.mock.patch("litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", new_callable=unittest.mock.AsyncMock) as mock_post:
# Mock a successful response
mock_response = unittest.mock.MagicMock()
mock_response.json.return_value = {
"candidates": [
{
"content": {
"parts": [{"text": "Hello! How can I help you today?"}],
"role": "model"
},
"finishReason": "STOP",
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 5,
"candidatesTokenCount": 10,
"totalTokenCount": 15
}
}
mock_response.status_code = 200
mock_response.headers = {}
mock_post.return_value = mock_response
# Call agenerate_content with Google AI Studio model
try:
response = await agenerate_content(
model="gemini/gemini-1.5-flash",
contents=[
{"role": "user", "parts": [{"text": "Hello, world!"}]}
],
api_key=test_api_key
)
except Exception:
# Ignore any response processing errors, we just want to check the headers
pass
# Verify that AsyncHTTPHandler.post was called
mock_post.assert_called_once()
# Get the arguments passed to the post call
call_args, call_kwargs = mock_post.call_args
# Verify that headers contain x-goog-api-key
headers = call_kwargs.get("headers", {})
assert "x-goog-api-key" in headers, f"x-goog-api-key header not found in headers: {list(headers.keys())}"
# Verify the API key is set (could be our test key or from api_key parameter)
api_key_value = headers["x-goog-api-key"]
assert api_key_value == test_api_key, f"Expected x-goog-api-key to be {test_api_key}, got {api_key_value}"
# Verify other expected headers
assert headers.get("Content-Type") == "application/json", f"Expected Content-Type application/json, got {headers.get('Content-Type')}"
print(f"✓ Test passed: x-goog-api-key header correctly set to {api_key_value}")
print(f"✓ All headers: {list(headers.keys())}")
# Simulate what _get_token_and_url returns for Gemini: a dict auth_header
auth_header_dict = {"x-goog-api-key": test_api_key}
headers = VertexGeminiConfig().validate_environment(
api_key=auth_header_dict,
headers=None,
model="gemini-2.5-flash",
messages=[],
optional_params={},
litellm_params={},
)
assert "x-goog-api-key" in headers, f"x-goog-api-key not in headers: {headers}"
assert headers["x-goog-api-key"] == test_api_key
assert headers["Content-Type"] == "application/json"
def test_get_gemini_url_excludes_api_key():
    """
    Guard against the Gemini API key being embedded in request URLs.

    URLs show up in httpx error tracebacks, so a ``key=`` query parameter
    would leak the credential. The key is expected to travel in the
    x-goog-api-key header instead.
    """
    from litellm.llms.vertex_ai.common_utils import _get_gemini_url

    non_streaming_modes = ("chat", "embedding", "batch_embedding", "count_tokens")
    for mode in non_streaming_modes:
        url, _endpoint = _get_gemini_url(
            mode=mode,
            model="gemini-2.5-flash",
            stream=False,
        )
        assert "key=" not in url, f"API key found in URL for mode={mode}: {url}"

    # The streaming chat URL keeps its alt=sse parameter but still carries no key
    url, _endpoint = _get_gemini_url(
        mode="chat",
        model="gemini-2.5-flash",
        stream=True,
    )
    assert "key=" not in url, f"API key found in streaming URL: {url}"
    assert "alt=sse" in url, f"Missing alt=sse in streaming URL: {url}"
def test_inline_data_base64_image_transformation():
@@ -37,12 +37,12 @@ class TestGoogleAIStudioFilesTransformation:
litellm_params=litellm_params,
)
# Verify URL is constructed exactly as required:
# https://generativelanguage.googleapis.com/v1beta/files/{file_id}?key=API_KEY
# API key is passed via x-goog-api-key header, not in URL
assert (
url
== "https://generativelanguage.googleapis.com/v1beta/files/test123?key=test-api-key"
== "https://generativelanguage.googleapis.com/v1beta/files/test123"
)
assert "key=" not in url
# CRITICAL: params should be empty dict, not contain Content-Type or any other params
# These would be incorrectly interpreted as query parameters
@@ -64,12 +64,12 @@ class TestGoogleAIStudioFilesTransformation:
litellm_params=litellm_params,
)
# Verify URL is constructed exactly as required:
# https://generativelanguage.googleapis.com/v1beta/files/{file_id}?key=API_KEY
# API key is passed via x-goog-api-key header, not in URL
assert (
url
== "https://generativelanguage.googleapis.com/v1beta/files/test123?key=test-api-key"
== "https://generativelanguage.googleapis.com/v1beta/files/test123"
)
assert "key=" not in url
# CRITICAL: params should be empty dict
assert params == {}, f"Expected empty params dict, got: {params}"
@@ -79,11 +79,10 @@ class TestGoogleAIStudioFilesTransformation:
def test_transform_retrieve_file_request_with_raw_id_only(self):
"""
Regression guard for the exact retrieval URL format.
Regression guard: API key must NOT appear in the URL.
If someone changes the method and stops producing:
https://generativelanguage.googleapis.com/v1beta/files/{file_id}?key=API_KEY
this test should fail.
The key is sent via x-goog-api-key header to prevent leaking
credentials in httpx error tracebacks.
"""
file_id = "cctqueckiggb"
litellm_params = {"api_key": "test-api-key"}
@@ -96,8 +95,9 @@ class TestGoogleAIStudioFilesTransformation:
assert (
url
== "https://generativelanguage.googleapis.com/v1beta/files/cctqueckiggb?key=test-api-key"
== "https://generativelanguage.googleapis.com/v1beta/files/cctqueckiggb"
)
assert "key=" not in url
assert params == {}
@patch.dict("os.environ", {}, clear=True)
@@ -285,10 +285,10 @@ class TestGoogleAIStudioFilesTransformation:
litellm_params={},
)
# Verify URL structure
# Verify URL structure - API key must NOT be in URL
assert api_base in url
assert "upload/v1beta/files" in url
assert f"key={api_key}" in url
assert "key=" not in url
def test_transform_delete_file_request_with_full_uri(self):
"""Test delete file request transformation with full URI"""