security: prevent API key leaks in error tracebacks, logs, and alerts

Gemini API keys embedded in URLs as ?key= query parameters leak through httpx error tracebacks, which are then captured by traceback.format_exc() and forwarded to logging callbacks, Slack/Teams alerts, and HTTP client responses.

Short-term: all httpx.HTTPStatusError handlers now raise MaskedHTTPStatusError(...) from None, which masks the URL and suppresses exception chaining so the original error never appears in tracebacks.

Long-term: moved all Gemini/Vertex URL constructions from ?key={api_key} to the x-goog-api-key header (Google's documented auth method), so the key is never in the URL at all. WebSocket realtime is the only exception, since not all WS clients support custom headers.

Additionally hardened all outbound credential paths:
- WebSocket close reasons now pass through _redact_string()
- Callback pipeline (failure_handler) redacts traceback_exception and error_str before forwarding to integrations (Langfuse, Datadog, etc.)
- Slack/Teams alert messages are redacted in send_llm_exception_alert, ProxyLogging.failure_handler, and post_call_failure_hook
- HTTP error responses in proxy SSE and health endpoints are redacted
- Exception messages in exception_mapping_utils are redacted
- print_verbose() stdout output is redacted when set_verbose=True
- HTTPHandler.put() now raises MaskedHTTPStatusError (the handler was missing)
2026-06-19 13:45:17 +00:00 · 2026-04-03 22:33:06 +00:00
parent 7b36cfc0de
commit 25f93bed91
23 changed files with 217 additions and 219 deletions
@@ -86,6 +86,8 @@ _SECRET_RE = _build_secret_patterns()


 def _redact_string(value: str) -> str:
+    """Return ``value`` with every ``_SECRET_RE`` match replaced by ``_REDACTED``.
+
+    When ``_ENABLE_SECRET_REDACTION`` is falsy the input is returned unchanged.
+    """
+    if not _ENABLE_SECRET_REDACTION:
+        return value
    return _SECRET_RE.sub(_REDACTED, value)


@@ -6,7 +6,7 @@ from typing import Any, Optional
 import httpx

 import litellm
-from litellm._logging import verbose_logger
+from litellm._logging import _redact_string, verbose_logger
 from litellm.types.utils import LlmProviders

 from ..exceptions import (
@@ -2304,7 +2304,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915
                else:
                    # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
                    raise APIConnectionError(
-                        message=f"{exception_provider} APIConnectionError - {message}\n{traceback.format_exc()}",
+                        message=f"{exception_provider} APIConnectionError - {message}\n{_redact_string(traceback.format_exc())}",
                        llm_provider="azure",
                        model=model,
                        litellm_debug_info=extra_information,
@@ -2431,7 +2431,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915
            else:
                raise APIConnectionError(
                    message="{}\n{}".format(
-                        str(original_exception), traceback.format_exc()
+                        str(original_exception), _redact_string(traceback.format_exc())
                    ),
                    llm_provider=custom_llm_provider,
                    model=model,
@@ -2460,7 +2460,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915
                    setattr(e, "litellm_response_headers", litellm_response_headers)
                    raise e  # it's already mapped
            raised_exc = APIConnectionError(
-                message="{}\n{}".format(original_exception, traceback.format_exc()),
+                message="{}\n{}".format(original_exception, _redact_string(traceback.format_exc())),
                llm_provider="",
                model="",
            )
@@ -36,7 +36,7 @@ from litellm import (
    log_raw_request_response,
    turn_off_message_logging,
 )
-from litellm._logging import _is_debugging_on, verbose_logger
+from litellm._logging import _is_debugging_on, _redact_string, verbose_logger
 from litellm._uuid import uuid
 from litellm.batches.batch_utils import _handle_completed_batch
 from litellm.caching.caching import DualCache, InMemoryCache
@@ -2848,7 +2848,11 @@ class Logging(LiteLLMLoggingBaseClass):

        self.model_call_details["log_event_type"] = "failed_api_call"
        self.model_call_details["exception"] = exception
-        self.model_call_details["traceback_exception"] = traceback_exception
+        self.model_call_details["traceback_exception"] = (
+            _redact_string(traceback_exception)
+            if isinstance(traceback_exception, str)
+            else traceback_exception
+        )
        self.model_call_details["end_time"] = end_time
        self.model_call_details.setdefault("original_response", None)
        self.model_call_details["response_cost"] = 0
@@ -2871,7 +2875,7 @@ class Logging(LiteLLMLoggingBaseClass):
            end_time=end_time,
            logging_obj=self,
            status="failure",
-            error_str=str(exception),
+            error_str=_redact_string(str(exception)),
            original_exception=exception,
            standard_built_in_tools_params=self.standard_built_in_tools_params,
        )
@@ -6,7 +6,7 @@ This requires websockets, and is currently only supported on LiteLLM Proxy.

 from typing import Any, Optional, cast

-from litellm._logging import verbose_proxy_logger
+from litellm._logging import _redact_string, verbose_proxy_logger
 from litellm.constants import REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES

 from ....litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
@@ -118,7 +118,7 @@ class AzureOpenAIRealtime(AzureChatCompletion):
                await realtime_streaming.bidirectional_forward()

        except websockets.exceptions.InvalidStatusCode as e:  # type: ignore
-            await websocket.close(code=e.status_code, reason=str(e))
+            await websocket.close(code=e.status_code, reason=_redact_string(str(e)))
        except Exception:
            verbose_proxy_logger.exception(
                "Error in AzureOpenAIRealtime.async_realtime"
@@ -8,7 +8,7 @@ import asyncio
 import json
 from typing import Any, Optional

-from litellm._logging import verbose_proxy_logger
+from litellm._logging import _redact_string, verbose_proxy_logger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging

 from ..base_aws_llm import BaseAWSLLM
@@ -152,7 +152,7 @@ class BedrockRealtime(BaseAWSLLM):
                f"Error in BedrockRealtime.async_realtime: {e}"
            )
            try:
-                await websocket.close(code=1011, reason=f"Internal error: {str(e)}")
+                await websocket.close(code=1011, reason=_redact_string(f"Internal error: {str(e)}"))
            except Exception:
                pass
            raise
@@ -316,16 +316,65 @@ def mask_sensitive_info(error_message):
    return error_message


+def _safe_get_response_text(response: httpx.Response) -> str:
+    """Return ``response.text``, or ``""`` if accessing it raises."""
+    try:
+        decoded = response.text
+    except Exception:
+        decoded = ""
+    return decoded
+
+
+async def _safe_aread_response(response: httpx.Response) -> bytes:
+    """Await ``response.aread()``; return ``b""`` if the read raises."""
+    try:
+        payload = await response.aread()
+    except Exception:
+        payload = b""
+    return payload
+
+
+def _safe_read_response(response: httpx.Response) -> bytes:
+    """Call ``response.read()``; return ``b""`` if the read raises."""
+    try:
+        payload = response.read()
+    except Exception:
+        payload = b""
+    return payload
+
+
+def _raise_masked_sync_error(e: httpx.HTTPStatusError, stream: bool) -> None:
+    """Re-raise ``e`` as a MaskedHTTPStatusError (sync HTTP handlers).
+
+    The payload is the raw body from ``read()`` when ``stream`` is truthy and
+    ``response.text`` otherwise; either way it goes through
+    mask_sensitive_info() before being attached as ``message`` and ``text``.
+    The new error is raised ``from None`` so ``e`` is not chained onto it.
+    """
+    if stream:
+        _payload = _safe_read_response(e.response)
+    else:
+        _payload = _safe_get_response_text(e.response)
+    _masked = mask_sensitive_info(_payload)
+    raise MaskedHTTPStatusError(e, message=_masked, text=_masked) from None
+
+
+async def _raise_masked_async_error(e: httpx.HTTPStatusError, stream: bool) -> None:
+    """Raise a MaskedHTTPStatusError in place of ``e`` for async HTTP handlers.
+
+    Args:
+        e: The httpx status error caught by the handler.
+        stream: When truthy, the body is read with ``aread()``; otherwise
+            ``response.text`` is used.
+
+    Raises:
+        MaskedHTTPStatusError: Always. Raised ``from None`` so the original
+            exception is not chained onto the new one.
+    """
+    if stream:
+        # Mask the streamed body too, matching _raise_masked_sync_error and
+        # the non-streaming branch below.
+        _body = mask_sensitive_info(await _safe_aread_response(e.response))
+        raise MaskedHTTPStatusError(e, message=_body, text=_body) from None
+    _text = mask_sensitive_info(_safe_get_response_text(e.response))
+    raise MaskedHTTPStatusError(e, message=_text, text=_text) from None
+
+
 class MaskedHTTPStatusError(httpx.HTTPStatusError):
    def __init__(
        self, original_error, message: Optional[str] = None, text: Optional[str] = None
    ):
        # Create a new error with the masked URL
        masked_url = mask_sensitive_info(str(original_error.request.url))
-        # Create a new error that looks like the original, but with a masked URL
+        # Mask the original exception message too (it contains the full URL)
+        masked_original_message = mask_sensitive_info(str(original_error))
+
+        # Safely access response content — decompression can fail (e.g. zlib error)
+        try:
+            response_content = original_error.response.content
+        except Exception:
+            response_content = b""

        super().__init__(
-            message=original_error.message,
+            message=masked_original_message,
            request=httpx.Request(
                method=original_error.request.method,
                url=masked_url,
@@ -334,12 +383,13 @@ class MaskedHTTPStatusError(httpx.HTTPStatusError):
            ),
            response=httpx.Response(
                status_code=original_error.response.status_code,
-                content=original_error.response.content,
+                content=response_content,
                headers=original_error.response.headers,
            ),
        )
        self.message = message
        self.text = text
+        self.status_code = original_error.response.status_code


 class AsyncHTTPHandler:
@@ -501,16 +551,7 @@ class AsyncHTTPHandler:
                headers=headers,
            )
        except httpx.HTTPStatusError as e:
-            if stream is True:
-                setattr(e, "message", await e.response.aread())
-                setattr(e, "text", await e.response.aread())
-            else:
-                setattr(e, "message", mask_sensitive_info(e.response.text))
-                setattr(e, "text", mask_sensitive_info(e.response.text))
-
-            setattr(e, "status_code", e.response.status_code)
-
-            raise e
+            await _raise_masked_async_error(e, stream)
        except Exception as e:
            raise e

@@ -571,12 +612,7 @@ class AsyncHTTPHandler:
                headers=headers,
            )
        except httpx.HTTPStatusError as e:
-            setattr(e, "status_code", e.response.status_code)
-            if stream is True:
-                setattr(e, "message", await e.response.aread())
-            else:
-                setattr(e, "message", e.response.text)
-            raise e
+            await _raise_masked_async_error(e, stream)
        except Exception as e:
            raise e

@@ -637,12 +673,7 @@ class AsyncHTTPHandler:
                headers=headers,
            )
        except httpx.HTTPStatusError as e:
-            setattr(e, "status_code", e.response.status_code)
-            if stream is True:
-                setattr(e, "message", await e.response.aread())
-            else:
-                setattr(e, "message", e.response.text)
-            raise e
+            await _raise_masked_async_error(e, stream)
        except Exception as e:
            raise e

@@ -690,12 +721,7 @@ class AsyncHTTPHandler:
            finally:
                await new_client.aclose()
        except httpx.HTTPStatusError as e:
-            setattr(e, "status_code", e.response.status_code)
-            if stream is True:
-                setattr(e, "message", await e.response.aread())
-            else:
-                setattr(e, "message", e.response.text)
-            raise e
+            await _raise_masked_async_error(e, stream)
        except Exception as e:
            raise e

@@ -1035,16 +1061,7 @@ class HTTPHandler:
                llm_provider="litellm-httpx-handler",
            )
        except httpx.HTTPStatusError as e:
-            if stream is True:
-                setattr(e, "message", mask_sensitive_info(e.response.read()))
-                setattr(e, "text", mask_sensitive_info(e.response.read()))
-            else:
-                error_text = mask_sensitive_info(e.response.text)
-                setattr(e, "message", error_text)
-                setattr(e, "text", error_text)
-
-            setattr(e, "status_code", e.response.status_code)
-            raise e
+            _raise_masked_sync_error(e, stream)
        except Exception as e:
            raise e

@@ -1083,17 +1100,7 @@ class HTTPHandler:
                llm_provider="litellm-httpx-handler",
            )
        except httpx.HTTPStatusError as e:
-            if stream is True:
-                setattr(e, "message", mask_sensitive_info(e.response.read()))
-                setattr(e, "text", mask_sensitive_info(e.response.read()))
-            else:
-                error_text = mask_sensitive_info(e.response.text)
-                setattr(e, "message", error_text)
-                setattr(e, "text", error_text)
-
-            setattr(e, "status_code", e.response.status_code)
-
-            raise e
+            _raise_masked_sync_error(e, stream)
        except Exception as e:
            raise e

@@ -1130,6 +1137,8 @@ class HTTPHandler:
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
            )
+        except httpx.HTTPStatusError as e:
+            _raise_masked_sync_error(e, stream)
        except Exception as e:
            raise e

@@ -1168,17 +1177,7 @@ class HTTPHandler:
                llm_provider="litellm-httpx-handler",
            )
        except httpx.HTTPStatusError as e:
-            if stream is True:
-                setattr(e, "message", mask_sensitive_info(e.response.read()))
-                setattr(e, "text", mask_sensitive_info(e.response.read()))
-            else:
-                error_text = mask_sensitive_info(e.response.text)
-                setattr(e, "message", error_text)
-                setattr(e, "text", error_text)
-
-            setattr(e, "status_code", e.response.status_code)
-
-            raise e
+            _raise_masked_sync_error(e, stream)
        except Exception as e:
            raise e

@@ -22,7 +22,7 @@ import litellm
 import litellm.litellm_core_utils
 import litellm.types
 import litellm.types.utils
-from litellm._logging import verbose_logger
+from litellm._logging import _redact_string, verbose_logger
 from litellm.anthropic_beta_headers_manager import update_headers_with_filtered_beta
 from litellm.constants import REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES
 from litellm.litellm_core_utils.realtime_streaming import RealTimeStreaming
@@ -4789,12 +4789,12 @@ class BaseLLMHTTPHandler:

        except websockets.exceptions.InvalidStatusCode as e:  # type: ignore
            verbose_logger.exception(f"Error connecting to backend: {e}")
-            await websocket.close(code=e.status_code, reason=str(e))
+            await websocket.close(code=e.status_code, reason=_redact_string(str(e)))
        except Exception as e:
            verbose_logger.exception(f"Error connecting to backend: {e}")
            try:
                await websocket.close(
-                    code=1011, reason=f"Internal server error: {str(e)}"
+                    code=1011, reason=_redact_string(f"Internal server error: {str(e)}")
                )
            except RuntimeError as close_error:
                if "already completed" in str(close_error) or "websocket.close" in str(
@@ -5076,12 +5076,12 @@ class BaseLLMHTTPHandler:

        except websockets.exceptions.InvalidStatusCode as e:  # type: ignore
            verbose_logger.exception(f"Error connecting to responses WS backend: {e}")
-            await websocket.close(code=e.status_code, reason=str(e))
+            await websocket.close(code=e.status_code, reason=_redact_string(str(e)))
        except Exception as e:
            verbose_logger.exception(f"Error in responses WS: {e}")
            try:
                await websocket.close(
-                    code=1011, reason=f"Internal server error: {str(e)}"
+                    code=1011, reason=_redact_string(f"Internal server error: {str(e)}")
                )
            except RuntimeError as close_error:
                if "already completed" in str(close_error) or "websocket.close" in str(
@@ -28,7 +28,7 @@ class GeminiModelInfo(BaseLLMModelInfo):
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
-        """Google AI Studio sends api key in query params"""
+        """Google AI Studio sends api key via x-goog-api-key header"""
        return headers

    @property
@@ -75,7 +75,8 @@ class GeminiModelInfo(BaseLLMModelInfo):
            )

        response = litellm.module_level_client.get(
-            url=f"{api_base}{endpoint}?key={api_key}",
+            url=f"{api_base}{endpoint}",
+            headers={"x-goog-api-key": api_key},
        )

        if response.status_code != 200:
@@ -86,7 +86,7 @@ class GoogleAIStudioFilesHandler(GeminiModelInfo, BaseFilesConfig):
        if not final_api_key:
            raise ValueError("api_key is required")

-        url = "{}/{}?key={}".format(api_base, endpoint, final_api_key)
+        url = "{}/{}".format(api_base, endpoint)
        return url

    def get_supported_openai_params(
@@ -231,9 +231,9 @@ class GoogleAIStudioFilesHandler(GeminiModelInfo, BaseFilesConfig):
        )
        api_base = api_base.rstrip("/")

-        url = f"{api_base}/v1beta/{file_part}?key={api_key}"
+        url = f"{api_base}/v1beta/{file_part}"

-        # Return empty params dict - API key is already in URL, no query params needed
+        # API key is passed via x-goog-api-key header (set in validate_environment)
        return url, {}

    def _normalize_gemini_file_id(self, file_id: str) -> str:
@@ -75,9 +75,13 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
        model: str,
        litellm_params: Optional[GenericLiteLLMParams],
    ) -> dict:
-        """Google AI Studio uses API key in query params, not headers."""
+        """Google AI Studio uses x-goog-api-key header for authentication."""
        headers = headers or {}
        headers["Content-Type"] = "application/json"
+        if litellm_params:
+            api_key = GeminiModelInfo.get_api_key(litellm_params.get("api_key"))
+            if api_key:
+                headers["x-goog-api-key"] = api_key
        return headers

    def get_complete_url(
@@ -98,11 +102,10 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
                "Google API key is required. Set GOOGLE_API_KEY or GEMINI_API_KEY environment variable."
            )

-        query_params = f"key={api_key}"
        if stream:
-            query_params += "&alt=sse"
+            return f"{api_base}/{self.api_version}/interactions?alt=sse"

-        return f"{api_base}/{self.api_version}/interactions?{query_params}"
+        return f"{api_base}/{self.api_version}/interactions"

    def transform_request(
        self,
@@ -200,11 +203,10 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
    ) -> Tuple[str, Dict]:
        """GET /{api_version}/interactions/{interaction_id}"""
        resolved_api_base = GeminiModelInfo.get_api_base(api_base)
-        api_key = GeminiModelInfo.get_api_key(litellm_params.api_key)
-        if not api_key:
+        if not GeminiModelInfo.get_api_key(litellm_params.api_key):
            raise ValueError("Google API key is required")
        return (
-            f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}?key={api_key}",
+            f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}",
            {},
        )

@@ -234,11 +236,10 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
    ) -> Tuple[str, Dict]:
        """DELETE /{api_version}/interactions/{interaction_id}"""
        resolved_api_base = GeminiModelInfo.get_api_base(api_base)
-        api_key = GeminiModelInfo.get_api_key(litellm_params.api_key)
-        if not api_key:
+        if not GeminiModelInfo.get_api_key(litellm_params.api_key):
            raise ValueError("Google API key is required")
        return (
-            f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}?key={api_key}",
+            f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}",
            {},
        )

@@ -265,11 +266,10 @@ class GoogleAIStudioInteractionsConfig(BaseInteractionsAPIConfig):
    ) -> Tuple[str, Dict]:
        """POST /{api_version}/interactions/{interaction_id}:cancel (if supported)"""
        resolved_api_base = GeminiModelInfo.get_api_base(api_base)
-        api_key = GeminiModelInfo.get_api_key(litellm_params.api_key)
-        if not api_key:
+        if not GeminiModelInfo.get_api_key(litellm_params.api_key):
            raise ValueError("Google API key is required")
        return (
-            f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}:cancel?key={api_key}",
+            f"{resolved_api_base}/{self.api_version}/interactions/{interaction_id}:cancel",
            {},
        )

@@ -85,6 +85,10 @@ class GeminiRealtimeConfig(BaseRealtimeConfig):
            raise ValueError("api_key is required for Gemini API calls")
        api_base = api_base.replace("https://", "wss://")
        api_base = api_base.replace("http://", "ws://")
+        # WebSocket connections do not support custom HTTP headers in all clients,
+        # so the API key must remain a query parameter here. This is an accepted
+        # limitation: this URL is not requested through httpx, so it cannot show
+        # up in an httpx.HTTPStatusError traceback (the main leak vector).
        return f"{api_base}/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key={api_key}"

    def map_model_turn_event(
@@ -48,7 +48,7 @@ class GeminiVectorStoreConfig(BaseVectorStoreConfig):
    def get_auth_credentials(
        self, litellm_params: dict
    ) -> BaseVectorStoreAuthCredentials:
-        """Gemini uses API key in query params, not headers."""
+        """Gemini uses x-goog-api-key header for authentication."""
        return {}

    def get_vector_store_endpoints_by_type(self) -> VectorStoreIndexEndpoints:
@@ -79,6 +79,7 @@ class GeminiVectorStoreConfig(BaseVectorStoreConfig):
            api_key = litellm_params.get("api_key") or get_api_key_from_env()
            if api_key:
                self._cached_api_key = api_key
+                headers["x-goog-api-key"] = api_key

        return headers

@@ -133,13 +134,10 @@ class GeminiVectorStoreConfig(BaseVectorStoreConfig):
        if model and model.startswith("gemini/"):
            model = model.replace("gemini/", "")

-        # Get API key - Gemini requires it as a query parameter
        api_key = litellm_params.get("api_key") or GeminiModelInfo.get_api_key()
        if not api_key:
            raise ValueError("GEMINI_API_KEY or GOOGLE_API_KEY is required")
-
-        # Build the URL for generateContent with API key
-        url = f"{api_base}/models/{model}:generateContent?key={api_key}"
+        url = f"{api_base}/models/{model}:generateContent"

        # Build file_search tool configuration (using snake_case as per Gemini docs)
        file_search_config: Dict[str, Any] = {
@@ -286,10 +284,7 @@ class GeminiVectorStoreConfig(BaseVectorStoreConfig):
        """
        url = f"{api_base}/fileSearchStores"

-        # Append API key as query parameter (required by Gemini)
-        api_key = self._cached_api_key or get_api_key_from_env()
-        if api_key:
-            url = f"{url}?key={api_key}"
+        # API key is passed via x-goog-api-key header (set in validate_environment)

        request_body: Dict[str, Any] = {}

@@ -6,6 +6,7 @@ This requires websockets, and is currently only supported on LiteLLM Proxy.

 from typing import Any, Optional, cast

+from litellm._logging import _redact_string
 from litellm.constants import REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES
 from litellm.types.realtime import RealtimeQueryParams

@@ -148,11 +149,11 @@ class OpenAIRealtime(OpenAIChatCompletion):
                await realtime_streaming.bidirectional_forward()

        except websockets.exceptions.InvalidStatusCode as e:  # type: ignore
-            await websocket.close(code=e.status_code, reason=str(e))
+            await websocket.close(code=e.status_code, reason=_redact_string(str(e)))
        except Exception as e:
            try:
                await websocket.close(
-                    code=1011, reason=f"Internal server error: {str(e)}"
+                    code=1011, reason=_redact_string(f"Internal server error: {str(e)}")
                )
            except RuntimeError as close_error:
                if "already completed" in str(close_error) or "websocket.close" in str(
@@ -337,8 +337,13 @@ def _get_gemini_url(
    mode: all_gemini_url_modes,
    model: str,
    stream: Optional[bool],
-    gemini_api_key: Optional[str],
 ) -> Tuple[str, str]:
+    """Build the Gemini API URL for the given mode.
+
+    The API key is NOT included in the URL. Callers must pass it via the
+    ``x-goog-api-key`` header instead to avoid leaking credentials in
+    error tracebacks.
+    """
    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
        VertexGeminiConfig,
    )
@@ -352,27 +357,27 @@ def _get_gemini_url(
        endpoint = "generateContent"
        if stream is True:
            endpoint = "streamGenerateContent"
-            url = "https://generativelanguage.googleapis.com/{}/{}:{}?key={}&alt=sse".format(
-                api_version, _gemini_model_name, endpoint, gemini_api_key
+            url = "https://generativelanguage.googleapis.com/{}/{}:{}?alt=sse".format(
+                api_version, _gemini_model_name, endpoint
            )
        else:
-            url = "https://generativelanguage.googleapis.com/{}/{}:{}?key={}".format(
-                api_version, _gemini_model_name, endpoint, gemini_api_key
+            url = "https://generativelanguage.googleapis.com/{}/{}:{}".format(
+                api_version, _gemini_model_name, endpoint
            )
    elif mode == "embedding":
        endpoint = "embedContent"
-        url = "https://generativelanguage.googleapis.com/v1beta/{}:{}?key={}".format(
-            _gemini_model_name, endpoint, gemini_api_key
+        url = "https://generativelanguage.googleapis.com/v1beta/{}:{}".format(
+            _gemini_model_name, endpoint
        )
    elif mode == "batch_embedding":
        endpoint = "batchEmbedContents"
-        url = "https://generativelanguage.googleapis.com/v1beta/{}:{}?key={}".format(
-            _gemini_model_name, endpoint, gemini_api_key
+        url = "https://generativelanguage.googleapis.com/v1beta/{}:{}".format(
+            _gemini_model_name, endpoint
        )
    elif mode == "count_tokens":
        endpoint = "countTokens"
-        url = "https://generativelanguage.googleapis.com/v1beta/{}:{}?key={}".format(
-            _gemini_model_name, endpoint, gemini_api_key
+        url = "https://generativelanguage.googleapis.com/v1beta/{}:{}".format(
+            _gemini_model_name, endpoint
        )
    elif mode == "image_generation":
        raise ValueError(
@@ -62,10 +62,10 @@ class ContextCachingEndpoints(VertexBase):
            token, url
        """
        if custom_llm_provider == "gemini":
-            auth_header = None
+            auth_header = {"x-goog-api-key": gemini_api_key}  # type: ignore[assignment]
            endpoint = "cachedContents"
-            url = "https://generativelanguage.googleapis.com/v1beta/{}?key={}".format(
-                endpoint, gemini_api_key
+            url = "https://generativelanguage.googleapis.com/v1beta/{}".format(
+                endpoint
            )
        elif custom_llm_provider == "vertex_ai":
            auth_header = vertex_auth_header
@@ -353,7 +353,9 @@ class ContextCachingEndpoints(VertexBase):
        headers = {
            "Content-Type": "application/json",
        }
-        if token is not None:
+        if isinstance(token, dict):
+            headers.update(token)
+        elif token is not None:
            headers["Authorization"] = f"Bearer {token}"
        if extra_headers is not None:
            headers.update(extra_headers)
@@ -501,7 +503,9 @@ class ContextCachingEndpoints(VertexBase):
        headers = {
            "Content-Type": "application/json",
        }
-        if token is not None:
+        if isinstance(token, dict):
+            headers.update(token)
+        elif token is not None:
            headers["Authorization"] = f"Bearer {token}"
        if extra_headers is not None:
            headers.update(extra_headers)
@@ -473,9 +473,8 @@ class VertexBase:
                mode=mode,
                model=model,
                stream=stream,
-                gemini_api_key=gemini_api_key,
            )
-            auth_header = None  # this field is not used for gemin
+            auth_header = {"x-goog-api-key": gemini_api_key}  # type: ignore[assignment]
        else:
            vertex_location = self.get_vertex_region(
                vertex_region=vertex_location,
@@ -40,6 +40,7 @@ from typing import (
    get_args,
 )

+from litellm._logging import _redact_string
 from litellm._uuid import uuid

 if TYPE_CHECKING:
@@ -7244,7 +7245,7 @@ async def ahealth_check(
                f"Mode {mode} not supported. See modes here: https://docs.litellm.ai/docs/proxy/health"
            )
    except Exception as e:
-        stack_trace = traceback.format_exc()
+        stack_trace = _redact_string(traceback.format_exc())
        if isinstance(stack_trace, str):
            stack_trace = stack_trace[:1000]

@@ -22,7 +22,7 @@ from fastapi import HTTPException, Request, status
 from fastapi.responses import JSONResponse, Response, StreamingResponse

 import litellm
-from litellm._logging import verbose_proxy_logger
+from litellm._logging import _redact_string, verbose_proxy_logger
 from litellm._uuid import uuid
 from litellm.constants import (
    DD_TRACER_STREAMING_CHUNK_YIELD_RESOURCE,
@@ -1785,7 +1785,7 @@ class ProxyBaseLLMRequestProcessing:

            if isinstance(e, HTTPException):
                raise e
-            error_traceback = traceback.format_exc()
+            error_traceback = _redact_string(traceback.format_exc())
            error_msg = f"{str(e)}\n\n{error_traceback}"
            proxy_exception = ProxyException(
                message=getattr(e, "message", error_msg),
@@ -77,7 +77,7 @@ from litellm import (
    ModelResponseStream,
    Router,
 )
-from litellm._logging import verbose_proxy_logger
+from litellm._logging import _redact_string, verbose_proxy_logger
 from litellm._service_logger import ServiceLogging, ServiceTypes
 from litellm.caching.caching import DualCache, RedisCache
 from litellm.caching.dual_cache import LimitedSizeOrderedDict
@@ -155,7 +155,7 @@ def print_verbose(print_statement):

    verbose_proxy_logger.debug("{}\n{}".format(print_statement, traceback.format_exc()))
    if litellm.set_verbose:
-        print(f"LiteLLM Proxy: {print_statement}")  # noqa
+        print(f"LiteLLM Proxy: {_redact_string(str(print_statement))}")  # noqa


 def _get_email_logger_class():
@@ -1721,6 +1721,7 @@ class ProxyLogging:
            error_message = str(original_exception)
        if isinstance(traceback_str, str):
            error_message += traceback_str[:1000]
+        error_message = _redact_string(error_message)
        asyncio.create_task(
            self.alerting_handler(
                message=f"DB read/write call failed: {error_message}",
@@ -1791,7 +1792,7 @@ class ProxyLogging:

            asyncio.create_task(
                self.alerting_handler(
-                    message=f"LLM API call failed: `{exception_str}`",
+                    message=_redact_string(f"LLM API call failed: `{exception_str}`"),
                    level="High",
                    alert_type=AlertType.llm_exceptions,
                    request_data=request_data,
@@ -143,7 +143,7 @@ class GeminiRAGIngestion(BaseRAGIngestion):
        Returns:
            Store name (format: fileSearchStores/xxxxxxx)
        """
-        url = f"{base_url}/fileSearchStores?key={api_key}"
+        url = f"{base_url}/fileSearchStores"

        request_body = {"displayName": display_name}

@@ -154,7 +154,10 @@ class GeminiRAGIngestion(BaseRAGIngestion):
        response = await client.post(
            url,
            json=request_body,
-            headers={"Content-Type": "application/json"},
+            headers={
+                "Content-Type": "application/json",
+                "x-goog-api-key": api_key,
+            },
        )

        if response.status_code != 200:
@@ -228,7 +231,7 @@ class GeminiRAGIngestion(BaseRAGIngestion):
        # base_url is like: https://generativelanguage.googleapis.com/v1beta
        # We need: https://generativelanguage.googleapis.com/upload/v1beta/{store_id}:uploadToFileSearchStore
        api_base = base_url.replace("/v1beta", "")  # Get base without version
-        url = f"{api_base}/upload/v1beta/{vector_store_id}:uploadToFileSearchStore?key={api_key}"
+        url = f"{api_base}/upload/v1beta/{vector_store_id}:uploadToFileSearchStore"

        # Build request body with chunking config and metadata if provided
        request_body: Dict[str, Any] = {"displayName": filename}
@@ -263,6 +266,7 @@ class GeminiRAGIngestion(BaseRAGIngestion):
            "X-Goog-Upload-Header-Content-Length": str(file_size),
            "X-Goog-Upload-Header-Content-Type": content_type,
            "Content-Type": "application/json",
+            "x-goog-api-key": api_key,
        }

        verbose_logger.debug(f"Initiating resumable upload: {url}")
@@ -310,7 +310,7 @@ def test_gemini_multimodal_embedding_e2e():
    ) as mock_get_token:
        mock_get_token.return_value = (
            {"x-goog-api-key": "test-key"},
-            "https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-2-preview:embedContent?key=test-key"
+            "https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-2-preview:embedContent"
        )
        
        mock_response = MagicMock()
@@ -1127,80 +1127,58 @@ async def test_google_generate_content_with_openai():
        passed_fields = passed_fields - set(GenericLiteLLMParams.model_fields.keys())
        # extra_headers is now explicitly passed through for providers that need custom headers
        assert passed_fields == set(["model", "messages", "extra_headers"]), f"Expected model, messages, and extra_headers to be passed through, got {passed_fields}"
-@pytest.mark.asyncio
-async def test_agenerate_content_x_goog_api_key_header():
+def test_validate_environment_sets_x_goog_api_key():
    """
-    Test that agenerate_content passes x-goog-api-key header correctly.
-    
-    This test verifies that when calling agenerate_content with a Google GenAI model,
-    the HTTP request includes the x-goog-api-key header with the correct API key value.
-    """
-    import os
-    import unittest.mock
+    Test that VertexGeminiConfig.validate_environment correctly merges an
+    x-goog-api-key dict into the request headers.
+
+    This is the mechanism by which Google AI Studio (Gemini) requests get
+    authenticated via header instead of a query-string ?key= parameter.
+    """
+    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
+        VertexGeminiConfig,
+    )

-    import httpx
-    
    test_api_key = "test-gemini-api-key-123"
-    
-    # Mock environment to ensure we use our test API key
-    with unittest.mock.patch.dict(os.environ, {"GEMINI_API_KEY": test_api_key}, clear=False):
-        # Mock the AsyncHTTPHandler's post method to capture headers
-        with unittest.mock.patch("litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", new_callable=unittest.mock.AsyncMock) as mock_post:
-            # Mock a successful response
-            mock_response = unittest.mock.MagicMock()
-            mock_response.json.return_value = {
-                "candidates": [
-                    {
-                        "content": {
-                            "parts": [{"text": "Hello! How can I help you today?"}],
-                            "role": "model"
-                        },
-                        "finishReason": "STOP",
-                        "index": 0
-                    }
-                ],
-                "usageMetadata": {
-                    "promptTokenCount": 5,
-                    "candidatesTokenCount": 10,
-                    "totalTokenCount": 15
-                }
-            }
-            mock_response.status_code = 200
-            mock_response.headers = {}
-            mock_post.return_value = mock_response
-            
-            # Call agenerate_content with Google AI Studio model
-            try:
-                response = await agenerate_content(
-                    model="gemini/gemini-1.5-flash",
-                    contents=[
-                        {"role": "user", "parts": [{"text": "Hello, world!"}]}
-                    ],
-                    api_key=test_api_key
-                )
-            except Exception:
-                # Ignore any response processing errors, we just want to check the headers
-                pass
-            
-            # Verify that AsyncHTTPHandler.post was called
-            mock_post.assert_called_once()
-            
-            # Get the arguments passed to the post call
-            call_args, call_kwargs = mock_post.call_args
-            
-            # Verify that headers contain x-goog-api-key
-            headers = call_kwargs.get("headers", {})
-            assert "x-goog-api-key" in headers, f"x-goog-api-key header not found in headers: {list(headers.keys())}"
-            
-            # Verify the API key is set (could be our test key or from api_key parameter)
-            api_key_value = headers["x-goog-api-key"]
-            assert api_key_value == test_api_key, f"Expected x-goog-api-key to be {test_api_key}, got {api_key_value}"
-            
-            # Verify other expected headers
-            assert headers.get("Content-Type") == "application/json", f"Expected Content-Type application/json, got {headers.get('Content-Type')}"

-            print(f"✓ Test passed: x-goog-api-key header correctly set to {api_key_value}")
-            print(f"✓ All headers: {list(headers.keys())}")
+    # Simulate what _get_token_and_url returns for Gemini: a dict auth_header
+    auth_header_dict = {"x-goog-api-key": test_api_key}
+
+    headers = VertexGeminiConfig().validate_environment(
+        api_key=auth_header_dict,
+        headers=None,
+        model="gemini-2.5-flash",
+        messages=[],
+        optional_params={},
+        litellm_params={},
+    )
+
+    assert "x-goog-api-key" in headers, f"x-goog-api-key not in headers: {headers}"
+    assert headers["x-goog-api-key"] == test_api_key
+    assert headers["Content-Type"] == "application/json"
+
+
+def test_get_gemini_url_excludes_api_key():
+    """
+    Verify that _get_gemini_url never embeds the API key in the URL.
+
+    API keys in URLs leak through httpx error tracebacks. The key must be
+    sent via the x-goog-api-key header instead.
+    """
+    from litellm.llms.vertex_ai.common_utils import _get_gemini_url
+
+    for mode in ("chat", "embedding", "batch_embedding", "count_tokens"):
+        url, _ = _get_gemini_url(
+            mode=mode,
+            model="gemini-2.5-flash",
+            stream=False,
+        )
+        assert "key=" not in url, f"API key found in URL for mode={mode}: {url}"
+
+    # Streaming chat should only have ?alt=sse
+    url, _ = _get_gemini_url(mode="chat", model="gemini-2.5-flash", stream=True)
+    assert "key=" not in url, f"API key found in streaming URL: {url}"
+    assert "alt=sse" in url, f"Missing alt=sse in streaming URL: {url}"


 def test_inline_data_base64_image_transformation():
@@ -37,12 +37,12 @@ class TestGoogleAIStudioFilesTransformation:
            litellm_params=litellm_params,
        )

-        # Verify URL is constructed exactly as required:
-        # https://generativelanguage.googleapis.com/v1beta/files/{file_id}?key=API_KEY
+        # API key is passed via x-goog-api-key header, not in URL
        assert (
            url
-            == "https://generativelanguage.googleapis.com/v1beta/files/test123?key=test-api-key"
+            == "https://generativelanguage.googleapis.com/v1beta/files/test123"
        )
+        assert "key=" not in url

        # CRITICAL: params should be empty dict, not contain Content-Type or any other params
        # These would be incorrectly interpreted as query parameters
@@ -64,12 +64,12 @@ class TestGoogleAIStudioFilesTransformation:
            litellm_params=litellm_params,
        )

-        # Verify URL is constructed exactly as required:
-        # https://generativelanguage.googleapis.com/v1beta/files/{file_id}?key=API_KEY
+        # API key is passed via x-goog-api-key header, not in URL
        assert (
            url
-            == "https://generativelanguage.googleapis.com/v1beta/files/test123?key=test-api-key"
+            == "https://generativelanguage.googleapis.com/v1beta/files/test123"
        )
+        assert "key=" not in url

        # CRITICAL: params should be empty dict
        assert params == {}, f"Expected empty params dict, got: {params}"
@@ -79,11 +79,10 @@ class TestGoogleAIStudioFilesTransformation:

    def test_transform_retrieve_file_request_with_raw_id_only(self):
        """
-        Regression guard for the exact retrieval URL format.
+        Regression guard: API key must NOT appear in the URL.

-        If someone changes the method and stops producing:
-        https://generativelanguage.googleapis.com/v1beta/files/{file_id}?key=API_KEY
-        this test should fail.
+        The key is sent via x-goog-api-key header to prevent leaking
+        credentials in httpx error tracebacks.
        """
        file_id = "cctqueckiggb"
        litellm_params = {"api_key": "test-api-key"}
@@ -96,8 +95,9 @@ class TestGoogleAIStudioFilesTransformation:

        assert (
            url
-            == "https://generativelanguage.googleapis.com/v1beta/files/cctqueckiggb?key=test-api-key"
+            == "https://generativelanguage.googleapis.com/v1beta/files/cctqueckiggb"
        )
+        assert "key=" not in url
        assert params == {}

    @patch.dict("os.environ", {}, clear=True)
@@ -285,10 +285,10 @@ class TestGoogleAIStudioFilesTransformation:
            litellm_params={},
        )

-        # Verify URL structure
+        # Verify URL structure - API key must NOT be in URL
        assert api_base in url
        assert "upload/v1beta/files" in url
-        assert f"key={api_key}" in url
+        assert "key=" not in url

    def test_transform_delete_file_request_with_full_uri(self):
        """Test delete file request transformation with full URI"""