diff --git a/.dockerignore b/.dockerignore index 76e31546c2..a487d2a859 100644 --- a/.dockerignore +++ b/.dockerignore @@ -48,7 +48,7 @@ dist/ build/ *.egg-info/ .DS_Store -node_modules/ +**/node_modules *.log .env .env.local diff --git a/Dockerfile b/Dockerfile index 717ec2bcb7..5e93a0c627 100644 --- a/Dockerfile +++ b/Dockerfile @@ -49,7 +49,22 @@ USER root # Install runtime dependencies (libsndfile needed for audio processing on ARM64) RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip libsndfile && \ - npm install -g npm@latest tar@latest + npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 && \ + # SECURITY FIX: npm bundles tar, glob, and brace-expansion at multiple nested + # levels inside its dependency tree. `npm install -g <pkg>` only creates a + # SEPARATE global package, it does NOT replace npm's internal copies. + # We must find and replace EVERY copy inside npm's directory. + GLOBAL="$(npm root -g)" && \ + find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \ + done && \ + find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \ + done && \ + find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \ + done && \ + npm cache clean --force WORKDIR /app # Copy the current directory contents into the container at /app @@ -71,6 +86,20 @@ RUN NODEJS_WHEEL_NODE=$(find /usr/lib -path "*/nodejs_wheel/bin/node" 2>/dev/nul RUN find /usr/lib -type f -path "*/tornado/test/*" -delete && \ find /usr/lib -type d -path "*/tornado/test" -delete +# SECURITY FIX: nodejs-wheel-binaries (pip package used by Prisma) bundles a complete +# npm with old vulnerable deps at /usr/lib/python3.*/site-packages/nodejs_wheel/. 
+# Patch every copy of tar, glob, and brace-expansion inside that tree. +RUN GLOBAL="$(npm root -g)" && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/tar" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \ + done && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/glob" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \ + done && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/@isaacs/brace-expansion" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \ + done + # Install semantic_router and aurelio-sdk using script # Convert Windows line endings to Unix and make executable RUN sed -i 's/\r$//' docker/install_auto_router.sh && chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh diff --git a/ci_cd/security_scans.sh b/ci_cd/security_scans.sh index 770610c2a3..3ffa13c444 100755 --- a/ci_cd/security_scans.sh +++ b/ci_cd/security_scans.sh @@ -155,10 +155,7 @@ run_grype_scans() { "CVE-2025-12781" # No fix available yet "CVE-2025-11468" # No fix available yet "CVE-2026-1299" # Python 3.13 email module header injection - not applicable, LiteLLM doesn't use BytesGenerator for email serialization - "GHSA-7h2j-956f-4vf2" # @isaacs/brace-expansion ReDoS - npm tooling dependency, not used in application runtime - "GHSA-hx9q-6w63-j58v" # orjson deep recursion - no fix available yet - "GHSA-8qq5-rm4j-mr97" # node-tar symlink poisoning - npm tooling dependency, tar CLI not exposed in application code - "GHSA-29xp-372q-xqph" # node-tar race condition - npm tooling dependency, tar CLI not exposed in application code + "CVE-2026-0775" # npm cli incorrect permission assignment - no fix available yet, npm is only used at build/prisma-generate time ) # Build JSON array of allowlisted CVE IDs for jq diff --git a/docker/Dockerfile.custom_ui b/docker/Dockerfile.custom_ui index 57926bcd17..177d7b7b12 100644 --- a/docker/Dockerfile.custom_ui +++ 
b/docker/Dockerfile.custom_ui @@ -6,7 +6,18 @@ WORKDIR /app # Install Node.js and npm (adjust version as needed) RUN apt-get update && apt-get install -y nodejs npm && \ - npm install -g npm@latest tar@latest + npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 && \ + GLOBAL="$(npm root -g)" && \ + find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \ + done && \ + find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \ + done && \ + find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \ + done && \ + npm cache clean --force # Copy the UI source into the container COPY ./ui/litellm-dashboard /app/ui/litellm-dashboard diff --git a/docker/Dockerfile.database b/docker/Dockerfile.database index ecbe76446f..a6fcd98ab6 100644 --- a/docker/Dockerfile.database +++ b/docker/Dockerfile.database @@ -50,7 +50,18 @@ USER root # Install runtime dependencies RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip libsndfile && \ - npm install -g npm@latest tar@latest + npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 && \ + GLOBAL="$(npm root -g)" && \ + find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \ + done && \ + find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \ + done && \ + find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \ + done && \ + npm cache clean --force WORKDIR /app # Copy the current directory contents into the 
container at /app @@ -64,9 +75,19 @@ COPY --from=builder /wheels/ /wheels/ # Install the built wheel using pip; again using a wildcard if it's the only file RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels -# Replace the nodejs-wheel-binaries bundled node with the system node (fixes CVE-2025-55130) -RUN NODEJS_WHEEL_NODE=$(find /usr/lib -path "*/nodejs_wheel/bin/node" 2>/dev/null) && \ - if [ -n "$NODEJS_WHEEL_NODE" ]; then cp /usr/bin/node "$NODEJS_WHEEL_NODE"; fi +# SECURITY FIX: nodejs-wheel-binaries (pip package used by Prisma) bundles a complete +# npm with old vulnerable deps at /usr/lib/python3.*/site-packages/nodejs_wheel/. +# Patch every copy of tar, glob, and brace-expansion inside that tree. +RUN GLOBAL="$(npm root -g)" && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/tar" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \ + done && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/glob" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \ + done && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/@isaacs/brace-expansion" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \ + done # Install semantic_router and aurelio-sdk using script # Convert Windows line endings to Unix and make executable diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index ae557d4647..bc1d22d5e0 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -62,7 +62,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ nodejs \ npm \ && rm -rf /var/lib/apt/lists/* \ - && npm install -g npm@latest tar@latest + && npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 \ + && GLOBAL="$(npm root -g)" \ + && find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \ + done \ + && find "$GLOBAL/npm" 
-type d -name "glob" -path "*/node_modules/glob" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \ + done \ + && find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \ + done \ + && npm cache clean --force WORKDIR /app @@ -80,6 +91,20 @@ RUN pip install --no-cache-dir *.whl /wheels/* --no-index --find-links=/wheels/ rm -f *.whl && \ rm -rf /wheels +# SECURITY FIX: nodejs-wheel-binaries (pip package used by Prisma) bundles a complete +# npm with old vulnerable deps at /usr/lib/python3.*/site-packages/nodejs_wheel/. +# Patch every copy of tar, glob, and brace-expansion inside that tree. +RUN GLOBAL="$(npm root -g)" && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/tar" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \ + done && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/glob" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \ + done && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/@isaacs/brace-expansion" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \ + done + # Generate prisma client and set permissions # Convert Windows line endings to Unix for entrypoint scripts RUN prisma generate && \ diff --git a/docker/Dockerfile.non_root b/docker/Dockerfile.non_root index 4b09755ed7..64126bb029 100644 --- a/docker/Dockerfile.non_root +++ b/docker/Dockerfile.non_root @@ -104,7 +104,18 @@ RUN for i in 1 2 3; do \ && for i in 1 2 3; do \ apk add --no-cache python3 py3-pip bash openssl tzdata nodejs npm supervisor && break || sleep 5; \ done \ - && npm install -g npm@latest tar@latest + && npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 \ + && GLOBAL="$(npm root -g)" \ + && find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \ + rm -rf "$d" && cp 
-rL "$GLOBAL/tar" "$d"; \ + done \ + && find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \ + done \ + && find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \ + done \ + && npm cache clean --force # Copy artifacts from builder COPY --from=builder /app/requirements.txt /app/requirements.txt @@ -146,9 +157,19 @@ RUN pip install --no-index --find-links=/wheels/ -r requirements.txt && \ fi; \ fi -# Replace the nodejs-wheel-binaries bundled node with the system node (fixes CVE-2025-55130) -RUN NODEJS_WHEEL_NODE=$(find /usr/lib -path "*/nodejs_wheel/bin/node" 2>/dev/null) && \ - if [ -n "$NODEJS_WHEEL_NODE" ]; then cp /usr/bin/node "$NODEJS_WHEEL_NODE"; fi +# SECURITY FIX: nodejs-wheel-binaries (pip package used by Prisma) bundles a complete +# npm with old vulnerable deps at /usr/lib/python3.*/site-packages/nodejs_wheel/. +# Patch every copy of tar, glob, and brace-expansion inside that tree. 
+RUN GLOBAL="$(npm root -g)" && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/tar" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \ + done && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/glob" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \ + done && \ + find /usr/lib -path "*/nodejs_wheel/*/node_modules/@isaacs/brace-expansion" -type d | while read d; do \ + rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \ + done # Permissions, cleanup, and Prisma prep # Convert Windows line endings to Unix for entrypoint scripts diff --git a/docs/my-website/docs/providers/litellm_proxy.md b/docs/my-website/docs/providers/litellm_proxy.md index bfefc8a787..918ac6755a 100644 --- a/docs/my-website/docs/providers/litellm_proxy.md +++ b/docs/my-website/docs/providers/litellm_proxy.md @@ -227,6 +227,28 @@ response = litellm.completion( ) ``` +## OAuth2/JWT Authentication + +If your LiteLLM Proxy requires OAuth2/JWT authentication (e.g., Azure AD, Keycloak, Okta), the SDK can automatically obtain and refresh tokens for you. + +```python +import litellm +from litellm.proxy_auth import AzureADCredential, ProxyAuthHandler + +litellm.proxy_auth = ProxyAuthHandler( + credential=AzureADCredential(), + scope="api://my-litellm-proxy/.default" +) +litellm.api_base = "https://my-proxy.example.com" + +response = litellm.completion( + model="gpt-4", + messages=[{"role": "user", "content": "Hello!"}] +) +``` + +[Learn more about SDK Proxy Authentication (OAuth2/JWT Auto-Refresh) →](../proxy_auth) + ## Sending `tags` to LiteLLM Proxy Tags allow you to categorize and track your API requests for monitoring, debugging, and analytics purposes. You can send tags as a list of strings to the LiteLLM Proxy using the `extra_body` parameter. 
diff --git a/docs/my-website/docs/proxy/guardrails/zscaler_ai_guard.md b/docs/my-website/docs/proxy/guardrails/zscaler_ai_guard.md index 94f31c3bfd..2e62600423 100644 --- a/docs/my-website/docs/proxy/guardrails/zscaler_ai_guard.md +++ b/docs/my-website/docs/proxy/guardrails/zscaler_ai_guard.md @@ -100,7 +100,7 @@ In cases where encounter other errors when apply Zscaler AI Guard, return exampl } } ``` -## 6. Sending User Information to Zscaler AI Guard for Analysis (Optional) +## 6. Sending User Information to Zscaler AI Guard (Optional) If you need to send end-user information to Zscaler AI Guard for analysis, you can set the configuration in the environment variables to True and include the relevant information in custom_headers on Zscaler AI Guard. - To send user_api_key_alias: @@ -133,4 +133,30 @@ curl -i http://localhost:8165/v1/chat/completions \ "zguard_policy_id": } }' +``` + +## 8. Set Custom Zscaler AI Guard Policy on LiteLLM Team or Key Metadata (Optional) +In addition to setting `zguard_policy_id` in a request or the configuration file, you can also set it in the metadata for LiteLLM Team or Key. The `zguard_policy_id` is determined using the following order of precedence: request, Key, Team, config file. This logic is illustrated below: +``` +user_api_key_metadata = metadata.get("user_api_key_metadata", {}) or {} +team_metadata = metadata.get("team_metadata", {}) or {} +policy_id = ( + metadata.get("zguard_policy_id") + if "zguard_policy_id" in metadata + else ( + user_api_key_metadata.get("zguard_policy_id") + if "zguard_policy_id" in user_api_key_metadata + else ( + team_metadata.get("zguard_policy_id") + if "zguard_policy_id" in team_metadata + else self.policy_id + ) + ) + ) +``` +You can leverage this feature to apply multiple policies configured on the Zscaler AI Guard (ZGuard) to traffic from different applications. (Note: It is recommended to map policies using either Team or Key metadata, but not a mix of both.) 
+ +Example Team/Key metadata value (this can be set from the UI): +``` +{"zguard_policy_id": 100} ``` \ No newline at end of file diff --git a/docs/my-website/docs/proxy_auth.md b/docs/my-website/docs/proxy_auth.md new file mode 100644 index 0000000000..91084b34a3 --- /dev/null +++ b/docs/my-website/docs/proxy_auth.md @@ -0,0 +1,333 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# SDK Proxy Authentication (OAuth2/JWT Auto-Refresh) + +Automatically obtain and refresh OAuth2/JWT tokens when using the LiteLLM Python SDK with a LiteLLM Proxy that requires JWT authentication. + +## Overview + +When your LiteLLM Proxy is protected by an OAuth2/OIDC provider (Azure AD, Keycloak, Okta, Auth0, etc.), your SDK clients need valid JWT tokens for every request. Instead of manually managing token lifecycle, `litellm.proxy_auth` handles this automatically: + +- Obtains tokens from your identity provider +- Caches tokens to avoid unnecessary requests +- Refreshes tokens before they expire (60-second buffer) +- Injects `Authorization: Bearer <token>` headers into every request + +## Quick Start + +### Azure AD + + + + +Uses the [DefaultAzureCredential](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential) chain (environment variables, managed identity, Azure CLI, etc.): + +```python +import litellm +from litellm.proxy_auth import AzureADCredential, ProxyAuthHandler + +# One-time setup +litellm.proxy_auth = ProxyAuthHandler( + credential=AzureADCredential(), # uses DefaultAzureCredential + scope="api://my-litellm-proxy/.default" +) +litellm.api_base = "https://my-proxy.example.com" + +# All requests now include Authorization headers automatically +response = litellm.completion( + model="gpt-4", + messages=[{"role": "user", "content": "Hello!"}] +) +``` + + + + +Use a specific Azure AD app registration: + +```python +import litellm +from azure.identity import ClientSecretCredential +from litellm.proxy_auth import 
AzureADCredential, ProxyAuthHandler + +azure_cred = ClientSecretCredential( + tenant_id="your-tenant-id", + client_id="your-client-id", + client_secret="your-client-secret" +) + +litellm.proxy_auth = ProxyAuthHandler( + credential=AzureADCredential(credential=azure_cred), + scope="api://my-litellm-proxy/.default" +) +litellm.api_base = "https://my-proxy.example.com" + +response = litellm.completion( + model="gpt-4", + messages=[{"role": "user", "content": "Hello!"}] +) +``` + + + + +**Required package:** `pip install azure-identity` + +### Generic OAuth2 (Okta, Auth0, Keycloak, etc.) + +Works with any OAuth2 provider that supports the `client_credentials` grant type: + +```python +import litellm +from litellm.proxy_auth import GenericOAuth2Credential, ProxyAuthHandler + +litellm.proxy_auth = ProxyAuthHandler( + credential=GenericOAuth2Credential( + client_id="your-client-id", + client_secret="your-client-secret", + token_url="https://your-idp.example.com/oauth2/token" + ), + scope="litellm_proxy_api" +) +litellm.api_base = "https://my-proxy.example.com" + +response = litellm.completion( + model="gpt-4", + messages=[{"role": "user", "content": "Hello!"}] +) +``` + +### Custom Credential Provider + +Implement the `TokenCredential` protocol to use any authentication mechanism: + +```python +import time +import litellm +from litellm.proxy_auth import AccessToken, ProxyAuthHandler + +class MyCustomCredential: + """Any class with a get_token(scope) -> AccessToken method works.""" + + def get_token(self, scope: str) -> AccessToken: + # Your custom logic to obtain a token + token = my_auth_system.get_jwt(scope=scope) + return AccessToken( + token=token, + expires_on=int(time.time()) + 3600 + ) + +litellm.proxy_auth = ProxyAuthHandler( + credential=MyCustomCredential(), + scope="my-scope" +) +``` + +## Supported Endpoints + +Auth headers are automatically injected for: + +| Endpoint | Function | +|----------|----------| +| Chat Completions | `litellm.completion()` / 
`litellm.acompletion()` | +| Embeddings | `litellm.embedding()` / `litellm.aembedding()` | + +## How It Works + +``` +┌──────────┐ ┌──────────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Your │ │ ProxyAuthHandler │ │ Identity │ │ LiteLLM │ +│ Code │────▶│ (token cache) │────▶│ Provider │ │ Proxy │ +│ │ │ │◀────│ (Azure AD, │ │ │ +│ │ │ │ │ Okta, etc) │ │ │ +│ │ └────────┬─────────┘ └──────────────┘ │ │ +│ │ │ Authorization: Bearer <token> │ │ +│ │──────────────┼───────────────────────────────────▶│ │ +│ │◀─────────────┼────────────────────────────────────│ │ +└──────────┘ │ └──────────────┘ +``` + +1. You set `litellm.proxy_auth` once at startup +2. On each SDK call (`completion()`, `embedding()`), the handler checks its cached token +3. If the token is missing or expires within 60 seconds, it requests a new one from your identity provider +4. The `Authorization: Bearer <token>` header is injected into the request +5. If token retrieval fails, a warning is logged and the request proceeds without auth headers + +## API Reference + +### ProxyAuthHandler + +The main handler that manages the token lifecycle. + +```python +from litellm.proxy_auth import ProxyAuthHandler + +handler = ProxyAuthHandler( + credential=<credential>, # required - credential provider + scope="<scope>" # required - OAuth2 scope to request +) +``` + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `credential` | `TokenCredential` | Yes | A credential provider (AzureADCredential, GenericOAuth2Credential, or custom) | +| `scope` | `str` | Yes | The OAuth2 scope to request tokens for | + +**Methods:** + +| Method | Returns | Description | +|--------|---------|-------------| +| `get_token()` | `AccessToken` | Get a valid token, refreshing if needed | +| `get_auth_headers()` | `dict` | Get `{"Authorization": "Bearer <token>"}` headers | + +### AzureADCredential + +Wraps any `azure-identity` credential with lazy initialization. 
+ +```python +from litellm.proxy_auth import AzureADCredential + +# Uses DefaultAzureCredential (recommended) +cred = AzureADCredential() + +# Or wrap a specific azure-identity credential +from azure.identity import ManagedIdentityCredential +cred = AzureADCredential(credential=ManagedIdentityCredential()) +``` + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `credential` | Azure `TokenCredential` | No | An azure-identity credential. If `None`, uses `DefaultAzureCredential` | + +### GenericOAuth2Credential + +Standard OAuth2 client credentials flow for any provider. + +```python +from litellm.proxy_auth import GenericOAuth2Credential + +cred = GenericOAuth2Credential( + client_id="your-client-id", + client_secret="your-client-secret", + token_url="https://your-idp.com/oauth2/token" +) +``` + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `client_id` | `str` | Yes | OAuth2 client ID | +| `client_secret` | `str` | Yes | OAuth2 client secret | +| `token_url` | `str` | Yes | Token endpoint URL | + +### AccessToken + +Dataclass representing an OAuth2 access token. + +```python +from litellm.proxy_auth import AccessToken + +token = AccessToken( + token="eyJhbG...", # JWT string + expires_on=1234567890 # Unix timestamp +) +``` + +### TokenCredential Protocol + +Any class implementing this protocol can be used as a credential provider: + +```python +from litellm.proxy_auth import AccessToken + +class MyCredential: + def get_token(self, scope: str) -> AccessToken: + ... 
+``` + +## Provider-Specific Examples + +### Keycloak + +```python +from litellm.proxy_auth import GenericOAuth2Credential, ProxyAuthHandler + +litellm.proxy_auth = ProxyAuthHandler( + credential=GenericOAuth2Credential( + client_id="litellm-client", + client_secret="your-keycloak-client-secret", + token_url="https://keycloak.example.com/realms/your-realm/protocol/openid-connect/token" + ), + scope="openid" +) +``` + +### Okta + +```python +from litellm.proxy_auth import GenericOAuth2Credential, ProxyAuthHandler + +litellm.proxy_auth = ProxyAuthHandler( + credential=GenericOAuth2Credential( + client_id="your-okta-client-id", + client_secret="your-okta-client-secret", + token_url="https://your-org.okta.com/oauth2/default/v1/token" + ), + scope="litellm_api" +) +``` + +### Auth0 + +```python +from litellm.proxy_auth import GenericOAuth2Credential, ProxyAuthHandler + +litellm.proxy_auth = ProxyAuthHandler( + credential=GenericOAuth2Credential( + client_id="your-auth0-client-id", + client_secret="your-auth0-client-secret", + token_url="https://your-tenant.auth0.com/oauth/token" + ), + scope="https://my-proxy.example.com/api" +) +``` + +### Azure AD with Managed Identity + +```python +from azure.identity import ManagedIdentityCredential +from litellm.proxy_auth import AzureADCredential, ProxyAuthHandler + +litellm.proxy_auth = ProxyAuthHandler( + credential=AzureADCredential( + credential=ManagedIdentityCredential() + ), + scope="api://my-litellm-proxy/.default" +) +``` + +## Combining with `use_litellm_proxy` + +You can use `proxy_auth` together with [`use_litellm_proxy`](./providers/litellm_proxy#send-all-sdk-requests-to-litellm-proxy) to route all SDK requests through an authenticated proxy: + +```python +import os +import litellm +from litellm.proxy_auth import AzureADCredential, ProxyAuthHandler + +# Route all requests through the proxy +os.environ["LITELLM_PROXY_API_BASE"] = "https://my-proxy.example.com" +litellm.use_litellm_proxy = True + +# Authenticate with 
OAuth2/JWT +litellm.proxy_auth = ProxyAuthHandler( + credential=AzureADCredential(), + scope="api://my-litellm-proxy/.default" +) + +# This request goes through the proxy with automatic JWT auth +response = litellm.completion( + model="vertex_ai/gemini-2.0-flash-001", + messages=[{"role": "user", "content": "Hello!"}] +) +``` diff --git a/docs/my-website/docs/tutorials/claude_code_prompt_cache_routing.md b/docs/my-website/docs/tutorials/claude_code_prompt_cache_routing.md new file mode 100644 index 0000000000..bbb2948985 --- /dev/null +++ b/docs/my-website/docs/tutorials/claude_code_prompt_cache_routing.md @@ -0,0 +1,43 @@ +# Claude Code - Prompt Cache Routing + +Claude's [Prompt Caching](https://platform.claude.com/docs/en/build-with-claude/prompt-caching) feature helps to optimize API usage through attempting to cache prompts and re-use cached prompts during subsequent API calls. This feature is used by Claude Code. + +When LiteLLM [load balancing](../proxy/load_balancing.md) is enabled, to ensure this prompt caching feature still works with Claude Code, LiteLLM needs to be configured to use the `PromptCachingDeploymentCheck` pre-call check. This pre-call check will ensure that API calls that used prompt caching are remembered and that subsequent API calls that try to use that prompt caching are routed to the same model deployment where a cache write occurred. + +## Set Up + +1. 
Configure the router so that it uses the `PromptCachingDeploymentCheck` (via setting the `optional_pre_call_checks` property), and configure the models so that they can access multiple deployments of Claude; below, we show an example for multiple AWS accounts (referred to as `account-1` and `account-2`, using the `aws_profile_name` property): +```yaml +router_settings: + optional_pre_call_checks: ["prompt_caching"] + +model_list: +- litellm_params: + model: us.anthropic.claude-sonnet-4-5-20250929-v1:0 + aws_profile_name: account-1 + aws_region_name: us-west-2 + model_info: + litellm_provider: bedrock + model_name: us.anthropic.claude-sonnet-4-5-20250929-v1:0 +- litellm_params: + model: us.anthropic.claude-sonnet-4-5-20250929-v1:0 + aws_profile_name: account-2 + aws_region_name: us-west-2 + model_info: + litellm_provider: bedrock + model_name: us.anthropic.claude-sonnet-4-5-20250929-v1:0 +``` +2. Utilize Claude Code: + 1. Launch Claude Code, which will do a warm-up API call that tries to cache its warm-up prompt and its system prompt. + 2. Wait a few seconds, then quit Claude Code and re-open it. + 3. You'll notice that the warm-up API call successfully gets a cache hit (if using Claude Code in an IDE like VS Code, ensure that you don't do anything between step 2.1 and 2.2 here, otherwise there may not be a cache hit): + 1. Go to the [LiteLLM Request Logs page](../proxy/ui_logs.md) in the Admin UI + 2. Click on the individual requests to see (a) the cache creation and cache read tokens; and (b) the Model ID. In particular, the API call from step 2.1 should show a cache write, and the API call from step 2.2 should show a cache read; in addition, the Model ID should be equal (meaning the API call is getting forwarded to the same AWS account). 
+ +## Related + +- [Claude Code - Quickstart](./claude_responses_api.md) +- [Claude Code - Customer Tracking](./claude_code_customer_tracking.md) +- [Claude Code - Plugin Marketplace](./claude_code_plugin_marketplace.md) +- [Claude Code - WebSearch](./claude_code_websearch.md) +- [Proxy - Load Balancing](../proxy/load_balancing.md) diff --git a/docs/my-website/package.json b/docs/my-website/package.json index 4c3db68056..4af7a168f8 100644 --- a/docs/my-website/package.json +++ b/docs/my-website/package.json @@ -61,6 +61,8 @@ "mermaid": ">=11.10.0", "gray-matter": "4.0.3", "glob": ">=11.1.0", + "tar": ">=7.5.7", + "@isaacs/brace-expansion": ">=5.0.1", "node-forge": ">=1.3.2", "mdast-util-to-hast": ">=13.2.1", "lodash-es": ">=4.17.23" diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 6c354b7c04..343860cb15 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -125,6 +125,7 @@ const sidebars = { "tutorials/claude_responses_api", "tutorials/claude_code_max_subscription", "tutorials/claude_code_customer_tracking", + "tutorials/claude_code_prompt_cache_routing", "tutorials/claude_code_websearch", "tutorials/claude_mcp", "tutorials/claude_non_anthropic_models", @@ -223,6 +224,7 @@ const sidebars = { label: "Configuration", items: [ "set_keys", + "proxy_auth", "caching/all_caches", ], }, diff --git a/litellm-js/spend-logs/package.json b/litellm-js/spend-logs/package.json index 9c1c2d4f6d..6729256714 100644 --- a/litellm-js/spend-logs/package.json +++ b/litellm-js/spend-logs/package.json @@ -11,6 +11,8 @@ "tsx": "^4.7.1" }, "overrides": { - "glob": ">=11.1.0" + "glob": ">=11.1.0", + "tar": ">=7.5.7", + "@isaacs/brace-expansion": ">=5.0.1" } } diff --git a/litellm-proxy-extras/litellm_proxy_extras/migrations/20260209085821_add_verificationtoken_indexes/migration.sql b/litellm-proxy-extras/litellm_proxy_extras/migrations/20260209085821_add_verificationtoken_indexes/migration.sql new file mode 100644 index 
0000000000..572eea9b52 --- /dev/null +++ b/litellm-proxy-extras/litellm_proxy_extras/migrations/20260209085821_add_verificationtoken_indexes/migration.sql @@ -0,0 +1,8 @@ +-- CreateIndex +CREATE INDEX "LiteLLM_VerificationToken_user_id_team_id_idx" ON "LiteLLM_VerificationToken"("user_id", "team_id"); + +-- CreateIndex +CREATE INDEX "LiteLLM_VerificationToken_team_id_idx" ON "LiteLLM_VerificationToken"("team_id"); + +-- CreateIndex +CREATE INDEX "LiteLLM_VerificationToken_budget_reset_at_expires_idx" ON "LiteLLM_VerificationToken"("budget_reset_at", "expires"); diff --git a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma index c2a599c178..b1ca1f71c9 100644 --- a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma +++ b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma @@ -310,6 +310,16 @@ model LiteLLM_VerificationToken { litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id]) object_permission LiteLLM_ObjectPermissionTable? @relation(fields: [object_permission_id], references: [object_permission_id]) + + // SELECT COUNT(*) FROM (SELECT "public"."LiteLLM_VerificationToken"."token" FROM "public"."LiteLLM_VerificationToken" WHERE ("public"."LiteLLM_VerificationToken"."user_id" = $1 AND ("public"."LiteLLM_VerificationToken"."team_id" IS NULL OR "public"."LiteLLM_VerificationToken"."team_id" <> $2)) OFFSET $3 ) AS "sub" + // SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."user_id" = $1 OFFSET $2 + @@index([user_id, team_id]) + + // SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."team_id" = $1 OFFSET $2 + @@index([team_id]) + + // SELECT ... 
FROM "public"."LiteLLM_VerificationToken" WHERE (("public"."LiteLLM_VerificationToken"."expires" IS NULL OR "public"."LiteLLM_VerificationToken"."expires" > $1) AND "public"."LiteLLM_VerificationToken"."budget_reset_at" < $2) OFFSET $3 + @@index([budget_reset_at, expires]) } // Audit table for deleted keys - preserves spend and key information for historical tracking diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 127b0e53fa..64e0b26a8e 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -45,7 +45,14 @@ from litellm.llms.custom_httpx.http_handler import ( httpxSpecialProvider, ) from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus -from litellm.types.integrations.datadog import * +from litellm.types.integrations.datadog import ( + DD_ERRORS, + DD_MAX_BATCH_SIZE, + DataDogStatus, + DatadogInitParams, + DatadogPayload, + DatadogProxyFailureHookJsonMessage, +) from litellm.types.services import ServiceLoggerPayload, ServiceTypes from litellm.types.utils import StandardLoggingPayload @@ -85,12 +92,14 @@ class DataDogLogger( """ try: verbose_logger.debug("Datadog: in init datadog logger") - + self.is_mock_mode = should_use_datadog_mock() - + if self.is_mock_mode: create_mock_datadog_client() - verbose_logger.debug("[DATADOG MOCK] Datadog logger initialized in mock mode") + verbose_logger.debug( + "[DATADOG MOCK] Datadog logger initialized in mock mode" + ) ######################################################### # Handle datadog_params set as litellm.datadog_params @@ -209,6 +218,96 @@ class DataDogLogger( ) pass + async def async_post_call_failure_hook( + self, + request_data: dict, + original_exception: Exception, + user_api_key_dict: Any, + traceback_str: Optional[str] = None, + ) -> Optional[Any]: + """ + Log proxy-level failures (e.g. 401 auth, DB connection errors) to Datadog. 
+ + Ensures failures that occur before or outside the LLM completion flow + (e.g. ConnectError during auth when DB is down) are visible in Datadog + alongside Prometheus. + """ + try: + from litellm.litellm_core_utils.litellm_logging import ( + StandardLoggingPayloadSetup, + ) + from litellm.litellm_core_utils.safe_json_dumps import safe_dumps + + error_information = StandardLoggingPayloadSetup.get_error_information( + original_exception=original_exception, + traceback_str=traceback_str, + ) + _code = error_information.get("error_code") or "" + status_code: Optional[int] = None + if _code and str(_code).strip().isdigit(): + status_code = int(_code) + + # Use project-standard sanitized user context when running in proxy + user_context: Dict[str, Any] = {} + try: + from litellm.proxy.litellm_pre_call_utils import ( + LiteLLMProxyRequestSetup, + ) + + _meta = ( + LiteLLMProxyRequestSetup.get_sanitized_user_information_from_key( + user_api_key_dict=user_api_key_dict + ) + ) + user_context = dict(_meta) if isinstance(_meta, dict) else _meta + except Exception: + # Fallback if proxy not available (e.g. 
SDK-only): minimal safe fields + if hasattr(user_api_key_dict, "request_route"): + user_context["request_route"] = getattr( + user_api_key_dict, "request_route", None + ) + if hasattr(user_api_key_dict, "team_id"): + user_context["team_id"] = getattr( + user_api_key_dict, "team_id", None + ) + if hasattr(user_api_key_dict, "user_id"): + user_context["user_id"] = getattr( + user_api_key_dict, "user_id", None + ) + if hasattr(user_api_key_dict, "end_user_id"): + user_context["end_user_id"] = getattr( + user_api_key_dict, "end_user_id", None + ) + + message_payload: DatadogProxyFailureHookJsonMessage = { + "exception": error_information.get("error_message") + or str(original_exception), + "error_class": error_information.get("error_class") + or original_exception.__class__.__name__, + "status_code": status_code, + "traceback": error_information.get("traceback") or "", + "user_api_key_dict": user_context, + } + + dd_payload = DatadogPayload( + ddsource=get_datadog_source(), + ddtags=get_datadog_tags(), + hostname=get_datadog_hostname(), + message=safe_dumps(message_payload), + service=get_datadog_service(), + status=DataDogStatus.ERROR, + ) + self._add_trace_context_to_payload(dd_payload=dd_payload) + self.log_queue.append(dd_payload) + + if len(self.log_queue) >= self.batch_size: + await self.async_send_batch() + except Exception as e: + verbose_logger.exception( + f"Datadog: async_post_call_failure_hook - {str(e)}\n{traceback.format_exc()}" + ) + return None + async def async_send_batch(self): """ Sends the in memory logs queue to datadog api @@ -230,9 +329,11 @@ class DataDogLogger( len(self.log_queue), self.intake_url, ) - + if self.is_mock_mode: - verbose_logger.debug("[DATADOG MOCK] Mock mode enabled - API calls will be intercepted") + verbose_logger.debug( + "[DATADOG MOCK] Mock mode enabled - API calls will be intercepted" + ) response = await self.async_send_compressed_data(self.log_queue) if response.status_code == 413: diff --git 
a/litellm/litellm_core_utils/streaming_chunk_builder_utils.py b/litellm/litellm_core_utils/streaming_chunk_builder_utils.py index 53252df0a2..76c7246b87 100644 --- a/litellm/litellm_core_utils/streaming_chunk_builder_utils.py +++ b/litellm/litellm_core_utils/streaming_chunk_builder_utils.py @@ -1,6 +1,6 @@ import base64 import time -from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast from litellm.types.llms.openai import ( ChatCompletionAssistantContentValue, @@ -326,10 +326,22 @@ class ChunkProcessor: thinking_blocks: List[ Union["ChatCompletionThinkingBlock", "ChatCompletionRedactedThinkingBlock"] ] = [] - combined_thinking_text: Optional[str] = None - data: Optional[str] = None - signature: Optional[str] = None - type: Literal["thinking", "redacted_thinking"] = "thinking" + current_thinking_text_parts: List[str] = [] + current_signature: Optional[str] = None + + def _flush_thinking_block() -> None: + nonlocal current_thinking_text_parts, current_signature + if len(current_thinking_text_parts) > 0 and current_signature: + thinking_blocks.append( + ChatCompletionThinkingBlock( + type="thinking", + thinking="".join(current_thinking_text_parts), + signature=current_signature, + ) + ) + current_thinking_text_parts = [] + current_signature = None + for chunk in chunks: choices = chunk["choices"] for choice in choices: @@ -339,33 +351,25 @@ class ChunkProcessor: for thinking_block in thinking: thinking_type = thinking_block.get("type", None) if thinking_type and thinking_type == "redacted_thinking": - type = "redacted_thinking" - data = thinking_block.get("data", None) + _flush_thinking_block() + redacted_data = thinking_block.get("data", None) + if redacted_data: + thinking_blocks.append( + ChatCompletionRedactedThinkingBlock( + type="redacted_thinking", + data=redacted_data, + ) + ) else: - type = "thinking" thinking_text = thinking_block.get("thinking", None) if 
thinking_text: - if combined_thinking_text is None: - combined_thinking_text = "" - - combined_thinking_text += thinking_text + current_thinking_text_parts.append(thinking_text) signature = thinking_block.get("signature", None) + if signature: + current_signature = signature + _flush_thinking_block() - if combined_thinking_text and type == "thinking" and signature: - thinking_blocks.append( - ChatCompletionThinkingBlock( - type=type, - thinking=combined_thinking_text, - signature=signature, - ) - ) - elif data and type == "redacted_thinking": - thinking_blocks.append( - ChatCompletionRedactedThinkingBlock( - type=type, - data=data, - ) - ) + _flush_thinking_block() if len(thinking_blocks) > 0: return thinking_blocks diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py index 9e28c13968..7d93e18409 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py @@ -46,8 +46,9 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig): "thinking", "context_management", "output_format", "inference_geo", "speed", + "output_config", # TODO: Add Anthropic `metadata` support # "metadata", ] diff --git a/litellm/llms/oci/chat/transformation.py b/litellm/llms/oci/chat/transformation.py index 84f39ef252..e66394ae5f 100644 --- a/litellm/llms/oci/chat/transformation.py +++ b/litellm/llms/oci/chat/transformation.py @@ -218,6 +218,7 @@ class OCIChatConfig(BaseConfig): "parallel_tool_calls": False, "audio": False, "web_search_options": False, + "response_format": "responseFormat", } # Cohere and Gemini use the same parameter mapping as GENERIC @@ -269,6 +270,9 @@ class OCIChatConfig(BaseConfig): adapted_params[alias] = value + if alias == "responseFormat": + adapted_params["response_format"] =
value + return adapted_params def _sign_with_oci_signer( @@ -673,6 +677,36 @@ class OCIChatConfig(BaseConfig): selected_params["tools"] = adapt_tool_definition_to_oci_standard( # type: ignore[assignment] selected_params["tools"], vendor # type: ignore[arg-type] ) + + # Transform response_format type to OCI uppercase format + if "responseFormat" in selected_params: + rf = selected_params["responseFormat"] + if isinstance(rf, dict) and "type" in rf: + rf_payload = dict(rf) + selected_params["responseFormat"] = rf_payload + + response_type = rf_payload["type"] + schema_payload: Optional[Any] = None + + if "json_schema" in rf_payload: + raw_schema_payload = rf_payload.pop("json_schema") + if isinstance(raw_schema_payload, dict): + schema_payload = dict(raw_schema_payload) + else: + schema_payload = raw_schema_payload + + if schema_payload is not None: + rf_payload["jsonSchema"] = schema_payload + + if vendor == OCIVendors.COHERE: + # Cohere expects lower-case type values + rf_payload["type"] = response_type + else: + format_type = response_type.upper() + if format_type == "JSON": + format_type = "JSON_OBJECT" + rf_payload["type"] = format_type + return selected_params def adapt_messages_to_cohere_standard(self, messages: List[AllMessageValues]) -> List[CohereMessage]: @@ -806,11 +840,12 @@ class OCIChatConfig(BaseConfig): # Create Cohere-specific chat request + optional_cohere_params = self._get_optional_params(OCIVendors.COHERE, optional_params) chat_request = CohereChatRequest( apiFormat="COHERE", message=self._extract_text_content(user_messages[-1]["content"]), chatHistory=self.adapt_messages_to_cohere_standard(messages), - **self._get_optional_params(OCIVendors.COHERE, optional_params) + **optional_cohere_params ) data = OCICompletionPayload( diff --git a/litellm/llms/openai/videos/transformation.py b/litellm/llms/openai/videos/transformation.py index 3073b22e1c..0dd7940a92 100644 --- a/litellm/llms/openai/videos/transformation.py +++ 
b/litellm/llms/openai/videos/transformation.py @@ -269,26 +269,27 @@ class OpenAIVideoConfig(BaseVideoConfig): ) -> Tuple[str, Dict]: """ Transform the video list request for OpenAI API. - + OpenAI API expects the following request: - GET /v1/videos """ # Use the api_base directly for video list url = api_base - + # Prepare query parameters params = {} if after is not None: - params["after"] = after + # Decode the wrapped video ID back to the original provider ID + params["after"] = extract_original_video_id(after) if limit is not None: params["limit"] = str(limit) if order is not None: params["order"] = order - + # Add any extra query parameters if extra_query: params.update(extra_query) - + return url, params def transform_video_list_response( @@ -296,18 +297,40 @@ class OpenAIVideoConfig(BaseVideoConfig): raw_response: httpx.Response, logging_obj: LiteLLMLoggingObj, custom_llm_provider: Optional[str] = None, - ) -> Dict[str,str]: + ) -> Dict[str, str]: response_data = raw_response.json() - + if custom_llm_provider and "data" in response_data: for video_obj in response_data.get("data", []): if isinstance(video_obj, dict) and "id" in video_obj: video_obj["id"] = encode_video_id_with_provider( - video_obj["id"], - custom_llm_provider, - video_obj.get("model") + video_obj["id"], + custom_llm_provider, + video_obj.get("model"), ) - + + # Encode pagination cursor IDs so they remain consistent + # with the wrapped data[].id format + data_list = response_data.get("data", []) + if response_data.get("first_id"): + first_model = None + if data_list and isinstance(data_list[0], dict): + first_model = data_list[0].get("model") + response_data["first_id"] = encode_video_id_with_provider( + response_data["first_id"], + custom_llm_provider, + first_model, + ) + if response_data.get("last_id"): + last_model = None + if data_list and isinstance(data_list[-1], dict): + last_model = data_list[-1].get("model") + response_data["last_id"] = encode_video_id_with_provider( + 
response_data["last_id"], + custom_llm_provider, + last_model, + ) + return response_data def transform_video_delete_request( diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py index 0b728d88e7..6a5b934661 100644 --- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py +++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py @@ -56,34 +56,36 @@ class VertexAIAnthropicConfig(AnthropicConfig): ) -> None: """ Add context_management beta headers to the beta_set. - + - If any edit has type "compact_20260112", add compact-2026-01-12 header - For all other edits, add context-management-2025-06-27 header - + Args: beta_set: Set of beta headers to modify in-place context_management: The context_management dict from optional_params """ from litellm.types.llms.anthropic import ANTHROPIC_BETA_HEADER_VALUES - + edits = context_management.get("edits", []) has_compact = False has_other = False - + for edit in edits: edit_type = edit.get("type", "") if edit_type == "compact_20260112": has_compact = True else: has_other = True - + # Add compact header if any compact edits exist if has_compact: beta_set.add(ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value) - + # Add context management header if any other edits exist if has_other: - beta_set.add(ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value) + beta_set.add( + ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value + ) def transform_request( self, @@ -102,10 +104,10 @@ class VertexAIAnthropicConfig(AnthropicConfig): ) data.pop("model", None) # vertex anthropic doesn't accept 'model' parameter - + # VertexAI doesn't support output_format parameter, remove it if present data.pop("output_format", None) - + tools = optional_params.get("tools") tool_search_used = self.is_tool_search_used(tools) auto_betas = self.get_anthropic_beta_list( @@ 
-119,16 +121,30 @@ class VertexAIAnthropicConfig(AnthropicConfig): beta_set = set(auto_betas) if tool_search_used: - beta_set.add("tool-search-tool-2025-10-19") # Vertex requires this header for tool search - + beta_set.add( + "tool-search-tool-2025-10-19" + ) # Vertex requires this header for tool search + # Add context_management beta headers (compact and/or context-management) context_management = optional_params.get("context_management") if context_management: self._add_context_management_beta_headers(beta_set, context_management) + extra_headers = optional_params.get("extra_headers") or {} + anthropic_beta_value = extra_headers.get("anthropic-beta", "") + if isinstance(anthropic_beta_value, str) and anthropic_beta_value: + for beta in anthropic_beta_value.split(","): + beta = beta.strip() + if beta: + beta_set.add(beta) + elif isinstance(anthropic_beta_value, list): + beta_set.update(anthropic_beta_value) + + data.pop("extra_headers", None) + if beta_set: data["anthropic_beta"] = list(beta_set) - + return data def map_openai_params( @@ -148,7 +164,7 @@ class VertexAIAnthropicConfig(AnthropicConfig): original_model = model if "response_format" in non_default_params: model = "claude-3-sonnet-20240229" # Use a model that will use tool-based approach - + # Call parent method with potentially modified model name optional_params = super().map_openai_params( non_default_params=non_default_params, @@ -156,10 +172,10 @@ class VertexAIAnthropicConfig(AnthropicConfig): model=model, drop_params=drop_params, ) - + # Restore original model name for any other processing model = original_model - + return optional_params def transform_response( diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 543f2f14d7..815d29c796 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -28540,6 +28540,193 @@ "supports_function_calling": true, 
"supports_tool_choice": true }, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet-20241022": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-3-7-sonnet": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-haiku-4.5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + 
"input_cost_per_token": 1e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.1": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + 
"max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.6": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4.5": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + 
"supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, "vercel_ai_gateway/cohere/command-a": { "input_cost_per_token": 2.5e-06, "litellm_provider": "vercel_ai_gateway", @@ -28549,7 +28736,8 @@ "mode": "chat", "output_cost_per_token": 1e-05, "supports_function_calling": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/cohere/command-r": { "input_cost_per_token": 1.5e-07, diff --git a/litellm/proxy/guardrails/guardrail_hooks/zscaler_ai_guard/zscaler_ai_guard.py b/litellm/proxy/guardrails/guardrail_hooks/zscaler_ai_guard/zscaler_ai_guard.py index d62bbb0b45..c60752d795 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/zscaler_ai_guard/zscaler_ai_guard.py +++ b/litellm/proxy/guardrails/guardrail_hooks/zscaler_ai_guard/zscaler_ai_guard.py @@ -92,14 +92,34 @@ class ZscalerAIGuard(CustomGuardrail): Raises: Exception: If content is blocked by Zscaler AI Guard """ + texts = inputs.get("texts", []) try: verbose_proxy_logger.debug(f"ZscalerAIGuard: Checking {len(texts)} text(s)") + metadata = request_data.get("metadata", {}) - custom_policy_id = request_data.get("metadata", {}).get( - "zguard_policy_id", self.policy_id + user_api_key_metadata = metadata.get("user_api_key_metadata", {}) or {} + team_metadata = metadata.get("team_metadata", {}) or {} + + # Precedence for policy_id: + # 1. metadata.zguard_policy_id # request level + # 2. user_api_key_metadata.zguard_policy_id # Key level + # 3. team_metadata.zguard_policy_id # Team level + # 4. 
self.policy_id (from environment) # Global + policy_id = ( + metadata.get("zguard_policy_id") + if "zguard_policy_id" in metadata + else ( + user_api_key_metadata.get("zguard_policy_id") + if "zguard_policy_id" in user_api_key_metadata + else ( + team_metadata.get("zguard_policy_id") + if "zguard_policy_id" in team_metadata + else self.policy_id + ) + ) ) - verbose_proxy_logger.debug(f"custom_policy_id: {custom_policy_id}") + verbose_proxy_logger.info(f"policy_id applied: {policy_id}") kwargs = {} if self.send_user_api_key_alias: @@ -116,27 +136,29 @@ class ZscalerAIGuard(CustomGuardrail): ) verbose_proxy_logger.debug(f"inside apply_guardrail kwargs: {kwargs}") - # Check each text (Zscaler processes one at a time) - for text in texts: + zscaler_ai_guard_result = None + direction = "OUT" if input_type == "response" else "IN" + verbose_proxy_logger.debug(f"direction: {direction}") + # Concatenate all texts and send to Zscaler AI Guard + if texts: + concatenated_text = " ".join(texts) zscaler_ai_guard_result = await self.make_zscaler_ai_guard_api_call( zscaler_ai_guard_url=self.zscaler_ai_guard_url, api_key=self.api_key, - policy_id=self.policy_id, - direction="IN", - content=text, + policy_id=policy_id, + direction=direction, + content=concatenated_text, **kwargs, ) - - if ( - zscaler_ai_guard_result - and zscaler_ai_guard_result.get("action") == "BLOCK" - ): - blocking_info = zscaler_ai_guard_result.get( - "zscaler_ai_guard_response" - ) - error_message = f"Content blocked by Zscaler AI Guard: {self.extract_blocking_info(blocking_info)}" - raise Exception(error_message) - + if ( + zscaler_ai_guard_result + and zscaler_ai_guard_result.get("action") == "BLOCK" + ): + blocking_info = zscaler_ai_guard_result.get( + "zscaler_ai_guard_response" + ) + error_message = f"Content blocked by Zscaler AI Guard: {self.extract_blocking_info(blocking_info)}" + raise Exception(error_message) except Exception as e: verbose_proxy_logger.error( "ZscalerAIGuard: Failed to apply 
guardrail: %s", str(e) diff --git a/litellm/proxy/management_endpoints/common_utils.py b/litellm/proxy/management_endpoints/common_utils.py index 8f7dd4f8df..24a41a2361 100644 --- a/litellm/proxy/management_endpoints/common_utils.py +++ b/litellm/proxy/management_endpoints/common_utils.py @@ -216,7 +216,14 @@ def _update_metadata_field(updated_kv: dict, field_name: str) -> None: field_name: Name of the metadata field being updated """ if field_name in LiteLLM_ManagementEndpoint_MetadataFields_Premium: - _premium_user_check() + value = updated_kv.get(field_name) + # Skip the premium check for empty collections ([] or {}). + # The UI sends these as defaults even when the user hasn't configured + # any enterprise features (see issue #20304). However, we still + # proceed with the update so that users can intentionally clear a + # previously-set field by sending an empty list/dict. + if value is not None and value != [] and value != {}: + _premium_user_check() if field_name in updated_kv and updated_kv[field_name] is not None: # remove field from updated_kv diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma index 279946f78d..1750efed92 100644 --- a/litellm/proxy/schema.prisma +++ b/litellm/proxy/schema.prisma @@ -308,6 +308,16 @@ model LiteLLM_VerificationToken { litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id]) object_permission LiteLLM_ObjectPermissionTable? @relation(fields: [object_permission_id], references: [object_permission_id]) + + // SELECT COUNT(*) FROM (SELECT "public"."LiteLLM_VerificationToken"."token" FROM "public"."LiteLLM_VerificationToken" WHERE ("public"."LiteLLM_VerificationToken"."user_id" = $1 AND ("public"."LiteLLM_VerificationToken"."team_id" IS NULL OR "public"."LiteLLM_VerificationToken"."team_id" <> $2)) OFFSET $3 ) AS "sub" + // SELECT ... 
FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."user_id" = $1 OFFSET $2 + @@index([user_id, team_id]) + + // SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."team_id" = $1 OFFSET $2 + @@index([team_id]) + + // SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE (("public"."LiteLLM_VerificationToken"."expires" IS NULL OR "public"."LiteLLM_VerificationToken"."expires" > $1) AND "public"."LiteLLM_VerificationToken"."budget_reset_at" < $2) OFFSET $3 + @@index([budget_reset_at, expires]) } // Audit table for deleted keys - preserves spend and key information for historical tracking diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm/proxy/spend_tracking/spend_tracking_utils.py index bd148ecb48..cb8b9ec039 100644 --- a/litellm/proxy/spend_tracking/spend_tracking_utils.py +++ b/litellm/proxy/spend_tracking/spend_tracking_utils.py @@ -1,4 +1,3 @@ -import copy import hashlib import json import secrets @@ -642,6 +641,34 @@ def _sanitize_request_body_for_spend_logs_payload( return {k: _sanitize_value(v) for k, v in request_body.items()} +def _convert_to_json_serializable_dict(obj: Any) -> Any: + """ + Convert object to JSON-serializable dict, handling Pydantic models safely. + + This avoids pickle-based deepcopy which fails on Pydantic v2 models + containing _thread.RLock objects. 
+ + Args: + obj: Object to convert (dict, list, Pydantic model, or primitive) + + Returns: + JSON-serializable version of the object + """ + if isinstance(obj, BaseModel): + # Use Pydantic's model_dump() instead of pickle + return obj.model_dump() + elif isinstance(obj, dict): + return {k: _convert_to_json_serializable_dict(v) for k, v in obj.items()} + elif isinstance(obj, list): + return [_convert_to_json_serializable_dict(item) for item in obj] + elif hasattr(obj, "__dict__"): + # Handle objects with __dict__ attribute + return _convert_to_json_serializable_dict(obj.__dict__) + else: + # Primitives (str, int, float, bool, None) pass through + return obj + + def _get_proxy_server_request_for_spend_logs_payload( metadata: dict, litellm_params: dict, @@ -649,7 +676,7 @@ def _get_proxy_server_request_for_spend_logs_payload( ) -> str: """ Only store if _should_store_prompts_and_responses_in_spend_logs() is True - + If turn_off_message_logging is enabled, redact messages in the request body. 
""" if _should_store_prompts_and_responses_in_spend_logs(): @@ -674,9 +701,9 @@ def _get_proxy_server_request_for_spend_logs_payload( ), } - # If redaction is enabled, deep copy request body before redacting + # If redaction is enabled, convert to serializable dict before redacting if should_redact_message_logging(model_call_details=model_call_details): - _request_body = copy.deepcopy(_request_body) + _request_body = _convert_to_json_serializable_dict(_request_body) perform_redaction(model_call_details=_request_body, result=None) _request_body = _sanitize_request_body_for_spend_logs_payload(_request_body) @@ -736,9 +763,9 @@ def _get_response_for_spend_logs_payload( ), } - # If redaction is enabled, deep copy response before redacting + # If redaction is enabled, convert to serializable dict before redacting if should_redact_message_logging(model_call_details=model_call_details): - response_obj = copy.deepcopy(response_obj) + response_obj = _convert_to_json_serializable_dict(response_obj) response_obj = perform_redaction(model_call_details={}, result=response_obj) sanitized_wrapper = _sanitize_request_body_for_spend_logs_payload( diff --git a/litellm/responses/litellm_completion_transformation/streaming_iterator.py b/litellm/responses/litellm_completion_transformation/streaming_iterator.py index 867c18b6dd..5c05526442 100644 --- a/litellm/responses/litellm_completion_transformation/streaming_iterator.py +++ b/litellm/responses/litellm_completion_transformation/streaming_iterator.py @@ -88,6 +88,8 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator): self._pending_tool_events: List[BaseLiteLLMOpenAIResponseObject] = [] self._tool_output_index_by_call_id: dict[str, int] = {} self._tool_args_by_call_id: dict[str, str] = {} + self._tool_call_id_by_index: dict[int, str] = {} + self._ambiguous_tool_call_indexes: set[int] = set() self._next_tool_output_index: int = 1 # output_index=0 reserved for the message item self._final_tool_events_queued: bool 
= False self._sequence_number: int = 0 @@ -111,6 +113,19 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator): self._tool_output_index_by_call_id[call_id] = idx return idx + def _normalize_tool_call_index(self, tool_call: object) -> Optional[int]: + idx_raw = ( + tool_call.get("index") + if isinstance(tool_call, dict) + else getattr(tool_call, "index", None) + ) + if idx_raw is None: + return None + try: + return int(idx_raw) + except (TypeError, ValueError): + return None + def _is_reasoning_end(self, chunk): delta = chunk.choices[0].delta @@ -143,10 +158,28 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator): return for tc in tool_calls: + tc_index = self._normalize_tool_call_index(tc) call_id_raw = tc.get("id") if isinstance(tc, dict) else getattr(tc, "id", None) - if not call_id_raw: + call_id = "" + + if call_id_raw: + call_id = str(call_id_raw) + if tc_index is not None: + existing_call_id = self._tool_call_id_by_index.get(tc_index) + if existing_call_id is not None and existing_call_id != call_id: + # Reusing the same index for multiple call_ids is ambiguous for id-less deltas. + # Guard against silent misrouting by disabling index fallback for this index. 
+ self._ambiguous_tool_call_indexes.add(tc_index) + self._tool_call_id_by_index[tc_index] = call_id + elif tc_index is not None: + if tc_index in self._ambiguous_tool_call_indexes: + continue + mapped_call_id = self._tool_call_id_by_index.get(tc_index) + if mapped_call_id: + call_id = mapped_call_id + + if not call_id: continue - call_id = str(call_id_raw) fn = tc.get("function") if isinstance(tc, dict) else getattr(tc, "function", None) fn_name = "" diff --git a/litellm/router_utils/pre_call_checks/prompt_caching_deployment_check.py b/litellm/router_utils/pre_call_checks/prompt_caching_deployment_check.py index d3d237d9f2..e9c4b69d8e 100644 --- a/litellm/router_utils/pre_call_checks/prompt_caching_deployment_check.py +++ b/litellm/router_utils/pre_call_checks/prompt_caching_deployment_check.py @@ -61,9 +61,10 @@ class PromptCachingDeploymentCheck(CustomLogger): if ( call_type != CallTypes.completion.value and call_type != CallTypes.acompletion.value + and call_type != CallTypes.anthropic_messages.value ): # only use prompt caching for completion calls verbose_logger.debug( - "litellm.router_utils.pre_call_checks.prompt_caching_deployment_check: skipping adding model id to prompt caching cache, CALL TYPE IS NOT COMPLETION" + "litellm.router_utils.pre_call_checks.prompt_caching_deployment_check: skipping adding model id to prompt caching cache, CALL TYPE IS NOT COMPLETION or ANTHROPIC MESSAGE" ) return diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 84ac01a4ec..cef9c45042 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -362,6 +362,7 @@ class AnthropicMessagesRequestOptionalParams(TypedDict, total=False): container: Optional[Dict[str, Any]] # Container config with skills for code execution output_format: Optional[AnthropicOutputSchema] # Structured outputs support speed: Optional[str] # Fast mode support for Opus models + output_config: Optional[AnthropicOutputConfig] # Configuration for 
Claude's output behavior class AnthropicMessagesRequest(AnthropicMessagesRequestOptionalParams, total=False): diff --git a/litellm/types/llms/oci.py b/litellm/types/llms/oci.py index 9a654bc0f6..cb1dd39143 100644 --- a/litellm/types/llms/oci.py +++ b/litellm/types/llms/oci.py @@ -102,6 +102,7 @@ class OCIChatRequestPayload(BaseModel): seed: Optional[int] = None frequencyPenalty: Optional[float] = None presencePenalty: Optional[float] = None + responseFormat: Optional[Dict[str, Any]] = None class OCIServingMode(BaseModel): @@ -125,14 +126,14 @@ class OCICompletionPayload(BaseModel): class OCICompletionTokenDetails(BaseModel): """Completion token details in the OCI response.""" - acceptedPredictionTokens: int - reasoningTokens: int + acceptedPredictionTokens: Optional[int] = None + reasoningTokens: Optional[int] = None class OCIPromptTokensDetails(BaseModel): """Prompt token details in the OCI response.""" - cachedTokens: int + cachedTokens: Optional[int] = None class OCIResponseUsage(BaseModel): @@ -205,40 +206,40 @@ class CohereStreamChunk(BaseModel): class CohereMessage(BaseModel): """Base model for Cohere messages.""" - + role: str - message: str + message: Optional[str] = None toolCalls: Optional[List[CohereToolCall]] = None class CohereUserMessage(CohereMessage): """User message in Cohere chat.""" - + role: Literal["USER"] = "USER" class CohereChatBotMessage(CohereMessage): """Chatbot message in Cohere chat.""" - + role: Literal["CHATBOT"] = "CHATBOT" class CohereSystemMessage(CohereMessage): """System message in Cohere chat.""" - + role: Literal["SYSTEM"] = "SYSTEM" class CohereToolMessage(CohereMessage): """Tool message in Cohere chat.""" - + role: Literal["TOOL"] = "TOOL" toolCallId: str class CohereParameterDefinition(BaseModel): """Parameter definition for Cohere tools.""" - + description: str type: str isRequired: bool = False @@ -246,7 +247,7 @@ class CohereParameterDefinition(BaseModel): class CohereTool(BaseModel): """Tool definition for Cohere.""" - + 
name: str description: str parameterDefinitions: Dict[str, CohereParameterDefinition] @@ -254,38 +255,44 @@ class CohereTool(BaseModel): class CohereToolCall(BaseModel): """Tool call made by Cohere model.""" - + name: str parameters: Dict[str, Any] class CohereToolResult(BaseModel): """Result of a tool call.""" - + callId: str result: str class CohereResponseFormat(BaseModel): """Response format for Cohere.""" - + type: str class CohereResponseTextFormat(CohereResponseFormat): """Text response format for Cohere.""" - + type: Literal["text"] = "text" +class CohereResponseJSONSchemaFormat(CohereResponseFormat): + """JSON schema response format for Cohere.""" + + type: Literal["json_schema"] = "json_schema" + jsonSchema: Dict[str, Any] + class CohereChatRequest(BaseModel): """Cohere chat request model.""" - + # Required fields message: str apiFormat: Literal["COHERE"] = "COHERE" - + # Optional fields chatHistory: Optional[List[CohereMessage]] = None maxTokens: Optional[int] = None @@ -298,7 +305,7 @@ class CohereChatRequest(BaseModel): seed: Optional[int] = None tools: Optional[List[CohereTool]] = None toolChoice: Optional[Union[str, Dict[str, Any]]] = None - responseFormat: Optional[CohereResponseFormat] = None + responseFormat: Optional[Union[CohereResponseTextFormat, CohereResponseJSONSchemaFormat, CohereResponseFormat]] = None preambleOverride: Optional[str] = None documents: Optional[List[Dict[str, Any]]] = None searchQueriesOnly: Optional[bool] = None @@ -318,7 +325,7 @@ class CohereChatRequest(BaseModel): class CohereUsage(BaseModel): """Usage information for Cohere response.""" - + promptTokens: int completionTokens: int totalTokens: int @@ -328,7 +335,7 @@ class CohereUsage(BaseModel): class CohereCitation(BaseModel): """Citation in Cohere response.""" - + start: int end: int text: str @@ -337,19 +344,19 @@ class CohereCitation(BaseModel): class CohereSearchQuery(BaseModel): """Search query generated by Cohere.""" - + text: str generation_id: str class 
CohereChatResponse(BaseModel): """Cohere chat response model.""" - + # Required fields text: str apiFormat: Literal["COHERE"] = "COHERE" finishReason: Literal["COMPLETE", "ERROR_TOXIC", "ERROR_LIMIT", "ERROR", "USER_CANCEL", "MAX_TOKENS"] - + # Optional fields chatHistory: Optional[List[CohereMessage]] = None citations: Optional[List[CohereCitation]] = None @@ -364,7 +371,7 @@ class CohereChatResponse(BaseModel): class CohereChatDetails(BaseModel): """Chat details for Cohere request.""" - + compartmentId: str servingMode: OCIServingMode chatRequest: CohereChatRequest @@ -372,8 +379,7 @@ class CohereChatDetails(BaseModel): class CohereChatResult(BaseModel): """Complete Cohere chat result.""" - + modelId: str modelVersion: str chatResponse: CohereChatResponse - diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 543f2f14d7..815d29c796 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -28540,6 +28540,193 @@ "supports_function_calling": true, "supports_tool_choice": true }, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet-20241022": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + 
"output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-3-7-sonnet": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-haiku-4.5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + 
"supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.1": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.6": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": 
true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4.5": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, "vercel_ai_gateway/cohere/command-a": { "input_cost_per_token": 2.5e-06, "litellm_provider": "vercel_ai_gateway", @@ -28549,7 +28736,8 @@ "mode": "chat", "output_cost_per_token": 1e-05, "supports_function_calling": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/cohere/command-r": { "input_cost_per_token": 1.5e-07, diff --git a/package.json b/package.json index 7f90fd0aeb..ab9e15f46a 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,8 @@ "jest": "^29.7.0" }, "overrides": { - "glob": ">=11.1.0" + "glob": ">=11.1.0", + "tar": ">=7.5.7", + "@isaacs/brace-expansion": ">=5.0.1" } } diff --git a/requirements.txt b/requirements.txt index 1f21cc62bc..f680de120c 
100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,9 @@ # LITELLM PROXY DEPENDENCIES # +# Security: explicit pins for transitive deps (CVE fixes) +urllib3>=2.6.0 # CVE-2025-66471, CVE-2025-66418, CVE-2026-21441 +tornado>=6.5.3 # CVE-2025-67725, CVE-2025-67726, CVE-2025-67724 +filelock>=3.20.1 # CVE-2025-68146 + anyio==4.8.0 # openai + http req. httpx==0.28.1 openai==2.9.0 # openai req. diff --git a/schema.prisma b/schema.prisma index ecf4e06ef6..9a87a491cf 100644 --- a/schema.prisma +++ b/schema.prisma @@ -310,6 +310,16 @@ model LiteLLM_VerificationToken { litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id]) object_permission LiteLLM_ObjectPermissionTable? @relation(fields: [object_permission_id], references: [object_permission_id]) + + // SELECT COUNT(*) FROM (SELECT "public"."LiteLLM_VerificationToken"."token" FROM "public"."LiteLLM_VerificationToken" WHERE ("public"."LiteLLM_VerificationToken"."user_id" = $1 AND ("public"."LiteLLM_VerificationToken"."team_id" IS NULL OR "public"."LiteLLM_VerificationToken"."team_id" <> $2)) OFFSET $3 ) AS "sub" + // SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."user_id" = $1 OFFSET $2 + @@index([user_id, team_id]) + + // SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."team_id" = $1 OFFSET $2 + @@index([team_id]) + + // SELECT ... 
FROM "public"."LiteLLM_VerificationToken" WHERE (("public"."LiteLLM_VerificationToken"."expires" IS NULL OR "public"."LiteLLM_VerificationToken"."expires" > $1) AND "public"."LiteLLM_VerificationToken"."budget_reset_at" < $2) OFFSET $3 + @@index([budget_reset_at, expires]) } // Audit table for deleted keys - preserves spend and key information for historical tracking diff --git a/tests/guardrails_tests/test_zscaler_ai_guard.py b/tests/guardrails_tests/test_zscaler_ai_guard.py index cf70af510c..9d519c17f6 100644 --- a/tests/guardrails_tests/test_zscaler_ai_guard.py +++ b/tests/guardrails_tests/test_zscaler_ai_guard.py @@ -116,4 +116,131 @@ def test_extract_blocking_info(): blocking_info = guardrail.extract_blocking_info(response) assert blocking_info["transactionId"] == "12345" - assert blocking_info["blockingDetectors"] == ["detector1"] \ No newline at end of file + assert blocking_info["blockingDetectors"] == ["detector1"] + + +@pytest.mark.asyncio +@patch( + "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call", + new_callable=AsyncMock, +) +async def test_apply_guardrail_text_concatenation(mock_api_call): + """ + Test that `apply_guardrail` correctly concatenates texts. + """ + guardrail = ZscalerAIGuard(policy_id=100) + inputs = {"texts": ["Hello", "world"]} + request_data = {} + + await guardrail.apply_guardrail(inputs, request_data, "request") + + mock_api_call.assert_called_once() + call_args = mock_api_call.call_args + assert call_args.kwargs["content"] == "Hello world" + + +@pytest.mark.asyncio +@patch( + "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call", + new_callable=AsyncMock, +) +async def test_policy_id_from_request_metadata(mock_api_call): + """ + Test policy_id is picked from request metadata (highest precedence). 
+ """ + guardrail = ZscalerAIGuard(policy_id=100) + inputs = {"texts": ["test"]} + request_data = { + "metadata": { + "zguard_policy_id": 1, + "user_api_key_metadata": {"zguard_policy_id": 2}, + "team_metadata": {"zguard_policy_id": 3}, + } + } + + await guardrail.apply_guardrail(inputs, request_data, "request") + + mock_api_call.assert_called_once() + assert mock_api_call.call_args.kwargs["policy_id"] == 1 + + +@pytest.mark.asyncio +@patch( + "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call", + new_callable=AsyncMock, +) +async def test_policy_id_from_user_api_key_metadata(mock_api_call): + """ + Test policy_id is picked from user_api_key_metadata (2nd precedence). + """ + guardrail = ZscalerAIGuard(policy_id=100) + inputs = {"texts": ["test"]} + request_data = { + "metadata": { + "user_api_key_metadata": {"zguard_policy_id": 2}, + "team_metadata": {"zguard_policy_id": 3}, + } + } + + await guardrail.apply_guardrail(inputs, request_data, "request") + + mock_api_call.assert_called_once() + assert mock_api_call.call_args.kwargs["policy_id"] == 2 + + +@pytest.mark.asyncio +@patch( + "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call", + new_callable=AsyncMock, +) +async def test_policy_id_from_team_metadata(mock_api_call): + """ + Test policy_id is picked from team_metadata (3rd precedence). 
+ """ + guardrail = ZscalerAIGuard(policy_id=100) + inputs = {"texts": ["test"]} + request_data = {"metadata": {"team_metadata": {"zguard_policy_id": 3}}} + + await guardrail.apply_guardrail(inputs, request_data, "request") + + mock_api_call.assert_called_once() + assert mock_api_call.call_args.kwargs["policy_id"] == 3 + + +@pytest.mark.asyncio +@patch( + "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call", + new_callable=AsyncMock, +) +async def test_policy_id_from_init(mock_api_call): + """ + Test policy_id is picked from guardrail initialization (lowest precedence). + """ + guardrail = ZscalerAIGuard(policy_id=100) + inputs = {"texts": ["test"]} + request_data = {"metadata": {}} + + await guardrail.apply_guardrail(inputs, request_data, "request") + + mock_api_call.assert_called_once() + assert mock_api_call.call_args.kwargs["policy_id"] == 100 + +@pytest.mark.asyncio +@patch( + "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call", + new_callable=AsyncMock, +) +async def test_policy_id_zero_from_request_metadata(mock_api_call): + """ + Test policy_id=0 is correctly picked. 
Make sure pick exact policy_id which users set + """ + guardrail = ZscalerAIGuard(policy_id=100) + inputs = {"texts": ["test"]} + request_data = { + "metadata": { + "zguard_policy_id": 0, + } + } + await guardrail.apply_guardrail(inputs, request_data, "request") + mock_api_call.assert_called_once() + assert mock_api_call.call_args.kwargs["policy_id"] == 0 diff --git a/tests/proxy_admin_ui_tests/package.json b/tests/proxy_admin_ui_tests/package.json index cbd25be881..48de2c1dba 100644 --- a/tests/proxy_admin_ui_tests/package.json +++ b/tests/proxy_admin_ui_tests/package.json @@ -12,6 +12,8 @@ "@types/node": "^22.5.5" }, "overrides": { - "glob": ">=11.1.0" + "glob": ">=11.1.0", + "tar": ">=7.5.7", + "@isaacs/brace-expansion": ">=5.0.1" } } diff --git a/tests/proxy_admin_ui_tests/ui_unit_tests/package.json b/tests/proxy_admin_ui_tests/ui_unit_tests/package.json index 7d82ee2e1a..4c7d7addf0 100644 --- a/tests/proxy_admin_ui_tests/ui_unit_tests/package.json +++ b/tests/proxy_admin_ui_tests/ui_unit_tests/package.json @@ -24,6 +24,8 @@ "react-dom": "^18.2.0" }, "overrides": { - "glob": ">=11.1.0" + "glob": ">=11.1.0", + "tar": ">=7.5.7", + "@isaacs/brace-expansion": ">=5.0.1" } } \ No newline at end of file diff --git a/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py b/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py index eef206ca66..da6d802792 100644 --- a/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py +++ b/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py @@ -158,6 +158,76 @@ def test_get_combined_tool_content(): ] +def test_get_combined_thinking_content_preserves_interleaved_blocks(): + base_chunk = { + "id": "chatcmpl-123", + "object": "chat.completion.chunk", + "created": 1234567890, + "model": "claude-sonnet-4-20250514", + } + + def make_chunk(**delta_kwargs): + return ModelResponseStream( + **base_chunk, + choices=[ + StreamingChoices( + index=0, + 
delta=Delta(**delta_kwargs), + finish_reason=None, + ) + ], + ) + + chunks = [ + make_chunk(role="assistant", content=None), + make_chunk( + thinking_blocks=[ + {"type": "thinking", "thinking": "Step 1 analysis...", "signature": None} + ] + ), + make_chunk( + thinking_blocks=[ + {"type": "thinking", "thinking": None, "signature": "sig_block1"} + ] + ), + make_chunk( + thinking_blocks=[ + { + "type": "redacted_thinking", + "data": "EuoBCoYBGAIi...encrypted...", + } + ] + ), + make_chunk( + thinking_blocks=[ + {"type": "thinking", "thinking": "Step 2 analysis...", "signature": None} + ] + ), + make_chunk( + thinking_blocks=[ + {"type": "thinking", "thinking": None, "signature": "sig_block2"} + ] + ), + ] + + thinking_chunks = [ + chunk for chunk in chunks if chunk["choices"][0]["delta"].get("thinking_blocks") + ] + processor = ChunkProcessor(chunks=chunks) + result = processor.get_combined_thinking_content(thinking_chunks) + + assert result is not None + assert len(result) == 3 + assert result[0]["type"] == "thinking" + assert result[0]["thinking"] == "Step 1 analysis..." + assert result[0]["signature"] == "sig_block1" + assert result[1]["type"] == "redacted_thinking" + assert result[1]["data"] == "EuoBCoYBGAIi...encrypted..." + assert result[2]["type"] == "thinking" + assert result[2]["thinking"] == "Step 2 analysis..." 
+ assert result[2]["signature"] == "sig_block2" + + def test_cache_read_input_tokens_retained(): chunk1 = ModelResponseStream( id="chatcmpl-95aabb85-c39f-443d-ae96-0370c404d70c", @@ -441,4 +511,4 @@ def test_stream_chunk_builder_anthropic_web_search(): assert usage.prompt_tokens == 50 assert usage.completion_tokens == 27 assert usage.total_tokens == 77 - assert usage.server_tool_use['web_search_requests'] == 2 \ No newline at end of file + assert usage.server_tool_use['web_search_requests'] == 2 diff --git a/tests/test_litellm/llms/oci/chat/test_oci_chat_transformation.py b/tests/test_litellm/llms/oci/chat/test_oci_chat_transformation.py index 3bd46b84e6..3b53f9de71 100644 --- a/tests/test_litellm/llms/oci/chat/test_oci_chat_transformation.py +++ b/tests/test_litellm/llms/oci/chat/test_oci_chat_transformation.py @@ -287,6 +287,114 @@ class TestOCIChatConfig: # Verify the message content assert transformed_request["chatRequest"]["message"] == "What is quantum computing?" + def test_transform_request_response_format_json_object(self): + """ + Tests that response_format type 'json_object' is uppercased to 'JSON_OBJECT' for generic OCI models. + """ + config = OCIChatConfig() + optional_params = { + "oci_compartment_id": TEST_COMPARTMENT_ID, + "response_format": {"type": "json_object"}, + } + transformed_request = config.transform_request( + model=TEST_MODEL_NAME, + messages=TEST_MESSAGES, # type: ignore + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + rf = transformed_request["chatRequest"]["responseFormat"] + assert rf["type"] == "JSON_OBJECT" + + def test_transform_request_response_format_text(self): + """ + Tests that response_format type 'text' is uppercased to 'TEXT' for generic OCI models. 
+ """ + config = OCIChatConfig() + optional_params = { + "oci_compartment_id": TEST_COMPARTMENT_ID, + "response_format": {"type": "text"}, + } + transformed_request = config.transform_request( + model=TEST_MODEL_NAME, + messages=TEST_MESSAGES, # type: ignore + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + rf = transformed_request["chatRequest"]["responseFormat"] + assert rf["type"] == "TEXT" + + def test_transform_request_response_format_json_shorthand(self): + """ + Tests that response_format type 'json' is mapped to 'JSON_OBJECT' for generic OCI models. + """ + config = OCIChatConfig() + optional_params = { + "oci_compartment_id": TEST_COMPARTMENT_ID, + "response_format": {"type": "json"}, + } + transformed_request = config.transform_request( + model=TEST_MODEL_NAME, + messages=TEST_MESSAGES, # type: ignore + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + rf = transformed_request["chatRequest"]["responseFormat"] + assert rf["type"] == "JSON_OBJECT" + + def test_transform_response_without_token_details(self): + """ + Tests that responses missing completionTokensDetails and promptTokensDetails + are handled correctly (fields are optional). 
+ """ + config = OCIChatConfig() + created_time = datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z") + mock_oci_response = { + "modelId": TEST_MODEL_NAME, + "modelVersion": "1.0", + "chatResponse": { + "apiFormat": "GENERIC", + "choices": [ + { + "index": 0, + "message": { + "role": "ASSISTANT", + "content": [{"type": "TEXT", "text": "Hello!"}], + }, + "finishReason": "STOP", + } + ], + "timeCreated": created_time, + "usage": { + "promptTokens": 5, + "completionTokens": 10, + "totalTokens": 15, + }, + }, + } + response = httpx.Response( + status_code=200, json=mock_oci_response, headers={"Content-Type": "application/json"} + ) + result = config.transform_response( + model=TEST_MODEL_NAME, + raw_response=response, + model_response=ModelResponse(), + logging_obj={}, # type: ignore + request_data={}, + messages=[], + optional_params={}, + litellm_params={}, + encoding={}, + ) + + assert isinstance(result, ModelResponse) + assert result.choices[0].message.content == "Hello!" + assert result.usage.prompt_tokens == 5 # type: ignore + assert result.usage.completion_tokens == 10 # type: ignore + assert result.usage.total_tokens == 15 # type: ignore + def test_transform_response_simple_text(self): """ Tests if a simple text response is transformed correctly. 
diff --git a/tests/test_litellm/llms/oci/chat/test_oci_cohere_tool_calls.py b/tests/test_litellm/llms/oci/chat/test_oci_cohere_tool_calls.py index abbb7e3e30..a9c4bead82 100644 --- a/tests/test_litellm/llms/oci/chat/test_oci_cohere_tool_calls.py +++ b/tests/test_litellm/llms/oci/chat/test_oci_cohere_tool_calls.py @@ -239,6 +239,110 @@ class TestOCICohereToolCalls: assert result.usage.completion_tokens == 22 assert result.usage.total_tokens == 48 + def test_cohere_request_preserves_json_schema_response_format(self): + """Ensure Cohere requests retain JSON schema payloads in responseFormat.""" + config = OCIChatConfig() + messages = [{"role": "user", "content": "Return structured info"}] + response_format = { + "type": "json_schema", + "json_schema": { + "name": "test_schema", + "strict": True, + "schema": { + "type": "object", + "properties": { + "foo": {"type": "string"} + }, + "required": ["foo"] + } + } + } + optional_params = { + "oci_compartment_id": TEST_COMPARTMENT_ID, + "response_format": response_format, + } + + transformed_request = config.transform_request( + model="cohere.command-rplus", + messages=messages, # type: ignore[arg-type] + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + + chat_request = transformed_request["chatRequest"] + assert chat_request["apiFormat"] == "COHERE" + assert "responseFormat" in chat_request + + cohere_response_format = chat_request["responseFormat"] + assert cohere_response_format["type"] == "json_schema" + assert "json_schema" not in cohere_response_format + assert "jsonSchema" in cohere_response_format + assert cohere_response_format["jsonSchema"] == response_format["json_schema"] + + def test_cohere_request_response_format_text_stays_lowercase(self): + """Ensure Cohere keeps response_format type lowercase (e.g. 
'text' not 'TEXT').""" + config = OCIChatConfig() + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "oci_compartment_id": TEST_COMPARTMENT_ID, + "response_format": {"type": "text"}, + } + + transformed_request = config.transform_request( + model="cohere.command-latest", + messages=messages, # type: ignore + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + + chat_request = transformed_request["chatRequest"] + assert chat_request["apiFormat"] == "COHERE" + assert "responseFormat" in chat_request + assert chat_request["responseFormat"]["type"] == "text" + + def test_cohere_tool_call_only_message_no_text(self): + """Test chat history with an assistant message that has tool calls but no text content.""" + config = OCIChatConfig() + + messages = [ + {"role": "user", "content": "What's the weather?"}, + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "get_weather", + "arguments": '{"location": "Paris"}', + }, + } + ], + }, + { + "role": "tool", + "content": "Sunny, 25C", + "tool_call_id": "call_1", + }, + ] + + chat_history = config.adapt_messages_to_cohere_standard(messages) + + # First message is the user message + assert chat_history[0].role == "USER" + assert chat_history[0].message == "What's the weather?" 
+ + # Second message is the assistant with tool calls and no text + assistant_msg = chat_history[1] + assert assistant_msg.role == "CHATBOT" + assert assistant_msg.message is None or assistant_msg.message == "" + assert assistant_msg.toolCalls is not None + assert len(assistant_msg.toolCalls) == 1 + assert assistant_msg.toolCalls[0].name == "get_weather" + def test_cohere_chat_history_with_tool_calls(self): """Test chat history transformation with tool calls""" config = OCIChatConfig() diff --git a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py index 90ab41aadf..4bcafd4c57 100644 --- a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py +++ b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py @@ -45,68 +45,65 @@ def test_vertex_ai_anthropic_web_search_header_in_completion(): # Create the config instance model_info = AnthropicModelInfo() - + # Test the header generation directly tools = [{"type": "web_search_20250305", "name": "web_search", "max_uses": 5}] - + # Check if web search tool is detected web_search_detected = model_info.is_web_search_tool_used(tools=tools) assert web_search_detected is True, "Web search tool should be detected" - + # Generate headers with is_vertex_request=True headers = model_info.get_anthropic_headers( api_key="test-key", web_search_tool_used=web_search_detected, is_vertex_request=True, ) - + # Assert that the anthropic-beta header with web-search is present assert "anthropic-beta" in headers, "anthropic-beta header should be present" - assert headers["anthropic-beta"] == "web-search-2025-03-05", \ - f"anthropic-beta should be 'web-search-2025-03-05', got: 
{headers['anthropic-beta']}" - + assert ( + headers["anthropic-beta"] == "web-search-2025-03-05" + ), f"anthropic-beta should be 'web-search-2025-03-05', got: {headers['anthropic-beta']}" + # Test that header is NOT added for non-Vertex requests headers_non_vertex = model_info.get_anthropic_headers( api_key="test-key", web_search_tool_used=web_search_detected, is_vertex_request=False, ) - + # For non-Vertex (Anthropic-hosted), the web search header should NOT be in anthropic-beta # because Anthropic doesn't require it - assert "anthropic-beta" not in headers_non_vertex or "web-search" not in headers_non_vertex.get("anthropic-beta", ""), \ - "anthropic-beta with web-search should not be present for non-Vertex requests" + assert ( + "anthropic-beta" not in headers_non_vertex + or "web-search" not in headers_non_vertex.get("anthropic-beta", "") + ), "anthropic-beta with web-search should not be present for non-Vertex requests" def test_vertex_ai_anthropic_context_management_compact_beta_header(): """Test that context_management with compact adds the correct beta header for Vertex AI""" config = VertexAIAnthropicConfig() - + messages = [{"role": "user", "content": "Hello"}] optional_params = { - "context_management": { - "edits": [ - { - "type": "compact_20260112" - } - ] - }, + "context_management": {"edits": [{"type": "compact_20260112"}]}, "max_tokens": 100, - "is_vertex_request": True + "is_vertex_request": True, } - + result = config.transform_request( model="claude-opus-4-6", messages=messages, optional_params=optional_params, litellm_params={}, - headers={} + headers={}, ) - + # Verify context_management is included assert "context_management" in result assert result["context_management"]["edits"][0]["type"] == "compact_20260112" - + # Verify compact beta header is in anthropic_beta field assert "anthropic_beta" in result assert "compact-2026-01-12" in result["anthropic_beta"] @@ -115,33 +112,27 @@ def 
test_vertex_ai_anthropic_context_management_compact_beta_header(): def test_vertex_ai_anthropic_context_management_mixed_edits(): """Test that context_management with both compact and other edits adds both beta headers""" config = VertexAIAnthropicConfig() - + messages = [{"role": "user", "content": "Hello"}] optional_params = { "context_management": { "edits": [ - { - "type": "compact_20260112" - }, - { - "type": "replace", - "message_id": "msg_123", - "content": "new content" - } + {"type": "compact_20260112"}, + {"type": "replace", "message_id": "msg_123", "content": "new content"}, ] }, "max_tokens": 100, - "is_vertex_request": True + "is_vertex_request": True, } - + result = config.transform_request( model="claude-opus-4-6", messages=messages, optional_params=optional_params, litellm_params={}, - headers={} + headers={}, ) - + # Verify both beta headers are present assert "anthropic_beta" in result assert "compact-2026-01-12" in result["anthropic_beta"] @@ -151,58 +142,65 @@ def test_vertex_ai_anthropic_context_management_mixed_edits(): def test_vertex_ai_anthropic_structured_output_header_not_added(): """Test that structured output beta headers are NOT added for Vertex AI requests""" from litellm.llms.anthropic.chat.transformation import AnthropicConfig - + config = AnthropicConfig() - + # Test case 1: Vertex request with output_format should NOT add beta header headers_vertex = {} optional_params_vertex = { - 'output_format': { - 'type': 'json_schema', - 'json_schema': { - 'name': 'MathResult', - 'schema': {'properties': {'result': {'type': 'integer'}}} - } + "output_format": { + "type": "json_schema", + "json_schema": { + "name": "MathResult", + "schema": {"properties": {"result": {"type": "integer"}}}, + }, }, - 'is_vertex_request': True + "is_vertex_request": True, } - result_vertex = config.update_headers_with_optional_anthropic_beta(headers_vertex, optional_params_vertex) - - assert "anthropic-beta" not in result_vertex, \ - f"Vertex request should NOT 
have anthropic-beta header for structured output, got: {result_vertex.get('anthropic-beta')}" - + result_vertex = config.update_headers_with_optional_anthropic_beta( + headers_vertex, optional_params_vertex + ) + + assert ( + "anthropic-beta" not in result_vertex + ), f"Vertex request should NOT have anthropic-beta header for structured output, got: {result_vertex.get('anthropic-beta')}" + # Test case 2: Non-Vertex request with output_format SHOULD add beta header headers_non_vertex = {} optional_params_non_vertex = { - 'output_format': { - 'type': 'json_schema', - 'json_schema': { - 'name': 'MathResult', - 'schema': {'properties': {'result': {'type': 'integer'}}} - } + "output_format": { + "type": "json_schema", + "json_schema": { + "name": "MathResult", + "schema": {"properties": {"result": {"type": "integer"}}}, + }, }, - 'is_vertex_request': False + "is_vertex_request": False, } - result_non_vertex = config.update_headers_with_optional_anthropic_beta(headers_non_vertex, optional_params_non_vertex) - - assert "anthropic-beta" in result_non_vertex, \ - "Non-Vertex request SHOULD have anthropic-beta header for structured output" - assert result_non_vertex["anthropic-beta"] == "structured-outputs-2025-11-13", \ - f"Expected 'structured-outputs-2025-11-13', got: {result_non_vertex.get('anthropic-beta')}" + result_non_vertex = config.update_headers_with_optional_anthropic_beta( + headers_non_vertex, optional_params_non_vertex + ) + + assert ( + "anthropic-beta" in result_non_vertex + ), "Non-Vertex request SHOULD have anthropic-beta header for structured output" + assert ( + result_non_vertex["anthropic-beta"] == "structured-outputs-2025-11-13" + ), f"Expected 'structured-outputs-2025-11-13', got: {result_non_vertex.get('anthropic-beta')}" def test_vertex_ai_claude_sonnet_4_5_structured_output_fix(): """ - Test fix for issue #18625: Claude Sonnet 4.5 on VertexAI should use tool-based + Test fix for issue #18625: Claude Sonnet 4.5 on VertexAI should use tool-based 
structured outputs instead of output_format parameter. - + This test verifies that: 1. Claude Sonnet 4.5 uses tool-based structured outputs on VertexAI 2. output_format parameter is removed from the final request 3. The fix prevents "Extra inputs are not permitted" error """ config = VertexAIAnthropicConfig() - + # Test data matching the issue report response_format = { "type": "json_schema", @@ -212,29 +210,23 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix(): "schema": { "type": "object", "properties": { - "question": { - "type": "string" - }, - "response": { - "type": "string" - } + "question": {"type": "string"}, + "response": {"type": "string"}, }, "required": ["question", "response"], - "additionalProperties": False - } - } + "additionalProperties": False, + }, + }, } - - messages = [ - {"role": "user", "content": "Generate a question and answer about AI."} - ] - + + messages = [{"role": "user", "content": "Generate a question and answer about AI."}] + # Test parameters that would trigger the issue non_default_params = { "response_format": response_format, "max_tokens": 1000, } - + # Test 1: Verify map_openai_params forces tool-based approach for Claude Sonnet 4.5 optional_params = {} result_params = config.map_openai_params( @@ -243,17 +235,19 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix(): model="claude-3-5-sonnet-20241022", # Claude Sonnet 4.5 model drop_params=False, ) - + # Should have tools and tool_choice (tool-based approach) assert "tools" in result_params, "Tools should be present for structured output" - assert "tool_choice" in result_params, "Tool choice should be present for structured output" + assert ( + "tool_choice" in result_params + ), "Tool choice should be present for structured output" assert "json_mode" in result_params, "JSON mode should be enabled" - + # Verify the tool is the response format tool tools = result_params["tools"] assert len(tools) == 1, "Should have exactly one tool for response format" 
assert tools[0]["name"] == "json_tool_call", "Tool should be named json_tool_call" - + # Test 2: Verify transform_request removes output_format parameter # Simulate what would happen if parent class added output_format test_data = { @@ -264,20 +258,22 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix(): "tool_choice": result_params["tool_choice"], "output_format": { # This would be added by parent class for Sonnet 4.5 "type": "json_schema", - "schema": response_format["json_schema"]["schema"] - } + "schema": response_format["json_schema"]["schema"], + }, } - + # Mock the parent transform_request to return data with output_format original_transform = config.__class__.__bases__[0].transform_request - - def mock_transform_request(self, model, messages, optional_params, litellm_params, headers): + + def mock_transform_request( + self, model, messages, optional_params, litellm_params, headers + ): # Return test data that includes output_format return test_data.copy() - + # Temporarily replace parent method config.__class__.__bases__[0].transform_request = mock_transform_request - + try: final_data = config.transform_request( model="claude-3-5-sonnet-20241022", @@ -286,13 +282,15 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix(): litellm_params={}, headers={}, ) - + # Verify that output_format was removed (fixes the "Extra inputs are not permitted" error) - assert "output_format" not in final_data, "output_format should be removed for VertexAI" + assert ( + "output_format" not in final_data + ), "output_format should be removed for VertexAI" assert "model" not in final_data, "model should be removed for VertexAI" assert "tools" in final_data, "tools should still be present" assert "tool_choice" in final_data, "tool_choice should still be present" - + finally: # Restore original method config.__class__.__bases__[0].transform_request = original_transform @@ -300,43 +298,149 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix(): def 
test_vertex_ai_anthropic_other_models_still_use_tools(): """ - Test that other Anthropic models (non-Sonnet 4.5) on VertexAI also use tool-based + Test that other Anthropic models (non-Sonnet 4.5) on VertexAI also use tool-based structured outputs, ensuring consistency across all models. """ config = VertexAIAnthropicConfig() - + response_format = { "type": "json_schema", "json_schema": { "name": "test_schema", - "schema": { - "type": "object", - "properties": { - "result": {"type": "string"} - } - } - } + "schema": {"type": "object", "properties": {"result": {"type": "string"}}}, + }, } - + # Test with Claude 3 Sonnet (not 4.5) non_default_params = {"response_format": response_format} optional_params = {} - + result_params = config.map_openai_params( non_default_params=non_default_params, optional_params=optional_params, model="claude-3-sonnet-20240229", drop_params=False, ) - + # Should still use tool-based approach - assert "tools" in result_params, "Claude 3 Sonnet should also use tool-based structured output" + assert ( + "tools" in result_params + ), "Claude 3 Sonnet should also use tool-based structured output" assert "tool_choice" in result_params, "Tool choice should be present" assert "json_mode" in result_params, "JSON mode should be enabled" + +def test_vertex_ai_anthropic_extra_headers_beta_propagation(): + """Test that anthropic-beta values from extra_headers are propagated to the + anthropic_beta request body field for Vertex AI requests. + + Vertex AI requires beta flags in the request body (anthropic_beta array), + not as HTTP headers. This mirrors the Bedrock handler's behavior of + extracting user-specified beta headers. 
+ """ + config = VertexAIAnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "max_tokens": 100, + "is_vertex_request": True, + "extra_headers": { + "anthropic-beta": "interleaved-thinking-2025-05-14", + }, + } + + result = config.transform_request( + model="claude-sonnet-4-20250514", + messages=messages, + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + + assert "anthropic_beta" in result + assert "interleaved-thinking-2025-05-14" in result["anthropic_beta"] + assert "extra_headers" not in result + + +def test_vertex_ai_anthropic_extra_headers_beta_merged_with_auto_betas(): + """Test that extra_headers betas are merged with auto-detected betas + rather than replacing them.""" + config = VertexAIAnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "max_tokens": 100, + "is_vertex_request": True, + "extra_headers": { + "anthropic-beta": "interleaved-thinking-2025-05-14", + }, + "context_management": {"edits": [{"type": "compact_20260112"}]}, + } + + result = config.transform_request( + model="claude-opus-4-6", + messages=messages, + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + + assert "anthropic_beta" in result + assert "interleaved-thinking-2025-05-14" in result["anthropic_beta"] + assert "compact-2026-01-12" in result["anthropic_beta"] + + +def test_vertex_ai_anthropic_extra_headers_comma_separated_betas(): + """Test that comma-separated beta values in extra_headers are all extracted.""" + config = VertexAIAnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "max_tokens": 100, + "is_vertex_request": True, + "extra_headers": { + "anthropic-beta": "interleaved-thinking-2025-05-14,dev-full-thinking-2025-05-14", + }, + } + + result = config.transform_request( + model="claude-sonnet-4-20250514", + messages=messages, + optional_params=optional_params, + litellm_params={}, + headers={}, + ) 
+ + assert "anthropic_beta" in result + assert "interleaved-thinking-2025-05-14" in result["anthropic_beta"] + assert "dev-full-thinking-2025-05-14" in result["anthropic_beta"] + + +def test_vertex_ai_anthropic_no_extra_headers_unchanged(): + """Test that requests without extra_headers still work normally.""" + config = VertexAIAnthropicConfig() + + messages = [{"role": "user", "content": "Hello"}] + optional_params = { + "max_tokens": 100, + "is_vertex_request": True, + } + + result = config.transform_request( + model="claude-sonnet-4-20250514", + messages=messages, + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + + assert "anthropic_beta" not in result + assert "extra_headers" not in result + + def test_vertex_ai_partner_models_anthropic_remove_prompt_caching_scope_beta_header(): """ - Test that remove_unsupported_beta correctly filters out prompt-caching-scope-2026-01-05 + Test that remove_unsupported_beta correctly filters out prompt-caching-scope-2026-01-05 from the anthropic-beta headers. 
""" from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.experimental_pass_through.transformation import ( @@ -352,13 +456,18 @@ def test_vertex_ai_partner_models_anthropic_remove_prompt_caching_scope_beta_hea headers = update_headers_with_filtered_beta(headers, "vertex_ai") beta_header = headers.get("anthropic-beta") - assert PROMPT_CACHING_BETA_HEADER not in (beta_header or ""), \ - f"{PROMPT_CACHING_BETA_HEADER} should be filtered out" - assert "other-feature" in (beta_header or ""), \ - "Other non-excluded beta headers should remain" - assert "web-search-2025-03-05" in (beta_header or ""), \ - "Other non-excluded beta headers should remain" + assert PROMPT_CACHING_BETA_HEADER not in ( + beta_header or "" + ), f"{PROMPT_CACHING_BETA_HEADER} should be filtered out" + assert "other-feature" in ( + beta_header or "" + ), "Other non-excluded beta headers should remain" + assert "web-search-2025-03-05" in ( + beta_header or "" + ), "Other non-excluded beta headers should remain" # If prompt-caching was the only value, header should be removed completely headers2 = {"anthropic-beta": PROMPT_CACHING_BETA_HEADER} headers2 = update_headers_with_filtered_beta(headers2, "vertex_ai") - assert "anthropic-beta" not in headers2, "Header should be removed if no supported values remain" \ No newline at end of file + assert ( + "anthropic-beta" not in headers2 + ), "Header should be removed if no supported values remain" diff --git a/tests/test_litellm/proxy/management_endpoints/test_common_utils.py b/tests/test_litellm/proxy/management_endpoints/test_common_utils.py new file mode 100644 index 0000000000..b372476c3d --- /dev/null +++ b/tests/test_litellm/proxy/management_endpoints/test_common_utils.py @@ -0,0 +1,162 @@ +""" +Tests for litellm/proxy/management_endpoints/common_utils.py + +Covers the fix for GitHub issue #20304: +Empty guardrails/policies arrays sent by the UI should NOT trigger the +enterprise (premium) license check, but should still be applied so that 
+users can intentionally clear previously-set fields. +""" + +from unittest.mock import patch + +from litellm.proxy.management_endpoints.common_utils import ( + _update_metadata_fields, +) + + +class TestUpdateMetadataFieldsEmptyCollections: + """ + Regression tests for issue #20304. + + The UI sends empty arrays (`[]`) for enterprise-only fields like + guardrails, policies, and logging even when the user hasn't configured + these features. The backend must not treat empty collections as an + intent to use the feature, and therefore must not trigger the premium + license check. + + However, empty collections must still be written into metadata so that + users can intentionally clear a previously-set field (e.g. removing all + guardrails by sending `guardrails: []`). + """ + + @patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_empty_list_does_not_trigger_premium_check(self, mock_premium_check): + """Empty lists for premium fields must not trigger the premium check.""" + updated_kv = { + "team_id": "test-team", + "guardrails": [], + "policies": [], + "logging": [], + } + _update_metadata_fields(updated_kv=updated_kv) + mock_premium_check.assert_not_called() + + @patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_empty_list_still_updates_metadata(self, mock_premium_check): + """ + Empty lists must still be moved into metadata so users can clear + previously-set fields (e.g. remove all guardrails). 
+ """ + updated_kv = { + "team_id": "test-team", + "guardrails": [], + "policies": [], + } + _update_metadata_fields(updated_kv=updated_kv) + # The fields should have been moved into metadata + assert "guardrails" not in updated_kv, ( + "guardrails should be popped from top-level" + ) + assert "policies" not in updated_kv, ( + "policies should be popped from top-level" + ) + assert updated_kv["metadata"]["guardrails"] == [] + assert updated_kv["metadata"]["policies"] == [] + + @patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_empty_dict_does_not_trigger_premium_check(self, mock_premium_check): + """Empty dicts for premium fields must not trigger the premium check.""" + updated_kv = { + "team_id": "test-team", + "secret_manager_settings": {}, + } + _update_metadata_fields(updated_kv=updated_kv) + mock_premium_check.assert_not_called() + + @patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_empty_dict_still_updates_metadata(self, mock_premium_check): + """ + Empty dicts must still be moved into metadata so users can clear + previously-set fields. 
+ """ + updated_kv = { + "team_id": "test-team", + "secret_manager_settings": {}, + } + _update_metadata_fields(updated_kv=updated_kv) + assert "secret_manager_settings" not in updated_kv, ( + "secret_manager_settings should be popped from top-level" + ) + assert updated_kv["metadata"]["secret_manager_settings"] == {} + + @patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_none_value_does_not_trigger_premium_check(self, mock_premium_check): + """None values for premium fields should be silently ignored.""" + updated_kv = { + "team_id": "test-team", + "guardrails": None, + "policies": None, + } + _update_metadata_fields(updated_kv=updated_kv) + mock_premium_check.assert_not_called() + + @patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_absent_fields_do_not_trigger_premium_check(self, mock_premium_check): + """Fields not present in the dict should not trigger premium check.""" + updated_kv = { + "team_id": "test-team", + "team_alias": "example-team", + } + _update_metadata_fields(updated_kv=updated_kv) + mock_premium_check.assert_not_called() + + @patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_non_empty_list_triggers_premium_check(self, mock_premium_check): + """Non-empty lists for premium fields should trigger the premium check.""" + updated_kv = { + "team_id": "test-team", + "guardrails": ["my-guardrail"], + } + _update_metadata_fields(updated_kv=updated_kv) + mock_premium_check.assert_called() + + @patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_non_empty_value_triggers_premium_check(self, mock_premium_check): + """Non-empty string values for premium fields should trigger the premium check.""" + updated_kv = { + "team_id": "test-team", + "tags": ["production"], + } + _update_metadata_fields(updated_kv=updated_kv) + mock_premium_check.assert_called() + + 
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_non_empty_list_updates_metadata(self, mock_premium_check): + """Non-empty lists should be moved into metadata.""" + updated_kv = { + "team_id": "test-team", + "guardrails": ["my-guardrail"], + } + _update_metadata_fields(updated_kv=updated_kv) + assert "guardrails" not in updated_kv + assert updated_kv["metadata"]["guardrails"] == ["my-guardrail"] + + @patch("litellm.proxy.management_endpoints.common_utils._premium_user_check") + def test_ui_typical_payload_does_not_trigger_premium_check(self, mock_premium_check): + """ + Simulate the exact payload the UI sends when no enterprise features + are configured. This must NOT trigger the premium check. + """ + # This is the payload structure the UI sends (from issue #20304) + updated_kv = { + "team_id": "67848772-1a8b-4343-938c-17e60f1db860", + "team_alias": "example-team", + "models": ["gpt-4"], + "metadata": { + "guardrails": [], + "logging": [], + }, + "policies": [], + } + _update_metadata_fields(updated_kv=updated_kv) + mock_premium_check.assert_not_called() diff --git a/tests/test_litellm/responses/litellm_completion_transformation/test_tool_call_streaming_transformation.py b/tests/test_litellm/responses/litellm_completion_transformation/test_tool_call_streaming_transformation.py index 8d324bea61..071eefaef4 100644 --- a/tests/test_litellm/responses/litellm_completion_transformation/test_tool_call_streaming_transformation.py +++ b/tests/test_litellm/responses/litellm_completion_transformation/test_tool_call_streaming_transformation.py @@ -229,3 +229,164 @@ def test_tool_call_arguments_are_chunked_to_match_openai_behavior(): assert sequence_numbers == sorted(sequence_numbers) assert len(set(sequence_numbers)) == len(sequence_numbers) # All unique + +def test_tool_call_delta_without_id_uses_index_mapping(): + iterator = LiteLLMCompletionStreamingIterator( + model="test-model", + litellm_custom_stream_wrapper=AsyncMock(), + 
request_input="Test input", + responses_api_request={}, + ) + + chunks = [ + [ + { + "index": 0, + "id": "call_abc123", + "type": "function", + "function": {"name": "get_weather", "arguments": '{"lo'}, + } + ], + [{"index": 0, "type": "function", "function": {"arguments": 'cation":'}}], + [{"index": 0, "type": "function", "function": {"arguments": ' "New'}}], + [{"index": 0, "type": "function", "function": {"arguments": ' York"}'}}], + ] + + for tool_calls in chunks: + iterator._queue_tool_call_delta_events(tool_calls) + + all_events = [] + while iterator._pending_tool_events: + all_events.append(iterator._pending_tool_events.pop(0)) + + delta_events = [ + evt + for evt in all_events + if evt.type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA + ] + streamed_arguments = "".join(evt.delta for evt in delta_events) + + assert streamed_arguments == '{"location": "New York"}' + + output_item_added_events = [ + evt + for evt in all_events + if evt.type == ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED + ] + assert len(output_item_added_events) == 1 + assert output_item_added_events[0].item.id == "call_abc123" + + +def test_parallel_tool_calls_without_ids_use_index_mapping(): + iterator = LiteLLMCompletionStreamingIterator( + model="test-model", + litellm_custom_stream_wrapper=AsyncMock(), + request_input="Test input", + responses_api_request={}, + ) + + iterator._queue_tool_call_delta_events( + [ + { + "index": 0, + "id": "call_a", + "type": "function", + "function": {"name": "tool_a", "arguments": '{"x":'}, + }, + { + "index": 1, + "id": "call_b", + "type": "function", + "function": {"name": "tool_b", "arguments": '{"y":'}, + }, + ] + ) + iterator._queue_tool_call_delta_events( + [ + {"index": 0, "type": "function", "function": {"arguments": "1}"}}, + {"index": 1, "type": "function", "function": {"arguments": "2}"}}, + ] + ) + + all_events = [] + while iterator._pending_tool_events: + all_events.append(iterator._pending_tool_events.pop(0)) + + 
output_item_added_events = [ + evt + for evt in all_events + if evt.type == ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED + ] + assert len(output_item_added_events) == 2 + + delta_events = [ + evt + for evt in all_events + if evt.type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA + ] + arguments_by_call_id = {} + for evt in delta_events: + arguments_by_call_id.setdefault(evt.item_id, "") + arguments_by_call_id[evt.item_id] += evt.delta + + assert arguments_by_call_id["call_a"] == '{"x":1}' + assert arguments_by_call_id["call_b"] == '{"y":2}' + + +def test_reused_index_with_new_call_id_marks_fallback_ambiguous(): + iterator = LiteLLMCompletionStreamingIterator( + model="test-model", + litellm_custom_stream_wrapper=AsyncMock(), + request_input="Test input", + responses_api_request={}, + ) + + iterator._queue_tool_call_delta_events( + [ + { + "index": 0, + "id": "call_a", + "type": "function", + "function": {"name": "tool_a", "arguments": '{"a":'}, + } + ] + ) + iterator._queue_tool_call_delta_events( + [ + { + "index": 0, + "id": "call_b", + "type": "function", + "function": {"name": "tool_b", "arguments": '{"b":'}, + } + ] + ) + # Ambiguous chunk: index reused and id missing. We should skip fallback rather than misroute. 
+ iterator._queue_tool_call_delta_events( + [ + { + "index": 0, + "type": "function", + "function": {"arguments": "1}"}, + } + ] + ) + + all_events = [] + while iterator._pending_tool_events: + all_events.append(iterator._pending_tool_events.pop(0)) + + delta_events = [ + evt + for evt in all_events + if evt.type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA + ] + arguments_by_call_id = {} + for evt in delta_events: + arguments_by_call_id.setdefault(evt.item_id, "") + arguments_by_call_id[evt.item_id] += evt.delta + + assert arguments_by_call_id["call_a"] == '{"a":' + assert arguments_by_call_id["call_b"] == '{"b":' + assert arguments_by_call_id["call_a"] != '{"a":1}' + assert arguments_by_call_id["call_b"] != '{"b":1}' diff --git a/tests/test_litellm/test_router.py b/tests/test_litellm/test_router.py index 08ae804ea8..75ec806ee1 100644 --- a/tests/test_litellm/test_router.py +++ b/tests/test_litellm/test_router.py @@ -1869,3 +1869,124 @@ async def test_aguardrail(): assert result["result"] == "success" assert result["selected_guardrail"]["id"] == "guardrail-1" + +@pytest.mark.asyncio +async def test_anthropic_messages_call_type_is_cached(): + """ + Regression test: Verify that anthropic_messages call type is allowed + in PromptCachingDeploymentCheck.async_log_success_event. 
+ """ + import asyncio + from litellm.router_utils.pre_call_checks.prompt_caching_deployment_check import ( + PromptCachingDeploymentCheck, + ) + from litellm.router_utils.prompt_caching_cache import PromptCachingCache + from litellm.caching.dual_cache import DualCache + from litellm.types.utils import CallTypes + from litellm.types.utils import ( + StandardLoggingPayload, + StandardLoggingModelInformation, + StandardLoggingMetadata, + StandardLoggingHiddenParams, + ) + + # Create mock standard logging payload inline + def create_standard_logging_payload() -> StandardLoggingPayload: + return StandardLoggingPayload( + id="test_id", + call_type="completion", + response_cost=0.1, + response_cost_failure_debug_info=None, + status="success", + total_tokens=30, + prompt_tokens=20, + completion_tokens=10, + startTime=1234567890.0, + endTime=1234567891.0, + completionStartTime=1234567890.5, + model_map_information=StandardLoggingModelInformation( + model_map_key="gpt-3.5-turbo", model_map_value=None + ), + model="gpt-3.5-turbo", + model_id="model-123", + model_group="openai-gpt", + api_base="https://api.openai.com", + metadata=StandardLoggingMetadata( + user_api_key_hash="test_hash", + user_api_key_org_id=None, + user_api_key_alias="test_alias", + user_api_key_team_id="test_team", + user_api_key_user_id="test_user", + user_api_key_team_alias="test_team_alias", + spend_logs_metadata=None, + requester_ip_address="127.0.0.1", + requester_metadata=None, + ), + cache_hit=False, + cache_key=None, + saved_cache_cost=0.0, + request_tags=[], + end_user=None, + requester_ip_address="127.0.0.1", + messages=[{"role": "user", "content": "Hello, world!"}], + response={"choices": [{"message": {"content": "Hi there!"}}]}, + error_str=None, + model_parameters={"stream": True}, + hidden_params=StandardLoggingHiddenParams( + model_id="model-123", + cache_key=None, + api_base="https://api.openai.com", + response_cost="0.1", + additional_headers=None, + ), + ) + + cache = DualCache() + 
deployment_check = PromptCachingDeploymentCheck(cache=cache) + prompt_cache = PromptCachingCache(cache=cache) + + # Create messages with enough tokens to pass the caching threshold + test_messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "test long message here" * 1024, + "cache_control": { + "type": "ephemeral", + "ttl": "5m" + } + } + ] + } + ] + test_model_id = "test-model-id-123" + + # Create a payload with anthropic_messages call type + payload = create_standard_logging_payload() + payload["call_type"] = CallTypes.anthropic_messages.value + payload["messages"] = test_messages + payload["model"] = "anthropic/claude-3-5-sonnet-20240620" + payload["model_id"] = test_model_id + + # Log the success event (should cache the model_id) + await deployment_check.async_log_success_event( + kwargs={"standard_logging_object": payload}, + response_obj={}, + start_time=1234567890.0, + end_time=1234567891.0, + ) + + # Small delay to ensure cache write completes + await asyncio.sleep(0.1) + + # Verify that the model_id was actually cached + cached_result = await prompt_cache.async_get_model_id( + messages=test_messages, + tools=None, + ) + + # This assertion will FAIL if anthropic_messages is filtered out + assert cached_result is not None, "Model ID should be cached for anthropic_messages call type" + assert cached_result["model_id"] == test_model_id, f"Expected {test_model_id}, got {cached_result['model_id']}" diff --git a/tests/test_litellm/test_video_generation.py b/tests/test_litellm/test_video_generation.py index cfc1535052..5446a0a7b3 100644 --- a/tests/test_litellm/test_video_generation.py +++ b/tests/test_litellm/test_video_generation.py @@ -916,6 +916,181 @@ def test_encode_video_id_with_provider_handles_azure_video_prefix(): ) assert encoded_twice == encoded_id # Should return the same encoded ID +class TestVideoListTransformation: + """Tests for video list request/response transformation with provider ID encoding.""" + + def 
test_transform_video_list_response_encodes_first_id_and_last_id(self): + """Verify that first_id and last_id are encoded with provider metadata.""" + config = OpenAIVideoConfig() + + mock_http_response = MagicMock() + mock_http_response.json.return_value = { + "object": "list", + "data": [ + { + "id": "video_aaa", + "object": "video", + "model": "sora-2", + "status": "completed", + }, + { + "id": "video_bbb", + "object": "video", + "model": "sora-2", + "status": "completed", + }, + ], + "first_id": "video_aaa", + "last_id": "video_bbb", + "has_more": False, + } + + result = config.transform_video_list_response( + raw_response=mock_http_response, + logging_obj=MagicMock(), + custom_llm_provider="azure", + ) + + from litellm.types.videos.utils import decode_video_id_with_provider + + # data[].id should be encoded + for item in result["data"]: + decoded = decode_video_id_with_provider(item["id"]) + assert decoded["custom_llm_provider"] == "azure" + + # first_id and last_id should also be encoded + first_decoded = decode_video_id_with_provider(result["first_id"]) + assert first_decoded["custom_llm_provider"] == "azure" + assert first_decoded["video_id"] == "video_aaa" + assert first_decoded["model_id"] == "sora-2" + + last_decoded = decode_video_id_with_provider(result["last_id"]) + assert last_decoded["custom_llm_provider"] == "azure" + assert last_decoded["video_id"] == "video_bbb" + assert last_decoded["model_id"] == "sora-2" + + def test_transform_video_list_response_no_provider_leaves_ids_unchanged(self): + """When custom_llm_provider is None, all IDs should remain unchanged.""" + config = OpenAIVideoConfig() + + mock_http_response = MagicMock() + mock_http_response.json.return_value = { + "object": "list", + "data": [ + {"id": "video_aaa", "object": "video", "model": "sora-2", "status": "completed"}, + ], + "first_id": "video_aaa", + "last_id": "video_aaa", + "has_more": False, + } + + result = config.transform_video_list_response( + 
raw_response=mock_http_response, + logging_obj=MagicMock(), + custom_llm_provider=None, + ) + + assert result["data"][0]["id"] == "video_aaa" + assert result["first_id"] == "video_aaa" + assert result["last_id"] == "video_aaa" + + def test_transform_video_list_response_missing_pagination_fields(self): + """first_id / last_id may be absent or null; should not raise.""" + config = OpenAIVideoConfig() + + mock_http_response = MagicMock() + mock_http_response.json.return_value = { + "object": "list", + "data": [ + {"id": "video_aaa", "object": "video", "model": "sora-2", "status": "completed"}, + ], + "has_more": False, + } + + result = config.transform_video_list_response( + raw_response=mock_http_response, + logging_obj=MagicMock(), + custom_llm_provider="azure", + ) + + # data[].id should still be encoded + from litellm.types.videos.utils import decode_video_id_with_provider + + decoded = decode_video_id_with_provider(result["data"][0]["id"]) + assert decoded["custom_llm_provider"] == "azure" + + # first_id / last_id should not be present + assert "first_id" not in result + assert "last_id" not in result + + def test_transform_video_list_request_decodes_after_parameter(self): + """Encoded 'after' cursor should be decoded back to the raw provider ID.""" + from litellm.types.videos.utils import encode_video_id_with_provider + + config = OpenAIVideoConfig() + + raw_id = "video_69888baee890819086dd3366bfc372fe" + encoded_id = encode_video_id_with_provider(raw_id, "azure", "sora-2") + + url, params = config.transform_video_list_request( + api_base="https://my-resource.openai.azure.com/openai/v1/videos", + litellm_params=MagicMock(), + headers={}, + after=encoded_id, + limit=10, + ) + + assert params["after"] == raw_id + assert params["limit"] == "10" + + def test_transform_video_list_request_passes_through_plain_after(self): + """A plain (non-encoded) 'after' value should pass through unchanged.""" + config = OpenAIVideoConfig() + + url, params = 
config.transform_video_list_request( + api_base="https://api.openai.com/v1/videos", + litellm_params=MagicMock(), + headers={}, + after="video_plain_id", + ) + + assert params["after"] == "video_plain_id" + + def test_transform_video_list_roundtrip(self): + """last_id from list response should decode correctly when used as the 'after' parameter.""" + config = OpenAIVideoConfig() + + # Simulate a list response + mock_http_response = MagicMock() + mock_http_response.json.return_value = { + "object": "list", + "data": [ + {"id": "video_aaa", "object": "video", "model": "sora-2", "status": "completed"}, + {"id": "video_bbb", "object": "video", "model": "sora-2", "status": "completed"}, + ], + "first_id": "video_aaa", + "last_id": "video_bbb", + "has_more": True, + } + + list_result = config.transform_video_list_response( + raw_response=mock_http_response, + logging_obj=MagicMock(), + custom_llm_provider="azure", + ) + + # Use the encoded last_id as the 'after' cursor for the next page + _, params = config.transform_video_list_request( + api_base="https://my-resource.openai.azure.com/openai/v1/videos", + litellm_params=MagicMock(), + headers={}, + after=list_result["last_id"], + ) + + # The after param sent to the upstream API should be the raw video ID + assert params["after"] == "video_bbb" + + class TestVideoEndpointsProxyLitellmParams: """Test that video proxy endpoints (status, content, remix) respect litellm_params from proxy config.""" diff --git a/ui/litellm-dashboard/package.json b/ui/litellm-dashboard/package.json index efb11fec38..76ac97f008 100644 --- a/ui/litellm-dashboard/package.json +++ b/ui/litellm-dashboard/package.json @@ -84,6 +84,8 @@ "mermaid": ">=11.10.0", "js-yaml": ">=4.1.1", "glob": ">=11.1.0", + "tar": ">=7.5.7", + "@isaacs/brace-expansion": ">=5.0.1", "node-forge": ">=1.3.2", "lodash-es": ">=4.17.23", "lodash": ">=4.17.23" diff --git a/ui/litellm-dashboard/src/components/VirtualKeysPage/VirtualKeysTable.test.tsx
b/ui/litellm-dashboard/src/components/VirtualKeysPage/VirtualKeysTable.test.tsx index ef5bb2a037..749396c82f 100644 --- a/ui/litellm-dashboard/src/components/VirtualKeysPage/VirtualKeysTable.test.tsx +++ b/ui/litellm-dashboard/src/components/VirtualKeysPage/VirtualKeysTable.test.tsx @@ -542,3 +542,86 @@ it("should display 'Default Proxy Admin' for created_by when value is 'default_u expect(defaultProxyAdminElements.length).toBeGreaterThan(0); }); }); + + +it("should render table without crashing when models is null", async () => { + const keyWithNullModels = { + ...mockKey, + models: null as unknown as string[], + }; + + mockUseFilterLogic.mockReturnValue({ + filters: { + "Team ID": "", + "Organization ID": "", + "Key Alias": "", + "User ID": "", + "Sort By": "created_at", + "Sort Order": "desc", + }, + filteredKeys: [keyWithNullModels], + allKeyAliases: ["test-key-alias"], + allTeams: [mockTeam], + allOrganizations: [mockOrganization], + handleFilterChange: vi.fn(), + handleFilterReset: vi.fn(), + }); + + const mockProps = { + teams: [mockTeam], + organizations: [mockOrganization], + onSortChange: vi.fn(), + currentSort: { + sortBy: "created_at", + sortOrder: "desc" as const, + }, + }; + + // This should not throw an error + renderWithProviders(); + + await waitFor(() => { + expect(screen.getByText("Test Key Alias")).toBeInTheDocument(); + }); +}); + +it("should render table without crashing when models is undefined", async () => { + const keyWithUndefinedModels = { + ...mockKey, + models: undefined as unknown as string[], + }; + + mockUseFilterLogic.mockReturnValue({ + filters: { + "Team ID": "", + "Organization ID": "", + "Key Alias": "", + "User ID": "", + "Sort By": "created_at", + "Sort Order": "desc", + }, + filteredKeys: [keyWithUndefinedModels], + allKeyAliases: ["test-key-alias"], + allTeams: [mockTeam], + allOrganizations: [mockOrganization], + handleFilterChange: vi.fn(), + handleFilterReset: vi.fn(), + }); + + const mockProps = { + teams: [mockTeam], + 
organizations: [mockOrganization], + onSortChange: vi.fn(), + currentSort: { + sortBy: "created_at", + sortOrder: "desc" as const, + }, + }; + + // This should not throw an error + renderWithProviders(); + + await waitFor(() => { + expect(screen.getByText("Test Key Alias")).toBeInTheDocument(); + }); +}); diff --git a/ui/litellm-dashboard/src/components/VirtualKeysPage/VirtualKeysTable.tsx b/ui/litellm-dashboard/src/components/VirtualKeysPage/VirtualKeysTable.tsx index 465b9b8fbe..f7c47943e7 100644 --- a/ui/litellm-dashboard/src/components/VirtualKeysPage/VirtualKeysTable.tsx +++ b/ui/litellm-dashboard/src/components/VirtualKeysPage/VirtualKeysTable.tsx @@ -727,7 +727,7 @@ export function VirtualKeysTable({ teams, organizations, onSortChange, currentSo whiteSpace: "pre-wrap", overflow: "hidden", }} - className={`py-0.5 max-h-8 overflow-hidden text-ellipsis whitespace-nowrap ${cell.column.id === "models" && (cell.getValue() as string[]).length > 3 ? "px-0" : ""}`} + className={`py-0.5 max-h-8 overflow-hidden text-ellipsis whitespace-nowrap ${cell.column.id === "models" && Array.isArray(cell.getValue()) && (cell.getValue() as string[]).length > 3 ? "px-0" : ""}`} > {flexRender(cell.column.columnDef.cell, cell.getContext())} diff --git a/ui/litellm-dashboard/src/components/team/team_info.tsx b/ui/litellm-dashboard/src/components/team/team_info.tsx index 35f5b87e07..014f8fb901 100644 --- a/ui/litellm-dashboard/src/components/team/team_info.tsx +++ b/ui/litellm-dashboard/src/components/team/team_info.tsx @@ -465,8 +465,8 @@ const TeamInfoView: React.FC = ({ budget_duration: values.budget_duration, metadata: { ...parsedMetadata, - guardrails: values.guardrails || [], - logging: values.logging_settings || [], + ...(values.guardrails?.length > 0 ? { guardrails: values.guardrails } : {}), + ...(values.logging_settings?.length > 0 ? 
{ logging: values.logging_settings } : {}), disable_global_guardrails: values.disable_global_guardrails || false, soft_budget_alerting_emails: typeof values.soft_budget_alerting_emails === "string" @@ -477,7 +477,7 @@ const TeamInfoView: React.FC = ({ : values.soft_budget_alerting_emails || [], ...(secretManagerSettings !== undefined ? { secret_manager_settings: secretManagerSettings } : {}), }, - policies: values.policies || [], + ...(values.policies?.length > 0 ? { policies: values.policies } : {}), organization_id: values.organization_id, }; diff --git a/ui/litellm-dashboard/src/components/view_logs/LogDetailsDrawer/LogDetailsDrawer.tsx b/ui/litellm-dashboard/src/components/view_logs/LogDetailsDrawer/LogDetailsDrawer.tsx index 54946eb096..a3f948296e 100644 --- a/ui/litellm-dashboard/src/components/view_logs/LogDetailsDrawer/LogDetailsDrawer.tsx +++ b/ui/litellm-dashboard/src/components/view_logs/LogDetailsDrawer/LogDetailsDrawer.tsx @@ -85,7 +85,7 @@ export function LogDetailsDrawer({ // Check if request/response data is present const hasMessages = checkHasMessages(logEntry.messages); const hasResponse = checkHasResponse(logEntry.response); - const missingData = !hasMessages && !hasResponse; + const missingData = !hasMessages && !hasResponse && !hasError; // Guardrail data const guardrailInfo = metadata?.guardrail_information; @@ -206,6 +206,7 @@ export function LogDetailsDrawer({ {/* Request/Response JSON - Collapsible */} any; getFormattedResponse: () => any; logEntry: LogEntry; @@ -346,6 +348,7 @@ interface RequestResponseSectionProps { function RequestResponseSection({ hasResponse, + hasError, getRawRequest, getFormattedResponse, logEntry, @@ -423,7 +426,7 @@ function RequestResponseSection({ text: getCopyText(), tooltips: ["Copy JSON", "Copied!"] }} - disabled={activeTab === TAB_RESPONSE && !hasResponse} + disabled={activeTab === TAB_RESPONSE && !hasResponse && !hasError} /> } items={[ @@ -441,7 +444,7 @@ function RequestResponseSection({ label: "Response", 
children: (
- {hasResponse ? ( + {hasResponse || hasError ? ( ) : (
diff --git a/ui/litellm-dashboard/src/components/view_logs/RequestResponsePanel.test.tsx b/ui/litellm-dashboard/src/components/view_logs/RequestResponsePanel.test.tsx index deeac3a8d0..b7c0318d9f 100644 --- a/ui/litellm-dashboard/src/components/view_logs/RequestResponsePanel.test.tsx +++ b/ui/litellm-dashboard/src/components/view_logs/RequestResponsePanel.test.tsx @@ -188,4 +188,78 @@ describe("RequestResponsePanel", () => { expect(responseData).toEqual({ responseData: "this should appear in response" }); expect(responseData).not.toEqual({ requestData: "this should not appear in response" }); }); + + it("should show error response data when hasError is true and hasResponse is false", () => { + const failedLogEntry: LogEntry = { + ...baseLogEntry, + messages: [], + response: {}, + metadata: { + status: "failure", + error_information: { + error_message: "Model not found", + error_class: "NotFoundError", + error_code: 404, + }, + additional_usage_values: { + cache_read_input_tokens: 0, + cache_creation_input_tokens: 0, + }, + }, + }; + const errorResponse = { error: { message: "Model not found", type: "NotFoundError", code: 404, param: null } }; + const mockGetRawRequest = vi.fn().mockReturnValue({ messages: [] }); + const mockFormattedResponse = vi.fn().mockReturnValue(errorResponse); + render( + , + ); + expect(screen.queryByText("Response data not available")).not.toBeInTheDocument(); + expect(mockFormattedResponse).toHaveBeenCalled(); + const copyButtons = screen.getAllByRole("button"); + const copyResponseButton = copyButtons.find((button) => button.getAttribute("title") === "Copy response"); + expect(copyResponseButton).not.toBeDisabled(); + }); + + it("should show Response data not available when hasResponse and hasError are both false", () => { + const mockGetRawRequest = vi.fn().mockReturnValue({ messages: [] }); + const mockFormattedResponse = vi.fn().mockReturnValue({}); + render( + , + ); + expect(screen.getByText("Response data not 
available")).toBeInTheDocument(); + }); + + it("should show error code in response header when hasError is true", () => { + const errorInfo = { error_message: "Rate limit exceeded", error_class: "RateLimitError", error_code: 429 }; + const mockGetRawRequest = vi.fn().mockReturnValue({ messages: [] }); + const mockFormattedResponse = vi.fn().mockReturnValue({ error: { message: "Rate limit exceeded", type: "RateLimitError", code: 429, param: null } }); + render( + , + ); + expect(screen.getByText(/HTTP code 429/)).toBeInTheDocument(); + }); }); diff --git a/ui/litellm-dashboard/src/components/view_logs/RequestResponsePanel.tsx b/ui/litellm-dashboard/src/components/view_logs/RequestResponsePanel.tsx index b2cae68184..da9323f817 100644 --- a/ui/litellm-dashboard/src/components/view_logs/RequestResponsePanel.tsx +++ b/ui/litellm-dashboard/src/components/view_logs/RequestResponsePanel.tsx @@ -113,7 +113,7 @@ export function RequestResponsePanel({ onClick={handleCopyResponse} className="p-1 hover:bg-gray-200 rounded" title="Copy response" - disabled={!hasResponse} + disabled={!hasResponse && !hasError} >
- {hasResponse ? ( + {hasResponse || hasError ? (
diff --git a/ui/litellm-dashboard/src/components/view_logs/index.tsx b/ui/litellm-dashboard/src/components/view_logs/index.tsx index 3859a5e51f..87e11e00c7 100644 --- a/ui/litellm-dashboard/src/components/view_logs/index.tsx +++ b/ui/litellm-dashboard/src/components/view_logs/index.tsx @@ -822,7 +822,7 @@ export function RequestViewer({ row, onOpenSettings }: { row: Row; onO ? row.original.messages.length > 0 : Object.keys(row.original.messages).length > 0); const hasResponse = row.original.response && Object.keys(formatData(row.original.response)).length > 0; - const missingData = !hasMessages && !hasResponse; + const missingData = !hasMessages && !hasResponse && !hasError; // Format the response with error details if present const formattedResponse = () => {