mirror of
https://github.com/tiennm99/litellm.git
synced 2026-07-03 15:21:18 +00:00
Merge branch 'main' into litellm_v1_messages_claude_4_6
This commit is contained in:
+1
-1
@@ -48,7 +48,7 @@ dist/
|
||||
build/
|
||||
*.egg-info/
|
||||
.DS_Store
|
||||
node_modules/
|
||||
**/node_modules
|
||||
*.log
|
||||
.env
|
||||
.env.local
|
||||
|
||||
+30
-1
@@ -49,7 +49,22 @@ USER root
|
||||
|
||||
# Install runtime dependencies (libsndfile needed for audio processing on ARM64)
|
||||
RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip libsndfile && \
|
||||
npm install -g npm@latest tar@latest
|
||||
npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 && \
|
||||
# SECURITY FIX: npm bundles tar, glob, and brace-expansion at multiple nested
|
||||
# levels inside its dependency tree. `npm install -g <pkg>` only creates a
|
||||
# SEPARATE global package, it does NOT replace npm's internal copies.
|
||||
# We must find and replace EVERY copy inside npm's directory.
|
||||
GLOBAL="$(npm root -g)" && \
|
||||
find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done && \
|
||||
npm cache clean --force
|
||||
|
||||
WORKDIR /app
|
||||
# Copy the current directory contents into the container at /app
|
||||
@@ -71,6 +86,20 @@ RUN NODEJS_WHEEL_NODE=$(find /usr/lib -path "*/nodejs_wheel/bin/node" 2>/dev/nul
|
||||
RUN find /usr/lib -type f -path "*/tornado/test/*" -delete && \
|
||||
find /usr/lib -type d -path "*/tornado/test" -delete
|
||||
|
||||
# SECURITY FIX: nodejs-wheel-binaries (pip package used by Prisma) bundles a complete
|
||||
# npm with old vulnerable deps at /usr/lib/python3.*/site-packages/nodejs_wheel/.
|
||||
# Patch every copy of tar, glob, and brace-expansion inside that tree.
|
||||
RUN GLOBAL="$(npm root -g)" && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/tar" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/glob" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/@isaacs/brace-expansion" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done
|
||||
|
||||
# Install semantic_router and aurelio-sdk using script
|
||||
# Convert Windows line endings to Unix and make executable
|
||||
RUN sed -i 's/\r$//' docker/install_auto_router.sh && chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh
|
||||
|
||||
@@ -155,10 +155,7 @@ run_grype_scans() {
|
||||
"CVE-2025-12781" # No fix available yet
|
||||
"CVE-2025-11468" # No fix available yet
|
||||
"CVE-2026-1299" # Python 3.13 email module header injection - not applicable, LiteLLM doesn't use BytesGenerator for email serialization
|
||||
"GHSA-7h2j-956f-4vf2" # @isaacs/brace-expansion ReDoS - npm tooling dependency, not used in application runtime
|
||||
"GHSA-hx9q-6w63-j58v" # orjson deep recursion - no fix available yet
|
||||
"GHSA-8qq5-rm4j-mr97" # node-tar symlink poisoning - npm tooling dependency, tar CLI not exposed in application code
|
||||
"GHSA-29xp-372q-xqph" # node-tar race condition - npm tooling dependency, tar CLI not exposed in application code
|
||||
"CVE-2026-0775" # npm cli incorrect permission assignment - no fix available yet, npm is only used at build/prisma-generate time
|
||||
)
|
||||
|
||||
# Build JSON array of allowlisted CVE IDs for jq
|
||||
|
||||
@@ -6,7 +6,18 @@ WORKDIR /app
|
||||
|
||||
# Install Node.js and npm (adjust version as needed)
|
||||
RUN apt-get update && apt-get install -y nodejs npm && \
|
||||
npm install -g npm@latest tar@latest
|
||||
npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 && \
|
||||
GLOBAL="$(npm root -g)" && \
|
||||
find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done && \
|
||||
npm cache clean --force
|
||||
|
||||
# Copy the UI source into the container
|
||||
COPY ./ui/litellm-dashboard /app/ui/litellm-dashboard
|
||||
|
||||
@@ -50,7 +50,18 @@ USER root
|
||||
|
||||
# Install runtime dependencies
|
||||
RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip libsndfile && \
|
||||
npm install -g npm@latest tar@latest
|
||||
npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 && \
|
||||
GLOBAL="$(npm root -g)" && \
|
||||
find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done && \
|
||||
npm cache clean --force
|
||||
|
||||
WORKDIR /app
|
||||
# Copy the current directory contents into the container at /app
|
||||
@@ -64,9 +75,19 @@ COPY --from=builder /wheels/ /wheels/
|
||||
# Install the built wheel using pip; again using a wildcard if it's the only file
|
||||
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
|
||||
|
||||
# Replace the nodejs-wheel-binaries bundled node with the system node (fixes CVE-2025-55130)
|
||||
RUN NODEJS_WHEEL_NODE=$(find /usr/lib -path "*/nodejs_wheel/bin/node" 2>/dev/null) && \
|
||||
if [ -n "$NODEJS_WHEEL_NODE" ]; then cp /usr/bin/node "$NODEJS_WHEEL_NODE"; fi
|
||||
# SECURITY FIX: nodejs-wheel-binaries (pip package used by Prisma) bundles a complete
|
||||
# npm with old vulnerable deps at /usr/lib/python3.*/site-packages/nodejs_wheel/.
|
||||
# Patch every copy of tar, glob, and brace-expansion inside that tree.
|
||||
RUN GLOBAL="$(npm root -g)" && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/tar" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/glob" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/@isaacs/brace-expansion" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done
|
||||
|
||||
# Install semantic_router and aurelio-sdk using script
|
||||
# Convert Windows line endings to Unix and make executable
|
||||
|
||||
+26
-1
@@ -62,7 +62,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
nodejs \
|
||||
npm \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& npm install -g npm@latest tar@latest
|
||||
&& npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 \
|
||||
&& GLOBAL="$(npm root -g)" \
|
||||
&& find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done \
|
||||
&& find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done \
|
||||
&& find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done \
|
||||
&& npm cache clean --force
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
@@ -80,6 +91,20 @@ RUN pip install --no-cache-dir *.whl /wheels/* --no-index --find-links=/wheels/
|
||||
rm -f *.whl && \
|
||||
rm -rf /wheels
|
||||
|
||||
# SECURITY FIX: nodejs-wheel-binaries (pip package used by Prisma) bundles a complete
|
||||
# npm with old vulnerable deps at /usr/lib/python3.*/site-packages/nodejs_wheel/.
|
||||
# Patch every copy of tar, glob, and brace-expansion inside that tree.
|
||||
RUN GLOBAL="$(npm root -g)" && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/tar" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/glob" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/@isaacs/brace-expansion" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done
|
||||
|
||||
# Generate prisma client and set permissions
|
||||
# Convert Windows line endings to Unix for entrypoint scripts
|
||||
RUN prisma generate && \
|
||||
|
||||
@@ -104,7 +104,18 @@ RUN for i in 1 2 3; do \
|
||||
&& for i in 1 2 3; do \
|
||||
apk add --no-cache python3 py3-pip bash openssl tzdata nodejs npm supervisor && break || sleep 5; \
|
||||
done \
|
||||
&& npm install -g npm@latest tar@latest
|
||||
&& npm install -g npm@latest tar@7.5.7 glob@11.1.0 @isaacs/brace-expansion@5.0.1 \
|
||||
&& GLOBAL="$(npm root -g)" \
|
||||
&& find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done \
|
||||
&& find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done \
|
||||
&& find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done \
|
||||
&& npm cache clean --force
|
||||
|
||||
# Copy artifacts from builder
|
||||
COPY --from=builder /app/requirements.txt /app/requirements.txt
|
||||
@@ -146,9 +157,19 @@ RUN pip install --no-index --find-links=/wheels/ -r requirements.txt && \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
# Replace the nodejs-wheel-binaries bundled node with the system node (fixes CVE-2025-55130)
|
||||
RUN NODEJS_WHEEL_NODE=$(find /usr/lib -path "*/nodejs_wheel/bin/node" 2>/dev/null) && \
|
||||
if [ -n "$NODEJS_WHEEL_NODE" ]; then cp /usr/bin/node "$NODEJS_WHEEL_NODE"; fi
|
||||
# SECURITY FIX: nodejs-wheel-binaries (pip package used by Prisma) bundles a complete
|
||||
# npm with old vulnerable deps at /usr/lib/python3.*/site-packages/nodejs_wheel/.
|
||||
# Patch every copy of tar, glob, and brace-expansion inside that tree.
|
||||
RUN GLOBAL="$(npm root -g)" && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/tar" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/glob" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done && \
|
||||
find /usr/lib -path "*/nodejs_wheel/*/node_modules/@isaacs/brace-expansion" -type d | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done
|
||||
|
||||
# Permissions, cleanup, and Prisma prep
|
||||
# Convert Windows line endings to Unix for entrypoint scripts
|
||||
|
||||
@@ -227,6 +227,28 @@ response = litellm.completion(
|
||||
)
|
||||
```
|
||||
|
||||
## OAuth2/JWT Authentication
|
||||
|
||||
If your LiteLLM Proxy requires OAuth2/JWT authentication (e.g., Azure AD, Keycloak, Okta), the SDK can automatically obtain and refresh tokens for you.
|
||||
|
||||
```python
|
||||
import litellm
|
||||
from litellm.proxy_auth import AzureADCredential, ProxyAuthHandler
|
||||
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=AzureADCredential(),
|
||||
scope="api://my-litellm-proxy/.default"
|
||||
)
|
||||
litellm.api_base = "https://my-proxy.example.com"
|
||||
|
||||
response = litellm.completion(
|
||||
model="gpt-4",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
```
|
||||
|
||||
[Learn more about SDK Proxy Authentication (OAuth2/JWT Auto-Refresh) →](../proxy_auth)
|
||||
|
||||
## Sending `tags` to LiteLLM Proxy
|
||||
|
||||
Tags allow you to categorize and track your API requests for monitoring, debugging, and analytics purposes. You can send tags as a list of strings to the LiteLLM Proxy using the `extra_body` parameter.
|
||||
|
||||
@@ -100,7 +100,7 @@ In cases where encounter other errors when apply Zscaler AI Guard, return exampl
|
||||
}
|
||||
}
|
||||
```
|
||||
## 6. Sending User Information to Zscaler AI Guard for Analysis (Optional)
|
||||
## 6. Sending User Information to Zscaler AI Guard (Optional)
|
||||
If you need to send end-user information to Zscaler AI Guard for analysis, you can set the configuration in the environment variables to True and include the relevant information in custom_headers on Zscaler AI Guard.
|
||||
|
||||
- To send user_api_key_alias:
|
||||
@@ -133,4 +133,30 @@ curl -i http://localhost:8165/v1/chat/completions \
|
||||
"zguard_policy_id": <the custom policy id>
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
## 8. Set Custom Zscaler AI Guard Policy on Litellm Team OR Key Metadata (Optional)
|
||||
In addition to setting `zguard_policy_id` in a request or in the configuration file, you can also set it in the metadata of a LiteLLM Team or Key. The `zguard_policy_id` is resolved in the following order of precedence: request, Key, Team, config file. This logic is illustrated below:
|
||||
```
|
||||
user_api_key_metadata = metadata.get("user_api_key_metadata", {}) or {}
|
||||
team_metadata = metadata.get("team_metadata", {}) or {}
|
||||
policy_id = (
|
||||
metadata.get("zguard_policy_id")
|
||||
if "zguard_policy_id" in metadata
|
||||
else (
|
||||
user_api_key_metadata.get("zguard_policy_id")
|
||||
if "zguard_policy_id" in user_api_key_metadata
|
||||
else (
|
||||
team_metadata.get("zguard_policy_id")
|
||||
if "zguard_policy_id" in team_metadata
|
||||
else self.policy_id
|
||||
)
|
||||
)
|
||||
)
|
||||
```
|
||||
You can leverage this feature to apply multiple policies configured on the Zscaler AI Guard (ZGuard) to traffic from different applications. (Note: It is recommended to map policies using either Team or Key metadata, but not a mix of both.)
|
||||
|
||||
Example value to set in the Team/Key metadata (this can be set from the UI):
|
||||
```
|
||||
{"zguard_policy_id": 100}
|
||||
```
|
||||
@@ -0,0 +1,333 @@
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# SDK Proxy Authentication (OAuth2/JWT Auto-Refresh)
|
||||
|
||||
Automatically obtain and refresh OAuth2/JWT tokens when using the LiteLLM Python SDK with a LiteLLM Proxy that requires JWT authentication.
|
||||
|
||||
## Overview
|
||||
|
||||
When your LiteLLM Proxy is protected by an OAuth2/OIDC provider (Azure AD, Keycloak, Okta, Auth0, etc.), your SDK clients need valid JWT tokens for every request. Instead of manually managing token lifecycle, `litellm.proxy_auth` handles this automatically:
|
||||
|
||||
- Obtains tokens from your identity provider
|
||||
- Caches tokens to avoid unnecessary requests
|
||||
- Refreshes tokens before they expire (60-second buffer)
|
||||
- Injects `Authorization: Bearer <token>` headers into every request
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Azure AD
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="default" label="DefaultAzureCredential">
|
||||
|
||||
Uses the [DefaultAzureCredential](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential) chain (environment variables, managed identity, Azure CLI, etc.):
|
||||
|
||||
```python
|
||||
import litellm
|
||||
from litellm.proxy_auth import AzureADCredential, ProxyAuthHandler
|
||||
|
||||
# One-time setup
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=AzureADCredential(), # uses DefaultAzureCredential
|
||||
scope="api://my-litellm-proxy/.default"
|
||||
)
|
||||
litellm.api_base = "https://my-proxy.example.com"
|
||||
|
||||
# All requests now include Authorization headers automatically
|
||||
response = litellm.completion(
|
||||
model="gpt-4",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="client-secret" label="ClientSecretCredential">
|
||||
|
||||
Use a specific Azure AD app registration:
|
||||
|
||||
```python
|
||||
import litellm
|
||||
from azure.identity import ClientSecretCredential
|
||||
from litellm.proxy_auth import AzureADCredential, ProxyAuthHandler
|
||||
|
||||
azure_cred = ClientSecretCredential(
|
||||
tenant_id="your-tenant-id",
|
||||
client_id="your-client-id",
|
||||
client_secret="your-client-secret"
|
||||
)
|
||||
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=AzureADCredential(credential=azure_cred),
|
||||
scope="api://my-litellm-proxy/.default"
|
||||
)
|
||||
litellm.api_base = "https://my-proxy.example.com"
|
||||
|
||||
response = litellm.completion(
|
||||
model="gpt-4",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
**Required package:** `pip install azure-identity`
|
||||
|
||||
### Generic OAuth2 (Okta, Auth0, Keycloak, etc.)
|
||||
|
||||
Works with any OAuth2 provider that supports the `client_credentials` grant type:
|
||||
|
||||
```python
|
||||
import litellm
|
||||
from litellm.proxy_auth import GenericOAuth2Credential, ProxyAuthHandler
|
||||
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=GenericOAuth2Credential(
|
||||
client_id="your-client-id",
|
||||
client_secret="your-client-secret",
|
||||
token_url="https://your-idp.example.com/oauth2/token"
|
||||
),
|
||||
scope="litellm_proxy_api"
|
||||
)
|
||||
litellm.api_base = "https://my-proxy.example.com"
|
||||
|
||||
response = litellm.completion(
|
||||
model="gpt-4",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
```
|
||||
|
||||
### Custom Credential Provider
|
||||
|
||||
Implement the `TokenCredential` protocol to use any authentication mechanism:
|
||||
|
||||
```python
|
||||
import time
|
||||
import litellm
|
||||
from litellm.proxy_auth import AccessToken, ProxyAuthHandler
|
||||
|
||||
class MyCustomCredential:
|
||||
"""Any class with a get_token(scope) -> AccessToken method works."""
|
||||
|
||||
def get_token(self, scope: str) -> AccessToken:
|
||||
# Your custom logic to obtain a token
|
||||
token = my_auth_system.get_jwt(scope=scope)
|
||||
return AccessToken(
|
||||
token=token,
|
||||
expires_on=int(time.time()) + 3600
|
||||
)
|
||||
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=MyCustomCredential(),
|
||||
scope="my-scope"
|
||||
)
|
||||
```
|
||||
|
||||
## Supported Endpoints
|
||||
|
||||
Auth headers are automatically injected for:
|
||||
|
||||
| Endpoint | Function |
|
||||
|----------|----------|
|
||||
| Chat Completions | `litellm.completion()` / `litellm.acompletion()` |
|
||||
| Embeddings | `litellm.embedding()` / `litellm.aembedding()` |
|
||||
|
||||
## How It Works
|
||||
|
||||
```
|
||||
┌──────────┐ ┌──────────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||
│ Your │ │ ProxyAuthHandler │ │ Identity │ │ LiteLLM │
|
||||
│ Code │────▶│ (token cache) │────▶│ Provider │ │ Proxy │
|
||||
│ │ │ │◀────│ (Azure AD, │ │ │
|
||||
│ │ │ │ │ Okta, etc) │ │ │
|
||||
│ │ └────────┬─────────┘ └──────────────┘ │ │
|
||||
│ │ │ Authorization: Bearer <token> │ │
|
||||
│ │──────────────┼───────────────────────────────────▶│ │
|
||||
│ │◀─────────────┼────────────────────────────────────│ │
|
||||
└──────────┘ │ └──────────────┘
|
||||
```
|
||||
|
||||
1. You set `litellm.proxy_auth` once at startup
|
||||
2. On each SDK call (`completion()`, `embedding()`), the handler checks its cached token
|
||||
3. If the token is missing or expires within 60 seconds, it requests a new one from your identity provider
|
||||
4. The `Authorization: Bearer <token>` header is injected into the request
|
||||
5. If token retrieval fails, a warning is logged and the request proceeds without auth headers
|
||||
|
||||
## API Reference
|
||||
|
||||
### ProxyAuthHandler
|
||||
|
||||
The main handler that manages the token lifecycle.
|
||||
|
||||
```python
|
||||
from litellm.proxy_auth import ProxyAuthHandler
|
||||
|
||||
handler = ProxyAuthHandler(
|
||||
credential=<TokenCredential>, # required - credential provider
|
||||
scope="<oauth2-scope>" # required - OAuth2 scope to request
|
||||
)
|
||||
```
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
|-----------|------|----------|-------------|
|
||||
| `credential` | `TokenCredential` | Yes | A credential provider (AzureADCredential, GenericOAuth2Credential, or custom) |
|
||||
| `scope` | `str` | Yes | The OAuth2 scope to request tokens for |
|
||||
|
||||
**Methods:**
|
||||
|
||||
| Method | Returns | Description |
|
||||
|--------|---------|-------------|
|
||||
| `get_token()` | `AccessToken` | Get a valid token, refreshing if needed |
|
||||
| `get_auth_headers()` | `dict` | Get `{"Authorization": "Bearer <token>"}` headers |
|
||||
|
||||
### AzureADCredential
|
||||
|
||||
Wraps any `azure-identity` credential with lazy initialization.
|
||||
|
||||
```python
|
||||
from litellm.proxy_auth import AzureADCredential
|
||||
|
||||
# Uses DefaultAzureCredential (recommended)
|
||||
cred = AzureADCredential()
|
||||
|
||||
# Or wrap a specific azure-identity credential
|
||||
from azure.identity import ManagedIdentityCredential
|
||||
cred = AzureADCredential(credential=ManagedIdentityCredential())
|
||||
```
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
|-----------|------|----------|-------------|
|
||||
| `credential` | Azure `TokenCredential` | No | An azure-identity credential. If `None`, uses `DefaultAzureCredential` |
|
||||
|
||||
### GenericOAuth2Credential
|
||||
|
||||
Standard OAuth2 client credentials flow for any provider.
|
||||
|
||||
```python
|
||||
from litellm.proxy_auth import GenericOAuth2Credential
|
||||
|
||||
cred = GenericOAuth2Credential(
|
||||
client_id="your-client-id",
|
||||
client_secret="your-client-secret",
|
||||
token_url="https://your-idp.com/oauth2/token"
|
||||
)
|
||||
```
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
|-----------|------|----------|-------------|
|
||||
| `client_id` | `str` | Yes | OAuth2 client ID |
|
||||
| `client_secret` | `str` | Yes | OAuth2 client secret |
|
||||
| `token_url` | `str` | Yes | Token endpoint URL |
|
||||
|
||||
### AccessToken
|
||||
|
||||
Dataclass representing an OAuth2 access token.
|
||||
|
||||
```python
|
||||
from litellm.proxy_auth import AccessToken
|
||||
|
||||
token = AccessToken(
|
||||
token="eyJhbG...", # JWT string
|
||||
expires_on=1234567890 # Unix timestamp
|
||||
)
|
||||
```
|
||||
|
||||
### TokenCredential Protocol
|
||||
|
||||
Any class implementing this protocol can be used as a credential provider:
|
||||
|
||||
```python
|
||||
from litellm.proxy_auth import AccessToken
|
||||
|
||||
class MyCredential:
|
||||
def get_token(self, scope: str) -> AccessToken:
|
||||
...
|
||||
```
|
||||
|
||||
## Provider-Specific Examples
|
||||
|
||||
### Keycloak
|
||||
|
||||
```python
|
||||
from litellm.proxy_auth import GenericOAuth2Credential, ProxyAuthHandler
|
||||
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=GenericOAuth2Credential(
|
||||
client_id="litellm-client",
|
||||
client_secret="your-keycloak-client-secret",
|
||||
token_url="https://keycloak.example.com/realms/your-realm/protocol/openid-connect/token"
|
||||
),
|
||||
scope="openid"
|
||||
)
|
||||
```
|
||||
|
||||
### Okta
|
||||
|
||||
```python
|
||||
from litellm.proxy_auth import GenericOAuth2Credential, ProxyAuthHandler
|
||||
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=GenericOAuth2Credential(
|
||||
client_id="your-okta-client-id",
|
||||
client_secret="your-okta-client-secret",
|
||||
token_url="https://your-org.okta.com/oauth2/default/v1/token"
|
||||
),
|
||||
scope="litellm_api"
|
||||
)
|
||||
```
|
||||
|
||||
### Auth0
|
||||
|
||||
```python
|
||||
from litellm.proxy_auth import GenericOAuth2Credential, ProxyAuthHandler
|
||||
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=GenericOAuth2Credential(
|
||||
client_id="your-auth0-client-id",
|
||||
client_secret="your-auth0-client-secret",
|
||||
token_url="https://your-tenant.auth0.com/oauth/token"
|
||||
),
|
||||
scope="https://my-proxy.example.com/api"
|
||||
)
|
||||
```
|
||||
|
||||
### Azure AD with Managed Identity
|
||||
|
||||
```python
|
||||
from azure.identity import ManagedIdentityCredential
|
||||
from litellm.proxy_auth import AzureADCredential, ProxyAuthHandler
|
||||
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=AzureADCredential(
|
||||
credential=ManagedIdentityCredential()
|
||||
),
|
||||
scope="api://my-litellm-proxy/.default"
|
||||
)
|
||||
```
|
||||
|
||||
## Combining with `use_litellm_proxy`
|
||||
|
||||
You can use `proxy_auth` together with [`use_litellm_proxy`](./providers/litellm_proxy#send-all-sdk-requests-to-litellm-proxy) to route all SDK requests through an authenticated proxy:
|
||||
|
||||
```python
|
||||
import os
|
||||
import litellm
|
||||
from litellm.proxy_auth import AzureADCredential, ProxyAuthHandler
|
||||
|
||||
# Route all requests through the proxy
|
||||
os.environ["LITELLM_PROXY_API_BASE"] = "https://my-proxy.example.com"
|
||||
litellm.use_litellm_proxy = True
|
||||
|
||||
# Authenticate with OAuth2/JWT
|
||||
litellm.proxy_auth = ProxyAuthHandler(
|
||||
credential=AzureADCredential(),
|
||||
scope="api://my-litellm-proxy/.default"
|
||||
)
|
||||
|
||||
# This request goes through the proxy with automatic JWT auth
|
||||
response = litellm.completion(
|
||||
model="vertex_ai/gemini-2.0-flash-001",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
```
|
||||
@@ -0,0 +1,43 @@
|
||||
# Claude Code - Prompt Cache Routing
|
||||
|
||||
Claude's [Prompt Caching](https://platform.claude.com/docs/en/build-with-claude/prompt-caching) feature helps optimize API usage by caching prompts and reusing the cached prompts in subsequent API calls. This feature is used by Claude Code.
|
||||
|
||||
When LiteLLM [load balancing](../proxy/load_balancing.md) is enabled, LiteLLM must be configured to use the `PromptCachingDeploymentCheck` pre-call check so that prompt caching keeps working with Claude Code. This pre-call check remembers which API calls used prompt caching and routes subsequent API calls that try to use the same cached prompt to the model deployment where the cache write occurred.
|
||||
|
||||
## Set Up
|
||||
|
||||
1. Configure the router so that it uses the `PromptCachingDeploymentCheck` (via setting the `optional_pre_call_checks` property), and configure the models so that they can access multiple deployments of Claude; below, we show an example for multiple AWS accounts (referred to as `account-1` and `account-2`, using the `aws_profile_name` property):
|
||||
```yaml
|
||||
router_settings:
|
||||
optional_pre_call_checks: ["prompt_caching"]
|
||||
|
||||
model_list:
|
||||
- litellm_params:
|
||||
model: us.anthropic.claude-sonnet-4-5-20250929-v1:0
|
||||
aws_profile_name: account-1
|
||||
aws_region_name: us-west-2
|
||||
model_info:
|
||||
litellm_provider: bedrock
|
||||
model_name: us.anthropic.claude-sonnet-4-5-20250929-v1:0
|
||||
- litellm_params:
|
||||
model: us.anthropic.claude-sonnet-4-5-20250929-v1:0
|
||||
aws_profile_name: account-2
|
||||
aws_region_name: us-west-2
|
||||
model_info:
|
||||
litellm_provider: bedrock
|
||||
model_name: us.anthropic.claude-sonnet-4-5-20250929-v1:0
|
||||
```
|
||||
2. Utilize Claude Code:
|
||||
1. Launch Claude Code, which will do a warm-up API call that tries to cache its warm-up prompt and its system prompt.
|
||||
2. Wait a few seconds, then quit Claude Code and re-open it.
|
||||
3. You'll notice that the warm-up API call successfully gets a cache hit (if using Claude Code in an IDE like VS Code, ensure that you don't do anything between step 2.1 and 2.2 here, otherwise there may not be a cache hit):
|
||||
1. Go to the [LiteLLM Request Logs page](../proxy/ui_logs.md) in the Admin UI
|
||||
2. Click on the individual requests to see (a) the cache creation and cache read tokens; and (b) the Model ID. In particular, the API call from step 2.1 should show a cache write, and the API call from step 2.2 should show a cache read; in addition, the Model ID should be equal (meaning the API call is getting forwarded to the same AWS account).
|
||||
|
||||
## Related
|
||||
|
||||
- [Claude Code - Quickstart](./claude_responses_api.md)
|
||||
- [Claude Code - Customer Tracking](./claude_code_customer_tracking.md)
|
||||
- [Claude Code - Plugin Marketplace](./claude_code_plugin_marketplace.md)
|
||||
- [Claude Code - WebSearch](./claude_code_websearch.md)
|
||||
- [Proxy - Load Balancing](../proxy/load_balancing.md)
|
||||
@@ -61,6 +61,8 @@
|
||||
"mermaid": ">=11.10.0",
|
||||
"gray-matter": "4.0.3",
|
||||
"glob": ">=11.1.0",
|
||||
"tar": ">=7.5.7",
|
||||
"@isaacs/brace-expansion": ">=5.0.1",
|
||||
"node-forge": ">=1.3.2",
|
||||
"mdast-util-to-hast": ">=13.2.1",
|
||||
"lodash-es": ">=4.17.23"
|
||||
|
||||
@@ -125,6 +125,7 @@ const sidebars = {
|
||||
"tutorials/claude_responses_api",
|
||||
"tutorials/claude_code_max_subscription",
|
||||
"tutorials/claude_code_customer_tracking",
|
||||
"tutorials/claude_code_prompt_cache_routing",
|
||||
"tutorials/claude_code_websearch",
|
||||
"tutorials/claude_mcp",
|
||||
"tutorials/claude_non_anthropic_models",
|
||||
@@ -223,6 +224,7 @@ const sidebars = {
|
||||
label: "Configuration",
|
||||
items: [
|
||||
"set_keys",
|
||||
"proxy_auth",
|
||||
"caching/all_caches",
|
||||
],
|
||||
},
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
"tsx": "^4.7.1"
|
||||
},
|
||||
"overrides": {
|
||||
"glob": ">=11.1.0"
|
||||
"glob": ">=11.1.0",
|
||||
"tar": ">=7.5.7",
|
||||
"@isaacs/brace-expansion": ">=5.0.1"
|
||||
}
|
||||
}
|
||||
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
-- CreateIndex
|
||||
CREATE INDEX "LiteLLM_VerificationToken_user_id_team_id_idx" ON "LiteLLM_VerificationToken"("user_id", "team_id");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "LiteLLM_VerificationToken_team_id_idx" ON "LiteLLM_VerificationToken"("team_id");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "LiteLLM_VerificationToken_budget_reset_at_expires_idx" ON "LiteLLM_VerificationToken"("budget_reset_at", "expires");
|
||||
@@ -310,6 +310,16 @@ model LiteLLM_VerificationToken {
|
||||
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
|
||||
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
|
||||
object_permission LiteLLM_ObjectPermissionTable? @relation(fields: [object_permission_id], references: [object_permission_id])
|
||||
|
||||
// SELECT COUNT(*) FROM (SELECT "public"."LiteLLM_VerificationToken"."token" FROM "public"."LiteLLM_VerificationToken" WHERE ("public"."LiteLLM_VerificationToken"."user_id" = $1 AND ("public"."LiteLLM_VerificationToken"."team_id" IS NULL OR "public"."LiteLLM_VerificationToken"."team_id" <> $2)) OFFSET $3 ) AS "sub"
|
||||
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."user_id" = $1 OFFSET $2
|
||||
@@index([user_id, team_id])
|
||||
|
||||
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."team_id" = $1 OFFSET $2
|
||||
@@index([team_id])
|
||||
|
||||
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE (("public"."LiteLLM_VerificationToken"."expires" IS NULL OR "public"."LiteLLM_VerificationToken"."expires" > $1) AND "public"."LiteLLM_VerificationToken"."budget_reset_at" < $2) OFFSET $3
|
||||
@@index([budget_reset_at, expires])
|
||||
}
|
||||
|
||||
// Audit table for deleted keys - preserves spend and key information for historical tracking
|
||||
|
||||
@@ -45,7 +45,14 @@ from litellm.llms.custom_httpx.http_handler import (
|
||||
httpxSpecialProvider,
|
||||
)
|
||||
from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus
|
||||
from litellm.types.integrations.datadog import *
|
||||
from litellm.types.integrations.datadog import (
|
||||
DD_ERRORS,
|
||||
DD_MAX_BATCH_SIZE,
|
||||
DataDogStatus,
|
||||
DatadogInitParams,
|
||||
DatadogPayload,
|
||||
DatadogProxyFailureHookJsonMessage,
|
||||
)
|
||||
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
|
||||
@@ -85,12 +92,14 @@ class DataDogLogger(
|
||||
"""
|
||||
try:
|
||||
verbose_logger.debug("Datadog: in init datadog logger")
|
||||
|
||||
|
||||
self.is_mock_mode = should_use_datadog_mock()
|
||||
|
||||
|
||||
if self.is_mock_mode:
|
||||
create_mock_datadog_client()
|
||||
verbose_logger.debug("[DATADOG MOCK] Datadog logger initialized in mock mode")
|
||||
verbose_logger.debug(
|
||||
"[DATADOG MOCK] Datadog logger initialized in mock mode"
|
||||
)
|
||||
|
||||
#########################################################
|
||||
# Handle datadog_params set as litellm.datadog_params
|
||||
@@ -209,6 +218,96 @@ class DataDogLogger(
|
||||
)
|
||||
pass
|
||||
|
||||
async def async_post_call_failure_hook(
    self,
    request_data: dict,
    original_exception: Exception,
    user_api_key_dict: Any,
    traceback_str: Optional[str] = None,
) -> Optional[Any]:
    """
    Queue a Datadog ERROR log describing a proxy-level failure.

    Covers failures raised before or outside the LLM completion flow
    (e.g. 401 auth errors, ConnectError while the DB is down) so they
    show up in Datadog alongside Prometheus.

    Args:
        request_data: The incoming request body (not read by this hook).
        original_exception: The exception that caused the failure.
        user_api_key_dict: Key/auth object for the request; only sanitized
            user information derived from it is logged.
        traceback_str: Optional pre-formatted traceback to attach.

    Returns:
        Always None. Any error raised while building or queueing the log
        is caught and logged; it is never propagated to the caller.
    """
    try:
        from litellm.litellm_core_utils.litellm_logging import (
            StandardLoggingPayloadSetup,
        )
        from litellm.litellm_core_utils.safe_json_dumps import safe_dumps

        error_info = StandardLoggingPayloadSetup.get_error_information(
            original_exception=original_exception,
            traceback_str=traceback_str,
        )

        # Only a purely numeric error code is reported as an HTTP status.
        raw_code = error_info.get("error_code") or ""
        status_code: Optional[int] = (
            int(raw_code) if raw_code and str(raw_code).strip().isdigit() else None
        )

        # Use project-standard sanitized user context when running in proxy
        user_context: Dict[str, Any] = {}
        try:
            from litellm.proxy.litellm_pre_call_utils import (
                LiteLLMProxyRequestSetup,
            )

            sanitized = (
                LiteLLMProxyRequestSetup.get_sanitized_user_information_from_key(
                    user_api_key_dict=user_api_key_dict
                )
            )
            if isinstance(sanitized, dict):
                user_context = dict(sanitized)
            else:
                user_context = sanitized
        except Exception:
            # Fallback if proxy not available (e.g. SDK-only): minimal safe fields
            for field in ("request_route", "team_id", "user_id", "end_user_id"):
                if hasattr(user_api_key_dict, field):
                    user_context[field] = getattr(user_api_key_dict, field, None)

        message_payload: DatadogProxyFailureHookJsonMessage = {
            "exception": error_info.get("error_message") or str(original_exception),
            "error_class": error_info.get("error_class")
            or original_exception.__class__.__name__,
            "status_code": status_code,
            "traceback": error_info.get("traceback") or "",
            "user_api_key_dict": user_context,
        }

        dd_payload = DatadogPayload(
            ddsource=get_datadog_source(),
            ddtags=get_datadog_tags(),
            hostname=get_datadog_hostname(),
            message=safe_dumps(message_payload),
            service=get_datadog_service(),
            status=DataDogStatus.ERROR,
        )
        self._add_trace_context_to_payload(dd_payload=dd_payload)
        self.log_queue.append(dd_payload)

        # Flush right away once the in-memory queue reaches the batch size.
        if len(self.log_queue) >= self.batch_size:
            await self.async_send_batch()
    except Exception as e:
        verbose_logger.exception(
            f"Datadog: async_post_call_failure_hook - {str(e)}\n{traceback.format_exc()}"
        )
    return None
|
||||
|
||||
async def async_send_batch(self):
|
||||
"""
|
||||
Sends the in memory logs queue to datadog api
|
||||
@@ -230,9 +329,11 @@ class DataDogLogger(
|
||||
len(self.log_queue),
|
||||
self.intake_url,
|
||||
)
|
||||
|
||||
|
||||
if self.is_mock_mode:
|
||||
verbose_logger.debug("[DATADOG MOCK] Mock mode enabled - API calls will be intercepted")
|
||||
verbose_logger.debug(
|
||||
"[DATADOG MOCK] Mock mode enabled - API calls will be intercepted"
|
||||
)
|
||||
|
||||
response = await self.async_send_compressed_data(self.log_queue)
|
||||
if response.status_code == 413:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import base64
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, cast
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast
|
||||
|
||||
from litellm.types.llms.openai import (
|
||||
ChatCompletionAssistantContentValue,
|
||||
@@ -326,10 +326,22 @@ class ChunkProcessor:
|
||||
thinking_blocks: List[
|
||||
Union["ChatCompletionThinkingBlock", "ChatCompletionRedactedThinkingBlock"]
|
||||
] = []
|
||||
combined_thinking_text: Optional[str] = None
|
||||
data: Optional[str] = None
|
||||
signature: Optional[str] = None
|
||||
type: Literal["thinking", "redacted_thinking"] = "thinking"
|
||||
current_thinking_text_parts: List[str] = []
|
||||
current_signature: Optional[str] = None
|
||||
|
||||
def _flush_thinking_block() -> None:
|
||||
nonlocal current_thinking_text_parts, current_signature
|
||||
if len(current_thinking_text_parts) > 0 and current_signature:
|
||||
thinking_blocks.append(
|
||||
ChatCompletionThinkingBlock(
|
||||
type="thinking",
|
||||
thinking="".join(current_thinking_text_parts),
|
||||
signature=current_signature,
|
||||
)
|
||||
)
|
||||
current_thinking_text_parts = []
|
||||
current_signature = None
|
||||
|
||||
for chunk in chunks:
|
||||
choices = chunk["choices"]
|
||||
for choice in choices:
|
||||
@@ -339,33 +351,25 @@ class ChunkProcessor:
|
||||
for thinking_block in thinking:
|
||||
thinking_type = thinking_block.get("type", None)
|
||||
if thinking_type and thinking_type == "redacted_thinking":
|
||||
type = "redacted_thinking"
|
||||
data = thinking_block.get("data", None)
|
||||
_flush_thinking_block()
|
||||
redacted_data = thinking_block.get("data", None)
|
||||
if redacted_data:
|
||||
thinking_blocks.append(
|
||||
ChatCompletionRedactedThinkingBlock(
|
||||
type="redacted_thinking",
|
||||
data=redacted_data,
|
||||
)
|
||||
)
|
||||
else:
|
||||
type = "thinking"
|
||||
thinking_text = thinking_block.get("thinking", None)
|
||||
if thinking_text:
|
||||
if combined_thinking_text is None:
|
||||
combined_thinking_text = ""
|
||||
|
||||
combined_thinking_text += thinking_text
|
||||
current_thinking_text_parts.append(thinking_text)
|
||||
signature = thinking_block.get("signature", None)
|
||||
if signature:
|
||||
current_signature = signature
|
||||
_flush_thinking_block()
|
||||
|
||||
if combined_thinking_text and type == "thinking" and signature:
|
||||
thinking_blocks.append(
|
||||
ChatCompletionThinkingBlock(
|
||||
type=type,
|
||||
thinking=combined_thinking_text,
|
||||
signature=signature,
|
||||
)
|
||||
)
|
||||
elif data and type == "redacted_thinking":
|
||||
thinking_blocks.append(
|
||||
ChatCompletionRedactedThinkingBlock(
|
||||
type=type,
|
||||
data=data,
|
||||
)
|
||||
)
|
||||
_flush_thinking_block()
|
||||
|
||||
if len(thinking_blocks) > 0:
|
||||
return thinking_blocks
|
||||
|
||||
@@ -46,8 +46,12 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
|
||||
"thinking",
|
||||
"context_management",
|
||||
"output_format",
|
||||
"inference_geo",
|
||||
"speed",
|
||||
"output_config",
|
||||
# TODO: Add Anthropic `metadata` support
|
||||
# "metadata",
|
||||
]
|
||||
|
||||
@@ -218,6 +218,7 @@ class OCIChatConfig(BaseConfig):
|
||||
"parallel_tool_calls": False,
|
||||
"audio": False,
|
||||
"web_search_options": False,
|
||||
"response_format": "responseFormat",
|
||||
}
|
||||
|
||||
# Cohere and Gemini use the same parameter mapping as GENERIC
|
||||
@@ -269,6 +270,9 @@ class OCIChatConfig(BaseConfig):
|
||||
|
||||
adapted_params[alias] = value
|
||||
|
||||
if alias == "responseFormat":
|
||||
adapted_params["response_format"] = value
|
||||
|
||||
return adapted_params
|
||||
|
||||
def _sign_with_oci_signer(
|
||||
@@ -673,6 +677,36 @@ class OCIChatConfig(BaseConfig):
|
||||
selected_params["tools"] = adapt_tool_definition_to_oci_standard( # type: ignore[assignment]
|
||||
selected_params["tools"], vendor # type: ignore[arg-type]
|
||||
)
|
||||
|
||||
# Transform response_format type to OCI uppercase format
|
||||
if "responseFormat" in selected_params:
|
||||
rf = selected_params["responseFormat"]
|
||||
if isinstance(rf, dict) and "type" in rf:
|
||||
rf_payload = dict(rf)
|
||||
selected_params["responseFormat"] = rf_payload
|
||||
|
||||
response_type = rf_payload["type"]
|
||||
schema_payload: Optional[Any] = None
|
||||
|
||||
if "json_schema" in rf_payload:
|
||||
raw_schema_payload = rf_payload.pop("json_schema")
|
||||
if isinstance(raw_schema_payload, dict):
|
||||
schema_payload = dict(raw_schema_payload)
|
||||
else:
|
||||
schema_payload = raw_schema_payload
|
||||
|
||||
if schema_payload is not None:
|
||||
rf_payload["jsonSchema"] = schema_payload
|
||||
|
||||
if vendor == OCIVendors.COHERE:
|
||||
# Cohere expects lower-case type values
|
||||
rf_payload["type"] = response_type
|
||||
else:
|
||||
format_type = response_type.upper()
|
||||
if format_type == "JSON":
|
||||
format_type = "JSON_OBJECT"
|
||||
rf_payload["type"] = format_type
|
||||
|
||||
return selected_params
|
||||
|
||||
def adapt_messages_to_cohere_standard(self, messages: List[AllMessageValues]) -> List[CohereMessage]:
|
||||
@@ -806,11 +840,12 @@ class OCIChatConfig(BaseConfig):
|
||||
|
||||
|
||||
# Create Cohere-specific chat request
|
||||
optional_cohere_params = self._get_optional_params(OCIVendors.COHERE, optional_params)
|
||||
chat_request = CohereChatRequest(
|
||||
apiFormat="COHERE",
|
||||
message=self._extract_text_content(user_messages[-1]["content"]),
|
||||
chatHistory=self.adapt_messages_to_cohere_standard(messages),
|
||||
**self._get_optional_params(OCIVendors.COHERE, optional_params)
|
||||
**optional_cohere_params
|
||||
)
|
||||
|
||||
data = OCICompletionPayload(
|
||||
|
||||
@@ -269,26 +269,27 @@ class OpenAIVideoConfig(BaseVideoConfig):
|
||||
) -> Tuple[str, Dict]:
|
||||
"""
|
||||
Transform the video list request for OpenAI API.
|
||||
|
||||
|
||||
OpenAI API expects the following request:
|
||||
- GET /v1/videos
|
||||
"""
|
||||
# Use the api_base directly for video list
|
||||
url = api_base
|
||||
|
||||
|
||||
# Prepare query parameters
|
||||
params = {}
|
||||
if after is not None:
|
||||
params["after"] = after
|
||||
# Decode the wrapped video ID back to the original provider ID
|
||||
params["after"] = extract_original_video_id(after)
|
||||
if limit is not None:
|
||||
params["limit"] = str(limit)
|
||||
if order is not None:
|
||||
params["order"] = order
|
||||
|
||||
|
||||
# Add any extra query parameters
|
||||
if extra_query:
|
||||
params.update(extra_query)
|
||||
|
||||
|
||||
return url, params
|
||||
|
||||
def transform_video_list_response(
|
||||
@@ -296,18 +297,40 @@ class OpenAIVideoConfig(BaseVideoConfig):
|
||||
raw_response: httpx.Response,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
) -> Dict[str,str]:
|
||||
) -> Dict[str, str]:
|
||||
response_data = raw_response.json()
|
||||
|
||||
|
||||
if custom_llm_provider and "data" in response_data:
|
||||
for video_obj in response_data.get("data", []):
|
||||
if isinstance(video_obj, dict) and "id" in video_obj:
|
||||
video_obj["id"] = encode_video_id_with_provider(
|
||||
video_obj["id"],
|
||||
custom_llm_provider,
|
||||
video_obj.get("model")
|
||||
video_obj["id"],
|
||||
custom_llm_provider,
|
||||
video_obj.get("model"),
|
||||
)
|
||||
|
||||
|
||||
# Encode pagination cursor IDs so they remain consistent
|
||||
# with the wrapped data[].id format
|
||||
data_list = response_data.get("data", [])
|
||||
if response_data.get("first_id"):
|
||||
first_model = None
|
||||
if data_list and isinstance(data_list[0], dict):
|
||||
first_model = data_list[0].get("model")
|
||||
response_data["first_id"] = encode_video_id_with_provider(
|
||||
response_data["first_id"],
|
||||
custom_llm_provider,
|
||||
first_model,
|
||||
)
|
||||
if response_data.get("last_id"):
|
||||
last_model = None
|
||||
if data_list and isinstance(data_list[-1], dict):
|
||||
last_model = data_list[-1].get("model")
|
||||
response_data["last_id"] = encode_video_id_with_provider(
|
||||
response_data["last_id"],
|
||||
custom_llm_provider,
|
||||
last_model,
|
||||
)
|
||||
|
||||
return response_data
|
||||
|
||||
def transform_video_delete_request(
|
||||
|
||||
@@ -56,34 +56,36 @@ class VertexAIAnthropicConfig(AnthropicConfig):
|
||||
) -> None:
|
||||
"""
|
||||
Add context_management beta headers to the beta_set.
|
||||
|
||||
|
||||
- If any edit has type "compact_20260112", add compact-2026-01-12 header
|
||||
- For all other edits, add context-management-2025-06-27 header
|
||||
|
||||
|
||||
Args:
|
||||
beta_set: Set of beta headers to modify in-place
|
||||
context_management: The context_management dict from optional_params
|
||||
"""
|
||||
from litellm.types.llms.anthropic import ANTHROPIC_BETA_HEADER_VALUES
|
||||
|
||||
|
||||
edits = context_management.get("edits", [])
|
||||
has_compact = False
|
||||
has_other = False
|
||||
|
||||
|
||||
for edit in edits:
|
||||
edit_type = edit.get("type", "")
|
||||
if edit_type == "compact_20260112":
|
||||
has_compact = True
|
||||
else:
|
||||
has_other = True
|
||||
|
||||
|
||||
# Add compact header if any compact edits exist
|
||||
if has_compact:
|
||||
beta_set.add(ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value)
|
||||
|
||||
|
||||
# Add context management header if any other edits exist
|
||||
if has_other:
|
||||
beta_set.add(ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value)
|
||||
beta_set.add(
|
||||
ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
|
||||
)
|
||||
|
||||
def transform_request(
|
||||
self,
|
||||
@@ -102,10 +104,10 @@ class VertexAIAnthropicConfig(AnthropicConfig):
|
||||
)
|
||||
|
||||
data.pop("model", None) # vertex anthropic doesn't accept 'model' parameter
|
||||
|
||||
|
||||
# VertexAI doesn't support output_format parameter, remove it if present
|
||||
data.pop("output_format", None)
|
||||
|
||||
|
||||
tools = optional_params.get("tools")
|
||||
tool_search_used = self.is_tool_search_used(tools)
|
||||
auto_betas = self.get_anthropic_beta_list(
|
||||
@@ -119,16 +121,30 @@ class VertexAIAnthropicConfig(AnthropicConfig):
|
||||
|
||||
beta_set = set(auto_betas)
|
||||
if tool_search_used:
|
||||
beta_set.add("tool-search-tool-2025-10-19") # Vertex requires this header for tool search
|
||||
|
||||
beta_set.add(
|
||||
"tool-search-tool-2025-10-19"
|
||||
) # Vertex requires this header for tool search
|
||||
|
||||
# Add context_management beta headers (compact and/or context-management)
|
||||
context_management = optional_params.get("context_management")
|
||||
if context_management:
|
||||
self._add_context_management_beta_headers(beta_set, context_management)
|
||||
|
||||
extra_headers = optional_params.get("extra_headers") or {}
|
||||
anthropic_beta_value = extra_headers.get("anthropic-beta", "")
|
||||
if isinstance(anthropic_beta_value, str) and anthropic_beta_value:
|
||||
for beta in anthropic_beta_value.split(","):
|
||||
beta = beta.strip()
|
||||
if beta:
|
||||
beta_set.add(beta)
|
||||
elif isinstance(anthropic_beta_value, list):
|
||||
beta_set.update(anthropic_beta_value)
|
||||
|
||||
data.pop("extra_headers", None)
|
||||
|
||||
if beta_set:
|
||||
data["anthropic_beta"] = list(beta_set)
|
||||
|
||||
|
||||
return data
|
||||
|
||||
def map_openai_params(
|
||||
@@ -148,7 +164,7 @@ class VertexAIAnthropicConfig(AnthropicConfig):
|
||||
original_model = model
|
||||
if "response_format" in non_default_params:
|
||||
model = "claude-3-sonnet-20240229" # Use a model that will use tool-based approach
|
||||
|
||||
|
||||
# Call parent method with potentially modified model name
|
||||
optional_params = super().map_openai_params(
|
||||
non_default_params=non_default_params,
|
||||
@@ -156,10 +172,10 @@ class VertexAIAnthropicConfig(AnthropicConfig):
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
|
||||
|
||||
# Restore original model name for any other processing
|
||||
model = original_model
|
||||
|
||||
|
||||
return optional_params
|
||||
|
||||
def transform_response(
|
||||
|
||||
@@ -28540,6 +28540,193 @@
|
||||
"supports_function_calling": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-3-5-sonnet": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"max_tokens": 8192,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-3-5-sonnet-20241022": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"max_tokens": 8192,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-3-7-sonnet": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-haiku-4.5": {
|
||||
"cache_creation_input_token_cost": 1.25e-06,
|
||||
"cache_read_input_token_cost": 1e-07,
|
||||
"input_cost_per_token": 1e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 5e-06,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-opus-4": {
|
||||
"cache_creation_input_token_cost": 1.875e-05,
|
||||
"cache_read_input_token_cost": 1.5e-06,
|
||||
"input_cost_per_token": 1.5e-05,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 32000,
|
||||
"max_tokens": 32000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 7.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-opus-4.1": {
|
||||
"cache_creation_input_token_cost": 1.875e-05,
|
||||
"cache_read_input_token_cost": 1.5e-06,
|
||||
"input_cost_per_token": 1.5e-05,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 32000,
|
||||
"max_tokens": 32000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 7.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-opus-4.5": {
|
||||
"cache_creation_input_token_cost": 6.25e-06,
|
||||
"cache_read_input_token_cost": 5e-07,
|
||||
"input_cost_per_token": 5e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 2.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-opus-4.6": {
|
||||
"cache_creation_input_token_cost": 6.25e-06,
|
||||
"cache_read_input_token_cost": 5e-07,
|
||||
"input_cost_per_token": 5e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 2.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-sonnet-4": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-sonnet-4.5": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 1000000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/cohere/command-a": {
|
||||
"input_cost_per_token": 2.5e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
@@ -28549,7 +28736,8 @@
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1e-05,
|
||||
"supports_function_calling": true,
|
||||
"supports_tool_choice": true
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
"vercel_ai_gateway/cohere/command-r": {
|
||||
"input_cost_per_token": 1.5e-07,
|
||||
|
||||
@@ -92,14 +92,34 @@ class ZscalerAIGuard(CustomGuardrail):
|
||||
Raises:
|
||||
Exception: If content is blocked by Zscaler AI Guard
|
||||
"""
|
||||
|
||||
texts = inputs.get("texts", [])
|
||||
try:
|
||||
verbose_proxy_logger.debug(f"ZscalerAIGuard: Checking {len(texts)} text(s)")
|
||||
metadata = request_data.get("metadata", {})
|
||||
|
||||
custom_policy_id = request_data.get("metadata", {}).get(
|
||||
"zguard_policy_id", self.policy_id
|
||||
user_api_key_metadata = metadata.get("user_api_key_metadata", {}) or {}
|
||||
team_metadata = metadata.get("team_metadata", {}) or {}
|
||||
|
||||
# Precedence for policy_id:
|
||||
# 1. metadata.zguard_policy_id # request level
|
||||
# 2. user_api_key_metadata.zguard_policy_id # Key level
|
||||
# 3. team_metadata.zguard_policy_id # Team level
|
||||
# 4. self.policy_id (from environment) # Global
|
||||
policy_id = (
|
||||
metadata.get("zguard_policy_id")
|
||||
if "zguard_policy_id" in metadata
|
||||
else (
|
||||
user_api_key_metadata.get("zguard_policy_id")
|
||||
if "zguard_policy_id" in user_api_key_metadata
|
||||
else (
|
||||
team_metadata.get("zguard_policy_id")
|
||||
if "zguard_policy_id" in team_metadata
|
||||
else self.policy_id
|
||||
)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(f"custom_policy_id: {custom_policy_id}")
|
||||
verbose_proxy_logger.info(f"policy_id applied: {policy_id}")
|
||||
|
||||
kwargs = {}
|
||||
if self.send_user_api_key_alias:
|
||||
@@ -116,27 +136,29 @@ class ZscalerAIGuard(CustomGuardrail):
|
||||
)
|
||||
verbose_proxy_logger.debug(f"inside apply_guardrail kwargs: {kwargs}")
|
||||
|
||||
# Check each text (Zscaler processes one at a time)
|
||||
for text in texts:
|
||||
zscaler_ai_guard_result = None
|
||||
direction = "OUT" if input_type == "response" else "IN"
|
||||
verbose_proxy_logger.debug(f"direction: {direction}")
|
||||
# Concatenate all texts and send to Zscaler AI Guard
|
||||
if texts:
|
||||
concatenated_text = " ".join(texts)
|
||||
zscaler_ai_guard_result = await self.make_zscaler_ai_guard_api_call(
|
||||
zscaler_ai_guard_url=self.zscaler_ai_guard_url,
|
||||
api_key=self.api_key,
|
||||
policy_id=self.policy_id,
|
||||
direction="IN",
|
||||
content=text,
|
||||
policy_id=policy_id,
|
||||
direction=direction,
|
||||
content=concatenated_text,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
if (
|
||||
zscaler_ai_guard_result
|
||||
and zscaler_ai_guard_result.get("action") == "BLOCK"
|
||||
):
|
||||
blocking_info = zscaler_ai_guard_result.get(
|
||||
"zscaler_ai_guard_response"
|
||||
)
|
||||
error_message = f"Content blocked by Zscaler AI Guard: {self.extract_blocking_info(blocking_info)}"
|
||||
raise Exception(error_message)
|
||||
|
||||
if (
|
||||
zscaler_ai_guard_result
|
||||
and zscaler_ai_guard_result.get("action") == "BLOCK"
|
||||
):
|
||||
blocking_info = zscaler_ai_guard_result.get(
|
||||
"zscaler_ai_guard_response"
|
||||
)
|
||||
error_message = f"Content blocked by Zscaler AI Guard: {self.extract_blocking_info(blocking_info)}"
|
||||
raise Exception(error_message)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
"ZscalerAIGuard: Failed to apply guardrail: %s", str(e)
|
||||
|
||||
@@ -216,7 +216,14 @@ def _update_metadata_field(updated_kv: dict, field_name: str) -> None:
|
||||
field_name: Name of the metadata field being updated
|
||||
"""
|
||||
if field_name in LiteLLM_ManagementEndpoint_MetadataFields_Premium:
|
||||
_premium_user_check()
|
||||
value = updated_kv.get(field_name)
|
||||
# Skip the premium check for empty collections ([] or {}).
|
||||
# The UI sends these as defaults even when the user hasn't configured
|
||||
# any enterprise features (see issue #20304). However, we still
|
||||
# proceed with the update so that users can intentionally clear a
|
||||
# previously-set field by sending an empty list/dict.
|
||||
if value is not None and value != [] and value != {}:
|
||||
_premium_user_check()
|
||||
|
||||
if field_name in updated_kv and updated_kv[field_name] is not None:
|
||||
# remove field from updated_kv
|
||||
|
||||
@@ -308,6 +308,16 @@ model LiteLLM_VerificationToken {
|
||||
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
|
||||
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
|
||||
object_permission LiteLLM_ObjectPermissionTable? @relation(fields: [object_permission_id], references: [object_permission_id])
|
||||
|
||||
// SELECT COUNT(*) FROM (SELECT "public"."LiteLLM_VerificationToken"."token" FROM "public"."LiteLLM_VerificationToken" WHERE ("public"."LiteLLM_VerificationToken"."user_id" = $1 AND ("public"."LiteLLM_VerificationToken"."team_id" IS NULL OR "public"."LiteLLM_VerificationToken"."team_id" <> $2)) OFFSET $3 ) AS "sub"
|
||||
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."user_id" = $1 OFFSET $2
|
||||
@@index([user_id, team_id])
|
||||
|
||||
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."team_id" = $1 OFFSET $2
|
||||
@@index([team_id])
|
||||
|
||||
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE (("public"."LiteLLM_VerificationToken"."expires" IS NULL OR "public"."LiteLLM_VerificationToken"."expires" > $1) AND "public"."LiteLLM_VerificationToken"."budget_reset_at" < $2) OFFSET $3
|
||||
@@index([budget_reset_at, expires])
|
||||
}
|
||||
|
||||
// Audit table for deleted keys - preserves spend and key information for historical tracking
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import copy
|
||||
import hashlib
|
||||
import json
|
||||
import secrets
|
||||
@@ -642,6 +641,34 @@ def _sanitize_request_body_for_spend_logs_payload(
|
||||
return {k: _sanitize_value(v) for k, v in request_body.items()}
|
||||
|
||||
|
||||
def _convert_to_json_serializable_dict(obj: Any) -> Any:
    """
    Build a detached, JSON-friendly copy of ``obj`` without going through pickle.

    Used in place of ``copy.deepcopy`` before redaction: pickle-based deepcopy
    fails on Pydantic v2 models containing ``_thread.RLock`` objects, so models
    are flattened with ``model_dump()`` and containers are rebuilt recursively.

    Args:
        obj: Object to convert (dict, list, tuple, Pydantic model, arbitrary
            object exposing ``__dict__``, or primitive).

    Returns:
        A converted copy for Pydantic models, dicts, lists, tuples and objects
        with ``__dict__``; any other value is returned unchanged.
    """
    if isinstance(obj, BaseModel):
        # Use Pydantic's model_dump() instead of pickle
        return obj.model_dump()
    if isinstance(obj, dict):
        return {k: _convert_to_json_serializable_dict(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [_convert_to_json_serializable_dict(item) for item in obj]
    if hasattr(obj, "__dict__"):
        # Handle objects with __dict__ attribute by converting their attributes.
        # NOTE(review): there is no cycle detection, so an object graph with a
        # reference cycle would recurse without bound — confirm inputs are acyclic.
        return _convert_to_json_serializable_dict(obj.__dict__)
    if isinstance(obj, tuple):
        # Tuples used to be returned by reference, which left nested models
        # unconverted and kept nested dicts/lists aliased to the caller's data
        # (so later in-place redaction could leak into it). Rebuild them as the
        # deepcopy this function replaces would have done.
        return tuple(_convert_to_json_serializable_dict(item) for item in obj)
    # Primitives (str, int, float, bool, None) pass through
    return obj
|
||||
|
||||
|
||||
def _get_proxy_server_request_for_spend_logs_payload(
|
||||
metadata: dict,
|
||||
litellm_params: dict,
|
||||
@@ -649,7 +676,7 @@ def _get_proxy_server_request_for_spend_logs_payload(
|
||||
) -> str:
|
||||
"""
|
||||
Only store if _should_store_prompts_and_responses_in_spend_logs() is True
|
||||
|
||||
|
||||
If turn_off_message_logging is enabled, redact messages in the request body.
|
||||
"""
|
||||
if _should_store_prompts_and_responses_in_spend_logs():
|
||||
@@ -674,9 +701,9 @@ def _get_proxy_server_request_for_spend_logs_payload(
|
||||
),
|
||||
}
|
||||
|
||||
# If redaction is enabled, deep copy request body before redacting
|
||||
# If redaction is enabled, convert to serializable dict before redacting
|
||||
if should_redact_message_logging(model_call_details=model_call_details):
|
||||
_request_body = copy.deepcopy(_request_body)
|
||||
_request_body = _convert_to_json_serializable_dict(_request_body)
|
||||
perform_redaction(model_call_details=_request_body, result=None)
|
||||
|
||||
_request_body = _sanitize_request_body_for_spend_logs_payload(_request_body)
|
||||
@@ -736,9 +763,9 @@ def _get_response_for_spend_logs_payload(
|
||||
),
|
||||
}
|
||||
|
||||
# If redaction is enabled, deep copy response before redacting
|
||||
# If redaction is enabled, convert to serializable dict before redacting
|
||||
if should_redact_message_logging(model_call_details=model_call_details):
|
||||
response_obj = copy.deepcopy(response_obj)
|
||||
response_obj = _convert_to_json_serializable_dict(response_obj)
|
||||
response_obj = perform_redaction(model_call_details={}, result=response_obj)
|
||||
|
||||
sanitized_wrapper = _sanitize_request_body_for_spend_logs_payload(
|
||||
|
||||
@@ -88,6 +88,8 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
|
||||
self._pending_tool_events: List[BaseLiteLLMOpenAIResponseObject] = []
|
||||
self._tool_output_index_by_call_id: dict[str, int] = {}
|
||||
self._tool_args_by_call_id: dict[str, str] = {}
|
||||
self._tool_call_id_by_index: dict[int, str] = {}
|
||||
self._ambiguous_tool_call_indexes: set[int] = set()
|
||||
self._next_tool_output_index: int = 1 # output_index=0 reserved for the message item
|
||||
self._final_tool_events_queued: bool = False
|
||||
self._sequence_number: int = 0
|
||||
@@ -111,6 +113,19 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
|
||||
self._tool_output_index_by_call_id[call_id] = idx
|
||||
return idx
|
||||
|
||||
def _normalize_tool_call_index(self, tool_call: object) -> Optional[int]:
|
||||
idx_raw = (
|
||||
tool_call.get("index")
|
||||
if isinstance(tool_call, dict)
|
||||
else getattr(tool_call, "index", None)
|
||||
)
|
||||
if idx_raw is None:
|
||||
return None
|
||||
try:
|
||||
return int(idx_raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _is_reasoning_end(self, chunk):
|
||||
delta = chunk.choices[0].delta
|
||||
@@ -143,10 +158,28 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
|
||||
return
|
||||
|
||||
for tc in tool_calls:
|
||||
tc_index = self._normalize_tool_call_index(tc)
|
||||
call_id_raw = tc.get("id") if isinstance(tc, dict) else getattr(tc, "id", None)
|
||||
if not call_id_raw:
|
||||
call_id = ""
|
||||
|
||||
if call_id_raw:
|
||||
call_id = str(call_id_raw)
|
||||
if tc_index is not None:
|
||||
existing_call_id = self._tool_call_id_by_index.get(tc_index)
|
||||
if existing_call_id is not None and existing_call_id != call_id:
|
||||
# Reusing the same index for multiple call_ids is ambiguous for id-less deltas.
|
||||
# Guard against silent misrouting by disabling index fallback for this index.
|
||||
self._ambiguous_tool_call_indexes.add(tc_index)
|
||||
self._tool_call_id_by_index[tc_index] = call_id
|
||||
elif tc_index is not None:
|
||||
if tc_index in self._ambiguous_tool_call_indexes:
|
||||
continue
|
||||
mapped_call_id = self._tool_call_id_by_index.get(tc_index)
|
||||
if mapped_call_id:
|
||||
call_id = mapped_call_id
|
||||
|
||||
if not call_id:
|
||||
continue
|
||||
call_id = str(call_id_raw)
|
||||
|
||||
fn = tc.get("function") if isinstance(tc, dict) else getattr(tc, "function", None)
|
||||
fn_name = ""
|
||||
|
||||
@@ -61,9 +61,10 @@ class PromptCachingDeploymentCheck(CustomLogger):
|
||||
if (
|
||||
call_type != CallTypes.completion.value
|
||||
and call_type != CallTypes.acompletion.value
|
||||
and call_type != CallTypes.anthropic_messages.value
|
||||
): # only use prompt caching for completion calls
|
||||
verbose_logger.debug(
|
||||
"litellm.router_utils.pre_call_checks.prompt_caching_deployment_check: skipping adding model id to prompt caching cache, CALL TYPE IS NOT COMPLETION"
|
||||
"litellm.router_utils.pre_call_checks.prompt_caching_deployment_check: skipping adding model id to prompt caching cache, CALL TYPE IS NOT COMPLETION or ANTHROPIC MESSAGE"
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@@ -362,6 +362,7 @@ class AnthropicMessagesRequestOptionalParams(TypedDict, total=False):
|
||||
container: Optional[Dict[str, Any]] # Container config with skills for code execution
|
||||
output_format: Optional[AnthropicOutputSchema] # Structured outputs support
|
||||
speed: Optional[str] # Fast mode support for Opus models
|
||||
output_config: Optional[AnthropicOutputConfig] # Configuration for Claude's output behavior
|
||||
|
||||
|
||||
class AnthropicMessagesRequest(AnthropicMessagesRequestOptionalParams, total=False):
|
||||
|
||||
+32
-26
@@ -102,6 +102,7 @@ class OCIChatRequestPayload(BaseModel):
|
||||
seed: Optional[int] = None
|
||||
frequencyPenalty: Optional[float] = None
|
||||
presencePenalty: Optional[float] = None
|
||||
responseFormat: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class OCIServingMode(BaseModel):
|
||||
@@ -125,14 +126,14 @@ class OCICompletionPayload(BaseModel):
|
||||
class OCICompletionTokenDetails(BaseModel):
|
||||
"""Completion token details in the OCI response."""
|
||||
|
||||
acceptedPredictionTokens: int
|
||||
reasoningTokens: int
|
||||
acceptedPredictionTokens: Optional[int] = None
|
||||
reasoningTokens: Optional[int] = None
|
||||
|
||||
|
||||
class OCIPromptTokensDetails(BaseModel):
|
||||
"""Prompt token details in the OCI response."""
|
||||
|
||||
cachedTokens: int
|
||||
cachedTokens: Optional[int] = None
|
||||
|
||||
|
||||
class OCIResponseUsage(BaseModel):
|
||||
@@ -205,40 +206,40 @@ class CohereStreamChunk(BaseModel):
|
||||
|
||||
class CohereMessage(BaseModel):
|
||||
"""Base model for Cohere messages."""
|
||||
|
||||
|
||||
role: str
|
||||
message: str
|
||||
message: Optional[str] = None
|
||||
toolCalls: Optional[List[CohereToolCall]] = None
|
||||
|
||||
|
||||
class CohereUserMessage(CohereMessage):
|
||||
"""User message in Cohere chat."""
|
||||
|
||||
|
||||
role: Literal["USER"] = "USER"
|
||||
|
||||
|
||||
class CohereChatBotMessage(CohereMessage):
|
||||
"""Chatbot message in Cohere chat."""
|
||||
|
||||
|
||||
role: Literal["CHATBOT"] = "CHATBOT"
|
||||
|
||||
|
||||
class CohereSystemMessage(CohereMessage):
|
||||
"""System message in Cohere chat."""
|
||||
|
||||
|
||||
role: Literal["SYSTEM"] = "SYSTEM"
|
||||
|
||||
|
||||
class CohereToolMessage(CohereMessage):
|
||||
"""Tool message in Cohere chat."""
|
||||
|
||||
|
||||
role: Literal["TOOL"] = "TOOL"
|
||||
toolCallId: str
|
||||
|
||||
|
||||
class CohereParameterDefinition(BaseModel):
|
||||
"""Parameter definition for Cohere tools."""
|
||||
|
||||
|
||||
description: str
|
||||
type: str
|
||||
isRequired: bool = False
|
||||
@@ -246,7 +247,7 @@ class CohereParameterDefinition(BaseModel):
|
||||
|
||||
class CohereTool(BaseModel):
|
||||
"""Tool definition for Cohere."""
|
||||
|
||||
|
||||
name: str
|
||||
description: str
|
||||
parameterDefinitions: Dict[str, CohereParameterDefinition]
|
||||
@@ -254,38 +255,44 @@ class CohereTool(BaseModel):
|
||||
|
||||
class CohereToolCall(BaseModel):
|
||||
"""Tool call made by Cohere model."""
|
||||
|
||||
|
||||
name: str
|
||||
parameters: Dict[str, Any]
|
||||
|
||||
|
||||
class CohereToolResult(BaseModel):
|
||||
"""Result of a tool call."""
|
||||
|
||||
|
||||
callId: str
|
||||
result: str
|
||||
|
||||
|
||||
class CohereResponseFormat(BaseModel):
|
||||
"""Response format for Cohere."""
|
||||
|
||||
|
||||
type: str
|
||||
|
||||
|
||||
class CohereResponseTextFormat(CohereResponseFormat):
|
||||
"""Text response format for Cohere."""
|
||||
|
||||
|
||||
type: Literal["text"] = "text"
|
||||
|
||||
|
||||
class CohereResponseJSONSchemaFormat(CohereResponseFormat):
|
||||
"""JSON schema response format for Cohere."""
|
||||
|
||||
type: Literal["json_schema"] = "json_schema"
|
||||
jsonSchema: Dict[str, Any]
|
||||
|
||||
|
||||
class CohereChatRequest(BaseModel):
|
||||
"""Cohere chat request model."""
|
||||
|
||||
|
||||
# Required fields
|
||||
message: str
|
||||
apiFormat: Literal["COHERE"] = "COHERE"
|
||||
|
||||
|
||||
# Optional fields
|
||||
chatHistory: Optional[List[CohereMessage]] = None
|
||||
maxTokens: Optional[int] = None
|
||||
@@ -298,7 +305,7 @@ class CohereChatRequest(BaseModel):
|
||||
seed: Optional[int] = None
|
||||
tools: Optional[List[CohereTool]] = None
|
||||
toolChoice: Optional[Union[str, Dict[str, Any]]] = None
|
||||
responseFormat: Optional[CohereResponseFormat] = None
|
||||
responseFormat: Optional[Union[CohereResponseTextFormat, CohereResponseJSONSchemaFormat, CohereResponseFormat]] = None
|
||||
preambleOverride: Optional[str] = None
|
||||
documents: Optional[List[Dict[str, Any]]] = None
|
||||
searchQueriesOnly: Optional[bool] = None
|
||||
@@ -318,7 +325,7 @@ class CohereChatRequest(BaseModel):
|
||||
|
||||
class CohereUsage(BaseModel):
|
||||
"""Usage information for Cohere response."""
|
||||
|
||||
|
||||
promptTokens: int
|
||||
completionTokens: int
|
||||
totalTokens: int
|
||||
@@ -328,7 +335,7 @@ class CohereUsage(BaseModel):
|
||||
|
||||
class CohereCitation(BaseModel):
|
||||
"""Citation in Cohere response."""
|
||||
|
||||
|
||||
start: int
|
||||
end: int
|
||||
text: str
|
||||
@@ -337,19 +344,19 @@ class CohereCitation(BaseModel):
|
||||
|
||||
class CohereSearchQuery(BaseModel):
|
||||
"""Search query generated by Cohere."""
|
||||
|
||||
|
||||
text: str
|
||||
generation_id: str
|
||||
|
||||
|
||||
class CohereChatResponse(BaseModel):
|
||||
"""Cohere chat response model."""
|
||||
|
||||
|
||||
# Required fields
|
||||
text: str
|
||||
apiFormat: Literal["COHERE"] = "COHERE"
|
||||
finishReason: Literal["COMPLETE", "ERROR_TOXIC", "ERROR_LIMIT", "ERROR", "USER_CANCEL", "MAX_TOKENS"]
|
||||
|
||||
|
||||
# Optional fields
|
||||
chatHistory: Optional[List[CohereMessage]] = None
|
||||
citations: Optional[List[CohereCitation]] = None
|
||||
@@ -364,7 +371,7 @@ class CohereChatResponse(BaseModel):
|
||||
|
||||
class CohereChatDetails(BaseModel):
|
||||
"""Chat details for Cohere request."""
|
||||
|
||||
|
||||
compartmentId: str
|
||||
servingMode: OCIServingMode
|
||||
chatRequest: CohereChatRequest
|
||||
@@ -372,8 +379,7 @@ class CohereChatDetails(BaseModel):
|
||||
|
||||
class CohereChatResult(BaseModel):
|
||||
"""Complete Cohere chat result."""
|
||||
|
||||
|
||||
modelId: str
|
||||
modelVersion: str
|
||||
chatResponse: CohereChatResponse
|
||||
|
||||
|
||||
@@ -28540,6 +28540,193 @@
|
||||
"supports_function_calling": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-3-5-sonnet": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"max_tokens": 8192,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-3-5-sonnet-20241022": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"max_tokens": 8192,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-3-7-sonnet": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-haiku-4.5": {
|
||||
"cache_creation_input_token_cost": 1.25e-06,
|
||||
"cache_read_input_token_cost": 1e-07,
|
||||
"input_cost_per_token": 1e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 5e-06,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-opus-4": {
|
||||
"cache_creation_input_token_cost": 1.875e-05,
|
||||
"cache_read_input_token_cost": 1.5e-06,
|
||||
"input_cost_per_token": 1.5e-05,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 32000,
|
||||
"max_tokens": 32000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 7.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-opus-4.1": {
|
||||
"cache_creation_input_token_cost": 1.875e-05,
|
||||
"cache_read_input_token_cost": 1.5e-06,
|
||||
"input_cost_per_token": 1.5e-05,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 32000,
|
||||
"max_tokens": 32000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 7.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-opus-4.5": {
|
||||
"cache_creation_input_token_cost": 6.25e-06,
|
||||
"cache_read_input_token_cost": 5e-07,
|
||||
"input_cost_per_token": 5e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 2.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-opus-4.6": {
|
||||
"cache_creation_input_token_cost": 6.25e-06,
|
||||
"cache_read_input_token_cost": 5e-07,
|
||||
"input_cost_per_token": 5e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 2.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-sonnet-4": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/anthropic/claude-sonnet-4.5": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
"input_cost_per_token": 3e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
"max_input_tokens": 1000000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.5e-05,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"vercel_ai_gateway/cohere/command-a": {
|
||||
"input_cost_per_token": 2.5e-06,
|
||||
"litellm_provider": "vercel_ai_gateway",
|
||||
@@ -28549,7 +28736,8 @@
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1e-05,
|
||||
"supports_function_calling": true,
|
||||
"supports_tool_choice": true
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
"vercel_ai_gateway/cohere/command-r": {
|
||||
"input_cost_per_token": 1.5e-07,
|
||||
|
||||
+3
-1
@@ -11,6 +11,8 @@
|
||||
"jest": "^29.7.0"
|
||||
},
|
||||
"overrides": {
|
||||
"glob": ">=11.1.0"
|
||||
"glob": ">=11.1.0",
|
||||
"tar": ">=7.5.7",
|
||||
"@isaacs/brace-expansion": ">=5.0.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
# LITELLM PROXY DEPENDENCIES #
|
||||
# Security: explicit pins for transitive deps (CVE fixes)
|
||||
urllib3>=2.6.0 # CVE-2025-66471, CVE-2025-66418, CVE-2026-21441
|
||||
tornado>=6.5.3 # CVE-2025-67725, CVE-2025-67726, CVE-2025-67724
|
||||
filelock>=3.20.1 # CVE-2025-68146
|
||||
|
||||
anyio==4.8.0 # openai + http req.
|
||||
httpx==0.28.1
|
||||
openai==2.9.0 # openai req.
|
||||
|
||||
@@ -310,6 +310,16 @@ model LiteLLM_VerificationToken {
|
||||
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
|
||||
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
|
||||
object_permission LiteLLM_ObjectPermissionTable? @relation(fields: [object_permission_id], references: [object_permission_id])
|
||||
|
||||
// SELECT COUNT(*) FROM (SELECT "public"."LiteLLM_VerificationToken"."token" FROM "public"."LiteLLM_VerificationToken" WHERE ("public"."LiteLLM_VerificationToken"."user_id" = $1 AND ("public"."LiteLLM_VerificationToken"."team_id" IS NULL OR "public"."LiteLLM_VerificationToken"."team_id" <> $2)) OFFSET $3 ) AS "sub"
|
||||
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."user_id" = $1 OFFSET $2
|
||||
@@index([user_id, team_id])
|
||||
|
||||
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."team_id" = $1 OFFSET $2
|
||||
@@index([team_id])
|
||||
|
||||
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE (("public"."LiteLLM_VerificationToken"."expires" IS NULL OR "public"."LiteLLM_VerificationToken"."expires" > $1) AND "public"."LiteLLM_VerificationToken"."budget_reset_at" < $2) OFFSET $3
|
||||
@@index([budget_reset_at, expires])
|
||||
}
|
||||
|
||||
// Audit table for deleted keys - preserves spend and key information for historical tracking
|
||||
|
||||
@@ -116,4 +116,131 @@ def test_extract_blocking_info():
|
||||
blocking_info = guardrail.extract_blocking_info(response)
|
||||
|
||||
assert blocking_info["transactionId"] == "12345"
|
||||
assert blocking_info["blockingDetectors"] == ["detector1"]
|
||||
assert blocking_info["blockingDetectors"] == ["detector1"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch(
    "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call",
    new_callable=AsyncMock,
)
async def test_apply_guardrail_text_concatenation(mock_api_call):
    """
    The input texts ["Hello", "world"] must reach the mocked API call as the
    single content string "Hello world".
    """
    guard = ZscalerAIGuard(policy_id=100)

    await guard.apply_guardrail({"texts": ["Hello", "world"]}, {}, "request")

    mock_api_call.assert_called_once()
    sent_kwargs = mock_api_call.call_args.kwargs
    assert sent_kwargs["content"] == "Hello world"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch(
    "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call",
    new_callable=AsyncMock,
)
async def test_policy_id_from_request_metadata(mock_api_call):
    """
    When a policy id is present at every level, the one set directly in the
    request metadata (1) is the one passed to the API call.
    """
    guard = ZscalerAIGuard(policy_id=100)
    metadata = {
        "zguard_policy_id": 1,
        "user_api_key_metadata": {"zguard_policy_id": 2},
        "team_metadata": {"zguard_policy_id": 3},
    }

    await guard.apply_guardrail({"texts": ["test"]}, {"metadata": metadata}, "request")

    mock_api_call.assert_called_once()
    sent_kwargs = mock_api_call.call_args.kwargs
    assert sent_kwargs["policy_id"] == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch(
    "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call",
    new_callable=AsyncMock,
)
async def test_policy_id_from_user_api_key_metadata(mock_api_call):
    """
    Without a request-level policy id, the value under user_api_key_metadata (2)
    wins over the team_metadata value (3) and the init value (100).
    """
    guard = ZscalerAIGuard(policy_id=100)
    metadata = {
        "user_api_key_metadata": {"zguard_policy_id": 2},
        "team_metadata": {"zguard_policy_id": 3},
    }

    await guard.apply_guardrail({"texts": ["test"]}, {"metadata": metadata}, "request")

    mock_api_call.assert_called_once()
    sent_kwargs = mock_api_call.call_args.kwargs
    assert sent_kwargs["policy_id"] == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch(
    "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call",
    new_callable=AsyncMock,
)
async def test_policy_id_from_team_metadata(mock_api_call):
    """
    When only team_metadata carries a policy id (3), that value is used
    instead of the init value (100).
    """
    guard = ZscalerAIGuard(policy_id=100)
    team_only_request = {"metadata": {"team_metadata": {"zguard_policy_id": 3}}}

    await guard.apply_guardrail({"texts": ["test"]}, team_only_request, "request")

    mock_api_call.assert_called_once()
    sent_kwargs = mock_api_call.call_args.kwargs
    assert sent_kwargs["policy_id"] == 3
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch(
    "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call",
    new_callable=AsyncMock,
)
async def test_policy_id_from_init(mock_api_call):
    """
    With empty request metadata, the policy id given at guardrail
    construction (100) is the one passed to the API call.
    """
    guard = ZscalerAIGuard(policy_id=100)
    empty_metadata_request = {"metadata": {}}

    await guard.apply_guardrail({"texts": ["test"]}, empty_metadata_request, "request")

    mock_api_call.assert_called_once()
    sent_kwargs = mock_api_call.call_args.kwargs
    assert sent_kwargs["policy_id"] == 100
|
||||
|
||||
@pytest.mark.asyncio
@patch(
    "litellm.proxy.guardrails.guardrail_hooks.zscaler_ai_guard.ZscalerAIGuard.make_zscaler_ai_guard_api_call",
    new_callable=AsyncMock,
)
async def test_policy_id_zero_from_request_metadata(mock_api_call):
    """
    A policy id of 0 in the request metadata must be forwarded exactly as set,
    not replaced by the init value (100).
    """
    guard = ZscalerAIGuard(policy_id=100)
    zero_policy_request = {"metadata": {"zguard_policy_id": 0}}

    await guard.apply_guardrail({"texts": ["test"]}, zero_policy_request, "request")

    mock_api_call.assert_called_once()
    sent_kwargs = mock_api_call.call_args.kwargs
    assert sent_kwargs["policy_id"] == 0
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
"@types/node": "^22.5.5"
|
||||
},
|
||||
"overrides": {
|
||||
"glob": ">=11.1.0"
|
||||
"glob": ">=11.1.0",
|
||||
"tar": ">=7.5.7",
|
||||
"@isaacs/brace-expansion": ">=5.0.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,6 +24,8 @@
|
||||
"react-dom": "^18.2.0"
|
||||
},
|
||||
"overrides": {
|
||||
"glob": ">=11.1.0"
|
||||
"glob": ">=11.1.0",
|
||||
"tar": ">=7.5.7",
|
||||
"@isaacs/brace-expansion": ">=5.0.1"
|
||||
}
|
||||
}
|
||||
@@ -158,6 +158,76 @@ def test_get_combined_tool_content():
|
||||
]
|
||||
|
||||
|
||||
def test_get_combined_thinking_content_preserves_interleaved_blocks():
    """
    Streams thinking -> signature -> redacted_thinking -> thinking -> signature
    and checks that the combined result has three blocks in that order, each
    thinking block paired with the signature that followed it.
    """
    shared_fields = {
        "id": "chatcmpl-123",
        "object": "chat.completion.chunk",
        "created": 1234567890,
        "model": "claude-sonnet-4-20250514",
    }

    def make_chunk(**delta_kwargs):
        delta = Delta(**delta_kwargs)
        choice = StreamingChoices(index=0, delta=delta, finish_reason=None)
        return ModelResponseStream(**shared_fields, choices=[choice])

    # One streamed chunk per entry, in arrival order.
    streamed_blocks = [
        {"type": "thinking", "thinking": "Step 1 analysis...", "signature": None},
        {"type": "thinking", "thinking": None, "signature": "sig_block1"},
        {"type": "redacted_thinking", "data": "EuoBCoYBGAIi...encrypted..."},
        {"type": "thinking", "thinking": "Step 2 analysis...", "signature": None},
        {"type": "thinking", "thinking": None, "signature": "sig_block2"},
    ]

    chunks = [make_chunk(role="assistant", content=None)]
    for block in streamed_blocks:
        chunks.append(make_chunk(thinking_blocks=[block]))

    thinking_chunks = []
    for chunk in chunks:
        if chunk["choices"][0]["delta"].get("thinking_blocks"):
            thinking_chunks.append(chunk)

    combined = ChunkProcessor(chunks=chunks).get_combined_thinking_content(
        thinking_chunks
    )

    assert combined is not None
    assert len(combined) == 3

    expected_blocks = [
        {"type": "thinking", "thinking": "Step 1 analysis...", "signature": "sig_block1"},
        {"type": "redacted_thinking", "data": "EuoBCoYBGAIi...encrypted..."},
        {"type": "thinking", "thinking": "Step 2 analysis...", "signature": "sig_block2"},
    ]
    for position, expected in enumerate(expected_blocks):
        for field, value in expected.items():
            assert combined[position][field] == value
|
||||
|
||||
|
||||
def test_cache_read_input_tokens_retained():
|
||||
chunk1 = ModelResponseStream(
|
||||
id="chatcmpl-95aabb85-c39f-443d-ae96-0370c404d70c",
|
||||
@@ -441,4 +511,4 @@ def test_stream_chunk_builder_anthropic_web_search():
|
||||
assert usage.prompt_tokens == 50
|
||||
assert usage.completion_tokens == 27
|
||||
assert usage.total_tokens == 77
|
||||
assert usage.server_tool_use['web_search_requests'] == 2
|
||||
assert usage.server_tool_use['web_search_requests'] == 2
|
||||
|
||||
@@ -287,6 +287,114 @@ class TestOCIChatConfig:
|
||||
# Verify the message content
|
||||
assert transformed_request["chatRequest"]["message"] == "What is quantum computing?"
|
||||
|
||||
def test_transform_request_response_format_json_object(self):
    """
    A response_format of type 'json_object' must appear in the transformed
    chat request as type 'JSON_OBJECT'.
    """
    request = OCIChatConfig().transform_request(
        model=TEST_MODEL_NAME,
        messages=TEST_MESSAGES,  # type: ignore
        optional_params={
            "oci_compartment_id": TEST_COMPARTMENT_ID,
            "response_format": {"type": "json_object"},
        },
        litellm_params={},
        headers={},
    )

    response_format = request["chatRequest"]["responseFormat"]
    assert response_format["type"] == "JSON_OBJECT"
|
||||
|
||||
def test_transform_request_response_format_text(self):
    """
    A response_format of type 'text' must appear in the transformed chat
    request as type 'TEXT'.
    """
    request = OCIChatConfig().transform_request(
        model=TEST_MODEL_NAME,
        messages=TEST_MESSAGES,  # type: ignore
        optional_params={
            "oci_compartment_id": TEST_COMPARTMENT_ID,
            "response_format": {"type": "text"},
        },
        litellm_params={},
        headers={},
    )

    response_format = request["chatRequest"]["responseFormat"]
    assert response_format["type"] == "TEXT"
|
||||
|
||||
def test_transform_request_response_format_json_shorthand(self):
|
||||
"""
|
||||
Tests that response_format type 'json' is mapped to 'JSON_OBJECT' for generic OCI models.
|
||||
"""
|
||||
config = OCIChatConfig()
|
||||
optional_params = {
|
||||
"oci_compartment_id": TEST_COMPARTMENT_ID,
|
||||
"response_format": {"type": "json"},
|
||||
}
|
||||
transformed_request = config.transform_request(
|
||||
model=TEST_MODEL_NAME,
|
||||
messages=TEST_MESSAGES, # type: ignore
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={},
|
||||
)
|
||||
rf = transformed_request["chatRequest"]["responseFormat"]
|
||||
assert rf["type"] == "JSON_OBJECT"
|
||||
|
||||
def test_transform_response_without_token_details(self):
|
||||
"""
|
||||
Tests that responses missing completionTokensDetails and promptTokensDetails
|
||||
are handled correctly (fields are optional).
|
||||
"""
|
||||
config = OCIChatConfig()
|
||||
created_time = datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")
|
||||
mock_oci_response = {
|
||||
"modelId": TEST_MODEL_NAME,
|
||||
"modelVersion": "1.0",
|
||||
"chatResponse": {
|
||||
"apiFormat": "GENERIC",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "ASSISTANT",
|
||||
"content": [{"type": "TEXT", "text": "Hello!"}],
|
||||
},
|
||||
"finishReason": "STOP",
|
||||
}
|
||||
],
|
||||
"timeCreated": created_time,
|
||||
"usage": {
|
||||
"promptTokens": 5,
|
||||
"completionTokens": 10,
|
||||
"totalTokens": 15,
|
||||
},
|
||||
},
|
||||
}
|
||||
response = httpx.Response(
|
||||
status_code=200, json=mock_oci_response, headers={"Content-Type": "application/json"}
|
||||
)
|
||||
result = config.transform_response(
|
||||
model=TEST_MODEL_NAME,
|
||||
raw_response=response,
|
||||
model_response=ModelResponse(),
|
||||
logging_obj={}, # type: ignore
|
||||
request_data={},
|
||||
messages=[],
|
||||
optional_params={},
|
||||
litellm_params={},
|
||||
encoding={},
|
||||
)
|
||||
|
||||
assert isinstance(result, ModelResponse)
|
||||
assert result.choices[0].message.content == "Hello!"
|
||||
assert result.usage.prompt_tokens == 5 # type: ignore
|
||||
assert result.usage.completion_tokens == 10 # type: ignore
|
||||
assert result.usage.total_tokens == 15 # type: ignore
|
||||
|
||||
def test_transform_response_simple_text(self):
|
||||
"""
|
||||
Tests if a simple text response is transformed correctly.
|
||||
|
||||
@@ -239,6 +239,110 @@ class TestOCICohereToolCalls:
|
||||
assert result.usage.completion_tokens == 22
|
||||
assert result.usage.total_tokens == 48
|
||||
|
||||
def test_cohere_request_preserves_json_schema_response_format(self):
|
||||
"""Ensure Cohere requests retain JSON schema payloads in responseFormat."""
|
||||
config = OCIChatConfig()
|
||||
messages = [{"role": "user", "content": "Return structured info"}]
|
||||
response_format = {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "test_schema",
|
||||
"strict": True,
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"foo": {"type": "string"}
|
||||
},
|
||||
"required": ["foo"]
|
||||
}
|
||||
}
|
||||
}
|
||||
optional_params = {
|
||||
"oci_compartment_id": TEST_COMPARTMENT_ID,
|
||||
"response_format": response_format,
|
||||
}
|
||||
|
||||
transformed_request = config.transform_request(
|
||||
model="cohere.command-rplus",
|
||||
messages=messages, # type: ignore[arg-type]
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={},
|
||||
)
|
||||
|
||||
chat_request = transformed_request["chatRequest"]
|
||||
assert chat_request["apiFormat"] == "COHERE"
|
||||
assert "responseFormat" in chat_request
|
||||
|
||||
cohere_response_format = chat_request["responseFormat"]
|
||||
assert cohere_response_format["type"] == "json_schema"
|
||||
assert "json_schema" not in cohere_response_format
|
||||
assert "jsonSchema" in cohere_response_format
|
||||
assert cohere_response_format["jsonSchema"] == response_format["json_schema"]
|
||||
|
||||
def test_cohere_request_response_format_text_stays_lowercase(self):
|
||||
"""Ensure Cohere keeps response_format type lowercase (e.g. 'text' not 'TEXT')."""
|
||||
config = OCIChatConfig()
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
optional_params = {
|
||||
"oci_compartment_id": TEST_COMPARTMENT_ID,
|
||||
"response_format": {"type": "text"},
|
||||
}
|
||||
|
||||
transformed_request = config.transform_request(
|
||||
model="cohere.command-latest",
|
||||
messages=messages, # type: ignore
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={},
|
||||
)
|
||||
|
||||
chat_request = transformed_request["chatRequest"]
|
||||
assert chat_request["apiFormat"] == "COHERE"
|
||||
assert "responseFormat" in chat_request
|
||||
assert chat_request["responseFormat"]["type"] == "text"
|
||||
|
||||
def test_cohere_tool_call_only_message_no_text(self):
|
||||
"""Test chat history with an assistant message that has tool calls but no text content."""
|
||||
config = OCIChatConfig()
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "What's the weather?"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_1",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"arguments": '{"location": "Paris"}',
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"content": "Sunny, 25C",
|
||||
"tool_call_id": "call_1",
|
||||
},
|
||||
]
|
||||
|
||||
chat_history = config.adapt_messages_to_cohere_standard(messages)
|
||||
|
||||
# First message is the user message
|
||||
assert chat_history[0].role == "USER"
|
||||
assert chat_history[0].message == "What's the weather?"
|
||||
|
||||
# Second message is the assistant with tool calls and no text
|
||||
assistant_msg = chat_history[1]
|
||||
assert assistant_msg.role == "CHATBOT"
|
||||
assert assistant_msg.message is None or assistant_msg.message == ""
|
||||
assert assistant_msg.toolCalls is not None
|
||||
assert len(assistant_msg.toolCalls) == 1
|
||||
assert assistant_msg.toolCalls[0].name == "get_weather"
|
||||
|
||||
def test_cohere_chat_history_with_tool_calls(self):
|
||||
"""Test chat history transformation with tool calls"""
|
||||
config = OCIChatConfig()
|
||||
|
||||
+225
-116
@@ -45,68 +45,65 @@ def test_vertex_ai_anthropic_web_search_header_in_completion():
|
||||
|
||||
# Create the config instance
|
||||
model_info = AnthropicModelInfo()
|
||||
|
||||
|
||||
# Test the header generation directly
|
||||
tools = [{"type": "web_search_20250305", "name": "web_search", "max_uses": 5}]
|
||||
|
||||
|
||||
# Check if web search tool is detected
|
||||
web_search_detected = model_info.is_web_search_tool_used(tools=tools)
|
||||
assert web_search_detected is True, "Web search tool should be detected"
|
||||
|
||||
|
||||
# Generate headers with is_vertex_request=True
|
||||
headers = model_info.get_anthropic_headers(
|
||||
api_key="test-key",
|
||||
web_search_tool_used=web_search_detected,
|
||||
is_vertex_request=True,
|
||||
)
|
||||
|
||||
|
||||
# Assert that the anthropic-beta header with web-search is present
|
||||
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
|
||||
assert headers["anthropic-beta"] == "web-search-2025-03-05", \
|
||||
f"anthropic-beta should be 'web-search-2025-03-05', got: {headers['anthropic-beta']}"
|
||||
|
||||
assert (
|
||||
headers["anthropic-beta"] == "web-search-2025-03-05"
|
||||
), f"anthropic-beta should be 'web-search-2025-03-05', got: {headers['anthropic-beta']}"
|
||||
|
||||
# Test that header is NOT added for non-Vertex requests
|
||||
headers_non_vertex = model_info.get_anthropic_headers(
|
||||
api_key="test-key",
|
||||
web_search_tool_used=web_search_detected,
|
||||
is_vertex_request=False,
|
||||
)
|
||||
|
||||
|
||||
# For non-Vertex (Anthropic-hosted), the web search header should NOT be in anthropic-beta
|
||||
# because Anthropic doesn't require it
|
||||
assert "anthropic-beta" not in headers_non_vertex or "web-search" not in headers_non_vertex.get("anthropic-beta", ""), \
|
||||
"anthropic-beta with web-search should not be present for non-Vertex requests"
|
||||
assert (
|
||||
"anthropic-beta" not in headers_non_vertex
|
||||
or "web-search" not in headers_non_vertex.get("anthropic-beta", "")
|
||||
), "anthropic-beta with web-search should not be present for non-Vertex requests"
|
||||
|
||||
|
||||
def test_vertex_ai_anthropic_context_management_compact_beta_header():
|
||||
"""Test that context_management with compact adds the correct beta header for Vertex AI"""
|
||||
config = VertexAIAnthropicConfig()
|
||||
|
||||
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
optional_params = {
|
||||
"context_management": {
|
||||
"edits": [
|
||||
{
|
||||
"type": "compact_20260112"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context_management": {"edits": [{"type": "compact_20260112"}]},
|
||||
"max_tokens": 100,
|
||||
"is_vertex_request": True
|
||||
"is_vertex_request": True,
|
||||
}
|
||||
|
||||
|
||||
result = config.transform_request(
|
||||
model="claude-opus-4-6",
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={}
|
||||
headers={},
|
||||
)
|
||||
|
||||
|
||||
# Verify context_management is included
|
||||
assert "context_management" in result
|
||||
assert result["context_management"]["edits"][0]["type"] == "compact_20260112"
|
||||
|
||||
|
||||
# Verify compact beta header is in anthropic_beta field
|
||||
assert "anthropic_beta" in result
|
||||
assert "compact-2026-01-12" in result["anthropic_beta"]
|
||||
@@ -115,33 +112,27 @@ def test_vertex_ai_anthropic_context_management_compact_beta_header():
|
||||
def test_vertex_ai_anthropic_context_management_mixed_edits():
|
||||
"""Test that context_management with both compact and other edits adds both beta headers"""
|
||||
config = VertexAIAnthropicConfig()
|
||||
|
||||
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
optional_params = {
|
||||
"context_management": {
|
||||
"edits": [
|
||||
{
|
||||
"type": "compact_20260112"
|
||||
},
|
||||
{
|
||||
"type": "replace",
|
||||
"message_id": "msg_123",
|
||||
"content": "new content"
|
||||
}
|
||||
{"type": "compact_20260112"},
|
||||
{"type": "replace", "message_id": "msg_123", "content": "new content"},
|
||||
]
|
||||
},
|
||||
"max_tokens": 100,
|
||||
"is_vertex_request": True
|
||||
"is_vertex_request": True,
|
||||
}
|
||||
|
||||
|
||||
result = config.transform_request(
|
||||
model="claude-opus-4-6",
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={}
|
||||
headers={},
|
||||
)
|
||||
|
||||
|
||||
# Verify both beta headers are present
|
||||
assert "anthropic_beta" in result
|
||||
assert "compact-2026-01-12" in result["anthropic_beta"]
|
||||
@@ -151,58 +142,65 @@ def test_vertex_ai_anthropic_context_management_mixed_edits():
|
||||
def test_vertex_ai_anthropic_structured_output_header_not_added():
|
||||
"""Test that structured output beta headers are NOT added for Vertex AI requests"""
|
||||
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
|
||||
|
||||
|
||||
config = AnthropicConfig()
|
||||
|
||||
|
||||
# Test case 1: Vertex request with output_format should NOT add beta header
|
||||
headers_vertex = {}
|
||||
optional_params_vertex = {
|
||||
'output_format': {
|
||||
'type': 'json_schema',
|
||||
'json_schema': {
|
||||
'name': 'MathResult',
|
||||
'schema': {'properties': {'result': {'type': 'integer'}}}
|
||||
}
|
||||
"output_format": {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "MathResult",
|
||||
"schema": {"properties": {"result": {"type": "integer"}}},
|
||||
},
|
||||
},
|
||||
'is_vertex_request': True
|
||||
"is_vertex_request": True,
|
||||
}
|
||||
result_vertex = config.update_headers_with_optional_anthropic_beta(headers_vertex, optional_params_vertex)
|
||||
|
||||
assert "anthropic-beta" not in result_vertex, \
|
||||
f"Vertex request should NOT have anthropic-beta header for structured output, got: {result_vertex.get('anthropic-beta')}"
|
||||
|
||||
result_vertex = config.update_headers_with_optional_anthropic_beta(
|
||||
headers_vertex, optional_params_vertex
|
||||
)
|
||||
|
||||
assert (
|
||||
"anthropic-beta" not in result_vertex
|
||||
), f"Vertex request should NOT have anthropic-beta header for structured output, got: {result_vertex.get('anthropic-beta')}"
|
||||
|
||||
# Test case 2: Non-Vertex request with output_format SHOULD add beta header
|
||||
headers_non_vertex = {}
|
||||
optional_params_non_vertex = {
|
||||
'output_format': {
|
||||
'type': 'json_schema',
|
||||
'json_schema': {
|
||||
'name': 'MathResult',
|
||||
'schema': {'properties': {'result': {'type': 'integer'}}}
|
||||
}
|
||||
"output_format": {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "MathResult",
|
||||
"schema": {"properties": {"result": {"type": "integer"}}},
|
||||
},
|
||||
},
|
||||
'is_vertex_request': False
|
||||
"is_vertex_request": False,
|
||||
}
|
||||
result_non_vertex = config.update_headers_with_optional_anthropic_beta(headers_non_vertex, optional_params_non_vertex)
|
||||
|
||||
assert "anthropic-beta" in result_non_vertex, \
|
||||
"Non-Vertex request SHOULD have anthropic-beta header for structured output"
|
||||
assert result_non_vertex["anthropic-beta"] == "structured-outputs-2025-11-13", \
|
||||
f"Expected 'structured-outputs-2025-11-13', got: {result_non_vertex.get('anthropic-beta')}"
|
||||
result_non_vertex = config.update_headers_with_optional_anthropic_beta(
|
||||
headers_non_vertex, optional_params_non_vertex
|
||||
)
|
||||
|
||||
assert (
|
||||
"anthropic-beta" in result_non_vertex
|
||||
), "Non-Vertex request SHOULD have anthropic-beta header for structured output"
|
||||
assert (
|
||||
result_non_vertex["anthropic-beta"] == "structured-outputs-2025-11-13"
|
||||
), f"Expected 'structured-outputs-2025-11-13', got: {result_non_vertex.get('anthropic-beta')}"
|
||||
|
||||
|
||||
def test_vertex_ai_claude_sonnet_4_5_structured_output_fix():
|
||||
"""
|
||||
Test fix for issue #18625: Claude Sonnet 4.5 on VertexAI should use tool-based
|
||||
Test fix for issue #18625: Claude Sonnet 4.5 on VertexAI should use tool-based
|
||||
structured outputs instead of output_format parameter.
|
||||
|
||||
|
||||
This test verifies that:
|
||||
1. Claude Sonnet 4.5 uses tool-based structured outputs on VertexAI
|
||||
2. output_format parameter is removed from the final request
|
||||
3. The fix prevents "Extra inputs are not permitted" error
|
||||
"""
|
||||
config = VertexAIAnthropicConfig()
|
||||
|
||||
|
||||
# Test data matching the issue report
|
||||
response_format = {
|
||||
"type": "json_schema",
|
||||
@@ -212,29 +210,23 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix():
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string"
|
||||
},
|
||||
"response": {
|
||||
"type": "string"
|
||||
}
|
||||
"question": {"type": "string"},
|
||||
"response": {"type": "string"},
|
||||
},
|
||||
"required": ["question", "response"],
|
||||
"additionalProperties": False
|
||||
}
|
||||
}
|
||||
"additionalProperties": False,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "Generate a question and answer about AI."}
|
||||
]
|
||||
|
||||
|
||||
messages = [{"role": "user", "content": "Generate a question and answer about AI."}]
|
||||
|
||||
# Test parameters that would trigger the issue
|
||||
non_default_params = {
|
||||
"response_format": response_format,
|
||||
"max_tokens": 1000,
|
||||
}
|
||||
|
||||
|
||||
# Test 1: Verify map_openai_params forces tool-based approach for Claude Sonnet 4.5
|
||||
optional_params = {}
|
||||
result_params = config.map_openai_params(
|
||||
@@ -243,17 +235,19 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix():
|
||||
model="claude-3-5-sonnet-20241022", # Claude Sonnet 4.5 model
|
||||
drop_params=False,
|
||||
)
|
||||
|
||||
|
||||
# Should have tools and tool_choice (tool-based approach)
|
||||
assert "tools" in result_params, "Tools should be present for structured output"
|
||||
assert "tool_choice" in result_params, "Tool choice should be present for structured output"
|
||||
assert (
|
||||
"tool_choice" in result_params
|
||||
), "Tool choice should be present for structured output"
|
||||
assert "json_mode" in result_params, "JSON mode should be enabled"
|
||||
|
||||
|
||||
# Verify the tool is the response format tool
|
||||
tools = result_params["tools"]
|
||||
assert len(tools) == 1, "Should have exactly one tool for response format"
|
||||
assert tools[0]["name"] == "json_tool_call", "Tool should be named json_tool_call"
|
||||
|
||||
|
||||
# Test 2: Verify transform_request removes output_format parameter
|
||||
# Simulate what would happen if parent class added output_format
|
||||
test_data = {
|
||||
@@ -264,20 +258,22 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix():
|
||||
"tool_choice": result_params["tool_choice"],
|
||||
"output_format": { # This would be added by parent class for Sonnet 4.5
|
||||
"type": "json_schema",
|
||||
"schema": response_format["json_schema"]["schema"]
|
||||
}
|
||||
"schema": response_format["json_schema"]["schema"],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# Mock the parent transform_request to return data with output_format
|
||||
original_transform = config.__class__.__bases__[0].transform_request
|
||||
|
||||
def mock_transform_request(self, model, messages, optional_params, litellm_params, headers):
|
||||
|
||||
def mock_transform_request(
|
||||
self, model, messages, optional_params, litellm_params, headers
|
||||
):
|
||||
# Return test data that includes output_format
|
||||
return test_data.copy()
|
||||
|
||||
|
||||
# Temporarily replace parent method
|
||||
config.__class__.__bases__[0].transform_request = mock_transform_request
|
||||
|
||||
|
||||
try:
|
||||
final_data = config.transform_request(
|
||||
model="claude-3-5-sonnet-20241022",
|
||||
@@ -286,13 +282,15 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix():
|
||||
litellm_params={},
|
||||
headers={},
|
||||
)
|
||||
|
||||
|
||||
# Verify that output_format was removed (fixes the "Extra inputs are not permitted" error)
|
||||
assert "output_format" not in final_data, "output_format should be removed for VertexAI"
|
||||
assert (
|
||||
"output_format" not in final_data
|
||||
), "output_format should be removed for VertexAI"
|
||||
assert "model" not in final_data, "model should be removed for VertexAI"
|
||||
assert "tools" in final_data, "tools should still be present"
|
||||
assert "tool_choice" in final_data, "tool_choice should still be present"
|
||||
|
||||
|
||||
finally:
|
||||
# Restore original method
|
||||
config.__class__.__bases__[0].transform_request = original_transform
|
||||
@@ -300,43 +298,149 @@ def test_vertex_ai_claude_sonnet_4_5_structured_output_fix():
|
||||
|
||||
def test_vertex_ai_anthropic_other_models_still_use_tools():
|
||||
"""
|
||||
Test that other Anthropic models (non-Sonnet 4.5) on VertexAI also use tool-based
|
||||
Test that other Anthropic models (non-Sonnet 4.5) on VertexAI also use tool-based
|
||||
structured outputs, ensuring consistency across all models.
|
||||
"""
|
||||
config = VertexAIAnthropicConfig()
|
||||
|
||||
|
||||
response_format = {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "test_schema",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"result": {"type": "string"}
|
||||
}
|
||||
}
|
||||
}
|
||||
"schema": {"type": "object", "properties": {"result": {"type": "string"}}},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# Test with Claude 3 Sonnet (not 4.5)
|
||||
non_default_params = {"response_format": response_format}
|
||||
optional_params = {}
|
||||
|
||||
|
||||
result_params = config.map_openai_params(
|
||||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model="claude-3-sonnet-20240229",
|
||||
drop_params=False,
|
||||
)
|
||||
|
||||
|
||||
# Should still use tool-based approach
|
||||
assert "tools" in result_params, "Claude 3 Sonnet should also use tool-based structured output"
|
||||
assert (
|
||||
"tools" in result_params
|
||||
), "Claude 3 Sonnet should also use tool-based structured output"
|
||||
assert "tool_choice" in result_params, "Tool choice should be present"
|
||||
assert "json_mode" in result_params, "JSON mode should be enabled"
|
||||
|
||||
|
||||
def test_vertex_ai_anthropic_extra_headers_beta_propagation():
|
||||
"""Test that anthropic-beta values from extra_headers are propagated to the
|
||||
anthropic_beta request body field for Vertex AI requests.
|
||||
|
||||
Vertex AI requires beta flags in the request body (anthropic_beta array),
|
||||
not as HTTP headers. This mirrors the Bedrock handler's behavior of
|
||||
extracting user-specified beta headers.
|
||||
"""
|
||||
config = VertexAIAnthropicConfig()
|
||||
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
optional_params = {
|
||||
"max_tokens": 100,
|
||||
"is_vertex_request": True,
|
||||
"extra_headers": {
|
||||
"anthropic-beta": "interleaved-thinking-2025-05-14",
|
||||
},
|
||||
}
|
||||
|
||||
result = config.transform_request(
|
||||
model="claude-sonnet-4-20250514",
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={},
|
||||
)
|
||||
|
||||
assert "anthropic_beta" in result
|
||||
assert "interleaved-thinking-2025-05-14" in result["anthropic_beta"]
|
||||
assert "extra_headers" not in result
|
||||
|
||||
|
||||
def test_vertex_ai_anthropic_extra_headers_beta_merged_with_auto_betas():
|
||||
"""Test that extra_headers betas are merged with auto-detected betas
|
||||
rather than replacing them."""
|
||||
config = VertexAIAnthropicConfig()
|
||||
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
optional_params = {
|
||||
"max_tokens": 100,
|
||||
"is_vertex_request": True,
|
||||
"extra_headers": {
|
||||
"anthropic-beta": "interleaved-thinking-2025-05-14",
|
||||
},
|
||||
"context_management": {"edits": [{"type": "compact_20260112"}]},
|
||||
}
|
||||
|
||||
result = config.transform_request(
|
||||
model="claude-opus-4-6",
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={},
|
||||
)
|
||||
|
||||
assert "anthropic_beta" in result
|
||||
assert "interleaved-thinking-2025-05-14" in result["anthropic_beta"]
|
||||
assert "compact-2026-01-12" in result["anthropic_beta"]
|
||||
|
||||
|
||||
def test_vertex_ai_anthropic_extra_headers_comma_separated_betas():
|
||||
"""Test that comma-separated beta values in extra_headers are all extracted."""
|
||||
config = VertexAIAnthropicConfig()
|
||||
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
optional_params = {
|
||||
"max_tokens": 100,
|
||||
"is_vertex_request": True,
|
||||
"extra_headers": {
|
||||
"anthropic-beta": "interleaved-thinking-2025-05-14,dev-full-thinking-2025-05-14",
|
||||
},
|
||||
}
|
||||
|
||||
result = config.transform_request(
|
||||
model="claude-sonnet-4-20250514",
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={},
|
||||
)
|
||||
|
||||
assert "anthropic_beta" in result
|
||||
assert "interleaved-thinking-2025-05-14" in result["anthropic_beta"]
|
||||
assert "dev-full-thinking-2025-05-14" in result["anthropic_beta"]
|
||||
|
||||
|
||||
def test_vertex_ai_anthropic_no_extra_headers_unchanged():
|
||||
"""Test that requests without extra_headers still work normally."""
|
||||
config = VertexAIAnthropicConfig()
|
||||
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
optional_params = {
|
||||
"max_tokens": 100,
|
||||
"is_vertex_request": True,
|
||||
}
|
||||
|
||||
result = config.transform_request(
|
||||
model="claude-sonnet-4-20250514",
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={},
|
||||
)
|
||||
|
||||
assert "anthropic_beta" not in result
|
||||
assert "extra_headers" not in result
|
||||
|
||||
|
||||
def test_vertex_ai_partner_models_anthropic_remove_prompt_caching_scope_beta_header():
|
||||
"""
|
||||
Test that remove_unsupported_beta correctly filters out prompt-caching-scope-2026-01-05
|
||||
Test that remove_unsupported_beta correctly filters out prompt-caching-scope-2026-01-05
|
||||
from the anthropic-beta headers.
|
||||
"""
|
||||
from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.experimental_pass_through.transformation import (
|
||||
@@ -352,13 +456,18 @@ def test_vertex_ai_partner_models_anthropic_remove_prompt_caching_scope_beta_hea
|
||||
headers = update_headers_with_filtered_beta(headers, "vertex_ai")
|
||||
|
||||
beta_header = headers.get("anthropic-beta")
|
||||
assert PROMPT_CACHING_BETA_HEADER not in (beta_header or ""), \
|
||||
f"{PROMPT_CACHING_BETA_HEADER} should be filtered out"
|
||||
assert "other-feature" in (beta_header or ""), \
|
||||
"Other non-excluded beta headers should remain"
|
||||
assert "web-search-2025-03-05" in (beta_header or ""), \
|
||||
"Other non-excluded beta headers should remain"
|
||||
assert PROMPT_CACHING_BETA_HEADER not in (
|
||||
beta_header or ""
|
||||
), f"{PROMPT_CACHING_BETA_HEADER} should be filtered out"
|
||||
assert "other-feature" in (
|
||||
beta_header or ""
|
||||
), "Other non-excluded beta headers should remain"
|
||||
assert "web-search-2025-03-05" in (
|
||||
beta_header or ""
|
||||
), "Other non-excluded beta headers should remain"
|
||||
# If prompt-caching was the only value, header should be removed completely
|
||||
headers2 = {"anthropic-beta": PROMPT_CACHING_BETA_HEADER}
|
||||
headers2 = update_headers_with_filtered_beta(headers2, "vertex_ai")
|
||||
assert "anthropic-beta" not in headers2, "Header should be removed if no supported values remain"
|
||||
assert (
|
||||
"anthropic-beta" not in headers2
|
||||
), "Header should be removed if no supported values remain"
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
Tests for litellm/proxy/management_endpoints/common_utils.py
|
||||
|
||||
Covers the fix for GitHub issue #20304:
|
||||
Empty guardrails/policies arrays sent by the UI should NOT trigger the
|
||||
enterprise (premium) license check, but should still be applied so that
|
||||
users can intentionally clear previously-set fields.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from litellm.proxy.management_endpoints.common_utils import (
|
||||
_update_metadata_fields,
|
||||
)
|
||||
|
||||
|
||||
class TestUpdateMetadataFieldsEmptyCollections:
|
||||
"""
|
||||
Regression tests for issue #20304.
|
||||
|
||||
The UI sends empty arrays (`[]`) for enterprise-only fields like
|
||||
guardrails, policies, and logging even when the user hasn't configured
|
||||
these features. The backend must not treat empty collections as an
|
||||
intent to use the feature, and therefore must not trigger the premium
|
||||
license check.
|
||||
|
||||
However, empty collections must still be written into metadata so that
|
||||
users can intentionally clear a previously-set field (e.g. removing all
|
||||
guardrails by sending `guardrails: []`).
|
||||
"""
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_empty_list_does_not_trigger_premium_check(self, mock_premium_check):
|
||||
"""Empty lists for premium fields must not trigger the premium check."""
|
||||
updated_kv = {
|
||||
"team_id": "test-team",
|
||||
"guardrails": [],
|
||||
"policies": [],
|
||||
"logging": [],
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
mock_premium_check.assert_not_called()
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_empty_list_still_updates_metadata(self, mock_premium_check):
|
||||
"""
|
||||
Empty lists must still be moved into metadata so users can clear
|
||||
previously-set fields (e.g. remove all guardrails).
|
||||
"""
|
||||
updated_kv = {
|
||||
"team_id": "test-team",
|
||||
"guardrails": [],
|
||||
"policies": [],
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
# The fields should have been moved into metadata
|
||||
assert "guardrails" not in updated_kv, (
|
||||
"guardrails should be popped from top-level"
|
||||
)
|
||||
assert "policies" not in updated_kv, (
|
||||
"policies should be popped from top-level"
|
||||
)
|
||||
assert updated_kv["metadata"]["guardrails"] == []
|
||||
assert updated_kv["metadata"]["policies"] == []
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_empty_dict_does_not_trigger_premium_check(self, mock_premium_check):
|
||||
"""Empty dicts for premium fields must not trigger the premium check."""
|
||||
updated_kv = {
|
||||
"team_id": "test-team",
|
||||
"secret_manager_settings": {},
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
mock_premium_check.assert_not_called()
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_empty_dict_still_updates_metadata(self, mock_premium_check):
|
||||
"""
|
||||
Empty dicts must still be moved into metadata so users can clear
|
||||
previously-set fields.
|
||||
"""
|
||||
updated_kv = {
|
||||
"team_id": "test-team",
|
||||
"secret_manager_settings": {},
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
assert "secret_manager_settings" not in updated_kv, (
|
||||
"secret_manager_settings should be popped from top-level"
|
||||
)
|
||||
assert updated_kv["metadata"]["secret_manager_settings"] == {}
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_none_value_does_not_trigger_premium_check(self, mock_premium_check):
|
||||
"""None values for premium fields should be silently ignored."""
|
||||
updated_kv = {
|
||||
"team_id": "test-team",
|
||||
"guardrails": None,
|
||||
"policies": None,
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
mock_premium_check.assert_not_called()
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_absent_fields_do_not_trigger_premium_check(self, mock_premium_check):
|
||||
"""Fields not present in the dict should not trigger premium check."""
|
||||
updated_kv = {
|
||||
"team_id": "test-team",
|
||||
"team_alias": "example-team",
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
mock_premium_check.assert_not_called()
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_non_empty_list_triggers_premium_check(self, mock_premium_check):
|
||||
"""Non-empty lists for premium fields should trigger the premium check."""
|
||||
updated_kv = {
|
||||
"team_id": "test-team",
|
||||
"guardrails": ["my-guardrail"],
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
mock_premium_check.assert_called()
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_non_empty_value_triggers_premium_check(self, mock_premium_check):
|
||||
"""Non-empty string values for premium fields should trigger the premium check."""
|
||||
updated_kv = {
|
||||
"team_id": "test-team",
|
||||
"tags": ["production"],
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
mock_premium_check.assert_called()
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_non_empty_list_updates_metadata(self, mock_premium_check):
|
||||
"""Non-empty lists should be moved into metadata."""
|
||||
updated_kv = {
|
||||
"team_id": "test-team",
|
||||
"guardrails": ["my-guardrail"],
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
assert "guardrails" not in updated_kv
|
||||
assert updated_kv["metadata"]["guardrails"] == ["my-guardrail"]
|
||||
|
||||
@patch("litellm.proxy.management_endpoints.common_utils._premium_user_check")
|
||||
def test_ui_typical_payload_does_not_trigger_premium_check(self, mock_premium_check):
|
||||
"""
|
||||
Simulate the exact payload the UI sends when no enterprise features
|
||||
are configured. This must NOT trigger the premium check.
|
||||
"""
|
||||
# This is the payload structure the UI sends (from issue #20304)
|
||||
updated_kv = {
|
||||
"team_id": "67848772-1a8b-4343-938c-17e60f1db860",
|
||||
"team_alias": "example-team",
|
||||
"models": ["gpt-4"],
|
||||
"metadata": {
|
||||
"guardrails": [],
|
||||
"logging": [],
|
||||
},
|
||||
"policies": [],
|
||||
}
|
||||
_update_metadata_fields(updated_kv=updated_kv)
|
||||
mock_premium_check.assert_not_called()
|
||||
+161
@@ -229,3 +229,164 @@ def test_tool_call_arguments_are_chunked_to_match_openai_behavior():
|
||||
assert sequence_numbers == sorted(sequence_numbers)
|
||||
assert len(set(sequence_numbers)) == len(sequence_numbers) # All unique
|
||||
|
||||
|
||||
def test_tool_call_delta_without_id_uses_index_mapping():
|
||||
iterator = LiteLLMCompletionStreamingIterator(
|
||||
model="test-model",
|
||||
litellm_custom_stream_wrapper=AsyncMock(),
|
||||
request_input="Test input",
|
||||
responses_api_request={},
|
||||
)
|
||||
|
||||
chunks = [
|
||||
[
|
||||
{
|
||||
"index": 0,
|
||||
"id": "call_abc123",
|
||||
"type": "function",
|
||||
"function": {"name": "get_weather", "arguments": '{"lo'},
|
||||
}
|
||||
],
|
||||
[{"index": 0, "type": "function", "function": {"arguments": 'cation":'}}],
|
||||
[{"index": 0, "type": "function", "function": {"arguments": ' "New'}}],
|
||||
[{"index": 0, "type": "function", "function": {"arguments": ' York"}'}}],
|
||||
]
|
||||
|
||||
for tool_calls in chunks:
|
||||
iterator._queue_tool_call_delta_events(tool_calls)
|
||||
|
||||
all_events = []
|
||||
while iterator._pending_tool_events:
|
||||
all_events.append(iterator._pending_tool_events.pop(0))
|
||||
|
||||
delta_events = [
|
||||
evt
|
||||
for evt in all_events
|
||||
if evt.type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA
|
||||
]
|
||||
streamed_arguments = "".join(evt.delta for evt in delta_events)
|
||||
|
||||
assert streamed_arguments == '{"location": "New York"}'
|
||||
|
||||
output_item_added_events = [
|
||||
evt
|
||||
for evt in all_events
|
||||
if evt.type == ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED
|
||||
]
|
||||
assert len(output_item_added_events) == 1
|
||||
assert output_item_added_events[0].item.id == "call_abc123"
|
||||
|
||||
|
||||
def test_parallel_tool_calls_without_ids_use_index_mapping():
|
||||
iterator = LiteLLMCompletionStreamingIterator(
|
||||
model="test-model",
|
||||
litellm_custom_stream_wrapper=AsyncMock(),
|
||||
request_input="Test input",
|
||||
responses_api_request={},
|
||||
)
|
||||
|
||||
iterator._queue_tool_call_delta_events(
|
||||
[
|
||||
{
|
||||
"index": 0,
|
||||
"id": "call_a",
|
||||
"type": "function",
|
||||
"function": {"name": "tool_a", "arguments": '{"x":'},
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"id": "call_b",
|
||||
"type": "function",
|
||||
"function": {"name": "tool_b", "arguments": '{"y":'},
|
||||
},
|
||||
]
|
||||
)
|
||||
iterator._queue_tool_call_delta_events(
|
||||
[
|
||||
{"index": 0, "type": "function", "function": {"arguments": "1}"}},
|
||||
{"index": 1, "type": "function", "function": {"arguments": "2}"}},
|
||||
]
|
||||
)
|
||||
|
||||
all_events = []
|
||||
while iterator._pending_tool_events:
|
||||
all_events.append(iterator._pending_tool_events.pop(0))
|
||||
|
||||
output_item_added_events = [
|
||||
evt
|
||||
for evt in all_events
|
||||
if evt.type == ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED
|
||||
]
|
||||
assert len(output_item_added_events) == 2
|
||||
|
||||
delta_events = [
|
||||
evt
|
||||
for evt in all_events
|
||||
if evt.type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA
|
||||
]
|
||||
arguments_by_call_id = {}
|
||||
for evt in delta_events:
|
||||
arguments_by_call_id.setdefault(evt.item_id, "")
|
||||
arguments_by_call_id[evt.item_id] += evt.delta
|
||||
|
||||
assert arguments_by_call_id["call_a"] == '{"x":1}'
|
||||
assert arguments_by_call_id["call_b"] == '{"y":2}'
|
||||
|
||||
|
||||
def test_reused_index_with_new_call_id_marks_fallback_ambiguous():
|
||||
iterator = LiteLLMCompletionStreamingIterator(
|
||||
model="test-model",
|
||||
litellm_custom_stream_wrapper=AsyncMock(),
|
||||
request_input="Test input",
|
||||
responses_api_request={},
|
||||
)
|
||||
|
||||
iterator._queue_tool_call_delta_events(
|
||||
[
|
||||
{
|
||||
"index": 0,
|
||||
"id": "call_a",
|
||||
"type": "function",
|
||||
"function": {"name": "tool_a", "arguments": '{"a":'},
|
||||
}
|
||||
]
|
||||
)
|
||||
iterator._queue_tool_call_delta_events(
|
||||
[
|
||||
{
|
||||
"index": 0,
|
||||
"id": "call_b",
|
||||
"type": "function",
|
||||
"function": {"name": "tool_b", "arguments": '{"b":'},
|
||||
}
|
||||
]
|
||||
)
|
||||
# Ambiguous chunk: index reused and id missing. We should skip fallback rather than misroute.
|
||||
iterator._queue_tool_call_delta_events(
|
||||
[
|
||||
{
|
||||
"index": 0,
|
||||
"type": "function",
|
||||
"function": {"arguments": "1}"},
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
all_events = []
|
||||
while iterator._pending_tool_events:
|
||||
all_events.append(iterator._pending_tool_events.pop(0))
|
||||
|
||||
delta_events = [
|
||||
evt
|
||||
for evt in all_events
|
||||
if evt.type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA
|
||||
]
|
||||
arguments_by_call_id = {}
|
||||
for evt in delta_events:
|
||||
arguments_by_call_id.setdefault(evt.item_id, "")
|
||||
arguments_by_call_id[evt.item_id] += evt.delta
|
||||
|
||||
assert arguments_by_call_id["call_a"] == '{"a":'
|
||||
assert arguments_by_call_id["call_b"] == '{"b":'
|
||||
assert arguments_by_call_id["call_a"] != '{"a":1}'
|
||||
assert arguments_by_call_id["call_b"] != '{"b":1}'
|
||||
|
||||
@@ -1869,3 +1869,124 @@ async def test_aguardrail():
|
||||
|
||||
assert result["result"] == "success"
|
||||
assert result["selected_guardrail"]["id"] == "guardrail-1"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anthropic_messages_call_type_is_cached():
|
||||
"""
|
||||
Regression test: Verify that anthropic_messages call type is allowed
|
||||
in PromptCachingDeploymentCheck.async_log_success_event.
|
||||
"""
|
||||
import asyncio
|
||||
from litellm.router_utils.pre_call_checks.prompt_caching_deployment_check import (
|
||||
PromptCachingDeploymentCheck,
|
||||
)
|
||||
from litellm.router_utils.prompt_caching_cache import PromptCachingCache
|
||||
from litellm.caching.dual_cache import DualCache
|
||||
from litellm.types.utils import CallTypes
|
||||
from litellm.types.utils import (
|
||||
StandardLoggingPayload,
|
||||
StandardLoggingModelInformation,
|
||||
StandardLoggingMetadata,
|
||||
StandardLoggingHiddenParams,
|
||||
)
|
||||
|
||||
# Create mock standard logging payload inline
|
||||
def create_standard_logging_payload() -> StandardLoggingPayload:
|
||||
return StandardLoggingPayload(
|
||||
id="test_id",
|
||||
call_type="completion",
|
||||
response_cost=0.1,
|
||||
response_cost_failure_debug_info=None,
|
||||
status="success",
|
||||
total_tokens=30,
|
||||
prompt_tokens=20,
|
||||
completion_tokens=10,
|
||||
startTime=1234567890.0,
|
||||
endTime=1234567891.0,
|
||||
completionStartTime=1234567890.5,
|
||||
model_map_information=StandardLoggingModelInformation(
|
||||
model_map_key="gpt-3.5-turbo", model_map_value=None
|
||||
),
|
||||
model="gpt-3.5-turbo",
|
||||
model_id="model-123",
|
||||
model_group="openai-gpt",
|
||||
api_base="https://api.openai.com",
|
||||
metadata=StandardLoggingMetadata(
|
||||
user_api_key_hash="test_hash",
|
||||
user_api_key_org_id=None,
|
||||
user_api_key_alias="test_alias",
|
||||
user_api_key_team_id="test_team",
|
||||
user_api_key_user_id="test_user",
|
||||
user_api_key_team_alias="test_team_alias",
|
||||
spend_logs_metadata=None,
|
||||
requester_ip_address="127.0.0.1",
|
||||
requester_metadata=None,
|
||||
),
|
||||
cache_hit=False,
|
||||
cache_key=None,
|
||||
saved_cache_cost=0.0,
|
||||
request_tags=[],
|
||||
end_user=None,
|
||||
requester_ip_address="127.0.0.1",
|
||||
messages=[{"role": "user", "content": "Hello, world!"}],
|
||||
response={"choices": [{"message": {"content": "Hi there!"}}]},
|
||||
error_str=None,
|
||||
model_parameters={"stream": True},
|
||||
hidden_params=StandardLoggingHiddenParams(
|
||||
model_id="model-123",
|
||||
cache_key=None,
|
||||
api_base="https://api.openai.com",
|
||||
response_cost="0.1",
|
||||
additional_headers=None,
|
||||
),
|
||||
)
|
||||
|
||||
cache = DualCache()
|
||||
deployment_check = PromptCachingDeploymentCheck(cache=cache)
|
||||
prompt_cache = PromptCachingCache(cache=cache)
|
||||
|
||||
# Create messages with enough tokens to pass the caching threshold
|
||||
test_messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "test long message here" * 1024,
|
||||
"cache_control": {
|
||||
"type": "ephemeral",
|
||||
"ttl": "5m"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
test_model_id = "test-model-id-123"
|
||||
|
||||
# Create a payload with anthropic_messages call type
|
||||
payload = create_standard_logging_payload()
|
||||
payload["call_type"] = CallTypes.anthropic_messages.value
|
||||
payload["messages"] = test_messages
|
||||
payload["model"] = "anthropic/claude-3-5-sonnet-20240620"
|
||||
payload["model_id"] = test_model_id
|
||||
|
||||
# Log the success event (should cache the model_id)
|
||||
await deployment_check.async_log_success_event(
|
||||
kwargs={"standard_logging_object": payload},
|
||||
response_obj={},
|
||||
start_time=1234567890.0,
|
||||
end_time=1234567891.0,
|
||||
)
|
||||
|
||||
# Small delay to ensure cache write completes
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
# Verify that the model_id was actually cached
|
||||
cached_result = await prompt_cache.async_get_model_id(
|
||||
messages=test_messages,
|
||||
tools=None,
|
||||
)
|
||||
|
||||
# This assertion will FAIL if anthropic_messages is filtered out
|
||||
assert cached_result is not None, "Model ID should be cached for anthropic_messages call type"
|
||||
assert cached_result["model_id"] == test_model_id, f"Expected {test_model_id}, got {cached_result['model_id']}"
|
||||
|
||||
@@ -916,6 +916,181 @@ def test_encode_video_id_with_provider_handles_azure_video_prefix():
|
||||
)
|
||||
assert encoded_twice == encoded_id # Should return the same encoded ID
|
||||
|
||||
class TestVideoListTransformation:
|
||||
"""Tests for video list request/response transformation with provider ID encoding."""
|
||||
|
||||
def test_transform_video_list_response_encodes_first_id_and_last_id(self):
|
||||
"""Verify that first_id and last_id are encoded with provider metadata."""
|
||||
config = OpenAIVideoConfig()
|
||||
|
||||
mock_http_response = MagicMock()
|
||||
mock_http_response.json.return_value = {
|
||||
"object": "list",
|
||||
"data": [
|
||||
{
|
||||
"id": "video_aaa",
|
||||
"object": "video",
|
||||
"model": "sora-2",
|
||||
"status": "completed",
|
||||
},
|
||||
{
|
||||
"id": "video_bbb",
|
||||
"object": "video",
|
||||
"model": "sora-2",
|
||||
"status": "completed",
|
||||
},
|
||||
],
|
||||
"first_id": "video_aaa",
|
||||
"last_id": "video_bbb",
|
||||
"has_more": False,
|
||||
}
|
||||
|
||||
result = config.transform_video_list_response(
|
||||
raw_response=mock_http_response,
|
||||
logging_obj=MagicMock(),
|
||||
custom_llm_provider="azure",
|
||||
)
|
||||
|
||||
from litellm.types.videos.utils import decode_video_id_with_provider
|
||||
|
||||
# data[].id should be encoded
|
||||
for item in result["data"]:
|
||||
decoded = decode_video_id_with_provider(item["id"])
|
||||
assert decoded["custom_llm_provider"] == "azure"
|
||||
|
||||
# first_id and last_id should also be encoded
|
||||
first_decoded = decode_video_id_with_provider(result["first_id"])
|
||||
assert first_decoded["custom_llm_provider"] == "azure"
|
||||
assert first_decoded["video_id"] == "video_aaa"
|
||||
assert first_decoded["model_id"] == "sora-2"
|
||||
|
||||
last_decoded = decode_video_id_with_provider(result["last_id"])
|
||||
assert last_decoded["custom_llm_provider"] == "azure"
|
||||
assert last_decoded["video_id"] == "video_bbb"
|
||||
assert last_decoded["model_id"] == "sora-2"
|
||||
|
||||
def test_transform_video_list_response_no_provider_leaves_ids_unchanged(self):
|
||||
"""When custom_llm_provider is None, all IDs should remain unchanged."""
|
||||
config = OpenAIVideoConfig()
|
||||
|
||||
mock_http_response = MagicMock()
|
||||
mock_http_response.json.return_value = {
|
||||
"object": "list",
|
||||
"data": [
|
||||
{"id": "video_aaa", "object": "video", "model": "sora-2", "status": "completed"},
|
||||
],
|
||||
"first_id": "video_aaa",
|
||||
"last_id": "video_aaa",
|
||||
"has_more": False,
|
||||
}
|
||||
|
||||
result = config.transform_video_list_response(
|
||||
raw_response=mock_http_response,
|
||||
logging_obj=MagicMock(),
|
||||
custom_llm_provider=None,
|
||||
)
|
||||
|
||||
assert result["data"][0]["id"] == "video_aaa"
|
||||
assert result["first_id"] == "video_aaa"
|
||||
assert result["last_id"] == "video_aaa"
|
||||
|
||||
def test_transform_video_list_response_missing_pagination_fields(self):
|
||||
"""first_id / last_id may be absent or null; should not raise."""
|
||||
config = OpenAIVideoConfig()
|
||||
|
||||
mock_http_response = MagicMock()
|
||||
mock_http_response.json.return_value = {
|
||||
"object": "list",
|
||||
"data": [
|
||||
{"id": "video_aaa", "object": "video", "model": "sora-2", "status": "completed"},
|
||||
],
|
||||
"has_more": False,
|
||||
}
|
||||
|
||||
result = config.transform_video_list_response(
|
||||
raw_response=mock_http_response,
|
||||
logging_obj=MagicMock(),
|
||||
custom_llm_provider="azure",
|
||||
)
|
||||
|
||||
# data[].id should still be encoded
|
||||
from litellm.types.videos.utils import decode_video_id_with_provider
|
||||
|
||||
decoded = decode_video_id_with_provider(result["data"][0]["id"])
|
||||
assert decoded["custom_llm_provider"] == "azure"
|
||||
|
||||
# first_id / last_id should not be present
|
||||
assert "first_id" not in result
|
||||
assert "last_id" not in result
|
||||
|
||||
def test_transform_video_list_request_decodes_after_parameter(self):
|
||||
"""Encoded 'after' cursor should be decoded back to the raw provider ID."""
|
||||
from litellm.types.videos.utils import encode_video_id_with_provider
|
||||
|
||||
config = OpenAIVideoConfig()
|
||||
|
||||
raw_id = "video_69888baee890819086dd3366bfc372fe"
|
||||
encoded_id = encode_video_id_with_provider(raw_id, "azure", "sora-2")
|
||||
|
||||
url, params = config.transform_video_list_request(
|
||||
api_base="https://my-resource.openai.azure.com/openai/v1/videos",
|
||||
litellm_params=MagicMock(),
|
||||
headers={},
|
||||
after=encoded_id,
|
||||
limit=10,
|
||||
)
|
||||
|
||||
assert params["after"] == raw_id
|
||||
assert params["limit"] == "10"
|
||||
|
||||
def test_transform_video_list_request_passes_through_plain_after(self):
|
||||
"""A plain (non-encoded) 'after' value should pass through unchanged."""
|
||||
config = OpenAIVideoConfig()
|
||||
|
||||
url, params = config.transform_video_list_request(
|
||||
api_base="https://api.openai.com/v1/videos",
|
||||
litellm_params=MagicMock(),
|
||||
headers={},
|
||||
after="video_plain_id",
|
||||
)
|
||||
|
||||
assert params["after"] == "video_plain_id"
|
||||
|
||||
def test_transform_video_list_roundtrip(self):
|
||||
"""first_id from list response should decode correctly when used as after parameter."""
|
||||
config = OpenAIVideoConfig()
|
||||
|
||||
# Simulate a list response
|
||||
mock_http_response = MagicMock()
|
||||
mock_http_response.json.return_value = {
|
||||
"object": "list",
|
||||
"data": [
|
||||
{"id": "video_aaa", "object": "video", "model": "sora-2", "status": "completed"},
|
||||
{"id": "video_bbb", "object": "video", "model": "sora-2", "status": "completed"},
|
||||
],
|
||||
"first_id": "video_aaa",
|
||||
"last_id": "video_bbb",
|
||||
"has_more": True,
|
||||
}
|
||||
|
||||
list_result = config.transform_video_list_response(
|
||||
raw_response=mock_http_response,
|
||||
logging_obj=MagicMock(),
|
||||
custom_llm_provider="azure",
|
||||
)
|
||||
|
||||
# Use the encoded last_id as the 'after' cursor for the next page
|
||||
_, params = config.transform_video_list_request(
|
||||
api_base="https://my-resource.openai.azure.com/openai/v1/videos",
|
||||
litellm_params=MagicMock(),
|
||||
headers={},
|
||||
after=list_result["last_id"],
|
||||
)
|
||||
|
||||
# The after param sent to the upstream API should be the raw video ID
|
||||
assert params["after"] == "video_bbb"
|
||||
|
||||
|
||||
class TestVideoEndpointsProxyLitellmParams:
|
||||
"""Test that video proxy endpoints (status, content, remix) respect litellm_params from proxy config."""
|
||||
|
||||
|
||||
@@ -84,6 +84,8 @@
|
||||
"mermaid": ">=11.10.0",
|
||||
"js-yaml": ">=4.1.1",
|
||||
"glob": ">=11.1.0",
|
||||
"tar": ">=7.5.7",
|
||||
"@isaacs/brace-expansion": ">=5.0.1",
|
||||
"node-forge": ">=1.3.2",
|
||||
"lodash-es": ">=4.17.23",
|
||||
"lodash": ">=4.17.23"
|
||||
|
||||
@@ -542,3 +542,86 @@ it("should display 'Default Proxy Admin' for created_by when value is 'default_u
|
||||
expect(defaultProxyAdminElements.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
it("should render table without crashing when models is null", async () => {
|
||||
const keyWithNullModels = {
|
||||
...mockKey,
|
||||
models: null as unknown as string[],
|
||||
};
|
||||
|
||||
mockUseFilterLogic.mockReturnValue({
|
||||
filters: {
|
||||
"Team ID": "",
|
||||
"Organization ID": "",
|
||||
"Key Alias": "",
|
||||
"User ID": "",
|
||||
"Sort By": "created_at",
|
||||
"Sort Order": "desc",
|
||||
},
|
||||
filteredKeys: [keyWithNullModels],
|
||||
allKeyAliases: ["test-key-alias"],
|
||||
allTeams: [mockTeam],
|
||||
allOrganizations: [mockOrganization],
|
||||
handleFilterChange: vi.fn(),
|
||||
handleFilterReset: vi.fn(),
|
||||
});
|
||||
|
||||
const mockProps = {
|
||||
teams: [mockTeam],
|
||||
organizations: [mockOrganization],
|
||||
onSortChange: vi.fn(),
|
||||
currentSort: {
|
||||
sortBy: "created_at",
|
||||
sortOrder: "desc" as const,
|
||||
},
|
||||
};
|
||||
|
||||
// This should not throw an error
|
||||
renderWithProviders(<VirtualKeysTable {...mockProps} />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("Test Key Alias")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
it("should render table without crashing when models is undefined", async () => {
|
||||
const keyWithUndefinedModels = {
|
||||
...mockKey,
|
||||
models: undefined as unknown as string[],
|
||||
};
|
||||
|
||||
mockUseFilterLogic.mockReturnValue({
|
||||
filters: {
|
||||
"Team ID": "",
|
||||
"Organization ID": "",
|
||||
"Key Alias": "",
|
||||
"User ID": "",
|
||||
"Sort By": "created_at",
|
||||
"Sort Order": "desc",
|
||||
},
|
||||
filteredKeys: [keyWithUndefinedModels],
|
||||
allKeyAliases: ["test-key-alias"],
|
||||
allTeams: [mockTeam],
|
||||
allOrganizations: [mockOrganization],
|
||||
handleFilterChange: vi.fn(),
|
||||
handleFilterReset: vi.fn(),
|
||||
});
|
||||
|
||||
const mockProps = {
|
||||
teams: [mockTeam],
|
||||
organizations: [mockOrganization],
|
||||
onSortChange: vi.fn(),
|
||||
currentSort: {
|
||||
sortBy: "created_at",
|
||||
sortOrder: "desc" as const,
|
||||
},
|
||||
};
|
||||
|
||||
// This should not throw an error
|
||||
renderWithProviders(<VirtualKeysTable {...mockProps} />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("Test Key Alias")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -727,7 +727,7 @@ export function VirtualKeysTable({ teams, organizations, onSortChange, currentSo
|
||||
whiteSpace: "pre-wrap",
|
||||
overflow: "hidden",
|
||||
}}
|
||||
className={`py-0.5 max-h-8 overflow-hidden text-ellipsis whitespace-nowrap ${cell.column.id === "models" && (cell.getValue() as string[]).length > 3 ? "px-0" : ""}`}
|
||||
className={`py-0.5 max-h-8 overflow-hidden text-ellipsis whitespace-nowrap ${cell.column.id === "models" && Array.isArray(cell.getValue()) && (cell.getValue() as string[]).length > 3 ? "px-0" : ""}`}
|
||||
>
|
||||
{flexRender(cell.column.columnDef.cell, cell.getContext())}
|
||||
</TableCell>
|
||||
|
||||
@@ -465,8 +465,8 @@ const TeamInfoView: React.FC<TeamInfoProps> = ({
|
||||
budget_duration: values.budget_duration,
|
||||
metadata: {
|
||||
...parsedMetadata,
|
||||
guardrails: values.guardrails || [],
|
||||
logging: values.logging_settings || [],
|
||||
...(values.guardrails?.length > 0 ? { guardrails: values.guardrails } : {}),
|
||||
...(values.logging_settings?.length > 0 ? { logging: values.logging_settings } : {}),
|
||||
disable_global_guardrails: values.disable_global_guardrails || false,
|
||||
soft_budget_alerting_emails:
|
||||
typeof values.soft_budget_alerting_emails === "string"
|
||||
@@ -477,7 +477,7 @@ const TeamInfoView: React.FC<TeamInfoProps> = ({
|
||||
: values.soft_budget_alerting_emails || [],
|
||||
...(secretManagerSettings !== undefined ? { secret_manager_settings: secretManagerSettings } : {}),
|
||||
},
|
||||
policies: values.policies || [],
|
||||
...(values.policies?.length > 0 ? { policies: values.policies } : {}),
|
||||
organization_id: values.organization_id,
|
||||
};
|
||||
|
||||
|
||||
@@ -85,7 +85,7 @@ export function LogDetailsDrawer({
|
||||
// Check if request/response data is present
|
||||
const hasMessages = checkHasMessages(logEntry.messages);
|
||||
const hasResponse = checkHasResponse(logEntry.response);
|
||||
const missingData = !hasMessages && !hasResponse;
|
||||
const missingData = !hasMessages && !hasResponse && !hasError;
|
||||
|
||||
// Guardrail data
|
||||
const guardrailInfo = metadata?.guardrail_information;
|
||||
@@ -206,6 +206,7 @@ export function LogDetailsDrawer({
|
||||
{/* Request/Response JSON - Collapsible */}
|
||||
<RequestResponseSection
|
||||
hasResponse={hasResponse}
|
||||
hasError={hasError}
|
||||
getRawRequest={getRawRequest}
|
||||
getFormattedResponse={getFormattedResponse}
|
||||
logEntry={logEntry}
|
||||
@@ -339,6 +340,7 @@ function MetricsSection({ logEntry, metadata }: { logEntry: LogEntry; metadata:
|
||||
|
||||
interface RequestResponseSectionProps {
|
||||
hasResponse: boolean;
|
||||
hasError: boolean;
|
||||
getRawRequest: () => any;
|
||||
getFormattedResponse: () => any;
|
||||
logEntry: LogEntry;
|
||||
@@ -346,6 +348,7 @@ interface RequestResponseSectionProps {
|
||||
|
||||
function RequestResponseSection({
|
||||
hasResponse,
|
||||
hasError,
|
||||
getRawRequest,
|
||||
getFormattedResponse,
|
||||
logEntry,
|
||||
@@ -423,7 +426,7 @@ function RequestResponseSection({
|
||||
text: getCopyText(),
|
||||
tooltips: ["Copy JSON", "Copied!"]
|
||||
}}
|
||||
disabled={activeTab === TAB_RESPONSE && !hasResponse}
|
||||
disabled={activeTab === TAB_RESPONSE && !hasResponse && !hasError}
|
||||
/>
|
||||
}
|
||||
items={[
|
||||
@@ -441,7 +444,7 @@ function RequestResponseSection({
|
||||
label: "Response",
|
||||
children: (
|
||||
<div style={{ paddingTop: SPACING_XLARGE, paddingBottom: SPACING_XLARGE }}>
|
||||
{hasResponse ? (
|
||||
{hasResponse || hasError ? (
|
||||
<JsonViewer data={getFormattedResponse()} mode="formatted" />
|
||||
) : (
|
||||
<div style={{ textAlign: "center", padding: 20, color: "#999", fontStyle: "italic" }}>
|
||||
|
||||
@@ -188,4 +188,78 @@ describe("RequestResponsePanel", () => {
|
||||
expect(responseData).toEqual({ responseData: "this should appear in response" });
|
||||
expect(responseData).not.toEqual({ requestData: "this should not appear in response" });
|
||||
});
|
||||
|
||||
it("should show error response data when hasError is true and hasResponse is false", () => {
|
||||
const failedLogEntry: LogEntry = {
|
||||
...baseLogEntry,
|
||||
messages: [],
|
||||
response: {},
|
||||
metadata: {
|
||||
status: "failure",
|
||||
error_information: {
|
||||
error_message: "Model not found",
|
||||
error_class: "NotFoundError",
|
||||
error_code: 404,
|
||||
},
|
||||
additional_usage_values: {
|
||||
cache_read_input_tokens: 0,
|
||||
cache_creation_input_tokens: 0,
|
||||
},
|
||||
},
|
||||
};
|
||||
const errorResponse = { error: { message: "Model not found", type: "NotFoundError", code: 404, param: null } };
|
||||
const mockGetRawRequest = vi.fn().mockReturnValue({ messages: [] });
|
||||
const mockFormattedResponse = vi.fn().mockReturnValue(errorResponse);
|
||||
render(
|
||||
<RequestResponsePanel
|
||||
row={{ original: failedLogEntry }}
|
||||
hasMessages={false}
|
||||
hasResponse={false}
|
||||
hasError={true}
|
||||
errorInfo={failedLogEntry.metadata.error_information}
|
||||
getRawRequest={mockGetRawRequest}
|
||||
formattedResponse={mockFormattedResponse}
|
||||
/>,
|
||||
);
|
||||
expect(screen.queryByText("Response data not available")).not.toBeInTheDocument();
|
||||
expect(mockFormattedResponse).toHaveBeenCalled();
|
||||
const copyButtons = screen.getAllByRole("button");
|
||||
const copyResponseButton = copyButtons.find((button) => button.getAttribute("title") === "Copy response");
|
||||
expect(copyResponseButton).not.toBeDisabled();
|
||||
});
|
||||
|
||||
it("should show Response data not available when hasResponse and hasError are both false", () => {
|
||||
const mockGetRawRequest = vi.fn().mockReturnValue({ messages: [] });
|
||||
const mockFormattedResponse = vi.fn().mockReturnValue({});
|
||||
render(
|
||||
<RequestResponsePanel
|
||||
row={{ original: baseLogEntry }}
|
||||
hasMessages={false}
|
||||
hasResponse={false}
|
||||
hasError={false}
|
||||
errorInfo={null}
|
||||
getRawRequest={mockGetRawRequest}
|
||||
formattedResponse={mockFormattedResponse}
|
||||
/>,
|
||||
);
|
||||
expect(screen.getByText("Response data not available")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should show error code in response header when hasError is true", () => {
|
||||
const errorInfo = { error_message: "Rate limit exceeded", error_class: "RateLimitError", error_code: 429 };
|
||||
const mockGetRawRequest = vi.fn().mockReturnValue({ messages: [] });
|
||||
const mockFormattedResponse = vi.fn().mockReturnValue({ error: { message: "Rate limit exceeded", type: "RateLimitError", code: 429, param: null } });
|
||||
render(
|
||||
<RequestResponsePanel
|
||||
row={{ original: baseLogEntry }}
|
||||
hasMessages={false}
|
||||
hasResponse={false}
|
||||
hasError={true}
|
||||
errorInfo={errorInfo}
|
||||
getRawRequest={mockGetRawRequest}
|
||||
formattedResponse={mockFormattedResponse}
|
||||
/>,
|
||||
);
|
||||
expect(screen.getByText(/HTTP code 429/)).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -113,7 +113,7 @@ export function RequestResponsePanel({
|
||||
onClick={handleCopyResponse}
|
||||
className="p-1 hover:bg-gray-200 rounded"
|
||||
title="Copy response"
|
||||
disabled={!hasResponse}
|
||||
disabled={!hasResponse && !hasError}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
@@ -132,7 +132,7 @@ export function RequestResponsePanel({
|
||||
</button>
|
||||
</div>
|
||||
<div className="p-4 overflow-auto max-h-96 w-full max-w-full box-border">
|
||||
{hasResponse ? (
|
||||
{hasResponse || hasError ? (
|
||||
<div className="[&_[role='tree']]:bg-white [&_[role='tree']]:text-slate-900">
|
||||
<JsonView data={formattedResponse()} style={defaultStyles} clickToExpandNode />
|
||||
</div>
|
||||
|
||||
@@ -822,7 +822,7 @@ export function RequestViewer({ row, onOpenSettings }: { row: Row<LogEntry>; onO
|
||||
? row.original.messages.length > 0
|
||||
: Object.keys(row.original.messages).length > 0);
|
||||
const hasResponse = row.original.response && Object.keys(formatData(row.original.response)).length > 0;
|
||||
const missingData = !hasMessages && !hasResponse;
|
||||
const missingData = !hasMessages && !hasResponse && !hasError;
|
||||
|
||||
// Format the response with error details if present
|
||||
const formattedResponse = () => {
|
||||
|
||||
Reference in New Issue
Block a user