Merge remote-tracking branch 'origin' into litellm_org_usage

This commit is contained in:
yuneng-jiang
2025-11-15 16:28:12 -08:00
328 changed files with 13124 additions and 4002 deletions
+374 -57
View File
@@ -24,6 +24,39 @@ commands:
cd enterprise
python -m pip install -e .
cd ..
# Reusable command for test jobs: checks out the repo, runs the setup_google_dns
# command, restores the pip cache, installs requirements.txt plus the pinned test
# tooling listed below, runs the setup_litellm_enterprise_pip command, and finally
# saves the pip cache.
setup_litellm_test_deps:
steps:
- checkout
- setup_google_dns
# The first key is the exact match (requirements.txt + CI config checksums); the
# bare "v2-litellm-deps-" prefix is a fallback so an older cache can still be reused.
- restore_cache:
keys:
- v2-litellm-deps-{{ checksum "requirements.txt" }}-{{ checksum ".circleci/config.yml" }}
- v2-litellm-deps-
- run:
name: Install Dependencies
command: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
pip install "pytest-mock==3.12.0"
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-cov==5.0.0"
pip install "pytest-asyncio==0.21.1"
pip install "respx==0.22.0"
pip install "hypercorn==0.17.3"
pip install "pydantic==2.10.2"
pip install "mcp==1.10.1"
pip install "requests-mock>=1.12.1"
pip install "responses==0.25.7"
pip install "pytest-xdist==3.6.1"
pip install "pytest-timeout==2.2.0"
pip install "semantic_router==0.1.10"
pip install "fastapi-offline==1.7.3"
- setup_litellm_enterprise_pip
# Only pip's download cache (~/.cache/pip) is saved, under the same exact key
# that restore_cache tries first.
- save_cache:
paths:
- ~/.cache/pip
key: v2-litellm-deps-{{ checksum "requirements.txt" }}-{{ checksum ".circleci/config.yml" }}
jobs:
# Add Windows testing job
@@ -668,13 +701,16 @@ jobs:
paths:
- litellm_security_tests_coverage.xml
- litellm_security_tests_coverage
litellm_proxy_unit_testing: # Runs all tests with the "proxy", "key", "jwt" filenames
# Split proxy unit tests into 3 jobs for faster execution and better debugging
# test_key_generate_prisma runs separately without parallel execution to avoid event loop issues with logging worker
litellm_proxy_unit_testing_key_generation:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
resource_class: large
steps:
- checkout
- setup_google_dns
@@ -699,6 +735,114 @@ jobs:
pip install "pytest-retry==1.6.3"
pip install "pytest-asyncio==0.21.1"
pip install "pytest-cov==5.0.0"
pip install "pytest-timeout==2.2.0"
pip install "pytest-forked==1.6.0"
pip install "mypy==1.18.2"
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install "google-genai==1.22.0"
pip install pyarrow
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
pip install "langfuse==2.59.7"
pip install "logfire==0.29.0"
pip install numpydoc
pip install traceloop-sdk==0.21.1
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.100.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
pip install "respx==0.22.0"
pip install fastapi
pip install "gunicorn==21.2.0"
pip install "anyio==4.2.0"
pip install "aiodynamo==23.10.1"
pip install "asyncio==3.4.3"
pip install "apscheduler==3.10.4"
pip install "PyGithub==1.59.1"
pip install argon2-cffi
pip install "pytest-mock==3.12.0"
pip install python-multipart
pip install google-cloud-aiplatform
pip install prometheus-client==0.20.0
pip install "pydantic==2.10.2"
pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0"
pip install "jsonschema==4.22.0"
pip install "pytest-postgresql==7.0.1"
pip install "fakeredis==2.28.1"
- setup_litellm_enterprise_pip
- save_cache:
paths:
- ./venv
key: v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
- run:
name: Run prisma ./docker/entrypoint.sh
command: |
set +e
chmod +x docker/entrypoint.sh
./docker/entrypoint.sh
set -e
- run:
name: Run key generation tests (no parallel execution to avoid event loop issues)
command: |
pwd
ls
# Run without -n flag to avoid pytest-xdist event loop conflicts with logging worker
python -m pytest tests/proxy_unit_tests/test_key_generate_prisma.py --cov=litellm --cov-report=xml --junitxml=test-results/junit-key-generation.xml --durations=10 --timeout=300 -vv --log-cli-level=INFO
no_output_timeout: 120m
- run:
name: Rename the coverage files
command: |
mv coverage.xml litellm_proxy_unit_tests_key_generation_coverage.xml
mv .coverage litellm_proxy_unit_tests_key_generation_coverage
- store_test_results:
path: test-results
- persist_to_workspace:
root: .
paths:
- litellm_proxy_unit_tests_key_generation_coverage.xml
- litellm_proxy_unit_tests_key_generation_coverage
litellm_proxy_unit_testing_part1:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
resource_class: large
steps:
- checkout
- setup_google_dns
- run:
name: Show git commit hash
command: |
echo "Git commit hash: $CIRCLE_SHA1"
- run:
name: Install PostgreSQL
command: |
sudo apt-get update
sudo apt-get install -y postgresql-14 postgresql-contrib-14
- restore_cache:
keys:
- v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
- run:
name: Install Dependencies
command: |
python -m pip install --upgrade pip
python -m pip install -r .circleci/requirements.txt
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-asyncio==0.21.1"
pip install "pytest-cov==5.0.0"
pip install "pytest-timeout==2.2.0"
pip install "pytest-forked==1.6.0"
pip install "mypy==1.18.2"
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
@@ -752,28 +896,132 @@ jobs:
chmod +x docker/entrypoint.sh
./docker/entrypoint.sh
set -e
# Run pytest and generate JUnit XML report
- run:
name: Run tests
name: Run proxy unit tests (part 1 - auth checks only, key generation in separate job)
command: |
pwd
ls
python -m pytest tests/proxy_unit_tests --cov=litellm --cov-report=xml -vv -x -v --junitxml=test-results/junit.xml --durations=5 -n 4
# Run auth tests with parallel execution (test_key_generate_prisma moved to separate job to avoid event loop issues)
python -m pytest tests/proxy_unit_tests/test_auth_checks.py tests/proxy_unit_tests/test_user_api_key_auth.py --cov=litellm --cov-report=xml --junitxml=test-results/junit-part1.xml --durations=10 -n 8 --timeout=300 -vv --log-cli-level=INFO
no_output_timeout: 120m
- run:
name: Rename the coverage files
command: |
mv coverage.xml litellm_proxy_unit_tests_coverage.xml
mv .coverage litellm_proxy_unit_tests_coverage
# Store test results
mv coverage.xml litellm_proxy_unit_tests_part1_coverage.xml
mv .coverage litellm_proxy_unit_tests_part1_coverage
- store_test_results:
path: test-results
- persist_to_workspace:
root: .
paths:
- litellm_proxy_unit_tests_coverage.xml
- litellm_proxy_unit_tests_coverage
- litellm_proxy_unit_tests_part1_coverage.xml
- litellm_proxy_unit_tests_part1_coverage
# Part 2 of the split proxy unit tests: runs everything under tests/proxy_unit_tests
# except the three files passed via --ignore in the test step
# (test_key_generate_prisma.py, test_auth_checks.py, test_user_api_key_auth.py),
# using 8 pytest-xdist workers and a 300s per-test timeout.
# NOTE(review): save_cache stores ./venv, but no step visible in this job creates or
# activates a virtualenv - confirm the cache actually captures the installed packages.
litellm_proxy_unit_testing_part2:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
resource_class: large
steps:
- checkout
- setup_google_dns
- run:
name: Show git commit hash
command: |
echo "Git commit hash: $CIRCLE_SHA1"
- run:
name: Install PostgreSQL
command: |
sudo apt-get update
sudo apt-get install -y postgresql-14 postgresql-contrib-14
- restore_cache:
keys:
- v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
# Installs .circleci/requirements.txt, then the individual packages listed below
# (most pinned to exact versions), including pytest-xdist for the -n 8 test run.
- run:
name: Install Dependencies
command: |
python -m pip install --upgrade pip
python -m pip install -r .circleci/requirements.txt
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-asyncio==0.21.1"
pip install "pytest-cov==5.0.0"
pip install "pytest-timeout==2.2.0"
pip install "pytest-forked==1.6.0"
pip install "mypy==1.18.2"
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install "google-genai==1.22.0"
pip install pyarrow
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
pip install "langfuse==2.59.7"
pip install "logfire==0.29.0"
pip install numpydoc
pip install traceloop-sdk==0.21.1
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.100.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
pip install "respx==0.22.0"
pip install fastapi
pip install "gunicorn==21.2.0"
pip install "anyio==4.2.0"
pip install "aiodynamo==23.10.1"
pip install "asyncio==3.4.3"
pip install "apscheduler==3.10.4"
pip install "PyGithub==1.59.1"
pip install argon2-cffi
pip install "pytest-mock==3.12.0"
pip install python-multipart
pip install google-cloud-aiplatform
pip install prometheus-client==0.20.0
pip install "pydantic==2.10.2"
pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0"
pip install "jsonschema==4.22.0"
pip install "pytest-postgresql==7.0.1"
pip install "fakeredis==2.28.1"
pip install "pytest-xdist==3.6.1"
- setup_litellm_enterprise_pip
- save_cache:
paths:
- ./venv
key: v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
# The script runs between "set +e" and "set -e", so a failure of
# docker/entrypoint.sh does not fail this step.
- run:
name: Run prisma ./docker/entrypoint.sh
command: |
set +e
chmod +x docker/entrypoint.sh
./docker/entrypoint.sh
set -e
- run:
name: Run proxy unit tests (part 2 - remaining tests)
command: |
pwd
ls
python -m pytest tests/proxy_unit_tests --ignore=tests/proxy_unit_tests/test_key_generate_prisma.py --ignore=tests/proxy_unit_tests/test_auth_checks.py --ignore=tests/proxy_unit_tests/test_user_api_key_auth.py --cov=litellm --cov-report=xml --junitxml=test-results/junit-part2.xml --durations=10 -n 8 --timeout=300 -vv --log-cli-level=INFO
no_output_timeout: 120m
# Give the coverage outputs job-specific names before they are persisted.
- run:
name: Rename the coverage files
command: |
mv coverage.xml litellm_proxy_unit_tests_part2_coverage.xml
mv .coverage litellm_proxy_unit_tests_part2_coverage
- store_test_results:
path: test-results
# Persist the renamed coverage files to the workspace.
- persist_to_workspace:
root: .
paths:
- litellm_proxy_unit_tests_part2_coverage.xml
- litellm_proxy_unit_tests_part2_coverage
litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
docker:
- image: cimg/python:3.13.1
@@ -1128,59 +1376,88 @@ jobs:
paths:
- search_coverage.xml
- search_coverage
litellm_mapped_tests:
# Split litellm_mapped_tests into 3 parallel jobs for 3x faster execution
litellm_mapped_tests_proxy:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
resource_class: xlarge
steps:
- checkout
- setup_google_dns
- setup_litellm_test_deps
- run:
name: Install Dependencies
name: Run proxy tests
command: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
pip install "pytest-mock==3.12.0"
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-cov==5.0.0"
pip install "pytest-asyncio==0.21.1"
pip install "respx==0.22.0"
pip install "hypercorn==0.17.3"
pip install "pydantic==2.10.2"
pip install "mcp==1.10.1"
pip install "requests-mock>=1.12.1"
pip install "responses==0.25.7"
pip install "pytest-xdist==3.6.1"
pip install "semantic_router==0.1.10"
pip install "fastapi-offline==1.7.3"
- setup_litellm_enterprise_pip
# Run pytest and generate JUnit XML report
- run:
name: Run litellm tests
command: |
pwd
ls
python -m pytest -vv tests/test_litellm --cov=litellm --cov-report=xml -v --junitxml=test-results/junit-litellm.xml --durations=10 -n 8
python -m pytest tests/test_litellm/proxy --cov=litellm --cov-report=xml --junitxml=test-results/junit-proxy.xml --durations=10 -n 16 --maxfail=5 --timeout=300 -vv --log-cli-level=WARNING
no_output_timeout: 120m
- run:
name: Rename the coverage files
command: |
mv coverage.xml litellm_mapped_tests_coverage.xml
mv .coverage litellm_mapped_tests_coverage
# Store test results
mv coverage.xml litellm_proxy_tests_coverage.xml
mv .coverage litellm_proxy_tests_coverage
- store_test_results:
path: test-results
- persist_to_workspace:
root: .
paths:
- litellm_mapped_tests_coverage.xml
- litellm_mapped_tests_coverage
- litellm_proxy_tests_coverage.xml
- litellm_proxy_tests_coverage
# LLM-provider slice of the tests/test_litellm suite: runs only tests/test_litellm/llms.
litellm_mapped_tests_llms:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
resource_class: xlarge
steps:
# No separate checkout step: the setup_litellm_test_deps command performs the
# checkout and the dependency installation itself.
- setup_litellm_test_deps
# 16 pytest-xdist workers (-n 16), stop after 5 failures (--maxfail=5),
# 300s timeout per test (--timeout=300).
- run:
name: Run LLM provider tests
command: |
python -m pytest tests/test_litellm/llms --cov=litellm --cov-report=xml --junitxml=test-results/junit-llms.xml --durations=10 -n 16 --maxfail=5 --timeout=300 -vv --log-cli-level=WARNING
no_output_timeout: 120m
# Give the coverage outputs job-specific names before they are persisted.
- run:
name: Rename the coverage files
command: |
mv coverage.xml litellm_llms_tests_coverage.xml
mv .coverage litellm_llms_tests_coverage
- store_test_results:
path: test-results
# Persist the renamed coverage files to the workspace.
- persist_to_workspace:
root: .
paths:
- litellm_llms_tests_coverage.xml
- litellm_llms_tests_coverage
# Core slice of the tests/test_litellm suite: runs everything under tests/test_litellm
# except the proxy and llms sub-directories, which are excluded via --ignore.
litellm_mapped_tests_core:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
resource_class: xlarge
steps:
# No separate checkout step: the setup_litellm_test_deps command performs the
# checkout and the dependency installation itself.
- setup_litellm_test_deps
# 16 pytest-xdist workers (-n 16), stop after 5 failures (--maxfail=5),
# 300s timeout per test (--timeout=300).
- run:
name: Run core tests
command: |
python -m pytest tests/test_litellm --ignore=tests/test_litellm/proxy --ignore=tests/test_litellm/llms --cov=litellm --cov-report=xml --junitxml=test-results/junit-core.xml --durations=10 -n 16 --maxfail=5 --timeout=300 -vv --log-cli-level=WARNING
no_output_timeout: 120m
# Give the coverage outputs job-specific names before they are persisted.
- run:
name: Rename the coverage files
command: |
mv coverage.xml litellm_core_tests_coverage.xml
mv .coverage litellm_core_tests_coverage
- store_test_results:
path: test-results
# Persist the renamed coverage files to the workspace.
- persist_to_workspace:
root: .
paths:
- litellm_core_tests_coverage.xml
- litellm_core_tests_coverage
litellm_mapped_enterprise_tests:
docker:
- image: cimg/python:3.11
@@ -1447,7 +1724,7 @@ jobs:
command: |
pwd
ls
python -m pytest -vv tests/logging_callback_tests --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
python -m pytest -vv tests/logging_callback_tests --cov=litellm --cov-report=xml -s -v --junitxml=test-results/junit.xml --durations=5
no_output_timeout: 120m
- run:
name: Rename the coverage files
@@ -1914,14 +2191,14 @@ jobs:
sudo usermod -aG docker $USER
docker version
- run:
name: Install Python 3.9
name: Install Python 3.10
command: |
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda init bash
source ~/.bashrc
conda create -n myenv python=3.9 -y
conda create -n myenv python=3.10 -y
conda activate myenv
python --version
- run:
@@ -2695,19 +2972,22 @@ jobs:
sudo usermod -aG docker $USER
docker version
- run:
name: Install Python 3.9
name: Install Python 3.10
command: |
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda init bash
source ~/.bashrc
conda create -n myenv python=3.9 -y
conda create -n myenv python=3.10 -y
conda activate myenv
python --version
- run:
name: Install Dependencies
command: |
export PATH="$HOME/miniconda/bin:$PATH"
source $HOME/miniconda/etc/profile.d/conda.sh
conda activate myenv
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-asyncio==0.21.1"
@@ -2736,6 +3016,8 @@ jobs:
pip install "langchain_mcp_adapters==0.0.5"
pip install "langchain_openai==0.2.1"
pip install "langgraph==0.3.18"
pip install "fastuuid==0.13.5"
pip install -r requirements.txt
- run:
name: Install dockerize
command: |
@@ -2848,6 +3130,9 @@ jobs:
- run:
name: Run tests
command: |
export PATH="$HOME/miniconda/bin:$PATH"
source $HOME/miniconda/etc/profile.d/conda.sh
conda activate myenv
pwd
ls
python -m pytest -vv tests/pass_through_tests/ -x --junitxml=test-results/junit.xml --durations=5
@@ -2878,7 +3163,7 @@ jobs:
python -m venv venv
. venv/bin/activate
pip install coverage
coverage combine llm_translation_coverage llm_responses_api_coverage ocr_coverage search_coverage mcp_coverage logging_coverage audio_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_security_tests_coverage guardrails_coverage
# Every name below must match a coverage data file persisted by an upstream job.
# The mapped tests are split into proxy/llms/core jobs and the proxy unit tests into
# key_generation/part1/part2 jobs, so each of those files is listed individually.
coverage combine llm_translation_coverage llm_responses_api_coverage ocr_coverage search_coverage mcp_coverage logging_coverage audio_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_key_generation_coverage litellm_proxy_unit_tests_part1_coverage litellm_proxy_unit_tests_part2_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_security_tests_coverage guardrails_coverage litellm_proxy_tests_coverage litellm_llms_tests_coverage litellm_core_tests_coverage
coverage xml
- codecov/upload:
file: ./coverage.xml
@@ -3300,7 +3585,19 @@ workflows:
only:
- main
- /litellm_.*/
- litellm_proxy_unit_testing:
- litellm_proxy_unit_testing_key_generation:
filters:
branches:
only:
- main
- /litellm_.*/
- litellm_proxy_unit_testing_part1:
filters:
branches:
only:
- main
- /litellm_.*/
- litellm_proxy_unit_testing_part2:
filters:
branches:
only:
@@ -3444,7 +3741,19 @@ workflows:
only:
- main
- /litellm_.*/
- litellm_mapped_tests:
- litellm_mapped_tests_proxy:
filters:
branches:
only:
- main
- /litellm_.*/
- litellm_mapped_tests_llms:
filters:
branches:
only:
- main
- /litellm_.*/
- litellm_mapped_tests_core:
filters:
branches:
only:
@@ -3495,7 +3804,9 @@ workflows:
- llm_responses_api_testing
- ocr_testing
- search_testing
- litellm_mapped_tests
- litellm_mapped_tests_proxy
- litellm_mapped_tests_llms
- litellm_mapped_tests_core
- litellm_mapped_enterprise_tests
- batches_testing
- litellm_utils_testing
@@ -3506,7 +3817,9 @@ workflows:
- litellm_router_testing
- litellm_router_unit_testing
- caching_unit_tests
- litellm_proxy_unit_testing
- litellm_proxy_unit_testing_key_generation
- litellm_proxy_unit_testing_part1
- litellm_proxy_unit_testing_part2
- litellm_security_tests
- langfuse_logging_unit_tests
- local_testing
@@ -3560,7 +3873,9 @@ workflows:
- llm_responses_api_testing
- ocr_testing
- search_testing
- litellm_mapped_tests
- litellm_mapped_tests_proxy
- litellm_mapped_tests_llms
- litellm_mapped_tests_core
- litellm_mapped_enterprise_tests
- batches_testing
- litellm_utils_testing
@@ -3576,7 +3891,9 @@ workflows:
- auth_ui_unit_tests
- db_migration_disable_update_check
- e2e_ui_testing
- litellm_proxy_unit_testing
- litellm_proxy_unit_testing_key_generation
- litellm_proxy_unit_testing_part1
- litellm_proxy_unit_testing_part2
- litellm_security_tests
- installing_litellm_on_python
- installing_litellm_on_python_3_13
+7
View File
@@ -0,0 +1,7 @@
# js-yaml CVE-2025-64718
# This vulnerability is not applicable because we've forced js-yaml to version 4.1.1
# via npm overrides in package.json. Trivy incorrectly reports this based on
# dependency requirements in the lockfile, but the actual installed version is 4.1.1.
# Verified with: npm list js-yaml
CVE-2025-64718
+2 -2
View File
@@ -16,7 +16,7 @@ Get free 7-day trial key [here](https://www.litellm.ai/enterprise#trial)
Includes all enterprise features.
<Image img={require('../img/enterprise_vs_oss.png')} />
<Image img={require('../img/enterprise_vs_oss_2.png')} />
[**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)
@@ -40,7 +40,7 @@ Self-Managed Enterprise deployments require our team to understand your exact ne
### How does deployment with Enterprise License work?
You just deploy [our docker image](https://docs.litellm.ai/docs/proxy/deploy) and get an enterprise license key to add to your environment to unlock additional functionality (SSO, Prometheus metrics, etc.).
You just deploy [our docker image](https://docs.litellm.ai/docs/proxy/deploy) and get an enterprise license key to add to your environment to unlock additional functionality (SSO, etc.).
```env
LITELLM_LICENSE="eyJ..."
+9 -11
View File
@@ -211,11 +211,12 @@ mcp_servers:
oauth2_example:
url: "https://my-mcp-server.com/mcp"
auth_type: "oauth2" # 👈 KEY CHANGE
authorization_url: "https://my-mcp-server.com/oauth/authorize" # optional for client-credentials
token_url: "https://my-mcp-server.com/oauth/token" # required
authorization_url: "https://my-mcp-server.com/oauth/authorize" # optional override
token_url: "https://my-mcp-server.com/oauth/token" # optional override
registration_url: "https://my-mcp-server.com/oauth/register" # optional override
client_id: os.environ/OAUTH_CLIENT_ID
client_secret: os.environ/OAUTH_CLIENT_SECRET
scopes: ["tool.read", "tool.write"] # optional
scopes: ["tool.read", "tool.write"] # optional override
bearer_example:
url: "https://my-mcp-server.com/mcp"
@@ -325,6 +326,10 @@ mcp_servers:
| `spec_path` | Yes | Path or URL to your OpenAPI specification file (JSON or YAML) |
| `auth_type` | No | Authentication type: `none`, `api_key`, `bearer_token`, `basic`, `authorization`, `oauth2` |
| `auth_value` | No | Authentication value (required if `auth_type` is set) |
| `authorization_url` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM auto-discovers it. |
| `token_url` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM auto-discovers it. |
| `registration_url` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM auto-discovers it. |
| `scopes` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM uses the scopes advertised by the server. |
| `description` | No | Optional description for the MCP server |
| `allowed_tools` | No | List of specific tools to allow (see [MCP Tool Filtering](#mcp-tool-filtering)) |
| `disallowed_tools` | No | List of specific tools to block (see [MCP Tool Filtering](#mcp-tool-filtering)) |
@@ -1224,17 +1229,10 @@ mcp_servers:
github_mcp:
url: "https://api.githubcopilot.com/mcp"
auth_type: oauth2
authorization_url: https://github.com/login/oauth/authorize
token_url: https://github.com/login/oauth/access_token
client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
scopes: ["public_repo", "user:email"]
```
**Note**
In the future, users will only need to specify the `url` of the MCP server.
LiteLLM will automatically resolve the corresponding `authorization_url`, `token_url`, and `registration_url` based on the MCP server metadata (e.g., `.well-known/oauth-authorization-server` or `oauth-protected-resource`).
[**See Claude Code Tutorial**](./tutorials/claude_responses_api#connecting-mcp-servers)
## Using your MCP with client side credentials
@@ -1887,4 +1885,4 @@ async with stdio_client(server_params) as (read, write):
```
</TabItem>
</Tabs>
</Tabs>
@@ -953,6 +953,30 @@ except Exception as e:
s/o @[Shekhar Patnaik](https://www.linkedin.com/in/patnaikshekhar) for requesting this!
### Context Management (Beta)
Anthropic's [context editing](https://docs.claude.com/en/docs/build-with-claude/context-editing) API lets you automatically clear older tool results or thinking blocks. LiteLLM now forwards the native `context_management` payload when you call Anthropic models, and automatically attaches the required `context-management-2025-06-27` beta header.
```python
from litellm import completion
response = completion(
model="anthropic/claude-sonnet-4-20250514",
messages=[{"role": "user", "content": "Summarize the latest tool results"}],
context_management={
"edits": [
{
"type": "clear_tool_uses_20250919",
"trigger": {"type": "input_tokens", "value": 30000},
"keep": {"type": "tool_uses", "value": 3},
"clear_at_least": {"type": "input_tokens", "value": 5000},
"exclude_tools": ["web_search"],
}
]
},
)
```
### Anthropic Hosted Tools (Computer, Text Editor, Web Search, Memory)
+4
View File
@@ -31,10 +31,14 @@ Get your API key from [fal.ai](https://fal.ai/).
| Model Name | Description | Documentation |
|------------|-------------|---------------|
| `fal_ai/fal-ai/flux-pro/v1.1` | FLUX Pro v1.1 - Balanced speed and quality | [Docs ↗](https://fal.ai/models/fal-ai/flux-pro/v1.1) |
| `fal_ai/flux/schnell` | Flux Schnell - Low-latency generation with `image_size` support | [Docs ↗](https://fal.ai/models/fal-ai/flux/schnell) |
| `fal_ai/fal-ai/bytedance/seedream/v3/text-to-image` | ByteDance Seedream v3 - Text-to-image with `image_size` control | [Docs ↗](https://fal.ai/models/fal-ai/bytedance/seedream/v3/text-to-image) |
| `fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image` | ByteDance Dreamina v3.1 - Text-to-image with `image_size` control | [Docs ↗](https://fal.ai/models/fal-ai/bytedance/dreamina/v3.1/text-to-image) |
| `fal_ai/fal-ai/flux-pro/v1.1-ultra` | FLUX Pro v1.1 Ultra - High-quality image generation | [Docs ↗](https://fal.ai/models/fal-ai/flux-pro/v1.1-ultra) |
| `fal_ai/fal-ai/imagen4/preview` | Google's Imagen 4 - Highest quality model | [Docs ↗](https://fal.ai/models/fal-ai/imagen4/preview) |
| `fal_ai/fal-ai/recraft/v3/text-to-image` | Recraft v3 - Multiple style options | [Docs ↗](https://fal.ai/models/fal-ai/recraft/v3/text-to-image) |
| `fal_ai/fal-ai/ideogram/v3` | Ideogram v3 - Lettering-first creative model (Balanced: $0.06/image) | [Docs ↗](https://fal.ai/models/fal-ai/ideogram/v3) |
| `fal_ai/fal-ai/stable-diffusion-v35-medium` | Stable Diffusion v3.5 Medium | [Docs ↗](https://fal.ai/models/fal-ai/stable-diffusion-v35-medium) |
| `fal_ai/bria/text-to-image/3.2` | Bria 3.2 - Commercial-grade generation | [Docs ↗](https://fal.ai/models/bria/text-to-image/3.2) |
+47
View File
@@ -486,6 +486,53 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
See [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning) for more details on organization verification requirements.
### Verbosity Control for GPT-5 Models
The `verbosity` parameter controls the length and detail of responses from GPT-5 family models. It accepts three values: `"low"`, `"medium"`, or `"high"`.
**Supported models:** All GPT-5 family models (`gpt-5`, `gpt-5.1`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-5-pro`)
**Use cases:**
- **`"low"`**: Best for concise answers or simple code generation (e.g., SQL queries)
- **`"medium"`**: Default - balanced output length
- **`"high"`**: Use when you need thorough explanations or extensive code refactoring
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import litellm
# Low verbosity - concise responses
response = litellm.completion(
model="gpt-5.1",
messages=[{"role": "user", "content": "Write a function to reverse a string"}],
verbosity="low"
)
# High verbosity - detailed responses
response = litellm.completion(
model="gpt-5.1",
messages=[{"role": "user", "content": "Explain how neural networks work"}],
verbosity="high"
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-5.1",
"messages": [{"role": "user", "content": "Write a function to reverse a string"}],
"verbosity": "low"
}'
```
</TabItem>
</Tabs>
## OpenAI Chat Completion to Responses API Bridge
Call any Responses API model from OpenAI's `/chat/completions` endpoint.
+1 -5
View File
@@ -32,13 +32,9 @@ Features:
- ✅ [Set Model budgets for Virtual Keys](./users#-virtual-key-model-specific)
- ✅ [Exporting LLM Logs to GCS Bucket, Azure Blob Storage](./proxy/bucket#🪣-logging-gcs-s3-buckets)
- ✅ [`/spend/report` API endpoint](cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend)
- **Prometheus Metrics**
- ✅ [Prometheus Metrics - Num Requests, failures, LLM Provider Outages](prometheus)
- ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens)
- **Control Guardrails per API Key**
- **Control Guardrails per API Key/Team**
- **Custom Branding**
- ✅ [Custom Branding + Routes on Swagger Docs](#swagger-docs---custom-routes--branding)
- ✅ [Public Model Hub](#public-model-hub)
- ✅ [Custom Email Branding](./email.md#customizing-email-branding)
-9
View File
@@ -4,15 +4,6 @@ import Image from '@theme/IdealImage';
# 📈 Prometheus metrics
:::info
✨ Prometheus metrics is on LiteLLM Enterprise
[Enterprise Pricing](https://www.litellm.ai/#pricing)
[Get free 7-day trial key](https://www.litellm.ai/enterprise#trial)
:::
LiteLLM exposes a `/metrics` endpoint for Prometheus to poll.
@@ -237,11 +237,8 @@ mcp_servers:
github_mcp:
url: "https://api.githubcopilot.com/mcp"
auth_type: oauth2
authorization_url: https://github.com/login/oauth/authorize
token_url: https://github.com/login/oauth/access_token
client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
scopes: ["public_repo", "user:email"]
```
</TabItem>
@@ -255,9 +252,6 @@ atlassian_mcp:
url: "https://mcp.atlassian.com/v1/sse"
transport: "sse"
auth_type: oauth2
authorization_url: https://mcp.atlassian.com/v1/authorize
token_url: https://cf.mcp.atlassian.com/v1/token
registration_url: https://cf.mcp.atlassian.com/v1/register
```
</TabItem>
+120
View File
@@ -0,0 +1,120 @@
# /vector_stores/{vector_store_id}/files
Vector store files represent the individual files that live inside a vector store.
| Feature | Supported |
|---------|-----------|
| Logging | ✅ (full request/response logging) |
| Supported Providers | `openai` |
## Supported operations
| Operation | Description | OpenAI Python Client | LiteLLM Proxy |
|-----------|-------------|----------------------|---------------|
| Create vector store file | Attach a file to a vector store with optional chunking overrides | ✅ | ✅ |
| List vector store files | Paginated listing with filters | ✅ | ✅ |
| Retrieve vector store file | Fetch metadata for a single file | ✅ | ✅ |
| Delete vector store file | Remove a file from a store (file object persists) | ✅ | ✅ |
| Retrieve vector store file content | Stream processed chunks | ❌ | ✅ |
| Update vector store file attributes | Patch custom attributes | ❌ | ✅ |
:::note
Vector store support currently works **only with OpenAI vector stores and OpenAI-uploaded file IDs**.
:::
## Create vector store file
`POST http://localhost:4000/v1/vector_stores/{vector_store_id}/files`
```python
from openai import OpenAI
client = OpenAI(
base_url="http://localhost:4000", # LiteLLM proxy or OpenAI base
api_key="sk-1234"
)
vector_store_file = client.vector_stores.files.create(
vector_store_id="vs_69172088a18c8191ab3e2621aa87d1ee",
file_id="file-NDbEDJTfqVh7S4Ugi3CGYw",
chunking_strategy={
"type": "static",
"static": {
"max_chunk_size_tokens": 800,
"chunk_overlap_tokens": 400,
},
},
)
print(vector_store_file)
```
## List vector store files
`GET http://localhost:4000/v1/vector_stores/{vector_store_id}/files`
Parameters:
- `vector_store_id` (path, required)
- `after` / `before` (query, optional) pagination cursors
- `filter` (query, optional) `in_progress`, `completed`, `failed`, `cancelled`
- `limit` (query, optional, default `20`, range `1-100`)
- `order` (query, optional, default `desc`)
```python
vector_store_files = client.vector_stores.files.list(
vector_store_id="vs_abc123"
)
print(vector_store_files)
```
## Retrieve vector store file
`GET http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}`
```python
vector_store_file = client.vector_stores.files.retrieve(
vector_store_id="vs_abc123",
file_id="file-abc123"
)
print(vector_store_file)
```
## Delete vector store file
`DELETE http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}`
```python
deleted_vector_store_file = client.vector_stores.files.delete(
vector_store_id="vs_abc123",
file_id="file-abc123"
)
print(deleted_vector_store_file)
```
## Proxy-only endpoints
When you need raw content chunks or attribute updates, call the LiteLLM Proxy directly.
### Retrieve file content
```bash
curl -X GET "http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}/content" \
-H "Authorization: Bearer sk-1234"
```
### Update file attributes
```bash
curl -X POST "http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}" \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"attributes": {
"category": "support-faq",
"language": "en"
}
}'
```
Binary file not shown.

Before

Width:  |  Height:  |  Size: 418 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 324 KiB

+4326 -2170
View File
File diff suppressed because it is too large Load Diff
+5 -3
View File
@@ -18,7 +18,7 @@
"@docusaurus/plugin-google-gtag": "3.8.1",
"@docusaurus/plugin-ideal-image": "3.8.1",
"@docusaurus/preset-classic": "3.8.1",
"@docusaurus/theme-mermaid": "^3.8.1",
"@docusaurus/theme-mermaid": "3.8.1",
"@inkeep/cxkit-docusaurus": "^0.5.89",
"@mdx-js/react": "^3.0.0",
"clsx": "^1.2.1",
@@ -45,12 +45,14 @@
]
},
"engines": {
"node": ">=16.14"
"node": ">=16.14",
"npm": ">=8.3.0"
},
"overrides": {
"webpack-dev-server": ">=5.2.1",
"form-data": ">=4.0.4",
"mermaid": ">=11.10.0",
"js-yaml": ">=4.1.1"
"js-yaml": ">=4.1.1",
"gray-matter": ">=4.0.3"
}
}
@@ -1,5 +1,5 @@
---
title: "[Preview] v1.79.3-stable - Built-in Guardrails on AI Gateway"
title: "v1.79.3-stable - Built-in Guardrails on AI Gateway"
slug: "v1-79-3"
date: 2025-11-08T10:00:00
authors:
@@ -27,7 +27,7 @@ import TabItem from '@theme/TabItem';
docker run \
-e STORE_MODEL_IN_DB=True \
-p 4000:4000 \
ghcr.io/berriai/litellm:v1.79.3.rc.1
ghcr.io/berriai/litellm:v1.79.3-stable
```
</TabItem>
@@ -0,0 +1,482 @@
---
title: "[Preview] v1.80.0-stable - RunwayML Provider Support"
slug: "v1-80-0"
date: 2025-11-15T10:00:00
authors:
- name: Krrish Dholakia
title: CEO, LiteLLM
url: https://www.linkedin.com/in/krish-d/
image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
- name: Ishaan Jaff
title: CTO, LiteLLM
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
## Deploy this version
<Tabs>
<TabItem value="docker" label="Docker">
``` showLineNumbers title="docker run litellm"
docker run \
-e STORE_MODEL_IN_DB=True \
-p 4000:4000 \
ghcr.io/berriai/litellm:v1.80.0.rc.1
```
</TabItem>
<TabItem value="pip" label="Pip">
``` showLineNumbers title="pip install litellm"
pip install litellm==1.80.0
```
</TabItem>
</Tabs>
---
## Key Highlights
- **🆕 RunwayML Provider** - Complete video generation, image generation, and text-to-speech support
- **GPT-5.1 Family Support** - Day-0 support for OpenAI's latest GPT-5.1 and GPT-5.1-Codex models
- **Prometheus OSS** - Prometheus metrics now available in open-source version
- **Vector Store Files API** - Complete OpenAI-compatible Vector Store Files API with full CRUD operations
- **Embeddings Performance** - O(1) lookup optimization for router embeddings with shared sessions
---
### 🆕 RunwayML
Complete integration for RunwayML's Gen-4 family of models, supporting video generation, image generation, and text-to-speech.
**Supported Endpoints:**
- `/v1/videos` - Video generation (Gen-4 Turbo, Gen-4 Aleph, Gen-3A Turbo)
- `/v1/images/generations` - Image generation (Gen-4 Image, Gen-4 Image Turbo)
- `/v1/audio/speech` - Text-to-speech (ElevenLabs Multilingual v2)
**Quick Start:**
```bash showLineNumbers title="Generate Video with RunwayML"
curl --location 'http://localhost:4000/v1/videos' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-1234' \
--data '{
"model": "runwayml/gen4_turbo",
"prompt": "A high quality demo video of litellm ai gateway",
"input_reference": "https://example.com/image.jpg",
"seconds": 5,
"size": "1280x720"
}'
```
[Get Started with RunwayML](../../docs/providers/runwayml/videos)
---
### Prometheus Metrics - Open Source
Prometheus metrics are now available in the open-source version of LiteLLM, providing comprehensive observability for your AI Gateway without requiring an enterprise license.
**Quick Start:**
```yaml
litellm_settings:
success_callback: ["prometheus"]
failure_callback: ["prometheus"]
```
[Get Started with Prometheus](../../docs/proxy/logging#prometheus)
---
### Vector Store Files API
The complete OpenAI-compatible Vector Store Files API is now stable, enabling full file lifecycle management within vector stores.
**Supported Endpoints:**
- `POST /v1/vector_stores/{vector_store_id}/files` - Create vector store file
- `GET /v1/vector_stores/{vector_store_id}/files` - List vector store files
- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}` - Retrieve vector store file
- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}/content` - Retrieve file content
- `DELETE /v1/vector_stores/{vector_store_id}/files/{file_id}` - Delete vector store file
- `DELETE /v1/vector_stores/{vector_store_id}` - Delete vector store
**Quick Start:**
```bash showLineNumbers title="Create Vector Store File"
curl --location 'http://localhost:4000/v1/vector_stores/vs_123/files' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-1234' \
--data '{
"file_id": "file_abc"
}'
```
[Get Started with Vector Stores](../../docs/vector_store_files)
---
## New Providers and Endpoints
### New Providers
| Provider | Supported Endpoints | Description |
| -------- | ------------------- | ----------- |
| **[RunwayML](../../docs/providers/runwayml/videos)** | `/v1/videos`, `/v1/images/generations`, `/v1/audio/speech` | Gen-4 video generation, image generation, and text-to-speech |
### New LLM API Endpoints
| Endpoint | Method | Description | Documentation |
| -------- | ------ | ----------- | ------------- |
| `/v1/vector_stores/{vector_store_id}/files` | POST | Create vector store file | [Docs](../../docs/vector_store_files) |
| `/v1/vector_stores/{vector_store_id}/files` | GET | List vector store files | [Docs](../../docs/vector_store_files) |
| `/v1/vector_stores/{vector_store_id}/files/{file_id}` | GET | Retrieve vector store file | [Docs](../../docs/vector_store_files) |
| `/v1/vector_stores/{vector_store_id}/files/{file_id}/content` | GET | Retrieve file content | [Docs](../../docs/vector_store_files) |
| `/v1/vector_stores/{vector_store_id}/files/{file_id}` | DELETE | Delete vector store file | [Docs](../../docs/vector_store_files) |
| `/v1/vector_stores/{vector_store_id}` | DELETE | Delete vector store | [Docs](../../docs/vector_store_files) |
---
## New Models / Updated Models
#### New Model Support
| Provider | Model | Context Window | Input ($/1M tokens) | Output ($/1M tokens) | Features |
| -------- | ----- | -------------- | ------------------- | -------------------- | -------- |
| OpenAI | `gpt-5.1` | 272K | $1.25 | $10.00 | Reasoning, vision, PDF input, responses API |
| OpenAI | `gpt-5.1-2025-11-13` | 272K | $1.25 | $10.00 | Reasoning, vision, PDF input, responses API |
| OpenAI | `gpt-5.1-chat-latest` | 128K | $1.25 | $10.00 | Reasoning, vision, PDF input |
| OpenAI | `gpt-5.1-codex` | 272K | $1.25 | $10.00 | Responses API, reasoning, vision |
| OpenAI | `gpt-5.1-codex-mini` | 272K | $0.25 | $2.00 | Responses API, reasoning, vision |
| Moonshot | `moonshot/kimi-k2-thinking` | 262K | $0.60 | $2.50 | Function calling, web search, reasoning |
| Mistral | `mistral/magistral-medium-2509` | 40K | $2.00 | $5.00 | Reasoning, function calling |
| Vertex AI | `vertex_ai/moonshotai/kimi-k2-thinking-maas` | 256K | $0.60 | $2.50 | Function calling, web search |
| OpenRouter | `openrouter/deepseek/deepseek-v3.2-exp` | 164K | $0.20 | $0.40 | Function calling, prompt caching |
| OpenRouter | `openrouter/minimax/minimax-m2` | 205K | $0.26 | $1.02 | Function calling, reasoning |
| OpenRouter | `openrouter/z-ai/glm-4.6` | 203K | $0.40 | $1.75 | Function calling, reasoning |
| OpenRouter | `openrouter/z-ai/glm-4.6:exacto` | 203K | $0.45 | $1.90 | Function calling, reasoning |
| Voyage | `voyage/voyage-3.5` | 32K | $0.06 | - | Embeddings |
| Voyage | `voyage/voyage-3.5-lite` | 32K | $0.02 | - | Embeddings |
#### Video Generation Models
| Provider | Model | Cost Per Second | Resolutions | Features |
| -------- | ----- | --------------- | ----------- | -------- |
| RunwayML | `runwayml/gen4_turbo` | $0.05 | 1280x720, 720x1280 | Text + image to video |
| RunwayML | `runwayml/gen4_aleph` | $0.15 | 1280x720, 720x1280 | Text + image to video |
| RunwayML | `runwayml/gen3a_turbo` | $0.05 | 1280x720, 720x1280 | Text + image to video |
#### Image Generation Models
| Provider | Model | Cost Per Image | Resolutions | Features |
| -------- | ----- | -------------- | ----------- | -------- |
| RunwayML | `runwayml/gen4_image` | $0.05 | 1280x720, 1920x1080 | Text + image to image |
| RunwayML | `runwayml/gen4_image_turbo` | $0.02 | 1280x720, 1920x1080 | Text + image to image |
| Fal.ai | `fal_ai/fal-ai/flux-pro/v1.1` | $0.04/image | - | Image generation |
| Fal.ai | `fal_ai/fal-ai/flux/schnell` | $0.003/image | - | Fast image generation |
| Fal.ai | `fal_ai/fal-ai/bytedance/seedream/v3/text-to-image` | $0.03/image | - | Image generation |
| Fal.ai | `fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image` | $0.03/image | - | Image generation |
| Fal.ai | `fal_ai/fal-ai/ideogram/v3` | $0.06/image | - | Image generation |
| Fal.ai | `fal_ai/fal-ai/imagen4/preview/fast` | $0.02/image | - | Fast image generation |
| Fal.ai | `fal_ai/fal-ai/imagen4/preview/ultra` | $0.06/image | - | High-quality image generation |
#### Audio Models
| Provider | Model | Cost | Features |
| -------- | ----- | ---- | -------- |
| RunwayML | `runwayml/eleven_multilingual_v2` | $0.0003/char | Text-to-speech |
#### Features
- **[OpenAI](../../docs/providers/openai)**
- Add GPT-5.1 family support with reasoning capabilities - [PR #16598](https://github.com/BerriAI/litellm/pull/16598)
- Add support for `reasoning_effort='none'` for GPT-5.1 - [PR #16658](https://github.com/BerriAI/litellm/pull/16658)
- Add `verbosity` parameter support for GPT-5 family models - [PR #16660](https://github.com/BerriAI/litellm/pull/16660)
- Fix forward OpenAI organization for image generation - [PR #16607](https://github.com/BerriAI/litellm/pull/16607)
- **[Gemini (Google AI Studio + Vertex AI)](../../docs/providers/gemini)**
- Add support for `reasoning_effort='none'` for Gemini models - [PR #16548](https://github.com/BerriAI/litellm/pull/16548)
- Add all Gemini image models support in image generation - [PR #16526](https://github.com/BerriAI/litellm/pull/16526)
- Add Gemini image edit support - [PR #16430](https://github.com/BerriAI/litellm/pull/16430)
- Fix preserve non-ASCII characters in function call arguments - [PR #16550](https://github.com/BerriAI/litellm/pull/16550)
- Fix Gemini conversation format issue with MCP auto-execution - [PR #16592](https://github.com/BerriAI/litellm/pull/16592)
- **[Bedrock](../../docs/providers/bedrock)**
- Add support for filtering knowledge base queries - [PR #16543](https://github.com/BerriAI/litellm/pull/16543)
- Ensure correct `aws_region` is used when provided dynamically for embeddings - [PR #16547](https://github.com/BerriAI/litellm/pull/16547)
- Add support for custom KMS encryption keys in Bedrock Batch operations - [PR #16662](https://github.com/BerriAI/litellm/pull/16662)
- Add bearer token authentication support for AgentCore - [PR #16556](https://github.com/BerriAI/litellm/pull/16556)
- Fix AgentCore SSE stream iterator to async for proper streaming support - [PR #16293](https://github.com/BerriAI/litellm/pull/16293)
- **[Anthropic](../../docs/providers/anthropic)**
- Add context management param support - [PR #16528](https://github.com/BerriAI/litellm/pull/16528)
- Fix preserve `$defs` for Anthropic tools input schema - [PR #16648](https://github.com/BerriAI/litellm/pull/16648)
- Fix support Anthropic tool_use and tool_result in token counter - [PR #16351](https://github.com/BerriAI/litellm/pull/16351)
- **[Vertex AI](../../docs/providers/vertex_ai)**
- Add Vertex Kimi-K2-Thinking support - [PR #16671](https://github.com/BerriAI/litellm/pull/16671)
- Add `vertex_credentials` support to `litellm.rerank()` - [PR #16479](https://github.com/BerriAI/litellm/pull/16479)
- **[Mistral](../../docs/providers/mistral)**
- Fix Magistral streaming to emit reasoning chunks - [PR #16434](https://github.com/BerriAI/litellm/pull/16434)
- **[Moonshot (Kimi)](../../docs/providers/moonshot)**
- Add Kimi K2 thinking model support - [PR #16445](https://github.com/BerriAI/litellm/pull/16445)
- **[SambaNova](../../docs/providers/sambanova)**
- Fix SambaNova API rejecting requests when message content is passed as a list format - [PR #16612](https://github.com/BerriAI/litellm/pull/16612)
- **[VLLM](../../docs/providers/vllm)**
- Fix use vllm passthrough config for hosted vllm provider instead of raising error - [PR #16537](https://github.com/BerriAI/litellm/pull/16537)
- Add headers to VLLM Passthrough requests with success event logging - [PR #16532](https://github.com/BerriAI/litellm/pull/16532)
- **[Azure](../../docs/providers/azure)**
- Fix improve Azure auth parameter handling for None values - [PR #14436](https://github.com/BerriAI/litellm/pull/14436)
- **[Groq](../../docs/providers/groq)**
- Fix parse failed chunks for Groq - [PR #16595](https://github.com/BerriAI/litellm/pull/16595)
- **[Voyage](../../docs/providers/voyage)**
- Add Voyage 3.5 and 3.5-lite embeddings pricing and doc update - [PR #16641](https://github.com/BerriAI/litellm/pull/16641)
- **[Fal.ai](../../docs/image_generation)**
- Add fal-ai/flux/schnell support - [PR #16580](https://github.com/BerriAI/litellm/pull/16580)
- Add all Imagen4 variants of fal ai in model map - [PR #16579](https://github.com/BerriAI/litellm/pull/16579)
### Bug Fixes
- **General**
- Fix sanitize null token usage in OpenAI-compatible responses - [PR #16493](https://github.com/BerriAI/litellm/pull/16493)
- Fix apply provided timeout value to ClientTimeout.total - [PR #16395](https://github.com/BerriAI/litellm/pull/16395)
- Fix raising wrong 429 error on wrong exception - [PR #16482](https://github.com/BerriAI/litellm/pull/16482)
- Add new models, delete repeat models, update pricing - [PR #16491](https://github.com/BerriAI/litellm/pull/16491)
- Update model logging format for custom LLM provider - [PR #16485](https://github.com/BerriAI/litellm/pull/16485)
---
## LLM API Endpoints
#### New Endpoints
- **[GET /providers](../../docs/proxy/management_endpoints)**
- Add GET list of providers endpoint - [PR #16432](https://github.com/BerriAI/litellm/pull/16432)
#### Features
- **[Video Generation API](../../docs/video_generation)**
- Allow internal users to access video generation routes - [PR #16472](https://github.com/BerriAI/litellm/pull/16472)
- **[Vector Stores API](../../docs/vector_stores)**
- Vector store files stable release with complete CRUD operations - [PR #16643](https://github.com/BerriAI/litellm/pull/16643)
- `POST /v1/vector_stores/{vector_store_id}/files` - Create vector store file
- `GET /v1/vector_stores/{vector_store_id}/files` - List vector store files
- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}` - Retrieve vector store file
- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}/content` - Retrieve file content
- `DELETE /v1/vector_stores/{vector_store_id}/files/{file_id}` - Delete vector store file
- `DELETE /v1/vector_stores/{vector_store_id}` - Delete vector store
- Ensure users can access `search_results` for both stream + non-stream response - [PR #16459](https://github.com/BerriAI/litellm/pull/16459)
#### Bugs
- **[Video Generation API](../../docs/video_generation)**
- Fix use GET for `/v1/videos/{video_id}/content` - [PR #16672](https://github.com/BerriAI/litellm/pull/16672)
- **General**
- Fix remove generic exception handling - [PR #16599](https://github.com/BerriAI/litellm/pull/16599)
---
## Management Endpoints / UI
#### Features
- **Proxy CLI Auth**
- Fix remove strict master_key check in add_deployment - [PR #16453](https://github.com/BerriAI/litellm/pull/16453)
- **Virtual Keys**
- UI - Add Tags To Edit Key Flow - [PR #16500](https://github.com/BerriAI/litellm/pull/16500)
- UI - Test Key Page show models based on selected endpoint - [PR #16452](https://github.com/BerriAI/litellm/pull/16452)
- UI - Expose user_alias in view and update path - [PR #16669](https://github.com/BerriAI/litellm/pull/16669)
- **Models + Endpoints**
- UI - Add LiteLLM Params to Edit Model - [PR #16496](https://github.com/BerriAI/litellm/pull/16496)
- UI - Add Model use backend data - [PR #16664](https://github.com/BerriAI/litellm/pull/16664)
- UI - Remove Description Field from LLM Credentials - [PR #16608](https://github.com/BerriAI/litellm/pull/16608)
- UI - Add RunwayML on Admin UI supported models/providers - [PR #16606](https://github.com/BerriAI/litellm/pull/16606)
- Infra - Migrate Add Model Fields to Backend - [PR #16620](https://github.com/BerriAI/litellm/pull/16620)
- Add API Endpoint for creating model access group - [PR #16663](https://github.com/BerriAI/litellm/pull/16663)
- **Teams**
- UI - Invite User Searchable Team Select - [PR #16454](https://github.com/BerriAI/litellm/pull/16454)
- Fix use user budget instead of key budget when creating new team - [PR #16074](https://github.com/BerriAI/litellm/pull/16074)
- **Budgets**
- UI - Move Budgets out of Experimental - [PR #16544](https://github.com/BerriAI/litellm/pull/16544)
- **Guardrails**
- UI - Config Guardrails should not be deletable from table - [PR #16540](https://github.com/BerriAI/litellm/pull/16540)
- Fix remove enterprise restriction from guardrails list endpoint - [PR #15333](https://github.com/BerriAI/litellm/pull/15333)
- **Callbacks**
- UI - New Callbacks table - [PR #16512](https://github.com/BerriAI/litellm/pull/16512)
- Fix delete callbacks failing - [PR #16473](https://github.com/BerriAI/litellm/pull/16473)
- **Usage & Analytics**
- UI - Improve Usage Indicator - [PR #16504](https://github.com/BerriAI/litellm/pull/16504)
- UI - Model Info Page Health Check - [PR #16416](https://github.com/BerriAI/litellm/pull/16416)
- Infra - Show Deprecation Warning for Model Analytics Tab - [PR #16417](https://github.com/BerriAI/litellm/pull/16417)
- Fix LiteLLM tags usage: add request_id - [PR #16111](https://github.com/BerriAI/litellm/pull/16111)
- **Health Check**
- Add Langfuse OTEL and SQS to Health Check - [PR #16514](https://github.com/BerriAI/litellm/pull/16514)
- **General UI**
- UI - Normalize table action columns appearance - [PR #16657](https://github.com/BerriAI/litellm/pull/16657)
- UI - Button Styles and Sizing in Settings Pages - [PR #16600](https://github.com/BerriAI/litellm/pull/16600)
- UI - SSO Modal Cosmetic Changes - [PR #16554](https://github.com/BerriAI/litellm/pull/16554)
- Fix UI logos loading with SERVER_ROOT_PATH - [PR #16618](https://github.com/BerriAI/litellm/pull/16618)
- Fix remove misleading 'Custom' option mention from OpenAI endpoint tooltips - [PR #16622](https://github.com/BerriAI/litellm/pull/16622)
#### Bugs
- **Management Endpoints**
- Fix inconsistent error responses in customer management endpoints - [PR #16450](https://github.com/BerriAI/litellm/pull/16450)
- Fix correct date range filtering in /spend/logs endpoint - [PR #16443](https://github.com/BerriAI/litellm/pull/16443)
- Fix /spend/logs/ui Access Control - [PR #16446](https://github.com/BerriAI/litellm/pull/16446)
- Add pagination for /spend/logs/session/ui endpoint - [PR #16603](https://github.com/BerriAI/litellm/pull/16603)
- Fix LiteLLM Usage shows key_hash - [PR #16471](https://github.com/BerriAI/litellm/pull/16471)
- Fix app_roles missing from jwt payload - [PR #16448](https://github.com/BerriAI/litellm/pull/16448)
---
## Logging / Guardrail / Prompt Management Integrations
#### New Integration
- **🆕 [Zscaler AI Guard](../../docs/proxy/guardrails/zscaler_ai_guard)**
- Add Zscaler AI Guard hook for security policy enforcement - [PR #15691](https://github.com/BerriAI/litellm/pull/15691)
#### Logging
- **[Langfuse](../../docs/proxy/logging#langfuse)**
- Fix handle null usage values to prevent validation errors - [PR #16396](https://github.com/BerriAI/litellm/pull/16396)
- **[CloudZero](../../docs/proxy/logging)**
- Fix updated spend would not be sent to CloudZero - [PR #16201](https://github.com/BerriAI/litellm/pull/16201)
#### Guardrails
- **[IBM Detector](../../docs/proxy/guardrails)**
- Ensure detector-id is passed as header to IBM detector server - [PR #16649](https://github.com/BerriAI/litellm/pull/16649)
#### Prompt Management
- **[Custom Prompt Management](../../docs/proxy/prompt_management)**
- Add SDK focused examples for custom prompt management - [PR #16441](https://github.com/BerriAI/litellm/pull/16441)
---
## Spend Tracking, Budgets and Rate Limiting
- **End User Budgets**
- Allow pointing max_end_user budget to an id, so the default ID applies to all end users - [PR #16456](https://github.com/BerriAI/litellm/pull/16456)
---
## MCP Gateway
- **Configuration**
- Add dynamic OAuth2 metadata discovery for MCP servers - [PR #16676](https://github.com/BerriAI/litellm/pull/16676)
- Fix allow tool call even when server name prefix is missing - [PR #16425](https://github.com/BerriAI/litellm/pull/16425)
- Fix exclude unauthorized MCP servers from allowed server list - [PR #16551](https://github.com/BerriAI/litellm/pull/16551)
- Fix unable to delete MCP server from permission settings - [PR #16407](https://github.com/BerriAI/litellm/pull/16407)
- Fix avoid crashing when MCP server record lacks credentials - [PR #16601](https://github.com/BerriAI/litellm/pull/16601)
---
## Agents
- **[Agent Registration (A2A Spec)](../../docs/agents)**
- Support agent registration + discovery following Agent-to-Agent specification - [PR #16615](https://github.com/BerriAI/litellm/pull/16615)
---
## Performance / Loadbalancing / Reliability improvements
- **Embeddings Performance**
- Use router's O(1) lookup and shared sessions for embeddings - [PR #16344](https://github.com/BerriAI/litellm/pull/16344)
- **Router Reliability**
- Support default fallbacks for unknown models - [PR #16419](https://github.com/BerriAI/litellm/pull/16419)
- **Callback Management**
- Add atexit handlers to flush callbacks for async completions - [PR #16487](https://github.com/BerriAI/litellm/pull/16487)
---
## General Proxy Improvements
- **Configuration Management**
- Fix update model_cost_map_url to use environment variable - [PR #16429](https://github.com/BerriAI/litellm/pull/16429)
---
## Documentation Updates
- **Provider Documentation**
- Fix streaming example in README - [PR #16461](https://github.com/BerriAI/litellm/pull/16461)
- Update broken Slack invite links to support page - [PR #16546](https://github.com/BerriAI/litellm/pull/16546)
- Fix code block indentation for fallbacks page - [PR #16542](https://github.com/BerriAI/litellm/pull/16542)
- Documentation code example corrections - [PR #16502](https://github.com/BerriAI/litellm/pull/16502)
- Document `reasoning_effort` summary field options - [PR #16549](https://github.com/BerriAI/litellm/pull/16549)
- **API Documentation**
- Add docs on APIs for model access management - [PR #16673](https://github.com/BerriAI/litellm/pull/16673)
- Add docs for showing how to auto reload new pricing data - [PR #16675](https://github.com/BerriAI/litellm/pull/16675)
- LiteLLM Quick start - show how model resolution works - [PR #16602](https://github.com/BerriAI/litellm/pull/16602)
- Add docs for tracking callback failure - [PR #16474](https://github.com/BerriAI/litellm/pull/16474)
- **General Documentation**
- Fix container api link in release page - [PR #16440](https://github.com/BerriAI/litellm/pull/16440)
- Add softgen to projects that are using litellm - [PR #16423](https://github.com/BerriAI/litellm/pull/16423)
---
## New Contributors
* @artplan1 made their first contribution in [PR #16423](https://github.com/BerriAI/litellm/pull/16423)
* @JehandadK made their first contribution in [PR #16472](https://github.com/BerriAI/litellm/pull/16472)
* @vmiscenko made their first contribution in [PR #16453](https://github.com/BerriAI/litellm/pull/16453)
* @mcowger made their first contribution in [PR #16429](https://github.com/BerriAI/litellm/pull/16429)
* @yellowsubmarine372 made their first contribution in [PR #16395](https://github.com/BerriAI/litellm/pull/16395)
* @Hebruwu made their first contribution in [PR #16201](https://github.com/BerriAI/litellm/pull/16201)
* @jwang-gif made their first contribution in [PR #15691](https://github.com/BerriAI/litellm/pull/15691)
* @AnthonyMonaco made their first contribution in [PR #16502](https://github.com/BerriAI/litellm/pull/16502)
* @andrewm4894 made their first contribution in [PR #16487](https://github.com/BerriAI/litellm/pull/16487)
* @f14-bertolotti made their first contribution in [PR #16485](https://github.com/BerriAI/litellm/pull/16485)
* @busla made their first contribution in [PR #16293](https://github.com/BerriAI/litellm/pull/16293)
* @MightyGoldenOctopus made their first contribution in [PR #16537](https://github.com/BerriAI/litellm/pull/16537)
* @ultmaster made their first contribution in [PR #14436](https://github.com/BerriAI/litellm/pull/14436)
* @bchrobot made their first contribution in [PR #16542](https://github.com/BerriAI/litellm/pull/16542)
* @sep-grindr made their first contribution in [PR #16622](https://github.com/BerriAI/litellm/pull/16622)
* @pnookala-godaddy made their first contribution in [PR #16607](https://github.com/BerriAI/litellm/pull/16607)
* @dtunikov made their first contribution in [PR #16592](https://github.com/BerriAI/litellm/pull/16592)
* @lukapecnik made their first contribution in [PR #16648](https://github.com/BerriAI/litellm/pull/16648)
* @jyeros made their first contribution in [PR #16618](https://github.com/BerriAI/litellm/pull/16618)
---
## Full Changelog
**[View complete changelog on GitHub](https://github.com/BerriAI/litellm/compare/v1.79.3.rc.1...v1.80.0.rc.1)**
---
+1
View File
@@ -368,6 +368,7 @@ const sidebars = {
]
},
"videos",
"vector_store_files",
{
type: "category",
label: "/mcp - Model Context Protocol",
Binary file not shown.
Binary file not shown.
+2 -2
View File
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm-enterprise"
version = "0.1.20"
version = "0.1.21"
description = "Package for LiteLLM Enterprise features"
authors = ["BerriAI"]
readme = "README.md"
@@ -22,7 +22,7 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "0.1.20"
version = "0.1.21"
version_files = [
"pyproject.toml:version",
"../requirements.txt:litellm-enterprise==",
Binary file not shown.
@@ -0,0 +1,17 @@
-- CreateTable
-- Creates "LiteLLM_AgentsTable" (presumably one row per registered agent -- confirm
-- against the code that writes to it). "agent_id" is the primary key and has no
-- database-side default, so whoever inserts a row must supply the value.
CREATE TABLE "LiteLLM_AgentsTable" (
"agent_id" TEXT NOT NULL,
"agent_name" TEXT NOT NULL,
-- The only nullable column in this table.
"litellm_params" JSONB,
"agent_card_params" JSONB NOT NULL,
-- Both timestamps default to the insert time with millisecond precision. This
-- statement defines no trigger, so the database itself does not refresh
-- "updated_at" on UPDATE.
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_AgentsTable_pkey" PRIMARY KEY ("agent_id")
);
-- CreateIndex
-- Enforces that no two rows share the same "agent_name".
CREATE UNIQUE INDEX "LiteLLM_AgentsTable_agent_name_key" ON "LiteLLM_AgentsTable"("agent_name");
@@ -54,6 +54,19 @@ model LiteLLM_ProxyModelTable {
updated_by String
}
// Agents on proxy
// Each record carries a generated id, a unique name, two JSON parameter blobs
// (only agent_card_params is required) and created/updated audit fields.
model LiteLLM_AgentsTable {
// Primary key; uuid() supplies a value when the caller does not pass one.
agent_id String @id @default(uuid())
// Unique across all rows.
agent_name String @unique
// Optional (nullable) JSON.
litellm_params Json?
// Required JSON.
agent_card_params Json
// Defaults to the creation time; @map names the same column as the field, so it is a no-op mapping.
created_at DateTime @default(now()) @map("created_at")
created_by String
// Defaults to the creation time and is refreshed by Prisma on every update via @updatedAt.
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
updated_by String
}
model LiteLLM_OrganizationTable {
organization_id String @id @default(uuid())
organization_alias String
@@ -639,4 +652,4 @@ model LiteLLM_CacheConfig {
cache_settings Json
created_at DateTime @default(now())
updated_at DateTime @updatedAt
}
}
+2 -2
View File
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm-proxy-extras"
version = "0.4.4"
version = "0.4.5"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"]
readme = "README.md"
@@ -22,7 +22,7 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "0.4.4"
version = "0.4.5"
version_files = [
"pyproject.toml:version",
"../requirements.txt:litellm-proxy-extras==",
+14
View File
@@ -1386,6 +1386,20 @@ from .search.main import *
from .realtime_api.main import _arealtime
from .fine_tuning.main import *
from .files.main import *
from .vector_store_files.main import (
acreate as avector_store_file_create,
adelete as avector_store_file_delete,
alist as avector_store_file_list,
aretrieve as avector_store_file_retrieve,
aretrieve_content as avector_store_file_content,
aupdate as avector_store_file_update,
create as vector_store_file_create,
delete as vector_store_file_delete,
list as vector_store_file_list,
retrieve as vector_store_file_retrieve,
retrieve_content as vector_store_file_content,
update as vector_store_file_update,
)
from .scheduler import *
from .cost_calculator import response_cost_calculator, cost_per_token
+1
View File
@@ -476,6 +476,7 @@ DEFAULT_CHAT_COMPLETION_PARAM_VALUES = {
"additional_drop_params": None,
"messages": None,
"reasoning_effort": None,
"verbosity": None,
"thinking": None,
"web_search_options": None,
"service_tier": None,
@@ -41,21 +41,9 @@ class PrometheusLogger(CustomLogger):
try:
from prometheus_client import Counter, Gauge, Histogram
from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
# Always initialize label_filters, even for non-premium users
self.label_filters = self._parse_prometheus_config()
if premium_user is not True:
verbose_logger.warning(
f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise\n🚨 {CommonProxyErrors.not_premium_user.value}"
)
self.litellm_not_a_premium_user_metric = Counter(
name="litellm_not_a_premium_user_metric",
documentation=f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise. 🚨 {CommonProxyErrors.not_premium_user.value}",
)
return
# Create metric factory functions
self._counter_factory = self._create_metric_factory(Counter)
self._gauge_factory = self._create_metric_factory(Gauge)
@@ -2184,9 +2172,6 @@ class PrometheusLogger(CustomLogger):
It emits the current remaining budget metrics for all Keys and Teams.
"""
from enterprise.litellm_enterprise.integrations.prometheus import (
PrometheusLogger,
)
from litellm.constants import PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES
from litellm.integrations.custom_logger import CustomLogger
@@ -2213,26 +2198,19 @@ class PrometheusLogger(CustomLogger):
)
@staticmethod
def _mount_metrics_endpoint(premium_user: bool):
def _mount_metrics_endpoint():
"""
Mount the Prometheus metrics endpoint with optional authentication.
Args:
premium_user (bool): Whether the user is a premium user
require_auth (bool, optional): Whether to require authentication for the metrics endpoint.
Defaults to False.
"""
from prometheus_client import make_asgi_app
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import CommonProxyErrors
from litellm.proxy.proxy_server import app
if premium_user is not True:
verbose_proxy_logger.warning(
f"Prometheus metrics are only available for premium users. {CommonProxyErrors.not_premium_user.value}"
)
# Create metrics ASGI app
if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
from prometheus_client import CollectorRegistry, multiprocess
@@ -16,14 +16,16 @@ from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheCont
from litellm.integrations.argilla import ArgillaLogger
from litellm.integrations.azure_storage.azure_storage import AzureBlobStorageLogger
from litellm.integrations.bitbucket import BitBucketPromptManager
from litellm.integrations.gitlab import GitLabPromptManager
from litellm.integrations.braintrust_logging import BraintrustLogger
from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger
from litellm.integrations.datadog.datadog import DataDogLogger
from litellm.integrations.datadog.datadog_llm_obs import DataDogLLMObsLogger
from litellm.integrations.deepeval import DeepEvalLogger
from litellm.integrations.dotprompt import DotpromptManager
from litellm.integrations.galileo import GalileoObserve
from litellm.integrations.gcs_bucket.gcs_bucket import GCSBucketLogger
from litellm.integrations.gcs_pubsub.pub_sub import GcsPubSubLogger
from litellm.integrations.gitlab import GitLabPromptManager
from litellm.integrations.humanloop import HumanloopLogger
from litellm.integrations.lago import LagoLogger
from litellm.integrations.langfuse.langfuse_prompt_management import (
@@ -36,13 +38,7 @@ from litellm.integrations.openmeter import OpenMeterLogger
from litellm.integrations.opentelemetry import OpenTelemetry
from litellm.integrations.opik.opik import OpikLogger
from litellm.integrations.posthog import PostHogLogger
try:
from litellm_enterprise.integrations.prometheus import PrometheusLogger
except Exception:
PrometheusLogger = None
from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger
from litellm.integrations.dotprompt import DotpromptManager
from litellm.integrations.prometheus import PrometheusLogger
from litellm.integrations.s3_v2 import S3Logger
from litellm.integrations.sqs import SQSLogger
from litellm.integrations.vector_store_integrations.vector_store_pre_call_hook import (
+87 -38
View File
@@ -58,6 +58,7 @@ from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.deepeval.deepeval import DeepEvalLogger
from litellm.integrations.mlflow import MlflowLogger
from litellm.integrations.prometheus import PrometheusLogger
from litellm.integrations.sqs import SQSLogger
from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
@@ -176,7 +177,6 @@ try:
from litellm_enterprise.enterprise_callbacks.send_emails.smtp_email import (
SMTPEmailLogger,
)
from litellm_enterprise.integrations.prometheus import PrometheusLogger
from litellm_enterprise.litellm_core_utils.litellm_logging import (
StandardLoggingPayloadSetup as EnterpriseStandardLoggingPayloadSetup,
)
@@ -194,7 +194,6 @@ except Exception as e:
PagerDutyAlerting = CustomLogger # type: ignore
EnterpriseCallbackControls = None # type: ignore
EnterpriseStandardLoggingPayloadSetupVAR = None
PrometheusLogger = None
_in_memory_loggers: List[Any] = []
### GLOBAL VARIABLES ###
@@ -1475,33 +1474,58 @@ class Logging(LiteLLMLoggingBaseClass):
if self.model_call_details["litellm_params"]["metadata"] is None:
self.model_call_details["litellm_params"]["metadata"] = {}
self.model_call_details["litellm_params"]["metadata"]["hidden_params"] = getattr(logging_result, "_hidden_params", {}) # type: ignore
if "response_cost" in hidden_params:
self.model_call_details["response_cost"] = hidden_params["response_cost"]
else:
self.model_call_details["response_cost"] = self._response_cost_calculator(result=logging_result)
self.model_call_details["standard_logging_object"] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=logging_result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["response_cost"] = self._response_cost_calculator(
result=logging_result
)
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=logging_result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
def _transform_usage_objects(self, result):
if isinstance(result, ResponsesAPIResponse):
result = result.model_copy()
transformed_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(result.usage)
setattr(result, "usage", transformed_usage.model_dump() if hasattr(transformed_usage, "model_dump") else dict(transformed_usage))
if (standard_logging_payload := self.model_call_details.get("standard_logging_object")) is not None:
standard_logging_payload["response"] = result.model_dump() if hasattr(result, "model_dump") else dict(result)
transformed_usage = (
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
result.usage
)
)
setattr(
result,
"usage",
(
transformed_usage.model_dump()
if hasattr(transformed_usage, "model_dump")
else dict(transformed_usage)
),
)
if (
standard_logging_payload := self.model_call_details.get(
"standard_logging_object"
)
) is not None:
standard_logging_payload["response"] = (
result.model_dump()
if hasattr(result, "model_dump")
else dict(result)
)
elif isinstance(result, TranscriptionResponse):
from litellm.litellm_core_utils.llm_cost_calc.usage_object_transformation import (
TranscriptionUsageObjectTransformation,
)
result = result.model_copy()
transformed_usage = TranscriptionUsageObjectTransformation.transform_transcription_usage_object(result.usage) # type: ignore
setattr(result, "usage", transformed_usage)
@@ -1522,40 +1546,67 @@ class Logging(LiteLLMLoggingBaseClass):
end_time = datetime.datetime.now()
if self.completion_start_time is None:
self.completion_start_time = end_time
self.model_call_details["completion_start_time"] = self.completion_start_time
self.model_call_details["completion_start_time"] = (
self.completion_start_time
)
self.model_call_details["log_event_type"] = "successful_api_call"
self.model_call_details["end_time"] = end_time
self.model_call_details["cache_hit"] = cache_hit
if self.call_type == CallTypes.anthropic_messages.value:
result = self._handle_anthropic_messages_response_logging(result=result)
elif self.call_type == CallTypes.generate_content.value or self.call_type == CallTypes.agenerate_content.value:
result = self._handle_non_streaming_google_genai_generate_content_response_logging(result=result)
elif (
self.call_type == CallTypes.generate_content.value
or self.call_type == CallTypes.agenerate_content.value
):
result = self._handle_non_streaming_google_genai_generate_content_response_logging(
result=result
)
logging_result = self.normalize_logging_result(result=result)
if standard_logging_object is None and result is not None and self.stream is not True:
if self._is_recognized_call_type_for_logging(logging_result=logging_result):
self._process_hidden_params_and_response_cost(logging_result=logging_result, start_time=start_time, end_time=end_time)
elif isinstance(result, dict) or isinstance(result, list):
self.model_call_details["standard_logging_object"] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=result,
if (
standard_logging_object is None
and result is not None
and self.stream is not True
):
if self._is_recognized_call_type_for_logging(
logging_result=logging_result
):
self._process_hidden_params_and_response_cost(
logging_result=logging_result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
elif isinstance(result, dict) or isinstance(result, list):
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
elif standard_logging_object is not None:
self.model_call_details["standard_logging_object"] = standard_logging_object
self.model_call_details["standard_logging_object"] = (
standard_logging_object
)
else:
self.model_call_details["response_cost"] = None
result = self._transform_usage_objects(result=result)
if litellm.max_budget and self.stream is False and result is not None and isinstance(result, dict) and "content" in result:
if (
litellm.max_budget
and self.stream is False
and result is not None
and isinstance(result, dict)
and "content" in result
):
time_diff = (end_time - start_time).total_seconds()
float_diff = float(time_diff)
litellm._current_cost += litellm.completion_cost(
@@ -3340,8 +3391,6 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
_in_memory_loggers.append(_literalai_logger)
return _literalai_logger # type: ignore
elif logging_integration == "prometheus":
if PrometheusLogger is None:
raise ValueError("PrometheusLogger is not initialized")
for callback in _in_memory_loggers:
if isinstance(callback, PrometheusLogger):
return callback # type: ignore
+38 -8
View File
@@ -129,7 +129,7 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
"parallel_tool_calls",
"response_format",
"user",
"web_search_options",
"web_search_options"
]
if "claude-3-7-sonnet" in model or supports_reasoning(
@@ -646,6 +646,16 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
)
return tools
def _ensure_context_management_beta_header(self, headers: dict) -> None:
    """
    Ensure the context-management beta flag is listed in `anthropic-beta`.

    The header is treated as a comma-separated list of beta flags. The flag
    is appended only when it is not already listed, so flags set by the
    caller are kept and no duplicate is introduced. `headers` is mutated in
    place.

    Args:
        headers: Outgoing request headers to update.
    """
    beta_value = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
    existing_beta = headers.get("anthropic-beta")

    # Flags already present, ignoring surrounding whitespace and empty segments.
    existing_values = [
        beta.strip() for beta in (existing_beta or "").split(",") if beta.strip()
    ]

    if not existing_values:
        # Header is missing, empty or blank: set it outright instead of
        # producing a malformed value such as ", <flag>".
        headers["anthropic-beta"] = beta_value
        return

    if beta_value not in existing_values:
        headers["anthropic-beta"] = f"{existing_beta}, {beta_value}"
def update_headers_with_optional_anthropic_beta(
self, headers: dict, optional_params: dict
) -> dict:
@@ -661,9 +671,11 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
elif tool.get("type", None) and tool.get("type").startswith(
ANTHROPIC_HOSTED_TOOLS.MEMORY.value
):
headers["anthropic-beta"] = (
ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
)
headers[
"anthropic-beta"
] = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
if optional_params.get("context_management") is not None:
self._ensure_context_management_beta_header(headers)
return headers
def transform_request(
@@ -973,13 +985,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
):
text_content = prefix_prompt + text_content
context_management: Optional[Dict] = completion_response.get(
"context_management"
)
provider_specific_fields: Dict[str, Any] = {
"citations": citations,
"thinking_blocks": thinking_blocks,
}
if context_management is not None:
provider_specific_fields["context_management"] = context_management
_message = litellm.Message(
tool_calls=tool_calls,
content=text_content or None,
provider_specific_fields={
"citations": citations,
"thinking_blocks": thinking_blocks,
},
provider_specific_fields=provider_specific_fields,
thinking_blocks=thinking_blocks,
reasoning_content=reasoning_content,
)
@@ -1012,6 +1032,16 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
model_response.created = int(time.time())
model_response.model = completion_response["model"]
context_management_response = completion_response.get("context_management")
if context_management_response is not None:
_hidden_params["context_management"] = context_management_response
try:
model_response.__dict__["context_management"] = (
context_management_response
)
except Exception:
pass
model_response._hidden_params = _hidden_params
return model_response
@@ -6,7 +6,10 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from litellm.llms.base_llm.anthropic_messages.transformation import (
BaseAnthropicMessagesConfig,
)
from litellm.types.llms.anthropic import AnthropicMessagesRequest
from litellm.types.llms.anthropic import (
ANTHROPIC_BETA_HEADER_VALUES,
AnthropicMessagesRequest,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
@@ -32,6 +35,7 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
"tools",
"tool_choice",
"thinking",
"context_management",
# TODO: Add Anthropic `metadata` support
# "metadata",
]
@@ -71,6 +75,11 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
if "content-type" not in headers:
headers["content-type"] = "application/json"
headers = self._update_headers_with_optional_anthropic_beta(
headers=headers,
context_management=optional_params.get("context_management"),
)
return headers, api_base
def transform_anthropic_messages_request(
@@ -142,3 +151,18 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
request_body=request_body,
litellm_logging_obj=litellm_logging_obj,
)
@staticmethod
def _update_headers_with_optional_anthropic_beta(
    headers: dict, context_management: Optional[Dict]
) -> dict:
    """
    Add the context-management beta flag when `context_management` is set.

    Args:
        headers: Outgoing request headers; mutated in place and returned.
        context_management: The request's `context_management` value. When
            it is None the headers are returned untouched.

    Returns:
        The same `headers` dict. If `context_management` was provided, its
        `anthropic-beta` entry lists the context-management flag, appended to
        any flags that were already present.
    """
    if context_management is None:
        return headers

    beta_value = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
    existing_beta = headers.get("anthropic-beta")

    # Flags already present, ignoring surrounding whitespace and empty segments.
    listed_betas = [
        beta.strip() for beta in (existing_beta or "").split(",") if beta.strip()
    ]

    if not listed_betas:
        # Missing, empty or blank header: set it outright so the value never
        # starts with a stray ", ".
        headers["anthropic-beta"] = beta_value
    elif beta_value not in listed_betas:
        headers["anthropic-beta"] = f"{existing_beta}, {beta_value}"
    return headers
@@ -0,0 +1,226 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
import httpx
from litellm.types.router import GenericLiteLLMParams
from litellm.types.vector_store_files import (
VectorStoreFileAuthCredentials,
VectorStoreFileChunkingStrategy,
VectorStoreFileContentResponse,
VectorStoreFileCreateRequest,
VectorStoreFileDeleteResponse,
VectorStoreFileListQueryParams,
VectorStoreFileListResponse,
VectorStoreFileObject,
VectorStoreFileUpdateRequest,
)
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
from ..chat.transformation import BaseLLMException as _BaseLLMException
LiteLLMLoggingObj = _LiteLLMLoggingObj
BaseLLMException = _BaseLLMException
else:
LiteLLMLoggingObj = Any
BaseLLMException = Any
class BaseVectorStoreFilesConfig(ABC):
    """
    Provider contract for vector store file operations.

    A provider subclass supplies authentication, URL construction and the
    request/response transformations for the create, list, retrieve,
    retrieve-content, update and delete operations. The non-abstract hooks
    (param mapping, error construction, request signing, chunking-strategy
    preparation) come with pass-through defaults.
    """

    def get_supported_openai_params(
        self,
        operation: str,
    ) -> Tuple[str, ...]:
        """List the OpenAI parameter names accepted for `operation`.

        The base implementation supports none and returns an empty tuple.
        """
        return ()

    def map_openai_params(
        self,
        *,
        operation: str,
        non_default_params: Dict[str, Any],
        optional_params: Dict[str, Any],
        drop_params: bool,
    ) -> Dict[str, Any]:
        """Translate non-default OpenAI params into provider params.

        The base implementation performs no mapping: `optional_params` is
        returned as-is and the other arguments are ignored.
        """
        return optional_params

    @abstractmethod
    def get_auth_credentials(
        self, litellm_params: Dict[str, Any]
    ) -> VectorStoreFileAuthCredentials:
        """Derive the provider's auth credentials from `litellm_params`."""

    @abstractmethod
    def get_vector_store_file_endpoints_by_type(
        self,
    ) -> Dict[str, Tuple[Tuple[str, str], ...]]:
        """Return the provider's endpoints grouped under string keys.

        NOTE(review): each value is a tuple of (str, str) pairs; the meaning
        of the key and of each pair is not visible here - confirm against a
        concrete implementation.
        """

    @abstractmethod
    def validate_environment(
        self,
        *,
        headers: Dict[str, str],
        litellm_params: Optional[GenericLiteLLMParams],
    ) -> Dict[str, str]:
        """Produce the request headers for this provider.

        Must be overridden. The base body returns an empty dict and is only
        reachable through an explicit `super()` call.
        """
        return dict()

    @abstractmethod
    def get_complete_url(
        self,
        *,
        api_base: Optional[str],
        vector_store_id: str,
        litellm_params: Dict[str, Any],
    ) -> str:
        """Resolve the base URL used for vector store file requests.

        Must be overridden. The base body, reachable through `super()`,
        returns `api_base` unchanged.

        Raises:
            ValueError: If `api_base` is None.
        """
        if api_base is not None:
            return api_base
        raise ValueError("api_base is required")

    @abstractmethod
    def transform_create_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        create_request: VectorStoreFileCreateRequest,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Build the create-file request as a `(url, request body)` pair."""

    @abstractmethod
    def transform_create_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileObject:
        """Convert the provider's create-file response to a `VectorStoreFileObject`."""

    @abstractmethod
    def transform_list_vector_store_files_request(
        self,
        *,
        vector_store_id: str,
        query_params: VectorStoreFileListQueryParams,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Build the list-files request as a `(url, query params)` pair."""

    @abstractmethod
    def transform_list_vector_store_files_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileListResponse:
        """Convert the provider's list-files response to a `VectorStoreFileListResponse`."""

    @abstractmethod
    def transform_retrieve_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Build the retrieve-file request as a `(url, query params)` pair."""

    @abstractmethod
    def transform_retrieve_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileObject:
        """Convert the provider's retrieve-file response to a `VectorStoreFileObject`."""

    @abstractmethod
    def transform_retrieve_vector_store_file_content_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Build the retrieve-file-content request as a `(url, query params)` pair."""

    @abstractmethod
    def transform_retrieve_vector_store_file_content_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileContentResponse:
        """Convert the provider's file-content response to a `VectorStoreFileContentResponse`."""

    @abstractmethod
    def transform_update_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        update_request: VectorStoreFileUpdateRequest,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Build the update-file request as a `(url, dict)` pair.

        NOTE(review): the dict is presumably the request body - confirm
        against the update handler.
        """

    @abstractmethod
    def transform_update_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileObject:
        """Convert the provider's update-file response to a `VectorStoreFileObject`."""

    @abstractmethod
    def transform_delete_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Build the delete-file request as a `(url, dict)` pair.

        NOTE(review): how the dict is sent (query params or body) is decided
        by the delete handler - confirm there.
        """

    @abstractmethod
    def transform_delete_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileDeleteResponse:
        """Convert the provider's delete-file response to a `VectorStoreFileDeleteResponse`."""

    def get_error_class(
        self,
        *,
        error_message: str,
        status_code: int,
        headers: Union[Dict[str, Any], httpx.Headers],
    ) -> BaseLLMException:
        """Signal a provider error as a `BaseLLMException`.

        Despite the return annotation, the base implementation raises the
        exception itself rather than returning it.

        Raises:
            BaseLLMException: Always, carrying the given status code,
                message and headers.
        """
        # Imported at call time; at module level the name is only bound to
        # the real class under TYPE_CHECKING.
        from ..chat.transformation import BaseLLMException

        raise BaseLLMException(
            status_code=status_code,
            message=error_message,
            headers=headers,
        )

    def sign_request(
        self,
        *,
        headers: Dict[str, str],
        optional_params: Dict[str, Any],
        request_data: Dict[str, Any],
        api_base: str,
        api_key: Optional[str] = None,
    ) -> Tuple[Dict[str, str], Optional[bytes]]:
        """Hook for providers that must sign outgoing requests.

        The base implementation does no signing: it returns `headers`
        unchanged together with `None` as the second element.
        """
        return headers, None

    def prepare_chunking_strategy(
        self,
        chunking_strategy: Optional[VectorStoreFileChunkingStrategy],
    ) -> Optional[VectorStoreFileChunkingStrategy]:
        """Hook to adjust a chunking strategy before it is sent.

        The base implementation returns `chunking_strategy` unchanged.
        """
        return chunking_strategy
@@ -48,6 +48,9 @@ from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfi
from litellm.llms.base_llm.search.transformation import BaseSearchConfig, SearchResponse
from litellm.llms.base_llm.text_to_speech.transformation import BaseTextToSpeechConfig
from litellm.llms.base_llm.vector_store.transformation import BaseVectorStoreConfig
from litellm.llms.base_llm.vector_store_files.transformation import (
BaseVectorStoreFilesConfig,
)
from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
@@ -92,6 +95,15 @@ from litellm.types.vector_stores import (
VectorStoreSearchOptionalRequestParams,
VectorStoreSearchResponse,
)
from litellm.types.vector_store_files import (
VectorStoreFileContentResponse,
VectorStoreFileCreateRequest,
VectorStoreFileDeleteResponse,
VectorStoreFileListQueryParams,
VectorStoreFileListResponse,
VectorStoreFileObject,
VectorStoreFileUpdateRequest,
)
from litellm.types.videos.main import VideoObject
from litellm.utils import (
CustomStreamWrapper,
@@ -3529,6 +3541,7 @@ class BaseLLMHTTPHandler:
BaseImageEditConfig,
BaseImageGenerationConfig,
BaseVectorStoreConfig,
BaseVectorStoreFilesConfig,
BaseGoogleGenAIGenerateContentConfig,
BaseAnthropicMessagesConfig,
BaseBatchesConfig,
@@ -6000,6 +6013,909 @@ class BaseLLMHTTPHandler:
response=response,
)
#####################################################################
################ Vector Store Files HANDLERS ########################
#####################################################################
async def async_vector_store_file_create_handler(
    self,
    *,
    vector_store_id: str,
    create_request: VectorStoreFileCreateRequest,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileObject:
    """
    Create a vector store file asynchronously via the provider's API.

    Headers, URL and body are built through the provider config, the request
    is logged with `logging_obj.pre_call`, and the body is POSTed as JSON.

    Args:
        vector_store_id: Target vector store.
        create_request: Create parameters; `extra_body` entries are merged
            on top of them.
        vector_store_files_provider_config: Provider-specific transformations.
        custom_llm_provider: Provider name, used when a client must be created.
        litellm_params: Supplies `api_base` and `ssl_verify`.
        logging_obj: Receives the pre-call log entry.
        extra_headers: Extra headers; applied over the provider's headers.
        extra_body: Extra fields for the request body.
        timeout: Passed to the POST call.
        client: Reused only when it is an `AsyncHTTPHandler`.

    Returns:
        The provider response transformed by the provider config.

    Raises:
        Whatever `self._handle_error` produces for a failed POST.
    """
    config = vector_store_files_provider_config

    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        # No usable async client supplied: obtain one for this provider.
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        # Caller-supplied headers take precedence over provider headers.
        request_headers.update(extra_headers)

    base_url = config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    payload = dict(create_request)
    if extra_body:
        payload.update(extra_body)

    url, request_body = config.transform_create_vector_store_file_request(
        vector_store_id=vector_store_id,
        create_request=cast(VectorStoreFileCreateRequest, payload),
        api_base=base_url,
    )

    logging_obj.pre_call(
        input="",
        api_key="",
        additional_args={
            "complete_input_dict": request_body,
            "api_base": base_url,
            "headers": request_headers,
        },
    )

    try:
        response = await http_client.post(
            url=url, headers=request_headers, json=request_body, timeout=timeout
        )
    except Exception as e:
        raise self._handle_error(e=e, provider_config=config)

    return config.transform_create_vector_store_file_response(response=response)
def vector_store_file_create_handler(
    self,
    *,
    vector_store_id: str,
    create_request: VectorStoreFileCreateRequest,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[VectorStoreFileObject, Coroutine[Any, Any, VectorStoreFileObject]]:
    """
    Create a vector store file, synchronously or as a coroutine.

    With `_is_async` set, the un-awaited coroutine from
    `async_vector_store_file_create_handler` is returned; `client` is
    forwarded only if it is an `AsyncHTTPHandler`. Otherwise the request is
    built through the provider config, logged with `logging_obj.pre_call`
    and POSTed as JSON with a synchronous client.

    Args:
        vector_store_id: Target vector store.
        create_request: Create parameters; `extra_body` entries are merged
            on top of them.
        vector_store_files_provider_config: Provider-specific transformations.
        custom_llm_provider: Provider name (used by the async path).
        litellm_params: Supplies `api_base` and `ssl_verify`.
        logging_obj: Receives the pre-call log entry.
        extra_headers: Extra headers; applied over the provider's headers.
        extra_body: Extra fields for the request body.
        timeout: Passed to the POST call.
        client: Reused on the sync path only when it is an `HTTPHandler`.
        _is_async: Selects the async code path.

    Returns:
        The transformed response, or a coroutine resolving to it.

    Raises:
        Whatever `self._handle_error` produces for a failed POST.
    """
    config = vector_store_files_provider_config

    if _is_async:
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_create_handler(
            vector_store_id=vector_store_id,
            create_request=create_request,
            vector_store_files_provider_config=config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            extra_body=extra_body,
            timeout=timeout,
            client=async_client,
        )

    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        # No usable sync client supplied: obtain one.
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        # Caller-supplied headers take precedence over provider headers.
        request_headers.update(extra_headers)

    base_url = config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    payload = dict(create_request)
    if extra_body:
        payload.update(extra_body)

    url, request_body = config.transform_create_vector_store_file_request(
        vector_store_id=vector_store_id,
        create_request=cast(VectorStoreFileCreateRequest, payload),
        api_base=base_url,
    )

    logging_obj.pre_call(
        input="",
        api_key="",
        additional_args={
            "complete_input_dict": request_body,
            "api_base": base_url,
            "headers": request_headers,
        },
    )

    try:
        response = http_client.post(
            url=url, headers=request_headers, json=request_body, timeout=timeout
        )
    except Exception as e:
        raise self._handle_error(e=e, provider_config=config)

    return config.transform_create_vector_store_file_response(response=response)
async def async_vector_store_file_list_handler(
    self,
    *,
    vector_store_id: str,
    query_params: VectorStoreFileListQueryParams,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileListResponse:
    """
    List the files of a vector store asynchronously via the provider's API.

    Headers, URL and query parameters are built through the provider config,
    the request is logged with `logging_obj.pre_call`, and a GET is issued.

    Args:
        vector_store_id: Vector store whose files are listed.
        query_params: List parameters; `extra_query` entries are merged on
            top of them.
        vector_store_files_provider_config: Provider-specific transformations.
        custom_llm_provider: Provider name, used when a client must be created.
        litellm_params: Supplies `api_base` and `ssl_verify`.
        logging_obj: Receives the pre-call log entry.
        extra_headers: Extra headers; applied over the provider's headers.
        extra_query: Extra query parameters.
        timeout: Accepted for interface parity; not passed to the GET call
            in this handler.
        client: Reused only when it is an `AsyncHTTPHandler`.

    Returns:
        The provider response transformed by the provider config.

    Raises:
        Whatever `self._handle_error` produces for a failed GET.
    """
    config = vector_store_files_provider_config

    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        # No usable async client supplied: obtain one for this provider.
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        # Caller-supplied headers take precedence over provider headers.
        request_headers.update(extra_headers)

    base_url = config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    merged_query = dict(query_params)
    if extra_query:
        merged_query.update(extra_query)

    url, request_params = config.transform_list_vector_store_files_request(
        vector_store_id=vector_store_id,
        query_params=cast(VectorStoreFileListQueryParams, merged_query),
        api_base=base_url,
    )

    logging_obj.pre_call(
        input="",
        api_key="",
        additional_args={
            "complete_input_dict": request_params,
            "api_base": base_url,
            "headers": request_headers,
        },
    )

    try:
        response = await http_client.get(
            url=url, headers=request_headers, params=request_params
        )
    except Exception as e:
        raise self._handle_error(e=e, provider_config=config)

    return config.transform_list_vector_store_files_response(response=response)
def vector_store_file_list_handler(
    self,
    *,
    vector_store_id: str,
    query_params: VectorStoreFileListQueryParams,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[
    VectorStoreFileListResponse, Coroutine[Any, Any, VectorStoreFileListResponse]
]:
    """
    List the files of a vector store, synchronously or as a coroutine.

    With `_is_async` set, the un-awaited coroutine from
    `async_vector_store_file_list_handler` is returned; `client` is forwarded
    only if it is an `AsyncHTTPHandler`. Otherwise the request is built
    through the provider config, logged with `logging_obj.pre_call` and sent
    as a GET with a synchronous client.

    Args:
        vector_store_id: Vector store whose files are listed.
        query_params: List parameters; `extra_query` entries are merged on
            top of them.
        vector_store_files_provider_config: Provider-specific transformations.
        custom_llm_provider: Provider name (used by the async path).
        litellm_params: Supplies `api_base` and `ssl_verify`.
        logging_obj: Receives the pre-call log entry.
        extra_headers: Extra headers; applied over the provider's headers.
        extra_query: Extra query parameters.
        timeout: Forwarded to the async handler; not passed to the sync GET
            call in this handler.
        client: Reused on the sync path only when it is an `HTTPHandler`.
        _is_async: Selects the async code path.

    Returns:
        The transformed response, or a coroutine resolving to it.

    Raises:
        Whatever `self._handle_error` produces for a failed GET.
    """
    config = vector_store_files_provider_config

    if _is_async:
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_list_handler(
            vector_store_id=vector_store_id,
            query_params=query_params,
            vector_store_files_provider_config=config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            extra_query=extra_query,
            timeout=timeout,
            client=async_client,
        )

    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        # No usable sync client supplied: obtain one.
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        # Caller-supplied headers take precedence over provider headers.
        request_headers.update(extra_headers)

    base_url = config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    merged_query = dict(query_params)
    if extra_query:
        merged_query.update(extra_query)

    url, request_params = config.transform_list_vector_store_files_request(
        vector_store_id=vector_store_id,
        query_params=cast(VectorStoreFileListQueryParams, merged_query),
        api_base=base_url,
    )

    logging_obj.pre_call(
        input="",
        api_key="",
        additional_args={
            "complete_input_dict": request_params,
            "api_base": base_url,
            "headers": request_headers,
        },
    )

    try:
        response = http_client.get(
            url=url, headers=request_headers, params=request_params
        )
    except Exception as e:
        raise self._handle_error(e=e, provider_config=config)

    return config.transform_list_vector_store_files_response(response=response)
async def async_vector_store_file_retrieve_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileObject:
    """
    Retrieve one vector store file asynchronously via the provider's API.

    Headers, URL and query parameters are built through the provider config,
    the request is logged with `logging_obj.pre_call`, and a GET is issued.

    Args:
        vector_store_id: Vector store that holds the file.
        file_id: File to retrieve.
        vector_store_files_provider_config: Provider-specific transformations.
        custom_llm_provider: Provider name, used when a client must be created.
        litellm_params: Supplies `api_base` and `ssl_verify`.
        logging_obj: Receives the pre-call log entry.
        extra_headers: Extra headers; applied over the provider's headers.
        timeout: Accepted for interface parity; not passed to the GET call
            in this handler.
        client: Reused only when it is an `AsyncHTTPHandler`.

    Returns:
        The provider response transformed by the provider config.

    Raises:
        Whatever `self._handle_error` produces for a failed GET.
    """
    config = vector_store_files_provider_config

    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        # No usable async client supplied: obtain one for this provider.
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        # Caller-supplied headers take precedence over provider headers.
        request_headers.update(extra_headers)

    base_url = config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    url, request_params = config.transform_retrieve_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=base_url,
    )

    logging_obj.pre_call(
        input="",
        api_key="",
        additional_args={
            "complete_input_dict": request_params,
            "api_base": base_url,
            "headers": request_headers,
        },
    )

    try:
        response = await http_client.get(
            url=url, headers=request_headers, params=request_params
        )
    except Exception as e:
        raise self._handle_error(e=e, provider_config=config)

    return config.transform_retrieve_vector_store_file_response(response=response)
def vector_store_file_retrieve_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[VectorStoreFileObject, Coroutine[Any, Any, VectorStoreFileObject]]:
    """
    Retrieve one vector store file, synchronously or as a coroutine.

    With `_is_async` set, the un-awaited coroutine from
    `async_vector_store_file_retrieve_handler` is returned; `client` is
    forwarded only if it is an `AsyncHTTPHandler`. Otherwise the request is
    built through the provider config, logged with `logging_obj.pre_call`
    and sent as a GET with a synchronous client.

    Args:
        vector_store_id: Vector store that holds the file.
        file_id: File to retrieve.
        vector_store_files_provider_config: Provider-specific transformations.
        custom_llm_provider: Provider name (used by the async path).
        litellm_params: Supplies `api_base` and `ssl_verify`.
        logging_obj: Receives the pre-call log entry.
        extra_headers: Extra headers; applied over the provider's headers.
        timeout: Forwarded to the async handler; not passed to the sync GET
            call in this handler.
        client: Reused on the sync path only when it is an `HTTPHandler`.
        _is_async: Selects the async code path.

    Returns:
        The transformed response, or a coroutine resolving to it.

    Raises:
        Whatever `self._handle_error` produces for a failed GET.
    """
    config = vector_store_files_provider_config

    if _is_async:
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_retrieve_handler(
            vector_store_id=vector_store_id,
            file_id=file_id,
            vector_store_files_provider_config=config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            timeout=timeout,
            client=async_client,
        )

    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        # No usable sync client supplied: obtain one.
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        # Caller-supplied headers take precedence over provider headers.
        request_headers.update(extra_headers)

    base_url = config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    url, request_params = config.transform_retrieve_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=base_url,
    )

    logging_obj.pre_call(
        input="",
        api_key="",
        additional_args={
            "complete_input_dict": request_params,
            "api_base": base_url,
            "headers": request_headers,
        },
    )

    try:
        response = http_client.get(
            url=url, headers=request_headers, params=request_params
        )
    except Exception as e:
        raise self._handle_error(e=e, provider_config=config)

    return config.transform_retrieve_vector_store_file_response(response=response)
async def async_vector_store_file_content_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileContentResponse:
    """
    Retrieve a vector store file's content asynchronously via the provider's API.

    Headers, URL and query parameters are built through the provider config,
    the request is logged with `logging_obj.pre_call`, and a GET is issued.

    Args:
        vector_store_id: Vector store that holds the file.
        file_id: File whose content is fetched.
        vector_store_files_provider_config: Provider-specific transformations.
        custom_llm_provider: Provider name, used when a client must be created.
        litellm_params: Supplies `api_base` and `ssl_verify`.
        logging_obj: Receives the pre-call log entry.
        extra_headers: Extra headers; applied over the provider's headers.
        timeout: Accepted for interface parity; not passed to the GET call
            in this handler.
        client: Reused only when it is an `AsyncHTTPHandler`.

    Returns:
        The provider response transformed by the provider config.

    Raises:
        Whatever `self._handle_error` produces for a failed GET.
    """
    config = vector_store_files_provider_config

    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        # No usable async client supplied: obtain one for this provider.
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        # Caller-supplied headers take precedence over provider headers.
        request_headers.update(extra_headers)

    base_url = config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    url, request_params = (
        config.transform_retrieve_vector_store_file_content_request(
            vector_store_id=vector_store_id,
            file_id=file_id,
            api_base=base_url,
        )
    )

    logging_obj.pre_call(
        input="",
        api_key="",
        additional_args={
            "complete_input_dict": request_params,
            "api_base": base_url,
            "headers": request_headers,
        },
    )

    try:
        response = await http_client.get(
            url=url, headers=request_headers, params=request_params
        )
    except Exception as e:
        raise self._handle_error(e=e, provider_config=config)

    return config.transform_retrieve_vector_store_file_content_response(
        response=response
    )
def vector_store_file_content_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[
    VectorStoreFileContentResponse,
    Coroutine[Any, Any, VectorStoreFileContentResponse],
]:
    """
    Fetch the content of a vector store file.

    With ``_is_async`` set, the un-awaited coroutine of
    ``async_vector_store_file_content_handler`` is returned (``client`` is
    passed through only when it is an ``AsyncHTTPHandler``). Otherwise the
    request is sent synchronously with an HTTP GET.

    Args:
        vector_store_id: Vector store that owns the file.
        file_id: File whose content is requested.
        vector_store_files_provider_config: Provider config used to build
            the request and to transform the response.
        custom_llm_provider: Provider name; only forwarded to the async path.
        litellm_params: Call parameters; ``api_base`` and ``ssl_verify``
            are read here.
        logging_obj: Its ``pre_call`` hook runs right before the request.
        extra_headers: Optional headers, re-applied on top of the headers
            returned by the provider config.
        timeout: Forwarded to the async path; not passed to the sync GET.
        client: Used as-is on the sync path when it is an ``HTTPHandler``.
        _is_async: Selects the async path.

    Returns:
        The transformed content response, or a coroutine producing it.

    Raises:
        The exception built by ``self._handle_error`` when the GET fails.
    """
    provider_config = vector_store_files_provider_config

    if _is_async:
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_content_handler(
            vector_store_id=vector_store_id,
            file_id=file_id,
            vector_store_files_provider_config=provider_config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            timeout=timeout,
            client=async_client,
        )

    # Reuse the caller's sync client when one was supplied.
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        request_headers.update(extra_headers)

    files_base_url = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )
    (
        request_url,
        query_params,
    ) = provider_config.transform_retrieve_vector_store_file_content_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=files_base_url,
    )

    pre_call_details = {
        "complete_input_dict": query_params,
        "api_base": files_base_url,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = http_client.get(
            url=request_url, headers=request_headers, params=query_params
        )
    except Exception as err:
        raise self._handle_error(e=err, provider_config=provider_config)

    return provider_config.transform_retrieve_vector_store_file_content_response(
        response=raw_response
    )
async def async_vector_store_file_update_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    update_request: VectorStoreFileUpdateRequest,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileObject:
    """
    Update a vector store file with an async HTTP POST.

    Args:
        vector_store_id: Vector store that owns the file.
        file_id: File to update.
        update_request: Update fields; copied into a plain dict before
            ``extra_body`` is merged on top.
        vector_store_files_provider_config: Provider config used to build
            the request and to transform the response.
        custom_llm_provider: Provider name, used to obtain an async client
            when ``client`` is not an ``AsyncHTTPHandler``.
        litellm_params: Call parameters; ``api_base`` and ``ssl_verify``
            are read here.
        logging_obj: Its ``pre_call`` hook runs right before the request.
        extra_headers: Optional headers, re-applied on top of the headers
            returned by the provider config.
        extra_body: Optional extra fields merged over ``update_request``.
        timeout: Forwarded to the POST call.
        client: Used as-is when it is an ``AsyncHTTPHandler``.

    Returns:
        The provider config's transformed update response.

    Raises:
        The exception built by ``self._handle_error`` when the POST fails.
    """
    provider_config = vector_store_files_provider_config

    # Reuse the caller's async client when one was supplied.
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        request_headers.update(extra_headers)

    files_base_url = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    # Work on a copy so the caller's update_request is left untouched.
    merged_update = dict(update_request)
    if extra_body:
        merged_update.update(extra_body)

    request_url, json_payload = (
        provider_config.transform_update_vector_store_file_request(
            vector_store_id=vector_store_id,
            file_id=file_id,
            update_request=cast(VectorStoreFileUpdateRequest, merged_update),
            api_base=files_base_url,
        )
    )

    pre_call_details = {
        "complete_input_dict": json_payload,
        "api_base": files_base_url,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = await http_client.post(
            url=request_url,
            headers=request_headers,
            json=json_payload,
            timeout=timeout,
        )
    except Exception as err:
        raise self._handle_error(e=err, provider_config=provider_config)

    return provider_config.transform_update_vector_store_file_response(
        response=raw_response
    )
def vector_store_file_update_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    update_request: VectorStoreFileUpdateRequest,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[VectorStoreFileObject, Coroutine[Any, Any, VectorStoreFileObject]]:
    """
    Update a vector store file.

    With ``_is_async`` set, the un-awaited coroutine of
    ``async_vector_store_file_update_handler`` is returned (``client`` is
    passed through only when it is an ``AsyncHTTPHandler``). Otherwise the
    request is sent synchronously with an HTTP POST.

    Args:
        vector_store_id: Vector store that owns the file.
        file_id: File to update.
        update_request: Update fields; copied into a plain dict before
            ``extra_body`` is merged on top.
        vector_store_files_provider_config: Provider config used to build
            the request and to transform the response.
        custom_llm_provider: Provider name; only forwarded to the async path.
        litellm_params: Call parameters; ``api_base`` and ``ssl_verify``
            are read here.
        logging_obj: Its ``pre_call`` hook runs right before the request.
        extra_headers: Optional headers, re-applied on top of the headers
            returned by the provider config.
        extra_body: Optional extra fields merged over ``update_request``.
        timeout: Forwarded to the POST call.
        client: Used as-is on the sync path when it is an ``HTTPHandler``.
        _is_async: Selects the async path.

    Returns:
        The transformed update response, or a coroutine producing it.

    Raises:
        The exception built by ``self._handle_error`` when the POST fails.
    """
    provider_config = vector_store_files_provider_config

    if _is_async:
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_update_handler(
            vector_store_id=vector_store_id,
            file_id=file_id,
            update_request=update_request,
            vector_store_files_provider_config=provider_config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            extra_body=extra_body,
            timeout=timeout,
            client=async_client,
        )

    # Reuse the caller's sync client when one was supplied.
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        request_headers.update(extra_headers)

    files_base_url = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    # Work on a copy so the caller's update_request is left untouched.
    merged_update = dict(update_request)
    if extra_body:
        merged_update.update(extra_body)

    request_url, json_payload = (
        provider_config.transform_update_vector_store_file_request(
            vector_store_id=vector_store_id,
            file_id=file_id,
            update_request=cast(VectorStoreFileUpdateRequest, merged_update),
            api_base=files_base_url,
        )
    )

    pre_call_details = {
        "complete_input_dict": json_payload,
        "api_base": files_base_url,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = http_client.post(
            url=request_url,
            headers=request_headers,
            json=json_payload,
            timeout=timeout,
        )
    except Exception as err:
        raise self._handle_error(e=err, provider_config=provider_config)

    return provider_config.transform_update_vector_store_file_response(
        response=raw_response
    )
async def async_vector_store_file_delete_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileDeleteResponse:
    """
    Delete a file from a vector store with an async HTTP DELETE.

    Args:
        vector_store_id: Vector store that owns the file.
        file_id: File to delete.
        vector_store_files_provider_config: Provider config used to build
            the request and to transform the response.
        custom_llm_provider: Provider name, used to obtain an async client
            when ``client`` is not an ``AsyncHTTPHandler``.
        litellm_params: Call parameters; ``api_base`` and ``ssl_verify``
            are read here.
        logging_obj: Its ``pre_call`` hook runs right before the request.
        extra_headers: Optional headers, re-applied on top of the headers
            returned by the provider config.
        timeout: Forwarded to the DELETE call.
        client: Used as-is when it is an ``AsyncHTTPHandler``.

    Returns:
        The provider config's transformed delete response.

    Raises:
        The exception built by ``self._handle_error`` when the DELETE fails.
    """
    provider_config = vector_store_files_provider_config

    # Reuse the caller's async client when one was supplied.
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        request_headers.update(extra_headers)

    files_base_url = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )
    (
        request_url,
        query_params,
    ) = provider_config.transform_delete_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=files_base_url,
    )

    pre_call_details = {
        "complete_input_dict": query_params,
        "api_base": files_base_url,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = await http_client.delete(
            url=request_url,
            headers=request_headers,
            params=query_params,
            timeout=timeout,
        )
    except Exception as err:
        raise self._handle_error(e=err, provider_config=provider_config)

    return provider_config.transform_delete_vector_store_file_response(
        response=raw_response
    )
def vector_store_file_delete_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[
    VectorStoreFileDeleteResponse,
    Coroutine[Any, Any, VectorStoreFileDeleteResponse],
]:
    """
    Delete a file from a vector store.

    With ``_is_async`` set, the un-awaited coroutine of
    ``async_vector_store_file_delete_handler`` is returned (``client`` is
    passed through only when it is an ``AsyncHTTPHandler``). Otherwise the
    request is sent synchronously with an HTTP DELETE.

    Args:
        vector_store_id: Vector store that owns the file.
        file_id: File to delete.
        vector_store_files_provider_config: Provider config used to build
            the request and to transform the response.
        custom_llm_provider: Provider name; only forwarded to the async path.
        litellm_params: Call parameters; ``api_base`` and ``ssl_verify``
            are read here.
        logging_obj: Its ``pre_call`` hook runs right before the request.
        extra_headers: Optional headers, re-applied on top of the headers
            returned by the provider config.
        timeout: Forwarded to the DELETE call.
        client: Used as-is on the sync path when it is an ``HTTPHandler``.
        _is_async: Selects the async path.

    Returns:
        The transformed delete response, or a coroutine producing it.

    Raises:
        The exception built by ``self._handle_error`` when the DELETE fails.
    """
    provider_config = vector_store_files_provider_config

    if _is_async:
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_delete_handler(
            vector_store_id=vector_store_id,
            file_id=file_id,
            vector_store_files_provider_config=provider_config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            timeout=timeout,
            client=async_client,
        )

    # Reuse the caller's sync client when one was supplied.
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    if extra_headers:
        request_headers.update(extra_headers)

    files_base_url = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )
    (
        request_url,
        query_params,
    ) = provider_config.transform_delete_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=files_base_url,
    )

    pre_call_details = {
        "complete_input_dict": query_params,
        "api_base": files_base_url,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = http_client.delete(
            url=request_url,
            headers=request_headers,
            params=query_params,
            timeout=timeout,
        )
    except Exception as err:
        raise self._handle_error(e=err, provider_config=provider_config)

    return provider_config.transform_delete_vector_store_file_response(
        response=raw_response
    )
#####################################################################
################ Google GenAI GENERATE CONTENT HANDLER ###########################
#####################################################################
+2
View File
@@ -2,6 +2,7 @@ from .cost_calculator import cost_calculator
from .image_generation import (
FalAIBaseConfig,
FalAIBriaConfig,
FalAIFluxProV11Config,
FalAIFluxProV11UltraConfig,
FalAIFluxSchnellConfig,
FalAIImageGenerationConfig,
@@ -18,6 +19,7 @@ __all__ = [
"FalAIImagen4Config",
"FalAIRecraftV3Config",
"FalAIBriaConfig",
"FalAIFluxProV11Config",
"FalAIFluxProV11UltraConfig",
"FalAIFluxSchnellConfig",
"FalAIStableDiffusionConfig",
@@ -3,12 +3,18 @@ from litellm.llms.base_llm.image_generation.transformation import (
)
from .bria_transformation import FalAIBriaConfig
from .flux_pro_v11_transformation import FalAIFluxProV11Config
from .flux_pro_v11_ultra_transformation import FalAIFluxProV11UltraConfig
from .flux_schnell_transformation import FalAIFluxSchnellConfig
from .imagen4_transformation import FalAIImagen4Config
from .recraft_v3_transformation import FalAIRecraftV3Config
from .ideogram_v3_transformation import FalAIIdeogramV3Config
from .stable_diffusion_transformation import FalAIStableDiffusionConfig
from .transformation import FalAIBaseConfig, FalAIImageGenerationConfig
from .bytedance_transformation import (
FalAIBytedanceSeedreamV3Config,
FalAIBytedanceDreaminaV31Config,
)
__all__ = [
"FalAIBaseConfig",
@@ -16,9 +22,13 @@ __all__ = [
"FalAIImagen4Config",
"FalAIRecraftV3Config",
"FalAIBriaConfig",
"FalAIFluxProV11Config",
"FalAIFluxProV11UltraConfig",
"FalAIFluxSchnellConfig",
"FalAIStableDiffusionConfig",
"FalAIBytedanceSeedreamV3Config",
"FalAIBytedanceDreaminaV31Config",
"FalAIIdeogramV3Config",
]
@@ -41,10 +51,18 @@ def get_fal_ai_image_generation_config(model: str) -> BaseImageGenerationConfig:
return FalAIRecraftV3Config()
elif "bria" in model_lower:
return FalAIBriaConfig()
elif "flux-pro" in model_lower and "ultra" in model_lower:
return FalAIFluxProV11UltraConfig()
elif "flux-pro" in model_lower:
if "ultra" in model_lower:
return FalAIFluxProV11UltraConfig()
return FalAIFluxProV11Config()
elif "flux/schnell" in model_lower or "flux-schnell" in model_lower or "schnell" in model_lower:
return FalAIFluxSchnellConfig()
elif "bytedance/seedream" in model_lower:
return FalAIBytedanceSeedreamV3Config()
elif "bytedance/dreamina" in model_lower:
return FalAIBytedanceDreaminaV31Config()
elif "ideogram" in model_lower:
return FalAIIdeogramV3Config()
elif "stable-diffusion" in model_lower:
return FalAIStableDiffusionConfig()
@@ -0,0 +1,106 @@
from typing import Any
from .flux_pro_v11_ultra_transformation import FalAIFluxProV11UltraConfig
class FalAIBytedanceBaseConfig(FalAIFluxProV11UltraConfig):
    """
    Shared configuration for Fal AI ByteDance text-to-image models that follow
    the Flux Schnell style parameter mapping.

    These models accept the OpenAI-compatible `size` parameter in LiteLLM
    requests but expect `image_size` enums or custom size objects on Fal AI.
    """

    # OpenAI "WIDTHxHEIGHT" strings -> Fal AI `image_size` preset names.
    _OPENAI_SIZE_TO_IMAGE_SIZE = {
        "1024x1024": "square_hd",
        "512x512": "square",
        "1792x1024": "landscape_16_9",
        "1024x1792": "portrait_16_9",
        "1024x768": "landscape_4_3",
        "768x1024": "portrait_4_3",
    }

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy supported OpenAI parameters into ``optional_params`` under their
        Fal AI names (``n`` -> ``num_images``, ``response_format`` ->
        ``output_format``, ``size`` -> ``image_size``).

        Keys already present in ``optional_params`` are left alone.

        Raises:
            ValueError: for an unsupported parameter when ``drop_params`` is
                false.
        """
        supported_params = self.get_supported_openai_params(model)

        param_mapping = {
            "n": "num_images",
            "response_format": "output_format",
            "size": "image_size",
        }

        for k in non_default_params.keys():
            if k not in optional_params.keys():
                if k in supported_params:
                    mapped_key = param_mapping.get(k, k)
                    mapped_value = non_default_params[k]

                    if k == "response_format":
                        # Both OpenAI response formats are sent as "jpeg".
                        if mapped_value in ["b64_json", "url"]:
                            mapped_value = "jpeg"
                    elif k == "size":
                        mapped_value = self._map_image_size(mapped_value)

                    optional_params[mapped_key] = mapped_value
                elif drop_params:
                    continue
                else:
                    raise ValueError(
                        f"Parameter {k} is not supported for model {model}. "
                        f"Supported parameters are {supported_params}. "
                        "Set drop_params=True to drop unsupported parameters."
                    )

        return optional_params

    def _map_image_size(self, size: Any) -> Any:
        """
        Translate an OpenAI ``size`` value into a Fal AI ``image_size`` value.

        Non-string values (including dicts) are returned unchanged. Strings
        are trimmed and lower-cased first, so " 1024x1024 " and "512X512"
        resolve to their presets instead of bypassing the table or silently
        falling back. Unknown "WxH" strings become ``{"width", "height"}``
        objects; anything else falls back to "landscape_4_3".
        """
        if not isinstance(size, str):
            return size

        normalized = size.strip().lower()

        preset = self._OPENAI_SIZE_TO_IMAGE_SIZE.get(normalized)
        if preset is not None:
            return preset

        if "x" in normalized:
            try:
                # A wrong number of parts or a non-integer part raises ValueError.
                width_str, height_str = normalized.split("x")
                return {"width": int(width_str), "height": int(height_str)}
            except ValueError:
                pass

        return "landscape_4_3"
class FalAIBytedanceSeedreamV3Config(FalAIBytedanceBaseConfig):
    """
    Fal AI ByteDance Seedream v3 text-to-image configuration.

    Only the endpoint differs from the shared ByteDance base config.

    Endpoint: fal-ai/bytedance/seedream/v3/text-to-image
    Docs: https://fal.ai/models/fal-ai/bytedance/seedream/v3/text-to-image
    """

    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/bytedance/seedream/v3/text-to-image"
class FalAIBytedanceDreaminaV31Config(FalAIBytedanceBaseConfig):
    """
    Fal AI ByteDance Dreamina v3.1 text-to-image configuration.

    Only the endpoint differs from the shared ByteDance base config.

    Endpoint: fal-ai/bytedance/dreamina/v3.1/text-to-image
    Docs: https://fal.ai/models/fal-ai/bytedance/dreamina/v3.1/text-to-image
    """

    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/bytedance/dreamina/v3.1/text-to-image"
@@ -0,0 +1,91 @@
from typing import Any
from .flux_pro_v11_ultra_transformation import FalAIFluxProV11UltraConfig
class FalAIFluxProV11Config(FalAIFluxProV11UltraConfig):
    """
    Configuration for Fal AI Flux Pro v1.1 model.

    FLUX Pro v1.1 leverages the same overall request/response structure as the
    Ultra variant but expects the `image_size` parameter instead of
    `aspect_ratio`.

    Model endpoint: fal-ai/flux-pro/v1.1
    Documentation: https://fal.ai/models/fal-ai/flux-pro/v1.1
    """

    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/flux-pro/v1.1"

    # OpenAI "WIDTHxHEIGHT" strings -> Fal AI `image_size` preset names.
    _OPENAI_SIZE_TO_IMAGE_SIZE = {
        "1024x1024": "square_hd",
        "512x512": "square",
        "1792x1024": "landscape_16_9",
        "1024x1792": "portrait_16_9",
        "1024x768": "landscape_4_3",
        "768x1024": "portrait_4_3",
    }

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Override size handling to map to Flux Pro v1.1 image_size enums/object.

        Supported OpenAI parameters are copied into ``optional_params`` under
        their Fal AI names (``n`` -> ``num_images``, ``response_format`` ->
        ``output_format``, ``size`` -> ``image_size``). Keys already present
        in ``optional_params`` are left alone.

        Raises:
            ValueError: for an unsupported parameter when ``drop_params`` is
                false.
        """
        supported_params = self.get_supported_openai_params(model)

        param_mapping = {
            "n": "num_images",
            "response_format": "output_format",
            "size": "image_size",
        }

        for k in non_default_params.keys():
            if k not in optional_params.keys():
                if k in supported_params:
                    mapped_key = param_mapping.get(k, k)
                    mapped_value = non_default_params[k]

                    if k == "response_format":
                        # Both OpenAI response formats are sent as "jpeg".
                        if mapped_value in ["b64_json", "url"]:
                            mapped_value = "jpeg"
                    elif k == "size":
                        mapped_value = self._map_image_size(mapped_value)

                    optional_params[mapped_key] = mapped_value
                elif drop_params:
                    continue
                else:
                    raise ValueError(
                        f"Parameter {k} is not supported for model {model}. "
                        f"Supported parameters are {supported_params}. "
                        "Set drop_params=True to drop unsupported parameters."
                    )

        return optional_params

    def _map_image_size(self, size: Any) -> Any:
        """
        Translate an OpenAI ``size`` value into a Fal AI ``image_size`` value.

        Non-string values (including dicts) are returned unchanged. Strings
        are trimmed and lower-cased first, so " 1024x1024 " and "512X512"
        resolve to their presets instead of bypassing the table or silently
        falling back. Unknown "WxH" strings become ``{"width", "height"}``
        objects; anything else falls back to "landscape_4_3".
        """
        if not isinstance(size, str):
            return size

        normalized = size.strip().lower()

        preset = self._OPENAI_SIZE_TO_IMAGE_SIZE.get(normalized)
        if preset is not None:
            return preset

        if "x" in normalized:
            try:
                # A wrong number of parts or a non-integer part raises ValueError.
                width_str, height_str = normalized.split("x")
                return {"width": int(width_str), "height": int(height_str)}
            except ValueError:
                pass

        return "landscape_4_3"
@@ -0,0 +1,193 @@
from typing import TYPE_CHECKING, Any, List, Optional
import httpx
from litellm.types.llms.openai import OpenAIImageGenerationOptionalParams
from litellm.types.utils import ImageObject, ImageResponse
from .transformation import FalAIBaseConfig
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any
class FalAIIdeogramV3Config(FalAIBaseConfig):
    """
    Configuration for fal-ai/ideogram/v3 image generation.

    The Ideogram v3 endpoint exposes multiple generation modes (text-to-image,
    remixing, reframing, background replacement, character workflows, etc.).
    LiteLLM focuses on the text-to-image interface to maintain OpenAI parity.

    Model endpoint: fal-ai/ideogram/v3
    Documentation: https://fal.ai/models/fal-ai/ideogram/v3
    """

    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/ideogram/v3"

    # OpenAI "WIDTHxHEIGHT" strings -> Fal AI `image_size` preset names.
    _OPENAI_SIZE_TO_IMAGE_SIZE = {
        "1024x1024": "square_hd",
        "512x512": "square",
        "1024x768": "landscape_4_3",
        "768x1024": "portrait_4_3",
        "1536x1024": "landscape_16_9",
        "1024x1536": "portrait_16_9",
    }

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIImageGenerationOptionalParams]:
        """
        Ideogram v3 accepts the core OpenAI image parameters.
        """
        return [
            "n",
            "response_format",
            "size",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Map OpenAI-style parameters onto Ideogram's request schema.

        ``n`` becomes ``num_images`` and ``size`` becomes ``image_size``;
        ``response_format`` is accepted but not forwarded. Keys already
        present in ``optional_params`` are left alone.

        Raises:
            ValueError: for an unsupported parameter when ``drop_params`` is
                false.
        """
        supported_params = self.get_supported_openai_params(model)

        for k in non_default_params.keys():
            if k in optional_params:
                continue

            if k not in supported_params:
                if drop_params:
                    continue
                raise ValueError(
                    f"Parameter {k} is not supported for model {model}. "
                    f"Supported parameters are {supported_params}. "
                    "Set drop_params=True to drop unsupported parameters."
                )

            value = non_default_params[k]

            if k == "n":
                optional_params["num_images"] = value
            elif k == "size":
                optional_params["image_size"] = self._map_image_size(value)
            elif k == "response_format":
                # Ideogram always returns URLs; nothing to map but don't error.
                continue

        return optional_params

    def _map_image_size(self, size: Any) -> Any:
        """
        Translate an OpenAI ``size`` value into a Fal AI ``image_size`` value.

        Dicts with integer ``width`` and ``height`` are reduced to those two
        keys; other dicts and non-string values are returned unchanged.
        Strings are trimmed and lower-cased before lookup, so "1024X768"
        resolves to its preset instead of silently falling back. Unknown
        "WxH" strings become ``{"width", "height"}`` objects; anything else
        falls back to "square_hd".
        """
        if isinstance(size, dict):
            width = size.get("width")
            height = size.get("height")
            if isinstance(width, int) and isinstance(height, int):
                return {"width": width, "height": height}
            return size

        if not isinstance(size, str):
            return size

        normalized = size.strip().lower()
        if normalized in self._OPENAI_SIZE_TO_IMAGE_SIZE:
            return self._OPENAI_SIZE_TO_IMAGE_SIZE[normalized]

        if "x" in normalized:
            try:
                # A wrong number of parts or a non-integer part raises ValueError.
                width_str, height_str = normalized.split("x")
                width = int(width_str)
                height = int(height_str)
                return {"width": width, "height": height}
            except (ValueError, AttributeError):
                pass

        # Fallback to a safe default that Ideogram accepts.
        return "square_hd"

    def transform_image_generation_request(
        self,
        model: str,
        prompt: str,
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """
        Construct the request payload for Ideogram v3.

        Required:
            - prompt: text prompt describing the scene.
        Optional (subset):
            - rendering_speed, style_preset, style, style_codes, color_palette,
              image_urls, style_reference_images, expand_prompt, seed,
              negative_prompt, image_size, etc.

        ``optional_params`` is spread after ``prompt``, so a ``prompt`` key
        inside it would override the argument.
        """
        return {
            "prompt": prompt,
            **optional_params,
        }

    def transform_image_generation_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ImageResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ImageResponse:
        """
        Parse Ideogram v3 responses which contain a list of File objects.

        Each entry of ``images`` is appended to ``model_response.data`` as an
        ``ImageObject`` (dict entries contribute their ``url``; other entries
        are used as the URL directly). A ``seed`` in the response is copied
        into ``model_response._hidden_params`` when that attribute exists.

        Raises:
            The provider error from ``self.get_error_class`` when the body
            cannot be decoded as JSON.
        """
        try:
            response_data = raw_response.json()
        except Exception as e:
            raise self.get_error_class(
                error_message=f"Error transforming image generation response: {e}",
                status_code=raw_response.status_code,
                headers=raw_response.headers,
            )

        if not model_response.data:
            model_response.data = []

        images = response_data.get("images", [])
        if isinstance(images, list):
            for image_entry in images:
                if isinstance(image_entry, dict):
                    url = image_entry.get("url")
                else:
                    url = image_entry

                model_response.data.append(
                    ImageObject(
                        url=url,
                        b64_json=None,
                    )
                )

        if hasattr(model_response, "_hidden_params") and "seed" in response_data:
            model_response._hidden_params["seed"] = response_data["seed"]

        return model_response
@@ -30,7 +30,7 @@ class OpenAIGPT5Config(OpenAIGPTConfig):
from litellm.utils import supports_tool_choice
base_gpt_series_params = super().get_supported_openai_params(model=model)
gpt_5_only_params = ["reasoning_effort"]
gpt_5_only_params = ["reasoning_effort", "verbosity"]
base_gpt_series_params.extend(gpt_5_only_params)
if not supports_tool_choice(model=model):
base_gpt_series_params.remove("tool_choice")
@@ -0,0 +1,258 @@
from typing import Any, Dict, Optional, Tuple, cast
import httpx
import litellm
from litellm.llms.base_llm.vector_store_files.transformation import (
BaseVectorStoreFilesConfig,
)
from litellm.secret_managers.main import get_secret_str
from litellm.types.router import GenericLiteLLMParams
from litellm.types.vector_store_files import (
VectorStoreFileAuthCredentials,
VectorStoreFileContentResponse,
VectorStoreFileCreateRequest,
VectorStoreFileDeleteResponse,
VectorStoreFileListQueryParams,
VectorStoreFileListResponse,
VectorStoreFileObject,
VectorStoreFileUpdateRequest,
)
from litellm.utils import add_openai_metadata
def _clean_dict(source: Dict[str, Any]) -> Dict[str, Any]:
return {k: v for k, v in source.items() if v is not None}
class OpenAIVectorStoreFilesConfig(BaseVectorStoreFilesConfig):
    """Request and response transformations for the OpenAI vector store files endpoints."""

    ASSISTANTS_HEADER_KEY = "OpenAI-Beta"
    ASSISTANTS_HEADER_VALUE = "assistants=v2"

    def _json_or_provider_error(self, response: httpx.Response) -> Any:
        """Decode the response body as JSON, re-raising any failure as the provider error class."""
        try:
            return response.json()
        except Exception as exc:  # noqa: BLE001
            raise self.get_error_class(
                error_message=str(exc),
                status_code=response.status_code,
                headers=response.headers,
            )

    @staticmethod
    def _filter_attributes_in_place(payload: Dict[str, Any]) -> None:
        """Run dict-valued ``attributes`` through ``add_openai_metadata``; drop the key when that yields ``None``."""
        attributes = payload.get("attributes")
        if not isinstance(attributes, dict):
            return
        filtered_attributes = add_openai_metadata(attributes)
        if filtered_attributes is None:
            payload.pop("attributes", None)
        else:
            payload["attributes"] = filtered_attributes

    def get_auth_credentials(
        self, litellm_params: Dict[str, Any]
    ) -> VectorStoreFileAuthCredentials:
        """
        Build bearer-token auth headers from ``litellm_params["api_key"]``.

        Raises:
            ValueError: when no ``api_key`` entry is present (or it is ``None``).
        """
        api_key = litellm_params.get("api_key")
        if api_key is not None:
            return {
                "headers": {
                    "Authorization": f"Bearer {api_key}",
                }
            }
        raise ValueError("api_key is required")

    def get_vector_store_file_endpoints_by_type(self) -> Dict[
        str, Tuple[Tuple[str, str], ...]
    ]:
        """Return the (HTTP method, path template) pairs grouped under "read" and "write"."""
        read_routes = (
            ("GET", "/vector_stores/{vector_store_id}/files"),
            ("GET", "/vector_stores/{vector_store_id}/files/{file_id}"),
            (
                "GET",
                "/vector_stores/{vector_store_id}/files/{file_id}/content",
            ),
        )
        write_routes = (
            ("POST", "/vector_stores/{vector_store_id}/files"),
            ("POST", "/vector_stores/{vector_store_id}/files/{file_id}"),
            ("DELETE", "/vector_stores/{vector_store_id}/files/{file_id}"),
        )
        return {"read": read_routes, "write": write_routes}

    def validate_environment(
        self,
        *,
        headers: Dict[str, str],
        litellm_params: Optional[GenericLiteLLMParams],
    ) -> Dict[str, str]:
        """
        Add auth and content-type headers to ``headers`` (mutated in place)
        and return it.

        The API key is the first truthy value among ``litellm_params.api_key``,
        ``litellm.api_key``, ``litellm.openai_key`` and the ``OPENAI_API_KEY``
        secret. The assistants beta header is added only when the caller has
        not already set it.
        """
        params = litellm_params or GenericLiteLLMParams()
        api_key = (
            params.api_key
            or litellm.api_key
            or litellm.openai_key
            or get_secret_str("OPENAI_API_KEY")
        )

        headers["Authorization"] = f"Bearer {api_key}"
        headers["Content-Type"] = "application/json"
        # Keep a caller-provided beta header if there is one.
        headers.setdefault(self.ASSISTANTS_HEADER_KEY, self.ASSISTANTS_HEADER_VALUE)
        return headers

    def get_complete_url(
        self,
        *,
        api_base: Optional[str],
        vector_store_id: str,
        litellm_params: Dict[str, Any],
    ) -> str:
        """
        Return the files collection URL for ``vector_store_id``.

        The base is the first truthy value among ``api_base``,
        ``litellm.api_base``, the ``OPENAI_BASE_URL`` and ``OPENAI_API_BASE``
        secrets, and the public OpenAI v1 URL; trailing slashes are removed.
        """
        resolved_base = (
            api_base
            or litellm.api_base
            or get_secret_str("OPENAI_BASE_URL")
            or get_secret_str("OPENAI_API_BASE")
            or "https://api.openai.com/v1"
        )
        trimmed_base = resolved_base.rstrip("/")
        return f"{trimmed_base}/vector_stores/{vector_store_id}/files"

    def transform_create_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        create_request: VectorStoreFileCreateRequest,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return ``(api_base, body)``; the body drops ``None`` values and filters ``attributes``."""
        body: Dict[str, Any] = _clean_dict(dict(create_request))
        self._filter_attributes_in_place(body)
        return api_base, body

    def transform_create_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileObject:
        """Return the decoded JSON body of a create-file response."""
        return cast(VectorStoreFileObject, self._json_or_provider_error(response))

    def transform_list_vector_store_files_request(
        self,
        *,
        vector_store_id: str,
        query_params: VectorStoreFileListQueryParams,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return ``(api_base, query)`` with ``None``-valued query parameters removed."""
        query = _clean_dict(dict(query_params))
        return api_base, query

    def transform_list_vector_store_files_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileListResponse:
        """Return the decoded JSON body of a list-files response."""
        return cast(
            VectorStoreFileListResponse, self._json_or_provider_error(response)
        )

    def transform_retrieve_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return the single-file URL and an empty query dict."""
        file_url = f"{api_base}/{file_id}"
        return file_url, {}

    def transform_retrieve_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileObject:
        """Return the decoded JSON body of a retrieve-file response."""
        return cast(VectorStoreFileObject, self._json_or_provider_error(response))

    def transform_retrieve_vector_store_file_content_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return the file-content URL and an empty query dict."""
        content_url = f"{api_base}/{file_id}/content"
        return content_url, {}

    def transform_retrieve_vector_store_file_content_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileContentResponse:
        """Return the decoded JSON body of a file-content response."""
        return cast(
            VectorStoreFileContentResponse, self._json_or_provider_error(response)
        )

    def transform_update_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        update_request: VectorStoreFileUpdateRequest,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return the single-file URL and the update body; ``None`` values are kept, ``attributes`` is filtered."""
        body: Dict[str, Any] = dict(update_request)
        self._filter_attributes_in_place(body)
        file_url = f"{api_base}/{file_id}"
        return file_url, body

    def transform_update_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileObject:
        """Return the decoded JSON body of an update-file response."""
        return cast(VectorStoreFileObject, self._json_or_provider_error(response))

    def transform_delete_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return the single-file URL and an empty query dict."""
        file_url = f"{api_base}/{file_id}"
        return file_url, {}

    def transform_delete_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileDeleteResponse:
        """Return the decoded JSON body of a delete-file response."""
        return cast(
            VectorStoreFileDeleteResponse, self._json_or_provider_error(response)
        )
@@ -145,6 +145,8 @@ class OpenAIVectorStoreConfig(BaseVectorStoreConfig):
) -> Tuple[str, Dict]:
url = api_base # Base URL for creating vector stores
metadata = vector_store_create_optional_params.get("metadata", None)
metadata_payload = add_openai_metadata(metadata)
typed_request_body = VectorStoreCreateRequest(
name=vector_store_create_optional_params.get("name", None),
file_ids=vector_store_create_optional_params.get("file_ids", None),
@@ -154,7 +156,7 @@ class OpenAIVectorStoreConfig(BaseVectorStoreConfig):
chunking_strategy=vector_store_create_optional_params.get(
"chunking_strategy", None
),
metadata=add_openai_metadata(metadata) if metadata is not None else None,
metadata=metadata_payload,
)
dict_request_body = cast(dict, typed_request_body)
+5 -5
View File
@@ -178,10 +178,10 @@ class OpenAIVideoConfig(BaseVideoConfig):
# Construct the URL for video content download
url = f"{api_base.rstrip('/')}/{original_video_id}/content"
# Add video_id as query parameter
params = {"video_id": original_video_id}
return url, params
# No additional data needed for GET content request
data: Dict[str, Any] = {}
return url, data
def transform_video_remix_request(
self,
@@ -404,4 +404,4 @@ class OpenAIVideoConfig(BaseVideoConfig):
if isinstance(image, BufferedReader):
files_list.append((field_name, (image.name, image, image_content_type)))
else:
files_list.append((field_name, ("input_reference.png", image, image_content_type)))
files_list.append((field_name, ("input_reference.png", image, image_content_type)))
+6 -4
View File
@@ -390,6 +390,7 @@ async def acompletion(
reasoning_effort: Optional[
Literal["none", "minimal", "low", "medium", "high", "default"]
] = None,
verbosity: Optional[Literal["low", "medium", "high"]] = None,
safety_identifier: Optional[str] = None,
service_tier: Optional[str] = None,
# set api_base, api_version, api_key
@@ -961,6 +962,7 @@ def completion( # type: ignore # noqa: PLR0915
reasoning_effort: Optional[
Literal["none", "minimal", "low", "medium", "high", "default"]
] = None,
verbosity: Optional[Literal["low", "medium", "high"]] = None,
response_format: Optional[Union[dict, Type[BaseModel]]] = None,
seed: Optional[int] = None,
tools: Optional[List] = None,
@@ -2084,10 +2086,10 @@ def completion( # type: ignore # noqa: PLR0915
if extra_headers is not None:
optional_params["extra_headers"] = extra_headers
if (
litellm.enable_preview_features and metadata is not None
): # [PREVIEW] allow metadata to be passed to OPENAI
optional_params["metadata"] = add_openai_metadata(metadata)
if litellm.enable_preview_features:
metadata_payload = add_openai_metadata(metadata)
if metadata_payload is not None:
optional_params["metadata"] = metadata_payload
## LOAD CONFIG - if set
config = litellm.OpenAIConfig.get_config()
@@ -8515,10 +8515,18 @@
"/v1/images/generations"
]
},
"fal_ai/fal-ai/flux-pro/v1.1": {
"litellm_provider": "fal_ai",
"mode": "image_generation",
"output_cost_per_image": 0.04,
"supported_endpoints": [
"/v1/images/generations"
]
},
"fal_ai/fal-ai/flux-pro/v1.1-ultra": {
"litellm_provider": "fal_ai",
"mode": "image_generation",
"output_cost_per_image": 0.0398,
"output_cost_per_image": 0.06,
"supported_endpoints": [
"/v1/images/generations"
]
@@ -8531,6 +8539,30 @@
"/v1/images/generations"
]
},
"fal_ai/fal-ai/bytedance/seedream/v3/text-to-image": {
"litellm_provider": "fal_ai",
"mode": "image_generation",
"output_cost_per_image": 0.03,
"supported_endpoints": [
"/v1/images/generations"
]
},
"fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image": {
"litellm_provider": "fal_ai",
"mode": "image_generation",
"output_cost_per_image": 0.03,
"supported_endpoints": [
"/v1/images/generations"
]
},
"fal_ai/fal-ai/ideogram/v3": {
"litellm_provider": "fal_ai",
"mode": "image_generation",
"output_cost_per_image": 0.06,
"supported_endpoints": [
"/v1/images/generations"
]
},
"fal_ai/fal-ai/imagen4/preview": {
"litellm_provider": "fal_ai",
"mode": "image_generation",
@@ -10,9 +10,12 @@ import asyncio
import datetime
import hashlib
import json
from typing import Any, Dict, List, Optional, Set, Union, cast
import re
from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
from urllib.parse import urlparse
from fastapi import HTTPException
from httpx import HTTPStatusError
from mcp.types import CallToolRequestParams as MCPCallToolRequestParams
from mcp.types import CallToolResult
from mcp.types import Tool as MCPTool
@@ -20,6 +23,7 @@ from mcp.types import Tool as MCPTool
from litellm._logging import verbose_logger
from litellm.exceptions import BlockedPiiEntityError, GuardrailRaisedException
from litellm.experimental_mcp_client.client import MCPClient
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
from litellm.proxy._experimental.mcp_server.auth.user_api_key_auth_mcp import (
MCPRequestHandler,
)
@@ -38,12 +42,15 @@ from litellm.proxy._types import (
MCPTransportType,
UserAPIKeyAuth,
)
from litellm.proxy.common_utils.encrypt_decrypt_utils import (
decrypt_value_helper,
)
from litellm.proxy.common_utils.encrypt_decrypt_utils import decrypt_value_helper
from litellm.proxy.utils import ProxyLogging
from litellm.types.llms.custom_http import httpxSpecialProvider
from litellm.types.mcp import MCPAuth, MCPStdioConfig
from litellm.types.mcp_server.mcp_server_manager import MCPInfo, MCPServer
from litellm.types.mcp_server.mcp_server_manager import (
MCPInfo,
MCPOAuthMetadata,
MCPServer,
)
def _deserialize_json_dict(data: Any) -> Optional[Dict[str, str]]:
@@ -100,7 +107,7 @@ class MCPServerManager:
"""
return self.config_mcp_servers | self.registry
def load_servers_from_config(
async def load_servers_from_config(
self,
mcp_servers_config: Dict[str, Any],
mcp_aliases: Optional[Dict[str, str]] = None,
@@ -180,35 +187,57 @@ class MCPServerManager:
)()
name_for_prefix = get_server_prefix(temp_server)
server_url = server_config.get("url", None) or ""
# Generate stable server ID based on parameters
server_id = self._generate_stable_server_id(
server_name=server_name,
url=server_config.get("url", None) or "",
url=server_url,
transport=server_config.get("transport", MCPTransport.http),
auth_type=server_config.get("auth_type", None),
alias=alias,
)
auth_type = server_config.get("auth_type", None)
if server_url and auth_type is not None and auth_type == MCPAuth.oauth2:
mcp_oauth_metadata = await self._descovery_metadata(
server_url=server_url,
)
else:
mcp_oauth_metadata = None
resolved_scopes = server_config.get("scopes") or (
mcp_oauth_metadata.scopes if mcp_oauth_metadata else None
)
resolved_authorization_url = server_config.get("authorization_url") or (
mcp_oauth_metadata.authorization_url if mcp_oauth_metadata else None
)
resolved_token_url = server_config.get("token_url") or (
mcp_oauth_metadata.token_url if mcp_oauth_metadata else None
)
resolved_registration_url = server_config.get("registration_url") or (
mcp_oauth_metadata.registration_url if mcp_oauth_metadata else None
)
new_server = MCPServer(
server_id=server_id,
name=name_for_prefix,
alias=alias,
server_name=server_name,
spec_path=server_config.get("spec_path", None),
url=server_config.get("url", None) or "",
url=server_url,
command=server_config.get("command", None) or "",
args=server_config.get("args", None) or [],
env=server_config.get("env", None) or {},
# oauth specific fields
client_id=server_config.get("client_id", None),
client_secret=server_config.get("client_secret", None),
scopes=server_config.get("scopes", None),
authorization_url=server_config.get("authorization_url", None),
token_url=server_config.get("token_url", None),
registration_url=server_config.get("registration_url", None),
scopes=resolved_scopes,
authorization_url=resolved_authorization_url,
token_url=resolved_token_url,
registration_url=resolved_registration_url,
# TODO: utility fn the default values
transport=server_config.get("transport", MCPTransport.http),
auth_type=server_config.get("auth_type", None),
auth_type=auth_type,
authentication_token=server_config.get(
"authentication_token", server_config.get("auth_value", None)
),
@@ -356,12 +385,12 @@ class MCPServerManager:
)
# Update tool name to server name mapping (for both prefixed and base names)
self.tool_name_to_mcp_server_name_mapping[
base_tool_name
] = server_prefix
self.tool_name_to_mcp_server_name_mapping[
prefixed_tool_name
] = server_prefix
self.tool_name_to_mcp_server_name_mapping[base_tool_name] = (
server_prefix
)
self.tool_name_to_mcp_server_name_mapping[prefixed_tool_name] = (
server_prefix
)
registered_count += 1
verbose_logger.debug(
@@ -692,6 +721,252 @@ class MCPServerManager:
except Exception:
pass
async def _descovery_metadata(
    self,
    server_url: str,
) -> Optional[MCPOAuthMetadata]:
    """
    Discover OAuth metadata by following RFC 9728 (protected resource metadata discovery).

    Sends a GET to ``server_url`` and expects an HTTP error status. The
    ``WWW-Authenticate`` header of that error response is parsed for a
    ``resource_metadata`` URL and ``scope`` values; when no metadata URL is
    advertised, the well-known protected-resource locations are probed
    instead. The resulting authorization servers (or, as a fallback, the
    origin of ``server_url``) are then queried for their endpoints.

    This is best-effort: any failure, including one raised while following
    up on the challenge, is logged at debug level and yields ``None``.

    Args:
        server_url: URL of the MCP server to probe.

    Returns:
        The discovered ``MCPOAuthMetadata``, or ``None`` when the server did
        not answer with an error status or nothing could be discovered.
    """
    try:
        client = get_async_httpx_client(llm_provider=httpxSpecialProvider.MCP)
        try:
            response = await client.get(server_url)
            response.raise_for_status()
        except HTTPStatusError as exc:
            verbose_logger.debug(
                "MCP OAuth discovery for %s received status error: %s",
                server_url,
                exc,
            )
            # Keep a reference: ``exc`` is unbound once the handler exits.
            error_response = exc.response
        else:
            # A successful response carries no challenge to discover from.
            verbose_logger.warning(
                "MCP OAuth discovery unexpectedly succeeded for %s; server did not challenge",
                server_url,
            )
            return None

        # Everything below runs inside the outer ``try`` so that a failure
        # while following the challenge cannot escape this method.
        header_value: Optional[str] = None
        if error_response is not None:
            header_value = error_response.headers.get(
                "WWW-Authenticate"
            ) or error_response.headers.get("www-authenticate")

        resource_metadata_url, scopes = self._parse_www_authenticate_header(
            header_value
        )

        authorization_servers: List[str] = []
        resource_scopes: Optional[List[str]] = None
        if resource_metadata_url:
            (
                authorization_servers,
                resource_scopes,
            ) = await self._fetch_oauth_metadata_from_resource(
                resource_metadata_url
            )
        else:
            (
                authorization_servers,
                resource_scopes,
            ) = await self._attempt_well_known_discovery(server_url)

        metadata = None
        if not authorization_servers:
            # No authorization server advertised: fall back to the origin
            # (scheme + host) of the MCP server itself.
            try:
                parsed_url = urlparse(server_url)
                if parsed_url.scheme and parsed_url.netloc:
                    authorization_servers = [
                        f"{parsed_url.scheme}://{parsed_url.netloc}"
                    ]
            except Exception:
                authorization_servers = []

        if authorization_servers:
            metadata = await self._fetch_authorization_server_metadata(
                authorization_servers
            )

        # Scopes from the challenge header win over resource-metadata scopes,
        # and either one overrides the scopes the authorization server lists.
        preferred_scopes = scopes or resource_scopes
        if metadata is None and preferred_scopes:
            metadata = MCPOAuthMetadata(scopes=preferred_scopes)
        elif metadata is not None and preferred_scopes:
            metadata.scopes = preferred_scopes
        return metadata
    except Exception as exc:  # pragma: no cover - network/transient issues
        verbose_logger.debug(
            "MCP OAuth discovery failed for %s: %s", server_url, exc
        )
        return None
def _parse_www_authenticate_header(
self, header_value: Optional[str]
) -> Tuple[Optional[str], Optional[List[str]]]:
if not header_value:
return None, None
_, _, params_section = header_value.partition(" ")
params_section = params_section or header_value
param_pattern = re.compile(r"([a-zA-Z0-9_]+)\s*=\s*\"?([^\",]+)\"?")
params: Dict[str, str] = {
match.group(1).lower(): match.group(2).strip()
for match in param_pattern.finditer(params_section)
}
resource_metadata_url = params.get("resource_metadata")
scope_value = params.get("scope")
scopes_list = [s for s in (scope_value.split() if scope_value else []) if s]
scopes = scopes_list or None
return resource_metadata_url, scopes
async def _fetch_oauth_metadata_from_resource(
self, resource_metadata_url: str
) -> Tuple[List[str], Optional[List[str]]]:
if not resource_metadata_url:
return [], None
try:
client = get_async_httpx_client(
llm_provider=httpxSpecialProvider.MCP,
params={"timeout": 10.0, "follow_redirects": True},
)
response = await client.get(resource_metadata_url)
response.raise_for_status()
data = response.json()
except Exception as exc: # pragma: no cover - network issues
verbose_logger.debug(
"Failed to fetch MCP OAuth metadata from %s: %s",
resource_metadata_url,
exc,
)
return [], None
raw_servers = data.get("authorization_servers")
if isinstance(raw_servers, list):
authorization_servers = [
entry
for entry in raw_servers
if isinstance(entry, str) and entry.strip() != ""
]
else:
authorization_servers = []
scopes = self._extract_scopes(
data.get("scopes_supported") or data.get("scopes")
)
return authorization_servers, scopes
async def _attempt_well_known_discovery(
self, server_url: str
) -> Tuple[List[str], Optional[List[str]]]:
try:
parsed = urlparse(server_url)
except Exception:
return [], None
if not parsed.scheme or not parsed.netloc:
return [], None
base = f"{parsed.scheme}://{parsed.netloc}"
path = parsed.path or ""
path = path.strip("/")
candidate_urls: List[str] = []
if path:
candidate_urls.append(f"{base}/.well-known/oauth-protected-resource/{path}")
candidate_urls.append(f"{base}/.well-known/oauth-protected-resource")
for url in candidate_urls:
(
authorization_servers,
scopes,
) = await self._fetch_oauth_metadata_from_resource(url)
if authorization_servers:
return authorization_servers, scopes
return [], None
async def _fetch_authorization_server_metadata(
    self, authorization_servers: List[str]
) -> Optional[MCPOAuthMetadata]:
    """
    Return metadata from the first authorization server that yields any.

    Issuers are queried one at a time, in the order given; later issuers are
    not contacted once one has produced metadata.

    Args:
        authorization_servers: Issuer URLs to try.

    Returns:
        The first non-``None`` metadata found, or ``None`` if every issuer
        came back empty (or the list is empty).
    """
    for issuer_url in authorization_servers:
        discovered = await self._fetch_single_authorization_server_metadata(
            issuer_url
        )
        if discovered is None:
            continue
        return discovered
    return None
async def _fetch_single_authorization_server_metadata(
    self, issuer_url: str
) -> Optional[MCPOAuthMetadata]:
    """
    Look up OAuth endpoints for one authorization server (issuer).

    Candidate metadata URLs are tried in order: the path-suffixed
    ``oauth-authorization-server`` and ``openid-configuration`` well-known
    URLs (only when the issuer has a path), the host-level versions of both,
    and finally the issuer URL itself. The first document that supplies at
    least one of scopes, authorization, token or registration endpoint is
    returned.

    Args:
        issuer_url: Issuer URL of the authorization server.

    Returns:
        ``MCPOAuthMetadata`` built from the first usable document, or
        ``None`` when the issuer URL has no scheme/host or no candidate
        yields usable data. Candidates that fail to load, or whose body is
        not a JSON object, are skipped.
    """
    try:
        parsed = urlparse(issuer_url)
    except Exception:
        return None

    if not parsed.scheme or not parsed.netloc:
        return None

    base = f"{parsed.scheme}://{parsed.netloc}"
    path = (parsed.path or "").strip("/")

    candidate_urls: List[str] = []
    if path:
        candidate_urls.append(
            f"{base}/.well-known/oauth-authorization-server/{path}"
        )
        candidate_urls.append(f"{base}/.well-known/openid-configuration/{path}")
    candidate_urls.append(f"{base}/.well-known/oauth-authorization-server")
    candidate_urls.append(f"{base}/.well-known/openid-configuration")
    candidate_urls.append(issuer_url.rstrip("/"))

    for url in candidate_urls:
        try:
            client = get_async_httpx_client(
                llm_provider=httpxSpecialProvider.MCP,
                params={"timeout": 10.0, "follow_redirects": True},
            )
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()
        except Exception as exc:  # pragma: no cover - network issues
            verbose_logger.debug(
                "Failed to fetch authorization metadata from %s: %s",
                url,
                exc,
            )
            continue

        # A JSON list or scalar has no ``.get``; skip it instead of letting
        # AttributeError abort the remaining candidates.
        if not isinstance(data, dict):
            verbose_logger.debug(
                "Ignoring non-object authorization metadata from %s", url
            )
            continue

        scopes = self._extract_scopes(data.get("scopes_supported"))
        metadata = MCPOAuthMetadata(
            scopes=scopes,
            authorization_url=data.get("authorization_endpoint"),
            token_url=data.get("token_endpoint"),
            registration_url=data.get("registration_endpoint"),
        )

        # Only accept a document that actually contributed something.
        if any(
            [
                metadata.scopes,
                metadata.authorization_url,
                metadata.token_url,
                metadata.registration_url,
            ]
        ):
            return metadata

    return None
def _extract_scopes(self, scopes_value: Any) -> Optional[List[str]]:
if isinstance(scopes_value, str):
scopes = [s.strip() for s in scopes_value.split() if s.strip()]
return scopes or None
if isinstance(scopes_value, list):
scopes = [s for s in scopes_value if isinstance(s, str) and s.strip()]
return scopes or None
return None
async def _fetch_tools_with_timeout(
self, client: MCPClient, server_name: str
) -> List[MCPTool]:
@@ -721,11 +996,6 @@ class MCPServerManager:
f"Client operation failed for {server_name}: {str(e)}"
)
return []
finally:
try:
await client.disconnect()
except Exception:
pass
try:
return await asyncio.wait_for(_list_tools_task(), timeout=30.0)
@@ -640,24 +640,31 @@ if MCP_AVAILABLE:
allowed_mcp_servers = await _get_allowed_mcp_servers_from_mcp_server_names(
mcp_servers=mcp_servers,
allowed_mcp_servers=allowed_mcp_servers
allowed_mcp_servers=allowed_mcp_servers,
)
server_name: Optional[str]
if len(allowed_mcp_servers) == 1:
original_tool_name, server_name = name, allowed_mcp_servers[0].server_name
else:
# Remove prefix from tool name for logging and processing
original_tool_name, server_name = get_server_name_prefix_tool_mcp(name)
# Track resolved MCP server for both permission checks and dispatch
mcp_server: Optional[MCPServer] = None
if not server_name or not MCPRequestHandler.is_tool_allowed(
allowed_mcp_servers=[server.name for server in allowed_mcp_servers],
server_name=server_name,
):
raise HTTPException(
status_code=403,
detail=f"User not allowed to call this tool. Allowed MCP servers: {allowed_mcp_servers}",
)
# Remove prefix from tool name for logging and processing
original_tool_name, server_name = get_server_name_prefix_tool_mcp(name)
# If tool name is unprefixed, resolve its server so we can enforce permissions
if not server_name:
mcp_server = global_mcp_server_manager._get_mcp_server_from_tool_name(name)
if mcp_server:
server_name = mcp_server.name
# Only enforce server-level permissions when we can resolve a server
if server_name:
if not MCPRequestHandler.is_tool_allowed(
allowed_mcp_servers=[server.name for server in allowed_mcp_servers],
server_name=server_name,
):
raise HTTPException(
status_code=403,
detail=f"User not allowed to call this tool. Allowed MCP servers: {allowed_mcp_servers}",
)
standard_logging_mcp_tool_call: StandardLoggingMCPToolCall = (
_get_standard_logging_mcp_tool_call(
@@ -686,9 +693,11 @@ if MCP_AVAILABLE:
# Primary and recommended way to use external MCP servers
#########################################################
else:
mcp_server: Optional[
MCPServer
] = global_mcp_server_manager._get_mcp_server_from_tool_name(name)
# If we haven't already resolved the server, do it now for dispatch
if mcp_server is None:
mcp_server = global_mcp_server_manager._get_mcp_server_from_tool_name(
name
)
if mcp_server:
standard_logging_mcp_tool_call["mcp_server_cost_info"] = (
mcp_server.mcp_info or {}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More