mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-18 00:48:01 +00:00
Merge remote-tracking branch 'origin' into litellm_org_usage
This commit is contained in:
+374
-57
@@ -24,6 +24,39 @@ commands:
|
||||
cd enterprise
|
||||
python -m pip install -e .
|
||||
cd ..
|
||||
setup_litellm_test_deps:
|
||||
steps:
|
||||
- checkout
|
||||
- setup_google_dns
|
||||
- restore_cache:
|
||||
keys:
|
||||
- v2-litellm-deps-{{ checksum "requirements.txt" }}-{{ checksum ".circleci/config.yml" }}
|
||||
- v2-litellm-deps-
|
||||
- run:
|
||||
name: Install Dependencies
|
||||
command: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install -r requirements.txt
|
||||
pip install "pytest-mock==3.12.0"
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-retry==1.6.3"
|
||||
pip install "pytest-cov==5.0.0"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install "respx==0.22.0"
|
||||
pip install "hypercorn==0.17.3"
|
||||
pip install "pydantic==2.10.2"
|
||||
pip install "mcp==1.10.1"
|
||||
pip install "requests-mock>=1.12.1"
|
||||
pip install "responses==0.25.7"
|
||||
pip install "pytest-xdist==3.6.1"
|
||||
pip install "pytest-timeout==2.2.0"
|
||||
pip install "semantic_router==0.1.10"
|
||||
pip install "fastapi-offline==1.7.3"
|
||||
- setup_litellm_enterprise_pip
|
||||
- save_cache:
|
||||
paths:
|
||||
- ~/.cache/pip
|
||||
key: v2-litellm-deps-{{ checksum "requirements.txt" }}-{{ checksum ".circleci/config.yml" }}
|
||||
|
||||
jobs:
|
||||
# Add Windows testing job
|
||||
@@ -668,13 +701,16 @@ jobs:
|
||||
paths:
|
||||
- litellm_security_tests_coverage.xml
|
||||
- litellm_security_tests_coverage
|
||||
litellm_proxy_unit_testing: # Runs all tests with the "proxy", "key", "jwt" filenames
|
||||
# Split proxy unit tests into 3 jobs for faster execution and better debugging
|
||||
# test_key_generate_prisma runs separately without parallel execution to avoid event loop issues with logging worker
|
||||
litellm_proxy_unit_testing_key_generation:
|
||||
docker:
|
||||
- image: cimg/python:3.11
|
||||
auth:
|
||||
username: ${DOCKERHUB_USERNAME}
|
||||
password: ${DOCKERHUB_PASSWORD}
|
||||
working_directory: ~/project
|
||||
resource_class: large
|
||||
steps:
|
||||
- checkout
|
||||
- setup_google_dns
|
||||
@@ -699,6 +735,114 @@ jobs:
|
||||
pip install "pytest-retry==1.6.3"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install "pytest-cov==5.0.0"
|
||||
pip install "pytest-timeout==2.2.0"
|
||||
pip install "pytest-forked==1.6.0"
|
||||
pip install "mypy==1.18.2"
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install "google-genai==1.22.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install lunary==0.2.5
|
||||
pip install "azure-identity==1.16.1"
|
||||
pip install "langfuse==2.59.7"
|
||||
pip install "logfire==0.29.0"
|
||||
pip install numpydoc
|
||||
pip install traceloop-sdk==0.21.1
|
||||
pip install opentelemetry-api==1.25.0
|
||||
pip install opentelemetry-sdk==1.25.0
|
||||
pip install opentelemetry-exporter-otlp==1.25.0
|
||||
pip install openai==1.100.1
|
||||
pip install prisma==0.11.0
|
||||
pip install "detect_secrets==1.5.0"
|
||||
pip install "httpx==0.24.1"
|
||||
pip install "respx==0.22.0"
|
||||
pip install fastapi
|
||||
pip install "gunicorn==21.2.0"
|
||||
pip install "anyio==4.2.0"
|
||||
pip install "aiodynamo==23.10.1"
|
||||
pip install "asyncio==3.4.3"
|
||||
pip install "apscheduler==3.10.4"
|
||||
pip install "PyGithub==1.59.1"
|
||||
pip install argon2-cffi
|
||||
pip install "pytest-mock==3.12.0"
|
||||
pip install python-multipart
|
||||
pip install google-cloud-aiplatform
|
||||
pip install prometheus-client==0.20.0
|
||||
pip install "pydantic==2.10.2"
|
||||
pip install "diskcache==5.6.1"
|
||||
pip install "Pillow==10.3.0"
|
||||
pip install "jsonschema==4.22.0"
|
||||
pip install "pytest-postgresql==7.0.1"
|
||||
pip install "fakeredis==2.28.1"
|
||||
- setup_litellm_enterprise_pip
|
||||
- save_cache:
|
||||
paths:
|
||||
- ./venv
|
||||
key: v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
|
||||
- run:
|
||||
name: Run prisma ./docker/entrypoint.sh
|
||||
command: |
|
||||
set +e
|
||||
chmod +x docker/entrypoint.sh
|
||||
./docker/entrypoint.sh
|
||||
set -e
|
||||
- run:
|
||||
name: Run key generation tests (no parallel execution to avoid event loop issues)
|
||||
command: |
|
||||
pwd
|
||||
ls
|
||||
# Run without -n flag to avoid pytest-xdist event loop conflicts with logging worker
|
||||
python -m pytest tests/proxy_unit_tests/test_key_generate_prisma.py --cov=litellm --cov-report=xml --junitxml=test-results/junit-key-generation.xml --durations=10 --timeout=300 -vv --log-cli-level=INFO
|
||||
no_output_timeout: 120m
|
||||
- run:
|
||||
name: Rename the coverage files
|
||||
command: |
|
||||
mv coverage.xml litellm_proxy_unit_tests_key_generation_coverage.xml
|
||||
mv .coverage litellm_proxy_unit_tests_key_generation_coverage
|
||||
- store_test_results:
|
||||
path: test-results
|
||||
- persist_to_workspace:
|
||||
root: .
|
||||
paths:
|
||||
- litellm_proxy_unit_tests_key_generation_coverage.xml
|
||||
- litellm_proxy_unit_tests_key_generation_coverage
|
||||
litellm_proxy_unit_testing_part1:
|
||||
docker:
|
||||
- image: cimg/python:3.11
|
||||
auth:
|
||||
username: ${DOCKERHUB_USERNAME}
|
||||
password: ${DOCKERHUB_PASSWORD}
|
||||
working_directory: ~/project
|
||||
resource_class: large
|
||||
steps:
|
||||
- checkout
|
||||
- setup_google_dns
|
||||
- run:
|
||||
name: Show git commit hash
|
||||
command: |
|
||||
echo "Git commit hash: $CIRCLE_SHA1"
|
||||
- run:
|
||||
name: Install PostgreSQL
|
||||
command: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y postgresql-14 postgresql-contrib-14
|
||||
- restore_cache:
|
||||
keys:
|
||||
- v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
|
||||
- run:
|
||||
name: Install Dependencies
|
||||
command: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install -r .circleci/requirements.txt
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-retry==1.6.3"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install "pytest-cov==5.0.0"
|
||||
pip install "pytest-timeout==2.2.0"
|
||||
pip install "pytest-forked==1.6.0"
|
||||
pip install "mypy==1.18.2"
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
@@ -752,28 +896,132 @@ jobs:
|
||||
chmod +x docker/entrypoint.sh
|
||||
./docker/entrypoint.sh
|
||||
set -e
|
||||
# Run pytest and generate JUnit XML report
|
||||
- run:
|
||||
name: Run tests
|
||||
name: Run proxy unit tests (part 1 - auth checks only, key generation in separate job)
|
||||
command: |
|
||||
pwd
|
||||
ls
|
||||
python -m pytest tests/proxy_unit_tests --cov=litellm --cov-report=xml -vv -x -v --junitxml=test-results/junit.xml --durations=5 -n 4
|
||||
# Run auth tests with parallel execution (test_key_generate_prisma moved to separate job to avoid event loop issues)
|
||||
python -m pytest tests/proxy_unit_tests/test_auth_checks.py tests/proxy_unit_tests/test_user_api_key_auth.py --cov=litellm --cov-report=xml --junitxml=test-results/junit-part1.xml --durations=10 -n 8 --timeout=300 -vv --log-cli-level=INFO
|
||||
no_output_timeout: 120m
|
||||
- run:
|
||||
name: Rename the coverage files
|
||||
command: |
|
||||
mv coverage.xml litellm_proxy_unit_tests_coverage.xml
|
||||
mv .coverage litellm_proxy_unit_tests_coverage
|
||||
# Store test results
|
||||
mv coverage.xml litellm_proxy_unit_tests_part1_coverage.xml
|
||||
mv .coverage litellm_proxy_unit_tests_part1_coverage
|
||||
- store_test_results:
|
||||
path: test-results
|
||||
|
||||
- persist_to_workspace:
|
||||
root: .
|
||||
paths:
|
||||
- litellm_proxy_unit_tests_coverage.xml
|
||||
- litellm_proxy_unit_tests_coverage
|
||||
- litellm_proxy_unit_tests_part1_coverage.xml
|
||||
- litellm_proxy_unit_tests_part1_coverage
|
||||
litellm_proxy_unit_testing_part2:
|
||||
docker:
|
||||
- image: cimg/python:3.11
|
||||
auth:
|
||||
username: ${DOCKERHUB_USERNAME}
|
||||
password: ${DOCKERHUB_PASSWORD}
|
||||
working_directory: ~/project
|
||||
resource_class: large
|
||||
steps:
|
||||
- checkout
|
||||
- setup_google_dns
|
||||
- run:
|
||||
name: Show git commit hash
|
||||
command: |
|
||||
echo "Git commit hash: $CIRCLE_SHA1"
|
||||
- run:
|
||||
name: Install PostgreSQL
|
||||
command: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y postgresql-14 postgresql-contrib-14
|
||||
- restore_cache:
|
||||
keys:
|
||||
- v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
|
||||
- run:
|
||||
name: Install Dependencies
|
||||
command: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install -r .circleci/requirements.txt
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-retry==1.6.3"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install "pytest-cov==5.0.0"
|
||||
pip install "pytest-timeout==2.2.0"
|
||||
pip install "pytest-forked==1.6.0"
|
||||
pip install "mypy==1.18.2"
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install "google-genai==1.22.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install lunary==0.2.5
|
||||
pip install "azure-identity==1.16.1"
|
||||
pip install "langfuse==2.59.7"
|
||||
pip install "logfire==0.29.0"
|
||||
pip install numpydoc
|
||||
pip install traceloop-sdk==0.21.1
|
||||
pip install opentelemetry-api==1.25.0
|
||||
pip install opentelemetry-sdk==1.25.0
|
||||
pip install opentelemetry-exporter-otlp==1.25.0
|
||||
pip install openai==1.100.1
|
||||
pip install prisma==0.11.0
|
||||
pip install "detect_secrets==1.5.0"
|
||||
pip install "httpx==0.24.1"
|
||||
pip install "respx==0.22.0"
|
||||
pip install fastapi
|
||||
pip install "gunicorn==21.2.0"
|
||||
pip install "anyio==4.2.0"
|
||||
pip install "aiodynamo==23.10.1"
|
||||
pip install "asyncio==3.4.3"
|
||||
pip install "apscheduler==3.10.4"
|
||||
pip install "PyGithub==1.59.1"
|
||||
pip install argon2-cffi
|
||||
pip install "pytest-mock==3.12.0"
|
||||
pip install python-multipart
|
||||
pip install google-cloud-aiplatform
|
||||
pip install prometheus-client==0.20.0
|
||||
pip install "pydantic==2.10.2"
|
||||
pip install "diskcache==5.6.1"
|
||||
pip install "Pillow==10.3.0"
|
||||
pip install "jsonschema==4.22.0"
|
||||
pip install "pytest-postgresql==7.0.1"
|
||||
pip install "fakeredis==2.28.1"
|
||||
pip install "pytest-xdist==3.6.1"
|
||||
- setup_litellm_enterprise_pip
|
||||
- save_cache:
|
||||
paths:
|
||||
- ./venv
|
||||
key: v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
|
||||
- run:
|
||||
name: Run prisma ./docker/entrypoint.sh
|
||||
command: |
|
||||
set +e
|
||||
chmod +x docker/entrypoint.sh
|
||||
./docker/entrypoint.sh
|
||||
set -e
|
||||
- run:
|
||||
name: Run proxy unit tests (part 2 - remaining tests)
|
||||
command: |
|
||||
pwd
|
||||
ls
|
||||
python -m pytest tests/proxy_unit_tests --ignore=tests/proxy_unit_tests/test_key_generate_prisma.py --ignore=tests/proxy_unit_tests/test_auth_checks.py --ignore=tests/proxy_unit_tests/test_user_api_key_auth.py --cov=litellm --cov-report=xml --junitxml=test-results/junit-part2.xml --durations=10 -n 8 --timeout=300 -vv --log-cli-level=INFO
|
||||
no_output_timeout: 120m
|
||||
- run:
|
||||
name: Rename the coverage files
|
||||
command: |
|
||||
mv coverage.xml litellm_proxy_unit_tests_part2_coverage.xml
|
||||
mv .coverage litellm_proxy_unit_tests_part2_coverage
|
||||
- store_test_results:
|
||||
path: test-results
|
||||
- persist_to_workspace:
|
||||
root: .
|
||||
paths:
|
||||
- litellm_proxy_unit_tests_part2_coverage.xml
|
||||
- litellm_proxy_unit_tests_part2_coverage
|
||||
litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
|
||||
docker:
|
||||
- image: cimg/python:3.13.1
|
||||
@@ -1128,59 +1376,88 @@ jobs:
|
||||
paths:
|
||||
- search_coverage.xml
|
||||
- search_coverage
|
||||
litellm_mapped_tests:
|
||||
# Split litellm_mapped_tests into 3 parallel jobs for 3x faster execution
|
||||
litellm_mapped_tests_proxy:
|
||||
docker:
|
||||
- image: cimg/python:3.11
|
||||
auth:
|
||||
username: ${DOCKERHUB_USERNAME}
|
||||
password: ${DOCKERHUB_PASSWORD}
|
||||
working_directory: ~/project
|
||||
|
||||
resource_class: xlarge
|
||||
steps:
|
||||
- checkout
|
||||
- setup_google_dns
|
||||
- setup_litellm_test_deps
|
||||
- run:
|
||||
name: Install Dependencies
|
||||
name: Run proxy tests
|
||||
command: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install -r requirements.txt
|
||||
pip install "pytest-mock==3.12.0"
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-retry==1.6.3"
|
||||
pip install "pytest-cov==5.0.0"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install "respx==0.22.0"
|
||||
pip install "hypercorn==0.17.3"
|
||||
pip install "pydantic==2.10.2"
|
||||
pip install "mcp==1.10.1"
|
||||
pip install "requests-mock>=1.12.1"
|
||||
pip install "responses==0.25.7"
|
||||
pip install "pytest-xdist==3.6.1"
|
||||
pip install "semantic_router==0.1.10"
|
||||
pip install "fastapi-offline==1.7.3"
|
||||
- setup_litellm_enterprise_pip
|
||||
# Run pytest and generate JUnit XML report
|
||||
- run:
|
||||
name: Run litellm tests
|
||||
command: |
|
||||
pwd
|
||||
ls
|
||||
python -m pytest -vv tests/test_litellm --cov=litellm --cov-report=xml -v --junitxml=test-results/junit-litellm.xml --durations=10 -n 8
|
||||
python -m pytest tests/test_litellm/proxy --cov=litellm --cov-report=xml --junitxml=test-results/junit-proxy.xml --durations=10 -n 16 --maxfail=5 --timeout=300 -vv --log-cli-level=WARNING
|
||||
no_output_timeout: 120m
|
||||
- run:
|
||||
name: Rename the coverage files
|
||||
command: |
|
||||
mv coverage.xml litellm_mapped_tests_coverage.xml
|
||||
mv .coverage litellm_mapped_tests_coverage
|
||||
|
||||
# Store test results
|
||||
mv coverage.xml litellm_proxy_tests_coverage.xml
|
||||
mv .coverage litellm_proxy_tests_coverage
|
||||
- store_test_results:
|
||||
path: test-results
|
||||
- persist_to_workspace:
|
||||
root: .
|
||||
paths:
|
||||
- litellm_mapped_tests_coverage.xml
|
||||
- litellm_mapped_tests_coverage
|
||||
- litellm_proxy_tests_coverage.xml
|
||||
- litellm_proxy_tests_coverage
|
||||
litellm_mapped_tests_llms:
|
||||
docker:
|
||||
- image: cimg/python:3.11
|
||||
auth:
|
||||
username: ${DOCKERHUB_USERNAME}
|
||||
password: ${DOCKERHUB_PASSWORD}
|
||||
working_directory: ~/project
|
||||
resource_class: xlarge
|
||||
steps:
|
||||
- setup_litellm_test_deps
|
||||
- run:
|
||||
name: Run LLM provider tests
|
||||
command: |
|
||||
python -m pytest tests/test_litellm/llms --cov=litellm --cov-report=xml --junitxml=test-results/junit-llms.xml --durations=10 -n 16 --maxfail=5 --timeout=300 -vv --log-cli-level=WARNING
|
||||
no_output_timeout: 120m
|
||||
- run:
|
||||
name: Rename the coverage files
|
||||
command: |
|
||||
mv coverage.xml litellm_llms_tests_coverage.xml
|
||||
mv .coverage litellm_llms_tests_coverage
|
||||
- store_test_results:
|
||||
path: test-results
|
||||
- persist_to_workspace:
|
||||
root: .
|
||||
paths:
|
||||
- litellm_llms_tests_coverage.xml
|
||||
- litellm_llms_tests_coverage
|
||||
litellm_mapped_tests_core:
|
||||
docker:
|
||||
- image: cimg/python:3.11
|
||||
auth:
|
||||
username: ${DOCKERHUB_USERNAME}
|
||||
password: ${DOCKERHUB_PASSWORD}
|
||||
working_directory: ~/project
|
||||
resource_class: xlarge
|
||||
steps:
|
||||
- setup_litellm_test_deps
|
||||
- run:
|
||||
name: Run core tests
|
||||
command: |
|
||||
python -m pytest tests/test_litellm --ignore=tests/test_litellm/proxy --ignore=tests/test_litellm/llms --cov=litellm --cov-report=xml --junitxml=test-results/junit-core.xml --durations=10 -n 16 --maxfail=5 --timeout=300 -vv --log-cli-level=WARNING
|
||||
no_output_timeout: 120m
|
||||
- run:
|
||||
name: Rename the coverage files
|
||||
command: |
|
||||
mv coverage.xml litellm_core_tests_coverage.xml
|
||||
mv .coverage litellm_core_tests_coverage
|
||||
- store_test_results:
|
||||
path: test-results
|
||||
- persist_to_workspace:
|
||||
root: .
|
||||
paths:
|
||||
- litellm_core_tests_coverage.xml
|
||||
- litellm_core_tests_coverage
|
||||
litellm_mapped_enterprise_tests:
|
||||
docker:
|
||||
- image: cimg/python:3.11
|
||||
@@ -1447,7 +1724,7 @@ jobs:
|
||||
command: |
|
||||
pwd
|
||||
ls
|
||||
python -m pytest -vv tests/logging_callback_tests --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
|
||||
python -m pytest -vv tests/logging_callback_tests --cov=litellm --cov-report=xml -s -v --junitxml=test-results/junit.xml --durations=5
|
||||
no_output_timeout: 120m
|
||||
- run:
|
||||
name: Rename the coverage files
|
||||
@@ -1914,14 +2191,14 @@ jobs:
|
||||
sudo usermod -aG docker $USER
|
||||
docker version
|
||||
- run:
|
||||
name: Install Python 3.9
|
||||
name: Install Python 3.10
|
||||
command: |
|
||||
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
|
||||
bash miniconda.sh -b -p $HOME/miniconda
|
||||
export PATH="$HOME/miniconda/bin:$PATH"
|
||||
conda init bash
|
||||
source ~/.bashrc
|
||||
conda create -n myenv python=3.9 -y
|
||||
conda create -n myenv python=3.10 -y
|
||||
conda activate myenv
|
||||
python --version
|
||||
- run:
|
||||
@@ -2695,19 +2972,22 @@ jobs:
|
||||
sudo usermod -aG docker $USER
|
||||
docker version
|
||||
- run:
|
||||
name: Install Python 3.9
|
||||
name: Install Python 3.10
|
||||
command: |
|
||||
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
|
||||
bash miniconda.sh -b -p $HOME/miniconda
|
||||
export PATH="$HOME/miniconda/bin:$PATH"
|
||||
conda init bash
|
||||
source ~/.bashrc
|
||||
conda create -n myenv python=3.9 -y
|
||||
conda create -n myenv python=3.10 -y
|
||||
conda activate myenv
|
||||
python --version
|
||||
- run:
|
||||
name: Install Dependencies
|
||||
command: |
|
||||
export PATH="$HOME/miniconda/bin:$PATH"
|
||||
source $HOME/miniconda/etc/profile.d/conda.sh
|
||||
conda activate myenv
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-retry==1.6.3"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
@@ -2736,6 +3016,8 @@ jobs:
|
||||
pip install "langchain_mcp_adapters==0.0.5"
|
||||
pip install "langchain_openai==0.2.1"
|
||||
pip install "langgraph==0.3.18"
|
||||
pip install "fastuuid==0.13.5"
|
||||
pip install -r requirements.txt
|
||||
- run:
|
||||
name: Install dockerize
|
||||
command: |
|
||||
@@ -2848,6 +3130,9 @@ jobs:
|
||||
- run:
|
||||
name: Run tests
|
||||
command: |
|
||||
export PATH="$HOME/miniconda/bin:$PATH"
|
||||
source $HOME/miniconda/etc/profile.d/conda.sh
|
||||
conda activate myenv
|
||||
pwd
|
||||
ls
|
||||
python -m pytest -vv tests/pass_through_tests/ -x --junitxml=test-results/junit.xml --durations=5
|
||||
@@ -2878,7 +3163,7 @@ jobs:
|
||||
python -m venv venv
|
||||
. venv/bin/activate
|
||||
pip install coverage
|
||||
coverage combine llm_translation_coverage llm_responses_api_coverage ocr_coverage search_coverage mcp_coverage logging_coverage audio_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_security_tests_coverage guardrails_coverage
|
||||
coverage combine llm_translation_coverage llm_responses_api_coverage ocr_coverage search_coverage mcp_coverage logging_coverage audio_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_part1_coverage litellm_proxy_unit_tests_part2_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_security_tests_coverage guardrails_coverage litellm_mapped_tests_coverage
|
||||
coverage xml
|
||||
- codecov/upload:
|
||||
file: ./coverage.xml
|
||||
@@ -3300,7 +3585,19 @@ workflows:
|
||||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- litellm_proxy_unit_testing:
|
||||
- litellm_proxy_unit_testing_key_generation:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- litellm_proxy_unit_testing_part1:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- litellm_proxy_unit_testing_part2:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
@@ -3444,7 +3741,19 @@ workflows:
|
||||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- litellm_mapped_tests:
|
||||
- litellm_mapped_tests_proxy:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- litellm_mapped_tests_llms:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- litellm_mapped_tests_core:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
@@ -3495,7 +3804,9 @@ workflows:
|
||||
- llm_responses_api_testing
|
||||
- ocr_testing
|
||||
- search_testing
|
||||
- litellm_mapped_tests
|
||||
- litellm_mapped_tests_proxy
|
||||
- litellm_mapped_tests_llms
|
||||
- litellm_mapped_tests_core
|
||||
- litellm_mapped_enterprise_tests
|
||||
- batches_testing
|
||||
- litellm_utils_testing
|
||||
@@ -3506,7 +3817,9 @@ workflows:
|
||||
- litellm_router_testing
|
||||
- litellm_router_unit_testing
|
||||
- caching_unit_tests
|
||||
- litellm_proxy_unit_testing
|
||||
- litellm_proxy_unit_testing_key_generation
|
||||
- litellm_proxy_unit_testing_part1
|
||||
- litellm_proxy_unit_testing_part2
|
||||
- litellm_security_tests
|
||||
- langfuse_logging_unit_tests
|
||||
- local_testing
|
||||
@@ -3560,7 +3873,9 @@ workflows:
|
||||
- llm_responses_api_testing
|
||||
- ocr_testing
|
||||
- search_testing
|
||||
- litellm_mapped_tests
|
||||
- litellm_mapped_tests_proxy
|
||||
- litellm_mapped_tests_llms
|
||||
- litellm_mapped_tests_core
|
||||
- litellm_mapped_enterprise_tests
|
||||
- batches_testing
|
||||
- litellm_utils_testing
|
||||
@@ -3576,7 +3891,9 @@ workflows:
|
||||
- auth_ui_unit_tests
|
||||
- db_migration_disable_update_check
|
||||
- e2e_ui_testing
|
||||
- litellm_proxy_unit_testing
|
||||
- litellm_proxy_unit_testing_key_generation
|
||||
- litellm_proxy_unit_testing_part1
|
||||
- litellm_proxy_unit_testing_part2
|
||||
- litellm_security_tests
|
||||
- installing_litellm_on_python
|
||||
- installing_litellm_on_python_3_13
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
# js-yaml CVE-2025-64718
|
||||
# This vulnerability is not applicable because we've forced js-yaml to version 4.1.1
|
||||
# via npm overrides in package.json. Trivy incorrectly reports this based on
|
||||
# dependency requirements in the lockfile, but the actual installed version is 4.1.1.
|
||||
# Verified with: npm list js-yaml
|
||||
CVE-2025-64718
|
||||
|
||||
@@ -16,7 +16,7 @@ Get free 7-day trial key [here](https://www.litellm.ai/enterprise#trial)
|
||||
|
||||
Includes all enterprise features.
|
||||
|
||||
<Image img={require('../img/enterprise_vs_oss.png')} />
|
||||
<Image img={require('../img/enterprise_vs_oss_2.png')} />
|
||||
|
||||
[**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)
|
||||
|
||||
@@ -40,7 +40,7 @@ Self-Managed Enterprise deployments require our team to understand your exact ne
|
||||
|
||||
### How does deployment with Enterprise License work?
|
||||
|
||||
You just deploy [our docker image](https://docs.litellm.ai/docs/proxy/deploy) and get an enterprise license key to add to your environment to unlock additional functionality (SSO, Prometheus metrics, etc.).
|
||||
You just deploy [our docker image](https://docs.litellm.ai/docs/proxy/deploy) and get an enterprise license key to add to your environment to unlock additional functionality (SSO, etc.).
|
||||
|
||||
```env
|
||||
LITELLM_LICENSE="eyJ..."
|
||||
|
||||
@@ -211,11 +211,12 @@ mcp_servers:
|
||||
oauth2_example:
|
||||
url: "https://my-mcp-server.com/mcp"
|
||||
auth_type: "oauth2" # 👈 KEY CHANGE
|
||||
authorization_url: "https://my-mcp-server.com/oauth/authorize" # optional for client-credentials
|
||||
token_url: "https://my-mcp-server.com/oauth/token" # required
|
||||
authorization_url: "https://my-mcp-server.com/oauth/authorize" # optional override
|
||||
token_url: "https://my-mcp-server.com/oauth/token" # optional override
|
||||
registration_url: "https://my-mcp-server.com/oauth/register" # optional override
|
||||
client_id: os.environ/OAUTH_CLIENT_ID
|
||||
client_secret: os.environ/OAUTH_CLIENT_SECRET
|
||||
scopes: ["tool.read", "tool.write"] # optional
|
||||
scopes: ["tool.read", "tool.write"] # optional override
|
||||
|
||||
bearer_example:
|
||||
url: "https://my-mcp-server.com/mcp"
|
||||
@@ -325,6 +326,10 @@ mcp_servers:
|
||||
| `spec_path` | Yes | Path or URL to your OpenAPI specification file (JSON or YAML) |
|
||||
| `auth_type` | No | Authentication type: `none`, `api_key`, `bearer_token`, `basic`, `authorization`, `oauth2` |
|
||||
| `auth_value` | No | Authentication value (required if `auth_type` is set) |
|
||||
| `authorization_url` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM auto-discovers it. |
|
||||
| `token_url` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM auto-discovers it. |
|
||||
| `registration_url` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM auto-discovers it. |
|
||||
| `scopes` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM uses the scopes advertised by the server. |
|
||||
| `description` | No | Optional description for the MCP server |
|
||||
| `allowed_tools` | No | List of specific tools to allow (see [MCP Tool Filtering](#mcp-tool-filtering)) |
|
||||
| `disallowed_tools` | No | List of specific tools to block (see [MCP Tool Filtering](#mcp-tool-filtering)) |
|
||||
@@ -1224,17 +1229,10 @@ mcp_servers:
|
||||
github_mcp:
|
||||
url: "https://api.githubcopilot.com/mcp"
|
||||
auth_type: oauth2
|
||||
authorization_url: https://github.com/login/oauth/authorize
|
||||
token_url: https://github.com/login/oauth/access_token
|
||||
client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
|
||||
client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
|
||||
scopes: ["public_repo", "user:email"]
|
||||
```
|
||||
|
||||
**Note**
|
||||
In the future, users will only need to specify the `url` of the MCP server.
|
||||
LiteLLM will automatically resolve the corresponding `authorization_url`, `token_url`, and `registration_url` based on the MCP server metadata (e.g., `.well-known/oauth-authorization-server` or `oauth-protected-resource`).
|
||||
|
||||
[**See Claude Code Tutorial**](./tutorials/claude_responses_api#connecting-mcp-servers)
|
||||
|
||||
## Using your MCP with client side credentials
|
||||
@@ -1887,4 +1885,4 @@ async with stdio_client(server_params) as (read, write):
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
</Tabs>
|
||||
|
||||
@@ -953,6 +953,30 @@ except Exception as e:
|
||||
|
||||
Shout-out to @[Shekhar Patnaik](https://www.linkedin.com/in/patnaikshekhar) for requesting this!
|
||||
|
||||
### Context Management (Beta)
|
||||
|
||||
Anthropic’s [context editing](https://docs.claude.com/en/docs/build-with-claude/context-editing) API lets you automatically clear older tool results or thinking blocks. LiteLLM now forwards the native `context_management` payload when you call Anthropic models, and automatically attaches the required `context-management-2025-06-27` beta header.
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
|
||||
response = completion(
|
||||
model="anthropic/claude-sonnet-4-20250514",
|
||||
messages=[{"role": "user", "content": "Summarize the latest tool results"}],
|
||||
context_management={
|
||||
"edits": [
|
||||
{
|
||||
"type": "clear_tool_uses_20250919",
|
||||
"trigger": {"type": "input_tokens", "value": 30000},
|
||||
"keep": {"type": "tool_uses", "value": 3},
|
||||
"clear_at_least": {"type": "input_tokens", "value": 5000},
|
||||
"exclude_tools": ["web_search"],
|
||||
}
|
||||
]
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
### Anthropic Hosted Tools (Computer, Text Editor, Web Search, Memory)
|
||||
|
||||
|
||||
|
||||
@@ -31,10 +31,14 @@ Get your API key from [fal.ai](https://fal.ai/).
|
||||
|
||||
| Model Name | Description | Documentation |
|
||||
|------------|-------------|---------------|
|
||||
| `fal_ai/fal-ai/flux-pro/v1.1` | FLUX Pro v1.1 - Balanced speed and quality | [Docs ↗](https://fal.ai/models/fal-ai/flux-pro/v1.1) |
|
||||
| `fal_ai/flux/schnell` | Flux Schnell - Low-latency generation with `image_size` support | [Docs ↗](https://fal.ai/models/fal-ai/flux/schnell) |
|
||||
| `fal_ai/fal-ai/bytedance/seedream/v3/text-to-image` | ByteDance Seedream v3 - Text-to-image with `image_size` control | [Docs ↗](https://fal.ai/models/fal-ai/bytedance/seedream/v3/text-to-image) |
|
||||
| `fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image` | ByteDance Dreamina v3.1 - Text-to-image with `image_size` control | [Docs ↗](https://fal.ai/models/fal-ai/bytedance/dreamina/v3.1/text-to-image) |
|
||||
| `fal_ai/fal-ai/flux-pro/v1.1-ultra` | FLUX Pro v1.1 Ultra - High-quality image generation | [Docs ↗](https://fal.ai/models/fal-ai/flux-pro/v1.1-ultra) |
|
||||
| `fal_ai/fal-ai/imagen4/preview` | Google's Imagen 4 - Highest quality model | [Docs ↗](https://fal.ai/models/fal-ai/imagen4/preview) |
|
||||
| `fal_ai/fal-ai/recraft/v3/text-to-image` | Recraft v3 - Multiple style options | [Docs ↗](https://fal.ai/models/fal-ai/recraft/v3/text-to-image) |
|
||||
| `fal_ai/fal-ai/ideogram/v3` | Ideogram v3 - Lettering-first creative model (Balanced: $0.06/image) | [Docs ↗](https://fal.ai/models/fal-ai/ideogram/v3) |
|
||||
| `fal_ai/fal-ai/stable-diffusion-v35-medium` | Stable Diffusion v3.5 Medium | [Docs ↗](https://fal.ai/models/fal-ai/stable-diffusion-v35-medium) |
|
||||
| `fal_ai/bria/text-to-image/3.2` | Bria 3.2 - Commercial-grade generation | [Docs ↗](https://fal.ai/models/bria/text-to-image/3.2) |
|
||||
|
||||
|
||||
@@ -486,6 +486,53 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||
|
||||
See [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning) for more details on organization verification requirements.
|
||||
|
||||
### Verbosity Control for GPT-5 Models
|
||||
|
||||
The `verbosity` parameter controls the length and detail of responses from GPT-5 family models. It accepts three values: `"low"`, `"medium"`, or `"high"`.
|
||||
|
||||
**Supported models:** All GPT-5 family models (`gpt-5`, `gpt-5.1`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-5-pro`)
|
||||
|
||||
**Use cases:**
|
||||
- **`"low"`**: Best for concise answers or simple code generation (e.g., SQL queries)
|
||||
- **`"medium"`**: Default - balanced output length
|
||||
- **`"high"`**: Use when you need thorough explanations or extensive code refactoring
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
```python
|
||||
import litellm
|
||||
|
||||
# Low verbosity - concise responses
|
||||
response = litellm.completion(
|
||||
model="gpt-5.1",
|
||||
messages=[{"role": "user", "content": "Write a function to reverse a string"}],
|
||||
verbosity="low"
|
||||
)
|
||||
|
||||
# High verbosity - detailed responses
|
||||
response = litellm.completion(
|
||||
model="gpt-5.1",
|
||||
messages=[{"role": "user", "content": "Explain how neural networks work"}],
|
||||
verbosity="high"
|
||||
)
|
||||
```
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="proxy" label="PROXY">
|
||||
```bash
|
||||
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'Authorization: Bearer sk-1234' \
|
||||
-d '{
|
||||
"model": "gpt-5.1",
|
||||
"messages": [{"role": "user", "content": "Write a function to reverse a string"}],
|
||||
"verbosity": "low"
|
||||
}'
|
||||
```
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
|
||||
## OpenAI Chat Completion to Responses API Bridge
|
||||
|
||||
Call any Responses API model from OpenAI's `/chat/completions` endpoint.
|
||||
|
||||
@@ -32,13 +32,9 @@ Features:
|
||||
- ✅ [Set Model budgets for Virtual Keys](./users#-virtual-key-model-specific)
|
||||
- ✅ [Exporting LLM Logs to GCS Bucket, Azure Blob Storage](./proxy/bucket#🪣-logging-gcs-s3-buckets)
|
||||
- ✅ [`/spend/report` API endpoint](cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend)
|
||||
- **Prometheus Metrics**
|
||||
- ✅ [Prometheus Metrics - Num Requests, failures, LLM Provider Outages](prometheus)
|
||||
- ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens)
|
||||
- **Control Guardrails per API Key**
|
||||
- **Control Guardrails per API Key/Team**
|
||||
- **Custom Branding**
|
||||
- ✅ [Custom Branding + Routes on Swagger Docs](#swagger-docs---custom-routes--branding)
|
||||
- ✅ [Public Model Hub](#public-model-hub)
|
||||
- ✅ [Custom Email Branding](./email.md#customizing-email-branding)
|
||||
|
||||
|
||||
|
||||
@@ -4,15 +4,6 @@ import Image from '@theme/IdealImage';
|
||||
|
||||
# 📈 Prometheus metrics
|
||||
|
||||
:::info
|
||||
|
||||
✨ Prometheus metrics is on LiteLLM Enterprise
|
||||
|
||||
[Enterprise Pricing](https://www.litellm.ai/#pricing)
|
||||
|
||||
[Get free 7-day trial key](https://www.litellm.ai/enterprise#trial)
|
||||
|
||||
:::
|
||||
|
||||
LiteLLM exposes a `/metrics` endpoint for Prometheus to poll
|
||||
|
||||
|
||||
@@ -237,11 +237,8 @@ mcp_servers:
|
||||
github_mcp:
|
||||
url: "https://api.githubcopilot.com/mcp"
|
||||
auth_type: oauth2
|
||||
authorization_url: https://github.com/login/oauth/authorize
|
||||
token_url: https://github.com/login/oauth/access_token
|
||||
client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
|
||||
client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
|
||||
scopes: ["public_repo", "user:email"]
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
@@ -255,9 +252,6 @@ atlassian_mcp:
|
||||
url: "https://mcp.atlassian.com/v1/sse"
|
||||
transport: "sse"
|
||||
auth_type: oauth2
|
||||
authorization_url: https://mcp.atlassian.com/v1/authorize
|
||||
token_url: https://cf.mcp.atlassian.com/v1/token
|
||||
registration_url: https://cf.mcp.atlassian.com/v1/register
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
# /vector_stores/{vector_store_id}/files
|
||||
|
||||
Vector store files represent the individual files that live inside a vector store.
|
||||
|
||||
| Feature | Supported |
|
||||
|---------|-----------|
|
||||
| Logging | ✅ (full request/response logging) |
|
||||
| Supported Providers | `openai` |
|
||||
|
||||
|
||||
## Supported operations
|
||||
|
||||
| Operation | Description | OpenAI Python Client | LiteLLM Proxy |
|
||||
|-----------|-------------|----------------------|---------------|
|
||||
| Create vector store file | Attach a file to a vector store with optional chunking overrides | ✅ | ✅ |
|
||||
| List vector store files | Paginated listing with filters | ✅ | ✅ |
|
||||
| Retrieve vector store file | Fetch metadata for a single file | ✅ | ✅ |
|
||||
| Delete vector store file | Remove a file from a store (file object persists) | ✅ | ✅ |
|
||||
| Retrieve vector store file content | Stream processed chunks | ❌ | ✅ |
|
||||
| Update vector store file attributes | Patch custom attributes | ❌ | ✅ |
|
||||
|
||||
:::note
|
||||
Vector store support currently works **only with OpenAI vector stores and OpenAI-uploaded file IDs**.
|
||||
:::
|
||||
|
||||
|
||||
## Create vector store file
|
||||
|
||||
`POST http://localhost:4000/v1/vector_stores/{vector_store_id}/files`
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI(
|
||||
base_url="http://localhost:4000", # LiteLLM proxy or OpenAI base
|
||||
api_key="sk-1234"
|
||||
)
|
||||
|
||||
vector_store_file = client.vector_stores.files.create(
|
||||
vector_store_id="vs_69172088a18c8191ab3e2621aa87d1ee",
|
||||
file_id="file-NDbEDJTfqVh7S4Ugi3CGYw",
|
||||
chunking_strategy={
|
||||
"type": "static",
|
||||
"static": {
|
||||
"max_chunk_size_tokens": 800,
|
||||
"chunk_overlap_tokens": 400,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
print(vector_store_file)
|
||||
```
|
||||
|
||||
## List vector store files
|
||||
|
||||
`GET http://localhost:4000/v1/vector_stores/{vector_store_id}/files`
|
||||
|
||||
Parameters:
|
||||
|
||||
- `vector_store_id` (path, required)
|
||||
- `after` / `before` (query, optional) – pagination cursors
|
||||
- `filter` (query, optional) – `in_progress`, `completed`, `failed`, `cancelled`
|
||||
- `limit` (query, optional, default `20`, range `1-100`)
|
||||
- `order` (query, optional, default `desc`)
|
||||
|
||||
```python
|
||||
vector_store_files = client.vector_stores.files.list(
|
||||
vector_store_id="vs_abc123"
|
||||
)
|
||||
print(vector_store_files)
|
||||
```
|
||||
|
||||
## Retrieve vector store file
|
||||
|
||||
`GET http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}`
|
||||
|
||||
```python
|
||||
vector_store_file = client.vector_stores.files.retrieve(
|
||||
vector_store_id="vs_abc123",
|
||||
file_id="file-abc123"
|
||||
)
|
||||
print(vector_store_file)
|
||||
```
|
||||
|
||||
## Delete vector store file
|
||||
|
||||
`DELETE http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}`
|
||||
|
||||
```python
|
||||
deleted_vector_store_file = client.vector_stores.files.delete(
|
||||
vector_store_id="vs_abc123",
|
||||
file_id="file-abc123"
|
||||
)
|
||||
print(deleted_vector_store_file)
|
||||
```
|
||||
|
||||
## Proxy-only endpoints
|
||||
|
||||
When you need raw content chunks or attribute updates, call the LiteLLM Proxy directly.
|
||||
|
||||
### Retrieve file content
|
||||
|
||||
```bash
|
||||
curl -X GET "http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}/content" \
|
||||
-H "Authorization: Bearer sk-1234"
|
||||
```
|
||||
|
||||
### Update file attributes
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}" \
|
||||
-H "Authorization: Bearer sk-1234" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"attributes": {
|
||||
"category": "support-faq",
|
||||
"language": "en"
|
||||
}
|
||||
}'
|
||||
```
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 418 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 324 KiB |
Generated
+4326
-2170
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@
|
||||
"@docusaurus/plugin-google-gtag": "3.8.1",
|
||||
"@docusaurus/plugin-ideal-image": "3.8.1",
|
||||
"@docusaurus/preset-classic": "3.8.1",
|
||||
"@docusaurus/theme-mermaid": "^3.8.1",
|
||||
"@docusaurus/theme-mermaid": "3.8.1",
|
||||
"@inkeep/cxkit-docusaurus": "^0.5.89",
|
||||
"@mdx-js/react": "^3.0.0",
|
||||
"clsx": "^1.2.1",
|
||||
@@ -45,12 +45,14 @@
|
||||
]
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16.14"
|
||||
"node": ">=16.14",
|
||||
"npm": ">=8.3.0"
|
||||
},
|
||||
"overrides": {
|
||||
"webpack-dev-server": ">=5.2.1",
|
||||
"form-data": ">=4.0.4",
|
||||
"mermaid": ">=11.10.0",
|
||||
"js-yaml": ">=4.1.1"
|
||||
"js-yaml": ">=4.1.1",
|
||||
"gray-matter": ">=4.0.3"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: "[Preview] v1.79.3-stable - Built-in Guardrails on AI Gateway"
|
||||
title: "v1.79.3-stable - Built-in Guardrails on AI Gateway"
|
||||
slug: "v1-79-3"
|
||||
date: 2025-11-08T10:00:00
|
||||
authors:
|
||||
@@ -27,7 +27,7 @@ import TabItem from '@theme/TabItem';
|
||||
docker run \
|
||||
-e STORE_MODEL_IN_DB=True \
|
||||
-p 4000:4000 \
|
||||
ghcr.io/berriai/litellm:v1.79.3.rc.1
|
||||
ghcr.io/berriai/litellm:v1.79.3-stable
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
@@ -0,0 +1,482 @@
|
||||
---
|
||||
title: "[Preview] v1.80.0-stable - RunwayML Provider Support"
|
||||
slug: "v1-80-0"
|
||||
date: 2025-11-15T10:00:00
|
||||
authors:
|
||||
- name: Krrish Dholakia
|
||||
title: CEO, LiteLLM
|
||||
url: https://www.linkedin.com/in/krish-d/
|
||||
image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
|
||||
- name: Ishaan Jaff
|
||||
title: CTO, LiteLLM
|
||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
|
||||
hide_table_of_contents: false
|
||||
---
|
||||
|
||||
import Image from '@theme/IdealImage';
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
## Deploy this version
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="docker" label="Docker">
|
||||
|
||||
``` showLineNumbers title="docker run litellm"
|
||||
docker run \
|
||||
-e STORE_MODEL_IN_DB=True \
|
||||
-p 4000:4000 \
|
||||
ghcr.io/berriai/litellm:v1.80.0.rc.1
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="pip" label="Pip">
|
||||
|
||||
``` showLineNumbers title="pip install litellm"
|
||||
pip install litellm==1.80.0
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## Key Highlights
|
||||
|
||||
- **🆕 RunwayML Provider** - Complete video generation, image generation, and text-to-speech support
|
||||
- **GPT-5.1 Family Support** - Day-0 support for OpenAI's latest GPT-5.1 and GPT-5.1-Codex models
|
||||
- **Prometheus OSS** - Prometheus metrics now available in open-source version
|
||||
- **Vector Store Files API** - Complete OpenAI-compatible Vector Store Files API with full CRUD operations
|
||||
- **Embeddings Performance** - O(1) lookup optimization for router embeddings with shared sessions
|
||||
|
||||
---
|
||||
|
||||
### 🆕 RunwayML
|
||||
|
||||
Complete integration for RunwayML's Gen-4 family of models, supporting video generation, image generation, and text-to-speech.
|
||||
|
||||
**Supported Endpoints:**
|
||||
- `/v1/videos` - Video generation (Gen-4 Turbo, Gen-4 Aleph, Gen-3A Turbo)
|
||||
- `/v1/images/generations` - Image generation (Gen-4 Image, Gen-4 Image Turbo)
|
||||
- `/v1/audio/speech` - Text-to-speech (ElevenLabs Multilingual v2)
|
||||
|
||||
**Quick Start:**
|
||||
|
||||
```bash showLineNumbers title="Generate Video with RunwayML"
|
||||
curl --location 'http://localhost:4000/v1/videos' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
--data '{
|
||||
"model": "runwayml/gen4_turbo",
|
||||
"prompt": "A high quality demo video of litellm ai gateway",
|
||||
"input_reference": "https://example.com/image.jpg",
|
||||
"seconds": 5,
|
||||
"size": "1280x720"
|
||||
}'
|
||||
```
|
||||
|
||||
[Get Started with RunwayML](../../docs/providers/runwayml/videos)
|
||||
|
||||
---
|
||||
|
||||
### Prometheus Metrics - Open Source
|
||||
|
||||
Prometheus metrics are now available in the open-source version of LiteLLM, providing comprehensive observability for your AI Gateway without requiring an enterprise license.
|
||||
|
||||
**Quick Start:**
|
||||
|
||||
```yaml
|
||||
litellm_settings:
|
||||
success_callback: ["prometheus"]
|
||||
failure_callback: ["prometheus"]
|
||||
```
|
||||
|
||||
[Get Started with Prometheus](../../docs/proxy/logging#prometheus)
|
||||
|
||||
---
|
||||
|
||||
### Vector Store Files API
|
||||
|
||||
The complete OpenAI-compatible Vector Store Files API is now stable, enabling full file lifecycle management within vector stores.
|
||||
|
||||
**Supported Endpoints:**
|
||||
- `POST /v1/vector_stores/{vector_store_id}/files` - Create vector store file
|
||||
- `GET /v1/vector_stores/{vector_store_id}/files` - List vector store files
|
||||
- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}` - Retrieve vector store file
|
||||
- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}/content` - Retrieve file content
|
||||
- `DELETE /v1/vector_stores/{vector_store_id}/files/{file_id}` - Delete vector store file
|
||||
- `DELETE /v1/vector_stores/{vector_store_id}` - Delete vector store
|
||||
|
||||
**Quick Start:**
|
||||
|
||||
```bash showLineNumbers title="Create Vector Store File"
|
||||
curl --location 'http://localhost:4000/v1/vector_stores/vs_123/files' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
--data '{
|
||||
"file_id": "file_abc"
|
||||
}'
|
||||
```
|
||||
|
||||
[Get Started with Vector Stores](../../docs/vector_store_files)
|
||||
|
||||
---
|
||||
|
||||
## New Providers and Endpoints
|
||||
|
||||
### New Providers
|
||||
|
||||
| Provider | Supported Endpoints | Description |
|
||||
| -------- | ------------------- | ----------- |
|
||||
| **[RunwayML](../../docs/providers/runwayml/videos)** | `/v1/videos`, `/v1/images/generations`, `/v1/audio/speech` | Gen-4 video generation, image generation, and text-to-speech |
|
||||
|
||||
### New LLM API Endpoints
|
||||
|
||||
| Endpoint | Method | Description | Documentation |
|
||||
| -------- | ------ | ----------- | ------------- |
|
||||
| `/v1/vector_stores/{vector_store_id}/files` | POST | Create vector store file | [Docs](../../docs/vector_store_files) |
|
||||
| `/v1/vector_stores/{vector_store_id}/files` | GET | List vector store files | [Docs](../../docs/vector_store_files) |
|
||||
| `/v1/vector_stores/{vector_store_id}/files/{file_id}` | GET | Retrieve vector store file | [Docs](../../docs/vector_store_files) |
|
||||
| `/v1/vector_stores/{vector_store_id}/files/{file_id}/content` | GET | Retrieve file content | [Docs](../../docs/vector_store_files) |
|
||||
| `/v1/vector_stores/{vector_store_id}/files/{file_id}` | DELETE | Delete vector store file | [Docs](../../docs/vector_store_files) |
|
||||
| `/v1/vector_stores/{vector_store_id}` | DELETE | Delete vector store | [Docs](../../docs/vector_store_files) |
|
||||
|
||||
---
|
||||
|
||||
## New Models / Updated Models
|
||||
|
||||
#### New Model Support
|
||||
|
||||
| Provider | Model | Context Window | Input ($/1M tokens) | Output ($/1M tokens) | Features |
|
||||
| -------- | ----- | -------------- | ------------------- | -------------------- | -------- |
|
||||
| OpenAI | `gpt-5.1` | 272K | $1.25 | $10.00 | Reasoning, vision, PDF input, responses API |
|
||||
| OpenAI | `gpt-5.1-2025-11-13` | 272K | $1.25 | $10.00 | Reasoning, vision, PDF input, responses API |
|
||||
| OpenAI | `gpt-5.1-chat-latest` | 128K | $1.25 | $10.00 | Reasoning, vision, PDF input |
|
||||
| OpenAI | `gpt-5.1-codex` | 272K | $1.25 | $10.00 | Responses API, reasoning, vision |
|
||||
| OpenAI | `gpt-5.1-codex-mini` | 272K | $0.25 | $2.00 | Responses API, reasoning, vision |
|
||||
| Moonshot | `moonshot/kimi-k2-thinking` | 262K | $0.60 | $2.50 | Function calling, web search, reasoning |
|
||||
| Mistral | `mistral/magistral-medium-2509` | 40K | $2.00 | $5.00 | Reasoning, function calling |
|
||||
| Vertex AI | `vertex_ai/moonshotai/kimi-k2-thinking-maas` | 256K | $0.60 | $2.50 | Function calling, web search |
|
||||
| OpenRouter | `openrouter/deepseek/deepseek-v3.2-exp` | 164K | $0.20 | $0.40 | Function calling, prompt caching |
|
||||
| OpenRouter | `openrouter/minimax/minimax-m2` | 205K | $0.26 | $1.02 | Function calling, reasoning |
|
||||
| OpenRouter | `openrouter/z-ai/glm-4.6` | 203K | $0.40 | $1.75 | Function calling, reasoning |
|
||||
| OpenRouter | `openrouter/z-ai/glm-4.6:exacto` | 203K | $0.45 | $1.90 | Function calling, reasoning |
|
||||
| Voyage | `voyage/voyage-3.5` | 32K | $0.06 | - | Embeddings |
|
||||
| Voyage | `voyage/voyage-3.5-lite` | 32K | $0.02 | - | Embeddings |
|
||||
|
||||
#### Video Generation Models
|
||||
|
||||
| Provider | Model | Cost Per Second | Resolutions | Features |
|
||||
| -------- | ----- | --------------- | ----------- | -------- |
|
||||
| RunwayML | `runwayml/gen4_turbo` | $0.05 | 1280x720, 720x1280 | Text + image to video |
|
||||
| RunwayML | `runwayml/gen4_aleph` | $0.15 | 1280x720, 720x1280 | Text + image to video |
|
||||
| RunwayML | `runwayml/gen3a_turbo` | $0.05 | 1280x720, 720x1280 | Text + image to video |
|
||||
|
||||
#### Image Generation Models
|
||||
|
||||
| Provider | Model | Cost Per Image | Resolutions | Features |
|
||||
| -------- | ----- | -------------- | ----------- | -------- |
|
||||
| RunwayML | `runwayml/gen4_image` | $0.05 | 1280x720, 1920x1080 | Text + image to image |
|
||||
| RunwayML | `runwayml/gen4_image_turbo` | $0.02 | 1280x720, 1920x1080 | Text + image to image |
|
||||
| Fal.ai | `fal_ai/fal-ai/flux-pro/v1.1` | $0.04/image | - | Image generation |
|
||||
| Fal.ai | `fal_ai/fal-ai/flux/schnell` | $0.003/image | - | Fast image generation |
|
||||
| Fal.ai | `fal_ai/fal-ai/bytedance/seedream/v3/text-to-image` | $0.03/image | - | Image generation |
|
||||
| Fal.ai | `fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image` | $0.03/image | - | Image generation |
|
||||
| Fal.ai | `fal_ai/fal-ai/ideogram/v3` | $0.06/image | - | Image generation |
|
||||
| Fal.ai | `fal_ai/fal-ai/imagen4/preview/fast` | $0.02/image | - | Fast image generation |
|
||||
| Fal.ai | `fal_ai/fal-ai/imagen4/preview/ultra` | $0.06/image | - | High-quality image generation |
|
||||
|
||||
#### Audio Models
|
||||
|
||||
| Provider | Model | Cost | Features |
|
||||
| -------- | ----- | ---- | -------- |
|
||||
| RunwayML | `runwayml/eleven_multilingual_v2` | $0.0003/char | Text-to-speech |
|
||||
|
||||
#### Features
|
||||
|
||||
- **[OpenAI](../../docs/providers/openai)**
|
||||
- Add GPT-5.1 family support with reasoning capabilities - [PR #16598](https://github.com/BerriAI/litellm/pull/16598)
|
||||
- Add support for `reasoning_effort='none'` for GPT-5.1 - [PR #16658](https://github.com/BerriAI/litellm/pull/16658)
|
||||
- Add `verbosity` parameter support for GPT-5 family models - [PR #16660](https://github.com/BerriAI/litellm/pull/16660)
|
||||
- Fix forward OpenAI organization for image generation - [PR #16607](https://github.com/BerriAI/litellm/pull/16607)
|
||||
|
||||
- **[Gemini (Google AI Studio + Vertex AI)](../../docs/providers/gemini)**
|
||||
- Add support for `reasoning_effort='none'` for Gemini models - [PR #16548](https://github.com/BerriAI/litellm/pull/16548)
|
||||
- Add all Gemini image models support in image generation - [PR #16526](https://github.com/BerriAI/litellm/pull/16526)
|
||||
- Add Gemini image edit support - [PR #16430](https://github.com/BerriAI/litellm/pull/16430)
|
||||
- Fix preserve non-ASCII characters in function call arguments - [PR #16550](https://github.com/BerriAI/litellm/pull/16550)
|
||||
- Fix Gemini conversation format issue with MCP auto-execution - [PR #16592](https://github.com/BerriAI/litellm/pull/16592)
|
||||
|
||||
- **[Bedrock](../../docs/providers/bedrock)**
|
||||
- Add support for filtering knowledge base queries - [PR #16543](https://github.com/BerriAI/litellm/pull/16543)
|
||||
- Ensure correct `aws_region` is used when provided dynamically for embeddings - [PR #16547](https://github.com/BerriAI/litellm/pull/16547)
|
||||
- Add support for custom KMS encryption keys in Bedrock Batch operations - [PR #16662](https://github.com/BerriAI/litellm/pull/16662)
|
||||
- Add bearer token authentication support for AgentCore - [PR #16556](https://github.com/BerriAI/litellm/pull/16556)
|
||||
- Fix AgentCore SSE stream iterator to be async for proper streaming support - [PR #16293](https://github.com/BerriAI/litellm/pull/16293)
|
||||
|
||||
- **[Anthropic](../../docs/providers/anthropic)**
|
||||
- Add context management param support - [PR #16528](https://github.com/BerriAI/litellm/pull/16528)
|
||||
- Fix preserve `$defs` for Anthropic tools input schema - [PR #16648](https://github.com/BerriAI/litellm/pull/16648)
|
||||
- Fix support Anthropic tool_use and tool_result in token counter - [PR #16351](https://github.com/BerriAI/litellm/pull/16351)
|
||||
|
||||
- **[Vertex AI](../../docs/providers/vertex_ai)**
|
||||
- Add Vertex Kimi-K2-Thinking support - [PR #16671](https://github.com/BerriAI/litellm/pull/16671)
|
||||
- Add `vertex_credentials` support to `litellm.rerank()` - [PR #16479](https://github.com/BerriAI/litellm/pull/16479)
|
||||
|
||||
- **[Mistral](../../docs/providers/mistral)**
|
||||
- Fix Magistral streaming to emit reasoning chunks - [PR #16434](https://github.com/BerriAI/litellm/pull/16434)
|
||||
|
||||
- **[Moonshot (Kimi)](../../docs/providers/moonshot)**
|
||||
- Add Kimi K2 thinking model support - [PR #16445](https://github.com/BerriAI/litellm/pull/16445)
|
||||
|
||||
- **[SambaNova](../../docs/providers/sambanova)**
|
||||
- Fix SambaNova API rejecting requests when message content is passed as a list format - [PR #16612](https://github.com/BerriAI/litellm/pull/16612)
|
||||
|
||||
- **[VLLM](../../docs/providers/vllm)**
|
||||
- Fix use vllm passthrough config for hosted vllm provider instead of raising error - [PR #16537](https://github.com/BerriAI/litellm/pull/16537)
|
||||
- Add headers to VLLM Passthrough requests with success event logging - [PR #16532](https://github.com/BerriAI/litellm/pull/16532)
|
||||
|
||||
- **[Azure](../../docs/providers/azure)**
|
||||
- Fix improve Azure auth parameter handling for None values - [PR #14436](https://github.com/BerriAI/litellm/pull/14436)
|
||||
|
||||
- **[Groq](../../docs/providers/groq)**
|
||||
- Fix parse failed chunks for Groq - [PR #16595](https://github.com/BerriAI/litellm/pull/16595)
|
||||
|
||||
- **[Voyage](../../docs/providers/voyage)**
|
||||
- Add Voyage 3.5 and 3.5-lite embeddings pricing and doc update - [PR #16641](https://github.com/BerriAI/litellm/pull/16641)
|
||||
|
||||
- **[Fal.ai](../../docs/image_generation)**
|
||||
- Add fal-ai/flux/schnell support - [PR #16580](https://github.com/BerriAI/litellm/pull/16580)
|
||||
- Add all Imagen4 variants of fal ai in model map - [PR #16579](https://github.com/BerriAI/litellm/pull/16579)
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- **General**
|
||||
- Fix sanitize null token usage in OpenAI-compatible responses - [PR #16493](https://github.com/BerriAI/litellm/pull/16493)
|
||||
- Fix apply provided timeout value to ClientTimeout.total - [PR #16395](https://github.com/BerriAI/litellm/pull/16395)
|
||||
- Fix raising wrong 429 error on wrong exception - [PR #16482](https://github.com/BerriAI/litellm/pull/16482)
|
||||
- Add new models, delete repeat models, update pricing - [PR #16491](https://github.com/BerriAI/litellm/pull/16491)
|
||||
- Update model logging format for custom LLM provider - [PR #16485](https://github.com/BerriAI/litellm/pull/16485)
|
||||
|
||||
---
|
||||
|
||||
## LLM API Endpoints
|
||||
|
||||
#### New Endpoints
|
||||
|
||||
- **[GET /providers](../../docs/proxy/management_endpoints)**
|
||||
- Add GET list of providers endpoint - [PR #16432](https://github.com/BerriAI/litellm/pull/16432)
|
||||
|
||||
#### Features
|
||||
|
||||
- **[Video Generation API](../../docs/video_generation)**
|
||||
- Allow internal users to access video generation routes - [PR #16472](https://github.com/BerriAI/litellm/pull/16472)
|
||||
|
||||
- **[Vector Stores API](../../docs/vector_stores)**
|
||||
- Vector store files stable release with complete CRUD operations - [PR #16643](https://github.com/BerriAI/litellm/pull/16643)
|
||||
- `POST /v1/vector_stores/{vector_store_id}/files` - Create vector store file
|
||||
- `GET /v1/vector_stores/{vector_store_id}/files` - List vector store files
|
||||
- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}` - Retrieve vector store file
|
||||
- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}/content` - Retrieve file content
|
||||
- `DELETE /v1/vector_stores/{vector_store_id}/files/{file_id}` - Delete vector store file
|
||||
- `DELETE /v1/vector_stores/{vector_store_id}` - Delete vector store
|
||||
- Ensure users can access `search_results` for both stream + non-stream response - [PR #16459](https://github.com/BerriAI/litellm/pull/16459)
|
||||
|
||||
#### Bugs
|
||||
|
||||
- **[Video Generation API](../../docs/video_generation)**
|
||||
- Fix use GET for `/v1/videos/{video_id}/content` - [PR #16672](https://github.com/BerriAI/litellm/pull/16672)
|
||||
|
||||
- **General**
|
||||
- Fix remove generic exception handling - [PR #16599](https://github.com/BerriAI/litellm/pull/16599)
|
||||
|
||||
---
|
||||
|
||||
## Management Endpoints / UI
|
||||
|
||||
#### Features
|
||||
|
||||
- **Proxy CLI Auth**
|
||||
- Fix remove strict master_key check in add_deployment - [PR #16453](https://github.com/BerriAI/litellm/pull/16453)
|
||||
|
||||
- **Virtual Keys**
|
||||
- UI - Add Tags To Edit Key Flow - [PR #16500](https://github.com/BerriAI/litellm/pull/16500)
|
||||
- UI - Test Key Page show models based on selected endpoint - [PR #16452](https://github.com/BerriAI/litellm/pull/16452)
|
||||
- UI - Expose user_alias in view and update path - [PR #16669](https://github.com/BerriAI/litellm/pull/16669)
|
||||
|
||||
- **Models + Endpoints**
|
||||
- UI - Add LiteLLM Params to Edit Model - [PR #16496](https://github.com/BerriAI/litellm/pull/16496)
|
||||
- UI - Add Model use backend data - [PR #16664](https://github.com/BerriAI/litellm/pull/16664)
|
||||
- UI - Remove Description Field from LLM Credentials - [PR #16608](https://github.com/BerriAI/litellm/pull/16608)
|
||||
- UI - Add RunwayML on Admin UI supported models/providers - [PR #16606](https://github.com/BerriAI/litellm/pull/16606)
|
||||
- Infra - Migrate Add Model Fields to Backend - [PR #16620](https://github.com/BerriAI/litellm/pull/16620)
|
||||
- Add API Endpoint for creating model access group - [PR #16663](https://github.com/BerriAI/litellm/pull/16663)
|
||||
|
||||
- **Teams**
|
||||
- UI - Invite User Searchable Team Select - [PR #16454](https://github.com/BerriAI/litellm/pull/16454)
|
||||
- Fix use user budget instead of key budget when creating new team - [PR #16074](https://github.com/BerriAI/litellm/pull/16074)
|
||||
|
||||
- **Budgets**
|
||||
- UI - Move Budgets out of Experimental - [PR #16544](https://github.com/BerriAI/litellm/pull/16544)
|
||||
|
||||
- **Guardrails**
|
||||
- UI - Config Guardrails should not be deletable from table - [PR #16540](https://github.com/BerriAI/litellm/pull/16540)
|
||||
- Fix remove enterprise restriction from guardrails list endpoint - [PR #15333](https://github.com/BerriAI/litellm/pull/15333)
|
||||
|
||||
- **Callbacks**
|
||||
- UI - New Callbacks table - [PR #16512](https://github.com/BerriAI/litellm/pull/16512)
|
||||
- Fix delete callbacks failing - [PR #16473](https://github.com/BerriAI/litellm/pull/16473)
|
||||
|
||||
- **Usage & Analytics**
|
||||
- UI - Improve Usage Indicator - [PR #16504](https://github.com/BerriAI/litellm/pull/16504)
|
||||
- UI - Model Info Page Health Check - [PR #16416](https://github.com/BerriAI/litellm/pull/16416)
|
||||
- Infra - Show Deprecation Warning for Model Analytics Tab - [PR #16417](https://github.com/BerriAI/litellm/pull/16417)
|
||||
- Fix LiteLLM tags usage - add request_id - [PR #16111](https://github.com/BerriAI/litellm/pull/16111)
|
||||
|
||||
- **Health Check**
|
||||
- Add Langfuse OTEL and SQS to Health Check - [PR #16514](https://github.com/BerriAI/litellm/pull/16514)
|
||||
|
||||
- **General UI**
|
||||
- UI - Normalize table action columns appearance - [PR #16657](https://github.com/BerriAI/litellm/pull/16657)
|
||||
- UI - Button Styles and Sizing in Settings Pages - [PR #16600](https://github.com/BerriAI/litellm/pull/16600)
|
||||
- UI - SSO Modal Cosmetic Changes - [PR #16554](https://github.com/BerriAI/litellm/pull/16554)
|
||||
- Fix UI logos loading with SERVER_ROOT_PATH - [PR #16618](https://github.com/BerriAI/litellm/pull/16618)
|
||||
- Fix remove misleading 'Custom' option mention from OpenAI endpoint tooltips - [PR #16622](https://github.com/BerriAI/litellm/pull/16622)
|
||||
|
||||
#### Bugs
|
||||
|
||||
- **Management Endpoints**
|
||||
- Fix inconsistent error responses in customer management endpoints - [PR #16450](https://github.com/BerriAI/litellm/pull/16450)
|
||||
- Fix correct date range filtering in /spend/logs endpoint - [PR #16443](https://github.com/BerriAI/litellm/pull/16443)
|
||||
- Fix /spend/logs/ui Access Control - [PR #16446](https://github.com/BerriAI/litellm/pull/16446)
|
||||
- Add pagination for /spend/logs/session/ui endpoint - [PR #16603](https://github.com/BerriAI/litellm/pull/16603)
|
||||
- Fix LiteLLM Usage shows key_hash - [PR #16471](https://github.com/BerriAI/litellm/pull/16471)
|
||||
- Fix app_roles missing from jwt payload - [PR #16448](https://github.com/BerriAI/litellm/pull/16448)
|
||||
|
||||
---
|
||||
|
||||
## Logging / Guardrail / Prompt Management Integrations
|
||||
|
||||
|
||||
#### New Integration
|
||||
|
||||
- **🆕 [Zscaler AI Guard](../../docs/proxy/guardrails/zscaler_ai_guard)**
|
||||
- Add Zscaler AI Guard hook for security policy enforcement - [PR #15691](https://github.com/BerriAI/litellm/pull/15691)
|
||||
|
||||
#### Logging
|
||||
|
||||
- **[Langfuse](../../docs/proxy/logging#langfuse)**
|
||||
- Fix handle null usage values to prevent validation errors - [PR #16396](https://github.com/BerriAI/litellm/pull/16396)
|
||||
|
||||
- **[CloudZero](../../docs/proxy/logging)**
|
||||
- Fix updated spend not being sent to CloudZero - [PR #16201](https://github.com/BerriAI/litellm/pull/16201)
|
||||
|
||||
#### Guardrails
|
||||
|
||||
- **[IBM Detector](../../docs/proxy/guardrails)**
|
||||
- Ensure detector-id is passed as header to IBM detector server - [PR #16649](https://github.com/BerriAI/litellm/pull/16649)
|
||||
|
||||
#### Prompt Management
|
||||
|
||||
- **[Custom Prompt Management](../../docs/proxy/prompt_management)**
|
||||
- Add SDK focused examples for custom prompt management - [PR #16441](https://github.com/BerriAI/litellm/pull/16441)
|
||||
|
||||
---
|
||||
|
||||
## Spend Tracking, Budgets and Rate Limiting
|
||||
|
||||
- **End User Budgets**
|
||||
- Allow pointing max_end_user budget to an id, so the default ID applies to all end users - [PR #16456](https://github.com/BerriAI/litellm/pull/16456)
|
||||
|
||||
---
|
||||
|
||||
## MCP Gateway
|
||||
|
||||
- **Configuration**
|
||||
- Add dynamic OAuth2 metadata discovery for MCP servers - [PR #16676](https://github.com/BerriAI/litellm/pull/16676)
|
||||
- Fix allow tool call even when server name prefix is missing - [PR #16425](https://github.com/BerriAI/litellm/pull/16425)
|
||||
- Fix exclude unauthorized MCP servers from allowed server list - [PR #16551](https://github.com/BerriAI/litellm/pull/16551)
|
||||
- Fix unable to delete MCP server from permission settings - [PR #16407](https://github.com/BerriAI/litellm/pull/16407)
|
||||
- Fix avoid crashing when MCP server record lacks credentials - [PR #16601](https://github.com/BerriAI/litellm/pull/16601)
|
||||
|
||||
---
|
||||
|
||||
## Agents
|
||||
|
||||
- **[Agent Registration (A2A Spec)](../../docs/agents)**
|
||||
- Support agent registration + discovery following Agent-to-Agent specification - [PR #16615](https://github.com/BerriAI/litellm/pull/16615)
|
||||
|
||||
---
|
||||
|
||||
## Performance / Loadbalancing / Reliability improvements
|
||||
|
||||
- **Embeddings Performance**
|
||||
- Use router's O(1) lookup and shared sessions for embeddings - [PR #16344](https://github.com/BerriAI/litellm/pull/16344)
|
||||
|
||||
- **Router Reliability**
|
||||
- Support default fallbacks for unknown models - [PR #16419](https://github.com/BerriAI/litellm/pull/16419)
|
||||
|
||||
- **Callback Management**
|
||||
- Add atexit handlers to flush callbacks for async completions - [PR #16487](https://github.com/BerriAI/litellm/pull/16487)
|
||||
|
||||
---
|
||||
|
||||
## General Proxy Improvements
|
||||
|
||||
- **Configuration Management**
|
||||
- Fix update model_cost_map_url to use environment variable - [PR #16429](https://github.com/BerriAI/litellm/pull/16429)
|
||||
|
||||
---
|
||||
|
||||
## Documentation Updates
|
||||
|
||||
- **Provider Documentation**
|
||||
- Fix streaming example in README - [PR #16461](https://github.com/BerriAI/litellm/pull/16461)
|
||||
- Update broken Slack invite links to support page - [PR #16546](https://github.com/BerriAI/litellm/pull/16546)
|
||||
- Fix code block indentation for fallbacks page - [PR #16542](https://github.com/BerriAI/litellm/pull/16542)
|
||||
- Documentation code example corrections - [PR #16502](https://github.com/BerriAI/litellm/pull/16502)
|
||||
- Document `reasoning_effort` summary field options - [PR #16549](https://github.com/BerriAI/litellm/pull/16549)
|
||||
|
||||
- **API Documentation**
|
||||
- Add docs on APIs for model access management - [PR #16673](https://github.com/BerriAI/litellm/pull/16673)
|
||||
- Add docs for showing how to auto reload new pricing data - [PR #16675](https://github.com/BerriAI/litellm/pull/16675)
|
||||
- LiteLLM Quick start - show how model resolution works - [PR #16602](https://github.com/BerriAI/litellm/pull/16602)
|
||||
- Add docs for tracking callback failure - [PR #16474](https://github.com/BerriAI/litellm/pull/16474)
|
||||
|
||||
- **General Documentation**
|
||||
- Fix container api link in release page - [PR #16440](https://github.com/BerriAI/litellm/pull/16440)
|
||||
- Add softgen to projects that are using litellm - [PR #16423](https://github.com/BerriAI/litellm/pull/16423)
|
||||
|
||||
---
|
||||
|
||||
## New Contributors
|
||||
|
||||
* @artplan1 made their first contribution in [PR #16423](https://github.com/BerriAI/litellm/pull/16423)
|
||||
* @JehandadK made their first contribution in [PR #16472](https://github.com/BerriAI/litellm/pull/16472)
|
||||
* @vmiscenko made their first contribution in [PR #16453](https://github.com/BerriAI/litellm/pull/16453)
|
||||
* @mcowger made their first contribution in [PR #16429](https://github.com/BerriAI/litellm/pull/16429)
|
||||
* @yellowsubmarine372 made their first contribution in [PR #16395](https://github.com/BerriAI/litellm/pull/16395)
|
||||
* @Hebruwu made their first contribution in [PR #16201](https://github.com/BerriAI/litellm/pull/16201)
|
||||
* @jwang-gif made their first contribution in [PR #15691](https://github.com/BerriAI/litellm/pull/15691)
|
||||
* @AnthonyMonaco made their first contribution in [PR #16502](https://github.com/BerriAI/litellm/pull/16502)
|
||||
* @andrewm4894 made their first contribution in [PR #16487](https://github.com/BerriAI/litellm/pull/16487)
|
||||
* @f14-bertolotti made their first contribution in [PR #16485](https://github.com/BerriAI/litellm/pull/16485)
|
||||
* @busla made their first contribution in [PR #16293](https://github.com/BerriAI/litellm/pull/16293)
|
||||
* @MightyGoldenOctopus made their first contribution in [PR #16537](https://github.com/BerriAI/litellm/pull/16537)
|
||||
* @ultmaster made their first contribution in [PR #14436](https://github.com/BerriAI/litellm/pull/14436)
|
||||
* @bchrobot made their first contribution in [PR #16542](https://github.com/BerriAI/litellm/pull/16542)
|
||||
* @sep-grindr made their first contribution in [PR #16622](https://github.com/BerriAI/litellm/pull/16622)
|
||||
* @pnookala-godaddy made their first contribution in [PR #16607](https://github.com/BerriAI/litellm/pull/16607)
|
||||
* @dtunikov made their first contribution in [PR #16592](https://github.com/BerriAI/litellm/pull/16592)
|
||||
* @lukapecnik made their first contribution in [PR #16648](https://github.com/BerriAI/litellm/pull/16648)
|
||||
* @jyeros made their first contribution in [PR #16618](https://github.com/BerriAI/litellm/pull/16618)
|
||||
|
||||
---
|
||||
|
||||
## Full Changelog
|
||||
|
||||
**[View complete changelog on GitHub](https://github.com/BerriAI/litellm/compare/v1.79.3.rc.1...v1.80.0.rc.1)**
|
||||
|
||||
---
|
||||
@@ -368,6 +368,7 @@ const sidebars = {
|
||||
]
|
||||
},
|
||||
"videos",
|
||||
"vector_store_files",
|
||||
{
|
||||
type: "category",
|
||||
label: "/mcp - Model Context Protocol",
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "litellm-enterprise"
|
||||
version = "0.1.20"
|
||||
version = "0.1.21"
|
||||
description = "Package for LiteLLM Enterprise features"
|
||||
authors = ["BerriAI"]
|
||||
readme = "README.md"
|
||||
@@ -22,7 +22,7 @@ requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "0.1.20"
|
||||
version = "0.1.21"
|
||||
version_files = [
|
||||
"pyproject.toml:version",
|
||||
"../requirements.txt:litellm-enterprise==",
|
||||
|
||||
Binary file not shown.
Binary file not shown.
+17
@@ -0,0 +1,17 @@
|
||||
-- CreateTable
|
||||
CREATE TABLE "LiteLLM_AgentsTable" (
|
||||
"agent_id" TEXT NOT NULL,
|
||||
"agent_name" TEXT NOT NULL,
|
||||
"litellm_params" JSONB,
|
||||
"agent_card_params" JSONB NOT NULL,
|
||||
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
"created_by" TEXT NOT NULL,
|
||||
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
"updated_by" TEXT NOT NULL,
|
||||
|
||||
CONSTRAINT "LiteLLM_AgentsTable_pkey" PRIMARY KEY ("agent_id")
|
||||
);
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "LiteLLM_AgentsTable_agent_name_key" ON "LiteLLM_AgentsTable"("agent_name");
|
||||
|
||||
@@ -54,6 +54,19 @@ model LiteLLM_ProxyModelTable {
|
||||
updated_by String
|
||||
}
|
||||
|
||||
|
||||
// Agents on proxy
|
||||
model LiteLLM_AgentsTable {
|
||||
agent_id String @id @default(uuid())
|
||||
agent_name String @unique
|
||||
litellm_params Json?
|
||||
agent_card_params Json
|
||||
created_at DateTime @default(now()) @map("created_at")
|
||||
created_by String
|
||||
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
|
||||
updated_by String
|
||||
}
|
||||
|
||||
model LiteLLM_OrganizationTable {
|
||||
organization_id String @id @default(uuid())
|
||||
organization_alias String
|
||||
@@ -639,4 +652,4 @@ model LiteLLM_CacheConfig {
|
||||
cache_settings Json
|
||||
created_at DateTime @default(now())
|
||||
updated_at DateTime @updatedAt
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "litellm-proxy-extras"
|
||||
version = "0.4.4"
|
||||
version = "0.4.5"
|
||||
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
|
||||
authors = ["BerriAI"]
|
||||
readme = "README.md"
|
||||
@@ -22,7 +22,7 @@ requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "0.4.4"
|
||||
version = "0.4.5"
|
||||
version_files = [
|
||||
"pyproject.toml:version",
|
||||
"../requirements.txt:litellm-proxy-extras==",
|
||||
|
||||
@@ -1386,6 +1386,20 @@ from .search.main import *
|
||||
from .realtime_api.main import _arealtime
|
||||
from .fine_tuning.main import *
|
||||
from .files.main import *
|
||||
from .vector_store_files.main import (
|
||||
acreate as avector_store_file_create,
|
||||
adelete as avector_store_file_delete,
|
||||
alist as avector_store_file_list,
|
||||
aretrieve as avector_store_file_retrieve,
|
||||
aretrieve_content as avector_store_file_content,
|
||||
aupdate as avector_store_file_update,
|
||||
create as vector_store_file_create,
|
||||
delete as vector_store_file_delete,
|
||||
list as vector_store_file_list,
|
||||
retrieve as vector_store_file_retrieve,
|
||||
retrieve_content as vector_store_file_content,
|
||||
update as vector_store_file_update,
|
||||
)
|
||||
from .scheduler import *
|
||||
from .cost_calculator import response_cost_calculator, cost_per_token
|
||||
|
||||
|
||||
@@ -476,6 +476,7 @@ DEFAULT_CHAT_COMPLETION_PARAM_VALUES = {
|
||||
"additional_drop_params": None,
|
||||
"messages": None,
|
||||
"reasoning_effort": None,
|
||||
"verbosity": None,
|
||||
"thinking": None,
|
||||
"web_search_options": None,
|
||||
"service_tier": None,
|
||||
|
||||
+1
-23
@@ -41,21 +41,9 @@ class PrometheusLogger(CustomLogger):
|
||||
try:
|
||||
from prometheus_client import Counter, Gauge, Histogram
|
||||
|
||||
from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
|
||||
|
||||
# Always initialize label_filters, even for non-premium users
|
||||
self.label_filters = self._parse_prometheus_config()
|
||||
|
||||
if premium_user is not True:
|
||||
verbose_logger.warning(
|
||||
f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise\n🚨 {CommonProxyErrors.not_premium_user.value}"
|
||||
)
|
||||
self.litellm_not_a_premium_user_metric = Counter(
|
||||
name="litellm_not_a_premium_user_metric",
|
||||
documentation=f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise. 🚨 {CommonProxyErrors.not_premium_user.value}",
|
||||
)
|
||||
return
|
||||
|
||||
# Create metric factory functions
|
||||
self._counter_factory = self._create_metric_factory(Counter)
|
||||
self._gauge_factory = self._create_metric_factory(Gauge)
|
||||
@@ -2184,9 +2172,6 @@ class PrometheusLogger(CustomLogger):
|
||||
|
||||
It emits the current remaining budget metrics for all Keys and Teams.
|
||||
"""
|
||||
from enterprise.litellm_enterprise.integrations.prometheus import (
|
||||
PrometheusLogger,
|
||||
)
|
||||
from litellm.constants import PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
|
||||
@@ -2213,26 +2198,19 @@ class PrometheusLogger(CustomLogger):
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _mount_metrics_endpoint(premium_user: bool):
|
||||
def _mount_metrics_endpoint():
|
||||
"""
|
||||
Mount the Prometheus metrics endpoint with optional authentication.
|
||||
|
||||
Args:
|
||||
premium_user (bool): Whether the user is a premium user
|
||||
require_auth (bool, optional): Whether to require authentication for the metrics endpoint.
|
||||
Defaults to False.
|
||||
"""
|
||||
from prometheus_client import make_asgi_app
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy._types import CommonProxyErrors
|
||||
from litellm.proxy.proxy_server import app
|
||||
|
||||
if premium_user is not True:
|
||||
verbose_proxy_logger.warning(
|
||||
f"Prometheus metrics are only available for premium users. {CommonProxyErrors.not_premium_user.value}"
|
||||
)
|
||||
|
||||
# Create metrics ASGI app
|
||||
if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
|
||||
from prometheus_client import CollectorRegistry, multiprocess
|
||||
@@ -16,14 +16,16 @@ from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheCont
|
||||
from litellm.integrations.argilla import ArgillaLogger
|
||||
from litellm.integrations.azure_storage.azure_storage import AzureBlobStorageLogger
|
||||
from litellm.integrations.bitbucket import BitBucketPromptManager
|
||||
from litellm.integrations.gitlab import GitLabPromptManager
|
||||
from litellm.integrations.braintrust_logging import BraintrustLogger
|
||||
from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger
|
||||
from litellm.integrations.datadog.datadog import DataDogLogger
|
||||
from litellm.integrations.datadog.datadog_llm_obs import DataDogLLMObsLogger
|
||||
from litellm.integrations.deepeval import DeepEvalLogger
|
||||
from litellm.integrations.dotprompt import DotpromptManager
|
||||
from litellm.integrations.galileo import GalileoObserve
|
||||
from litellm.integrations.gcs_bucket.gcs_bucket import GCSBucketLogger
|
||||
from litellm.integrations.gcs_pubsub.pub_sub import GcsPubSubLogger
|
||||
from litellm.integrations.gitlab import GitLabPromptManager
|
||||
from litellm.integrations.humanloop import HumanloopLogger
|
||||
from litellm.integrations.lago import LagoLogger
|
||||
from litellm.integrations.langfuse.langfuse_prompt_management import (
|
||||
@@ -36,13 +38,7 @@ from litellm.integrations.openmeter import OpenMeterLogger
|
||||
from litellm.integrations.opentelemetry import OpenTelemetry
|
||||
from litellm.integrations.opik.opik import OpikLogger
|
||||
from litellm.integrations.posthog import PostHogLogger
|
||||
|
||||
try:
|
||||
from litellm_enterprise.integrations.prometheus import PrometheusLogger
|
||||
except Exception:
|
||||
PrometheusLogger = None
|
||||
from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger
|
||||
from litellm.integrations.dotprompt import DotpromptManager
|
||||
from litellm.integrations.prometheus import PrometheusLogger
|
||||
from litellm.integrations.s3_v2 import S3Logger
|
||||
from litellm.integrations.sqs import SQSLogger
|
||||
from litellm.integrations.vector_store_integrations.vector_store_pre_call_hook import (
|
||||
|
||||
@@ -58,6 +58,7 @@ from litellm.integrations.custom_guardrail import CustomGuardrail
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.integrations.deepeval.deepeval import DeepEvalLogger
|
||||
from litellm.integrations.mlflow import MlflowLogger
|
||||
from litellm.integrations.prometheus import PrometheusLogger
|
||||
from litellm.integrations.sqs import SQSLogger
|
||||
from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
|
||||
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
|
||||
@@ -176,7 +177,6 @@ try:
|
||||
from litellm_enterprise.enterprise_callbacks.send_emails.smtp_email import (
|
||||
SMTPEmailLogger,
|
||||
)
|
||||
from litellm_enterprise.integrations.prometheus import PrometheusLogger
|
||||
from litellm_enterprise.litellm_core_utils.litellm_logging import (
|
||||
StandardLoggingPayloadSetup as EnterpriseStandardLoggingPayloadSetup,
|
||||
)
|
||||
@@ -194,7 +194,6 @@ except Exception as e:
|
||||
PagerDutyAlerting = CustomLogger # type: ignore
|
||||
EnterpriseCallbackControls = None # type: ignore
|
||||
EnterpriseStandardLoggingPayloadSetupVAR = None
|
||||
PrometheusLogger = None
|
||||
_in_memory_loggers: List[Any] = []
|
||||
|
||||
### GLOBAL VARIABLES ###
|
||||
@@ -1475,33 +1474,58 @@ class Logging(LiteLLMLoggingBaseClass):
|
||||
if self.model_call_details["litellm_params"]["metadata"] is None:
|
||||
self.model_call_details["litellm_params"]["metadata"] = {}
|
||||
self.model_call_details["litellm_params"]["metadata"]["hidden_params"] = getattr(logging_result, "_hidden_params", {}) # type: ignore
|
||||
|
||||
|
||||
if "response_cost" in hidden_params:
|
||||
self.model_call_details["response_cost"] = hidden_params["response_cost"]
|
||||
else:
|
||||
self.model_call_details["response_cost"] = self._response_cost_calculator(result=logging_result)
|
||||
|
||||
self.model_call_details["standard_logging_object"] = get_standard_logging_object_payload(
|
||||
kwargs=self.model_call_details,
|
||||
init_response_obj=logging_result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
logging_obj=self,
|
||||
status="success",
|
||||
standard_built_in_tools_params=self.standard_built_in_tools_params,
|
||||
self.model_call_details["response_cost"] = self._response_cost_calculator(
|
||||
result=logging_result
|
||||
)
|
||||
|
||||
self.model_call_details["standard_logging_object"] = (
|
||||
get_standard_logging_object_payload(
|
||||
kwargs=self.model_call_details,
|
||||
init_response_obj=logging_result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
logging_obj=self,
|
||||
status="success",
|
||||
standard_built_in_tools_params=self.standard_built_in_tools_params,
|
||||
)
|
||||
)
|
||||
|
||||
def _transform_usage_objects(self, result):
|
||||
if isinstance(result, ResponsesAPIResponse):
|
||||
result = result.model_copy()
|
||||
transformed_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(result.usage)
|
||||
setattr(result, "usage", transformed_usage.model_dump() if hasattr(transformed_usage, "model_dump") else dict(transformed_usage))
|
||||
if (standard_logging_payload := self.model_call_details.get("standard_logging_object")) is not None:
|
||||
standard_logging_payload["response"] = result.model_dump() if hasattr(result, "model_dump") else dict(result)
|
||||
transformed_usage = (
|
||||
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||
result.usage
|
||||
)
|
||||
)
|
||||
setattr(
|
||||
result,
|
||||
"usage",
|
||||
(
|
||||
transformed_usage.model_dump()
|
||||
if hasattr(transformed_usage, "model_dump")
|
||||
else dict(transformed_usage)
|
||||
),
|
||||
)
|
||||
if (
|
||||
standard_logging_payload := self.model_call_details.get(
|
||||
"standard_logging_object"
|
||||
)
|
||||
) is not None:
|
||||
standard_logging_payload["response"] = (
|
||||
result.model_dump()
|
||||
if hasattr(result, "model_dump")
|
||||
else dict(result)
|
||||
)
|
||||
elif isinstance(result, TranscriptionResponse):
|
||||
from litellm.litellm_core_utils.llm_cost_calc.usage_object_transformation import (
|
||||
TranscriptionUsageObjectTransformation,
|
||||
)
|
||||
|
||||
result = result.model_copy()
|
||||
transformed_usage = TranscriptionUsageObjectTransformation.transform_transcription_usage_object(result.usage) # type: ignore
|
||||
setattr(result, "usage", transformed_usage)
|
||||
@@ -1522,40 +1546,67 @@ class Logging(LiteLLMLoggingBaseClass):
|
||||
end_time = datetime.datetime.now()
|
||||
if self.completion_start_time is None:
|
||||
self.completion_start_time = end_time
|
||||
self.model_call_details["completion_start_time"] = self.completion_start_time
|
||||
|
||||
self.model_call_details["completion_start_time"] = (
|
||||
self.completion_start_time
|
||||
)
|
||||
|
||||
self.model_call_details["log_event_type"] = "successful_api_call"
|
||||
self.model_call_details["end_time"] = end_time
|
||||
self.model_call_details["cache_hit"] = cache_hit
|
||||
|
||||
|
||||
if self.call_type == CallTypes.anthropic_messages.value:
|
||||
result = self._handle_anthropic_messages_response_logging(result=result)
|
||||
elif self.call_type == CallTypes.generate_content.value or self.call_type == CallTypes.agenerate_content.value:
|
||||
result = self._handle_non_streaming_google_genai_generate_content_response_logging(result=result)
|
||||
|
||||
elif (
|
||||
self.call_type == CallTypes.generate_content.value
|
||||
or self.call_type == CallTypes.agenerate_content.value
|
||||
):
|
||||
result = self._handle_non_streaming_google_genai_generate_content_response_logging(
|
||||
result=result
|
||||
)
|
||||
|
||||
logging_result = self.normalize_logging_result(result=result)
|
||||
|
||||
if standard_logging_object is None and result is not None and self.stream is not True:
|
||||
if self._is_recognized_call_type_for_logging(logging_result=logging_result):
|
||||
self._process_hidden_params_and_response_cost(logging_result=logging_result, start_time=start_time, end_time=end_time)
|
||||
elif isinstance(result, dict) or isinstance(result, list):
|
||||
self.model_call_details["standard_logging_object"] = get_standard_logging_object_payload(
|
||||
kwargs=self.model_call_details,
|
||||
init_response_obj=result,
|
||||
if (
|
||||
standard_logging_object is None
|
||||
and result is not None
|
||||
and self.stream is not True
|
||||
):
|
||||
if self._is_recognized_call_type_for_logging(
|
||||
logging_result=logging_result
|
||||
):
|
||||
self._process_hidden_params_and_response_cost(
|
||||
logging_result=logging_result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
logging_obj=self,
|
||||
status="success",
|
||||
standard_built_in_tools_params=self.standard_built_in_tools_params,
|
||||
)
|
||||
elif isinstance(result, dict) or isinstance(result, list):
|
||||
self.model_call_details["standard_logging_object"] = (
|
||||
get_standard_logging_object_payload(
|
||||
kwargs=self.model_call_details,
|
||||
init_response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
logging_obj=self,
|
||||
status="success",
|
||||
standard_built_in_tools_params=self.standard_built_in_tools_params,
|
||||
)
|
||||
)
|
||||
elif standard_logging_object is not None:
|
||||
self.model_call_details["standard_logging_object"] = standard_logging_object
|
||||
self.model_call_details["standard_logging_object"] = (
|
||||
standard_logging_object
|
||||
)
|
||||
else:
|
||||
self.model_call_details["response_cost"] = None
|
||||
|
||||
result = self._transform_usage_objects(result=result)
|
||||
|
||||
if litellm.max_budget and self.stream is False and result is not None and isinstance(result, dict) and "content" in result:
|
||||
|
||||
if (
|
||||
litellm.max_budget
|
||||
and self.stream is False
|
||||
and result is not None
|
||||
and isinstance(result, dict)
|
||||
and "content" in result
|
||||
):
|
||||
time_diff = (end_time - start_time).total_seconds()
|
||||
float_diff = float(time_diff)
|
||||
litellm._current_cost += litellm.completion_cost(
|
||||
@@ -3340,8 +3391,6 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
|
||||
_in_memory_loggers.append(_literalai_logger)
|
||||
return _literalai_logger # type: ignore
|
||||
elif logging_integration == "prometheus":
|
||||
if PrometheusLogger is None:
|
||||
raise ValueError("PrometheusLogger is not initialized")
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, PrometheusLogger):
|
||||
return callback # type: ignore
|
||||
|
||||
@@ -129,7 +129,7 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
"parallel_tool_calls",
|
||||
"response_format",
|
||||
"user",
|
||||
"web_search_options",
|
||||
"web_search_options"
|
||||
]
|
||||
|
||||
if "claude-3-7-sonnet" in model or supports_reasoning(
|
||||
@@ -646,6 +646,16 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
)
|
||||
return tools
|
||||
|
||||
def _ensure_context_management_beta_header(self, headers: dict) -> None:
|
||||
beta_value = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
|
||||
existing_beta = headers.get("anthropic-beta")
|
||||
if existing_beta is None:
|
||||
headers["anthropic-beta"] = beta_value
|
||||
return
|
||||
existing_values = [beta.strip() for beta in existing_beta.split(",")]
|
||||
if beta_value not in existing_values:
|
||||
headers["anthropic-beta"] = f"{existing_beta}, {beta_value}"
|
||||
|
||||
def update_headers_with_optional_anthropic_beta(
|
||||
self, headers: dict, optional_params: dict
|
||||
) -> dict:
|
||||
@@ -661,9 +671,11 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
elif tool.get("type", None) and tool.get("type").startswith(
|
||||
ANTHROPIC_HOSTED_TOOLS.MEMORY.value
|
||||
):
|
||||
headers["anthropic-beta"] = (
|
||||
ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
|
||||
)
|
||||
headers[
|
||||
"anthropic-beta"
|
||||
] = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
|
||||
if optional_params.get("context_management") is not None:
|
||||
self._ensure_context_management_beta_header(headers)
|
||||
return headers
|
||||
|
||||
def transform_request(
|
||||
@@ -973,13 +985,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
):
|
||||
text_content = prefix_prompt + text_content
|
||||
|
||||
context_management: Optional[Dict] = completion_response.get(
|
||||
"context_management"
|
||||
)
|
||||
|
||||
provider_specific_fields: Dict[str, Any] = {
|
||||
"citations": citations,
|
||||
"thinking_blocks": thinking_blocks,
|
||||
}
|
||||
if context_management is not None:
|
||||
provider_specific_fields["context_management"] = context_management
|
||||
|
||||
_message = litellm.Message(
|
||||
tool_calls=tool_calls,
|
||||
content=text_content or None,
|
||||
provider_specific_fields={
|
||||
"citations": citations,
|
||||
"thinking_blocks": thinking_blocks,
|
||||
},
|
||||
provider_specific_fields=provider_specific_fields,
|
||||
thinking_blocks=thinking_blocks,
|
||||
reasoning_content=reasoning_content,
|
||||
)
|
||||
@@ -1012,6 +1032,16 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = completion_response["model"]
|
||||
|
||||
context_management_response = completion_response.get("context_management")
|
||||
if context_management_response is not None:
|
||||
_hidden_params["context_management"] = context_management_response
|
||||
try:
|
||||
model_response.__dict__["context_management"] = (
|
||||
context_management_response
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
model_response._hidden_params = _hidden_params
|
||||
|
||||
return model_response
|
||||
|
||||
@@ -6,7 +6,10 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
|
||||
from litellm.llms.base_llm.anthropic_messages.transformation import (
|
||||
BaseAnthropicMessagesConfig,
|
||||
)
|
||||
from litellm.types.llms.anthropic import AnthropicMessagesRequest
|
||||
from litellm.types.llms.anthropic import (
|
||||
ANTHROPIC_BETA_HEADER_VALUES,
|
||||
AnthropicMessagesRequest,
|
||||
)
|
||||
from litellm.types.llms.anthropic_messages.anthropic_response import (
|
||||
AnthropicMessagesResponse,
|
||||
)
|
||||
@@ -32,6 +35,7 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
|
||||
"tools",
|
||||
"tool_choice",
|
||||
"thinking",
|
||||
"context_management",
|
||||
# TODO: Add Anthropic `metadata` support
|
||||
# "metadata",
|
||||
]
|
||||
@@ -71,6 +75,11 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
|
||||
if "content-type" not in headers:
|
||||
headers["content-type"] = "application/json"
|
||||
|
||||
headers = self._update_headers_with_optional_anthropic_beta(
|
||||
headers=headers,
|
||||
context_management=optional_params.get("context_management"),
|
||||
)
|
||||
|
||||
return headers, api_base
|
||||
|
||||
def transform_anthropic_messages_request(
|
||||
@@ -142,3 +151,18 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
|
||||
request_body=request_body,
|
||||
litellm_logging_obj=litellm_logging_obj,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _update_headers_with_optional_anthropic_beta(
|
||||
headers: dict, context_management: Optional[Dict]
|
||||
) -> dict:
|
||||
if context_management is None:
|
||||
return headers
|
||||
|
||||
existing_beta = headers.get("anthropic-beta")
|
||||
beta_value = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
|
||||
if existing_beta is None:
|
||||
headers["anthropic-beta"] = beta_value
|
||||
elif beta_value not in [beta.strip() for beta in existing_beta.split(",")]:
|
||||
headers["anthropic-beta"] = f"{existing_beta}, {beta_value}"
|
||||
return headers
|
||||
|
||||
@@ -0,0 +1,226 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
from litellm.types.vector_store_files import (
|
||||
VectorStoreFileAuthCredentials,
|
||||
VectorStoreFileChunkingStrategy,
|
||||
VectorStoreFileContentResponse,
|
||||
VectorStoreFileCreateRequest,
|
||||
VectorStoreFileDeleteResponse,
|
||||
VectorStoreFileListQueryParams,
|
||||
VectorStoreFileListResponse,
|
||||
VectorStoreFileObject,
|
||||
VectorStoreFileUpdateRequest,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
|
||||
|
||||
from ..chat.transformation import BaseLLMException as _BaseLLMException
|
||||
|
||||
LiteLLMLoggingObj = _LiteLLMLoggingObj
|
||||
BaseLLMException = _BaseLLMException
|
||||
else:
|
||||
LiteLLMLoggingObj = Any
|
||||
BaseLLMException = Any
|
||||
|
||||
|
||||
class BaseVectorStoreFilesConfig(ABC):
|
||||
"""Base configuration contract for provider-specific vector store file implementations."""
|
||||
|
||||
def get_supported_openai_params(
|
||||
self,
|
||||
operation: str,
|
||||
) -> Tuple[str, ...]:
|
||||
"""Return the set of OpenAI params supported for the given operation."""
|
||||
|
||||
return tuple()
|
||||
|
||||
def map_openai_params(
|
||||
self,
|
||||
*,
|
||||
operation: str,
|
||||
non_default_params: Dict[str, Any],
|
||||
optional_params: Dict[str, Any],
|
||||
drop_params: bool,
|
||||
) -> Dict[str, Any]:
|
||||
"""Map non-default OpenAI params to provider-specific params."""
|
||||
|
||||
return optional_params
|
||||
|
||||
@abstractmethod
|
||||
def get_auth_credentials(
|
||||
self, litellm_params: Dict[str, Any]
|
||||
) -> VectorStoreFileAuthCredentials:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def get_vector_store_file_endpoints_by_type(self) -> Dict[
|
||||
str, Tuple[Tuple[str, str], ...]
|
||||
]:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def validate_environment(
|
||||
self,
|
||||
*,
|
||||
headers: Dict[str, str],
|
||||
litellm_params: Optional[GenericLiteLLMParams],
|
||||
) -> Dict[str, str]:
|
||||
return {}
|
||||
|
||||
@abstractmethod
|
||||
def get_complete_url(
|
||||
self,
|
||||
*,
|
||||
api_base: Optional[str],
|
||||
vector_store_id: str,
|
||||
litellm_params: Dict[str, Any],
|
||||
) -> str:
|
||||
if api_base is None:
|
||||
raise ValueError("api_base is required")
|
||||
return api_base
|
||||
|
||||
@abstractmethod
|
||||
def transform_create_vector_store_file_request(
|
||||
self,
|
||||
*,
|
||||
vector_store_id: str,
|
||||
create_request: VectorStoreFileCreateRequest,
|
||||
api_base: str,
|
||||
) -> Tuple[str, Dict[str, Any]]:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_create_vector_store_file_response(
|
||||
self,
|
||||
*,
|
||||
response: httpx.Response,
|
||||
) -> VectorStoreFileObject:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_list_vector_store_files_request(
|
||||
self,
|
||||
*,
|
||||
vector_store_id: str,
|
||||
query_params: VectorStoreFileListQueryParams,
|
||||
api_base: str,
|
||||
) -> Tuple[str, Dict[str, Any]]:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_list_vector_store_files_response(
|
||||
self,
|
||||
*,
|
||||
response: httpx.Response,
|
||||
) -> VectorStoreFileListResponse:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_retrieve_vector_store_file_request(
|
||||
self,
|
||||
*,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
api_base: str,
|
||||
) -> Tuple[str, Dict[str, Any]]:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_retrieve_vector_store_file_response(
|
||||
self,
|
||||
*,
|
||||
response: httpx.Response,
|
||||
) -> VectorStoreFileObject:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_retrieve_vector_store_file_content_request(
|
||||
self,
|
||||
*,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
api_base: str,
|
||||
) -> Tuple[str, Dict[str, Any]]:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_retrieve_vector_store_file_content_response(
|
||||
self,
|
||||
*,
|
||||
response: httpx.Response,
|
||||
) -> VectorStoreFileContentResponse:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_update_vector_store_file_request(
|
||||
self,
|
||||
*,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
update_request: VectorStoreFileUpdateRequest,
|
||||
api_base: str,
|
||||
) -> Tuple[str, Dict[str, Any]]:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_update_vector_store_file_response(
|
||||
self,
|
||||
*,
|
||||
response: httpx.Response,
|
||||
) -> VectorStoreFileObject:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_delete_vector_store_file_request(
|
||||
self,
|
||||
*,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
api_base: str,
|
||||
) -> Tuple[str, Dict[str, Any]]:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def transform_delete_vector_store_file_response(
|
||||
self,
|
||||
*,
|
||||
response: httpx.Response,
|
||||
) -> VectorStoreFileDeleteResponse:
|
||||
...
|
||||
|
||||
def get_error_class(
|
||||
self,
|
||||
*,
|
||||
error_message: str,
|
||||
status_code: int,
|
||||
headers: Union[Dict[str, Any], httpx.Headers],
|
||||
) -> BaseLLMException:
|
||||
from ..chat.transformation import BaseLLMException
|
||||
|
||||
raise BaseLLMException(
|
||||
status_code=status_code,
|
||||
message=error_message,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def sign_request(
|
||||
self,
|
||||
*,
|
||||
headers: Dict[str, str],
|
||||
optional_params: Dict[str, Any],
|
||||
request_data: Dict[str, Any],
|
||||
api_base: str,
|
||||
api_key: Optional[str] = None,
|
||||
) -> Tuple[Dict[str, str], Optional[bytes]]:
|
||||
return headers, None
|
||||
|
||||
def prepare_chunking_strategy(
|
||||
self,
|
||||
chunking_strategy: Optional[VectorStoreFileChunkingStrategy],
|
||||
) -> Optional[VectorStoreFileChunkingStrategy]:
|
||||
return chunking_strategy
|
||||
@@ -48,6 +48,9 @@ from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfi
|
||||
from litellm.llms.base_llm.search.transformation import BaseSearchConfig, SearchResponse
|
||||
from litellm.llms.base_llm.text_to_speech.transformation import BaseTextToSpeechConfig
|
||||
from litellm.llms.base_llm.vector_store.transformation import BaseVectorStoreConfig
|
||||
from litellm.llms.base_llm.vector_store_files.transformation import (
|
||||
BaseVectorStoreFilesConfig,
|
||||
)
|
||||
from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
@@ -92,6 +95,15 @@ from litellm.types.vector_stores import (
|
||||
VectorStoreSearchOptionalRequestParams,
|
||||
VectorStoreSearchResponse,
|
||||
)
|
||||
from litellm.types.vector_store_files import (
|
||||
VectorStoreFileContentResponse,
|
||||
VectorStoreFileCreateRequest,
|
||||
VectorStoreFileDeleteResponse,
|
||||
VectorStoreFileListQueryParams,
|
||||
VectorStoreFileListResponse,
|
||||
VectorStoreFileObject,
|
||||
VectorStoreFileUpdateRequest,
|
||||
)
|
||||
from litellm.types.videos.main import VideoObject
|
||||
from litellm.utils import (
|
||||
CustomStreamWrapper,
|
||||
@@ -3529,6 +3541,7 @@ class BaseLLMHTTPHandler:
|
||||
BaseImageEditConfig,
|
||||
BaseImageGenerationConfig,
|
||||
BaseVectorStoreConfig,
|
||||
BaseVectorStoreFilesConfig,
|
||||
BaseGoogleGenAIGenerateContentConfig,
|
||||
BaseAnthropicMessagesConfig,
|
||||
BaseBatchesConfig,
|
||||
@@ -6000,6 +6013,909 @@ class BaseLLMHTTPHandler:
|
||||
response=response,
|
||||
)
|
||||
|
||||
#####################################################################
|
||||
################ Vector Store Files HANDLERS ########################
|
||||
#####################################################################
|
||||
async def async_vector_store_file_create_handler(
    self,
    *,
    vector_store_id: str,
    create_request: VectorStoreFileCreateRequest,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileObject:
    """
    Async handler for the "create vector store file" request.

    Builds headers, URL and body through ``vector_store_files_provider_config``,
    logs the request with ``logging_obj.pre_call``, POSTs it, and returns the
    provider config's transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        create_request: Base request fields; ``extra_body`` entries override them.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name used to look up an async HTTP client.
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        extra_body: Optional extra fields merged into the request body.
        timeout: Timeout forwarded to the POST call.
        client: Used as-is only when it is an ``AsyncHTTPHandler``.

    Raises:
        The exception built by ``self._handle_error`` when the POST call fails.
    """
    provider_config = vector_store_files_provider_config

    # Only an AsyncHTTPHandler can be reused here; otherwise fetch one.
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    # Work on a copy so the caller's request mapping is not mutated.
    merged_request = dict(create_request)
    if extra_body:
        merged_request.update(extra_body)

    target_url, payload = provider_config.transform_create_vector_store_file_request(
        vector_store_id=vector_store_id,
        create_request=cast(VectorStoreFileCreateRequest, merged_request),
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": payload,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = await http_client.post(
            url=target_url,
            headers=request_headers,
            json=payload,
            timeout=timeout,
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_create_vector_store_file_response(
        response=raw_response
    )
|
||||
|
||||
def vector_store_file_create_handler(
    self,
    *,
    vector_store_id: str,
    create_request: VectorStoreFileCreateRequest,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[VectorStoreFileObject, Coroutine[Any, Any, VectorStoreFileObject]]:
    """
    Handler for the "create vector store file" request (sync entry point).

    With ``_is_async`` set, returns the un-awaited coroutine from
    ``async_vector_store_file_create_handler``. Otherwise builds headers, URL
    and body through the provider config, logs the request via
    ``logging_obj.pre_call``, POSTs it synchronously, and returns the
    transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        create_request: Base request fields; ``extra_body`` entries override them.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name (only used on the async path).
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        extra_body: Optional extra fields merged into the request body.
        timeout: Timeout forwarded to the POST call.
        client: Reused only when its type matches the chosen (sync/async) path.
        _is_async: Selects the async implementation when true.

    Raises:
        The exception built by ``self._handle_error`` when the POST call fails.
    """
    provider_config = vector_store_files_provider_config

    if _is_async:
        # A sync client cannot serve the async path, so it is dropped here.
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_create_handler(
            vector_store_id=vector_store_id,
            create_request=create_request,
            vector_store_files_provider_config=provider_config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            extra_body=extra_body,
            timeout=timeout,
            client=async_client,
        )

    # Only an HTTPHandler can be reused on the sync path; otherwise build one.
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    # Work on a copy so the caller's request mapping is not mutated.
    merged_request = dict(create_request)
    if extra_body:
        merged_request.update(extra_body)

    target_url, payload = provider_config.transform_create_vector_store_file_request(
        vector_store_id=vector_store_id,
        create_request=cast(VectorStoreFileCreateRequest, merged_request),
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": payload,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = http_client.post(
            url=target_url,
            headers=request_headers,
            json=payload,
            timeout=timeout,
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_create_vector_store_file_response(
        response=raw_response
    )
|
||||
|
||||
async def async_vector_store_file_list_handler(
    self,
    *,
    vector_store_id: str,
    query_params: VectorStoreFileListQueryParams,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileListResponse:
    """
    Async handler for the "list vector store files" request.

    Builds headers, URL and query parameters through
    ``vector_store_files_provider_config``, logs the request with
    ``logging_obj.pre_call``, issues a GET, and returns the provider config's
    transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        query_params: Base query parameters; ``extra_query`` entries override them.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name used to look up an async HTTP client.
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        extra_query: Optional extra query parameters.
        timeout: Accepted for interface parity; see the review note below.
        client: Used as-is only when it is an ``AsyncHTTPHandler``.

    Raises:
        The exception built by ``self._handle_error`` when the GET call fails.
    """
    provider_config = vector_store_files_provider_config

    # Only an AsyncHTTPHandler can be reused here; otherwise fetch one.
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    # Work on a copy so the caller's query mapping is not mutated.
    merged_query = dict(query_params)
    if extra_query:
        merged_query.update(extra_query)

    target_url, query = provider_config.transform_list_vector_store_files_request(
        vector_store_id=vector_store_id,
        query_params=cast(VectorStoreFileListQueryParams, merged_query),
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": query,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    # NOTE(review): `timeout` is not forwarded to this GET call — confirm
    # whether that is intentional.
    try:
        raw_response = await http_client.get(
            url=target_url, headers=request_headers, params=query
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_list_vector_store_files_response(
        response=raw_response
    )
|
||||
|
||||
def vector_store_file_list_handler(
    self,
    *,
    vector_store_id: str,
    query_params: VectorStoreFileListQueryParams,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[
    VectorStoreFileListResponse, Coroutine[Any, Any, VectorStoreFileListResponse]
]:
    """
    Handler for the "list vector store files" request (sync entry point).

    With ``_is_async`` set, returns the un-awaited coroutine from
    ``async_vector_store_file_list_handler``. Otherwise builds headers, URL and
    query parameters through the provider config, logs the request via
    ``logging_obj.pre_call``, issues a synchronous GET, and returns the
    transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        query_params: Base query parameters; ``extra_query`` entries override them.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name (only used on the async path).
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        extra_query: Optional extra query parameters.
        timeout: Forwarded to the async handler; see the review note below for
            the sync path.
        client: Reused only when its type matches the chosen (sync/async) path.
        _is_async: Selects the async implementation when true.

    Raises:
        The exception built by ``self._handle_error`` when the GET call fails.
    """
    provider_config = vector_store_files_provider_config

    if _is_async:
        # A sync client cannot serve the async path, so it is dropped here.
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_list_handler(
            vector_store_id=vector_store_id,
            query_params=query_params,
            vector_store_files_provider_config=provider_config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            extra_query=extra_query,
            timeout=timeout,
            client=async_client,
        )

    # Only an HTTPHandler can be reused on the sync path; otherwise build one.
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    # Work on a copy so the caller's query mapping is not mutated.
    merged_query = dict(query_params)
    if extra_query:
        merged_query.update(extra_query)

    target_url, query = provider_config.transform_list_vector_store_files_request(
        vector_store_id=vector_store_id,
        query_params=cast(VectorStoreFileListQueryParams, merged_query),
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": query,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    # NOTE(review): `timeout` is not forwarded to this GET call — confirm
    # whether that is intentional.
    try:
        raw_response = http_client.get(
            url=target_url, headers=request_headers, params=query
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_list_vector_store_files_response(
        response=raw_response
    )
|
||||
|
||||
async def async_vector_store_file_retrieve_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileObject:
    """
    Async handler for the "retrieve vector store file" request.

    Builds headers, URL and query parameters through
    ``vector_store_files_provider_config``, logs the request with
    ``logging_obj.pre_call``, issues a GET, and returns the provider config's
    transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        file_id: File identifier forwarded to the provider config.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name used to look up an async HTTP client.
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        timeout: Accepted for interface parity; see the review note below.
        client: Used as-is only when it is an ``AsyncHTTPHandler``.

    Raises:
        The exception built by ``self._handle_error`` when the GET call fails.
    """
    provider_config = vector_store_files_provider_config

    # Only an AsyncHTTPHandler can be reused here; otherwise fetch one.
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    target_url, query = provider_config.transform_retrieve_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": query,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    # NOTE(review): `timeout` is not forwarded to this GET call — confirm
    # whether that is intentional.
    try:
        raw_response = await http_client.get(
            url=target_url, headers=request_headers, params=query
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_retrieve_vector_store_file_response(
        response=raw_response
    )
|
||||
|
||||
def vector_store_file_retrieve_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[VectorStoreFileObject, Coroutine[Any, Any, VectorStoreFileObject]]:
    """
    Handler for the "retrieve vector store file" request (sync entry point).

    With ``_is_async`` set, returns the un-awaited coroutine from
    ``async_vector_store_file_retrieve_handler``. Otherwise builds headers, URL
    and query parameters through the provider config, logs the request via
    ``logging_obj.pre_call``, issues a synchronous GET, and returns the
    transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        file_id: File identifier forwarded to the provider config.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name (only used on the async path).
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        timeout: Forwarded to the async handler; see the review note below for
            the sync path.
        client: Reused only when its type matches the chosen (sync/async) path.
        _is_async: Selects the async implementation when true.

    Raises:
        The exception built by ``self._handle_error`` when the GET call fails.
    """
    provider_config = vector_store_files_provider_config

    if _is_async:
        # A sync client cannot serve the async path, so it is dropped here.
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_retrieve_handler(
            vector_store_id=vector_store_id,
            file_id=file_id,
            vector_store_files_provider_config=provider_config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            timeout=timeout,
            client=async_client,
        )

    # Only an HTTPHandler can be reused on the sync path; otherwise build one.
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    target_url, query = provider_config.transform_retrieve_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": query,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    # NOTE(review): `timeout` is not forwarded to this GET call — confirm
    # whether that is intentional.
    try:
        raw_response = http_client.get(
            url=target_url, headers=request_headers, params=query
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_retrieve_vector_store_file_response(
        response=raw_response
    )
|
||||
|
||||
async def async_vector_store_file_content_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileContentResponse:
    """
    Async handler for the "retrieve vector store file content" request.

    Builds headers, URL and query parameters through
    ``vector_store_files_provider_config``, logs the request with
    ``logging_obj.pre_call``, issues a GET, and returns the provider config's
    transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        file_id: File identifier forwarded to the provider config.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name used to look up an async HTTP client.
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        timeout: Accepted for interface parity; see the review note below.
        client: Used as-is only when it is an ``AsyncHTTPHandler``.

    Raises:
        The exception built by ``self._handle_error`` when the GET call fails.
    """
    provider_config = vector_store_files_provider_config

    # Only an AsyncHTTPHandler can be reused here; otherwise fetch one.
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    (
        target_url,
        query,
    ) = provider_config.transform_retrieve_vector_store_file_content_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": query,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    # NOTE(review): `timeout` is not forwarded to this GET call — confirm
    # whether that is intentional.
    try:
        raw_response = await http_client.get(
            url=target_url, headers=request_headers, params=query
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_retrieve_vector_store_file_content_response(
        response=raw_response
    )
|
||||
|
||||
def vector_store_file_content_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[
    VectorStoreFileContentResponse,
    Coroutine[Any, Any, VectorStoreFileContentResponse],
]:
    """
    Handler for the "retrieve vector store file content" request (sync entry point).

    With ``_is_async`` set, returns the un-awaited coroutine from
    ``async_vector_store_file_content_handler``. Otherwise builds headers, URL
    and query parameters through the provider config, logs the request via
    ``logging_obj.pre_call``, issues a synchronous GET, and returns the
    transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        file_id: File identifier forwarded to the provider config.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name (only used on the async path).
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        timeout: Forwarded to the async handler; see the review note below for
            the sync path.
        client: Reused only when its type matches the chosen (sync/async) path.
        _is_async: Selects the async implementation when true.

    Raises:
        The exception built by ``self._handle_error`` when the GET call fails.
    """
    provider_config = vector_store_files_provider_config

    if _is_async:
        # A sync client cannot serve the async path, so it is dropped here.
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_content_handler(
            vector_store_id=vector_store_id,
            file_id=file_id,
            vector_store_files_provider_config=provider_config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            timeout=timeout,
            client=async_client,
        )

    # Only an HTTPHandler can be reused on the sync path; otherwise build one.
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    (
        target_url,
        query,
    ) = provider_config.transform_retrieve_vector_store_file_content_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": query,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    # NOTE(review): `timeout` is not forwarded to this GET call — confirm
    # whether that is intentional.
    try:
        raw_response = http_client.get(
            url=target_url, headers=request_headers, params=query
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_retrieve_vector_store_file_content_response(
        response=raw_response
    )
|
||||
|
||||
async def async_vector_store_file_update_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    update_request: VectorStoreFileUpdateRequest,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileObject:
    """
    Async handler for the "update vector store file" request.

    Builds headers, URL and body through ``vector_store_files_provider_config``,
    logs the request with ``logging_obj.pre_call``, POSTs it, and returns the
    provider config's transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        file_id: File identifier forwarded to the provider config.
        update_request: Base request fields; ``extra_body`` entries override them.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name used to look up an async HTTP client.
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        extra_body: Optional extra fields merged into the request body.
        timeout: Timeout forwarded to the POST call.
        client: Used as-is only when it is an ``AsyncHTTPHandler``.

    Raises:
        The exception built by ``self._handle_error`` when the POST call fails.
    """
    provider_config = vector_store_files_provider_config

    # Only an AsyncHTTPHandler can be reused here; otherwise fetch one.
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    # Work on a copy so the caller's request mapping is not mutated.
    merged_request = dict(update_request)
    if extra_body:
        merged_request.update(extra_body)

    target_url, payload = provider_config.transform_update_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        update_request=cast(VectorStoreFileUpdateRequest, merged_request),
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": payload,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = await http_client.post(
            url=target_url,
            headers=request_headers,
            json=payload,
            timeout=timeout,
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_update_vector_store_file_response(
        response=raw_response
    )
|
||||
|
||||
def vector_store_file_update_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    update_request: VectorStoreFileUpdateRequest,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[VectorStoreFileObject, Coroutine[Any, Any, VectorStoreFileObject]]:
    """
    Handler for the "update vector store file" request (sync entry point).

    With ``_is_async`` set, returns the un-awaited coroutine from
    ``async_vector_store_file_update_handler``. Otherwise builds headers, URL
    and body through the provider config, logs the request via
    ``logging_obj.pre_call``, POSTs it synchronously, and returns the
    transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        file_id: File identifier forwarded to the provider config.
        update_request: Base request fields; ``extra_body`` entries override them.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name (only used on the async path).
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        extra_body: Optional extra fields merged into the request body.
        timeout: Timeout forwarded to the POST call.
        client: Reused only when its type matches the chosen (sync/async) path.
        _is_async: Selects the async implementation when true.

    Raises:
        The exception built by ``self._handle_error`` when the POST call fails.
    """
    provider_config = vector_store_files_provider_config

    if _is_async:
        # A sync client cannot serve the async path, so it is dropped here.
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_update_handler(
            vector_store_id=vector_store_id,
            file_id=file_id,
            update_request=update_request,
            vector_store_files_provider_config=provider_config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            extra_body=extra_body,
            timeout=timeout,
            client=async_client,
        )

    # Only an HTTPHandler can be reused on the sync path; otherwise build one.
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    # Work on a copy so the caller's request mapping is not mutated.
    merged_request = dict(update_request)
    if extra_body:
        merged_request.update(extra_body)

    target_url, payload = provider_config.transform_update_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        update_request=cast(VectorStoreFileUpdateRequest, merged_request),
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": payload,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = http_client.post(
            url=target_url,
            headers=request_headers,
            json=payload,
            timeout=timeout,
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_update_vector_store_file_response(
        response=raw_response
    )
|
||||
|
||||
async def async_vector_store_file_delete_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> VectorStoreFileDeleteResponse:
    """
    Async handler for the "delete vector store file" request.

    Builds headers, URL and query parameters through
    ``vector_store_files_provider_config``, logs the request with
    ``logging_obj.pre_call``, issues a DELETE, and returns the provider
    config's transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        file_id: File identifier forwarded to the provider config.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name used to look up an async HTTP client.
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        timeout: Timeout forwarded to the DELETE call.
        client: Used as-is only when it is an ``AsyncHTTPHandler``.

    Raises:
        The exception built by ``self._handle_error`` when the DELETE call fails.
    """
    provider_config = vector_store_files_provider_config

    # Only an AsyncHTTPHandler can be reused here; otherwise fetch one.
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider),
            params={"ssl_verify": litellm_params.get("ssl_verify", None)},
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    target_url, query = provider_config.transform_delete_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": query,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = await http_client.delete(
            url=target_url,
            headers=request_headers,
            params=query,
            timeout=timeout,
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_delete_vector_store_file_response(
        response=raw_response
    )
|
||||
|
||||
def vector_store_file_delete_handler(
    self,
    *,
    vector_store_id: str,
    file_id: str,
    vector_store_files_provider_config: BaseVectorStoreFilesConfig,
    custom_llm_provider: str,
    litellm_params: GenericLiteLLMParams,
    logging_obj: LiteLLMLoggingObj,
    extra_headers: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    _is_async: bool = False,
) -> Union[
    VectorStoreFileDeleteResponse,
    Coroutine[Any, Any, VectorStoreFileDeleteResponse],
]:
    """
    Handler for the "delete vector store file" request (sync entry point).

    With ``_is_async`` set, returns the un-awaited coroutine from
    ``async_vector_store_file_delete_handler``. Otherwise builds headers, URL
    and query parameters through the provider config, logs the request via
    ``logging_obj.pre_call``, issues a synchronous DELETE, and returns the
    transformed response.

    Args:
        vector_store_id: Identifier forwarded to the provider config.
        file_id: File identifier forwarded to the provider config.
        vector_store_files_provider_config: Provider-specific transformation config.
        custom_llm_provider: Provider name (only used on the async path).
        litellm_params: Supplies ``api_base`` and ``ssl_verify``.
        logging_obj: Logging object that receives the pre-call record.
        extra_headers: Optional headers applied on top of the validated headers.
        timeout: Timeout forwarded to the DELETE call.
        client: Reused only when its type matches the chosen (sync/async) path.
        _is_async: Selects the async implementation when true.

    Raises:
        The exception built by ``self._handle_error`` when the DELETE call fails.
    """
    provider_config = vector_store_files_provider_config

    if _is_async:
        # A sync client cannot serve the async path, so it is dropped here.
        async_client = client if isinstance(client, AsyncHTTPHandler) else None
        return self.async_vector_store_file_delete_handler(
            vector_store_id=vector_store_id,
            file_id=file_id,
            vector_store_files_provider_config=provider_config,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            extra_headers=extra_headers,
            timeout=timeout,
            client=async_client,
        )

    # Only an HTTPHandler can be reused on the sync path; otherwise build one.
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client(
            params={"ssl_verify": litellm_params.get("ssl_verify", None)}
        )

    request_headers = provider_config.validate_environment(
        headers=extra_headers or {}, litellm_params=litellm_params
    )
    # Caller-supplied headers are re-applied after validation.
    if extra_headers:
        request_headers.update(extra_headers)

    resolved_api_base = provider_config.get_complete_url(
        api_base=litellm_params.api_base,
        vector_store_id=vector_store_id,
        litellm_params=dict(litellm_params),
    )

    target_url, query = provider_config.transform_delete_vector_store_file_request(
        vector_store_id=vector_store_id,
        file_id=file_id,
        api_base=resolved_api_base,
    )

    pre_call_details = {
        "complete_input_dict": query,
        "api_base": resolved_api_base,
        "headers": request_headers,
    }
    logging_obj.pre_call(input="", api_key="", additional_args=pre_call_details)

    try:
        raw_response = http_client.delete(
            url=target_url,
            headers=request_headers,
            params=query,
            timeout=timeout,
        )
    except Exception as exc:
        raise self._handle_error(e=exc, provider_config=provider_config)

    return provider_config.transform_delete_vector_store_file_response(
        response=raw_response
    )
|
||||
|
||||
#####################################################################
|
||||
################ Google GenAI GENERATE CONTENT HANDLER ###########################
|
||||
#####################################################################
|
||||
|
||||
@@ -2,6 +2,7 @@ from .cost_calculator import cost_calculator
|
||||
from .image_generation import (
|
||||
FalAIBaseConfig,
|
||||
FalAIBriaConfig,
|
||||
FalAIFluxProV11Config,
|
||||
FalAIFluxProV11UltraConfig,
|
||||
FalAIFluxSchnellConfig,
|
||||
FalAIImageGenerationConfig,
|
||||
@@ -18,6 +19,7 @@ __all__ = [
|
||||
"FalAIImagen4Config",
|
||||
"FalAIRecraftV3Config",
|
||||
"FalAIBriaConfig",
|
||||
"FalAIFluxProV11Config",
|
||||
"FalAIFluxProV11UltraConfig",
|
||||
"FalAIFluxSchnellConfig",
|
||||
"FalAIStableDiffusionConfig",
|
||||
|
||||
@@ -3,12 +3,18 @@ from litellm.llms.base_llm.image_generation.transformation import (
|
||||
)
|
||||
|
||||
from .bria_transformation import FalAIBriaConfig
|
||||
from .flux_pro_v11_transformation import FalAIFluxProV11Config
|
||||
from .flux_pro_v11_ultra_transformation import FalAIFluxProV11UltraConfig
|
||||
from .flux_schnell_transformation import FalAIFluxSchnellConfig
|
||||
from .imagen4_transformation import FalAIImagen4Config
|
||||
from .recraft_v3_transformation import FalAIRecraftV3Config
|
||||
from .ideogram_v3_transformation import FalAIIdeogramV3Config
|
||||
from .stable_diffusion_transformation import FalAIStableDiffusionConfig
|
||||
from .transformation import FalAIBaseConfig, FalAIImageGenerationConfig
|
||||
from .bytedance_transformation import (
|
||||
FalAIBytedanceSeedreamV3Config,
|
||||
FalAIBytedanceDreaminaV31Config,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"FalAIBaseConfig",
|
||||
@@ -16,9 +22,13 @@ __all__ = [
|
||||
"FalAIImagen4Config",
|
||||
"FalAIRecraftV3Config",
|
||||
"FalAIBriaConfig",
|
||||
"FalAIFluxProV11Config",
|
||||
"FalAIFluxProV11UltraConfig",
|
||||
"FalAIFluxSchnellConfig",
|
||||
"FalAIStableDiffusionConfig",
|
||||
"FalAIBytedanceSeedreamV3Config",
|
||||
"FalAIBytedanceDreaminaV31Config",
|
||||
"FalAIIdeogramV3Config",
|
||||
]
|
||||
|
||||
|
||||
@@ -41,10 +51,18 @@ def get_fal_ai_image_generation_config(model: str) -> BaseImageGenerationConfig:
|
||||
return FalAIRecraftV3Config()
|
||||
elif "bria" in model_lower:
|
||||
return FalAIBriaConfig()
|
||||
elif "flux-pro" in model_lower and "ultra" in model_lower:
|
||||
return FalAIFluxProV11UltraConfig()
|
||||
elif "flux-pro" in model_lower:
|
||||
if "ultra" in model_lower:
|
||||
return FalAIFluxProV11UltraConfig()
|
||||
return FalAIFluxProV11Config()
|
||||
elif "flux/schnell" in model_lower or "flux-schnell" in model_lower or "schnell" in model_lower:
|
||||
return FalAIFluxSchnellConfig()
|
||||
elif "bytedance/seedream" in model_lower:
|
||||
return FalAIBytedanceSeedreamV3Config()
|
||||
elif "bytedance/dreamina" in model_lower:
|
||||
return FalAIBytedanceDreaminaV31Config()
|
||||
elif "ideogram" in model_lower:
|
||||
return FalAIIdeogramV3Config()
|
||||
elif "stable-diffusion" in model_lower:
|
||||
return FalAIStableDiffusionConfig()
|
||||
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
from typing import Any
|
||||
|
||||
from .flux_pro_v11_ultra_transformation import FalAIFluxProV11UltraConfig
|
||||
|
||||
|
||||
class FalAIBytedanceBaseConfig(FalAIFluxProV11UltraConfig):
    """
    Shared configuration for the Fal AI ByteDance text-to-image models.

    Extends ``FalAIFluxProV11UltraConfig`` and overrides the OpenAI parameter
    mapping: LiteLLM callers pass the OpenAI-compatible ``size`` parameter,
    which is forwarded to Fal AI as ``image_size`` - either a named preset
    (for example ``"square_hd"``) or a custom ``{"width": ..., "height": ...}``
    object.
    """

    # OpenAI "WIDTHxHEIGHT" strings that have a named Fal AI preset.
    _OPENAI_SIZE_TO_IMAGE_SIZE = {
        "1024x1024": "square_hd",
        "512x512": "square",
        "1792x1024": "landscape_16_9",
        "1024x1792": "portrait_16_9",
        "1024x768": "landscape_4_3",
        "768x1024": "portrait_4_3",
    }

    # Preset used when a size string cannot be interpreted at all.
    _DEFAULT_IMAGE_SIZE = "landscape_4_3"

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Translate OpenAI image parameters into their Fal AI equivalents.

        Args:
            non_default_params: OpenAI-style parameters supplied by the caller.
            optional_params: Provider parameters collected so far; updated in
                place and returned.
            model: Model name, used for the supported-parameter lookup and in
                the error message.
            drop_params: When True, unsupported parameters are skipped
                silently instead of raising.

        Returns:
            ``optional_params`` with the mapped parameters added.

        Raises:
            ValueError: If a parameter is unsupported and ``drop_params`` is
                False.
        """
        supported_params = self.get_supported_openai_params(model)

        param_mapping = {
            "n": "num_images",
            "response_format": "output_format",
            "size": "image_size",
        }

        for k, mapped_value in non_default_params.items():
            if k in optional_params:
                # A value is already present under this name; do not override.
                continue

            if k not in supported_params:
                if drop_params:
                    continue
                raise ValueError(
                    f"Parameter {k} is not supported for model {model}. "
                    f"Supported parameters are {supported_params}. "
                    "Set drop_params=True to drop unsupported parameters."
                )

            if k == "response_format":
                # Both OpenAI response formats are sent as the "jpeg"
                # output_format; any other value is forwarded unchanged.
                if mapped_value in ["b64_json", "url"]:
                    mapped_value = "jpeg"
            elif k == "size":
                mapped_value = self._map_image_size(mapped_value)

            optional_params[param_mapping.get(k, k)] = mapped_value

        return optional_params

    def _map_image_size(self, size: Any) -> Any:
        """
        Convert an OpenAI ``size`` value into a Fal AI ``image_size`` value.

        Non-string values (including custom size dicts) are returned
        unchanged. Strings are matched ignoring surrounding whitespace and
        letter case: known sizes become their named preset, any other
        ``"<width>x<height>"`` string becomes a ``{"width", "height"}`` dict,
        and everything else falls back to ``_DEFAULT_IMAGE_SIZE``.
        """
        if not isinstance(size, str):
            return size

        # Normalise so " 1024x1024 " and "1024X768" are recognised instead of
        # silently falling through to the default preset.
        normalized = size.strip().lower()

        preset = self._OPENAI_SIZE_TO_IMAGE_SIZE.get(normalized)
        if preset is not None:
            return preset

        if "x" in normalized:
            try:
                # ValueError covers both a wrong number of "x"-separated parts
                # and non-integer dimensions.
                width_str, height_str = normalized.split("x")
                return {"width": int(width_str), "height": int(height_str)}
            except ValueError:
                pass

        return self._DEFAULT_IMAGE_SIZE
|
||||
|
||||
|
||||
class FalAIBytedanceSeedreamV3Config(FalAIBytedanceBaseConfig):
    """
    Fal AI ByteDance Seedream v3 text-to-image configuration.

    Everything except the endpoint is inherited from
    ``FalAIBytedanceBaseConfig``.

    Endpoint: fal-ai/bytedance/seedream/v3/text-to-image
    Docs: https://fal.ai/models/fal-ai/bytedance/seedream/v3/text-to-image
    """

    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/bytedance/seedream/v3/text-to-image"
|
||||
|
||||
|
||||
class FalAIBytedanceDreaminaV31Config(FalAIBytedanceBaseConfig):
    """
    Fal AI ByteDance Dreamina v3.1 text-to-image configuration.

    Everything except the endpoint is inherited from
    ``FalAIBytedanceBaseConfig``.

    Endpoint: fal-ai/bytedance/dreamina/v3.1/text-to-image
    Docs: https://fal.ai/models/fal-ai/bytedance/dreamina/v3.1/text-to-image
    """

    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/bytedance/dreamina/v3.1/text-to-image"
|
||||
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
from typing import Any
|
||||
|
||||
from .flux_pro_v11_ultra_transformation import FalAIFluxProV11UltraConfig
|
||||
|
||||
|
||||
class FalAIFluxProV11Config(FalAIFluxProV11UltraConfig):
    """
    Configuration for Fal AI Flux Pro v1.1 model.

    FLUX Pro v1.1 leverages the same overall request/response structure as the
    Ultra variant but expects the `image_size` parameter instead of
    `aspect_ratio`.

    Model endpoint: fal-ai/flux-pro/v1.1
    Documentation: https://fal.ai/models/fal-ai/flux-pro/v1.1
    """

    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/flux-pro/v1.1"

    # OpenAI "WIDTHxHEIGHT" strings that have a named Fal AI preset.
    _OPENAI_SIZE_TO_IMAGE_SIZE = {
        "1024x1024": "square_hd",
        "512x512": "square",
        "1792x1024": "landscape_16_9",
        "1024x1792": "portrait_16_9",
        "1024x768": "landscape_4_3",
        "768x1024": "portrait_4_3",
    }

    # Preset used when a size string cannot be interpreted at all.
    _DEFAULT_IMAGE_SIZE = "landscape_4_3"

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Override size handling to map to Flux Pro v1.1 image_size enums/object.

        Args:
            non_default_params: OpenAI-style parameters supplied by the caller.
            optional_params: Provider parameters collected so far; updated in
                place and returned.
            model: Model name, used for the supported-parameter lookup and in
                the error message.
            drop_params: When True, unsupported parameters are skipped
                silently instead of raising.

        Returns:
            ``optional_params`` with the mapped parameters added.

        Raises:
            ValueError: If a parameter is unsupported and ``drop_params`` is
                False.
        """
        supported_params = self.get_supported_openai_params(model)

        param_mapping = {
            "n": "num_images",
            "response_format": "output_format",
            "size": "image_size",
        }

        for k, mapped_value in non_default_params.items():
            if k in optional_params:
                # A value is already present under this name; do not override.
                continue

            if k not in supported_params:
                if drop_params:
                    continue
                raise ValueError(
                    f"Parameter {k} is not supported for model {model}. "
                    f"Supported parameters are {supported_params}. "
                    "Set drop_params=True to drop unsupported parameters."
                )

            if k == "response_format":
                # Both OpenAI response formats are sent as the "jpeg"
                # output_format; any other value is forwarded unchanged.
                if mapped_value in ["b64_json", "url"]:
                    mapped_value = "jpeg"
            elif k == "size":
                mapped_value = self._map_image_size(mapped_value)

            optional_params[param_mapping.get(k, k)] = mapped_value

        return optional_params

    def _map_image_size(self, size: Any) -> Any:
        """
        Convert an OpenAI ``size`` value into a Fal AI ``image_size`` value.

        Non-string values (including custom size dicts) are returned
        unchanged. Strings are matched ignoring surrounding whitespace and
        letter case: known sizes become their named preset, any other
        ``"<width>x<height>"`` string becomes a ``{"width", "height"}`` dict,
        and everything else falls back to ``_DEFAULT_IMAGE_SIZE``.
        """
        if not isinstance(size, str):
            return size

        # Normalise so " 1024x1024 " and "1024X768" are recognised instead of
        # silently falling through to the default preset.
        normalized = size.strip().lower()

        preset = self._OPENAI_SIZE_TO_IMAGE_SIZE.get(normalized)
        if preset is not None:
            return preset

        if "x" in normalized:
            try:
                # ValueError covers both a wrong number of "x"-separated parts
                # and non-integer dimensions.
                width_str, height_str = normalized.split("x")
                return {"width": int(width_str), "height": int(height_str)}
            except ValueError:
                pass

        return self._DEFAULT_IMAGE_SIZE
|
||||
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
from typing import TYPE_CHECKING, Any, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.types.llms.openai import OpenAIImageGenerationOptionalParams
|
||||
from litellm.types.utils import ImageObject, ImageResponse
|
||||
|
||||
from .transformation import FalAIBaseConfig
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
|
||||
|
||||
LiteLLMLoggingObj = _LiteLLMLoggingObj
|
||||
else:
|
||||
LiteLLMLoggingObj = Any
|
||||
|
||||
|
||||
class FalAIIdeogramV3Config(FalAIBaseConfig):
    """
    Configuration for fal-ai/ideogram/v3 image generation.

    The Ideogram v3 endpoint exposes multiple generation modes (text-to-image,
    remixing, reframing, background replacement, character workflows, etc.).
    LiteLLM focuses on the text-to-image interface to maintain OpenAI parity.

    Model endpoint: fal-ai/ideogram/v3
    Documentation: https://fal.ai/models/fal-ai/ideogram/v3
    """

    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/ideogram/v3"

    # OpenAI "WIDTHxHEIGHT" strings that have a named image_size preset.
    _OPENAI_SIZE_TO_IMAGE_SIZE = {
        "1024x1024": "square_hd",
        "512x512": "square",
        "1024x768": "landscape_4_3",
        "768x1024": "portrait_4_3",
        "1536x1024": "landscape_16_9",
        "1024x1536": "portrait_16_9",
    }

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIImageGenerationOptionalParams]:
        """
        Ideogram v3 accepts the core OpenAI image parameters.
        """
        return [
            "n",
            "response_format",
            "size",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Map OpenAI-style parameters onto Ideogram's request schema.

        Args:
            non_default_params: OpenAI-style parameters supplied by the caller.
            optional_params: Provider parameters collected so far; updated in
                place and returned.
            model: Model name, used for the supported-parameter lookup and in
                the error message.
            drop_params: When True, unsupported parameters are skipped
                silently instead of raising.

        Returns:
            ``optional_params`` with ``num_images`` / ``image_size`` added
            where applicable.

        Raises:
            ValueError: If a parameter is unsupported and ``drop_params`` is
                False.
        """
        supported_params = self.get_supported_openai_params(model)

        for k in non_default_params.keys():
            if k in optional_params:
                continue

            if k not in supported_params:
                if drop_params:
                    continue
                raise ValueError(
                    f"Parameter {k} is not supported for model {model}. "
                    f"Supported parameters are {supported_params}. "
                    "Set drop_params=True to drop unsupported parameters."
                )

            value = non_default_params[k]

            if k == "n":
                optional_params["num_images"] = value
            elif k == "size":
                optional_params["image_size"] = self._map_image_size(value)
            elif k == "response_format":
                # Ideogram always returns URLs; nothing to map but don't error.
                continue

        return optional_params

    def _map_image_size(self, size: Any) -> Any:
        """
        Convert an OpenAI ``size`` value into an Ideogram ``image_size`` value.

        Dicts with integer ``width`` and ``height`` are reduced to just those
        two keys; other dicts and non-string values are returned unchanged.
        Strings are matched ignoring surrounding whitespace and letter case:
        known sizes become their named preset, any other
        ``"<width>x<height>"`` string becomes a ``{"width", "height"}`` dict,
        and everything else falls back to ``"square_hd"``.
        """
        if isinstance(size, dict):
            width = size.get("width")
            height = size.get("height")
            if isinstance(width, int) and isinstance(height, int):
                return {"width": width, "height": height}
            return size

        if not isinstance(size, str):
            return size

        # Lower-casing lets "1024X1024" hit the preset table / parser instead
        # of silently falling back to the default.
        normalized = size.strip().lower()
        if normalized in self._OPENAI_SIZE_TO_IMAGE_SIZE:
            return self._OPENAI_SIZE_TO_IMAGE_SIZE[normalized]

        if "x" in normalized:
            try:
                # ValueError covers both a wrong number of "x"-separated parts
                # and non-integer dimensions.
                width_str, height_str = normalized.split("x")
                width = int(width_str)
                height = int(height_str)
                return {"width": width, "height": height}
            except ValueError:
                pass

        # Fallback to a safe default that Ideogram accepts.
        return "square_hd"

    def transform_image_generation_request(
        self,
        model: str,
        prompt: str,
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """
        Construct the request payload for Ideogram v3.

        Required:
        - prompt: text prompt describing the scene.

        Optional (subset):
        - rendering_speed, style_preset, style, style_codes, color_palette,
          image_urls, style_reference_images, expand_prompt, seed,
          negative_prompt, image_size, etc.

        Note: ``optional_params`` is spread after ``prompt``, so a ``prompt``
        key inside ``optional_params`` takes precedence.
        """
        return {
            "prompt": prompt,
            **optional_params,
        }

    def transform_image_generation_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ImageResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ImageResponse:
        """
        Parse Ideogram v3 responses which contain a list of File objects.

        Each entry of the response's ``images`` list is appended to
        ``model_response.data`` as an ``ImageObject`` carrying only a URL;
        a ``seed`` in the response is copied into ``_hidden_params`` when that
        attribute exists.

        Raises:
            The provider error from ``get_error_class`` when the body is not
            valid JSON or is not a JSON object.
        """
        try:
            response_data = raw_response.json()
        except Exception as e:
            raise self.get_error_class(
                error_message=f"Error transforming image generation response: {e}",
                status_code=raw_response.status_code,
                headers=raw_response.headers,
            )

        # Valid JSON that is not an object (list, string, null, ...) has no
        # "images" key; report it as a provider error instead of letting
        # `.get` raise a bare AttributeError.
        if not isinstance(response_data, dict):
            raise self.get_error_class(
                error_message=(
                    "Error transforming image generation response: "
                    f"expected a JSON object, got {type(response_data).__name__}"
                ),
                status_code=raw_response.status_code,
                headers=raw_response.headers,
            )

        if not model_response.data:
            model_response.data = []

        images = response_data.get("images", [])
        if isinstance(images, list):
            for image_entry in images:
                # Entries are either objects with a "url" field or are used
                # directly as the URL value.
                if isinstance(image_entry, dict):
                    url = image_entry.get("url")
                else:
                    url = image_entry

                model_response.data.append(
                    ImageObject(
                        url=url,
                        b64_json=None,
                    )
                )

        if hasattr(model_response, "_hidden_params") and "seed" in response_data:
            model_response._hidden_params["seed"] = response_data["seed"]

        return model_response
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ class OpenAIGPT5Config(OpenAIGPTConfig):
|
||||
from litellm.utils import supports_tool_choice
|
||||
|
||||
base_gpt_series_params = super().get_supported_openai_params(model=model)
|
||||
gpt_5_only_params = ["reasoning_effort"]
|
||||
gpt_5_only_params = ["reasoning_effort", "verbosity"]
|
||||
base_gpt_series_params.extend(gpt_5_only_params)
|
||||
if not supports_tool_choice(model=model):
|
||||
base_gpt_series_params.remove("tool_choice")
|
||||
|
||||
@@ -0,0 +1,258 @@
|
||||
from typing import Any, Dict, Optional, Tuple, cast
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm.llms.base_llm.vector_store_files.transformation import (
|
||||
BaseVectorStoreFilesConfig,
|
||||
)
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
from litellm.types.vector_store_files import (
|
||||
VectorStoreFileAuthCredentials,
|
||||
VectorStoreFileContentResponse,
|
||||
VectorStoreFileCreateRequest,
|
||||
VectorStoreFileDeleteResponse,
|
||||
VectorStoreFileListQueryParams,
|
||||
VectorStoreFileListResponse,
|
||||
VectorStoreFileObject,
|
||||
VectorStoreFileUpdateRequest,
|
||||
)
|
||||
from litellm.utils import add_openai_metadata
|
||||
|
||||
|
||||
def _clean_dict(source: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {k: v for k, v in source.items() if v is not None}
|
||||
|
||||
|
||||
class OpenAIVectorStoreFilesConfig(BaseVectorStoreFilesConfig):
    """
    Request/response transformations for the OpenAI vector store files API.

    Every ``transform_*_request`` method returns a ``(url, payload_or_params)``
    tuple and every ``transform_*_response`` method returns the decoded JSON
    body of the HTTP response.
    """

    ASSISTANTS_HEADER_KEY = "OpenAI-Beta"
    ASSISTANTS_HEADER_VALUE = "assistants=v2"

    def _decode_json_body(self, response: httpx.Response) -> Any:
        """
        Decode ``response`` as JSON.

        Any failure while decoding is re-raised as the provider error class
        carrying the original message, status code and headers.
        """
        try:
            return response.json()
        except Exception as exc:  # noqa: BLE001
            raise self.get_error_class(
                error_message=str(exc),
                status_code=response.status_code,
                headers=response.headers,
            )

    def get_auth_credentials(
        self, litellm_params: Dict[str, Any]
    ) -> VectorStoreFileAuthCredentials:
        """
        Build the bearer-token auth headers from ``litellm_params["api_key"]``.

        Raises:
            ValueError: If ``api_key`` is missing or ``None``.
        """
        api_key = litellm_params.get("api_key")
        if api_key is None:
            raise ValueError("api_key is required")
        return {
            "headers": {
                "Authorization": f"Bearer {api_key}",
            }
        }

    def get_vector_store_file_endpoints_by_type(self) -> Dict[
        str, Tuple[Tuple[str, str], ...]
    ]:
        """Return the ``(HTTP method, path template)`` pairs grouped as "read" / "write"."""
        return {
            "read": (
                ("GET", "/vector_stores/{vector_store_id}/files"),
                ("GET", "/vector_stores/{vector_store_id}/files/{file_id}"),
                (
                    "GET",
                    "/vector_stores/{vector_store_id}/files/{file_id}/content",
                ),
            ),
            "write": (
                ("POST", "/vector_stores/{vector_store_id}/files"),
                ("POST", "/vector_stores/{vector_store_id}/files/{file_id}"),
                ("DELETE", "/vector_stores/{vector_store_id}/files/{file_id}"),
            ),
        }

    def validate_environment(
        self,
        *,
        headers: Dict[str, str],
        litellm_params: Optional[GenericLiteLLMParams],
    ) -> Dict[str, str]:
        """
        Add authorization, content-type and assistants-beta headers.

        The API key is taken from the first truthy value of
        ``litellm_params.api_key``, ``litellm.api_key``, ``litellm.openai_key``
        and the ``OPENAI_API_KEY`` secret. ``headers`` is updated in place and
        returned; an ``OpenAI-Beta`` header already present is kept.
        """
        litellm_params = litellm_params or GenericLiteLLMParams()
        api_key = (
            litellm_params.api_key
            or litellm.api_key
            or litellm.openai_key
            or get_secret_str("OPENAI_API_KEY")
        )
        # NOTE(review): when no key is resolved the header value becomes the
        # literal "Bearer None"; unlike get_auth_credentials this does not
        # raise - confirm that is intended.
        headers.update(
            {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            }
        )
        if self.ASSISTANTS_HEADER_KEY not in headers:
            headers[self.ASSISTANTS_HEADER_KEY] = self.ASSISTANTS_HEADER_VALUE
        return headers

    def get_complete_url(
        self,
        *,
        api_base: Optional[str],
        vector_store_id: str,
        litellm_params: Dict[str, Any],
    ) -> str:
        """
        Return the files collection URL for ``vector_store_id``.

        The base URL is the first truthy value of ``api_base``,
        ``litellm.api_base``, the ``OPENAI_BASE_URL`` / ``OPENAI_API_BASE``
        secrets and ``https://api.openai.com/v1``; trailing slashes are
        stripped before the path is appended.
        """
        base_url = (
            api_base
            or litellm.api_base
            or get_secret_str("OPENAI_BASE_URL")
            or get_secret_str("OPENAI_API_BASE")
            or "https://api.openai.com/v1"
        )
        base_url = base_url.rstrip("/")
        return f"{base_url}/vector_stores/{vector_store_id}/files"

    def transform_create_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        create_request: VectorStoreFileCreateRequest,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """
        Build the URL and JSON payload for creating a vector store file.

        ``None``-valued fields are dropped. Dict ``attributes`` are passed
        through ``add_openai_metadata``; if that returns ``None`` the
        ``attributes`` key is removed from the payload.
        """
        payload: Dict[str, Any] = _clean_dict(dict(create_request))
        attributes = payload.get("attributes")
        if isinstance(attributes, dict):
            filtered_attributes = add_openai_metadata(attributes)
            if filtered_attributes is not None:
                payload["attributes"] = filtered_attributes
            else:
                payload.pop("attributes", None)
        url = api_base
        return url, payload

    def transform_create_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileObject:
        """Return the decoded JSON body of a create-file response."""
        return cast(VectorStoreFileObject, self._decode_json_body(response))

    def transform_list_vector_store_files_request(
        self,
        *,
        vector_store_id: str,
        query_params: VectorStoreFileListQueryParams,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return ``api_base`` and the query parameters with ``None`` values dropped."""
        params = _clean_dict(dict(query_params))
        return api_base, params

    def transform_list_vector_store_files_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileListResponse:
        """Return the decoded JSON body of a list-files response."""
        return cast(VectorStoreFileListResponse, self._decode_json_body(response))

    def transform_retrieve_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return ``{api_base}/{file_id}`` and an empty parameter dict."""
        # presumably api_base is the files collection URL from
        # get_complete_url - verify against caller
        return f"{api_base}/{file_id}", {}

    def transform_retrieve_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileObject:
        """Return the decoded JSON body of a retrieve-file response."""
        return cast(VectorStoreFileObject, self._decode_json_body(response))

    def transform_retrieve_vector_store_file_content_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return ``{api_base}/{file_id}/content`` and an empty parameter dict."""
        return f"{api_base}/{file_id}/content", {}

    def transform_retrieve_vector_store_file_content_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileContentResponse:
        """Return the decoded JSON body of a file-content response."""
        return cast(VectorStoreFileContentResponse, self._decode_json_body(response))

    def transform_update_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        update_request: VectorStoreFileUpdateRequest,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """
        Build the URL and JSON payload for updating a vector store file.

        Unlike the create request, ``None``-valued fields are kept. Dict
        ``attributes`` are passed through ``add_openai_metadata``; if that
        returns ``None`` the ``attributes`` key is removed from the payload.
        """
        payload: Dict[str, Any] = dict(update_request)
        attributes = payload.get("attributes")
        if isinstance(attributes, dict):
            filtered_attributes = add_openai_metadata(attributes)
            if filtered_attributes is not None:
                payload["attributes"] = filtered_attributes
            else:
                payload.pop("attributes", None)
        return f"{api_base}/{file_id}", payload

    def transform_update_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileObject:
        """Return the decoded JSON body of an update-file response."""
        return cast(VectorStoreFileObject, self._decode_json_body(response))

    def transform_delete_vector_store_file_request(
        self,
        *,
        vector_store_id: str,
        file_id: str,
        api_base: str,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return ``{api_base}/{file_id}`` and an empty parameter dict."""
        return f"{api_base}/{file_id}", {}

    def transform_delete_vector_store_file_response(
        self,
        *,
        response: httpx.Response,
    ) -> VectorStoreFileDeleteResponse:
        """Return the decoded JSON body of a delete-file response."""
        return cast(VectorStoreFileDeleteResponse, self._decode_json_body(response))
|
||||
@@ -145,6 +145,8 @@ class OpenAIVectorStoreConfig(BaseVectorStoreConfig):
|
||||
) -> Tuple[str, Dict]:
|
||||
url = api_base # Base URL for creating vector stores
|
||||
metadata = vector_store_create_optional_params.get("metadata", None)
|
||||
metadata_payload = add_openai_metadata(metadata)
|
||||
|
||||
typed_request_body = VectorStoreCreateRequest(
|
||||
name=vector_store_create_optional_params.get("name", None),
|
||||
file_ids=vector_store_create_optional_params.get("file_ids", None),
|
||||
@@ -154,7 +156,7 @@ class OpenAIVectorStoreConfig(BaseVectorStoreConfig):
|
||||
chunking_strategy=vector_store_create_optional_params.get(
|
||||
"chunking_strategy", None
|
||||
),
|
||||
metadata=add_openai_metadata(metadata) if metadata is not None else None,
|
||||
metadata=metadata_payload,
|
||||
)
|
||||
|
||||
dict_request_body = cast(dict, typed_request_body)
|
||||
|
||||
@@ -178,10 +178,10 @@ class OpenAIVideoConfig(BaseVideoConfig):
|
||||
# Construct the URL for video content download
|
||||
url = f"{api_base.rstrip('/')}/{original_video_id}/content"
|
||||
|
||||
# Add video_id as query parameter
|
||||
params = {"video_id": original_video_id}
|
||||
|
||||
return url, params
|
||||
# No additional data needed for GET content request
|
||||
data: Dict[str, Any] = {}
|
||||
|
||||
return url, data
|
||||
|
||||
def transform_video_remix_request(
|
||||
self,
|
||||
@@ -404,4 +404,4 @@ class OpenAIVideoConfig(BaseVideoConfig):
|
||||
if isinstance(image, BufferedReader):
|
||||
files_list.append((field_name, (image.name, image, image_content_type)))
|
||||
else:
|
||||
files_list.append((field_name, ("input_reference.png", image, image_content_type)))
|
||||
files_list.append((field_name, ("input_reference.png", image, image_content_type)))
|
||||
|
||||
+6
-4
@@ -390,6 +390,7 @@ async def acompletion(
|
||||
reasoning_effort: Optional[
|
||||
Literal["none", "minimal", "low", "medium", "high", "default"]
|
||||
] = None,
|
||||
verbosity: Optional[Literal["low", "medium", "high"]] = None,
|
||||
safety_identifier: Optional[str] = None,
|
||||
service_tier: Optional[str] = None,
|
||||
# set api_base, api_version, api_key
|
||||
@@ -961,6 +962,7 @@ def completion( # type: ignore # noqa: PLR0915
|
||||
reasoning_effort: Optional[
|
||||
Literal["none", "minimal", "low", "medium", "high", "default"]
|
||||
] = None,
|
||||
verbosity: Optional[Literal["low", "medium", "high"]] = None,
|
||||
response_format: Optional[Union[dict, Type[BaseModel]]] = None,
|
||||
seed: Optional[int] = None,
|
||||
tools: Optional[List] = None,
|
||||
@@ -2084,10 +2086,10 @@ def completion( # type: ignore # noqa: PLR0915
|
||||
if extra_headers is not None:
|
||||
optional_params["extra_headers"] = extra_headers
|
||||
|
||||
if (
|
||||
litellm.enable_preview_features and metadata is not None
|
||||
): # [PREVIEW] allow metadata to be passed to OPENAI
|
||||
optional_params["metadata"] = add_openai_metadata(metadata)
|
||||
if litellm.enable_preview_features:
|
||||
metadata_payload = add_openai_metadata(metadata)
|
||||
if metadata_payload is not None:
|
||||
optional_params["metadata"] = metadata_payload
|
||||
|
||||
## LOAD CONFIG - if set
|
||||
config = litellm.OpenAIConfig.get_config()
|
||||
|
||||
@@ -8515,10 +8515,18 @@
|
||||
"/v1/images/generations"
|
||||
]
|
||||
},
|
||||
"fal_ai/fal-ai/flux-pro/v1.1": {
|
||||
"litellm_provider": "fal_ai",
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.04,
|
||||
"supported_endpoints": [
|
||||
"/v1/images/generations"
|
||||
]
|
||||
},
|
||||
"fal_ai/fal-ai/flux-pro/v1.1-ultra": {
|
||||
"litellm_provider": "fal_ai",
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.0398,
|
||||
"output_cost_per_image": 0.06,
|
||||
"supported_endpoints": [
|
||||
"/v1/images/generations"
|
||||
]
|
||||
@@ -8531,6 +8539,30 @@
|
||||
"/v1/images/generations"
|
||||
]
|
||||
},
|
||||
"fal_ai/fal-ai/bytedance/seedream/v3/text-to-image": {
|
||||
"litellm_provider": "fal_ai",
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.03,
|
||||
"supported_endpoints": [
|
||||
"/v1/images/generations"
|
||||
]
|
||||
},
|
||||
"fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image": {
|
||||
"litellm_provider": "fal_ai",
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.03,
|
||||
"supported_endpoints": [
|
||||
"/v1/images/generations"
|
||||
]
|
||||
},
|
||||
"fal_ai/fal-ai/ideogram/v3": {
|
||||
"litellm_provider": "fal_ai",
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.06,
|
||||
"supported_endpoints": [
|
||||
"/v1/images/generations"
|
||||
]
|
||||
},
|
||||
"fal_ai/fal-ai/imagen4/preview": {
|
||||
"litellm_provider": "fal_ai",
|
||||
"mode": "image_generation",
|
||||
|
||||
@@ -10,9 +10,12 @@ import asyncio
|
||||
import datetime
|
||||
import hashlib
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional, Set, Union, cast
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from fastapi import HTTPException
|
||||
from httpx import HTTPStatusError
|
||||
from mcp.types import CallToolRequestParams as MCPCallToolRequestParams
|
||||
from mcp.types import CallToolResult
|
||||
from mcp.types import Tool as MCPTool
|
||||
@@ -20,6 +23,7 @@ from mcp.types import Tool as MCPTool
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.exceptions import BlockedPiiEntityError, GuardrailRaisedException
|
||||
from litellm.experimental_mcp_client.client import MCPClient
|
||||
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
|
||||
from litellm.proxy._experimental.mcp_server.auth.user_api_key_auth_mcp import (
|
||||
MCPRequestHandler,
|
||||
)
|
||||
@@ -38,12 +42,15 @@ from litellm.proxy._types import (
|
||||
MCPTransportType,
|
||||
UserAPIKeyAuth,
|
||||
)
|
||||
from litellm.proxy.common_utils.encrypt_decrypt_utils import (
|
||||
decrypt_value_helper,
|
||||
)
|
||||
from litellm.proxy.common_utils.encrypt_decrypt_utils import decrypt_value_helper
|
||||
from litellm.proxy.utils import ProxyLogging
|
||||
from litellm.types.llms.custom_http import httpxSpecialProvider
|
||||
from litellm.types.mcp import MCPAuth, MCPStdioConfig
|
||||
from litellm.types.mcp_server.mcp_server_manager import MCPInfo, MCPServer
|
||||
from litellm.types.mcp_server.mcp_server_manager import (
|
||||
MCPInfo,
|
||||
MCPOAuthMetadata,
|
||||
MCPServer,
|
||||
)
|
||||
|
||||
|
||||
def _deserialize_json_dict(data: Any) -> Optional[Dict[str, str]]:
|
||||
@@ -100,7 +107,7 @@ class MCPServerManager:
|
||||
"""
|
||||
return self.config_mcp_servers | self.registry
|
||||
|
||||
def load_servers_from_config(
|
||||
async def load_servers_from_config(
|
||||
self,
|
||||
mcp_servers_config: Dict[str, Any],
|
||||
mcp_aliases: Optional[Dict[str, str]] = None,
|
||||
@@ -180,35 +187,57 @@ class MCPServerManager:
|
||||
)()
|
||||
name_for_prefix = get_server_prefix(temp_server)
|
||||
|
||||
server_url = server_config.get("url", None) or ""
|
||||
# Generate stable server ID based on parameters
|
||||
server_id = self._generate_stable_server_id(
|
||||
server_name=server_name,
|
||||
url=server_config.get("url", None) or "",
|
||||
url=server_url,
|
||||
transport=server_config.get("transport", MCPTransport.http),
|
||||
auth_type=server_config.get("auth_type", None),
|
||||
alias=alias,
|
||||
)
|
||||
|
||||
auth_type = server_config.get("auth_type", None)
|
||||
if server_url and auth_type is not None and auth_type == MCPAuth.oauth2:
|
||||
mcp_oauth_metadata = await self._descovery_metadata(
|
||||
server_url=server_url,
|
||||
)
|
||||
else:
|
||||
mcp_oauth_metadata = None
|
||||
|
||||
resolved_scopes = server_config.get("scopes") or (
|
||||
mcp_oauth_metadata.scopes if mcp_oauth_metadata else None
|
||||
)
|
||||
resolved_authorization_url = server_config.get("authorization_url") or (
|
||||
mcp_oauth_metadata.authorization_url if mcp_oauth_metadata else None
|
||||
)
|
||||
resolved_token_url = server_config.get("token_url") or (
|
||||
mcp_oauth_metadata.token_url if mcp_oauth_metadata else None
|
||||
)
|
||||
resolved_registration_url = server_config.get("registration_url") or (
|
||||
mcp_oauth_metadata.registration_url if mcp_oauth_metadata else None
|
||||
)
|
||||
|
||||
new_server = MCPServer(
|
||||
server_id=server_id,
|
||||
name=name_for_prefix,
|
||||
alias=alias,
|
||||
server_name=server_name,
|
||||
spec_path=server_config.get("spec_path", None),
|
||||
url=server_config.get("url", None) or "",
|
||||
url=server_url,
|
||||
command=server_config.get("command", None) or "",
|
||||
args=server_config.get("args", None) or [],
|
||||
env=server_config.get("env", None) or {},
|
||||
# oauth specific fields
|
||||
client_id=server_config.get("client_id", None),
|
||||
client_secret=server_config.get("client_secret", None),
|
||||
scopes=server_config.get("scopes", None),
|
||||
authorization_url=server_config.get("authorization_url", None),
|
||||
token_url=server_config.get("token_url", None),
|
||||
registration_url=server_config.get("registration_url", None),
|
||||
scopes=resolved_scopes,
|
||||
authorization_url=resolved_authorization_url,
|
||||
token_url=resolved_token_url,
|
||||
registration_url=resolved_registration_url,
|
||||
# TODO: utility fn the default values
|
||||
transport=server_config.get("transport", MCPTransport.http),
|
||||
auth_type=server_config.get("auth_type", None),
|
||||
auth_type=auth_type,
|
||||
authentication_token=server_config.get(
|
||||
"authentication_token", server_config.get("auth_value", None)
|
||||
),
|
||||
@@ -356,12 +385,12 @@ class MCPServerManager:
|
||||
)
|
||||
|
||||
# Update tool name to server name mapping (for both prefixed and base names)
|
||||
self.tool_name_to_mcp_server_name_mapping[
|
||||
base_tool_name
|
||||
] = server_prefix
|
||||
self.tool_name_to_mcp_server_name_mapping[
|
||||
prefixed_tool_name
|
||||
] = server_prefix
|
||||
self.tool_name_to_mcp_server_name_mapping[base_tool_name] = (
|
||||
server_prefix
|
||||
)
|
||||
self.tool_name_to_mcp_server_name_mapping[prefixed_tool_name] = (
|
||||
server_prefix
|
||||
)
|
||||
|
||||
registered_count += 1
|
||||
verbose_logger.debug(
|
||||
@@ -692,6 +721,252 @@ class MCPServerManager:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def _descovery_metadata(
    self,
    server_url: str,
) -> Optional[MCPOAuthMetadata]:
    """Discover OAuth metadata by following RFC 9728 (protected resource metadata discovery).

    An unauthenticated GET is sent to ``server_url``. The ``HTTPStatusError``
    raised by ``raise_for_status()`` is treated as the server's challenge: its
    ``WWW-Authenticate`` header is parsed for a ``resource_metadata`` URL and
    ``scope``. Authorization servers are then looked up from that URL, or from
    the well-known protected-resource locations when the header carries no
    URL, and finally default to the origin of ``server_url``.

    Args:
        server_url: URL of the MCP server to probe.

    Returns:
        The discovered ``MCPOAuthMetadata``. Scopes from the challenge header
        (or, failing that, from the resource metadata) replace the scopes of
        the authorization-server metadata. ``None`` is returned when the
        request succeeds without an error status, when nothing could be
        discovered, or when any step fails.
    """
    # Stage 1: provoke the challenge. Only an HTTP status error continues.
    challenge_response = None
    try:
        client = get_async_httpx_client(llm_provider=httpxSpecialProvider.MCP)
        response = await client.get(server_url)
        response.raise_for_status()
        verbose_logger.warning(
            "MCP OAuth discovery unexpectedly succeeded for %s; server did not challenge",
            server_url,
        )
        raise RuntimeError("OAuth discovery must not succeed without a challenge")
    except HTTPStatusError as exc:
        verbose_logger.debug(
            "MCP OAuth discovery for %s received status error: %s",
            server_url,
            exc,
        )
        # `exc` is unbound once the handler exits, so keep the response.
        challenge_response = exc.response
    except Exception as exc:  # pragma: no cover - network/transient issues
        verbose_logger.debug(
            "MCP OAuth discovery failed for %s: %s", server_url, exc
        )
        return None

    # Stage 2: resolve metadata from the challenge. This runs outside the
    # handler above so that failures here are also reported as "no metadata"
    # instead of escaping to the caller (an `except Exception` clause does not
    # cover code executing inside a sibling `except` handler).
    try:
        header_value: Optional[str] = None
        if challenge_response is not None:
            header_value = challenge_response.headers.get(
                "WWW-Authenticate"
            ) or challenge_response.headers.get("www-authenticate")

        resource_metadata_url, scopes = self._parse_www_authenticate_header(
            header_value
        )

        authorization_servers: List[str] = []
        resource_scopes: Optional[List[str]] = None
        if resource_metadata_url:
            (
                authorization_servers,
                resource_scopes,
            ) = await self._fetch_oauth_metadata_from_resource(
                resource_metadata_url
            )
        else:
            (
                authorization_servers,
                resource_scopes,
            ) = await self._attempt_well_known_discovery(server_url)

        metadata = None
        if not authorization_servers:
            # Fall back to the origin of the MCP server itself.
            try:
                parsed_url = urlparse(server_url)
                if parsed_url.scheme and parsed_url.netloc:
                    authorization_servers = [
                        f"{parsed_url.scheme}://{parsed_url.netloc}"
                    ]
            except Exception:
                authorization_servers = []

        if authorization_servers:
            metadata = await self._fetch_authorization_server_metadata(
                authorization_servers
            )

        # Scopes from the challenge header win over resource-metadata scopes.
        preferred_scopes = scopes or resource_scopes
        if metadata is None and preferred_scopes:
            metadata = MCPOAuthMetadata(scopes=preferred_scopes)
        elif metadata is not None and preferred_scopes:
            metadata.scopes = preferred_scopes

        return metadata
    except Exception as exc:  # pragma: no cover - network/transient issues
        verbose_logger.debug(
            "MCP OAuth discovery failed for %s: %s", server_url, exc
        )
        return None
|
||||
|
||||
def _parse_www_authenticate_header(
|
||||
self, header_value: Optional[str]
|
||||
) -> Tuple[Optional[str], Optional[List[str]]]:
|
||||
if not header_value:
|
||||
return None, None
|
||||
|
||||
_, _, params_section = header_value.partition(" ")
|
||||
params_section = params_section or header_value
|
||||
|
||||
param_pattern = re.compile(r"([a-zA-Z0-9_]+)\s*=\s*\"?([^\",]+)\"?")
|
||||
params: Dict[str, str] = {
|
||||
match.group(1).lower(): match.group(2).strip()
|
||||
for match in param_pattern.finditer(params_section)
|
||||
}
|
||||
|
||||
resource_metadata_url = params.get("resource_metadata")
|
||||
|
||||
scope_value = params.get("scope")
|
||||
scopes_list = [s for s in (scope_value.split() if scope_value else []) if s]
|
||||
scopes = scopes_list or None
|
||||
|
||||
return resource_metadata_url, scopes
|
||||
|
||||
async def _fetch_oauth_metadata_from_resource(
|
||||
self, resource_metadata_url: str
|
||||
) -> Tuple[List[str], Optional[List[str]]]:
|
||||
if not resource_metadata_url:
|
||||
return [], None
|
||||
|
||||
try:
|
||||
client = get_async_httpx_client(
|
||||
llm_provider=httpxSpecialProvider.MCP,
|
||||
params={"timeout": 10.0, "follow_redirects": True},
|
||||
)
|
||||
response = await client.get(resource_metadata_url)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
except Exception as exc: # pragma: no cover - network issues
|
||||
verbose_logger.debug(
|
||||
"Failed to fetch MCP OAuth metadata from %s: %s",
|
||||
resource_metadata_url,
|
||||
exc,
|
||||
)
|
||||
return [], None
|
||||
|
||||
raw_servers = data.get("authorization_servers")
|
||||
if isinstance(raw_servers, list):
|
||||
authorization_servers = [
|
||||
entry
|
||||
for entry in raw_servers
|
||||
if isinstance(entry, str) and entry.strip() != ""
|
||||
]
|
||||
else:
|
||||
authorization_servers = []
|
||||
|
||||
scopes = self._extract_scopes(
|
||||
data.get("scopes_supported") or data.get("scopes")
|
||||
)
|
||||
|
||||
return authorization_servers, scopes
|
||||
|
||||
async def _attempt_well_known_discovery(
|
||||
self, server_url: str
|
||||
) -> Tuple[List[str], Optional[List[str]]]:
|
||||
try:
|
||||
parsed = urlparse(server_url)
|
||||
except Exception:
|
||||
return [], None
|
||||
|
||||
if not parsed.scheme or not parsed.netloc:
|
||||
return [], None
|
||||
|
||||
base = f"{parsed.scheme}://{parsed.netloc}"
|
||||
path = parsed.path or ""
|
||||
path = path.strip("/")
|
||||
|
||||
candidate_urls: List[str] = []
|
||||
if path:
|
||||
candidate_urls.append(f"{base}/.well-known/oauth-protected-resource/{path}")
|
||||
candidate_urls.append(f"{base}/.well-known/oauth-protected-resource")
|
||||
|
||||
for url in candidate_urls:
|
||||
(
|
||||
authorization_servers,
|
||||
scopes,
|
||||
) = await self._fetch_oauth_metadata_from_resource(url)
|
||||
if authorization_servers:
|
||||
return authorization_servers, scopes
|
||||
|
||||
return [], None
|
||||
|
||||
async def _fetch_authorization_server_metadata(
    self, authorization_servers: List[str]
) -> Optional[MCPOAuthMetadata]:
    """Return metadata from the first authorization server that provides any.

    Args:
        authorization_servers: Issuer URLs, tried in the given order.

    Returns:
        The first non-``None`` result of
        ``_fetch_single_authorization_server_metadata``, or ``None`` when
        every issuer comes back empty (or the list is empty).
    """
    for issuer_url in authorization_servers:
        found = await self._fetch_single_authorization_server_metadata(issuer_url)
        if found is None:
            continue
        return found
    return None
|
||||
|
||||
async def _fetch_single_authorization_server_metadata(
    self, issuer_url: str
) -> Optional[MCPOAuthMetadata]:
    """Fetch authorization-server metadata for one issuer.

    Candidate URLs are tried in order: the path-specific
    ``oauth-authorization-server`` and ``openid-configuration`` well-known
    locations (only when the issuer has a path), then the same two locations
    at the origin, then the issuer URL itself.

    Args:
        issuer_url: Issuer URL of the authorization server.

    Returns:
        ``MCPOAuthMetadata`` built from the first candidate whose JSON object
        supplies at least one of ``scopes_supported``,
        ``authorization_endpoint``, ``token_endpoint`` or
        ``registration_endpoint``. ``None`` is returned when the issuer URL
        has no scheme or host, or when no candidate yields usable metadata.
    """
    try:
        parsed = urlparse(issuer_url)
    except Exception:
        return None

    if not parsed.scheme or not parsed.netloc:
        return None

    base = f"{parsed.scheme}://{parsed.netloc}"
    path = (parsed.path or "").strip("/")

    candidate_urls: List[str] = []
    if path:
        candidate_urls.append(
            f"{base}/.well-known/oauth-authorization-server/{path}"
        )
        candidate_urls.append(f"{base}/.well-known/openid-configuration/{path}")
    candidate_urls.append(f"{base}/.well-known/oauth-authorization-server")
    candidate_urls.append(f"{base}/.well-known/openid-configuration")
    candidate_urls.append(issuer_url.rstrip("/"))

    for url in candidate_urls:
        try:
            client = get_async_httpx_client(
                llm_provider=httpxSpecialProvider.MCP,
                params={"timeout": 10.0, "follow_redirects": True},
            )
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()
        except Exception as exc:  # pragma: no cover - network issues
            verbose_logger.debug(
                "Failed to fetch authorization metadata from %s: %s",
                url,
                exc,
            )
            continue

        # A JSON array or scalar is valid JSON but has no `.get`; skip to the
        # next candidate rather than aborting the whole lookup.
        if not isinstance(data, dict):
            verbose_logger.debug(
                "Failed to fetch authorization metadata from %s: %s",
                url,
                "response body is not a JSON object",
            )
            continue

        scopes = self._extract_scopes(data.get("scopes_supported"))
        metadata = MCPOAuthMetadata(
            scopes=scopes,
            authorization_url=data.get("authorization_endpoint"),
            token_url=data.get("token_endpoint"),
            registration_url=data.get("registration_endpoint"),
        )

        # Accept the document only if it contributed at least one field.
        if any(
            (
                metadata.scopes,
                metadata.authorization_url,
                metadata.token_url,
                metadata.registration_url,
            )
        ):
            return metadata

    return None
|
||||
|
||||
def _extract_scopes(self, scopes_value: Any) -> Optional[List[str]]:
|
||||
if isinstance(scopes_value, str):
|
||||
scopes = [s.strip() for s in scopes_value.split() if s.strip()]
|
||||
return scopes or None
|
||||
if isinstance(scopes_value, list):
|
||||
scopes = [s for s in scopes_value if isinstance(s, str) and s.strip()]
|
||||
return scopes or None
|
||||
return None
|
||||
|
||||
async def _fetch_tools_with_timeout(
|
||||
self, client: MCPClient, server_name: str
|
||||
) -> List[MCPTool]:
|
||||
@@ -721,11 +996,6 @@ class MCPServerManager:
|
||||
f"Client operation failed for {server_name}: {str(e)}"
|
||||
)
|
||||
return []
|
||||
finally:
|
||||
try:
|
||||
await client.disconnect()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
return await asyncio.wait_for(_list_tools_task(), timeout=30.0)
|
||||
|
||||
@@ -640,24 +640,31 @@ if MCP_AVAILABLE:
|
||||
|
||||
allowed_mcp_servers = await _get_allowed_mcp_servers_from_mcp_server_names(
|
||||
mcp_servers=mcp_servers,
|
||||
allowed_mcp_servers=allowed_mcp_servers
|
||||
allowed_mcp_servers=allowed_mcp_servers,
|
||||
)
|
||||
|
||||
server_name: Optional[str]
|
||||
if len(allowed_mcp_servers) == 1:
|
||||
original_tool_name, server_name = name, allowed_mcp_servers[0].server_name
|
||||
else:
|
||||
# Remove prefix from tool name for logging and processing
|
||||
original_tool_name, server_name = get_server_name_prefix_tool_mcp(name)
|
||||
# Track resolved MCP server for both permission checks and dispatch
|
||||
mcp_server: Optional[MCPServer] = None
|
||||
|
||||
if not server_name or not MCPRequestHandler.is_tool_allowed(
|
||||
allowed_mcp_servers=[server.name for server in allowed_mcp_servers],
|
||||
server_name=server_name,
|
||||
):
|
||||
raise HTTPException(
|
||||
status_code=403,
|
||||
detail=f"User not allowed to call this tool. Allowed MCP servers: {allowed_mcp_servers}",
|
||||
)
|
||||
# Remove prefix from tool name for logging and processing
|
||||
original_tool_name, server_name = get_server_name_prefix_tool_mcp(name)
|
||||
|
||||
# If tool name is unprefixed, resolve its server so we can enforce permissions
|
||||
if not server_name:
|
||||
mcp_server = global_mcp_server_manager._get_mcp_server_from_tool_name(name)
|
||||
if mcp_server:
|
||||
server_name = mcp_server.name
|
||||
|
||||
# Only enforce server-level permissions when we can resolve a server
|
||||
if server_name:
|
||||
if not MCPRequestHandler.is_tool_allowed(
|
||||
allowed_mcp_servers=[server.name for server in allowed_mcp_servers],
|
||||
server_name=server_name,
|
||||
):
|
||||
raise HTTPException(
|
||||
status_code=403,
|
||||
detail=f"User not allowed to call this tool. Allowed MCP servers: {allowed_mcp_servers}",
|
||||
)
|
||||
|
||||
standard_logging_mcp_tool_call: StandardLoggingMCPToolCall = (
|
||||
_get_standard_logging_mcp_tool_call(
|
||||
@@ -686,9 +693,11 @@ if MCP_AVAILABLE:
|
||||
# Primary and recommended way to use external MCP servers
|
||||
#########################################################
|
||||
else:
|
||||
mcp_server: Optional[
|
||||
MCPServer
|
||||
] = global_mcp_server_manager._get_mcp_server_from_tool_name(name)
|
||||
# If we haven't already resolved the server, do it now for dispatch
|
||||
if mcp_server is None:
|
||||
mcp_server = global_mcp_server_manager._get_mcp_server_from_tool_name(
|
||||
name
|
||||
)
|
||||
if mcp_server:
|
||||
standard_logging_mcp_tool_call["mcp_server_cost_info"] = (
|
||||
mcp_server.mcp_info or {}
|
||||
|
||||
+1
-1
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1
-1
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1
-1
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1
-1
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1
-1
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1
-1
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1
-1
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1
-1
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user