Merge remote-tracking branch 'origin' into litellm_org_usage

2026-06-18 00:48:01 +00:00 · 2025-11-15 16:28:12 -08:00
parent 24c23c62ba c7807f4841
commit f36cbccd56
328 changed files with 13124 additions and 4002 deletions
@@ -24,6 +24,39 @@ commands:
            cd enterprise
            python -m pip install -e .
            cd ..
+  setup_litellm_test_deps:  # Reusable command: checkout, restore pip cache, install test deps, save pip cache
+    steps:
+      - checkout
+      - setup_google_dns
+      - restore_cache:
+          keys:
+            - v2-litellm-deps-{{ checksum "requirements.txt" }}-{{ checksum ".circleci/config.yml" }}
+            - v2-litellm-deps-  # fallback prefix, tried when the exact checksum key above has no cache
+      - run:
+          name: Install Dependencies
+          command: |
+            python -m pip install --upgrade pip
+            python -m pip install -r requirements.txt
+            pip install "pytest-mock==3.12.0"
+            pip install "pytest==7.3.1"
+            pip install "pytest-retry==1.6.3"
+            pip install "pytest-cov==5.0.0"
+            pip install "pytest-asyncio==0.21.1"
+            pip install "respx==0.22.0"
+            pip install "hypercorn==0.17.3"
+            pip install "pydantic==2.10.2"
+            pip install "mcp==1.10.1"
+            pip install "requests-mock>=1.12.1"
+            pip install "responses==0.25.7"
+            pip install "pytest-xdist==3.6.1"
+            pip install "pytest-timeout==2.2.0"
+            pip install "semantic_router==0.1.10"
+            pip install "fastapi-offline==1.7.3"
+      - setup_litellm_enterprise_pip
+      - save_cache:
+          paths:
+            - ~/.cache/pip  # only pip's cache directory is saved; the install step above still runs on every job
+          key: v2-litellm-deps-{{ checksum "requirements.txt" }}-{{ checksum ".circleci/config.yml" }}

 jobs:
  # Add Windows testing job
@@ -668,13 +701,16 @@ jobs:
          paths:
            - litellm_security_tests_coverage.xml
            - litellm_security_tests_coverage
-  litellm_proxy_unit_testing: # Runs all tests with the "proxy", "key", "jwt" filenames
+  # Split proxy unit tests into 3 jobs for faster execution and better debugging
+  # test_key_generate_prisma runs separately without parallel execution to avoid event loop issues with the logging worker
+  litellm_proxy_unit_testing_key_generation:
    docker:
      - image: cimg/python:3.11
        auth:
          username: ${DOCKERHUB_USERNAME}
          password: ${DOCKERHUB_PASSWORD}
    working_directory: ~/project
+    resource_class: large
    steps:
      - checkout
      - setup_google_dns
@@ -699,6 +735,114 @@ jobs:
            pip install "pytest-retry==1.6.3"
            pip install "pytest-asyncio==0.21.1"
            pip install "pytest-cov==5.0.0"
+            pip install "pytest-timeout==2.2.0"
+            pip install "pytest-forked==1.6.0"
+            pip install "mypy==1.18.2"
+            pip install "google-generativeai==0.3.2"
+            pip install "google-cloud-aiplatform==1.43.0"
+            pip install "google-genai==1.22.0"
+            pip install pyarrow
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
+            pip install langchain
+            pip install lunary==0.2.5
+            pip install "azure-identity==1.16.1"
+            pip install "langfuse==2.59.7"
+            pip install "logfire==0.29.0"
+            pip install numpydoc
+            pip install traceloop-sdk==0.21.1
+            pip install opentelemetry-api==1.25.0
+            pip install opentelemetry-sdk==1.25.0
+            pip install opentelemetry-exporter-otlp==1.25.0
+            pip install openai==1.100.1
+            pip install prisma==0.11.0
+            pip install "detect_secrets==1.5.0"
+            pip install "httpx==0.24.1"
+            pip install "respx==0.22.0"
+            pip install fastapi
+            pip install "gunicorn==21.2.0"
+            pip install "anyio==4.2.0"
+            pip install "aiodynamo==23.10.1"
+            pip install "asyncio==3.4.3"
+            pip install "apscheduler==3.10.4"
+            pip install "PyGithub==1.59.1"
+            pip install argon2-cffi
+            pip install "pytest-mock==3.12.0"
+            pip install python-multipart
+            pip install google-cloud-aiplatform
+            pip install prometheus-client==0.20.0
+            pip install "pydantic==2.10.2"
+            pip install "diskcache==5.6.1"
+            pip install "Pillow==10.3.0"
+            pip install "jsonschema==4.22.0"
+            pip install "pytest-postgresql==7.0.1"
+            pip install "fakeredis==2.28.1"
+      - setup_litellm_enterprise_pip
+      - save_cache:
+          paths:
+            - ./venv
+          key: v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
+      - run:
+          name: Run prisma ./docker/entrypoint.sh
+          command: |
+            set +e
+            chmod +x docker/entrypoint.sh
+            ./docker/entrypoint.sh
+            set -e
+      - run:
+          name: Run key generation tests (no parallel execution to avoid event loop issues)
+          command: |
+            pwd
+            ls
+            # Run without -n flag to avoid pytest-xdist event loop conflicts with logging worker
+            python -m pytest tests/proxy_unit_tests/test_key_generate_prisma.py --cov=litellm --cov-report=xml --junitxml=test-results/junit-key-generation.xml --durations=10 --timeout=300 -vv --log-cli-level=INFO
+          no_output_timeout: 120m
+      - run:
+          name: Rename the coverage files
+          command: |
+            mv coverage.xml litellm_proxy_unit_tests_key_generation_coverage.xml
+            mv .coverage litellm_proxy_unit_tests_key_generation_coverage
+      - store_test_results:
+          path: test-results
+      - persist_to_workspace:
+          root: .
+          paths:
+            - litellm_proxy_unit_tests_key_generation_coverage.xml
+            - litellm_proxy_unit_tests_key_generation_coverage
+  litellm_proxy_unit_testing_part1:
+    docker:
+      - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
+    working_directory: ~/project
+    resource_class: large
+    steps:
+      - checkout
+      - setup_google_dns
+      - run:
+          name: Show git commit hash
+          command: |
+            echo "Git commit hash: $CIRCLE_SHA1"
+      - run:
+          name: Install PostgreSQL
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y postgresql-14 postgresql-contrib-14
+      - restore_cache:
+          keys:
+            - v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
+      - run:
+          name: Install Dependencies
+          command: |
+            python -m pip install --upgrade pip
+            python -m pip install -r .circleci/requirements.txt
+            pip install "pytest==7.3.1"
+            pip install "pytest-retry==1.6.3"
+            pip install "pytest-asyncio==0.21.1"
+            pip install "pytest-cov==5.0.0"
+            pip install "pytest-timeout==2.2.0"
+            pip install "pytest-forked==1.6.0"
            pip install "mypy==1.18.2"
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
@@ -752,28 +896,132 @@ jobs:
            chmod +x docker/entrypoint.sh
            ./docker/entrypoint.sh
            set -e
-      # Run pytest and generate JUnit XML report
      - run:
-          name: Run tests
+          name: Run proxy unit tests (part 1 - auth checks only, key generation in separate job)
          command: |
            pwd
            ls
-            python -m pytest tests/proxy_unit_tests --cov=litellm --cov-report=xml -vv -x -v --junitxml=test-results/junit.xml --durations=5 -n 4
+            # Run auth tests with parallel execution (test_key_generate_prisma moved to separate job to avoid event loop issues)
+            python -m pytest tests/proxy_unit_tests/test_auth_checks.py tests/proxy_unit_tests/test_user_api_key_auth.py --cov=litellm --cov-report=xml --junitxml=test-results/junit-part1.xml --durations=10 -n 8 --timeout=300 -vv --log-cli-level=INFO
          no_output_timeout: 120m
      - run:
          name: Rename the coverage files
          command: |
-            mv coverage.xml litellm_proxy_unit_tests_coverage.xml
-            mv .coverage litellm_proxy_unit_tests_coverage
-      # Store test results
+            mv coverage.xml litellm_proxy_unit_tests_part1_coverage.xml
+            mv .coverage litellm_proxy_unit_tests_part1_coverage
      - store_test_results:
          path: test-results
-
      - persist_to_workspace:
          root: .
          paths:
-            - litellm_proxy_unit_tests_coverage.xml
-            - litellm_proxy_unit_tests_coverage
+            - litellm_proxy_unit_tests_part1_coverage.xml
+            - litellm_proxy_unit_tests_part1_coverage
+  litellm_proxy_unit_testing_part2:  # Runs tests/proxy_unit_tests except the three files run by the key_generation and part1 jobs
+    docker:
+      - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
+    working_directory: ~/project
+    resource_class: large
+    steps:
+      - checkout
+      - setup_google_dns
+      - run:
+          name: Show git commit hash
+          command: |
+            echo "Git commit hash: $CIRCLE_SHA1"
+      - run:
+          name: Install PostgreSQL
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y postgresql-14 postgresql-contrib-14
+      - restore_cache:
+          keys:
+            - v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
+      - run:
+          name: Install Dependencies
+          command: |
+            python -m pip install --upgrade pip
+            python -m pip install -r .circleci/requirements.txt
+            pip install "pytest==7.3.1"
+            pip install "pytest-retry==1.6.3"
+            pip install "pytest-asyncio==0.21.1"
+            pip install "pytest-cov==5.0.0"
+            pip install "pytest-timeout==2.2.0"
+            pip install "pytest-forked==1.6.0"
+            pip install "mypy==1.18.2"
+            pip install "google-generativeai==0.3.2"
+            pip install "google-cloud-aiplatform==1.43.0"
+            pip install "google-genai==1.22.0"
+            pip install pyarrow
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
+            pip install langchain
+            pip install lunary==0.2.5
+            pip install "azure-identity==1.16.1"
+            pip install "langfuse==2.59.7"
+            pip install "logfire==0.29.0"
+            pip install numpydoc
+            pip install traceloop-sdk==0.21.1
+            pip install opentelemetry-api==1.25.0
+            pip install opentelemetry-sdk==1.25.0
+            pip install opentelemetry-exporter-otlp==1.25.0
+            pip install openai==1.100.1
+            pip install prisma==0.11.0
+            pip install "detect_secrets==1.5.0"
+            pip install "httpx==0.24.1"
+            pip install "respx==0.22.0"
+            pip install fastapi
+            pip install "gunicorn==21.2.0"
+            pip install "anyio==4.2.0"
+            pip install "aiodynamo==23.10.1"
+            pip install "asyncio==3.4.3"
+            pip install "apscheduler==3.10.4"
+            pip install "PyGithub==1.59.1"
+            pip install argon2-cffi
+            pip install "pytest-mock==3.12.0"
+            pip install python-multipart
+            pip install google-cloud-aiplatform
+            pip install prometheus-client==0.20.0
+            pip install "pydantic==2.10.2"
+            pip install "diskcache==5.6.1"
+            pip install "Pillow==10.3.0"
+            pip install "jsonschema==4.22.0"
+            pip install "pytest-postgresql==7.0.1"
+            pip install "fakeredis==2.28.1"
+            pip install "pytest-xdist==3.6.1"
+      - setup_litellm_enterprise_pip
+      - save_cache:
+          paths:
+            - ./venv  # NOTE(review): no step in this job creates ./venv; confirm this cache actually stores anything
+          key: v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
+      - run:
+          name: Run prisma ./docker/entrypoint.sh
+          command: |
+            set +e
+            chmod +x docker/entrypoint.sh
+            ./docker/entrypoint.sh
+            set -e
+      - run:
+          name: Run proxy unit tests (part 2 - remaining tests)
+          command: |
+            pwd
+            ls
+            python -m pytest tests/proxy_unit_tests --ignore=tests/proxy_unit_tests/test_key_generate_prisma.py --ignore=tests/proxy_unit_tests/test_auth_checks.py --ignore=tests/proxy_unit_tests/test_user_api_key_auth.py --cov=litellm --cov-report=xml --junitxml=test-results/junit-part2.xml --durations=10 -n 8 --timeout=300 -vv --log-cli-level=INFO
+          no_output_timeout: 120m
+      - run:
+          name: Rename the coverage files  # unique names so the persisted files do not collide with other jobs' coverage output
+          command: |
+            mv coverage.xml litellm_proxy_unit_tests_part2_coverage.xml
+            mv .coverage litellm_proxy_unit_tests_part2_coverage
+      - store_test_results:
+          path: test-results
+      - persist_to_workspace:
+          root: .
+          paths:
+            - litellm_proxy_unit_tests_part2_coverage.xml
+            - litellm_proxy_unit_tests_part2_coverage
  litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
    docker:
      - image: cimg/python:3.13.1
@@ -1128,59 +1376,88 @@ jobs:
          paths:
            - search_coverage.xml
            - search_coverage
-  litellm_mapped_tests:
+  # Split litellm_mapped_tests into 3 parallel jobs for 3x faster execution
+  litellm_mapped_tests_proxy:
    docker:
      - image: cimg/python:3.11
        auth:
          username: ${DOCKERHUB_USERNAME}
          password: ${DOCKERHUB_PASSWORD}
    working_directory: ~/project
-
+    resource_class: xlarge
    steps:
-      - checkout
-      - setup_google_dns
+      - setup_litellm_test_deps
      - run:
-          name: Install Dependencies
+          name: Run proxy tests
          command: |
-            python -m pip install --upgrade pip
-            python -m pip install -r requirements.txt
-            pip install "pytest-mock==3.12.0"
-            pip install "pytest==7.3.1"
-            pip install "pytest-retry==1.6.3"
-            pip install "pytest-cov==5.0.0"
-            pip install "pytest-asyncio==0.21.1"
-            pip install "respx==0.22.0"
-            pip install "hypercorn==0.17.3"
-            pip install "pydantic==2.10.2"
-            pip install "mcp==1.10.1"
-            pip install "requests-mock>=1.12.1"
-            pip install "responses==0.25.7"
-            pip install "pytest-xdist==3.6.1"
-            pip install "semantic_router==0.1.10"
-            pip install "fastapi-offline==1.7.3"
-      - setup_litellm_enterprise_pip
-      # Run pytest and generate JUnit XML report
-      - run:
-          name: Run litellm tests
-          command: |
-            pwd
-            ls
-            python -m pytest -vv tests/test_litellm --cov=litellm --cov-report=xml -v --junitxml=test-results/junit-litellm.xml --durations=10 -n 8
+            python -m pytest tests/test_litellm/proxy --cov=litellm --cov-report=xml --junitxml=test-results/junit-proxy.xml --durations=10 -n 16 --maxfail=5 --timeout=300 -vv --log-cli-level=WARNING
          no_output_timeout: 120m
      - run:
          name: Rename the coverage files
          command: |
-            mv coverage.xml litellm_mapped_tests_coverage.xml
-            mv .coverage litellm_mapped_tests_coverage
-
-      # Store test results
+            mv coverage.xml litellm_proxy_tests_coverage.xml
+            mv .coverage litellm_proxy_tests_coverage
      - store_test_results:
          path: test-results
      - persist_to_workspace:
          root: .
          paths:
-            - litellm_mapped_tests_coverage.xml
-            - litellm_mapped_tests_coverage
+            - litellm_proxy_tests_coverage.xml
+            - litellm_proxy_tests_coverage
+  litellm_mapped_tests_llms:  # Runs tests/test_litellm/llms with 16 pytest-xdist workers
+    docker:
+      - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
+    working_directory: ~/project
+    resource_class: xlarge
+    steps:
+      - setup_litellm_test_deps  # shared command: checkout + dependency install (includes pytest-xdist and pytest-timeout)
+      - run:
+          name: Run LLM provider tests
+          command: |
+            python -m pytest tests/test_litellm/llms --cov=litellm --cov-report=xml --junitxml=test-results/junit-llms.xml --durations=10 -n 16 --maxfail=5 --timeout=300 -vv --log-cli-level=WARNING
+          no_output_timeout: 120m
+      - run:
+          name: Rename the coverage files  # unique names so the persisted files do not collide with other jobs' coverage output
+          command: |
+            mv coverage.xml litellm_llms_tests_coverage.xml
+            mv .coverage litellm_llms_tests_coverage
+      - store_test_results:
+          path: test-results
+      - persist_to_workspace:
+          root: .
+          paths:
+            - litellm_llms_tests_coverage.xml
+            - litellm_llms_tests_coverage
+  litellm_mapped_tests_core:  # Runs tests/test_litellm except the proxy/ and llms/ subdirectories (those have their own jobs)
+    docker:
+      - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
+    working_directory: ~/project
+    resource_class: xlarge
+    steps:
+      - setup_litellm_test_deps  # shared command: checkout + dependency install (includes pytest-xdist and pytest-timeout)
+      - run:
+          name: Run core tests
+          command: |
+            python -m pytest tests/test_litellm --ignore=tests/test_litellm/proxy --ignore=tests/test_litellm/llms --cov=litellm --cov-report=xml --junitxml=test-results/junit-core.xml --durations=10 -n 16 --maxfail=5 --timeout=300 -vv --log-cli-level=WARNING
+          no_output_timeout: 120m
+      - run:
+          name: Rename the coverage files  # unique names so the persisted files do not collide with other jobs' coverage output
+          command: |
+            mv coverage.xml litellm_core_tests_coverage.xml
+            mv .coverage litellm_core_tests_coverage
+      - store_test_results:
+          path: test-results
+      - persist_to_workspace:
+          root: .
+          paths:
+            - litellm_core_tests_coverage.xml
+            - litellm_core_tests_coverage
  litellm_mapped_enterprise_tests:
    docker:
      - image: cimg/python:3.11
@@ -1447,7 +1724,7 @@ jobs:
          command: |
            pwd
            ls
-            python -m pytest -vv tests/logging_callback_tests --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
+            python -m pytest -vv tests/logging_callback_tests --cov=litellm --cov-report=xml -s -v --junitxml=test-results/junit.xml --durations=5
          no_output_timeout: 120m
      - run:
          name: Rename the coverage files
@@ -1914,14 +2191,14 @@ jobs:
            sudo usermod -aG docker $USER
            docker version
      - run:
-          name: Install Python 3.9
+          name: Install Python 3.10
          command: |
            curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
            bash miniconda.sh -b -p $HOME/miniconda
            export PATH="$HOME/miniconda/bin:$PATH"
            conda init bash
            source ~/.bashrc
-            conda create -n myenv python=3.9 -y
+            conda create -n myenv python=3.10 -y
            conda activate myenv
            python --version
      - run:
@@ -2695,19 +2972,22 @@ jobs:
            sudo usermod -aG docker $USER
            docker version
      - run:
-          name: Install Python 3.9
+          name: Install Python 3.10
          command: |
            curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
            bash miniconda.sh -b -p $HOME/miniconda
            export PATH="$HOME/miniconda/bin:$PATH"
            conda init bash
            source ~/.bashrc
-            conda create -n myenv python=3.9 -y
+            conda create -n myenv python=3.10 -y
            conda activate myenv
            python --version
      - run:
          name: Install Dependencies
          command: |
+            export PATH="$HOME/miniconda/bin:$PATH"
+            source $HOME/miniconda/etc/profile.d/conda.sh
+            conda activate myenv
            pip install "pytest==7.3.1"
            pip install "pytest-retry==1.6.3"
            pip install "pytest-asyncio==0.21.1"
@@ -2736,6 +3016,8 @@ jobs:
            pip install "langchain_mcp_adapters==0.0.5"
            pip install "langchain_openai==0.2.1"
            pip install "langgraph==0.3.18"
+            pip install "fastuuid==0.13.5"
+            pip install -r requirements.txt
      - run:
          name: Install dockerize
          command: |
@@ -2848,6 +3130,9 @@ jobs:
      - run:
          name: Run tests
          command: |
+            export PATH="$HOME/miniconda/bin:$PATH"
+            source $HOME/miniconda/etc/profile.d/conda.sh
+            conda activate myenv
            pwd
            ls
            python -m pytest -vv tests/pass_through_tests/ -x --junitxml=test-results/junit.xml --durations=5
@@ -2878,7 +3163,7 @@ jobs:
            python -m venv venv
            . venv/bin/activate
            pip install coverage
-            coverage combine llm_translation_coverage llm_responses_api_coverage ocr_coverage search_coverage mcp_coverage logging_coverage audio_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_security_tests_coverage guardrails_coverage
+            coverage combine llm_translation_coverage llm_responses_api_coverage ocr_coverage search_coverage mcp_coverage logging_coverage audio_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_key_generation_coverage litellm_proxy_unit_tests_part1_coverage litellm_proxy_unit_tests_part2_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_security_tests_coverage guardrails_coverage litellm_proxy_tests_coverage litellm_llms_tests_coverage litellm_core_tests_coverage
            coverage xml
      - codecov/upload:
          file: ./coverage.xml
@@ -3300,7 +3585,19 @@ workflows:
              only:
                - main
                - /litellm_.*/
-      - litellm_proxy_unit_testing:
+      - litellm_proxy_unit_testing_key_generation:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
+      - litellm_proxy_unit_testing_part1:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
+      - litellm_proxy_unit_testing_part2:
          filters:
            branches:
              only:
@@ -3444,7 +3741,19 @@ workflows:
              only:
                - main
                - /litellm_.*/
-      - litellm_mapped_tests:
+      - litellm_mapped_tests_proxy:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
+      - litellm_mapped_tests_llms:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
+      - litellm_mapped_tests_core:
          filters:
            branches:
              only:
@@ -3495,7 +3804,9 @@ workflows:
            - llm_responses_api_testing
            - ocr_testing
            - search_testing
-            - litellm_mapped_tests
+            - litellm_mapped_tests_proxy
+            - litellm_mapped_tests_llms
+            - litellm_mapped_tests_core
            - litellm_mapped_enterprise_tests
            - batches_testing
            - litellm_utils_testing
@@ -3506,7 +3817,9 @@ workflows:
            - litellm_router_testing
            - litellm_router_unit_testing
            - caching_unit_tests
-            - litellm_proxy_unit_testing
+            - litellm_proxy_unit_testing_key_generation
+            - litellm_proxy_unit_testing_part1
+            - litellm_proxy_unit_testing_part2
            - litellm_security_tests
            - langfuse_logging_unit_tests
            - local_testing
@@ -3560,7 +3873,9 @@ workflows:
            - llm_responses_api_testing
            - ocr_testing
            - search_testing
-            - litellm_mapped_tests
+            - litellm_mapped_tests_proxy
+            - litellm_mapped_tests_llms
+            - litellm_mapped_tests_core
            - litellm_mapped_enterprise_tests
            - batches_testing
            - litellm_utils_testing
@@ -3576,7 +3891,9 @@ workflows:
            - auth_ui_unit_tests
            - db_migration_disable_update_check
            - e2e_ui_testing
-            - litellm_proxy_unit_testing
+            - litellm_proxy_unit_testing_key_generation
+            - litellm_proxy_unit_testing_part1
+            - litellm_proxy_unit_testing_part2
            - litellm_security_tests
            - installing_litellm_on_python
            - installing_litellm_on_python_3_13
@@ -0,0 +1,7 @@
+# js-yaml CVE-2025-64718
+# This vulnerability is not applicable because we've forced js-yaml to version 4.1.1
+# via npm overrides in package.json. Trivy incorrectly reports this based on
+# dependency requirements in the lockfile, but the actual installed version is 4.1.1.
+# Verified with: npm list js-yaml
+CVE-2025-64718
+
@@ -16,7 +16,7 @@ Get free 7-day trial key [here](https://www.litellm.ai/enterprise#trial)

 Includes all enterprise features.

-<Image img={require('../img/enterprise_vs_oss.png')} />
+<Image img={require('../img/enterprise_vs_oss_2.png')} />

 [**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)

@@ -40,7 +40,7 @@ Self-Managed Enterprise deployments require our team to understand your exact ne

 ### How does deployment with Enterprise License work? 

-You just deploy [our docker image](https://docs.litellm.ai/docs/proxy/deploy) and get an enterprise license key to add to your environment to unlock additional functionality (SSO, Prometheus metrics, etc.). 
+You just deploy [our docker image](https://docs.litellm.ai/docs/proxy/deploy) and get an enterprise license key to add to your environment to unlock additional functionality (SSO, etc.). 

 ```env
 LITELLM_LICENSE="eyJ..."
@@ -211,11 +211,12 @@ mcp_servers:
  oauth2_example:
    url: "https://my-mcp-server.com/mcp"
    auth_type: "oauth2"         # 👈 KEY CHANGE
-    authorization_url: "https://my-mcp-server.com/oauth/authorize" # optional for client-credentials
-    token_url: "https://my-mcp-server.com/oauth/token"             # required
+    authorization_url: "https://my-mcp-server.com/oauth/authorize" # optional override
+    token_url: "https://my-mcp-server.com/oauth/token"             # optional override
+    registration_url: "https://my-mcp-server.com/oauth/register"   # optional override
    client_id: os.environ/OAUTH_CLIENT_ID
    client_secret: os.environ/OAUTH_CLIENT_SECRET
-    scopes: ["tool.read", "tool.write"] # optional
+    scopes: ["tool.read", "tool.write"] # optional override

  bearer_example:
    url: "https://my-mcp-server.com/mcp"
@@ -325,6 +326,10 @@ mcp_servers:
 | `spec_path` | Yes | Path or URL to your OpenAPI specification file (JSON or YAML) |
 | `auth_type` | No | Authentication type: `none`, `api_key`, `bearer_token`, `basic`, `authorization` |
 | `auth_value` | No | Authentication value (required if `auth_type` is set) |
+| `authorization_url` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM auto-discovers it. |
+| `token_url` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM auto-discovers it. |
+| `registration_url` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM auto-discovers it. |
+| `scopes` | No | For `auth_type: oauth2`. Optional override; if omitted LiteLLM uses the scopes advertised by the server. |
 | `description` | No | Optional description for the MCP server |
 | `allowed_tools` | No | List of specific tools to allow (see [MCP Tool Filtering](#mcp-tool-filtering)) |
 | `disallowed_tools` | No | List of specific tools to block (see [MCP Tool Filtering](#mcp-tool-filtering)) |
@@ -1224,17 +1229,10 @@ mcp_servers:
  github_mcp:
    url: "https://api.githubcopilot.com/mcp"
    auth_type: oauth2
-    authorization_url: https://github.com/login/oauth/authorize
-    token_url: https://github.com/login/oauth/access_token
    client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
    client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
-    scopes: ["public_repo", "user:email"]
 ```

-**Note**  
-In the future, users will only need to specify the `url` of the MCP server.
-LiteLLM will automatically resolve the corresponding `authorization_url`, `token_url`, and `registration_url` based on the MCP server metadata (e.g., `.well-known/oauth-authorization-server` or `oauth-protected-resource`).
-
 [**See Claude Code Tutorial**](./tutorials/claude_responses_api#connecting-mcp-servers)

 ## Using your MCP with client side credentials
@@ -1887,4 +1885,4 @@ async with stdio_client(server_params) as (read, write):
 ```

 </TabItem>
-</Tabs>
+</Tabs>
@@ -953,6 +953,30 @@ except Exception as e:

 s/o @[Shekhar Patnaik](https://www.linkedin.com/in/patnaikshekhar) for requesting this!

+### Context Management (Beta)
+
+Anthropic’s [context editing](https://docs.claude.com/en/docs/build-with-claude/context-editing) API lets you automatically clear older tool results or thinking blocks. LiteLLM now forwards the native `context_management` payload when you call Anthropic models, and automatically attaches the required `context-management-2025-06-27` beta header.
+
+```python
+from litellm import completion
+
+response = completion(
+    model="anthropic/claude-sonnet-4-20250514",
+    messages=[{"role": "user", "content": "Summarize the latest tool results"}],
+    context_management={
+        "edits": [
+            {
+                "type": "clear_tool_uses_20250919",
+                "trigger": {"type": "input_tokens", "value": 30000},
+                "keep": {"type": "tool_uses", "value": 3},
+                "clear_at_least": {"type": "input_tokens", "value": 5000},
+                "exclude_tools": ["web_search"],
+            }
+        ]
+    },
+)
+```
+
 ### Anthropic Hosted Tools (Computer, Text Editor, Web Search, Memory)


@@ -31,10 +31,14 @@ Get your API key from [fal.ai](https://fal.ai/).

 | Model Name | Description | Documentation |
 |------------|-------------|---------------|
+| `fal_ai/fal-ai/flux-pro/v1.1` | FLUX Pro v1.1 - Balanced speed and quality | [Docs ↗](https://fal.ai/models/fal-ai/flux-pro/v1.1) |
 | `fal_ai/flux/schnell` | Flux Schnell - Low-latency generation with `image_size` support | [Docs ↗](https://fal.ai/models/fal-ai/flux/schnell) |
+| `fal_ai/fal-ai/bytedance/seedream/v3/text-to-image` | ByteDance Seedream v3 - Text-to-image with `image_size` control | [Docs ↗](https://fal.ai/models/fal-ai/bytedance/seedream/v3/text-to-image) |
+| `fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image` | ByteDance Dreamina v3.1 - Text-to-image with `image_size` control | [Docs ↗](https://fal.ai/models/fal-ai/bytedance/dreamina/v3.1/text-to-image) |
 | `fal_ai/fal-ai/flux-pro/v1.1-ultra` | FLUX Pro v1.1 Ultra - High-quality image generation | [Docs ↗](https://fal.ai/models/fal-ai/flux-pro/v1.1-ultra) |
 | `fal_ai/fal-ai/imagen4/preview` | Google's Imagen 4 - Highest quality model | [Docs ↗](https://fal.ai/models/fal-ai/imagen4/preview) |
 | `fal_ai/fal-ai/recraft/v3/text-to-image` | Recraft v3 - Multiple style options | [Docs ↗](https://fal.ai/models/fal-ai/recraft/v3/text-to-image) |
+| `fal_ai/fal-ai/ideogram/v3` | Ideogram v3 - Lettering-first creative model (Balanced: $0.06/image) | [Docs ↗](https://fal.ai/models/fal-ai/ideogram/v3) |
 | `fal_ai/fal-ai/stable-diffusion-v35-medium` | Stable Diffusion v3.5 Medium | [Docs ↗](https://fal.ai/models/fal-ai/stable-diffusion-v35-medium) |
 | `fal_ai/bria/text-to-image/3.2` | Bria 3.2 - Commercial-grade generation | [Docs ↗](https://fal.ai/models/bria/text-to-image/3.2) |

@@ -486,6 +486,53 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \

 See [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning) for more details on organization verification requirements.

+### Verbosity Control for GPT-5 Models
+
+The `verbosity` parameter controls the length and detail of responses from GPT-5 family models. It accepts three values: `"low"`, `"medium"`, or `"high"`.
+
+**Supported models:** All GPT-5 family models (`gpt-5`, `gpt-5.1`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-5-pro`)
+
+**Use cases:**
+- **`"low"`**: Best for concise answers or simple code generation (e.g., SQL queries)
+- **`"medium"`**: Default - balanced output length
+- **`"high"`**: Use when you need thorough explanations or extensive code refactoring
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+```python
+import litellm
+
+# Low verbosity - concise responses
+response = litellm.completion(
+    model="gpt-5.1",
+    messages=[{"role": "user", "content": "Write a function to reverse a string"}],
+    verbosity="low"
+)
+
+# High verbosity - detailed responses
+response = litellm.completion(
+    model="gpt-5.1",
+    messages=[{"role": "user", "content": "Explain how neural networks work"}],
+    verbosity="high"
+)
+```
+</TabItem>
+
+<TabItem value="proxy" label="PROXY">
+```bash
+curl -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+    "model": "gpt-5.1",
+    "messages": [{"role": "user", "content": "Write a function to reverse a string"}],
+    "verbosity": "low"
+}'
+```
+</TabItem>
+</Tabs>
+
+
 ## OpenAI Chat Completion to Responses API Bridge

 Call any Responses API model from OpenAI's `/chat/completions` endpoint. 
@@ -32,13 +32,9 @@ Features:
    - ✅ [Set Model budgets for Virtual Keys](./users#-virtual-key-model-specific)
    - ✅ [Exporting LLM Logs to GCS Bucket, Azure Blob Storage](./proxy/bucket#🪣-logging-gcs-s3-buckets)
    - ✅ [`/spend/report` API endpoint](cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend)
- **Prometheus Metrics**
-    - ✅ [Prometheus Metrics - Num Requests, failures, LLM Provider Outages](prometheus)
-    - ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens)
- **Control Guardrails per API Key**
+- **Control Guardrails per API Key/Team**
 - **Custom Branding**
    - ✅ [Custom Branding + Routes on Swagger Docs](#swagger-docs---custom-routes--branding)
-    - ✅ [Public Model Hub](#public-model-hub)
    - ✅ [Custom Email Branding](./email.md#customizing-email-branding)


@@ -4,15 +4,6 @@ import Image from '@theme/IdealImage';

 # 📈 Prometheus metrics

-:::info
-
-✨ Prometheus metrics is on LiteLLM Enterprise
-
-[Enterprise Pricing](https://www.litellm.ai/#pricing)
-
-[Get free 7-day trial key](https://www.litellm.ai/enterprise#trial)
-
-:::

 LiteLLM Exposes a `/metrics` endpoint for Prometheus to Poll

@@ -237,11 +237,8 @@ mcp_servers:
  github_mcp:
    url: "https://api.githubcopilot.com/mcp"
    auth_type: oauth2
-    authorization_url: https://github.com/login/oauth/authorize
-    token_url: https://github.com/login/oauth/access_token
    client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
    client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
-    scopes: ["public_repo", "user:email"]
 ```

 </TabItem>
@@ -255,9 +252,6 @@ atlassian_mcp:
  url: "https://mcp.atlassian.com/v1/sse"
  transport: "sse"
  auth_type: oauth2
-  authorization_url: https://mcp.atlassian.com/v1/authorize
-  token_url: https://cf.mcp.atlassian.com/v1/token
-  registration_url: https://cf.mcp.atlassian.com/v1/register
 ```

 </TabItem>
@@ -0,0 +1,120 @@
+# /vector_stores/{vector_store_id}/files
+
+Vector store files represent the individual files that live inside a vector store.
+
+| Feature | Supported |
+|---------|-----------|
+| Logging | ✅ (full request/response logging) |
+| Supported Providers | `openai` |
+
+
+## Supported operations
+
+| Operation | Description | OpenAI Python Client | LiteLLM Proxy |
+|-----------|-------------|----------------------|---------------|
+| Create vector store file | Attach a file to a vector store with optional chunking overrides | ✅ | ✅ |
+| List vector store files | Paginated listing with filters | ✅ | ✅ |
+| Retrieve vector store file | Fetch metadata for a single file | ✅ | ✅ |
+| Delete vector store file | Remove a file from a store (file object persists) | ✅ | ✅ |
+| Retrieve vector store file content | Stream processed chunks | ❌ | ✅ |
+| Update vector store file attributes | Patch custom attributes | ❌ | ✅ |
+
+:::note
+Vector store file support currently works **only with OpenAI vector stores and OpenAI-uploaded file IDs**.
+:::
+
+
+## Create vector store file
+
+`POST http://localhost:4000/v1/vector_stores/{vector_store_id}/files`
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:4000",  # LiteLLM proxy or OpenAI base
+    api_key="sk-1234"
+)
+
+vector_store_file = client.vector_stores.files.create(
+    vector_store_id="vs_69172088a18c8191ab3e2621aa87d1ee",
+    file_id="file-NDbEDJTfqVh7S4Ugi3CGYw",
+    chunking_strategy={
+        "type": "static",
+        "static": {
+            "max_chunk_size_tokens": 800,
+            "chunk_overlap_tokens": 400,
+        },
+    },
+)
+
+print(vector_store_file)
+```
+
+## List vector store files
+
+`GET http://localhost:4000/v1/vector_stores/{vector_store_id}/files`
+
+Parameters:
+
+- `vector_store_id` (path, required)
+- `after` / `before` (query, optional) – pagination cursors
+- `filter` (query, optional) – `in_progress`, `completed`, `failed`, `cancelled`
+- `limit` (query, optional, default `20`, range `1-100`)
+- `order` (query, optional, default `desc`)
+
+```python
+vector_store_files = client.vector_stores.files.list(
+    vector_store_id="vs_abc123"
+)
+print(vector_store_files)
+```
+
+## Retrieve vector store file
+
+`GET http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}`
+
+```python
+vector_store_file = client.vector_stores.files.retrieve(
+    vector_store_id="vs_abc123",
+    file_id="file-abc123"
+)
+print(vector_store_file)
+```
+
+## Delete vector store file
+
+`DELETE http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}`
+
+```python
+deleted_vector_store_file = client.vector_stores.files.delete(
+    vector_store_id="vs_abc123",
+    file_id="file-abc123"
+)
+print(deleted_vector_store_file)
+```
+
+## Proxy-only endpoints
+
+When you need raw content chunks or attribute updates, call the LiteLLM Proxy directly.
+
+### Retrieve file content
+
+```bash
+curl -X GET "http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}/content" \
+  -H "Authorization: Bearer sk-1234"
+```
+
+### Update file attributes
+
+```bash
+curl -X POST "http://localhost:4000/v1/vector_stores/{vector_store_id}/files/{file_id}" \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+        "attributes": {
+          "category": "support-faq",
+          "language": "en"
+        }
+      }'
+```
@@ -18,7 +18,7 @@
    "@docusaurus/plugin-google-gtag": "3.8.1",
    "@docusaurus/plugin-ideal-image": "3.8.1",
    "@docusaurus/preset-classic": "3.8.1",
-    "@docusaurus/theme-mermaid": "^3.8.1",
+    "@docusaurus/theme-mermaid": "3.8.1",
    "@inkeep/cxkit-docusaurus": "^0.5.89",
    "@mdx-js/react": "^3.0.0",
    "clsx": "^1.2.1",
@@ -45,12 +45,14 @@
    ]
  },
  "engines": {
-    "node": ">=16.14"
+    "node": ">=16.14",
+    "npm": ">=8.3.0"
  },
  "overrides": {
    "webpack-dev-server": ">=5.2.1",
    "form-data": ">=4.0.4",
    "mermaid": ">=11.10.0",
-    "js-yaml": ">=4.1.1"
+    "js-yaml": ">=4.1.1",
+    "gray-matter": ">=4.0.3"
  }
 }
@@ -1,5 +1,5 @@
 ---
-title: "[Preview] v1.79.3-stable - Built-in Guardrails on AI Gateway"
+title: "v1.79.3-stable - Built-in Guardrails on AI Gateway"
 slug: "v1-79-3"
 date: 2025-11-08T10:00:00
 authors:
@@ -27,7 +27,7 @@ import TabItem from '@theme/TabItem';
 docker run \
 -e STORE_MODEL_IN_DB=True \
 -p 4000:4000 \
-ghcr.io/berriai/litellm:v1.79.3.rc.1
+ghcr.io/berriai/litellm:v1.79.3-stable
 ```

 </TabItem>
@@ -0,0 +1,482 @@
+---
+title: "[Preview] v1.80.0-stable - RunwayML Provider Support"
+slug: "v1-80-0"
+date: 2025-11-15T10:00:00
+authors:
+  - name: Krrish Dholakia
+    title: CEO, LiteLLM
+    url: https://www.linkedin.com/in/krish-d/
+    image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
+  - name: Ishaan Jaff
+    title: CTO, LiteLLM
+    url: https://www.linkedin.com/in/reffajnaahsi/
+    image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
+hide_table_of_contents: false
+---
+
+import Image from '@theme/IdealImage';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## Deploy this version
+
+<Tabs>
+<TabItem value="docker" label="Docker">
+
+``` showLineNumbers title="docker run litellm"
+docker run \
+-e STORE_MODEL_IN_DB=True \
+-p 4000:4000 \
+ghcr.io/berriai/litellm:v1.80.0.rc.1
+```
+
+</TabItem>
+
+<TabItem value="pip" label="Pip">
+
+``` showLineNumbers title="pip install litellm"
+pip install litellm==1.80.0
+```
+
+</TabItem>
+</Tabs>
+
+---
+
+## Key Highlights
+
+- **🆕 RunwayML Provider** - Complete video generation, image generation, and text-to-speech support
+- **GPT-5.1 Family Support** - Day-0 support for OpenAI's latest GPT-5.1 and GPT-5.1-Codex models
+- **Prometheus OSS** - Prometheus metrics are now available in the open-source version
+- **Vector Store Files API** - Complete OpenAI-compatible Vector Store Files API with full CRUD operations
+- **Embeddings Performance** - O(1) lookup optimization for router embeddings with shared sessions
+
+---
+
+### 🆕 RunwayML
+
+Complete integration for RunwayML's Gen-4 family of models, supporting video generation, image generation, and text-to-speech.
+
+**Supported Endpoints:**
+- `/v1/videos` - Video generation (Gen-4 Turbo, Gen-4 Aleph, Gen-3A Turbo)
+- `/v1/images/generations` - Image generation (Gen-4 Image, Gen-4 Image Turbo)
+- `/v1/audio/speech` - Text-to-speech (ElevenLabs Multilingual v2)
+
+**Quick Start:**
+
+```bash showLineNumbers title="Generate Video with RunwayML"
+curl --location 'http://localhost:4000/v1/videos' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer sk-1234' \
+--data '{
+    "model": "runwayml/gen4_turbo",
+    "prompt": "A high quality demo video of litellm ai gateway",
+    "input_reference": "https://example.com/image.jpg",
+    "seconds": 5,
+    "size": "1280x720"
+}'
+```
+
+[Get Started with RunwayML](../../docs/providers/runwayml/videos)
+
+---
+
+### Prometheus Metrics - Open Source
+
+Prometheus metrics are now available in the open-source version of LiteLLM, providing comprehensive observability for your AI Gateway without requiring an enterprise license.
+
+**Quick Start:**
+
+```yaml
+litellm_settings:
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]
+```
+
+[Get Started with Prometheus](../../docs/proxy/logging#prometheus)
+
+---
+
+### Vector Store Files API
+
+The complete OpenAI-compatible Vector Store Files API is now stable, enabling full file lifecycle management within vector stores.
+
+**Supported Endpoints:**
+- `POST /v1/vector_stores/{vector_store_id}/files` - Create vector store file
+- `GET /v1/vector_stores/{vector_store_id}/files` - List vector store files
+- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}` - Retrieve vector store file
+- `GET /v1/vector_stores/{vector_store_id}/files/{file_id}/content` - Retrieve file content
+- `DELETE /v1/vector_stores/{vector_store_id}/files/{file_id}` - Delete vector store file
+- `DELETE /v1/vector_stores/{vector_store_id}` - Delete vector store
+
+**Quick Start:**
+
+```bash showLineNumbers title="Create Vector Store File"
+curl --location 'http://localhost:4000/v1/vector_stores/vs_123/files' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer sk-1234' \
+--data '{
+    "file_id": "file_abc"
+}'
+```
+
+[Get Started with Vector Stores](../../docs/vector_store_files)
+
+---
+
+## New Providers and Endpoints
+
+### New Providers
+
+| Provider | Supported Endpoints | Description |
+| -------- | ------------------- | ----------- |
+| **[RunwayML](../../docs/providers/runwayml/videos)** | `/v1/videos`, `/v1/images/generations`, `/v1/audio/speech` | Gen-4 video generation, image generation, and text-to-speech |
+
+### New LLM API Endpoints
+
+| Endpoint | Method | Description | Documentation |
+| -------- | ------ | ----------- | ------------- |
+| `/v1/vector_stores/{vector_store_id}/files` | POST | Create vector store file | [Docs](../../docs/vector_store_files) |
+| `/v1/vector_stores/{vector_store_id}/files` | GET | List vector store files | [Docs](../../docs/vector_store_files) |
+| `/v1/vector_stores/{vector_store_id}/files/{file_id}` | GET | Retrieve vector store file | [Docs](../../docs/vector_store_files) |
+| `/v1/vector_stores/{vector_store_id}/files/{file_id}/content` | GET | Retrieve file content | [Docs](../../docs/vector_store_files) |
+| `/v1/vector_stores/{vector_store_id}/files/{file_id}` | DELETE | Delete vector store file | [Docs](../../docs/vector_store_files) |
+| `/v1/vector_stores/{vector_store_id}` | DELETE | Delete vector store | [Docs](../../docs/vector_store_files) |
+
+---
+
+## New Models / Updated Models
+
+#### New Model Support
+
+| Provider | Model | Context Window | Input ($/1M tokens) | Output ($/1M tokens) | Features |
+| -------- | ----- | -------------- | ------------------- | -------------------- | -------- |
+| OpenAI | `gpt-5.1` | 272K | $1.25 | $10.00 | Reasoning, vision, PDF input, responses API |
+| OpenAI | `gpt-5.1-2025-11-13` | 272K | $1.25 | $10.00 | Reasoning, vision, PDF input, responses API |
+| OpenAI | `gpt-5.1-chat-latest` | 128K | $1.25 | $10.00 | Reasoning, vision, PDF input |
+| OpenAI | `gpt-5.1-codex` | 272K | $1.25 | $10.00 | Responses API, reasoning, vision |
+| OpenAI | `gpt-5.1-codex-mini` | 272K | $0.25 | $2.00 | Responses API, reasoning, vision |
+| Moonshot | `moonshot/kimi-k2-thinking` | 262K | $0.60 | $2.50 | Function calling, web search, reasoning |
+| Mistral | `mistral/magistral-medium-2509` | 40K | $2.00 | $5.00 | Reasoning, function calling |
+| Vertex AI | `vertex_ai/moonshotai/kimi-k2-thinking-maas` | 256K | $0.60 | $2.50 | Function calling, web search |
+| OpenRouter | `openrouter/deepseek/deepseek-v3.2-exp` | 164K | $0.20 | $0.40 | Function calling, prompt caching |
+| OpenRouter | `openrouter/minimax/minimax-m2` | 205K | $0.26 | $1.02 | Function calling, reasoning |
+| OpenRouter | `openrouter/z-ai/glm-4.6` | 203K | $0.40 | $1.75 | Function calling, reasoning |
+| OpenRouter | `openrouter/z-ai/glm-4.6:exacto` | 203K | $0.45 | $1.90 | Function calling, reasoning |
+| Voyage | `voyage/voyage-3.5` | 32K | $0.06 | - | Embeddings |
+| Voyage | `voyage/voyage-3.5-lite` | 32K | $0.02 | - | Embeddings |
+
+#### Video Generation Models
+
+| Provider | Model | Cost Per Second | Resolutions | Features |
+| -------- | ----- | --------------- | ----------- | -------- |
+| RunwayML | `runwayml/gen4_turbo` | $0.05 | 1280x720, 720x1280 | Text + image to video |
+| RunwayML | `runwayml/gen4_aleph` | $0.15 | 1280x720, 720x1280 | Text + image to video |
+| RunwayML | `runwayml/gen3a_turbo` | $0.05 | 1280x720, 720x1280 | Text + image to video |
+
+#### Image Generation Models
+
+| Provider | Model | Cost Per Image | Resolutions | Features |
+| -------- | ----- | -------------- | ----------- | -------- |
+| RunwayML | `runwayml/gen4_image` | $0.05 | 1280x720, 1920x1080 | Text + image to image |
+| RunwayML | `runwayml/gen4_image_turbo` | $0.02 | 1280x720, 1920x1080 | Text + image to image |
+| Fal.ai | `fal_ai/fal-ai/flux-pro/v1.1` | $0.04/image | - | Image generation |
+| Fal.ai | `fal_ai/fal-ai/flux/schnell` | $0.003/image | - | Fast image generation |
+| Fal.ai | `fal_ai/fal-ai/bytedance/seedream/v3/text-to-image` | $0.03/image | - | Image generation |
+| Fal.ai | `fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image` | $0.03/image | - | Image generation |
+| Fal.ai | `fal_ai/fal-ai/ideogram/v3` | $0.06/image | - | Image generation |
+| Fal.ai | `fal_ai/fal-ai/imagen4/preview/fast` | $0.02/image | - | Fast image generation |
+| Fal.ai | `fal_ai/fal-ai/imagen4/preview/ultra` | $0.06/image | - | High-quality image generation |
+
+#### Audio Models
+
+| Provider | Model | Cost | Features |
+| -------- | ----- | ---- | -------- |
+| RunwayML | `runwayml/eleven_multilingual_v2` | $0.0003/char | Text-to-speech |
+
+#### Features
+
+- **[OpenAI](../../docs/providers/openai)**
+    - Add GPT-5.1 family support with reasoning capabilities - [PR #16598](https://github.com/BerriAI/litellm/pull/16598)
+    - Add support for `reasoning_effort='none'` for GPT-5.1 - [PR #16658](https://github.com/BerriAI/litellm/pull/16658)
+    - Add `verbosity` parameter support for GPT-5 family models - [PR #16660](https://github.com/BerriAI/litellm/pull/16660)
+    - Fix forward OpenAI organization for image generation - [PR #16607](https://github.com/BerriAI/litellm/pull/16607)
+
+- **[Gemini (Google AI Studio + Vertex AI)](../../docs/providers/gemini)**
+    - Add support for `reasoning_effort='none'` for Gemini models - [PR #16548](https://github.com/BerriAI/litellm/pull/16548)
+    - Add all Gemini image models support in image generation - [PR #16526](https://github.com/BerriAI/litellm/pull/16526)
+    - Add Gemini image edit support - [PR #16430](https://github.com/BerriAI/litellm/pull/16430)
+    - Fix preserve non-ASCII characters in function call arguments - [PR #16550](https://github.com/BerriAI/litellm/pull/16550)
+    - Fix Gemini conversation format issue with MCP auto-execution - [PR #16592](https://github.com/BerriAI/litellm/pull/16592)
+
+- **[Bedrock](../../docs/providers/bedrock)**
+    - Add support for filtering knowledge base queries - [PR #16543](https://github.com/BerriAI/litellm/pull/16543)
+    - Ensure correct `aws_region` is used when provided dynamically for embeddings - [PR #16547](https://github.com/BerriAI/litellm/pull/16547)
+    - Add support for custom KMS encryption keys in Bedrock Batch operations - [PR #16662](https://github.com/BerriAI/litellm/pull/16662)
+    - Add bearer token authentication support for AgentCore - [PR #16556](https://github.com/BerriAI/litellm/pull/16556)
+    - Fix AgentCore SSE stream iterator to async for proper streaming support - [PR #16293](https://github.com/BerriAI/litellm/pull/16293)
+
+- **[Anthropic](../../docs/providers/anthropic)**
+    - Add context management param support - [PR #16528](https://github.com/BerriAI/litellm/pull/16528)
+    - Fix preserve `$defs` for Anthropic tools input schema - [PR #16648](https://github.com/BerriAI/litellm/pull/16648)
+    - Fix support Anthropic tool_use and tool_result in token counter - [PR #16351](https://github.com/BerriAI/litellm/pull/16351)
+
+- **[Vertex AI](../../docs/providers/vertex_ai)**
+    - Add Vertex Kimi-K2-Thinking support - [PR #16671](https://github.com/BerriAI/litellm/pull/16671)
+    - Add `vertex_credentials` support to `litellm.rerank()` - [PR #16479](https://github.com/BerriAI/litellm/pull/16479)
+
+- **[Mistral](../../docs/providers/mistral)**
+    - Fix Magistral streaming to emit reasoning chunks - [PR #16434](https://github.com/BerriAI/litellm/pull/16434)
+
+- **[Moonshot (Kimi)](../../docs/providers/moonshot)**
+    - Add Kimi K2 thinking model support - [PR #16445](https://github.com/BerriAI/litellm/pull/16445)
+
+- **[SambaNova](../../docs/providers/sambanova)**
+    - Fix SambaNova API rejecting requests when message content is passed in list format - [PR #16612](https://github.com/BerriAI/litellm/pull/16612)
+
+- **[VLLM](../../docs/providers/vllm)**
+    - Fix use vllm passthrough config for hosted vllm provider instead of raising error - [PR #16537](https://github.com/BerriAI/litellm/pull/16537)
+    - Add headers to VLLM Passthrough requests with success event logging - [PR #16532](https://github.com/BerriAI/litellm/pull/16532)
+
+- **[Azure](../../docs/providers/azure)**
+    - Fix improve Azure auth parameter handling for None values - [PR #14436](https://github.com/BerriAI/litellm/pull/14436)
+
+- **[Groq](../../docs/providers/groq)**
+    - Fix parse failed chunks for Groq - [PR #16595](https://github.com/BerriAI/litellm/pull/16595)
+
+- **[Voyage](../../docs/providers/voyage)**
+    - Add Voyage 3.5 and 3.5-lite embeddings pricing and doc update - [PR #16641](https://github.com/BerriAI/litellm/pull/16641)
+
+- **[Fal.ai](../../docs/image_generation)**
+    - Add fal-ai/flux/schnell support - [PR #16580](https://github.com/BerriAI/litellm/pull/16580)
+    - Add all Imagen4 variants of fal ai in model map - [PR #16579](https://github.com/BerriAI/litellm/pull/16579)
+
+### Bug Fixes
+
+- **General**
+    - Fix sanitize null token usage in OpenAI-compatible responses - [PR #16493](https://github.com/BerriAI/litellm/pull/16493)
+    - Fix apply provided timeout value to ClientTimeout.total - [PR #16395](https://github.com/BerriAI/litellm/pull/16395)
+    - Fix raising wrong 429 error on wrong exception - [PR #16482](https://github.com/BerriAI/litellm/pull/16482)
+    - Add new models, delete repeat models, update pricing - [PR #16491](https://github.com/BerriAI/litellm/pull/16491)
+    - Update model logging format for custom LLM provider - [PR #16485](https://github.com/BerriAI/litellm/pull/16485)
+
+---
+
+## LLM API Endpoints
+
+#### New Endpoints
+
+- **[GET /providers](../../docs/proxy/management_endpoints)**
+    - Add GET list of providers endpoint - [PR #16432](https://github.com/BerriAI/litellm/pull/16432)
+
+#### Features
+
+- **[Video Generation API](../../docs/video_generation)**
+    - Allow internal users to access video generation routes - [PR #16472](https://github.com/BerriAI/litellm/pull/16472)
+
+- **[Vector Stores API](../../docs/vector_stores)**
+    - Vector store files stable release with complete CRUD operations - [PR #16643](https://github.com/BerriAI/litellm/pull/16643)
+      - `POST /v1/vector_stores/{vector_store_id}/files` - Create vector store file
+      - `GET /v1/vector_stores/{vector_store_id}/files` - List vector store files
+      - `GET /v1/vector_stores/{vector_store_id}/files/{file_id}` - Retrieve vector store file
+      - `GET /v1/vector_stores/{vector_store_id}/files/{file_id}/content` - Retrieve file content
+      - `DELETE /v1/vector_stores/{vector_store_id}/files/{file_id}` - Delete vector store file
+      - `DELETE /v1/vector_stores/{vector_store_id}` - Delete vector store
+    - Ensure users can access `search_results` for both streaming and non-streaming responses - [PR #16459](https://github.com/BerriAI/litellm/pull/16459)
+
+#### Bugs
+
+- **[Video Generation API](../../docs/video_generation)**
+    - Fix use GET for `/v1/videos/{video_id}/content` - [PR #16672](https://github.com/BerriAI/litellm/pull/16672)
+
+- **General**
+    - Fix remove generic exception handling - [PR #16599](https://github.com/BerriAI/litellm/pull/16599)
+
+---
+
+## Management Endpoints / UI
+
+#### Features
+
+- **Proxy CLI Auth**
+    - Fix remove strict master_key check in add_deployment - [PR #16453](https://github.com/BerriAI/litellm/pull/16453)
+
+- **Virtual Keys**
+    - UI - Add Tags To Edit Key Flow - [PR #16500](https://github.com/BerriAI/litellm/pull/16500)
+    - UI - Test Key Page show models based on selected endpoint - [PR #16452](https://github.com/BerriAI/litellm/pull/16452)
+    - UI - Expose user_alias in view and update path - [PR #16669](https://github.com/BerriAI/litellm/pull/16669)
+
+- **Models + Endpoints**
+    - UI - Add LiteLLM Params to Edit Model - [PR #16496](https://github.com/BerriAI/litellm/pull/16496)
+    - UI - Add Model use backend data - [PR #16664](https://github.com/BerriAI/litellm/pull/16664)
+    - UI - Remove Description Field from LLM Credentials - [PR #16608](https://github.com/BerriAI/litellm/pull/16608)
+    - UI - Add RunwayML on Admin UI supported models/providers - [PR #16606](https://github.com/BerriAI/litellm/pull/16606)
+    - Infra - Migrate Add Model Fields to Backend - [PR #16620](https://github.com/BerriAI/litellm/pull/16620)
+    - Add API Endpoint for creating model access group - [PR #16663](https://github.com/BerriAI/litellm/pull/16663)
+
+- **Teams**
+    - UI - Invite User Searchable Team Select - [PR #16454](https://github.com/BerriAI/litellm/pull/16454)
+    - Fix use user budget instead of key budget when creating new team - [PR #16074](https://github.com/BerriAI/litellm/pull/16074)
+
+- **Budgets**
+    - UI - Move Budgets out of Experimental - [PR #16544](https://github.com/BerriAI/litellm/pull/16544)
+
+- **Guardrails**
+    - UI - Config Guardrails should not be deletable from table - [PR #16540](https://github.com/BerriAI/litellm/pull/16540)
+    - Fix remove enterprise restriction from guardrails list endpoint - [PR #15333](https://github.com/BerriAI/litellm/pull/15333)
+
+- **Callbacks**
+    - UI - New Callbacks table - [PR #16512](https://github.com/BerriAI/litellm/pull/16512)
+    - Fix delete callbacks failing - [PR #16473](https://github.com/BerriAI/litellm/pull/16473)
+
+- **Usage & Analytics**
+    - UI - Improve Usage Indicator - [PR #16504](https://github.com/BerriAI/litellm/pull/16504)
+    - UI - Model Info Page Health Check - [PR #16416](https://github.com/BerriAI/litellm/pull/16416)
+    - Infra - Show Deprecation Warning for Model Analytics Tab - [PR #16417](https://github.com/BerriAI/litellm/pull/16417)
+    - Fix Litellm tags usage add request_id - [PR #16111](https://github.com/BerriAI/litellm/pull/16111)
+
+- **Health Check**
+    - Add Langfuse OTEL and SQS to Health Check - [PR #16514](https://github.com/BerriAI/litellm/pull/16514)
+
+- **General UI**
+    - UI - Normalize table action columns appearance - [PR #16657](https://github.com/BerriAI/litellm/pull/16657)
+    - UI - Button Styles and Sizing in Settings Pages - [PR #16600](https://github.com/BerriAI/litellm/pull/16600)
+    - UI - SSO Modal Cosmetic Changes - [PR #16554](https://github.com/BerriAI/litellm/pull/16554)
+    - Fix UI logos loading with SERVER_ROOT_PATH - [PR #16618](https://github.com/BerriAI/litellm/pull/16618)
+    - Fix remove misleading 'Custom' option mention from OpenAI endpoint tooltips - [PR #16622](https://github.com/BerriAI/litellm/pull/16622)
+
+#### Bugs
+
+- **Management Endpoints**
+    - Fix inconsistent error responses in customer management endpoints - [PR #16450](https://github.com/BerriAI/litellm/pull/16450)
+    - Fix correct date range filtering in /spend/logs endpoint - [PR #16443](https://github.com/BerriAI/litellm/pull/16443)
+    - Fix /spend/logs/ui Access Control - [PR #16446](https://github.com/BerriAI/litellm/pull/16446)
+    - Add pagination for /spend/logs/session/ui endpoint - [PR #16603](https://github.com/BerriAI/litellm/pull/16603)
+    - Fix LiteLLM Usage shows key_hash - [PR #16471](https://github.com/BerriAI/litellm/pull/16471)
+    - Fix app_roles missing from jwt payload - [PR #16448](https://github.com/BerriAI/litellm/pull/16448)
+
+---
+
+## Logging / Guardrail / Prompt Management Integrations
+
+
+#### New Integration
+
+- **🆕 [Zscaler AI Guard](../../docs/proxy/guardrails/zscaler_ai_guard)**
+    - Add Zscaler AI Guard hook for security policy enforcement - [PR #15691](https://github.com/BerriAI/litellm/pull/15691)
+
+#### Logging
+
+- **[Langfuse](../../docs/proxy/logging#langfuse)**
+    - Fix handle null usage values to prevent validation errors - [PR #16396](https://github.com/BerriAI/litellm/pull/16396)
+
+- **[CloudZero](../../docs/proxy/logging)**
+    - Fix updated spend not being sent to CloudZero - [PR #16201](https://github.com/BerriAI/litellm/pull/16201)
+
+#### Guardrails
+
+- **[IBM Detector](../../docs/proxy/guardrails)**
+    - Ensure detector-id is passed as header to IBM detector server - [PR #16649](https://github.com/BerriAI/litellm/pull/16649)
+
+#### Prompt Management
+
+- **[Custom Prompt Management](../../docs/proxy/prompt_management)**
+    - Add SDK focused examples for custom prompt management - [PR #16441](https://github.com/BerriAI/litellm/pull/16441)
+
+---
+
+## Spend Tracking, Budgets and Rate Limiting
+
+- **End User Budgets**
+    - Allow pointing max_end_user budget to an id, so the default ID applies to all end users - [PR #16456](https://github.com/BerriAI/litellm/pull/16456)
+
+---
+
+## MCP Gateway
+
+- **Configuration**
+    - Add dynamic OAuth2 metadata discovery for MCP servers - [PR #16676](https://github.com/BerriAI/litellm/pull/16676)
+    - Fix allow tool call even when server name prefix is missing - [PR #16425](https://github.com/BerriAI/litellm/pull/16425)
+    - Fix exclude unauthorized MCP servers from allowed server list - [PR #16551](https://github.com/BerriAI/litellm/pull/16551)
+    - Fix unable to delete MCP server from permission settings - [PR #16407](https://github.com/BerriAI/litellm/pull/16407)
+    - Fix avoid crashing when MCP server record lacks credentials - [PR #16601](https://github.com/BerriAI/litellm/pull/16601)
+
+---
+
+## Agents
+
+- **[Agent Registration (A2A Spec)](../../docs/agents)**
+    - Support agent registration + discovery following Agent-to-Agent specification - [PR #16615](https://github.com/BerriAI/litellm/pull/16615)
+
+---
+
+## Performance / Loadbalancing / Reliability improvements
+
+- **Embeddings Performance**
+    - Use router's O(1) lookup and shared sessions for embeddings - [PR #16344](https://github.com/BerriAI/litellm/pull/16344)
+
+- **Router Reliability**
+    - Support default fallbacks for unknown models - [PR #16419](https://github.com/BerriAI/litellm/pull/16419)
+
+- **Callback Management**
+    - Add atexit handlers to flush callbacks for async completions - [PR #16487](https://github.com/BerriAI/litellm/pull/16487)
+
+---
+
+## General Proxy Improvements
+
+- **Configuration Management**
+    - Fix update model_cost_map_url to use environment variable - [PR #16429](https://github.com/BerriAI/litellm/pull/16429)
+
+---
+
+## Documentation Updates
+
+- **Provider Documentation**
+    - Fix streaming example in README - [PR #16461](https://github.com/BerriAI/litellm/pull/16461)
+    - Update broken Slack invite links to support page - [PR #16546](https://github.com/BerriAI/litellm/pull/16546)
+    - Fix code block indentation for fallbacks page - [PR #16542](https://github.com/BerriAI/litellm/pull/16542)
+    - Documentation code example corrections - [PR #16502](https://github.com/BerriAI/litellm/pull/16502)
+    - Document `reasoning_effort` summary field options - [PR #16549](https://github.com/BerriAI/litellm/pull/16549)
+
+- **API Documentation**
+    - Add docs on APIs for model access management - [PR #16673](https://github.com/BerriAI/litellm/pull/16673)
+    - Add docs for showing how to auto reload new pricing data - [PR #16675](https://github.com/BerriAI/litellm/pull/16675)
+    - LiteLLM Quick start - show how model resolution works - [PR #16602](https://github.com/BerriAI/litellm/pull/16602)
+    - Add docs for tracking callback failure - [PR #16474](https://github.com/BerriAI/litellm/pull/16474)
+
+- **General Documentation**
+    - Fix container api link in release page - [PR #16440](https://github.com/BerriAI/litellm/pull/16440)
+    - Add softgen to projects that are using litellm - [PR #16423](https://github.com/BerriAI/litellm/pull/16423)
+
+---
+
+## New Contributors
+
+* @artplan1 made their first contribution in [PR #16423](https://github.com/BerriAI/litellm/pull/16423)
+* @JehandadK made their first contribution in [PR #16472](https://github.com/BerriAI/litellm/pull/16472)
+* @vmiscenko made their first contribution in [PR #16453](https://github.com/BerriAI/litellm/pull/16453)
+* @mcowger made their first contribution in [PR #16429](https://github.com/BerriAI/litellm/pull/16429)
+* @yellowsubmarine372 made their first contribution in [PR #16395](https://github.com/BerriAI/litellm/pull/16395)
+* @Hebruwu made their first contribution in [PR #16201](https://github.com/BerriAI/litellm/pull/16201)
+* @jwang-gif made their first contribution in [PR #15691](https://github.com/BerriAI/litellm/pull/15691)
+* @AnthonyMonaco made their first contribution in [PR #16502](https://github.com/BerriAI/litellm/pull/16502)
+* @andrewm4894 made their first contribution in [PR #16487](https://github.com/BerriAI/litellm/pull/16487)
+* @f14-bertolotti made their first contribution in [PR #16485](https://github.com/BerriAI/litellm/pull/16485)
+* @busla made their first contribution in [PR #16293](https://github.com/BerriAI/litellm/pull/16293)
+* @MightyGoldenOctopus made their first contribution in [PR #16537](https://github.com/BerriAI/litellm/pull/16537)
+* @ultmaster made their first contribution in [PR #14436](https://github.com/BerriAI/litellm/pull/14436)
+* @bchrobot made their first contribution in [PR #16542](https://github.com/BerriAI/litellm/pull/16542)
+* @sep-grindr made their first contribution in [PR #16622](https://github.com/BerriAI/litellm/pull/16622)
+* @pnookala-godaddy made their first contribution in [PR #16607](https://github.com/BerriAI/litellm/pull/16607)
+* @dtunikov made their first contribution in [PR #16592](https://github.com/BerriAI/litellm/pull/16592)
+* @lukapecnik made their first contribution in [PR #16648](https://github.com/BerriAI/litellm/pull/16648)
+* @jyeros made their first contribution in [PR #16618](https://github.com/BerriAI/litellm/pull/16618)
+
+---
+
+## Full Changelog
+
+**[View complete changelog on GitHub](https://github.com/BerriAI/litellm/compare/v1.79.3.rc.1...v1.80.0.rc.1)**
+
+---
@@ -368,6 +368,7 @@ const sidebars = {
          ]
        },
        "videos",
+        "vector_store_files",
        {
          type: "category",
          label: "/mcp - Model Context Protocol",
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm-enterprise"
-version = "0.1.20"
+version = "0.1.21"
 description = "Package for LiteLLM Enterprise features"
 authors = ["BerriAI"]
 readme = "README.md"
@@ -22,7 +22,7 @@ requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "0.1.20"
+version = "0.1.21"
 version_files = [
    "pyproject.toml:version",
    "../requirements.txt:litellm-enterprise==",
@@ -0,0 +1,17 @@
+-- CreateTable
+CREATE TABLE "LiteLLM_AgentsTable" (
+    "agent_id" TEXT NOT NULL,
+    "agent_name" TEXT NOT NULL,
+    "litellm_params" JSONB,
+    "agent_card_params" JSONB NOT NULL,
+    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "created_by" TEXT NOT NULL,
+    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "updated_by" TEXT NOT NULL,
+
+    CONSTRAINT "LiteLLM_AgentsTable_pkey" PRIMARY KEY ("agent_id")
+);
+
+-- CreateIndex
+CREATE UNIQUE INDEX "LiteLLM_AgentsTable_agent_name_key" ON "LiteLLM_AgentsTable"("agent_name");
+
@@ -54,6 +54,19 @@ model LiteLLM_ProxyModelTable {
  updated_by String
 }

+
+// Agents on proxy
+model LiteLLM_AgentsTable {
+  // Primary key; a UUID is generated when no value is supplied.
+  agent_id String @id @default(uuid())
+  // Must be unique across all rows.
+  agent_name String @unique
+  // Optional JSON payload (nullable column).
+  litellm_params Json?
+  // Required JSON payload.
+  agent_card_params Json
+  // Defaults to the row's creation time.
+  created_at    DateTime               @default(now()) @map("created_at")
+  created_by String
+  // Defaults to the creation time; @updatedAt refreshes it on each update made through Prisma.
+  updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
+  updated_by String
+}
+
 model LiteLLM_OrganizationTable {
 		organization_id String @id @default(uuid())
    organization_alias  String
@@ -639,4 +652,4 @@ model LiteLLM_CacheConfig {
  cache_settings Json
  created_at DateTime @default(now())
  updated_at DateTime @updatedAt
-}
+}
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm-proxy-extras"
-version = "0.4.4"
+version = "0.4.5"
 description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
 authors = ["BerriAI"]
 readme = "README.md"
@@ -22,7 +22,7 @@ requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "0.4.4"
+version = "0.4.5"
 version_files = [
    "pyproject.toml:version",
    "../requirements.txt:litellm-proxy-extras==",
@@ -1386,6 +1386,20 @@ from .search.main import *
 from .realtime_api.main import _arealtime
 from .fine_tuning.main import *
 from .files.main import *
+from .vector_store_files.main import (
+    acreate as avector_store_file_create,
+    adelete as avector_store_file_delete,
+    alist as avector_store_file_list,
+    aretrieve as avector_store_file_retrieve,
+    aretrieve_content as avector_store_file_content,
+    aupdate as avector_store_file_update,
+    create as vector_store_file_create,
+    delete as vector_store_file_delete,
+    list as vector_store_file_list,
+    retrieve as vector_store_file_retrieve,
+    retrieve_content as vector_store_file_content,
+    update as vector_store_file_update,
+)
 from .scheduler import *
 from .cost_calculator import response_cost_calculator, cost_per_token

@@ -476,6 +476,7 @@ DEFAULT_CHAT_COMPLETION_PARAM_VALUES = {
    "additional_drop_params": None,
    "messages": None,
    "reasoning_effort": None,
+    "verbosity": None,
    "thinking": None,
    "web_search_options": None,
    "service_tier": None,
@@ -41,21 +41,9 @@ class PrometheusLogger(CustomLogger):
        try:
            from prometheus_client import Counter, Gauge, Histogram

-            from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
-
            # Always initialize label_filters, even for non-premium users
            self.label_filters = self._parse_prometheus_config()

-            if premium_user is not True:
-                verbose_logger.warning(
-                    f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise\n🚨 {CommonProxyErrors.not_premium_user.value}"
-                )
-                self.litellm_not_a_premium_user_metric = Counter(
-                    name="litellm_not_a_premium_user_metric",
-                    documentation=f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise. 🚨 {CommonProxyErrors.not_premium_user.value}",
-                )
-                return
-
            # Create metric factory functions
            self._counter_factory = self._create_metric_factory(Counter)
            self._gauge_factory = self._create_metric_factory(Gauge)
@@ -2184,9 +2172,6 @@ class PrometheusLogger(CustomLogger):

        It emits the current remaining budget metrics for all Keys and Teams.
        """
-        from enterprise.litellm_enterprise.integrations.prometheus import (
-            PrometheusLogger,
-        )
        from litellm.constants import PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES
        from litellm.integrations.custom_logger import CustomLogger

@@ -2213,26 +2198,19 @@ class PrometheusLogger(CustomLogger):
            )

    @staticmethod
-    def _mount_metrics_endpoint(premium_user: bool):
+    def _mount_metrics_endpoint():
        """
        Mount the Prometheus metrics endpoint with optional authentication.

        Args:
-            premium_user (bool): Whether the user is a premium user
            require_auth (bool, optional): Whether to require authentication for the metrics endpoint.
                                        Defaults to False.
        """
        from prometheus_client import make_asgi_app

        from litellm._logging import verbose_proxy_logger
-        from litellm.proxy._types import CommonProxyErrors
        from litellm.proxy.proxy_server import app

-        if premium_user is not True:
-            verbose_proxy_logger.warning(
-                f"Prometheus metrics are only available for premium users. {CommonProxyErrors.not_premium_user.value}"
-            )
-
        # Create metrics ASGI app
        if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
            from prometheus_client import CollectorRegistry, multiprocess
@@ -16,14 +16,16 @@ from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheCont
 from litellm.integrations.argilla import ArgillaLogger
 from litellm.integrations.azure_storage.azure_storage import AzureBlobStorageLogger
 from litellm.integrations.bitbucket import BitBucketPromptManager
-from litellm.integrations.gitlab import GitLabPromptManager
 from litellm.integrations.braintrust_logging import BraintrustLogger
+from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger
 from litellm.integrations.datadog.datadog import DataDogLogger
 from litellm.integrations.datadog.datadog_llm_obs import DataDogLLMObsLogger
 from litellm.integrations.deepeval import DeepEvalLogger
+from litellm.integrations.dotprompt import DotpromptManager
 from litellm.integrations.galileo import GalileoObserve
 from litellm.integrations.gcs_bucket.gcs_bucket import GCSBucketLogger
 from litellm.integrations.gcs_pubsub.pub_sub import GcsPubSubLogger
+from litellm.integrations.gitlab import GitLabPromptManager
 from litellm.integrations.humanloop import HumanloopLogger
 from litellm.integrations.lago import LagoLogger
 from litellm.integrations.langfuse.langfuse_prompt_management import (
@@ -36,13 +38,7 @@ from litellm.integrations.openmeter import OpenMeterLogger
 from litellm.integrations.opentelemetry import OpenTelemetry
 from litellm.integrations.opik.opik import OpikLogger
 from litellm.integrations.posthog import PostHogLogger
-
-try:
-    from litellm_enterprise.integrations.prometheus import PrometheusLogger
-except Exception:
-    PrometheusLogger = None
-from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger
-from litellm.integrations.dotprompt import DotpromptManager
+from litellm.integrations.prometheus import PrometheusLogger
 from litellm.integrations.s3_v2 import S3Logger
 from litellm.integrations.sqs import SQSLogger
 from litellm.integrations.vector_store_integrations.vector_store_pre_call_hook import (
@@ -58,6 +58,7 @@ from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.deepeval.deepeval import DeepEvalLogger
 from litellm.integrations.mlflow import MlflowLogger
+from litellm.integrations.prometheus import PrometheusLogger
 from litellm.integrations.sqs import SQSLogger
 from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
 from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
@@ -176,7 +177,6 @@ try:
    from litellm_enterprise.enterprise_callbacks.send_emails.smtp_email import (
        SMTPEmailLogger,
    )
-    from litellm_enterprise.integrations.prometheus import PrometheusLogger
    from litellm_enterprise.litellm_core_utils.litellm_logging import (
        StandardLoggingPayloadSetup as EnterpriseStandardLoggingPayloadSetup,
    )
@@ -194,7 +194,6 @@ except Exception as e:
    PagerDutyAlerting = CustomLogger  # type: ignore
    EnterpriseCallbackControls = None  # type: ignore
    EnterpriseStandardLoggingPayloadSetupVAR = None
-    PrometheusLogger = None
 _in_memory_loggers: List[Any] = []

 ### GLOBAL VARIABLES ###
@@ -1475,33 +1474,58 @@ class Logging(LiteLLMLoggingBaseClass):
                if self.model_call_details["litellm_params"]["metadata"] is None:
                    self.model_call_details["litellm_params"]["metadata"] = {}
                self.model_call_details["litellm_params"]["metadata"]["hidden_params"] = getattr(logging_result, "_hidden_params", {})  # type: ignore
-        
+
        if "response_cost" in hidden_params:
            self.model_call_details["response_cost"] = hidden_params["response_cost"]
        else:
-            self.model_call_details["response_cost"] = self._response_cost_calculator(result=logging_result)
-        
-        self.model_call_details["standard_logging_object"] = get_standard_logging_object_payload(
-            kwargs=self.model_call_details,
-            init_response_obj=logging_result,
-            start_time=start_time,
-            end_time=end_time,
-            logging_obj=self,
-            status="success",
-            standard_built_in_tools_params=self.standard_built_in_tools_params,
+            self.model_call_details["response_cost"] = self._response_cost_calculator(
+                result=logging_result
+            )
+
+        self.model_call_details["standard_logging_object"] = (
+            get_standard_logging_object_payload(
+                kwargs=self.model_call_details,
+                init_response_obj=logging_result,
+                start_time=start_time,
+                end_time=end_time,
+                logging_obj=self,
+                status="success",
+                standard_built_in_tools_params=self.standard_built_in_tools_params,
+            )
        )

    def _transform_usage_objects(self, result):
        if isinstance(result, ResponsesAPIResponse):
            result = result.model_copy()
-            transformed_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(result.usage)
-            setattr(result, "usage", transformed_usage.model_dump() if hasattr(transformed_usage, "model_dump") else dict(transformed_usage))
-            if (standard_logging_payload := self.model_call_details.get("standard_logging_object")) is not None:
-                standard_logging_payload["response"] = result.model_dump() if hasattr(result, "model_dump") else dict(result)
+            transformed_usage = (
+                ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+                    result.usage
+                )
+            )
+            setattr(
+                result,
+                "usage",
+                (
+                    transformed_usage.model_dump()
+                    if hasattr(transformed_usage, "model_dump")
+                    else dict(transformed_usage)
+                ),
+            )
+            if (
+                standard_logging_payload := self.model_call_details.get(
+                    "standard_logging_object"
+                )
+            ) is not None:
+                standard_logging_payload["response"] = (
+                    result.model_dump()
+                    if hasattr(result, "model_dump")
+                    else dict(result)
+                )
        elif isinstance(result, TranscriptionResponse):
            from litellm.litellm_core_utils.llm_cost_calc.usage_object_transformation import (
                TranscriptionUsageObjectTransformation,
            )
+
            result = result.model_copy()
            transformed_usage = TranscriptionUsageObjectTransformation.transform_transcription_usage_object(result.usage)  # type: ignore
            setattr(result, "usage", transformed_usage)
@@ -1522,40 +1546,67 @@ class Logging(LiteLLMLoggingBaseClass):
                end_time = datetime.datetime.now()
            if self.completion_start_time is None:
                self.completion_start_time = end_time
-                self.model_call_details["completion_start_time"] = self.completion_start_time
-            
+                self.model_call_details["completion_start_time"] = (
+                    self.completion_start_time
+                )
+
            self.model_call_details["log_event_type"] = "successful_api_call"
            self.model_call_details["end_time"] = end_time
            self.model_call_details["cache_hit"] = cache_hit
-            
+
            if self.call_type == CallTypes.anthropic_messages.value:
                result = self._handle_anthropic_messages_response_logging(result=result)
-            elif self.call_type == CallTypes.generate_content.value or self.call_type == CallTypes.agenerate_content.value:
-                result = self._handle_non_streaming_google_genai_generate_content_response_logging(result=result)
-            
+            elif (
+                self.call_type == CallTypes.generate_content.value
+                or self.call_type == CallTypes.agenerate_content.value
+            ):
+                result = self._handle_non_streaming_google_genai_generate_content_response_logging(
+                    result=result
+                )
+
            logging_result = self.normalize_logging_result(result=result)

-            if standard_logging_object is None and result is not None and self.stream is not True:
-                if self._is_recognized_call_type_for_logging(logging_result=logging_result):
-                    self._process_hidden_params_and_response_cost(logging_result=logging_result, start_time=start_time, end_time=end_time)
-                elif isinstance(result, dict) or isinstance(result, list):
-                    self.model_call_details["standard_logging_object"] = get_standard_logging_object_payload(
-                        kwargs=self.model_call_details,
-                        init_response_obj=result,
+            if (
+                standard_logging_object is None
+                and result is not None
+                and self.stream is not True
+            ):
+                if self._is_recognized_call_type_for_logging(
+                    logging_result=logging_result
+                ):
+                    self._process_hidden_params_and_response_cost(
+                        logging_result=logging_result,
                        start_time=start_time,
                        end_time=end_time,
-                        logging_obj=self,
-                        status="success",
-                        standard_built_in_tools_params=self.standard_built_in_tools_params,
+                    )
+                elif isinstance(result, dict) or isinstance(result, list):
+                    self.model_call_details["standard_logging_object"] = (
+                        get_standard_logging_object_payload(
+                            kwargs=self.model_call_details,
+                            init_response_obj=result,
+                            start_time=start_time,
+                            end_time=end_time,
+                            logging_obj=self,
+                            status="success",
+                            standard_built_in_tools_params=self.standard_built_in_tools_params,
+                        )
                    )
            elif standard_logging_object is not None:
-                self.model_call_details["standard_logging_object"] = standard_logging_object
+                self.model_call_details["standard_logging_object"] = (
+                    standard_logging_object
+                )
            else:
                self.model_call_details["response_cost"] = None

            result = self._transform_usage_objects(result=result)
-            
-            if litellm.max_budget and self.stream is False and result is not None and isinstance(result, dict) and "content" in result:
+
+            if (
+                litellm.max_budget
+                and self.stream is False
+                and result is not None
+                and isinstance(result, dict)
+                and "content" in result
+            ):
                time_diff = (end_time - start_time).total_seconds()
                float_diff = float(time_diff)
                litellm._current_cost += litellm.completion_cost(
@@ -3340,8 +3391,6 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
            _in_memory_loggers.append(_literalai_logger)
            return _literalai_logger  # type: ignore
        elif logging_integration == "prometheus":
-            if PrometheusLogger is None:
-                raise ValueError("PrometheusLogger is not initialized")
            for callback in _in_memory_loggers:
                if isinstance(callback, PrometheusLogger):
                    return callback  # type: ignore
@@ -129,7 +129,7 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
            "parallel_tool_calls",
            "response_format",
            "user",
-            "web_search_options",
+            "web_search_options"
        ]

        if "claude-3-7-sonnet" in model or supports_reasoning(
@@ -646,6 +646,16 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
            )
        return tools

+    def _ensure_context_management_beta_header(self, headers: dict) -> None:
+        beta_value = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
+        existing_beta = headers.get("anthropic-beta")
+        if existing_beta is None:
+            headers["anthropic-beta"] = beta_value
+            return
+        existing_values = [beta.strip() for beta in existing_beta.split(",")]
+        if beta_value not in existing_values:
+            headers["anthropic-beta"] = f"{existing_beta}, {beta_value}"
+
    def update_headers_with_optional_anthropic_beta(
        self, headers: dict, optional_params: dict
    ) -> dict:
@@ -661,9 +671,11 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
            elif tool.get("type", None) and tool.get("type").startswith(
                ANTHROPIC_HOSTED_TOOLS.MEMORY.value
            ):
-                headers["anthropic-beta"] = (
-                    ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
-                )
+                headers[
+                    "anthropic-beta"
+                ] = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
+        if optional_params.get("context_management") is not None:
+            self._ensure_context_management_beta_header(headers)
        return headers

    def transform_request(
@@ -973,13 +985,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
            ):
                text_content = prefix_prompt + text_content

+            context_management: Optional[Dict] = completion_response.get(
+                "context_management"
+            )
+
+            provider_specific_fields: Dict[str, Any] = {
+                "citations": citations,
+                "thinking_blocks": thinking_blocks,
+            }
+            if context_management is not None:
+                provider_specific_fields["context_management"] = context_management
+
            _message = litellm.Message(
                tool_calls=tool_calls,
                content=text_content or None,
-                provider_specific_fields={
-                    "citations": citations,
-                    "thinking_blocks": thinking_blocks,
-                },
+                provider_specific_fields=provider_specific_fields,
                thinking_blocks=thinking_blocks,
                reasoning_content=reasoning_content,
            )
@@ -1012,6 +1032,16 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
        model_response.created = int(time.time())
        model_response.model = completion_response["model"]

+        context_management_response = completion_response.get("context_management")
+        if context_management_response is not None:
+            _hidden_params["context_management"] = context_management_response
+            try:
+                model_response.__dict__["context_management"] = (
+                    context_management_response
+                )
+            except Exception:
+                pass
+
        model_response._hidden_params = _hidden_params

        return model_response
@@ -6,7 +6,10 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.llms.base_llm.anthropic_messages.transformation import (
    BaseAnthropicMessagesConfig,
 )
-from litellm.types.llms.anthropic import AnthropicMessagesRequest
+from litellm.types.llms.anthropic import (
+    ANTHROPIC_BETA_HEADER_VALUES,
+    AnthropicMessagesRequest,
+)
 from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
 )
@@ -32,6 +35,7 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
            "tools",
            "tool_choice",
            "thinking",
+            "context_management",
            # TODO: Add Anthropic `metadata` support
            # "metadata",
        ]
@@ -71,6 +75,11 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
        if "content-type" not in headers:
            headers["content-type"] = "application/json"

+        headers = self._update_headers_with_optional_anthropic_beta(
+            headers=headers,
+            context_management=optional_params.get("context_management"),
+        )
+
        return headers, api_base

    def transform_anthropic_messages_request(
@@ -142,3 +151,18 @@ class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
            request_body=request_body,
            litellm_logging_obj=litellm_logging_obj,
        )
+
+    @staticmethod
+    def _update_headers_with_optional_anthropic_beta(
+        headers: dict, context_management: Optional[Dict]
+    ) -> dict:
+        if context_management is None:
+            return headers
+
+        existing_beta = headers.get("anthropic-beta")
+        beta_value = ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
+        if existing_beta is None:
+            headers["anthropic-beta"] = beta_value
+        elif beta_value not in [beta.strip() for beta in existing_beta.split(",")]:
+            headers["anthropic-beta"] = f"{existing_beta}, {beta_value}"
+        return headers
@@ -0,0 +1,226 @@
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+
+import httpx
+
+from litellm.types.router import GenericLiteLLMParams
+from litellm.types.vector_store_files import (
+    VectorStoreFileAuthCredentials,
+    VectorStoreFileChunkingStrategy,
+    VectorStoreFileContentResponse,
+    VectorStoreFileCreateRequest,
+    VectorStoreFileDeleteResponse,
+    VectorStoreFileListQueryParams,
+    VectorStoreFileListResponse,
+    VectorStoreFileObject,
+    VectorStoreFileUpdateRequest,
+)
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+    from ..chat.transformation import BaseLLMException as _BaseLLMException
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+    BaseLLMException = _BaseLLMException
+else:
+    LiteLLMLoggingObj = Any
+    BaseLLMException = Any
+
+
+class BaseVectorStoreFilesConfig(ABC):
+    """
+    Base configuration contract for provider-specific vector store file implementations.
+
+    Subclasses must implement credential lookup, endpoint listing, environment
+    validation, URL construction, and a request/response transform pair for each
+    operation (create, list, retrieve, retrieve content, update, delete).
+    The non-abstract methods provide pass-through defaults that may be overridden.
+    """
+
+    def get_supported_openai_params(
+        self,
+        operation: str,
+    ) -> Tuple[str, ...]:
+        """
+        Return the set of OpenAI params supported for the given operation.
+
+        The base implementation returns an empty tuple.
+        """
+
+        return tuple()
+
+    def map_openai_params(
+        self,
+        *,
+        operation: str,
+        non_default_params: Dict[str, Any],
+        optional_params: Dict[str, Any],
+        drop_params: bool,
+    ) -> Dict[str, Any]:
+        """
+        Map non-default OpenAI params to provider-specific params.
+
+        The base implementation returns `optional_params` unchanged.
+        """
+
+        return optional_params
+
+    @abstractmethod
+    def get_auth_credentials(
+        self, litellm_params: Dict[str, Any]
+    ) -> VectorStoreFileAuthCredentials:
+        """Return the provider's auth credentials derived from `litellm_params`."""
+        ...
+
+    @abstractmethod
+    def get_vector_store_file_endpoints_by_type(self) -> Dict[
+        str, Tuple[Tuple[str, str], ...]
+    ]:
+        """
+        Return the provider's vector store file endpoints, grouped by type.
+
+        NOTE(review): the meaning of the keys and of each (str, str) pair is not
+        visible here - presumably (HTTP method, path); confirm against an
+        implementation.
+        """
+        ...
+
+    @abstractmethod
+    def validate_environment(
+        self,
+        *,
+        headers: Dict[str, str],
+        litellm_params: Optional[GenericLiteLLMParams],
+    ) -> Dict[str, str]:
+        """
+        Return the request headers to send for this provider.
+
+        The base body returns an empty dict; it is only reachable through
+        `super()` because the method is abstract.
+        """
+        return {}
+
+    @abstractmethod
+    def get_complete_url(
+        self,
+        *,
+        api_base: Optional[str],
+        vector_store_id: str,
+        litellm_params: Dict[str, Any],
+    ) -> str:
+        """
+        Return the base URL used for vector store file requests.
+
+        The base body returns `api_base` unchanged; it is only reachable through
+        `super()` because the method is abstract.
+
+        Raises:
+            ValueError: If `api_base` is None.
+        """
+        if api_base is None:
+            raise ValueError("api_base is required")
+        return api_base
+
+    @abstractmethod
+    def transform_create_vector_store_file_request(
+        self,
+        *,
+        vector_store_id: str,
+        create_request: VectorStoreFileCreateRequest,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """Build the create-file request; the result is a (url, request_body) pair."""
+        ...
+
+    @abstractmethod
+    def transform_create_vector_store_file_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileObject:
+        """Convert the provider's create-file HTTP response into a VectorStoreFileObject."""
+        ...
+
+    @abstractmethod
+    def transform_list_vector_store_files_request(
+        self,
+        *,
+        vector_store_id: str,
+        query_params: VectorStoreFileListQueryParams,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """
+        Build the list-files request.
+
+        NOTE(review): presumably returns (url, query params) - confirm against the handler.
+        """
+        ...
+
+    @abstractmethod
+    def transform_list_vector_store_files_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileListResponse:
+        """Convert the provider's list-files HTTP response into a VectorStoreFileListResponse."""
+        ...
+
+    @abstractmethod
+    def transform_retrieve_vector_store_file_request(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """
+        Build the retrieve-file request.
+
+        NOTE(review): presumably returns (url, request params) - confirm against the handler.
+        """
+        ...
+
+    @abstractmethod
+    def transform_retrieve_vector_store_file_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileObject:
+        """Convert the provider's retrieve-file HTTP response into a VectorStoreFileObject."""
+        ...
+
+    @abstractmethod
+    def transform_retrieve_vector_store_file_content_request(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """
+        Build the retrieve-file-content request.
+
+        NOTE(review): presumably returns (url, request params) - confirm against the handler.
+        """
+        ...
+
+    @abstractmethod
+    def transform_retrieve_vector_store_file_content_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileContentResponse:
+        """Convert the provider's file-content HTTP response into a VectorStoreFileContentResponse."""
+        ...
+
+    @abstractmethod
+    def transform_update_vector_store_file_request(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        update_request: VectorStoreFileUpdateRequest,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """
+        Build the update-file request.
+
+        NOTE(review): presumably returns (url, request_body) - confirm against the handler.
+        """
+        ...
+
+    @abstractmethod
+    def transform_update_vector_store_file_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileObject:
+        """Convert the provider's update-file HTTP response into a VectorStoreFileObject."""
+        ...
+
+    @abstractmethod
+    def transform_delete_vector_store_file_request(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """
+        Build the delete-file request.
+
+        NOTE(review): presumably returns (url, request params) - confirm against the handler.
+        """
+        ...
+
+    @abstractmethod
+    def transform_delete_vector_store_file_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileDeleteResponse:
+        """Convert the provider's delete-file HTTP response into a VectorStoreFileDeleteResponse."""
+        ...
+
+    def get_error_class(
+        self,
+        *,
+        error_message: str,
+        status_code: int,
+        headers: Union[Dict[str, Any], httpx.Headers],
+    ) -> BaseLLMException:
+        """
+        Raise a BaseLLMException built from the given message, status code and headers.
+
+        NOTE: despite the return annotation, this base implementation raises the
+        exception instead of returning it.
+        """
+        # Imported here rather than at module level, where the name is only
+        # imported under TYPE_CHECKING.
+        from ..chat.transformation import BaseLLMException
+
+        raise BaseLLMException(
+            status_code=status_code,
+            message=error_message,
+            headers=headers,
+        )
+
+    def sign_request(
+        self,
+        *,
+        headers: Dict[str, str],
+        optional_params: Dict[str, Any],
+        request_data: Dict[str, Any],
+        api_base: str,
+        api_key: Optional[str] = None,
+    ) -> Tuple[Dict[str, str], Optional[bytes]]:
+        """
+        Hook for providers that need to sign outgoing requests.
+
+        The base implementation performs no signing: it returns `headers`
+        unchanged together with None.
+        """
+        return headers, None
+
+    def prepare_chunking_strategy(
+        self,
+        chunking_strategy: Optional[VectorStoreFileChunkingStrategy],
+    ) -> Optional[VectorStoreFileChunkingStrategy]:
+        """
+        Hook for adjusting the chunking strategy before it is sent.
+
+        The base implementation returns `chunking_strategy` unchanged.
+        """
+        return chunking_strategy
@@ -48,6 +48,9 @@ from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfi
 from litellm.llms.base_llm.search.transformation import BaseSearchConfig, SearchResponse
 from litellm.llms.base_llm.text_to_speech.transformation import BaseTextToSpeechConfig
 from litellm.llms.base_llm.vector_store.transformation import BaseVectorStoreConfig
+from litellm.llms.base_llm.vector_store_files.transformation import (
+    BaseVectorStoreFilesConfig,
+)
 from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
 from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
@@ -92,6 +95,15 @@ from litellm.types.vector_stores import (
    VectorStoreSearchOptionalRequestParams,
    VectorStoreSearchResponse,
 )
+from litellm.types.vector_store_files import (
+    VectorStoreFileContentResponse,
+    VectorStoreFileCreateRequest,
+    VectorStoreFileDeleteResponse,
+    VectorStoreFileListQueryParams,
+    VectorStoreFileListResponse,
+    VectorStoreFileObject,
+    VectorStoreFileUpdateRequest,
+)
 from litellm.types.videos.main import VideoObject
 from litellm.utils import (
    CustomStreamWrapper,
@@ -3529,6 +3541,7 @@ class BaseLLMHTTPHandler:
            BaseImageEditConfig,
            BaseImageGenerationConfig,
            BaseVectorStoreConfig,
+            BaseVectorStoreFilesConfig,
            BaseGoogleGenAIGenerateContentConfig,
            BaseAnthropicMessagesConfig,
            BaseBatchesConfig,
@@ -6000,6 +6013,909 @@ class BaseLLMHTTPHandler:
            response=response,
        )

+    #####################################################################
+    ################ Vector Store Files HANDLERS ########################
+    #####################################################################
+    async def async_vector_store_file_create_handler(
+        self,
+        *,
+        vector_store_id: str,
+        create_request: VectorStoreFileCreateRequest,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    ) -> VectorStoreFileObject:
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            async_httpx_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider),
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)},
+            )
+        else:
+            async_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        request_dict = dict(create_request)
+        if extra_body:
+            request_dict.update(extra_body)
+
+        (
+            url,
+            request_body,
+        ) = vector_store_files_provider_config.transform_create_vector_store_file_request(
+            vector_store_id=vector_store_id,
+            create_request=cast(VectorStoreFileCreateRequest, request_dict),
+            api_base=api_base,
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_body,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = await async_httpx_client.post(
+                url=url, headers=headers, json=request_body, timeout=timeout
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_create_vector_store_file_response(
+            response=response
+        )
+
+    def vector_store_file_create_handler(
+        self,
+        *,
+        vector_store_id: str,
+        create_request: VectorStoreFileCreateRequest,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        _is_async: bool = False,
+    ) -> Union[VectorStoreFileObject, Coroutine[Any, Any, VectorStoreFileObject]]:
+        if _is_async:
+            return self.async_vector_store_file_create_handler(
+                vector_store_id=vector_store_id,
+                create_request=create_request,
+                vector_store_files_provider_config=vector_store_files_provider_config,
+                custom_llm_provider=custom_llm_provider,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                timeout=timeout,
+                client=client if isinstance(client, AsyncHTTPHandler) else None,
+            )
+
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        request_dict = dict(create_request)
+        if extra_body:
+            request_dict.update(extra_body)
+
+        (
+            url,
+            request_body,
+        ) = vector_store_files_provider_config.transform_create_vector_store_file_request(
+            vector_store_id=vector_store_id,
+            create_request=cast(VectorStoreFileCreateRequest, request_dict),
+            api_base=api_base,
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_body,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = sync_httpx_client.post(
+                url=url, headers=headers, json=request_body, timeout=timeout
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_create_vector_store_file_response(
+            response=response
+        )
+
+    async def async_vector_store_file_list_handler(
+        self,
+        *,
+        vector_store_id: str,
+        query_params: VectorStoreFileListQueryParams,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_query: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    ) -> VectorStoreFileListResponse:
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            async_httpx_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider),
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)},
+            )
+        else:
+            async_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        params_dict = dict(query_params)
+        if extra_query:
+            params_dict.update(extra_query)
+
+        (
+            url,
+            request_params,
+        ) = vector_store_files_provider_config.transform_list_vector_store_files_request(
+            vector_store_id=vector_store_id,
+            query_params=cast(VectorStoreFileListQueryParams, params_dict),
+            api_base=api_base,
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_params,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = await async_httpx_client.get(
+                url=url, headers=headers, params=request_params
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_list_vector_store_files_response(
+            response=response
+        )
+
+    def vector_store_file_list_handler(
+        self,
+        *,
+        vector_store_id: str,
+        query_params: VectorStoreFileListQueryParams,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_query: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        _is_async: bool = False,
+    ) -> Union[
+        VectorStoreFileListResponse, Coroutine[Any, Any, VectorStoreFileListResponse]
+    ]:
+        if _is_async:
+            return self.async_vector_store_file_list_handler(
+                vector_store_id=vector_store_id,
+                query_params=query_params,
+                vector_store_files_provider_config=vector_store_files_provider_config,
+                custom_llm_provider=custom_llm_provider,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                timeout=timeout,
+                client=client if isinstance(client, AsyncHTTPHandler) else None,
+            )
+
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        params_dict = dict(query_params)
+        if extra_query:
+            params_dict.update(extra_query)
+
+        (
+            url,
+            request_params,
+        ) = vector_store_files_provider_config.transform_list_vector_store_files_request(
+            vector_store_id=vector_store_id,
+            query_params=cast(VectorStoreFileListQueryParams, params_dict),
+            api_base=api_base,
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_params,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = sync_httpx_client.get(
+                url=url, headers=headers, params=request_params
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_list_vector_store_files_response(
+            response=response
+        )
+
+    async def async_vector_store_file_retrieve_handler(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    ) -> VectorStoreFileObject:
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            async_httpx_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider),
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)},
+            )
+        else:
+            async_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, request_params = (
+            vector_store_files_provider_config.transform_retrieve_vector_store_file_request(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                api_base=api_base,
+            )
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_params,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = await async_httpx_client.get(
+                url=url, headers=headers, params=request_params
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_retrieve_vector_store_file_response(
+            response=response
+        )
+
+    def vector_store_file_retrieve_handler(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        _is_async: bool = False,
+    ) -> Union[VectorStoreFileObject, Coroutine[Any, Any, VectorStoreFileObject]]:
+        if _is_async:
+            return self.async_vector_store_file_retrieve_handler(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                vector_store_files_provider_config=vector_store_files_provider_config,
+                custom_llm_provider=custom_llm_provider,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                extra_headers=extra_headers,
+                timeout=timeout,
+                client=client if isinstance(client, AsyncHTTPHandler) else None,
+            )
+
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, request_params = (
+            vector_store_files_provider_config.transform_retrieve_vector_store_file_request(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                api_base=api_base,
+            )
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_params,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = sync_httpx_client.get(
+                url=url, headers=headers, params=request_params
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_retrieve_vector_store_file_response(
+            response=response
+        )
+
+    async def async_vector_store_file_content_handler(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    ) -> VectorStoreFileContentResponse:
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            async_httpx_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider),
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)},
+            )
+        else:
+            async_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, request_params = (
+            vector_store_files_provider_config.transform_retrieve_vector_store_file_content_request(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                api_base=api_base,
+            )
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_params,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = await async_httpx_client.get(
+                url=url, headers=headers, params=request_params
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_retrieve_vector_store_file_content_response(
+            response=response
+        )
+
+    def vector_store_file_content_handler(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        _is_async: bool = False,
+    ) -> Union[
+        VectorStoreFileContentResponse,
+        Coroutine[Any, Any, VectorStoreFileContentResponse],
+    ]:
+        if _is_async:
+            return self.async_vector_store_file_content_handler(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                vector_store_files_provider_config=vector_store_files_provider_config,
+                custom_llm_provider=custom_llm_provider,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                extra_headers=extra_headers,
+                timeout=timeout,
+                client=client if isinstance(client, AsyncHTTPHandler) else None,
+            )
+
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, request_params = (
+            vector_store_files_provider_config.transform_retrieve_vector_store_file_content_request(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                api_base=api_base,
+            )
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_params,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = sync_httpx_client.get(
+                url=url, headers=headers, params=request_params
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_retrieve_vector_store_file_content_response(
+            response=response
+        )
+
+    async def async_vector_store_file_update_handler(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        update_request: VectorStoreFileUpdateRequest,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    ) -> VectorStoreFileObject:
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            async_httpx_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider),
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)},
+            )
+        else:
+            async_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        request_dict = dict(update_request)
+        if extra_body:
+            request_dict.update(extra_body)
+
+        (
+            url,
+            request_body,
+        ) = vector_store_files_provider_config.transform_update_vector_store_file_request(
+            vector_store_id=vector_store_id,
+            file_id=file_id,
+            update_request=cast(VectorStoreFileUpdateRequest, request_dict),
+            api_base=api_base,
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_body,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = await async_httpx_client.post(
+                url=url, headers=headers, json=request_body, timeout=timeout
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_update_vector_store_file_response(
+            response=response
+        )
+
+    def vector_store_file_update_handler(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        update_request: VectorStoreFileUpdateRequest,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        _is_async: bool = False,
+    ) -> Union[VectorStoreFileObject, Coroutine[Any, Any, VectorStoreFileObject]]:
+        if _is_async:
+            return self.async_vector_store_file_update_handler(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                update_request=update_request,
+                vector_store_files_provider_config=vector_store_files_provider_config,
+                custom_llm_provider=custom_llm_provider,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                timeout=timeout,
+                client=client if isinstance(client, AsyncHTTPHandler) else None,
+            )
+
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        request_dict = dict(update_request)
+        if extra_body:
+            request_dict.update(extra_body)
+
+        (
+            url,
+            request_body,
+        ) = vector_store_files_provider_config.transform_update_vector_store_file_request(
+            vector_store_id=vector_store_id,
+            file_id=file_id,
+            update_request=cast(VectorStoreFileUpdateRequest, request_dict),
+            api_base=api_base,
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_body,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = sync_httpx_client.post(
+                url=url, headers=headers, json=request_body, timeout=timeout
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_update_vector_store_file_response(
+            response=response
+        )
+
+    async def async_vector_store_file_delete_handler(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    ) -> VectorStoreFileDeleteResponse:
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            async_httpx_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider),
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)},
+            )
+        else:
+            async_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, request_params = (
+            vector_store_files_provider_config.transform_delete_vector_store_file_request(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                api_base=api_base,
+            )
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_params,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = await async_httpx_client.delete(
+                url=url, headers=headers, params=request_params, timeout=timeout
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_delete_vector_store_file_response(
+            response=response
+        )
+
+    def vector_store_file_delete_handler(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        vector_store_files_provider_config: BaseVectorStoreFilesConfig,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        _is_async: bool = False,
+    ) -> Union[
+        VectorStoreFileDeleteResponse,
+        Coroutine[Any, Any, VectorStoreFileDeleteResponse],
+    ]:
+        if _is_async:
+            return self.async_vector_store_file_delete_handler(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                vector_store_files_provider_config=vector_store_files_provider_config,
+                custom_llm_provider=custom_llm_provider,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                extra_headers=extra_headers,
+                timeout=timeout,
+                client=client if isinstance(client, AsyncHTTPHandler) else None,
+            )
+
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = vector_store_files_provider_config.validate_environment(
+            headers=extra_headers or {}, litellm_params=litellm_params
+        )
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = vector_store_files_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            vector_store_id=vector_store_id,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, request_params = (
+            vector_store_files_provider_config.transform_delete_vector_store_file_request(
+                vector_store_id=vector_store_id,
+                file_id=file_id,
+                api_base=api_base,
+            )
+        )
+
+        logging_obj.pre_call(
+            input="",
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_params,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = sync_httpx_client.delete(
+                url=url, headers=headers, params=request_params, timeout=timeout
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e, provider_config=vector_store_files_provider_config
+            )
+
+        return vector_store_files_provider_config.transform_delete_vector_store_file_response(
+            response=response
+        )
+
    #####################################################################
    ################ Google GenAI GENERATE CONTENT HANDLER ###########################
    #####################################################################
@@ -2,6 +2,7 @@ from .cost_calculator import cost_calculator
 from .image_generation import (
    FalAIBaseConfig,
    FalAIBriaConfig,
+    FalAIFluxProV11Config,
    FalAIFluxProV11UltraConfig,
    FalAIFluxSchnellConfig,
    FalAIImageGenerationConfig,
@@ -18,6 +19,7 @@ __all__ = [
    "FalAIImagen4Config",
    "FalAIRecraftV3Config",
    "FalAIBriaConfig",
+    "FalAIFluxProV11Config",
    "FalAIFluxProV11UltraConfig",
    "FalAIFluxSchnellConfig",
    "FalAIStableDiffusionConfig",
@@ -3,12 +3,18 @@ from litellm.llms.base_llm.image_generation.transformation import (
 )

 from .bria_transformation import FalAIBriaConfig
+from .flux_pro_v11_transformation import FalAIFluxProV11Config
 from .flux_pro_v11_ultra_transformation import FalAIFluxProV11UltraConfig
 from .flux_schnell_transformation import FalAIFluxSchnellConfig
 from .imagen4_transformation import FalAIImagen4Config
 from .recraft_v3_transformation import FalAIRecraftV3Config
+from .ideogram_v3_transformation import FalAIIdeogramV3Config
 from .stable_diffusion_transformation import FalAIStableDiffusionConfig
 from .transformation import FalAIBaseConfig, FalAIImageGenerationConfig
+from .bytedance_transformation import (
+    FalAIBytedanceSeedreamV3Config,
+    FalAIBytedanceDreaminaV31Config,
+)

 __all__ = [
    "FalAIBaseConfig",
@@ -16,9 +22,13 @@ __all__ = [
    "FalAIImagen4Config",
    "FalAIRecraftV3Config",
    "FalAIBriaConfig",
+    "FalAIFluxProV11Config",
    "FalAIFluxProV11UltraConfig",
    "FalAIFluxSchnellConfig",
    "FalAIStableDiffusionConfig",
+    "FalAIBytedanceSeedreamV3Config",
+    "FalAIBytedanceDreaminaV31Config",
+    "FalAIIdeogramV3Config",
 ]


@@ -41,10 +51,18 @@ def get_fal_ai_image_generation_config(model: str) -> BaseImageGenerationConfig:
        return FalAIRecraftV3Config()
    elif "bria" in model_lower:
        return FalAIBriaConfig()
-    elif "flux-pro" in model_lower and "ultra" in model_lower:
-        return FalAIFluxProV11UltraConfig()
+    elif "flux-pro" in model_lower:
+        if "ultra" in model_lower:
+            return FalAIFluxProV11UltraConfig()
+        return FalAIFluxProV11Config()
    elif "flux/schnell" in model_lower or "flux-schnell" in model_lower or "schnell" in model_lower:
        return FalAIFluxSchnellConfig()
+    elif "bytedance/seedream" in model_lower:
+        return FalAIBytedanceSeedreamV3Config()
+    elif "bytedance/dreamina" in model_lower:
+        return FalAIBytedanceDreaminaV31Config()
+    elif "ideogram" in model_lower:
+        return FalAIIdeogramV3Config()
    elif "stable-diffusion" in model_lower:
        return FalAIStableDiffusionConfig()
    
@@ -0,0 +1,106 @@
+from typing import Any
+
+from .flux_pro_v11_ultra_transformation import FalAIFluxProV11UltraConfig
+
+
+class FalAIBytedanceBaseConfig(FalAIFluxProV11UltraConfig):
+    """
+    Shared configuration for Fal AI ByteDance text-to-image models.
+
+    Extends the Flux Pro v1.1 Ultra config and overrides parameter mapping so
+    that the OpenAI-compatible `size` parameter accepted in LiteLLM requests
+    is sent to Fal AI as `image_size` (a preset name or a custom
+    width/height object).
+    """
+
+    # OpenAI-style "WIDTHxHEIGHT" strings that have a named `image_size` preset.
+    _OPENAI_SIZE_TO_IMAGE_SIZE = {
+        "1024x1024": "square_hd",
+        "512x512": "square",
+        "1792x1024": "landscape_16_9",
+        "1024x1792": "portrait_16_9",
+        "1024x768": "landscape_4_3",
+        "768x1024": "portrait_4_3",
+    }
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        """
+        Copy supported OpenAI image params into `optional_params` under Fal AI names.
+
+        Keys already present in `optional_params` are left untouched. `n` is
+        sent as `num_images`, `response_format` as `output_format` (both
+        `b64_json` and `url` become `jpeg`) and `size` as `image_size`.
+
+        Returns:
+            The same `optional_params` dict, updated in place.
+
+        Raises:
+            ValueError: for an unsupported parameter when `drop_params` is False.
+        """
+        supported_params = self.get_supported_openai_params(model)
+
+        param_mapping = {
+            "n": "num_images",
+            "response_format": "output_format",
+            "size": "image_size",
+        }
+
+        for k, mapped_value in non_default_params.items():
+            # Never overwrite a value the caller already placed in optional_params.
+            if k in optional_params:
+                continue
+
+            if k not in supported_params:
+                if drop_params:
+                    continue
+                raise ValueError(
+                    f"Parameter {k} is not supported for model {model}. "
+                    f"Supported parameters are {supported_params}. "
+                    "Set drop_params=True to drop unsupported parameters."
+                )
+
+            if k == "response_format" and mapped_value in ["b64_json", "url"]:
+                mapped_value = "jpeg"
+            elif k == "size":
+                mapped_value = self._map_image_size(mapped_value)
+
+            optional_params[param_mapping.get(k, k)] = mapped_value
+
+        return optional_params
+
+    def _map_image_size(self, size: Any) -> Any:
+        """
+        Translate an OpenAI `size` value into a Fal AI `image_size` value.
+
+        Non-string values (including custom size dicts) are returned unchanged.
+        Strings are matched, ignoring case and surrounding whitespace, against
+        the known OpenAI sizes, then against the preset names themselves, and
+        finally parsed as "WIDTHxHEIGHT" with positive integers. Anything else
+        falls back to "landscape_4_3".
+        """
+        if not isinstance(size, str):
+            return size
+
+        normalized = size.strip().lower()
+        if normalized in self._OPENAI_SIZE_TO_IMAGE_SIZE:
+            return self._OPENAI_SIZE_TO_IMAGE_SIZE[normalized]
+
+        # A preset name passed directly is already in the target vocabulary;
+        # do not replace it with the fallback.
+        if normalized in self._OPENAI_SIZE_TO_IMAGE_SIZE.values():
+            return normalized
+
+        width_str, separator, height_str = normalized.partition("x")
+        if separator:
+            try:
+                width, height = int(width_str), int(height_str)
+            except ValueError:
+                # Not two integers (e.g. "1x2x3" or "axb"); use the fallback below.
+                pass
+            else:
+                if width > 0 and height > 0:
+                    return {"width": width, "height": height}
+
+        return "landscape_4_3"
+
+
+class FalAIBytedanceSeedreamV3Config(FalAIBytedanceBaseConfig):
+    """
+    Configuration for Fal AI ByteDance Seedream v3 text-to-image model.
+
+    Only the endpoint is set here; parameter mapping is inherited from
+    FalAIBytedanceBaseConfig.
+
+    Model endpoint: fal-ai/bytedance/seedream/v3/text-to-image
+    Documentation: https://fal.ai/models/fal-ai/bytedance/seedream/v3/text-to-image
+    """
+
+    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/bytedance/seedream/v3/text-to-image"
+
+
+class FalAIBytedanceDreaminaV31Config(FalAIBytedanceBaseConfig):
+    """
+    Configuration for Fal AI ByteDance Dreamina v3.1 text-to-image model.
+
+    Only the endpoint is set here; parameter mapping is inherited from
+    FalAIBytedanceBaseConfig.
+
+    Model endpoint: fal-ai/bytedance/dreamina/v3.1/text-to-image
+    Documentation: https://fal.ai/models/fal-ai/bytedance/dreamina/v3.1/text-to-image
+    """
+
+    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/bytedance/dreamina/v3.1/text-to-image"
+
+
@@ -0,0 +1,91 @@
+from typing import Any
+
+from .flux_pro_v11_ultra_transformation import FalAIFluxProV11UltraConfig
+
+
+class FalAIFluxProV11Config(FalAIFluxProV11UltraConfig):
+    """
+    Configuration for Fal AI Flux Pro v1.1 model.
+
+    FLUX Pro v1.1 leverages the same overall request/response structure as the
+    Ultra variant but expects the `image_size` parameter instead of
+    `aspect_ratio`.
+
+    Model endpoint: fal-ai/flux-pro/v1.1
+    Documentation: https://fal.ai/models/fal-ai/flux-pro/v1.1
+    """
+
+    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/flux-pro/v1.1"
+
+    # OpenAI-style "WIDTHxHEIGHT" strings that have a named `image_size` preset.
+    _OPENAI_SIZE_TO_IMAGE_SIZE = {
+        "1024x1024": "square_hd",
+        "512x512": "square",
+        "1792x1024": "landscape_16_9",
+        "1024x1792": "portrait_16_9",
+        "1024x768": "landscape_4_3",
+        "768x1024": "portrait_4_3",
+    }
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        """
+        Override size handling to map to Flux Pro v1.1 image_size enums/object.
+
+        Keys already present in `optional_params` are left untouched. `n` is
+        sent as `num_images`, `response_format` as `output_format` (both
+        `b64_json` and `url` become `jpeg`) and `size` as `image_size`.
+
+        Returns:
+            The same `optional_params` dict, updated in place.
+
+        Raises:
+            ValueError: for an unsupported parameter when `drop_params` is False.
+        """
+        supported_params = self.get_supported_openai_params(model)
+
+        param_mapping = {
+            "n": "num_images",
+            "response_format": "output_format",
+            "size": "image_size",
+        }
+
+        for k, mapped_value in non_default_params.items():
+            # Never overwrite a value the caller already placed in optional_params.
+            if k in optional_params:
+                continue
+
+            if k not in supported_params:
+                if drop_params:
+                    continue
+                raise ValueError(
+                    f"Parameter {k} is not supported for model {model}. "
+                    f"Supported parameters are {supported_params}. "
+                    "Set drop_params=True to drop unsupported parameters."
+                )
+
+            if k == "response_format" and mapped_value in ["b64_json", "url"]:
+                mapped_value = "jpeg"
+            elif k == "size":
+                mapped_value = self._map_image_size(mapped_value)
+
+            optional_params[param_mapping.get(k, k)] = mapped_value
+
+        return optional_params
+
+    def _map_image_size(self, size: Any) -> Any:
+        """
+        Translate an OpenAI `size` value into a Flux Pro v1.1 `image_size` value.
+
+        Non-string values (including custom size dicts) are returned unchanged.
+        Strings are matched, ignoring case and surrounding whitespace, against
+        the known OpenAI sizes, then against the preset names themselves, and
+        finally parsed as "WIDTHxHEIGHT" with positive integers. Anything else
+        falls back to "landscape_4_3".
+        """
+        if not isinstance(size, str):
+            return size
+
+        normalized = size.strip().lower()
+        if normalized in self._OPENAI_SIZE_TO_IMAGE_SIZE:
+            return self._OPENAI_SIZE_TO_IMAGE_SIZE[normalized]
+
+        # A preset name passed directly is already in the target vocabulary;
+        # do not replace it with the fallback.
+        if normalized in self._OPENAI_SIZE_TO_IMAGE_SIZE.values():
+            return normalized
+
+        width_str, separator, height_str = normalized.partition("x")
+        if separator:
+            try:
+                width, height = int(width_str), int(height_str)
+            except ValueError:
+                # Not two integers (e.g. "1x2x3" or "axb"); use the fallback below.
+                pass
+            else:
+                if width > 0 and height > 0:
+                    return {"width": width, "height": height}
+
+        return "landscape_4_3"
+
+
@@ -0,0 +1,193 @@
+from typing import TYPE_CHECKING, Any, List, Optional
+
+import httpx
+
+from litellm.types.llms.openai import OpenAIImageGenerationOptionalParams
+from litellm.types.utils import ImageObject, ImageResponse
+
+from .transformation import FalAIBaseConfig
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+else:
+    LiteLLMLoggingObj = Any
+
+
+class FalAIIdeogramV3Config(FalAIBaseConfig):
+    """
+    Configuration for fal-ai/ideogram/v3 image generation.
+
+    The Ideogram v3 endpoint exposes multiple generation modes (text-to-image,
+    remixing, reframing, background replacement, character workflows, etc.).
+    LiteLLM focuses on the text-to-image interface to maintain OpenAI parity.
+
+    Model endpoint: fal-ai/ideogram/v3
+    Documentation: https://fal.ai/models/fal-ai/ideogram/v3
+    """
+
+    IMAGE_GENERATION_ENDPOINT: str = "fal-ai/ideogram/v3"
+
+    # OpenAI-style "WIDTHxHEIGHT" strings that have a named `image_size` preset.
+    _OPENAI_SIZE_TO_IMAGE_SIZE = {
+        "1024x1024": "square_hd",
+        "512x512": "square",
+        "1024x768": "landscape_4_3",
+        "768x1024": "portrait_4_3",
+        "1536x1024": "landscape_16_9",
+        "1024x1536": "portrait_16_9",
+    }
+
+    def get_supported_openai_params(
+        self, model: str
+    ) -> List[OpenAIImageGenerationOptionalParams]:
+        """
+        Ideogram v3 accepts the core OpenAI image parameters.
+        """
+
+        return [
+            "n",
+            "response_format",
+            "size",
+        ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        """
+        Map OpenAI-style parameters onto Ideogram's request schema.
+
+        Keys already present in `optional_params` are left untouched. `n` is
+        sent as `num_images` and `size` as `image_size`; `response_format` is
+        accepted but not forwarded.
+
+        Returns:
+            The same `optional_params` dict, updated in place.
+
+        Raises:
+            ValueError: for an unsupported parameter when `drop_params` is False.
+        """
+
+        supported_params = self.get_supported_openai_params(model)
+
+        for k, value in non_default_params.items():
+            if k in optional_params:
+                continue
+
+            if k not in supported_params:
+                if drop_params:
+                    continue
+                raise ValueError(
+                    f"Parameter {k} is not supported for model {model}. "
+                    f"Supported parameters are {supported_params}. "
+                    "Set drop_params=True to drop unsupported parameters."
+                )
+
+            if k == "n":
+                optional_params["num_images"] = value
+            elif k == "size":
+                optional_params["image_size"] = self._map_image_size(value)
+            # "response_format" is deliberately not forwarded: Ideogram always
+            # returns URLs, so there is nothing to map.
+
+        return optional_params
+
+    def _map_image_size(self, size: Any) -> Any:
+        """
+        Translate an OpenAI `size` value into an Ideogram `image_size` value.
+
+        Dicts with integer `width` and `height` are reduced to just those two
+        keys; other dicts and non-string values are returned unchanged.
+        Strings are matched, ignoring case and surrounding whitespace, against
+        the known OpenAI sizes, then against the preset names themselves, and
+        finally parsed as "WIDTHxHEIGHT" with positive integers. Anything else
+        falls back to "square_hd".
+        """
+        if isinstance(size, dict):
+            width = size.get("width")
+            height = size.get("height")
+            if isinstance(width, int) and isinstance(height, int):
+                return {"width": width, "height": height}
+            return size
+
+        if not isinstance(size, str):
+            return size
+
+        normalized = size.strip().lower()
+        if normalized in self._OPENAI_SIZE_TO_IMAGE_SIZE:
+            return self._OPENAI_SIZE_TO_IMAGE_SIZE[normalized]
+
+        # A preset name passed directly is already in the target vocabulary;
+        # do not replace it with the fallback.
+        if normalized in self._OPENAI_SIZE_TO_IMAGE_SIZE.values():
+            return normalized
+
+        width_str, separator, height_str = normalized.partition("x")
+        if separator:
+            try:
+                parsed_width, parsed_height = int(width_str), int(height_str)
+            except ValueError:
+                # Not two integers (e.g. "1x2x3" or "axb"); use the fallback below.
+                pass
+            else:
+                if parsed_width > 0 and parsed_height > 0:
+                    return {"width": parsed_width, "height": parsed_height}
+
+        # Fallback to a safe default that Ideogram accepts.
+        return "square_hd"
+
+    def transform_image_generation_request(
+        self,
+        model: str,
+        prompt: str,
+        optional_params: dict,
+        litellm_params: dict,
+        headers: dict,
+    ) -> dict:
+        """
+        Construct the request payload for Ideogram v3.
+
+        Required:
+            - prompt: text prompt describing the scene.
+
+        Optional (subset):
+            - rendering_speed, style_preset, style, style_codes, color_palette,
+              image_urls, style_reference_images, expand_prompt, seed,
+              negative_prompt, image_size, etc.
+
+        A `prompt` key inside `optional_params` takes precedence over the
+        `prompt` argument because the optional params are merged last.
+        """
+
+        return {
+            "prompt": prompt,
+            **optional_params,
+        }
+
+    def transform_image_generation_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ImageResponse,
+        logging_obj: LiteLLMLoggingObj,
+        request_data: dict,
+        optional_params: dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ImageResponse:
+        """
+        Parse Ideogram v3 responses which contain a list of File objects.
+
+        Each entry of the `images` list becomes an `ImageObject` whose `url` is
+        the entry's `url` field (or the entry itself when it is not a dict).
+        When the body carries a `seed` it is copied into
+        `model_response._hidden_params`.
+
+        Raises:
+            The provider error class (via `get_error_class`) when the body is
+            not valid JSON or is not a JSON object.
+        """
+
+        try:
+            response_data = raw_response.json()
+        except Exception as e:
+            raise self.get_error_class(
+                error_message=f"Error transforming image generation response: {e}",
+                status_code=raw_response.status_code,
+                headers=raw_response.headers,
+            )
+
+        # A JSON array/scalar would otherwise fail below with a bare
+        # AttributeError on `.get`; surface it as a provider error instead.
+        if not isinstance(response_data, dict):
+            raise self.get_error_class(
+                error_message=(
+                    "Error transforming image generation response: expected a "
+                    f"JSON object, got {type(response_data).__name__}"
+                ),
+                status_code=raw_response.status_code,
+                headers=raw_response.headers,
+            )
+
+        if not model_response.data:
+            model_response.data = []
+
+        images = response_data.get("images", [])
+        if isinstance(images, list):
+            for image_entry in images:
+                if isinstance(image_entry, dict):
+                    url = image_entry.get("url")
+                else:
+                    url = image_entry
+
+                model_response.data.append(
+                    ImageObject(
+                        url=url,
+                        b64_json=None,
+                    )
+                )
+
+        if hasattr(model_response, "_hidden_params") and "seed" in response_data:
+            model_response._hidden_params["seed"] = response_data["seed"]
+
+        return model_response
+
+
@@ -30,7 +30,7 @@ class OpenAIGPT5Config(OpenAIGPTConfig):
        from litellm.utils import supports_tool_choice

        base_gpt_series_params = super().get_supported_openai_params(model=model)
-        gpt_5_only_params = ["reasoning_effort"]
+        gpt_5_only_params = ["reasoning_effort", "verbosity"]
        base_gpt_series_params.extend(gpt_5_only_params)
        if not supports_tool_choice(model=model):
            base_gpt_series_params.remove("tool_choice")
@@ -0,0 +1,258 @@
+from typing import Any, Dict, Optional, Tuple, cast
+
+import httpx
+
+import litellm
+from litellm.llms.base_llm.vector_store_files.transformation import (
+    BaseVectorStoreFilesConfig,
+)
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.router import GenericLiteLLMParams
+from litellm.types.vector_store_files import (
+    VectorStoreFileAuthCredentials,
+    VectorStoreFileContentResponse,
+    VectorStoreFileCreateRequest,
+    VectorStoreFileDeleteResponse,
+    VectorStoreFileListQueryParams,
+    VectorStoreFileListResponse,
+    VectorStoreFileObject,
+    VectorStoreFileUpdateRequest,
+)
+from litellm.utils import add_openai_metadata
+
+
+def _clean_dict(source: Dict[str, Any]) -> Dict[str, Any]:
+    """Return a new dict holding the entries of `source` whose value is not None."""
+    cleaned: Dict[str, Any] = {}
+    for key, value in source.items():
+        if value is None:
+            continue
+        cleaned[key] = value
+    return cleaned
+
+
+class OpenAIVectorStoreFilesConfig(BaseVectorStoreFilesConfig):
+    """
+    Request/response transformations for OpenAI vector store file endpoints.
+
+    Every request URL is derived from
+    `{api_base}/vector_stores/{vector_store_id}/files` (see
+    `get_complete_url`); response bodies are returned as parsed JSON.
+    """
+
+    # Header added to every request unless the caller already supplied this key.
+    ASSISTANTS_HEADER_KEY = "OpenAI-Beta"
+    ASSISTANTS_HEADER_VALUE = "assistants=v2"
+
+    def get_auth_credentials(
+        self, litellm_params: Dict[str, Any]
+    ) -> VectorStoreFileAuthCredentials:
+        """
+        Build bearer-token auth headers from `litellm_params["api_key"]`.
+
+        Raises:
+            ValueError: when no `api_key` entry is present.
+        """
+        api_key = litellm_params.get("api_key")
+        if api_key is None:
+            raise ValueError("api_key is required")
+        return {
+            "headers": {
+                "Authorization": f"Bearer {api_key}",
+            }
+        }
+
+    def get_vector_store_file_endpoints_by_type(self) -> Dict[
+        str, Tuple[Tuple[str, str], ...]
+    ]:
+        """Return the (HTTP method, path template) pairs grouped as "read" / "write"."""
+        return {
+            "read": (
+                ("GET", "/vector_stores/{vector_store_id}/files"),
+                ("GET", "/vector_stores/{vector_store_id}/files/{file_id}"),
+                (
+                    "GET",
+                    "/vector_stores/{vector_store_id}/files/{file_id}/content",
+                ),
+            ),
+            "write": (
+                ("POST", "/vector_stores/{vector_store_id}/files"),
+                ("POST", "/vector_stores/{vector_store_id}/files/{file_id}"),
+                ("DELETE", "/vector_stores/{vector_store_id}/files/{file_id}"),
+            ),
+        }
+
+    def validate_environment(
+        self,
+        *,
+        headers: Dict[str, str],
+        litellm_params: Optional[GenericLiteLLMParams],
+    ) -> Dict[str, str]:
+        """
+        Add auth, content-type and assistants headers to `headers`.
+
+        The API key is taken from `litellm_params.api_key`, `litellm.api_key`,
+        `litellm.openai_key` or the `OPENAI_API_KEY` secret, in that order.
+        `headers` is updated in place and returned; any existing
+        `Authorization` / `Content-Type` entries are overwritten, while an
+        existing `OpenAI-Beta` entry is kept.
+        """
+        litellm_params = litellm_params or GenericLiteLLMParams()
+        api_key = (
+            litellm_params.api_key
+            or litellm.api_key
+            or litellm.openai_key
+            or get_secret_str("OPENAI_API_KEY")
+        )
+        # NOTE(review): when no key is found anywhere this sends "Bearer None";
+        # confirm whether callers expect an early error here instead.
+        headers.update(
+            {
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            }
+        )
+        if self.ASSISTANTS_HEADER_KEY not in headers:
+            headers[self.ASSISTANTS_HEADER_KEY] = self.ASSISTANTS_HEADER_VALUE
+        return headers
+
+    def get_complete_url(
+        self,
+        *,
+        api_base: Optional[str],
+        vector_store_id: str,
+        litellm_params: Dict[str, Any],
+    ) -> str:
+        """
+        Return the files collection URL for `vector_store_id`.
+
+        The base is `api_base`, `litellm.api_base`, the `OPENAI_BASE_URL` or
+        `OPENAI_API_BASE` secret, or `https://api.openai.com/v1`, in that
+        order, with trailing slashes removed.
+        """
+        base_url = (
+            api_base
+            or litellm.api_base
+            or get_secret_str("OPENAI_BASE_URL")
+            or get_secret_str("OPENAI_API_BASE")
+            or "https://api.openai.com/v1"
+        )
+        base_url = base_url.rstrip("/")
+        return f"{base_url}/vector_stores/{vector_store_id}/files"
+
+    def _parse_json_response(self, response: httpx.Response) -> Any:
+        """Decode the JSON body, re-raising any failure as the provider error class."""
+        try:
+            return response.json()
+        except Exception as exc:  # noqa: BLE001
+            raise self.get_error_class(
+                error_message=str(exc),
+                status_code=response.status_code,
+                headers=response.headers,
+            )
+
+    @staticmethod
+    def _apply_attributes_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Run a dict-valued `attributes` entry through `add_openai_metadata`.
+
+        The entry is replaced by the helper's result, or removed when the
+        helper returns None. Non-dict values are left as they are. `payload`
+        is modified in place and returned.
+        """
+        attributes = payload.get("attributes")
+        if isinstance(attributes, dict):
+            filtered_attributes = add_openai_metadata(attributes)
+            if filtered_attributes is not None:
+                payload["attributes"] = filtered_attributes
+            else:
+                payload.pop("attributes", None)
+        return payload
+
+    def transform_create_vector_store_file_request(
+        self,
+        *,
+        vector_store_id: str,
+        create_request: VectorStoreFileCreateRequest,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """Return `(url, body)` for attaching a file; None-valued fields are dropped."""
+        payload: Dict[str, Any] = _clean_dict(dict(create_request))
+        return api_base, self._apply_attributes_metadata(payload)
+
+    def transform_create_vector_store_file_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileObject:
+        """Return the parsed JSON body of a create response."""
+        return cast(VectorStoreFileObject, self._parse_json_response(response))
+
+    def transform_list_vector_store_files_request(
+        self,
+        *,
+        vector_store_id: str,
+        query_params: VectorStoreFileListQueryParams,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """Return `(url, query)` for listing files; None-valued query params are dropped."""
+        params = _clean_dict(dict(query_params))
+        return api_base, params
+
+    def transform_list_vector_store_files_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileListResponse:
+        """Return the parsed JSON body of a list response."""
+        return cast(VectorStoreFileListResponse, self._parse_json_response(response))
+
+    def transform_retrieve_vector_store_file_request(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """Return `(url, params)` for fetching one file; no params are sent."""
+        return f"{api_base}/{file_id}", {}
+
+    def transform_retrieve_vector_store_file_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileObject:
+        """Return the parsed JSON body of a retrieve response."""
+        return cast(VectorStoreFileObject, self._parse_json_response(response))
+
+    def transform_retrieve_vector_store_file_content_request(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """Return `(url, params)` for fetching a file's content; no params are sent."""
+        return f"{api_base}/{file_id}/content", {}
+
+    def transform_retrieve_vector_store_file_content_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileContentResponse:
+        """Return the parsed JSON body of a file-content response."""
+        return cast(
+            VectorStoreFileContentResponse, self._parse_json_response(response)
+        )
+
+    def transform_update_vector_store_file_request(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        update_request: VectorStoreFileUpdateRequest,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """
+        Return `(url, body)` for updating a file.
+
+        Unlike the create request, None-valued fields are kept in the body.
+        """
+        payload: Dict[str, Any] = dict(update_request)
+        return f"{api_base}/{file_id}", self._apply_attributes_metadata(payload)
+
+    def transform_update_vector_store_file_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileObject:
+        """Return the parsed JSON body of an update response."""
+        return cast(VectorStoreFileObject, self._parse_json_response(response))
+
+    def transform_delete_vector_store_file_request(
+        self,
+        *,
+        vector_store_id: str,
+        file_id: str,
+        api_base: str,
+    ) -> Tuple[str, Dict[str, Any]]:
+        """Return `(url, params)` for deleting a file; no params are sent."""
+        return f"{api_base}/{file_id}", {}
+
+    def transform_delete_vector_store_file_response(
+        self,
+        *,
+        response: httpx.Response,
+    ) -> VectorStoreFileDeleteResponse:
+        """Return the parsed JSON body of a delete response."""
+        return cast(VectorStoreFileDeleteResponse, self._parse_json_response(response))
@@ -145,6 +145,8 @@ class OpenAIVectorStoreConfig(BaseVectorStoreConfig):
    ) -> Tuple[str, Dict]:
        url = api_base  # Base URL for creating vector stores
        metadata = vector_store_create_optional_params.get("metadata", None)
+        metadata_payload = add_openai_metadata(metadata)
+
        typed_request_body = VectorStoreCreateRequest(
            name=vector_store_create_optional_params.get("name", None),
            file_ids=vector_store_create_optional_params.get("file_ids", None),
@@ -154,7 +156,7 @@ class OpenAIVectorStoreConfig(BaseVectorStoreConfig):
            chunking_strategy=vector_store_create_optional_params.get(
                "chunking_strategy", None
            ),
-            metadata=add_openai_metadata(metadata) if metadata is not None else None,
+            metadata=metadata_payload,
        )

        dict_request_body = cast(dict, typed_request_body)
@@ -178,10 +178,10 @@ class OpenAIVideoConfig(BaseVideoConfig):
        # Construct the URL for video content download
        url = f"{api_base.rstrip('/')}/{original_video_id}/content"
        
-        # Add video_id as query parameter
-        params = {"video_id": original_video_id}
-        
-        return url, params
+        # No additional data needed for GET content request
+        data: Dict[str, Any] = {}
+
+        return url, data

    def transform_video_remix_request(
        self,
@@ -404,4 +404,4 @@ class OpenAIVideoConfig(BaseVideoConfig):
        if isinstance(image, BufferedReader):
            files_list.append((field_name, (image.name, image, image_content_type)))
        else:
-            files_list.append((field_name, ("input_reference.png", image, image_content_type)))
+            files_list.append((field_name, ("input_reference.png", image, image_content_type)))
@@ -390,6 +390,7 @@ async def acompletion(
    reasoning_effort: Optional[
        Literal["none", "minimal", "low", "medium", "high", "default"]
    ] = None,
+    verbosity: Optional[Literal["low", "medium", "high"]] = None,
    safety_identifier: Optional[str] = None,
    service_tier: Optional[str] = None,
    # set api_base, api_version, api_key
@@ -961,6 +962,7 @@ def completion(  # type: ignore # noqa: PLR0915
    reasoning_effort: Optional[
        Literal["none", "minimal", "low", "medium", "high", "default"]
    ] = None,
+    verbosity: Optional[Literal["low", "medium", "high"]] = None,
    response_format: Optional[Union[dict, Type[BaseModel]]] = None,
    seed: Optional[int] = None,
    tools: Optional[List] = None,
@@ -2084,10 +2086,10 @@ def completion(  # type: ignore # noqa: PLR0915
            if extra_headers is not None:
                optional_params["extra_headers"] = extra_headers

-            if (
-                litellm.enable_preview_features and metadata is not None
-            ):  # [PREVIEW] allow metadata to be passed to OPENAI
-                optional_params["metadata"] = add_openai_metadata(metadata)
+            if litellm.enable_preview_features:
+                metadata_payload = add_openai_metadata(metadata)
+                if metadata_payload is not None:
+                    optional_params["metadata"] = metadata_payload

            ## LOAD CONFIG - if set
            config = litellm.OpenAIConfig.get_config()
@@ -8515,10 +8515,18 @@
            "/v1/images/generations"
        ]
    },
+    "fal_ai/fal-ai/flux-pro/v1.1": {
+        "litellm_provider": "fal_ai",
+        "mode": "image_generation",
+        "output_cost_per_image": 0.04,
+        "supported_endpoints": [
+            "/v1/images/generations"
+        ]
+    },
    "fal_ai/fal-ai/flux-pro/v1.1-ultra": {
        "litellm_provider": "fal_ai",
        "mode": "image_generation",
-        "output_cost_per_image": 0.0398,
+        "output_cost_per_image": 0.06,
        "supported_endpoints": [
            "/v1/images/generations"
        ]
@@ -8531,6 +8539,30 @@
            "/v1/images/generations"
        ]
    },
+    "fal_ai/fal-ai/bytedance/seedream/v3/text-to-image": {
+        "litellm_provider": "fal_ai",
+        "mode": "image_generation",
+        "output_cost_per_image": 0.03,
+        "supported_endpoints": [
+            "/v1/images/generations"
+        ]
+    },
+    "fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image": {
+        "litellm_provider": "fal_ai",
+        "mode": "image_generation",
+        "output_cost_per_image": 0.03,
+        "supported_endpoints": [
+            "/v1/images/generations"
+        ]
+    },
+    "fal_ai/fal-ai/ideogram/v3": {
+        "litellm_provider": "fal_ai",
+        "mode": "image_generation",
+        "output_cost_per_image": 0.06,
+        "supported_endpoints": [
+            "/v1/images/generations"
+        ]
+    },
    "fal_ai/fal-ai/imagen4/preview": {
        "litellm_provider": "fal_ai",
        "mode": "image_generation",
@@ -10,9 +10,12 @@ import asyncio
 import datetime
 import hashlib
 import json
-from typing import Any, Dict, List, Optional, Set, Union, cast
+import re
+from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
+from urllib.parse import urlparse

 from fastapi import HTTPException
+from httpx import HTTPStatusError
 from mcp.types import CallToolRequestParams as MCPCallToolRequestParams
 from mcp.types import CallToolResult
 from mcp.types import Tool as MCPTool
@@ -20,6 +23,7 @@ from mcp.types import Tool as MCPTool
 from litellm._logging import verbose_logger
 from litellm.exceptions import BlockedPiiEntityError, GuardrailRaisedException
 from litellm.experimental_mcp_client.client import MCPClient
+from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
 from litellm.proxy._experimental.mcp_server.auth.user_api_key_auth_mcp import (
    MCPRequestHandler,
 )
@@ -38,12 +42,15 @@ from litellm.proxy._types import (
    MCPTransportType,
    UserAPIKeyAuth,
 )
-from litellm.proxy.common_utils.encrypt_decrypt_utils import (
-    decrypt_value_helper,
-)
+from litellm.proxy.common_utils.encrypt_decrypt_utils import decrypt_value_helper
 from litellm.proxy.utils import ProxyLogging
+from litellm.types.llms.custom_http import httpxSpecialProvider
 from litellm.types.mcp import MCPAuth, MCPStdioConfig
-from litellm.types.mcp_server.mcp_server_manager import MCPInfo, MCPServer
+from litellm.types.mcp_server.mcp_server_manager import (
+    MCPInfo,
+    MCPOAuthMetadata,
+    MCPServer,
+)


 def _deserialize_json_dict(data: Any) -> Optional[Dict[str, str]]:
@@ -100,7 +107,7 @@ class MCPServerManager:
        """
        return self.config_mcp_servers | self.registry

-    def load_servers_from_config(
+    async def load_servers_from_config(
        self,
        mcp_servers_config: Dict[str, Any],
        mcp_aliases: Optional[Dict[str, str]] = None,
@@ -180,35 +187,57 @@ class MCPServerManager:
            )()
            name_for_prefix = get_server_prefix(temp_server)

+            server_url = server_config.get("url", None) or ""
            # Generate stable server ID based on parameters
            server_id = self._generate_stable_server_id(
                server_name=server_name,
-                url=server_config.get("url", None) or "",
+                url=server_url,
                transport=server_config.get("transport", MCPTransport.http),
                auth_type=server_config.get("auth_type", None),
                alias=alias,
            )

+            auth_type = server_config.get("auth_type", None)
+            if server_url and auth_type is not None and auth_type == MCPAuth.oauth2:
+                mcp_oauth_metadata = await self._descovery_metadata(
+                    server_url=server_url,
+                )
+            else:
+                mcp_oauth_metadata = None
+
+            resolved_scopes = server_config.get("scopes") or (
+                mcp_oauth_metadata.scopes if mcp_oauth_metadata else None
+            )
+            resolved_authorization_url = server_config.get("authorization_url") or (
+                mcp_oauth_metadata.authorization_url if mcp_oauth_metadata else None
+            )
+            resolved_token_url = server_config.get("token_url") or (
+                mcp_oauth_metadata.token_url if mcp_oauth_metadata else None
+            )
+            resolved_registration_url = server_config.get("registration_url") or (
+                mcp_oauth_metadata.registration_url if mcp_oauth_metadata else None
+            )
+
            new_server = MCPServer(
                server_id=server_id,
                name=name_for_prefix,
                alias=alias,
                server_name=server_name,
                spec_path=server_config.get("spec_path", None),
-                url=server_config.get("url", None) or "",
+                url=server_url,
                command=server_config.get("command", None) or "",
                args=server_config.get("args", None) or [],
                env=server_config.get("env", None) or {},
                # oauth specific fields
                client_id=server_config.get("client_id", None),
                client_secret=server_config.get("client_secret", None),
-                scopes=server_config.get("scopes", None),
-                authorization_url=server_config.get("authorization_url", None),
-                token_url=server_config.get("token_url", None),
-                registration_url=server_config.get("registration_url", None),
+                scopes=resolved_scopes,
+                authorization_url=resolved_authorization_url,
+                token_url=resolved_token_url,
+                registration_url=resolved_registration_url,
                # TODO: utility fn the default values
                transport=server_config.get("transport", MCPTransport.http),
-                auth_type=server_config.get("auth_type", None),
+                auth_type=auth_type,
                authentication_token=server_config.get(
                    "authentication_token", server_config.get("auth_value", None)
                ),
@@ -356,12 +385,12 @@ class MCPServerManager:
                    )

                    # Update tool name to server name mapping (for both prefixed and base names)
-                    self.tool_name_to_mcp_server_name_mapping[
-                        base_tool_name
-                    ] = server_prefix
-                    self.tool_name_to_mcp_server_name_mapping[
-                        prefixed_tool_name
-                    ] = server_prefix
+                    self.tool_name_to_mcp_server_name_mapping[base_tool_name] = (
+                        server_prefix
+                    )
+                    self.tool_name_to_mcp_server_name_mapping[prefixed_tool_name] = (
+                        server_prefix
+                    )

                    registered_count += 1
                    verbose_logger.debug(
@@ -692,6 +721,252 @@ class MCPServerManager:
                except Exception:
                    pass

+    async def _descovery_metadata(
+        self,
+        server_url: str,
+    ) -> Optional[MCPOAuthMetadata]:
+        """Discover OAuth metadata by following RFC 9728 (protected resource metadata discovery).
+
+        An unauthenticated GET is sent to ``server_url`` and is expected to be
+        rejected. The ``WWW-Authenticate`` header of the rejection is parsed
+        for a ``resource_metadata`` URL and scopes; when no URL is advertised
+        the well-known protected-resource locations are tried instead. The
+        authorization servers found that way (or, as a last resort, the origin
+        of ``server_url``) are then queried for their endpoints.
+
+        Args:
+            server_url: URL of the MCP server to probe.
+
+        Returns:
+            The discovered metadata, or ``None`` when the server did not
+            challenge the request or discovery failed. Errors derived from
+            ``Exception`` are logged at debug level and swallowed.
+        """
+
+        # Step 1: probe the server. Only a status error counts as a challenge.
+        challenge_response = None
+        try:
+            client = get_async_httpx_client(llm_provider=httpxSpecialProvider.MCP)
+            response = await client.get(server_url)
+            response.raise_for_status()
+        except HTTPStatusError as exc:
+            verbose_logger.debug(
+                "MCP OAuth discovery for %s received status error: %s",
+                server_url,
+                exc,
+            )
+            # `exc` is unbound once the handler exits, so keep the response.
+            challenge_response = exc.response
+        except Exception as exc:  # pragma: no cover - network/transient issues
+            verbose_logger.debug(
+                "MCP OAuth discovery failed for %s: %s", server_url, exc
+            )
+            return None
+        else:
+            verbose_logger.warning(
+                "MCP OAuth discovery unexpectedly succeeded for %s; server did not challenge",
+                server_url,
+            )
+            return None
+
+        # Step 2: process the challenge. This runs outside the handler above
+        # because an `except` clause does not guard the body of a sibling
+        # `except` clause; failures here must not escape into config loading.
+        try:
+            header_value: Optional[str] = None
+            if challenge_response is not None:
+                header_value = challenge_response.headers.get(
+                    "WWW-Authenticate"
+                ) or challenge_response.headers.get("www-authenticate")
+
+            resource_metadata_url, scopes = self._parse_www_authenticate_header(
+                header_value
+            )
+
+            authorization_servers: List[str] = []
+            resource_scopes: Optional[List[str]] = None
+            if resource_metadata_url:
+                (
+                    authorization_servers,
+                    resource_scopes,
+                ) = await self._fetch_oauth_metadata_from_resource(
+                    resource_metadata_url
+                )
+            else:
+                (
+                    authorization_servers,
+                    resource_scopes,
+                ) = await self._attempt_well_known_discovery(server_url)
+
+            metadata = None
+            if not authorization_servers:
+                # Fall back to treating the server's own origin as the issuer.
+                try:
+                    parsed_url = urlparse(server_url)
+                    if parsed_url.scheme and parsed_url.netloc:
+                        authorization_servers = [
+                            f"{parsed_url.scheme}://{parsed_url.netloc}"
+                        ]
+                except Exception:
+                    authorization_servers = []
+
+            if authorization_servers:
+                metadata = await self._fetch_authorization_server_metadata(
+                    authorization_servers
+                )
+
+            # Scopes from the challenge header win over the resource document,
+            # and both win over the authorization server's advertised scopes.
+            preferred_scopes = scopes or resource_scopes
+            if metadata is None and preferred_scopes:
+                metadata = MCPOAuthMetadata(scopes=preferred_scopes)
+            elif metadata is not None and preferred_scopes:
+                metadata.scopes = preferred_scopes
+
+            return metadata
+        except Exception as exc:  # pragma: no cover - network/transient issues
+            verbose_logger.debug(
+                "MCP OAuth discovery failed for %s: %s", server_url, exc
+            )
+            return None
+
+    def _parse_www_authenticate_header(
+        self, header_value: Optional[str]
+    ) -> Tuple[Optional[str], Optional[List[str]]]:
+        """Extract ``resource_metadata`` and ``scope`` from a WWW-Authenticate header.
+
+        Args:
+            header_value: Raw header value, e.g.
+                ``Bearer resource_metadata="https://...", scope="read write"``.
+
+        Returns:
+            ``(resource_metadata_url, scopes)``. Either element is ``None``
+            when the parameter is absent or empty; ``scopes`` is the
+            whitespace-separated ``scope`` value split into a list.
+        """
+        if not header_value:
+            return None, None
+
+        # Drop the leading auth scheme; fall back to the whole value when the
+        # header contains no space at all.
+        _, _, params_section = header_value.partition(" ")
+        params_section = params_section or header_value
+
+        # A quoted value may legally contain commas, so it is consumed up to
+        # its closing quote. Unquoted (or unterminated) values end at the
+        # next comma or quote.
+        param_pattern = re.compile(
+            r'([a-zA-Z0-9_]+)\s*=\s*(?:"([^"]*)"|"?([^",]+))'
+        )
+        params: Dict[str, str] = {}
+        for match in param_pattern.finditer(params_section):
+            quoted_value, bare_value = match.group(2), match.group(3)
+            value = (quoted_value if quoted_value is not None else bare_value).strip()
+            if value:
+                # Parameter names are case-insensitive; a later duplicate wins.
+                params[match.group(1).lower()] = value
+
+        resource_metadata_url = params.get("resource_metadata")
+
+        scope_value = params.get("scope")
+        scopes = scope_value.split() if scope_value else None
+
+        return resource_metadata_url, scopes or None
+
+    async def _fetch_oauth_metadata_from_resource(
+        self, resource_metadata_url: str
+    ) -> Tuple[List[str], Optional[List[str]]]:
+        """Fetch a protected-resource metadata document and read its OAuth hints.
+
+        Args:
+            resource_metadata_url: URL of the JSON metadata document.
+
+        Returns:
+            ``(authorization_servers, scopes)``. ``authorization_servers`` is
+            the list of non-blank string entries under that key (empty when
+            missing); ``scopes`` comes from ``scopes_supported`` or ``scopes``.
+            ``([], None)`` is returned when the URL is empty, the request
+            fails, or the body is not a JSON object.
+        """
+        if not resource_metadata_url:
+            return [], None
+
+        try:
+            client = get_async_httpx_client(
+                llm_provider=httpxSpecialProvider.MCP,
+                params={"timeout": 10.0, "follow_redirects": True},
+            )
+            response = await client.get(resource_metadata_url)
+            response.raise_for_status()
+            data = response.json()
+        except Exception as exc:  # pragma: no cover - network issues
+            verbose_logger.debug(
+                "Failed to fetch MCP OAuth metadata from %s: %s",
+                resource_metadata_url,
+                exc,
+            )
+            return [], None
+
+        # Valid JSON is not necessarily an object; a list or scalar body has
+        # no `.get` and would otherwise raise outside the try above.
+        if not isinstance(data, dict):
+            verbose_logger.debug(
+                "Ignoring non-object MCP OAuth metadata from %s",
+                resource_metadata_url,
+            )
+            return [], None
+
+        raw_servers = data.get("authorization_servers")
+        if isinstance(raw_servers, list):
+            authorization_servers = [
+                entry
+                for entry in raw_servers
+                if isinstance(entry, str) and entry.strip() != ""
+            ]
+        else:
+            authorization_servers = []
+
+        scopes = self._extract_scopes(
+            data.get("scopes_supported") or data.get("scopes")
+        )
+
+        return authorization_servers, scopes
+
+    async def _attempt_well_known_discovery(
+        self, server_url: str
+    ) -> Tuple[List[str], Optional[List[str]]]:
+        """Probe the well-known protected-resource URLs derived from ``server_url``.
+
+        The path-specific location is tried before the origin-wide one, and
+        the first document that lists authorization servers wins.
+
+        Returns:
+            ``(authorization_servers, scopes)`` from the first useful
+            document, or ``([], None)`` when the URL has no scheme/host or no
+            candidate yields authorization servers.
+        """
+        try:
+            url_parts = urlparse(server_url)
+        except Exception:
+            return [], None
+
+        if not (url_parts.scheme and url_parts.netloc):
+            return [], None
+
+        base = f"{url_parts.scheme}://{url_parts.netloc}"
+        path = (url_parts.path or "").strip("/")
+
+        # Most specific candidate first.
+        candidates: List[str] = (
+            [f"{base}/.well-known/oauth-protected-resource/{path}"] if path else []
+        )
+        candidates.append(f"{base}/.well-known/oauth-protected-resource")
+
+        for candidate in candidates:
+            found_servers, found_scopes = (
+                await self._fetch_oauth_metadata_from_resource(candidate)
+            )
+            if found_servers:
+                return found_servers, found_scopes
+
+        return [], None
+
+    async def _fetch_authorization_server_metadata(
+        self, authorization_servers: List[str]
+    ) -> Optional[MCPOAuthMetadata]:
+        """Return metadata from the first issuer that yields any, else ``None``.
+
+        Issuers are queried one at a time, in the order given.
+        """
+        for candidate_issuer in authorization_servers:
+            discovered = await self._fetch_single_authorization_server_metadata(
+                candidate_issuer
+            )
+            if discovered is None:
+                continue
+            return discovered
+        return None
+
+    async def _fetch_single_authorization_server_metadata(
+        self, issuer_url: str
+    ) -> Optional[MCPOAuthMetadata]:
+        """Fetch authorization-server metadata for a single issuer.
+
+        Candidate documents are tried in this order: the path-specific
+        ``oauth-authorization-server`` and ``openid-configuration`` well-known
+        URLs (only when the issuer has a path), their origin-wide
+        counterparts, and finally the issuer URL itself.
+
+        Args:
+            issuer_url: Issuer / authorization server URL.
+
+        Returns:
+            Metadata built from the first candidate that provides at least
+            one of scopes, authorization, token or registration endpoint;
+            ``None`` when the URL has no scheme/host or no candidate does.
+        """
+        try:
+            parsed = urlparse(issuer_url)
+        except Exception:
+            return None
+
+        if not parsed.scheme or not parsed.netloc:
+            return None
+
+        base = f"{parsed.scheme}://{parsed.netloc}"
+        path = (parsed.path or "").strip("/")
+
+        candidate_urls: List[str] = []
+        if path:
+            candidate_urls.append(
+                f"{base}/.well-known/oauth-authorization-server/{path}"
+            )
+            candidate_urls.append(f"{base}/.well-known/openid-configuration/{path}")
+        candidate_urls.append(f"{base}/.well-known/oauth-authorization-server")
+        candidate_urls.append(f"{base}/.well-known/openid-configuration")
+        candidate_urls.append(issuer_url.rstrip("/"))
+
+        for url in candidate_urls:
+            try:
+                client = get_async_httpx_client(
+                    llm_provider=httpxSpecialProvider.MCP,
+                    params={"timeout": 10.0, "follow_redirects": True},
+                )
+                response = await client.get(url)
+                response.raise_for_status()
+                data = response.json()
+            except Exception as exc:  # pragma: no cover - network issues
+                verbose_logger.debug(
+                    "Failed to fetch authorization metadata from %s: %s",
+                    url,
+                    exc,
+                )
+                continue
+
+            # A JSON list or scalar has no `.get`; skip it so the remaining
+            # candidates are still tried instead of raising.
+            if not isinstance(data, dict):
+                verbose_logger.debug(
+                    "Ignoring non-object authorization metadata from %s", url
+                )
+                continue
+
+            scopes = self._extract_scopes(data.get("scopes_supported"))
+            metadata = MCPOAuthMetadata(
+                scopes=scopes,
+                authorization_url=data.get("authorization_endpoint"),
+                token_url=data.get("token_endpoint"),
+                registration_url=data.get("registration_endpoint"),
+            )
+
+            # Only accept a document that carries at least one usable field.
+            if any(
+                (
+                    metadata.scopes,
+                    metadata.authorization_url,
+                    metadata.token_url,
+                    metadata.registration_url,
+                )
+            ):
+                return metadata
+
+        return None
+
+    def _extract_scopes(self, scopes_value: Any) -> Optional[List[str]]:
+        """Normalize a scopes value into a list of non-empty, stripped strings.
+
+        Args:
+            scopes_value: Either a whitespace-separated string or a list;
+                non-string list entries are ignored.
+
+        Returns:
+            The cleaned list, or ``None`` when nothing usable remains or the
+            value is of any other type.
+        """
+        if isinstance(scopes_value, str):
+            # str.split() with no argument already drops empty/blank pieces.
+            return scopes_value.split() or None
+        if isinstance(scopes_value, list):
+            # Strip entries so both input forms yield the same normalized
+            # scope names.
+            scopes = [
+                s.strip() for s in scopes_value if isinstance(s, str) and s.strip()
+            ]
+            return scopes or None
+        return None
+
    async def _fetch_tools_with_timeout(
        self, client: MCPClient, server_name: str
    ) -> List[MCPTool]:
@@ -721,11 +996,6 @@ class MCPServerManager:
                    f"Client operation failed for {server_name}: {str(e)}"
                )
                return []
-            finally:
-                try:
-                    await client.disconnect()
-                except Exception:
-                    pass

        try:
            return await asyncio.wait_for(_list_tools_task(), timeout=30.0)
@@ -640,24 +640,31 @@ if MCP_AVAILABLE:

        allowed_mcp_servers = await _get_allowed_mcp_servers_from_mcp_server_names(
            mcp_servers=mcp_servers,
-            allowed_mcp_servers=allowed_mcp_servers
+            allowed_mcp_servers=allowed_mcp_servers,
        )

-        server_name: Optional[str]
-        if len(allowed_mcp_servers) == 1:
-            original_tool_name, server_name = name, allowed_mcp_servers[0].server_name
-        else:
-            # Remove prefix from tool name for logging and processing
-            original_tool_name, server_name = get_server_name_prefix_tool_mcp(name)
+        # Track resolved MCP server for both permission checks and dispatch
+        mcp_server: Optional[MCPServer] = None

-        if not server_name or not MCPRequestHandler.is_tool_allowed(
-            allowed_mcp_servers=[server.name for server in allowed_mcp_servers],
-            server_name=server_name,
-        ):
-            raise HTTPException(
-                status_code=403,
-                detail=f"User not allowed to call this tool. Allowed MCP servers: {allowed_mcp_servers}",
-            )
+        # Remove prefix from tool name for logging and processing
+        original_tool_name, server_name = get_server_name_prefix_tool_mcp(name)
+
+        # If tool name is unprefixed, resolve its server so we can enforce permissions
+        if not server_name:
+            mcp_server = global_mcp_server_manager._get_mcp_server_from_tool_name(name)
+            if mcp_server:
+                server_name = mcp_server.name
+
+        # Only enforce server-level permissions when we can resolve a server
+        if server_name:
+            if not MCPRequestHandler.is_tool_allowed(
+                allowed_mcp_servers=[server.name for server in allowed_mcp_servers],
+                server_name=server_name,
+            ):
+                raise HTTPException(
+                    status_code=403,
+                    detail=f"User not allowed to call this tool. Allowed MCP servers: {allowed_mcp_servers}",
+                )

        standard_logging_mcp_tool_call: StandardLoggingMCPToolCall = (
            _get_standard_logging_mcp_tool_call(
@@ -686,9 +693,11 @@ if MCP_AVAILABLE:
        # Primary and recommended way to use external MCP servers
        #########################################################
        else:
-            mcp_server: Optional[
-                MCPServer
-            ] = global_mcp_server_manager._get_mcp_server_from_tool_name(name)
+            # If we haven't already resolved the server, do it now for dispatch
+            if mcp_server is None:
+                mcp_server = global_mcp_server_manager._get_mcp_server_from_tool_name(
+                    name
+                )
            if mcp_server:
                standard_logging_mcp_tool_call["mcp_server_cost_info"] = (
                    mcp_server.mcp_info or {}
--- a/Show More
+++ b/Show More