# litellm/pyproject.toml

# Core distribution metadata for the litellm SDK.
[project]
name = "litellm"
# The same version string is repeated under [tool.commitizen], whose
# `version_files` pattern targets `^version` lines in this file.
version = "1.89.0"
description = "Library to easily interface with LLM API providers"
readme = "README.md"
# The 3.14 ceiling matches the `python_version < '3.14'` markers used on some
# optional dependencies and dev tools further down.
requires-python = ">=3.10, <3.14"
license = "MIT"
license-files = ["LICENSE"]
authors = [
    { name = "BerriAI" },
]
# Requirements of the base package, i.e. what is installed without any extra.
dependencies = [
    # Ranges (not exact pins) so SDK consumers can coexist with their other
    # deps. Reproducibility for our Docker/CI comes from `uv.lock`.
    # When changing a floor, verify it installs + imports on every supported
    # Python with: `uv pip install --resolution=lowest-direct .`
    "fastuuid>=0.14.0,<1.0",
    "httpx>=0.28.0,<1.0",
    "openai>=2.20.0,<3.0.0",
    "python-dotenv>=1.0.0,<2.0",
    "tiktoken>=0.8.0,<1.0",
    "importlib-metadata>=8.0.0,<9.0",
    "tokenizers>=0.21.0,<1.0",
    "click>=8.0.0,<9.0",
    "jinja2>=3.1.6,<4.0",
    "aiohttp>=3.10,<4.0",
    "pydantic>=2.10.0,<3.0.0",
    "jsonschema>=4.0.0,<5.0",
]

# Project links published as part of the package metadata.
[project.urls]
Homepage = "https://litellm.ai"
Repository = "https://github.com/BerriAI/litellm"
Documentation = "https://docs.litellm.ai"

# Optional extras use compatible ranges (like the core SDK above) so downstream
# consumers can coexist with other packages and pick up security patches without
# forking. Reproducibility for our Docker/CI comes from `uv.lock` (images install
# via `uv sync --frozen`). A few deps stay exact-pinned: litellm's own
# sub-packages and the opentelemetry packages (api, sdk, and exporter, plus
# the matching fastapi instrumentation release) move in lockstep, and grpcio
# is supply-chain-pinned to a vetted, aged release.
[project.optional-dependencies]
# `proxy` extra. Every entry is a bounded range except litellm's own
# sub-packages, which are exact-pinned.
proxy = [
    "gunicorn>=23.0.0,<24.0",
    "uvicorn>=0.33.0,<1.0",
    "granian>=2.7.4,<3.0",
    # Skipped on Windows by the environment marker.
    "uvloop>=0.21.0,<1.0; sys_platform != 'win32'",
    "fastapi>=0.136.3,<1.0",
    "starlette>=1.0.1,<2.0",
    "backoff>=2.2.1,<3.0",
    "pyyaml>=6.0.3,<7.0",
    "rq>=2.7.0,<3.0",
    "orjson>=3.11.6,<4.0",
    "apscheduler>=3.11.2,<4.0",
    "fastapi-sso>=0.19.0,<1.0",
    "PyJWT>=2.12.0,<3.0",
    "python-multipart>=0.0.27,<1.0",
    "cryptography>=46.0.7,<47.0",
    "pynacl>=1.6.2,<2.0",
    "websockets>=15.0.1,<16.0",
    "boto3>=1.43.1,<2.0",
    "azure-identity>=1.25.2,<2.0",
    "azure-storage-blob>=12.28.0,<13.0",
    "mcp>=1.26.0,<2.0",
    # litellm's own sub-packages: exact-pinned, and resolved from the uv
    # workspace through [tool.uv.sources].
    "litellm-proxy-extras==0.4.74",
    "litellm-enterprise==0.1.42",
    "RestrictedPython>=8.1,<9.0",
    "rich>=13.9.4,<14.0",
    "polars>=1.38.1,<2.0",
    "soundfile>=0.12.1,<1.0",
    # Skipped on Windows by the environment marker.
    "pyroscope-io>=0.8.16,<1.0; sys_platform != 'win32'",
    "pydantic-settings>=2.14.1,<3.0",
]
# Second proxy-oriented extra, declared separately from `proxy`.
extra_proxy = [
    "prisma>=0.11.0,<1.0",
    # Same range as the azure-identity entry in the `proxy` extra.
    "azure-identity>=1.25.2,<2.0",
    "azure-keyvault-secrets>=4.10.0,<5.0",
    # Not in PyPI proxy extra.
    "google-cloud-kms>=2.24.2,<3.0",
    "google-cloud-iam>=2.19.1,<3.0",
    # Not in PyPI proxy extra.
    "resend>=2.23.0,<3.0",
    # The marker repeats the 3.14 ceiling already set by `requires-python`.
    "redisvl>=0.4.1,<1.0; python_version < '3.14'",
    "a2a-sdk>=0.3.24,<1.0",
]
# numpydoc is not part of the Docker image or the PyPI proxy extra.
utils = [
    "numpydoc>=1.8.0,<2.0",
]
caching = [
    "diskcache>=5.6.3,<6.0",
]
# Both entries carry a `python_version < '3.14'` marker.
semantic-router = [
    "semantic-router>=0.1.15,<1.0; python_version < '3.14'",
    "aurelio-sdk>=0.0.19,<1.0; python_version < '3.14'",
]
mlflow = [
    "mlflow>=3.11.1,<4.0",
]
# Exact pin: the newest non-yanked grpcio release older than the 30-day cutoff.
grpc = [
    "grpcio==1.78.0",
]
stt-nvidia-riva = [
    # NVIDIA Riva STT provider (gRPC). These are imported lazily inside the
    # provider handler so litellm core remains usable without them.
    # Each entry is capped below its next major release, in line with the
    # bounded-range policy the other extras follow; soundfile uses the same
    # range as the `proxy` extra so the two extras cannot drift apart.
    "nvidia-riva-client>=2.15.0,<3.0",
    "soundfile>=0.12.1,<1.0",
    "audioread>=3.0.1,<4.0",
    "numpy>=1.26.0,<3.0",
]
# Same google-cloud-aiplatform range as the entry in `proxy-runtime`.
google = [
    "google-cloud-aiplatform>=1.133.0,<2.0",
]
proxy-runtime = [
    # Historically bundled in the proxy Docker images via requirements.txt.
    # Keep these in a dedicated extra so uv-based images preserve the same
    # feature surface without forcing the base SDK install to grow.
    "google-cloud-aiplatform>=1.133.0,<2.0",
    "google-genai>=1.37.0,<2.0",
    "anthropic[vertex]>=0.84.0,<1.0",
    # Same exact pin as the `grpc` extra.
    "grpcio==1.78.0",
    "prometheus-client>=0.20.0,<1.0",
    "langfuse>=2.59.7,<3.0",
    # Exact-pinned together; the `dev` and `proxy-dev` dependency groups pin
    # the same four versions.
    "opentelemetry-api==1.28.0",
    "opentelemetry-sdk==1.28.0",
    "opentelemetry-exporter-otlp==1.28.0",
    "opentelemetry-instrumentation-fastapi==0.49b0",
    "ddtrace>=2.19.0,<3.0",
    "sentry-sdk>=2.21.0,<3.0",
    "mangum>=0.17.0,<1.0",
    "azure-ai-contentsafety>=1.0.0,<2.0",
    "azure-storage-file-datalake>=12.20.0,<13.0",
    # The marker repeats the 3.14 ceiling already set by `requires-python`.
    "pypdf>=6.10.2,<7.0; python_version < '3.14'",
    "llm-sandbox>=0.3.39,<1.0",
    "detect-secrets>=1.5.0,<2.0",
]

# Console commands, each mapped to a `module:callable` entry point.
[project.scripts]
# `litellm` calls `run_server` from the top-level `litellm` package.
litellm = "litellm:run_server"
# `litellm-proxy` calls `cli` from `litellm.proxy.client.cli`.
litellm-proxy = "litellm.proxy.client.cli:cli"

[dependency-groups]
# `dev` is the only group named in `default-groups` under [tool.uv]. Unlike
# the published extras, every entry here is an exact pin.
dev = [
    "diff-cover==9.7.2",
    "flake8==7.3.0",
    "black==26.3.1",
    "mypy==1.19.0",
    "pytest==9.0.3",
    "pytest-mock==3.15.1",
    "pytest-asyncio==1.3.0",
    "pytest-postgresql==7.0.2",
    # pytest-postgresql imports psycopg v3 during pytest startup. Keep the base
    # package and the binary wheel in the default dev environment so local
    # pytest works without requiring a system libpq install.
    "psycopg==3.3.3",
    "psycopg-binary==3.3.3",
    "pytest-xdist==3.8.0",
    "requests-mock==1.12.1",
    "responses==0.26.0",
    "respx==0.22.0",
    "ruff==0.15.3",
    "types-requests==2.32.4.20260107",
    "types-setuptools==75.8.0.20250225",
    "types-redis==4.6.0.20241004",
    "types-PyYAML==6.0.12.20250915",
    # Same four opentelemetry versions as the `proxy-runtime` extra.
    "opentelemetry-api==1.28.0",
    "opentelemetry-sdk==1.28.0",
    "opentelemetry-exporter-otlp==1.28.0",
    "opentelemetry-instrumentation-fastapi==0.49b0",
    # Pinned to the floor of the langfuse range in `proxy-runtime`.
    "langfuse==2.59.7",
    "fastapi-offline==1.7.6",
    "fakeredis==2.34.1",
    "pytest-rerunfailures==15.1",
    "pytest-cov==5.0.0",
    "parameterized==0.9.0",
    # The marker repeats the 3.14 ceiling already set by `requires-python`.
    "openapi-core==0.22.0; python_version < '3.14'",
    "pytest-timeout==2.4.0",
    "vcrpy==8.1.1",
    "pytest-recording==0.13.4",
]
# Exact pins. prisma, prometheus-client, azure-identity and a2a-sdk each sit at
# the floor of the matching range declared in the optional extras.
proxy-dev = [
    "prisma==0.11.0",
    "hypercorn==0.17.3",
    "prometheus-client==0.20.0",
    # Same four opentelemetry versions as the `proxy-runtime` extra.
    "opentelemetry-api==1.28.0",
    "opentelemetry-sdk==1.28.0",
    "opentelemetry-exporter-otlp==1.28.0",
    "opentelemetry-instrumentation-fastapi==0.49b0",
    "azure-identity==1.25.2",
    "a2a-sdk==0.3.24",
]
# Exact-pinned packages for CI; not listed in `default-groups` under [tool.uv].
ci = [
    # These are lazily imported at call sites; keep them out of core deps to
    # avoid bloating the base SDK install (google-generativeai pulls grpcio +
    # protobuf, Pillow is a compiled C extension).
    "tenacity==8.5.0",
    "google-generativeai==0.8.6",
    "Pillow==12.2.0",
    # Azure batch E2E tests still import psycopg2 directly.
    "psycopg2-binary==2.9.11",
    "pytest-codspeed==4.3.0",
    "pytest-retry==1.7.0",
    "pyarrow==23.0.1",
    "langchain==1.2.10",
    # lunary is pinned per interpreter: 1.4.36 on Python 3.10, 1.4.37 on 3.11+.
    "lunary==1.4.36; python_version == '3.10'",
    "lunary==1.4.37; python_version >= '3.11'",
    "logfire==4.6.0",
    "traceloop-sdk==0.33.12",
    # Pinned to the floor of the detect-secrets range in `proxy-runtime`.
    "detect-secrets==1.5.0",
    "PyGithub==2.8.1",
    "aiodynamo==24.7",
    "argon2-cffi==25.1.0",
    "assemblyai==0.52.4",
    "jsonlines==4.0.0",
    # Pinned to the floor of the anthropic[vertex] range in `proxy-runtime`.
    "anthropic==0.84.0",
    "blockbuster==1.5.26",
    "beautifulsoup4==4.14.3",
    "pylint==4.0.5",
    "pyright==1.1.408",
    "langchain-mcp-adapters==0.2.1",
    "langchain-openai==1.1.14",
    "langgraph==1.0.10",
    # langgraph-prebuilt 1.0.9 imports ExecutionInfo/ServerInfo from
    # langgraph.runtime, which is not exported until langgraph 1.1.0.
    # Pin to 1.0.8 so it pairs correctly with langgraph==1.0.10.
    "langgraph-prebuilt==1.0.8",
    "claude-agent-sdk==0.1.44",
]
# Two exact pins, both inside the ranges declared for httpx (core dependencies)
# and pyyaml (`proxy` extra).
healthcheck = ["httpx==0.28.1", "pyyaml==6.0.3"]

# The build backend is an exact-pinned uv_build release; its options are set
# under [tool.uv.build-backend].
[build-system]
requires = ["uv_build==0.11.8"]
build-backend = "uv_build"

[tool.uv]
# Groups from [dependency-groups] that are installed by default.
default-groups = ["dev"]
# Oldest uv release allowed to operate on this project.
required-version = ">=0.10.9"
# Relative cutoff for how recently published a package may be.
# NOTE(review): the `grpc` extra's comment refers to a 30-day cutoff, while
# this window is 3 days — confirm which value is intended.
exclude-newer = "3 days"

# Resolve litellm's two sub-packages from the uv workspace. These are the same
# packages the `proxy` extra pins exactly.
[tool.uv.sources]
litellm-proxy-extras = { workspace = true }
litellm-enterprise = { workspace = true }

[tool.uv.workspace]
# Presumably the directories holding the two packages listed under
# [tool.uv.sources] — verify against the repository layout.
members = [
    "enterprise",
    "litellm-proxy-extras",
]

[tool.uv.build-backend]
# Empty module root: presumably the `litellm/` package sits at the project
# root rather than under `src/` — verify against the uv build backend docs.
module-root = ""
# Paths excluded from the build: the enterprise proxy directory plus tool cache
# directories, each listed both as the directory and as its contents.
source-exclude = [
    "litellm/proxy/enterprise",
    "**/__pycache__",
    "**/__pycache__/**",
    "**/.mypy_cache",
    "**/.mypy_cache/**",
    "**/.pytest_cache",
    "**/.pytest_cache/**",
    "**/.ruff_cache",
    "**/.ruff_cache/**",
]

[tool.isort]
# Use isort's "black" profile; black itself is pinned in the `dev` group.
profile = "black"

[tool.commitizen]
# Currently identical to the version declared under [project].
version = "1.89.0"
# `file:pattern` entries; this one targets lines in pyproject.toml that start
# with `version`.
version_files = [
    "pyproject.toml:^version",
]

[tool.mypy]
# Load the pydantic mypy plugin; mypy itself is pinned in the `dev` group.
plugins = "pydantic.mypy"

[tool.pytest.ini_options]
# pytest-asyncio settings (the plugin is pinned in the `dev` group).
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
# Custom markers, each registered as `name: description`.
markers = [
    "asyncio: mark test as an asyncio test",
    "limit_leaks: mark test with memory limit for leak detection (e.g., '40 MB')",
    "no_parallel: mark test to run sequentially (not in parallel) - typically for memory measurement tests",
]
filterwarnings = [
    # Suppress Pydantic serializer warnings from mock server responses (non-critical for memory tests)
    # These occur because the mock server returns a simplified response format
    "ignore:Pydantic serializer warnings:UserWarning",
    "ignore::UserWarning:pydantic.main",
    # Suppress pytest-asyncio event loop deprecation warning (handled automatically by pytest-asyncio)
    "ignore::DeprecationWarning:pytest_asyncio.plugin",
]

[tool.mutmut]
# Mutation-testing scope, used by the manually-triggered workflow at
# .github/workflows/mutation-test.yml. mutmut is not part of the project's
# default install; CI pulls it in via `uv run --with mutmut==<version>`.
paths_to_mutate = ["litellm/proxy/management_endpoints/"]
tests_dir = [
    "tests/test_litellm/proxy/management_endpoints/",
    "tests/proxy_behavior/management/",
]
# Required: mutmut runs in a `mutants/` sandbox and the test conftest imports
# from across the litellm package, so the whole package is copied alongside.
also_copy = ["litellm/"]
# Gather line coverage with one test run before mutating, then leave alone any
# line that no test exercises. Mutants on such lines would survive regardless
# (no test hits the line to kill them), so generating them wastes hours of CI.
# The score therefore reads as "mutation score over covered code" — pair it
# with a line-coverage number when reporting.
mutate_only_covered_lines = true
# Rerun/parallel plugins are switched off for mutation runs:
# - pytest-retry triggers an `INTERNALERROR: no option named 'filtered_exceptions'`
#   when invoked via mutmut's in-process `pytest.main()` call.
# - reruns are wrong for mutation testing regardless: rerunning a "failed" test
#   on a mutant would mask which mutants are killed vs. survive.
# - xdist is unnecessary inside mutmut, which handles its own parallelism.
pytest_add_cli_args = [
    "-p", "no:retry",
    "-p", "no:rerunfailures",
    "-p", "no:xdist",
]

[tool.coverage.run]
# Restrict measurement to the litellm package.
source = ["litellm"]
# Presumably stores relative rather than absolute file paths in the coverage
# data — confirm against coverage.py's `relative_files` documentation.
relative_files = true