mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-28 13:11:20 +00:00
278c9babc6
* fix(test): add missing mocks for test_streamable_http_mcp_handler_mock
The test was missing mocks for extract_mcp_auth_context and set_auth_context,
causing the handler to fail silently in the except block instead of reaching
session_manager.handle_request. This mirrors the fix already applied to the
sibling test_sse_mcp_handler_mock.
Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>
* fix(ci): route OpenAI models through chat completions in pass-through tests
The test_anthropic_messages_openai_model_streaming_cost_injection test fails
because the OpenAI Responses API returns 400 for requests routed through the
Anthropic Messages endpoint. Setting LITELLM_USE_CHAT_COMPLETIONS_URL_FOR_ANTHROPIC_MESSAGES=true
routes OpenAI models through the stable chat completions path instead.
Cost injection still works since it happens at the proxy level.
Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>
* fix(ci): fix assemblyai custom auth and router wildcard test flakiness
1. custom_auth_basic.py: Add user_role='proxy_admin' so the custom auth
user can access management endpoints like /key/generate. The test
test_assemblyai_transcribe_with_non_admin_key was hidden behind an
earlier -x failure and was never reached before.
2. test_router_utils.py: Add flaky(retries=3) and increase sleep from 1s
to 2s for test_router_get_model_group_usage_wildcard_routes. The async
callback needs time to write usage to cache, and 1s is insufficient on
slower CI hardware.
Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>
* ci: retrigger CI pipeline
Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>
* fix(mypy): use LitellmUserRoles enum instead of raw string in custom_auth_basic
Fixes mypy error: Argument 'user_role' has incompatible type 'str'; expected 'LitellmUserRoles | None'
Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>
* fix: don't close HTTP/SDK clients on LLMClientCache eviction (#22926)
* fix: don't close HTTP/SDK clients on LLMClientCache eviction
Remove the _remove_key override that eagerly called aclose()/close()
on evicted clients. Evicted clients may still be held by in-flight
streaming requests; closing them raises:
RuntimeError: Cannot send a request, as the client has been closed.
This is a regression from commit fb72979432. Clients that are no longer
referenced will be garbage-collected naturally. Explicit shutdown cleanup
happens via close_litellm_async_clients().
Fixes production crashes after the 1-hour cache TTL expires.
* test: update LLMClientCache unit tests for no-close-on-eviction behavior
Flip the assertions: evicted clients must NOT be closed. Rename
test_remove_key_closes_async_client to test_remove_key_does_not_close_async_client,
and make the equivalent change for the sync and eviction paths.
Add test_remove_key_removes_plain_values for non-client cache entries.
Remove test_background_tasks_cleaned_up_after_completion (no more _background_tasks).
Remove test_remove_key_no_event_loop variant that depended on old behavior.
* test: add e2e tests for OpenAI SDK client surviving cache eviction
Add two new e2e tests using real AsyncOpenAI clients:
- test_evicted_openai_sdk_client_stays_usable: verifies size-based eviction
doesn't close the client
- test_ttl_expired_openai_sdk_client_stays_usable: verifies TTL expiry
eviction doesn't close the client
Both tests sleep after eviction so any create_task()-based close would
have time to run, making the regression detectable.
Also expand the module docstring to explain why the sleep is required.
* docs(AGENTS.md): add rule — never close HTTP/SDK clients on cache eviction
* docs(CLAUDE.md): add HTTP client cache safety guideline
* [Fix] Install bsdmainutils for column command in security scans
The security_scans.sh script uses `column` to format vulnerability
output, but the package wasn't installed in the CI environment.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* fix: handle string callback values in prometheus multiproc setup
When callbacks are configured as a plain string (e.g., `callbacks: "my_callback"`)
instead of a list, the proxy crashes on startup with:
TypeError: can only concatenate str (not "list") to str
Normalize each callback setting to a list before concatenating.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* bump: version 1.82.2 → 1.82.3
* fix(test): update test_startup_fails_when_db_setup_fails for opt-in enforcement
The --enforce_prisma_migration_check flag is now required to trigger
sys.exit(1) on DB migration failure, after #23675 flipped the default
behavior to warn-and-continue.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* fix(cost_calculator): use model name for per-request custom pricing when router_model_id has no pricing
When custom pricing is passed as per-request kwargs (input_cost_per_token/output_cost_per_token),
completion() registers pricing under the model name, but _select_model_name_for_cost_calc was
selecting the router deployment hash (which has no pricing data), causing response_cost to be 0.0.
Now checks whether the router_model_id entry actually has pricing before preferring it.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
Co-authored-by: Ishaan Jaff <ishaan-jaff@users.noreply.github.com>
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
210 lines
6.2 KiB
TOML
# Core package metadata read by Poetry when building the distribution.
[tool.poetry]
name = "litellm"
# Same value as [tool.commitizen].version, whose version_files pattern
# targets lines starting with "version" in this file.
version = "1.82.3"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
readme = "README.md"
# Ship the package itself plus the py.typed marker file.
packages = [
    { include = "litellm" },
    { include = "litellm/py.typed" },
]
|
|
|
|
# Project links. TOML keys are case-sensitive, so each label is a distinct
# key in its lower-case and capitalised form; both spellings are kept.
# NOTE(review): presumably both are listed for tools that match labels
# case-sensitively — confirm before removing either spelling.
[tool.poetry.urls]
homepage = "https://litellm.ai"
Homepage = "https://litellm.ai"
repository = "https://github.com/BerriAI/litellm"
Repository = "https://github.com/BerriAI/litellm"
documentation = "https://docs.litellm.ai"
Documentation = "https://docs.litellm.ai"
|
|
|
|
# Runtime dependencies. Entries without `optional = true` are always
# installed; optional ones are pulled in through [tool.poetry.extras].
[tool.poetry.dependencies]
python = ">=3.9,<4.0"
fastuuid = ">=0.13.0"
httpx = ">=0.23.0"
openai = ">=2.8.0"
python-dotenv = ">=0.2.0"
tiktoken = ">=0.7.0"
importlib-metadata = ">=6.8.0"
tokenizers = "*"
click = "*"
jinja2 = "^3.1.2"
aiohttp = ">=3.10"
pydantic = "^2.5.0"
jsonschema = ">=4.23.0,<5.0.0"
numpydoc = { version = "*", optional = true } # used in utils.py

uvicorn = { version = ">=0.32.1,<1.0.0", optional = true }
uvloop = { version = "^0.21.0", optional = true, markers = "sys_platform != 'win32'" }
gunicorn = { version = "^23.0.0", optional = true }
fastapi = { version = ">=0.120.1", optional = true }
backoff = { version = "*", optional = true }
pyyaml = { version = "^6.0.1", optional = true }
rq = { version = "*", optional = true }
orjson = { version = "^3.9.7", optional = true }
apscheduler = { version = "^3.10.4", optional = true }
fastapi-sso = { version = "^0.16.0", optional = true }
PyJWT = { version = "^2.12.0", optional = true, python = ">=3.9" }
python-multipart = { version = ">=0.0.20", optional = true }
cryptography = { version = "*", optional = true }
prisma = { version = "^0.11.0", optional = true }
azure-identity = { version = "^1.15.0", optional = true, python = ">=3.9" }
azure-keyvault-secrets = { version = "^4.8.0", optional = true }
azure-storage-blob = { version = "^12.25.1", optional = true }
google-cloud-kms = { version = "^2.21.3", optional = true }
google-cloud-iam = { version = "^2.19.1", optional = true }
google-cloud-aiplatform = { version = ">=1.38.0", optional = true }
resend = { version = ">=0.8.0", optional = true }
pynacl = { version = "^1.5.0", optional = true }
websockets = { version = "^15.0.1", optional = true }
boto3 = { version = "^1.40.76", optional = true }
redisvl = { version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'" }
mcp = { version = ">=1.25.0,<2.0.0", optional = true, python = ">=3.10" }
a2a-sdk = { version = "^0.3.22", optional = true, python = ">=3.10" }
litellm-proxy-extras = { version = "^0.4.56", optional = true }
rich = { version = "^13.7.1", optional = true }
litellm-enterprise = { version = "^0.1.33", optional = true }
diskcache = { version = "^5.6.1", optional = true }
polars = { version = "^1.31.0", optional = true, python = ">=3.10" }
semantic-router = { version = ">=0.1.12", optional = true, python = ">=3.9,<3.14" }
mlflow = { version = ">3.1.4", optional = true, python = ">=3.10" }
soundfile = { version = "^0.12.1", optional = true }
pyroscope-io = { version = "^0.8", optional = true, markers = "sys_platform != 'win32'" }
# grpcio constraints:
# - 1.62.3+ required by grpcio-status
# - 1.68.0-1.68.1 has reconnect bug (https://github.com/grpc/grpc/issues/38290)
# - 1.75.0+ has Python 3.14 wheels and bug fix
# NOTE(review): the exclusion list below runs through 1.73.0, which is wider
# than the 1.68.x bug cited above — confirm the reason for the later entries.
grpcio = [
    { version = ">=1.62.3,!=1.68.*,!=1.69.*,!=1.70.*,!=1.71.0,!=1.71.1,!=1.72.0,!=1.72.1,!=1.73.0", python = "<3.14", optional = true },
    { version = ">=1.75.0", python = ">=3.14", optional = true },
]
|
|
|
|
# Installable extras. Each name listed here is declared with
# `optional = true` in [tool.poetry.dependencies].
[tool.poetry.extras]
proxy = [
    "gunicorn",
    "uvicorn",
    "uvloop",
    "fastapi",
    "backoff",
    "pyyaml",
    "rq",
    "orjson",
    "apscheduler",
    "fastapi-sso",
    "PyJWT",
    "python-multipart",
    "cryptography",
    "pynacl",
    "websockets",
    "boto3",
    "azure-identity",
    "azure-storage-blob",
    "mcp",
    "litellm-proxy-extras",
    "litellm-enterprise",
    "rich",
    "polars",
    "soundfile",
    "pyroscope-io",
]

extra_proxy = [
    "prisma",
    "azure-identity",
    "azure-keyvault-secrets",
    "google-cloud-kms",
    "google-cloud-iam",
    "resend",
    "redisvl",
    "a2a-sdk",
]

utils = [
    "numpydoc",
]

caching = ["diskcache"]

semantic-router = ["semantic-router"]

mlflow = ["mlflow"]

grpc = ["grpcio"]

google = ["google-cloud-aiplatform"]
|
|
|
|
# isort configuration: use its "black" profile.
[tool.isort]
profile = "black"
|
|
|
|
# Console entry points, in "module:callable" form.
[tool.poetry.scripts]
litellm = "litellm:run_server"
litellm-proxy = "litellm.proxy.client.cli:cli"
|
|
|
|
# Development-only dependency group (linting, typing, testing).
[tool.poetry.group.dev.dependencies]
# Coverage diffing, linting, formatting and type checking
diff-cover = "^9.0"
flake8 = "^6.1.0"
black = "^23.12.0"
mypy = "^1.0"
# Test runner, plugins and HTTP mocking
pytest = "^7.4.3"
pytest-mock = "^3.12.0"
pytest-asyncio = "^0.21.1"
pytest-postgresql = "^6.0.0"
pytest-xdist = "^3.5.0"
requests-mock = "^1.12.1"
responses = "^0.25.7"
respx = "^0.22.0"
ruff = "^0.2.1"
# Type stubs
types-requests = "*"
types-setuptools = "*"
types-redis = "*"
types-PyYAML = "*"
# Observability and integration packages
opentelemetry-api = "^1.28.0"
opentelemetry-sdk = "^1.28.0"
opentelemetry-exporter-otlp = "^1.28.0"
langfuse = "^2.45.0"
fastapi-offline = "^1.7.3"
fakeredis = "^2.27.1"
pytest-rerunfailures = "^14.0"
|
|
|
|
# Dependency group for proxy development.
[tool.poetry.group.proxy-dev.dependencies]
# Exact pins (no caret), unlike the ranged entries below.
prisma = "0.11.0"
hypercorn = "^0.15.0"
prometheus-client = "0.20.0"
opentelemetry-api = "^1.28.0"
opentelemetry-sdk = "^1.28.0"
opentelemetry-exporter-otlp = "^1.28.0"
azure-identity = { version = "^1.15.0", python = ">=3.9" }
a2a-sdk = { version = "^0.3.22", python = ">=3.10" }
|
|
|
|
# PEP 517 build configuration: build with the poetry-core backend.
[build-system]
requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
|
|
|
|
# Commitizen version-bump configuration.
[tool.commitizen]
# Same value as [tool.poetry].version.
version = "1.82.3"
# "file:pattern" entries; this one targets lines starting with "version"
# in pyproject.toml.
version_files = [
    "pyproject.toml:^version",
]
|
|
|
|
# mypy configuration: load the pydantic plugin.
[tool.mypy]
plugins = "pydantic.mypy"
|
|
|
|
# pytest configuration.
[tool.pytest.ini_options]
asyncio_mode = "auto"
# NOTE(review): this option may not be recognised by the pytest-asyncio
# range pinned in the dev group ("^0.21.1") — confirm which pytest-asyncio
# version CI actually installs.
asyncio_default_fixture_loop_scope = "session"
# Custom markers registered for the test suite.
markers = [
    "asyncio: mark test as an asyncio test",
    "limit_leaks: mark test with memory limit for leak detection (e.g., '40 MB')",
    "no_parallel: mark test to run sequentially (not in parallel) - typically for memory measurement tests",
]
filterwarnings = [
    # Pydantic serializer warnings come from the mock server's simplified
    # response format and are non-critical for the memory tests.
    "ignore:Pydantic serializer warnings:UserWarning",
    "ignore::UserWarning:pydantic.main",
    # pytest-asyncio event loop deprecation warning (handled automatically
    # by pytest-asyncio).
    "ignore::DeprecationWarning:pytest_asyncio.plugin",
]
|