mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 14:48:44 +00:00
12d29a38a7
* test(proxy/proxy_server): pin forwarding routes (PR2) (#28887) * test(proxy): pin proxy_server.py forwarding-route behavior PR2 of the proxy_server.py behavior-pinning project: fills the 12 forwarding-route test files added by the harness PR with happy + error pins for all 52 LLM-facing routes (models, chat/completions, completions, embeddings, moderations, audio, assistants, threads, utils, model-info, model-metrics, queue). Every happy-path test asserts the full response dict via normalize() so the gate enforces real shape pinning rather than status codes. * test(proxy): drop task-plumbing comments from PR2 test files * test(proxy): tighten PR2 error-path status-code pins Apply the same review feedback Greptile gave on PR1 (#28856) and PR3 (#28850) to PR2's forwarding-route tests: - Replace permissive `>= 400` / `in (X, Y)` status assertions with the exact 500/405 the handler actually returns, so a regression that silently shifts the code now fails the pin. - Add a body-presence check alongside each tightened status assertion to satisfy _pin_check.py's no-status-only rule. --------- Co-authored-by: Claude <noreply@anthropic.com> * test(proxy): pin proxy_server.py non-route surface behavior (PR1) (#28856) * test(proxy): pin proxy_server.py non-route surface behavior (PR1) Fills the 7 PR1 placeholder files under tests/test_litellm/proxy/proxy_server/ with behavior pins for the non-route surface of proxy_server.py: lifecycle/init/shutdown, ProxyConfig class methods, DB-overlay config scrubbers, spend counters, background-health helpers, OpenAPI customization, exception handlers, and streaming-generator helpers. 233 tests cover 101 pin-list symbols (1+ happy + 1+ error each). New-tests-only coverage on litellm/proxy/proxy_server.py: 32.80% line / 20.91% branch (PR1 gate: 25% line / 18% branch). Full directory runs in ~22s with -n 4. 
Plan: https://www.notion.so/Plan-Pin-proxy_server-py-behavior-2026-05-25-36c43b8acdab81ee845fd5365128a2fc * test(proxy): address Greptile review comments on test_lifecycle.py - test_initialize_signature_is_async_with_expected_params: hard-code expected_param_count so a signature change actually trips the gate (previously both sides of the comparison were len(sig.parameters)). - test_check_request_disconnection_invalid_when_connected_times_out: patch asyncio.sleep so the test no longer spins for ~1.2 s of real wall-clock; timeout lowered to 0.05 s. --------- Co-authored-by: Claude <noreply@anthropic.com> * test(proxy/proxy_server): pin control-plane routes (PR3) (#28850) * test(proxy/proxy_server): pin misc routes (PR3, partial) Adds happy + error tests for the misc control-plane routes: GET /, /routes, /adaptive_router/state, /get_logo_url, /get_image, /get_favicon. Also gitignores .pin_list.txt (used by the pin gate). * test(proxy/proxy_server): pin login/SSO routes (PR3, partial) Adds happy + error tests for the 5 login/SSO control-plane routes: GET /fallback/login, POST /login, POST /v2/login, POST /v3/login, POST /v3/login/exchange. Mocks authenticate_user and create_ui_token_object at their imported location. * test(proxy/proxy_server): pin onboarding routes (PR3, partial) Adds happy + error tests for the 2 onboarding control-plane routes: GET /onboarding/get_token, POST /onboarding/claim_token. Wires a MagicMock async context manager for prisma_client.db.tx() and signs the onboarding JWT with the patched master_key. * test(proxy/proxy_server): pin model_cost_map reload routes (PR3, partial) Adds happy + error tests for the 5 model-cost-map control-plane routes: POST /reload/model_cost_map, POST|DELETE|GET /schedule/model_cost_map_reload(/status), GET /model/cost_map/source. Attaches litellm_config to mock_prisma per-test (the table is not in the default _PRISMA_TABLES fixture). 
* test(proxy/proxy_server): pin anthropic_beta_headers reload routes (PR3, partial) Adds happy + error tests for the 4 anthropic-beta-headers control-plane routes: POST /reload/anthropic_beta_headers, POST|DELETE|GET /schedule/anthropic_beta_headers_reload(/status). Stubs db.litellm_config (not in default _PRISMA_TABLES) and monkeypatches reload_beta_headers_config so no network calls fire. * test(proxy/proxy_server): pin invitation routes (PR3, partial) Adds happy + error tests for the 4 invitation control-plane routes: POST /invitation/new, GET /invitation/info, POST /invitation/update, POST /invitation/delete. Patches _user_has_admin_privileges / _user_has_admin_view to avoid extensive get_user_object mocking. * test(proxy/proxy_server): pin config CRUD routes (PR3, partial) Adds happy + error tests for the 8 config-CRUD control-plane routes: POST /config/update, POST|GET /config/field/update|info, GET /config/list, POST /config/field/delete, POST /config/callback/delete, GET /get/config/callbacks, GET /config/yaml. Attaches litellm_config to mock_prisma per-test. * test(proxy/proxy_server): tighten pin assertions per review - test_routes_misc.py: `b"" in response.content` is trivially true; replace with `len(response.content) > 0` so an empty 405 body trips the gate. - test_routes_login_sso.py: `len(response.content) >= 0` is trivially true; tighten to `> 0`. - test_routes_anthropic_beta.py: replace brittle string-literal checks on the serialized JSON (`'"interval_hours": 12' in payload`) with `json.loads` + dict access so the assertion survives any serializer spacing. - test_routes_config.py: `assert status_code in (404, 500)` was too permissive; the handler re-raises HTTPException(404) verbatim, so pin 404 strictly. --------- Co-authored-by: Claude <noreply@anthropic.com> --------- Co-authored-by: Claude <noreply@anthropic.com>
92 lines
2.6 KiB
Python
92 lines
2.6 KiB
Python
"""Behavior pins for ``proxy_server.py`` queue routes.

Pins (PR2):
- POST /queue/chat/completions
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from litellm.proxy import proxy_server
|
|
|
|
from .conftest import normalize # type: ignore[import-not-found]
|
|
|
|
# Canned chat-completion body handed back by the mocked router's
# ``schedule_acompletion``; the happy-path test compares the endpoint's
# normalized response against this same shape.
HAPPY_RESPONSE = {
    "id": "chatcmpl-queue",
    "object": "chat.completion",
    "created": 0,
    "model": "gpt-4",
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",
            "message": {
                "role": "assistant",
                "content": "queued reply",
            },
        },
    ],
    "usage": {
        "prompt_tokens": 1,
        "completion_tokens": 1,
        "total_tokens": 2,
    },
    "priority": 0,
}
|
|
|
|
|
|
@pytest.fixture
def patched_queue(monkeypatch):
    """Install a mock ``llm_router`` whose ``schedule_acompletion`` succeeds.

    Replaces ``proxy_server.llm_router`` with a ``MagicMock`` whose
    ``schedule_acompletion`` is an ``AsyncMock`` resolving to a private copy
    of ``HAPPY_RESPONSE``, and replaces ``proxy_server.proxy_logging_obj``
    with a mock exposing an async ``post_call_failure_hook``.

    Returns:
        The mock router, so a test can inspect the calls made on it.
    """
    import copy

    router = MagicMock()
    # Deep copy rather than ``dict(...)``: HAPPY_RESPONSE nests a list and
    # dicts, and a shallow copy would still share them with the module-level
    # constant, letting any in-place mutation leak into later tests.
    router.schedule_acompletion = AsyncMock(
        return_value=copy.deepcopy(HAPPY_RESPONSE)
    )
    monkeypatch.setattr(proxy_server, "llm_router", router)
    monkeypatch.setattr(
        proxy_server,
        "proxy_logging_obj",
        MagicMock(post_call_failure_hook=AsyncMock()),
    )
    return router
|
|
|
|
|
|
@pytest.fixture
def queue_no_router(monkeypatch):
    """Patch ``proxy_server`` so that no router is configured.

    Sets ``proxy_server.llm_router`` to ``None`` and swaps
    ``proxy_server.proxy_logging_obj`` for a mock exposing an async
    ``post_call_failure_hook``. Yields nothing; the patches are applied
    through ``monkeypatch``.
    """
    failure_logging = MagicMock(post_call_failure_hook=AsyncMock())
    monkeypatch.setattr(proxy_server, "llm_router", None)
    monkeypatch.setattr(proxy_server, "proxy_logging_obj", failure_logging)
    yield
|
|
|
|
|
|
def test_queue_chat_completions_happy(client, auth_as, patched_queue):
    """Pins ``POST /queue/chat/completions`` (happy).

    Posts a minimal chat request with ``priority`` 0 inside the ``auth_as()``
    context and checks that the status is 200 and that the normalized body
    matches the full expected dict, where ``id`` and ``created`` appear as
    ``"<VOLATILE>"``.
    """
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "priority": 0,
    }
    expected_body = {
        "id": "<VOLATILE>",
        "object": "chat.completion",
        "created": "<VOLATILE>",
        "model": "gpt-4",
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                "message": {"role": "assistant", "content": "queued reply"},
            }
        ],
        "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2},
        "priority": 0,
    }

    with auth_as():
        resp = client.post("/queue/chat/completions", json=request_body)

    assert resp.status_code == 200
    assert normalize(resp.json()) == expected_body
|
|
|
|
|
|
def test_queue_chat_completions_no_router_error(client, auth_as, queue_no_router):
    """Pins ``POST /queue/chat/completions`` (error: no llm_router).

    With ``llm_router`` patched to ``None`` by the ``queue_no_router``
    fixture, the endpoint is expected to answer 500 with a non-empty body.
    """
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
    }

    with auth_as():
        resp = client.post("/queue/chat/completions", json=request_body)

    assert resp.status_code == 500
    # Body-presence check so the pin is not status-only.
    assert len(resp.content) > 0
|