mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 22:48:35 +00:00
a588f76789
* [Test] Add Azure async chat completion timeout test. WIP * Capture TTFT for /v1/messages streaming responses The pass-through streaming path for /v1/messages (Anthropic, Bedrock, Vertex AI, Azure AI, Minimax) logged completion_start_time only after the entire stream finished. async_success_handler then fell back to end_time, making TTFT equal to total duration or null in the UI and Prometheus. Record the timestamp of the first chunk in async_sse_wrapper and propagate it to model_call_details before the logging handler runs, so gen_ai.response.time_to_first_token reflects the real first-chunk latency. Fixes #25598 * [Refactor] Implement timeout resolution logic in completion function add fetch ``request_timeout`` from litellm_settings * remove stale test case * remove extra print statement * default request timeout value in constants to 600s to match timeout defaults handled in the proxy * fix request timeout if using default value from constants.py * update code structure, test cases * only override if the global timeout sets timeout to 6000s * update code structure, move hard coded values to const and make the resolve function readable by moving fallback logic to a separate function * modify default timeout values, replacing hard coded ones with default values defined --------- Co-authored-by: harish876 <harishgokul01@gmail.com> Co-authored-by: Joaquin Hui Gomez <joaquinhuigomez@users.noreply.github.com>
146 lines
3.5 KiB
Python
146 lines
3.5 KiB
Python
"""Unit tests for litellm.litellm_core_utils.completion_timeout.CompletionTimeout."""
|
|
|
|
import os
|
|
import sys
|
|
|
|
import httpx
|
|
|
|
# Put the directory three levels above this file at the front of the import
# path — presumably the repository root, so the in-tree package is the one
# that gets imported below (NOTE(review): confirm against the repo layout).
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")),
)
|
|
|
|
from litellm.litellm_core_utils.completion_timeout import CompletionTimeout
|
|
from litellm.utils import supports_httpx_timeout
|
|
|
|
|
|
def test_explicit_timeout_wins():
    """The positional timeout (12.5) is returned even when kwargs carry
    both ``timeout`` and ``request_timeout`` entries."""
    competing_kwargs = {"timeout": 99.0, "request_timeout": 88.0}
    resolved = CompletionTimeout.resolve(
        12.5,
        competing_kwargs,
        "openai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 12.5
|
|
|
|
|
|
def test_kwargs_timeout_when_param_none():
    """With ``None`` as the positional timeout, the ``timeout`` entry in
    kwargs (21.0) is the resolved value."""
    resolved = CompletionTimeout.resolve(
        None,
        {"timeout": 21.0},
        "azure_ai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 21.0
|
|
|
|
|
|
def test_request_timeout_alias_in_kwargs():
    """With ``None`` as the positional timeout, a ``request_timeout`` entry
    in kwargs (33.0) is the resolved value."""
    resolved = CompletionTimeout.resolve(
        None,
        {"request_timeout": 33.0},
        "bedrock",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 33.0
|
|
|
|
|
|
def test_global_timeout_from_litellm_settings():
    """With no positional timeout and empty kwargs, ``global_timeout``
    (360.0) is the resolved value."""
    resolved = CompletionTimeout.resolve(
        None,
        {},
        "vertex_ai",
        global_timeout=360.0,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 360.0
|
|
|
|
|
|
def test_global_timeout_package_default_coerced_to_600_for_completion():
    """A ``global_timeout`` of 6000.0 (the package default, per the original
    docstring) resolves to 600.0 when nothing else supplies a timeout."""
    resolved = CompletionTimeout.resolve(
        None,
        {},
        "openai",
        global_timeout=6000.0,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 600.0
|
|
|
|
|
|
def test_explicit_request_timeout_6000_preserved():
    """A ``request_timeout`` of 6000.0 supplied through kwargs is returned
    unchanged; it must not be cut down like the package-default sentinel."""
    resolved = CompletionTimeout.resolve(
        None,
        {"request_timeout": 6000.0},
        "openai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 6000.0
|
|
|
|
|
|
def test_explicit_model_timeout_6000_preserved():
    """A positional timeout of 6000.0 is returned unchanged, ahead of the
    smaller ``timeout`` and ``request_timeout`` values in kwargs."""
    smaller_kwargs = {"timeout": 1.0, "request_timeout": 2.0}
    resolved = CompletionTimeout.resolve(
        6000.0,
        smaller_kwargs,
        "openai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 6000.0
|
|
|
|
|
|
def test_fallback_600_when_no_global_timeout():
    """With no positional timeout, empty kwargs and no ``global_timeout``,
    the resolved value is 600.0."""
    resolved = CompletionTimeout.resolve(
        None,
        {},
        "azure_ai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 600.0
|
|
|
|
|
|
def test_httpx_timeout_coerced_for_provider_without_httpx_timeout_support():
    """An ``httpx.Timeout`` passed for ``azure_ai`` resolves to the plain
    number 50.0 rather than an ``httpx.Timeout`` instance."""
    structured_timeout = httpx.Timeout(50.0, connect=2.0)
    resolved = CompletionTimeout.resolve(
        structured_timeout,
        {},
        "azure_ai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    # Check the value first, then that the httpx wrapper was dropped.
    assert resolved == 50.0
    assert not isinstance(resolved, httpx.Timeout)
|
|
|
|
|
|
def test_httpx_timeout_preserved_for_openai():
    """An ``httpx.Timeout`` passed for ``openai`` comes back as the very
    same object, not a copy or a coerced number."""
    structured_timeout = httpx.Timeout(40.0, connect=5.0)
    resolved = CompletionTimeout.resolve(
        structured_timeout,
        {},
        "openai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    # Identity, not just equality: the caller's object must pass through.
    assert resolved is structured_timeout
    assert isinstance(resolved, httpx.Timeout)
|