Files
litellm/tests/test_litellm/test_completion_timeout_resolution.py
T
ishaan-berri a588f76789 Litellm ishaan april15 2 (#25828)
* [Test] Add Azure async chat completion timeout test. WIP

* Capture TTFT for /v1/messages streaming responses

The pass-through streaming path for /v1/messages (Anthropic, Bedrock,
Vertex AI, Azure AI, Minimax) logged completion_start_time only after
the entire stream finished. async_success_handler then fell back to
end_time, making TTFT equal to total duration or null in the UI and
Prometheus.

Record the timestamp of the first chunk in async_sse_wrapper and
propagate it to model_call_details before the logging handler runs,
so gen_ai.response.time_to_first_token reflects the real first-chunk
latency.

Fixes #25598

* [Refactor] Implement timeout resolution logic in completion function

add fetch ``request_timeout`` from litellm_settings

* remove stale test case

* remove extra print statement

* default request timeout value in constants to 600s to match timeout defaults handled in the proxy

* fix request timeout if using default value from constants.py

* update code structure, test cases

* only override if the global timeout sets timeout to 6000s

* update code structure, move hard-coded values to constants and make the resolve function readable by moving fallback logic to a separate function

* modify default timeout values, replacing hard coded ones with default values defined

---------

Co-authored-by: harish876 <harishgokul01@gmail.com>
Co-authored-by: Joaquin Hui Gomez <joaquinhuigomez@users.noreply.github.com>
2026-04-15 18:42:23 -07:00

146 lines
3.5 KiB
Python

"""Unit tests for litellm.litellm_core_utils.completion_timeout.CompletionTimeout."""
import os
import sys
import httpx
# Put the directory three levels above this file's directory at the front of
# sys.path before the litellm imports below execute.
# NOTE(review): presumably this is the repository root, so the local checkout
# is imported rather than an installed package — confirm against repo layout.
sys.path.insert(
0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
)
from litellm.litellm_core_utils.completion_timeout import CompletionTimeout
from litellm.utils import supports_httpx_timeout
def test_explicit_timeout_wins():
    """A non-None first argument (12.5) is returned even when kwargs carry
    both ``timeout`` and ``request_timeout`` entries."""
    request_kwargs = {"timeout": 99.0, "request_timeout": 88.0}
    resolved = CompletionTimeout.resolve(
        12.5,
        request_kwargs,
        "openai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 12.5
def test_kwargs_timeout_when_param_none():
    """With ``None`` as the first argument, the ``timeout`` entry from kwargs
    (21.0) is the resolved value."""
    request_kwargs = {"timeout": 21.0}
    resolved = CompletionTimeout.resolve(
        None,
        request_kwargs,
        "azure_ai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 21.0
def test_request_timeout_alias_in_kwargs():
    """With ``None`` as the first argument and only ``request_timeout`` in
    kwargs, that value (33.0) is the resolved value."""
    request_kwargs = {"request_timeout": 33.0}
    resolved = CompletionTimeout.resolve(
        None,
        request_kwargs,
        "bedrock",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 33.0
def test_global_timeout_from_litellm_settings():
    """With ``None`` as the first argument and empty kwargs, the supplied
    ``global_timeout`` (360.0) is the resolved value."""
    resolved = CompletionTimeout.resolve(
        None,
        {},
        "vertex_ai",
        global_timeout=360.0,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 360.0
def test_global_timeout_package_default_coerced_to_600_for_completion():
    """A ``global_timeout`` of 6000.0 with no other timeout supplied resolves
    to 600.0 (package default 6000s → 600s for the completion-only path)."""
    resolved = CompletionTimeout.resolve(
        None,
        {},
        "openai",
        global_timeout=6000.0,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 600.0
def test_explicit_request_timeout_6000_preserved():
    """A ``request_timeout`` of 6000.0 passed in kwargs comes back as 6000.0;
    an explicit deployment/request value must not be truncated by the package
    sentinel."""
    request_kwargs = {"request_timeout": 6000.0}
    resolved = CompletionTimeout.resolve(
        None,
        request_kwargs,
        "openai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 6000.0
def test_explicit_model_timeout_6000_preserved():
    """A first argument of 6000.0 comes back as 6000.0, even though kwargs
    also carry ``timeout`` and ``request_timeout`` entries."""
    request_kwargs = {"timeout": 1.0, "request_timeout": 2.0}
    resolved = CompletionTimeout.resolve(
        6000.0,
        request_kwargs,
        "openai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 6000.0
def test_fallback_600_when_no_global_timeout():
    """With ``None`` as the first argument, empty kwargs and
    ``global_timeout=None``, the resolved value is 600.0."""
    resolved = CompletionTimeout.resolve(
        None,
        {},
        "azure_ai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    assert resolved == 600.0
def test_httpx_timeout_coerced_for_provider_without_httpx_timeout_support():
    """An ``httpx.Timeout(50.0, connect=2.0)`` passed for ``"azure_ai"`` is
    resolved to a value equal to 50.0 that is not an ``httpx.Timeout``."""
    httpx_timeout = httpx.Timeout(50.0, connect=2.0)
    resolved = CompletionTimeout.resolve(
        httpx_timeout,
        {},
        "azure_ai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    # Check the value first, then that the httpx wrapper type was dropped.
    assert resolved == 50.0
    assert not isinstance(resolved, httpx.Timeout)
def test_httpx_timeout_preserved_for_openai():
    """An ``httpx.Timeout`` passed for ``"openai"`` comes back as the very
    same object."""
    httpx_timeout = httpx.Timeout(40.0, connect=5.0)
    resolved = CompletionTimeout.resolve(
        httpx_timeout,
        {},
        "openai",
        global_timeout=None,
        supports_httpx_timeout=supports_httpx_timeout,
    )
    # Identity, not just equality: the resolver must not copy or rebuild it.
    assert resolved is httpx_timeout
    assert isinstance(resolved, httpx.Timeout)