mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-18 03:31:23 +00:00
379 lines
12 KiB
Python
379 lines
12 KiB
Python
#### What this tests ####
# This tests timeout handling for litellm completion calls (sync, async, streaming, router)
|
|
|
|
import os
|
|
import sys
|
|
import traceback
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../..")
|
|
) # Adds the parent directory to the system path
|
|
import time
|
|
from litellm._uuid import uuid
|
|
|
|
import httpx
|
|
import openai
|
|
import pytest
|
|
|
|
import litellm
|
|
|
|
|
|
@pytest.mark.parametrize(
    "model, provider",
    [
        ("gpt-3.5-turbo", "openai"),
        ("azure/gpt-4.1-mini", "azure"),
    ],
)
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_httpx_timeout(model, provider, sync_mode):
    """
    Check that an `httpx.Timeout` object is accepted as the `timeout`
    argument of `litellm.completion` and `litellm.acompletion`.

    `provider` only labels the parametrized case; the body does not read it.
    """
    request_kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
        "timeout": httpx.Timeout(10.0, connect=60.0),
    }

    # Same request either way; only the sync/async entry point differs.
    if not sync_mode:
        response = await litellm.acompletion(**request_kwargs)
    else:
        response = litellm.completion(**request_kwargs)

    print(f"response: {response}")
|
|
|
|
|
|
def test_timeout():
    """
    A sync `litellm.completion` call with a 10 ms timeout must raise
    `openai.APITimeoutError`.

    The test fails when any other exception type is raised, and also when
    the call returns a response instead of timing out.
    """
    litellm.set_verbose = False
    try:
        litellm.completion(
            model="gpt-3.5-turbo",
            timeout=0.01,
            messages=[{"role": "user", "content": "hello, write a 20 pg essay"}],
        )
    except openai.APITimeoutError as e:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
        )
        print(type(e))
    except Exception as e:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
        )
    else:
        # A response means the timeout was not enforced; without this branch
        # the test would pass without ever observing a timeout.
        pytest.fail("Did not raise error `openai.APITimeoutError`")
|
|
|
|
|
|
# test_timeout()
|
|
|
|
|
|
def test_bedrock_timeout():
    """
    A Bedrock completion call with a 10 ms timeout must surface as
    `openai.APITimeoutError`.

    Fails if the call returns normally or raises any other exception type.
    """
    litellm.set_verbose = True

    request_kwargs = {
        "model": "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
        "timeout": 0.01,
        "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
    }

    try:
        litellm.completion(**request_kwargs)
        # Reaching this line means no timeout was raised.
        pytest.fail("Did not raise error `openai.APITimeoutError`")
    except openai.APITimeoutError as timeout_err:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job",
            timeout_err,
        )
        print(type(timeout_err))
    except Exception as other_err:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(other_err)}, Error: {other_err}"
        )
|
|
|
|
|
|
def test_hanging_request_azure():
    """
    Send an async request to the "azure-gpt" deployment through
    `litellm.Router` with a 10 ms timeout and expect `openai.APITimeoutError`.

    Fails if a response with non-None content comes back, or if any other
    exception type is raised (including a missing AZURE_* environment
    variable, since the router is built inside the `try`).
    """
    import asyncio

    litellm.set_verbose = True

    try:
        azure_deployment = {
            "model_name": "azure-gpt",
            "litellm_params": {
                "model": "azure/gpt-4o-new-test",
                "api_base": os.environ["AZURE_API_BASE"],
                "api_key": os.environ["AZURE_API_KEY"],
            },
        }
        openai_deployment = {
            "model_name": "openai-gpt",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
        router = litellm.Router(
            model_list=[azure_deployment, openai_deployment],
            num_retries=0,
        )

        # First element of the encoding of "blue"; used as the logit_bias key.
        blue_token = litellm.utils.encode(model="gpt-3.5-turbo", text="blue")[0]

        async def _call_router():
            # The uuid suffix makes every prompt unique.
            prompt = f"what color is red {uuid.uuid4()}"
            result = await router.acompletion(
                model="azure-gpt",
                messages=[{"role": "user", "content": prompt}],
                logit_bias={blue_token: 100},
                timeout=0.01,
            )
            print(result)
            return result

        response = asyncio.run(_call_router())

        content = response.choices[0].message.content
        if content is not None:
            pytest.fail("Got a response, expected a timeout")
    except openai.APITimeoutError as timeout_err:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job",
            timeout_err,
        )
        print(type(timeout_err))
    except Exception as other_err:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(other_err)}, Error: {other_err}"
        )
|
|
|
|
|
|
# test_hanging_request_azure()
|
|
|
|
|
|
def test_hanging_request_openai():
    """
    Send a sync request to the "openai-gpt" deployment through
    `litellm.Router` with a 10 ms timeout and expect `openai.APITimeoutError`.

    Fails if a response with non-None content comes back, or if any other
    exception type is raised (including a missing AZURE_* environment
    variable, since the router is built inside the `try`).
    """
    litellm.set_verbose = True

    try:
        azure_deployment = {
            "model_name": "azure-gpt",
            "litellm_params": {
                "model": "azure/gpt-4.1-mini",
                "api_base": os.environ["AZURE_API_BASE"],
                "api_key": os.environ["AZURE_API_KEY"],
            },
        }
        openai_deployment = {
            "model_name": "openai-gpt",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
        router = litellm.Router(
            model_list=[azure_deployment, openai_deployment],
            num_retries=0,
        )

        # First element of the encoding of "blue"; used as the logit_bias key.
        blue_token = litellm.utils.encode(model="gpt-3.5-turbo", text="blue")[0]

        response = router.completion(
            model="openai-gpt",
            messages=[{"role": "user", "content": "what color is red"}],
            logit_bias={blue_token: 100},
            timeout=0.01,
        )
        print(response)

        content = response.choices[0].message.content
        if content is not None:
            pytest.fail("Got a response, expected a timeout")
    except openai.APITimeoutError as timeout_err:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job",
            timeout_err,
        )
        print(type(timeout_err))
    except Exception as other_err:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(other_err)}, Error: {other_err}"
        )
|
|
|
|
|
|
# test_hanging_request_openai()
|
|
|
|
# test_timeout()
|
|
|
|
|
|
def test_timeout_streaming():
    """
    A streaming `litellm.completion` call with a 0.1 ms timeout must raise
    `openai.APITimeoutError`, either when the call is made or while the
    stream is being consumed.

    The test fails when any other exception type is raised, and also when
    the whole stream is consumed without a timeout.
    """
    litellm.set_verbose = False
    try:
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hello, write a 20 pg essay"}],
            timeout=0.0001,
            stream=True,
        )
        # Consume the stream inside the try so a timeout raised during
        # iteration is handled the same way as one raised by the call.
        for chunk in response:
            print(chunk)
    except openai.APITimeoutError as e:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
        )
        print(type(e))
    except Exception as e:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
        )
    else:
        # The stream finished normally, so the timeout was not enforced;
        # without this branch the test would pass without observing a timeout.
        pytest.fail("Did not raise error `openai.APITimeoutError`")
|
|
|
|
|
|
# test_timeout_streaming()
|
|
|
|
|
|
@pytest.mark.skip(reason="local test")
def test_timeout_ollama():
    """
    Call an Ollama endpoint with the module-level `litellm.request_timeout`
    lowered to 0.1 and accept either a response or `openai.APITimeoutError`.

    `litellm.request_timeout` is a module-level setting, so the previous value
    is saved and restored in `finally`. This keeps the lowered timeout from
    remaining in place when the call raises.
    """
    litellm.set_verbose = True

    previous_timeout = litellm.request_timeout
    litellm.request_timeout = 0.1
    try:
        response = litellm.completion(
            model="ollama/phi",
            messages=[{"role": "user", "content": "hello, what llm are u"}],
            max_tokens=1,
            api_base="https://test-ollama-endpoint.onrender.com",
        )
        print(response)
    except openai.APITimeoutError:
        print("got a timeout error! Passed ! ")
    finally:
        # Restore on every path, including the expected timeout path.
        litellm.request_timeout = previous_timeout
|
|
|
|
|
|
# test_timeout_ollama()
|
|
|
|
|
|
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_anthropic_timeout(streaming, sync_mode):
    """
    An Anthropic completion with a 10 ms timeout must raise
    `openai.APITimeoutError` in all four sync/async x streaming/non-streaming
    combinations.

    A streamed response is fully consumed so a timeout raised during
    iteration is observed as well. Exceptions other than
    `openai.APITimeoutError` are not caught here and propagate.
    """
    litellm.set_verbose = False

    call_kwargs = {
        "model": "claude-3-5-sonnet-20240620",
        "timeout": 0.01,
        "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
        "stream": streaming,
    }

    try:
        if not sync_mode:
            response = await litellm.acompletion(**call_kwargs)
            if isinstance(response, litellm.CustomStreamWrapper):
                async for _ in response:
                    pass
        else:
            response = litellm.completion(**call_kwargs)
            if isinstance(response, litellm.CustomStreamWrapper):
                for _ in response:
                    pass
        # Reaching this line means no timeout was raised.
        pytest.fail("Did not raise error `openai.APITimeoutError`")
    except openai.APITimeoutError as timeout_err:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job",
            timeout_err,
        )
        print(type(timeout_err))
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_timeout_respects_total_time_not_per_retry():
    """
    Test that timeout applies to the TOTAL operation time, not per-retry.

    This test ensures that when a user sets timeout=2, the entire operation
    (including all retries) times out at ~2 seconds, not at 2s * num_retries.

    This is a regression test for the issue where timeout was being applied
    per-retry attempt, causing the total time to be much longer than expected.

    Elapsed time is measured with `time.monotonic()` so that wall-clock
    adjustments cannot distort the measured interval.
    """
    litellm.set_verbose = False

    timeout_value = 2.0
    # Allow for some overhead (network, processing, etc.)
    # but ensure we don't wait for multiple retries
    max_allowed_time = timeout_value + 1.0  # 3 seconds max

    start_time = time.monotonic()

    try:
        # The prompt asks for a long response, which is expected to take
        # longer than the short timeout.
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            timeout=timeout_value,
            messages=[{"role": "user", "content": "Write a very long detailed essay about the history of computing, at least 5000 words."}],
        )
    except (openai.APITimeoutError, litellm.exceptions.Timeout):
        elapsed_time = time.monotonic() - start_time
    except Exception as e:
        pytest.fail(
            f"Expected timeout error but got different error: {type(e).__name__}: {e}"
        )
    else:
        pytest.fail("Expected timeout error but got a response")

    # The timing checks run outside the exception handler so that an assertion
    # failure is reported on its own rather than chained to the timeout error.
    print(f"Timeout occurred after {elapsed_time:.2f} seconds")
    print(f"Expected timeout: {timeout_value} seconds")
    print(f"Max allowed time: {max_allowed_time} seconds")

    # Verify that the timeout happened within the expected time window
    # It should be close to timeout_value, not timeout_value * num_retries
    assert elapsed_time < max_allowed_time, (
        f"Timeout took too long! Expected ~{timeout_value}s, "
        f"got {elapsed_time:.2f}s. This suggests timeout is being "
        "applied per-retry instead of to the total operation."
    )

    # Also verify it's not TOO fast (sanity check)
    assert elapsed_time >= timeout_value * 0.5, (
        f"Timeout happened too quickly: {elapsed_time:.2f}s. "
        f"Expected at least {timeout_value * 0.5}s"
    )

    print("✓ Timeout correctly applied to total operation time, not per-retry")
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_timeout_with_retries_disabled():
    """
    Test that timeout works correctly when retries are explicitly disabled.
    This should timeout even faster since there are no retry attempts.

    Elapsed time is measured with `time.monotonic()` so that wall-clock
    adjustments cannot distort the measured interval. Exceptions other than
    the two timeout types are not caught here and propagate.
    """
    litellm.set_verbose = False

    timeout_value = 2.0
    max_allowed_time = timeout_value + 0.5  # Even tighter bound with no retries

    start_time = time.monotonic()

    try:
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            timeout=timeout_value,
            max_retries=0,  # Disable retries
            messages=[{"role": "user", "content": "Write a very long detailed essay about the history of computing, at least 5000 words."}],
        )
    except (openai.APITimeoutError, litellm.exceptions.Timeout):
        elapsed_time = time.monotonic() - start_time
    else:
        pytest.fail("Expected timeout error but got a response")

    # The timing check runs outside the exception handler so that an assertion
    # failure is reported on its own rather than chained to the timeout error.
    print(f"Timeout with no retries occurred after {elapsed_time:.2f} seconds")

    assert elapsed_time < max_allowed_time, (
        f"Timeout took too long even with retries disabled! "
        f"Expected ~{timeout_value}s, got {elapsed_time:.2f}s"
    )

    print("✓ Timeout works correctly with retries disabled")
|