# litellm/tests/local_testing/test_timeout.py

#### What this tests ####
#    This tests timeout handling on litellm completion calls (sync, async, streaming, router)

import os
import sys
import traceback

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import time
from litellm._uuid import uuid

import httpx
import openai
import pytest

import litellm


@pytest.mark.parametrize(
    "model, provider",
    [
        ("gpt-3.5-turbo", "openai"),
        ("azure/gpt-4.1-mini", "azure"),
    ],
)
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_httpx_timeout(model, provider, sync_mode):
    """
    Check that an `httpx.Timeout` object can be passed as `timeout` to both
    the sync and the async completion entry points.

    `provider` only labels the parametrized case; the body does not read it.
    """
    # Same arguments for both code paths; only the entry point differs.
    request_kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
        "timeout": httpx.Timeout(10.0, connect=60.0),
    }

    if not sync_mode:
        response = await litellm.acompletion(**request_kwargs)
    else:
        response = litellm.completion(**request_kwargs)

    print(f"response: {response}")


def test_timeout():
    """
    A completion call with a 0.01s timeout must raise `openai.APITimeoutError`.

    The test fails when the call raises any other exception type, and also
    when it returns normally without raising at all.
    """
    litellm.set_verbose = False
    try:
        litellm.completion(
            model="gpt-3.5-turbo",
            timeout=0.01,
            messages=[{"role": "user", "content": "hello, write a 20 pg essay"}],
        )
    except openai.APITimeoutError as e:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
        )
        print(type(e))
    except Exception as e:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
        )
    else:
        # Without this branch a call that never times out would pass silently.
        pytest.fail("Did not raise error `openai.APITimeoutError`")


# test_timeout()


def test_bedrock_timeout():
    """
    Call a Bedrock-hosted Claude model with a 0.01s timeout and expect
    `openai.APITimeoutError`; any other outcome fails the test.
    """
    litellm.set_verbose = True
    prompt = [{"role": "user", "content": "hello, write a 20 pg essay"}]
    try:
        litellm.completion(
            model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
            timeout=0.01,
            messages=prompt,
        )
    except openai.APITimeoutError as timeout_err:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job",
            timeout_err,
        )
        print(type(timeout_err))
    except Exception as other_err:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(other_err)}, Error: {other_err}"
        )
    else:
        # Reached only when the call returned without raising.
        pytest.fail("Did not raise error `openai.APITimeoutError`")


def test_hanging_request_azure():
    """
    Send an async router request to the "azure-gpt" deployment with a 0.01s
    timeout.

    Passes when `openai.APITimeoutError` is raised. Fails when a response with
    non-None message content comes back, or when any other exception is raised
    (this includes a missing AZURE_API_BASE / AZURE_API_KEY environment
    variable, since the router is built inside the `try`).
    """
    import asyncio

    litellm.set_verbose = True

    try:
        azure_deployment = {
            "model_name": "azure-gpt",
            "litellm_params": {
                "model": "azure/gpt-4o-new-test",
                "api_base": os.environ["AZURE_API_BASE"],
                "api_key": os.environ["AZURE_API_KEY"],
            },
        }
        openai_deployment = {
            "model_name": "openai-gpt",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
        router = litellm.Router(
            model_list=[azure_deployment, openai_deployment],
            num_retries=0,
        )

        # First element of the encoded form of "blue"; used as the logit_bias key.
        bias_token = litellm.utils.encode(model="gpt-3.5-turbo", text="blue")[0]

        async def _send_request():
            # The uuid makes the prompt text different on every run.
            prompt = [{"role": "user", "content": f"what color is red {uuid.uuid4()}"}]
            result = await router.acompletion(
                model="azure-gpt",
                messages=prompt,
                logit_bias={bias_token: 100},
                timeout=0.01,
            )
            print(result)
            return result

        response = asyncio.run(_send_request())

        content = response.choices[0].message.content
        if content is not None:
            pytest.fail("Got a response, expected a timeout")
    except openai.APITimeoutError as timeout_err:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job",
            timeout_err,
        )
        print(type(timeout_err))
    except Exception as other_err:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(other_err)}, Error: {other_err}"
        )


# test_hanging_request_azure()


def test_hanging_request_openai():
    """
    Send a sync router request to the "openai-gpt" deployment with a 0.01s
    timeout.

    Passes when `openai.APITimeoutError` is raised. Fails when a response with
    non-None message content comes back, or when any other exception is raised
    (this includes a missing AZURE_API_BASE / AZURE_API_KEY environment
    variable, since the router is built inside the `try`).
    """
    litellm.set_verbose = True
    try:
        azure_deployment = {
            "model_name": "azure-gpt",
            "litellm_params": {
                "model": "azure/gpt-4.1-mini",
                "api_base": os.environ["AZURE_API_BASE"],
                "api_key": os.environ["AZURE_API_KEY"],
            },
        }
        openai_deployment = {
            "model_name": "openai-gpt",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
        router = litellm.Router(
            model_list=[azure_deployment, openai_deployment],
            num_retries=0,
        )

        # First element of the encoded form of "blue"; used as the logit_bias key.
        bias_token = litellm.utils.encode(model="gpt-3.5-turbo", text="blue")[0]
        response = router.completion(
            model="openai-gpt",
            messages=[{"role": "user", "content": "what color is red"}],
            logit_bias={bias_token: 100},
            timeout=0.01,
        )
        print(response)

        content = response.choices[0].message.content
        if content is not None:
            pytest.fail("Got a response, expected a timeout")
    except openai.APITimeoutError as timeout_err:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job",
            timeout_err,
        )
        print(type(timeout_err))
    except Exception as other_err:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(other_err)}, Error: {other_err}"
        )


# test_hanging_request_openai()

# test_timeout()


def test_timeout_streaming():
    """
    A streaming completion call with a 0.0001s timeout must raise
    `openai.APITimeoutError`, either when the call is made or while the
    stream is being consumed.

    The test fails when any other exception type is raised, and also when the
    whole stream is consumed without an exception.
    """
    litellm.set_verbose = False
    try:
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hello, write a 20 pg essay"}],
            timeout=0.0001,
            stream=True,
        )
        # Iterate inside the `try` so an error raised mid-stream is handled too.
        for chunk in response:
            print(chunk)
    except openai.APITimeoutError as e:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
        )
        print(type(e))
    except Exception as e:
        pytest.fail(
            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
        )
    else:
        # Without this branch a stream that completes normally would pass silently.
        pytest.fail("Did not raise error `openai.APITimeoutError`")


# test_timeout_streaming()


@pytest.mark.skip(reason="local test")
def test_timeout_ollama():
    """
    Local-only check: call an Ollama endpoint with the module-level
    `litellm.request_timeout` set to 0.1s.

    An `openai.APITimeoutError` is accepted as a pass; a successful response
    is printed. Whatever value `litellm.request_timeout` had before the test
    is put back afterwards, on both the success and the timeout path.
    """
    litellm.set_verbose = True
    previous_timeout = getattr(litellm, "request_timeout", None)
    litellm.request_timeout = 0.1
    try:
        response = litellm.completion(
            model="ollama/phi",
            messages=[{"role": "user", "content": "hello, what llm are u"}],
            max_tokens=1,
            api_base="https://test-ollama-endpoint.onrender.com",
        )
        print(response)
    except openai.APITimeoutError:
        print("got a timeout error! Passed ! ")
    finally:
        # Restore the global even when the call times out, so the 0.1s limit
        # does not leak into tests that run afterwards.
        litellm.request_timeout = previous_timeout


# test_timeout_ollama()


@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_anthropic_timeout(streaming, sync_mode):
    """
    Call the Anthropic model with a 0.01s timeout in every combination of
    sync/async and streaming/non-streaming, and expect
    `openai.APITimeoutError` each time.

    When the call returns a `litellm.CustomStreamWrapper`, it is consumed so
    that an error raised while iterating is also observed.
    """
    litellm.set_verbose = False

    call_kwargs = {
        "model": "claude-3-5-sonnet-20240620",
        "timeout": 0.01,
        "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
        "stream": streaming,
    }

    try:
        if not sync_mode:
            response = await litellm.acompletion(**call_kwargs)
            if isinstance(response, litellm.CustomStreamWrapper):
                async for _ in response:
                    pass
        else:
            response = litellm.completion(**call_kwargs)
            if isinstance(response, litellm.CustomStreamWrapper):
                for _ in response:
                    pass
    except openai.APITimeoutError as timeout_err:
        print(
            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job",
            timeout_err,
        )
        print(type(timeout_err))
    else:
        # Reached only when no exception was raised at all.
        pytest.fail("Did not raise error `openai.APITimeoutError`")


@pytest.mark.asyncio
async def test_timeout_respects_total_time_not_per_retry():
    """
    Test that timeout applies to the TOTAL operation time, not per-retry.

    This test ensures that when a user sets timeout=2, the entire operation
    (including all retries) times out at ~2 seconds, not at 2s * num_retries.

    This is a regression test for the issue where timeout was being applied
    per-retry attempt, causing the total time to be much longer than expected.

    Elapsed time is measured with `time.monotonic()` so that a system clock
    adjustment during the test cannot distort the measurement.
    """
    litellm.set_verbose = False

    timeout_value = 2.0
    # Allow for some overhead (network, processing, etc.)
    # but ensure we don't wait for multiple retries
    max_allowed_time = timeout_value + 1.0  # 3 seconds max

    start_time = time.monotonic()

    try:
        # This should timeout because we're asking for a long response
        # with a very short timeout
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            timeout=timeout_value,
            messages=[{"role": "user", "content": "Write a very long detailed essay about the history of computing, at least 5000 words."}],
        )
        pytest.fail("Expected timeout error but got a response")
    except (openai.APITimeoutError, litellm.exceptions.Timeout):
        elapsed_time = time.monotonic() - start_time

        print(f"Timeout occurred after {elapsed_time:.2f} seconds")
        print(f"Expected timeout: {timeout_value} seconds")
        print(f"Max allowed time: {max_allowed_time} seconds")

        # Verify that the timeout happened within the expected time window
        # It should be close to timeout_value, not timeout_value * num_retries
        assert elapsed_time < max_allowed_time, (
            f"Timeout took too long! Expected ~{timeout_value}s, "
            f"got {elapsed_time:.2f}s. This suggests timeout is being "
            f"applied per-retry instead of to the total operation."
        )

        # Also verify it's not TOO fast (sanity check)
        assert elapsed_time >= timeout_value * 0.5, (
            f"Timeout happened too quickly: {elapsed_time:.2f}s. "
            f"Expected at least {timeout_value * 0.5}s"
        )

        print("✓ Timeout correctly applied to total operation time, not per-retry")
    except Exception as e:
        pytest.fail(
            f"Expected timeout error but got different error: {type(e).__name__}: {e}"
        )


@pytest.mark.asyncio
async def test_timeout_with_retries_disabled():
    """
    Test that timeout works correctly when retries are explicitly disabled.
    This should timeout even faster since there are no retry attempts.

    Elapsed time is measured with `time.monotonic()` so that a system clock
    adjustment during the test cannot distort the measurement.
    """
    litellm.set_verbose = False

    timeout_value = 2.0
    max_allowed_time = timeout_value + 0.5  # Even tighter bound with no retries

    start_time = time.monotonic()

    try:
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            timeout=timeout_value,
            max_retries=0,  # Disable retries
            messages=[{"role": "user", "content": "Write a very long detailed essay about the history of computing, at least 5000 words."}],
        )
        pytest.fail("Expected timeout error but got a response")
    except (openai.APITimeoutError, litellm.exceptions.Timeout):
        elapsed_time = time.monotonic() - start_time

        print(f"Timeout with no retries occurred after {elapsed_time:.2f} seconds")

        assert elapsed_time < max_allowed_time, (
            f"Timeout took too long even with retries disabled! "
            f"Expected ~{timeout_value}s, got {elapsed_time:.2f}s"
        )

        print("✓ Timeout works correctly with retries disabled")