# litellm/tests/benchmarks/test_benchmarks.py

"""
Performance benchmarks for litellm core operations.

These benchmarks measure the performance of frequently called functions
in the litellm hot path: token counting, model info lookup, provider
resolution, cost calculation, and model cost key resolution.
"""

import pytest

import litellm
from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
from litellm.litellm_core_utils.token_counter import token_counter


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------

# Smallest chat payload used by the benchmarks: a single short user turn.
SIMPLE_MESSAGES = [
    {
        "role": "user",
        "content": "Hello, how are you?",
    },
]

# Six-turn conversation: a system prompt followed by alternating user and
# assistant turns, ending on a user question. Each (role, content) pair below
# becomes one ``{"role": ..., "content": ...}`` message dict.
MULTI_TURN_MESSAGES = [
    {"role": speaker, "content": utterance}
    for speaker, utterance in (
        ("system", "You are a helpful assistant."),
        ("user", "What is the capital of France?"),
        (
            "assistant",
            "The capital of France is Paris. "
            "It is known as the City of Light.",
        ),
        ("user", "Tell me more about Paris."),
        (
            "assistant",
            "Paris is the capital and most populous city of France. "
            "With an estimated population of 2,165,423 in 2019, "
            "it is the centre of the Ile-de-France region. "
            "The city is a major European cultural and commercial centre.",
        ),
        ("user", "What are the top tourist attractions?"),
    )
]

# One user turn with a long body: a short instruction followed by the chunk
# "word " repeated 500 times.
LONG_CONTENT_MESSAGE = [
    {
        "role": "user",
        "content": "".join(
            ("Explain the following concept in detail: ", "word " * 500)
        ),
    }
]

# A single function-type tool definition named "get_weather", passed as the
# ``tools`` argument in the tool-aware token counting benchmark.
TOOL_DEFINITIONS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    # Unit is limited to the two enumerated values.
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                # "unit" is deliberately absent here, so only "location" is mandatory.
                "required": ["location"],
            },
        },
    }
]


# ---------------------------------------------------------------------------
# Token counting benchmarks
# ---------------------------------------------------------------------------


@pytest.mark.benchmark
def test_token_counter_simple_message():
    """Run ``token_counter`` on the one-message ``SIMPLE_MESSAGES`` chat."""
    conversation = SIMPLE_MESSAGES
    token_counter(messages=conversation, model="gpt-4o")


@pytest.mark.benchmark
def test_token_counter_multi_turn():
    """Run ``token_counter`` on the ``MULTI_TURN_MESSAGES`` conversation."""
    conversation = MULTI_TURN_MESSAGES
    token_counter(messages=conversation, model="gpt-4o")


@pytest.mark.benchmark
def test_token_counter_long_content():
    """Run ``token_counter`` on the single long ``LONG_CONTENT_MESSAGE`` turn."""
    conversation = LONG_CONTENT_MESSAGE
    token_counter(messages=conversation, model="gpt-4o")


@pytest.mark.benchmark
def test_token_counter_with_tools():
    """Run ``token_counter`` on a short chat that also carries tool definitions."""
    call_kwargs = {
        "model": "gpt-4o",
        "messages": SIMPLE_MESSAGES,
        "tools": TOOL_DEFINITIONS,
    }
    token_counter(**call_kwargs)


@pytest.mark.benchmark
def test_token_counter_raw_text():
    """Run ``token_counter`` on a plain string passed via ``text`` (no messages)."""
    sentence = "The quick brown fox jumps over the lazy dog."
    token_counter(text=sentence, model="gpt-4o")


# ---------------------------------------------------------------------------
# Model info lookup benchmarks
# ---------------------------------------------------------------------------


@pytest.mark.benchmark
def test_get_model_info_openai():
    """Run ``litellm.get_model_info`` for the OpenAI model name ``gpt-4o``."""
    model_name = "gpt-4o"
    litellm.get_model_info(model_name)


@pytest.mark.benchmark
def test_get_model_info_anthropic():
    """Run ``litellm.get_model_info`` for an Anthropic (Claude) model name."""
    model_name = "claude-sonnet-4-20250514"
    litellm.get_model_info(model_name)


@pytest.mark.benchmark
def test_get_model_info_with_provider():
    """Run ``litellm.get_model_info`` on a provider-prefixed name with the provider given explicitly."""
    prefixed_name = "openai/gpt-4o"
    provider = "openai"
    litellm.get_model_info(prefixed_name, custom_llm_provider=provider)


# ---------------------------------------------------------------------------
# Provider resolution benchmarks
# ---------------------------------------------------------------------------


@pytest.mark.benchmark
def test_get_llm_provider_openai():
    """Run ``get_llm_provider`` on the bare OpenAI model name ``gpt-4o``."""
    model_name = "gpt-4o"
    get_llm_provider(model=model_name)


@pytest.mark.benchmark
def test_get_llm_provider_anthropic():
    """Run ``get_llm_provider`` on a bare Anthropic (Claude) model name."""
    model_name = "claude-sonnet-4-20250514"
    get_llm_provider(model=model_name)


@pytest.mark.benchmark
def test_get_llm_provider_with_prefix():
    """Run ``get_llm_provider`` on a model name that carries a ``provider/`` prefix."""
    prefixed_name = "openai/gpt-4o"
    get_llm_provider(model=prefixed_name)


@pytest.mark.benchmark
def test_get_llm_provider_azure():
    """Run ``get_llm_provider`` on an ``azure/``-prefixed model with an explicit ``api_base``."""
    deployment = "azure/gpt-4o"
    endpoint = "https://my-endpoint.openai.azure.com"
    get_llm_provider(model=deployment, api_base=endpoint)


# ---------------------------------------------------------------------------
# Cost calculation benchmarks
# ---------------------------------------------------------------------------


@pytest.mark.benchmark
def test_cost_per_token_openai():
    """Run ``litellm.cost_per_token`` for ``gpt-4o`` with 1000 prompt and 500 completion tokens."""
    usage = {"prompt_tokens": 1000, "completion_tokens": 500}
    litellm.cost_per_token(model="gpt-4o", **usage)


@pytest.mark.benchmark
def test_cost_per_token_anthropic():
    """Run ``litellm.cost_per_token`` for a Claude model with 1000 prompt and 500 completion tokens."""
    usage = {"prompt_tokens": 1000, "completion_tokens": 500}
    litellm.cost_per_token(model="claude-sonnet-4-20250514", **usage)


# ---------------------------------------------------------------------------
# Model cost key resolution benchmarks
# ---------------------------------------------------------------------------


@pytest.mark.benchmark
def test_get_model_cost_key_exact_match():
    """Run ``_get_model_cost_key`` with the key spelled ``gpt-4o`` (the exact-match scenario)."""
    candidate = "gpt-4o"
    litellm.utils._get_model_cost_key(candidate)


@pytest.mark.benchmark
def test_get_model_cost_key_case_insensitive():
    """Run ``_get_model_cost_key`` with the upper-cased spelling ``GPT-4o``.

    The differing case is intended to exercise the case-insensitive fallback.
    """
    candidate = "GPT-4o"
    litellm.utils._get_model_cost_key(candidate)