# File: litellm/tests/benchmarks/test_benchmarks.py
# Commit: be20a8a93d - Add CodSpeed performance benchmarks (#23676)
# Author: codspeed-hq[bot]
# Co-authored-by: codspeed-hq[bot] <117304815+codspeed-hq[bot]@users.noreply.github.com>
# Date: 2026-03-14 18:44:36 -07:00
# Size: 208 lines, 6.3 KiB, Python

"""
Performance benchmarks for litellm core operations.
These benchmarks measure the performance of frequently called functions
in the litellm hot path: token counting, model info lookup, provider
resolution, and cost calculation.
"""
import pytest
import litellm
from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
from litellm.litellm_core_utils.token_counter import token_counter
# ---------------------------------------------------------------------------
# Shared benchmark inputs (module-level constants, not pytest fixtures)
# ---------------------------------------------------------------------------
# Smallest chat payload used by the benchmarks: a single short user turn.
SIMPLE_MESSAGES = [
    {
        "role": "user",
        "content": "Hello, how are you?",
    },
]
# Six-message conversation: one system prompt followed by alternating
# user / assistant turns, ending on a user question.
MULTI_TURN_MESSAGES = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
    {
        "role": "assistant",
        "content": (
            "The capital of France is Paris. "
            "It is known as the City of Light."
        ),
    },
    {"role": "user", "content": "Tell me more about Paris."},
    {
        "role": "assistant",
        # Longest reply in the conversation; split across lines for readability only.
        "content": (
            "Paris is the capital and most populous city of France. "
            "With an estimated population of 2,165,423 in 2019, "
            "it is the centre of the Ile-de-France region. "
            "The city is a major European cultural and commercial centre."
        ),
    },
    {"role": "user", "content": "What are the top tourist attractions?"},
]
# Single user message whose content is a fixed prefix followed by the
# token "word " repeated 500 times, giving a long input.
LONG_CONTENT_MESSAGE = [
    {
        "role": "user",
        "content": f"Explain the following concept in detail: {'word ' * 500}",
    },
]
# One function-style tool definition ("get_weather") with a JSON-schema
# parameter object; passed as ``tools=`` in the token counting benchmark.
TOOL_DEFINITIONS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    # Free-form location string.
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    # Temperature unit, restricted to two values.
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                    },
                },
                "required": ["location"],
            },
        },
    },
]
# ---------------------------------------------------------------------------
# Token counting benchmarks
# ---------------------------------------------------------------------------
@pytest.mark.benchmark
def test_token_counter_simple_message():
    """Measure ``token_counter`` for gpt-4o on the one-message ``SIMPLE_MESSAGES`` chat."""
    token_counter(
        model="gpt-4o",
        messages=SIMPLE_MESSAGES,
    )
@pytest.mark.benchmark
def test_token_counter_multi_turn():
    """Measure ``token_counter`` for gpt-4o on the six-message ``MULTI_TURN_MESSAGES`` chat."""
    token_counter(
        model="gpt-4o",
        messages=MULTI_TURN_MESSAGES,
    )
@pytest.mark.benchmark
def test_token_counter_long_content():
    """Measure ``token_counter`` for gpt-4o on the long single message in ``LONG_CONTENT_MESSAGE``."""
    token_counter(
        model="gpt-4o",
        messages=LONG_CONTENT_MESSAGE,
    )
@pytest.mark.benchmark
def test_token_counter_with_tools():
    """Measure ``token_counter`` for gpt-4o when ``TOOL_DEFINITIONS`` accompany a short chat."""
    token_counter(model="gpt-4o", messages=SIMPLE_MESSAGES, tools=TOOL_DEFINITIONS)
@pytest.mark.benchmark
def test_token_counter_raw_text():
    """Measure ``token_counter`` for gpt-4o on a plain ``text=`` string instead of messages."""
    token_counter(
        model="gpt-4o",
        text="The quick brown fox jumps over the lazy dog.",
    )
# ---------------------------------------------------------------------------
# Model info lookup benchmarks
# ---------------------------------------------------------------------------
@pytest.mark.benchmark
def test_get_model_info_openai():
    """Measure ``litellm.get_model_info`` for the bare OpenAI model name "gpt-4o"."""
    litellm.get_model_info("gpt-4o")
@pytest.mark.benchmark
def test_get_model_info_anthropic():
    """Measure ``litellm.get_model_info`` for the Anthropic model "claude-sonnet-4-20250514"."""
    litellm.get_model_info("claude-sonnet-4-20250514")
@pytest.mark.benchmark
def test_get_model_info_with_provider():
    """Measure ``litellm.get_model_info`` for a provider-prefixed name with the provider also passed explicitly."""
    litellm.get_model_info(
        "openai/gpt-4o",
        custom_llm_provider="openai",
    )
# ---------------------------------------------------------------------------
# Provider resolution benchmarks
# ---------------------------------------------------------------------------
@pytest.mark.benchmark
def test_get_llm_provider_openai():
    """Measure ``get_llm_provider`` for the unprefixed OpenAI model name "gpt-4o"."""
    get_llm_provider(
        model="gpt-4o",
    )
@pytest.mark.benchmark
def test_get_llm_provider_anthropic():
    """Measure ``get_llm_provider`` for the unprefixed Anthropic model name "claude-sonnet-4-20250514"."""
    get_llm_provider(
        model="claude-sonnet-4-20250514",
    )
@pytest.mark.benchmark
def test_get_llm_provider_with_prefix():
    """Measure ``get_llm_provider`` when the model string carries an "openai/" prefix."""
    get_llm_provider(
        model="openai/gpt-4o",
    )
@pytest.mark.benchmark
def test_get_llm_provider_azure():
    """Measure ``get_llm_provider`` for an "azure/"-prefixed model with an explicit ``api_base``."""
    # The endpoint URL is only passed as an argument here; nothing in this
    # test opens a connection to it.
    get_llm_provider(
        model="azure/gpt-4o",
        api_base="https://my-endpoint.openai.azure.com",
    )
# ---------------------------------------------------------------------------
# Cost calculation benchmarks
# ---------------------------------------------------------------------------
@pytest.mark.benchmark
def test_cost_per_token_openai():
    """Measure ``litellm.cost_per_token`` for gpt-4o with 1000 prompt and 500 completion tokens."""
    litellm.cost_per_token(model="gpt-4o", prompt_tokens=1000, completion_tokens=500)
@pytest.mark.benchmark
def test_cost_per_token_anthropic():
    """Measure ``litellm.cost_per_token`` for claude-sonnet-4-20250514 with 1000 prompt and 500 completion tokens."""
    # Same token counts as the OpenAI cost benchmark; only the model differs.
    litellm.cost_per_token(
        model="claude-sonnet-4-20250514",
        prompt_tokens=1000,
        completion_tokens=500,
    )
# ---------------------------------------------------------------------------
# Model cost key resolution benchmarks
# ---------------------------------------------------------------------------
@pytest.mark.benchmark
def test_get_model_cost_key_exact_match():
    """Measure ``litellm.utils._get_model_cost_key`` for the lowercase key "gpt-4o"."""
    # NOTE(review): presumably "gpt-4o" matches a cost-map key verbatim,
    # which is what "exact match" in the test name refers to - confirm.
    litellm.utils._get_model_cost_key("gpt-4o")
@pytest.mark.benchmark
def test_get_model_cost_key_case_insensitive():
    """Measure ``litellm.utils._get_model_cost_key`` for "GPT-4o", a differently-cased spelling."""
    # NOTE(review): the upper-case spelling is intended to miss the exact
    # lookup and go through the helper's case-insensitive fallback - confirm
    # against the helper's implementation.
    litellm.utils._get_model_cost_key("GPT-4o")