mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-24 23:38:15 +00:00
66fafa3a7f
* self._pretty_print_invalid_metric_error * docs prometheus.md * test prom validation checks * update metric name * fix _pretty_print_validation_errors * fix linting * test prometheus * test fixes - prometheus
837 lines
29 KiB
Python
837 lines
29 KiB
Python
"""
|
|
Mock prometheus unit tests, these don't rely on LLM API calls
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../../..")
|
|
) # Adds the parent directory to the system path
|
|
|
|
from unittest.mock import patch
|
|
|
|
import pytest_asyncio
|
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
|
|
# Add prometheus_client import for registry cleanup
|
|
from prometheus_client import REGISTRY
|
|
|
|
import litellm
|
|
from litellm.constants import PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES
|
|
from litellm.integrations.prometheus import PrometheusLogger, prometheus_label_factory
|
|
from litellm.types.integrations.prometheus import (
|
|
PrometheusMetricLabels,
|
|
PrometheusMetricsConfig,
|
|
UserAPIKeyLabelValues,
|
|
)
|
|
|
|
|
|
@pytest.fixture
def prometheus_logger() -> PrometheusLogger:
    """
    Provide a ``PrometheusLogger`` built on an emptied global registry.

    All collectors currently registered on ``REGISTRY`` are unregistered
    before the logger is constructed. This prevents
    "Duplicated timeseries in CollectorRegistry" errors.

    Returns:
        A newly constructed ``PrometheusLogger``.
    """
    # Reuse the module-level helper rather than duplicating its unregister loop.
    clear_prometheus_registry()
    return PrometheusLogger()
|
|
|
|
|
|
def clear_prometheus_registry():
    """Unregister every collector currently held by the global ``REGISTRY``."""
    # Snapshot the keys first: unregistering while iterating the live mapping
    # would change it underneath the loop.
    registered = list(REGISTRY._collector_to_names.keys())
    while registered:
        REGISTRY.unregister(registered.pop(0))
|
|
|
|
|
|
def test_initialize_budget_metrics_cron_job():
    """
    Check that ``initialize_budget_metrics_cron_job`` schedules one refresh job.

    A ``PrometheusLogger`` is installed as the only litellm callback, the
    cron job is initialised on a fresh ``AsyncIOScheduler``, and the test
    asserts that exactly one job exists, that its interval matches
    ``PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES`` and that it targets
    ``initialize_remaining_budget_metrics``.
    """
    # Clear registry before test
    clear_prometheus_registry()

    # Create a scheduler
    scheduler = AsyncIOScheduler()

    # Create and register a PrometheusLogger. The previous callbacks are
    # remembered so this test does not leak its logger into other tests.
    prometheus_logger = PrometheusLogger()
    original_callbacks = litellm.callbacks
    litellm.callbacks = [prometheus_logger]

    try:
        # Initialize the cron job
        PrometheusLogger.initialize_budget_metrics_cron_job(scheduler)

        # Verify that a job was added to the scheduler
        jobs = scheduler.get_jobs()
        assert len(jobs) == 1

        # Verify job properties
        job = jobs[0]
        assert (
            job.trigger.interval.total_seconds() / 60
            == PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES
        )
        assert job.func.__name__ == "initialize_remaining_budget_metrics"
    finally:
        # Restore the global callback list
        litellm.callbacks = original_callbacks
|
|
|
|
|
|
def test_end_user_not_tracked_for_all_prometheus_metrics():
    """
    Check that ``end_user`` is dropped from Prometheus labels unless opted in.

    The test:
    1. Collects every label list declared on ``PrometheusMetricLabels`` that
       contains ``end_user`` (so metrics added later are covered as well).
    2. Confirms ``prometheus_label_factory`` yields no ``end_user`` value for
       any of them while the opt-in flag is unset.
    3. Sets ``litellm.enable_end_user_cost_tracking_prometheus_only`` to True
       and confirms the ``end_user`` value is then passed through.
    """
    # Remember the current flag value, then force the default behaviour.
    previous_flag = getattr(
        litellm, "enable_end_user_cost_tracking_prometheus_only", None
    )
    litellm.enable_end_user_cost_tracking_prometheus_only = None

    try:
        test_end_user_id = "test_user_123"
        enum_values = UserAPIKeyLabelValues(
            end_user=test_end_user_id,
            hashed_api_key="test_key",
            api_key_alias="test_alias",
            team="test_team",
            team_alias="test_team_alias",
            user="test_user",
            requested_model="gpt-4",
            model="gpt-4",
            litellm_model_name="gpt-4",
        )

        def _declares_end_user(attr_name):
            # Skip private/dunder attributes and the get_labels accessor.
            if attr_name.startswith("_") or attr_name == "get_labels":
                return False
            declared = getattr(PrometheusMetricLabels, attr_name)
            return isinstance(declared, list) and "end_user" in declared

        metrics_with_end_user = [
            attr_name
            for attr_name in PrometheusMetricLabels.__dict__
            if _declares_end_user(attr_name)
        ]

        # Sanity check: the scan above must have found something to test.
        assert metrics_with_end_user, "No metrics with end_user found - test setup issue"

        for metric_name in metrics_with_end_user:
            supported_labels = PrometheusMetricLabels.get_labels(metric_name)

            # Before filtering, end_user is part of the metric's label set.
            assert (
                "end_user" in supported_labels
            ), f"end_user should be in {metric_name} labels"

            filtered_labels = prometheus_label_factory(
                supported_enum_labels=supported_labels, enum_values=enum_values
            )
            print("filtered labels logged on prometheus=", filtered_labels)

            # After filtering, the end_user value must be gone.
            observed_end_user = filtered_labels.get("end_user")
            assert observed_end_user is None, (
                f"end_user should be None for metric {metric_name} when "
                f"enable_end_user_cost_tracking_prometheus_only is not True. "
                f"Got: {observed_end_user}"
            )

        # Opt in and re-check a single metric.
        litellm.enable_end_user_cost_tracking_prometheus_only = True

        test_metric = metrics_with_end_user[0]
        filtered_labels = prometheus_label_factory(
            supported_enum_labels=PrometheusMetricLabels.get_labels(test_metric),
            enum_values=enum_values,
        )

        assert filtered_labels.get("end_user") == test_end_user_id, (
            f"end_user should be present for metric {test_metric} when "
            f"enable_end_user_cost_tracking_prometheus_only is True"
        )
    finally:
        # Put the flag back the way it was found.
        litellm.enable_end_user_cost_tracking_prometheus_only = previous_flag
|
|
|
|
|
|
def test_future_metrics_with_end_user_are_filtered():
    """
    Check that a not-yet-existing metric listing ``end_user`` is filtered too.

    A hand-written label list stands in for a metric that might be added
    later; ``prometheus_label_factory`` must drop ``end_user`` from it by
    default while still returning the other supplied label values.
    """
    # Remember the current flag value, then force the default behaviour.
    previous_flag = getattr(
        litellm, "enable_end_user_cost_tracking_prometheus_only", None
    )
    litellm.enable_end_user_cost_tracking_prometheus_only = None

    try:
        # Label list of the simulated new metric; "new_label" represents a
        # label that might be introduced in the future.
        simulated_new_metric_labels = [
            "end_user",
            "hashed_api_key",
            "api_key_alias",
            "model",
            "team",
            "new_label",
        ]

        test_end_user_id = "future_test_user"
        enum_values = UserAPIKeyLabelValues(
            end_user=test_end_user_id,
            hashed_api_key="test_key",
            api_key_alias="test_alias",
            team="test_team",
            model="gpt-4",
        )

        filtered_labels = prometheus_label_factory(
            supported_enum_labels=simulated_new_metric_labels,
            enum_values=enum_values,
        )
        print("filtered labels logged on prometheus=", filtered_labels)

        # end_user is removed even for this "new" metric ...
        assert (
            filtered_labels.get("end_user") is None
        ), "end_user should be filtered out for future metrics by default"

        # ... while the remaining labels keep their values.
        assert filtered_labels.get("hashed_api_key") == "test_key"
        assert filtered_labels.get("team") == "test_team"
    finally:
        # Put the flag back the way it was found.
        litellm.enable_end_user_cost_tracking_prometheus_only = previous_flag
|
|
|
|
|
|
def test_prometheus_config_parsing():
    """
    Test that prometheus metrics configuration is parsed correctly.

    Installs a single-group config, builds a ``PrometheusLogger`` and checks
    that ``_parse_prometheus_config`` returns, for every configured metric,
    exactly the configured ``include_labels`` list.
    """
    # Clear registry before test
    clear_prometheus_registry()

    expected_metrics = [
        "litellm_deployment_failure_responses",
        "litellm_deployment_total_requests",
        "litellm_proxy_failed_requests_metric",
        "litellm_proxy_total_requests_metric",
    ]
    expected_labels = [
        "requested_model",
        "team",
    ]

    # Set up test configuration (copies, so the expectations stay untouched
    # even if the config lists are modified by the code under test)
    test_config = [
        {
            "group": "service_metrics",
            "metrics": list(expected_metrics),
            "include_labels": list(expected_labels),
        }
    ]

    # Set configuration, remembering the previous value so the global state
    # does not leak into other tests
    original_config = getattr(litellm, "prometheus_metrics_config", None)
    litellm.prometheus_metrics_config = test_config

    try:
        # Create PrometheusLogger instance
        logger = PrometheusLogger()

        # Parse configuration
        label_filters = logger._parse_prometheus_config()

        # Verify label filters exist for each metric
        for metric in expected_metrics:
            assert metric in label_filters
            assert label_filters[metric] == expected_labels
    finally:
        # Restore original configuration
        litellm.prometheus_metrics_config = original_config
|
|
|
|
|
|
def test_get_metric_labels():
    """
    Test that metric label filtering works correctly.

    With ``include_labels`` configured for a metric,
    ``get_labels_for_metric`` must return those labels and nothing else.
    """
    # Clear registry before test
    clear_prometheus_registry()

    allowed_labels = ["litellm_model_name", "api_provider"]

    # Set up test configuration
    test_config = [
        {
            "group": "service_metrics",
            "metrics": ["litellm_deployment_failure_responses"],
            "include_labels": list(allowed_labels),
        }
    ]

    # Remember the previous value so the global state does not leak into
    # other tests
    original_config = getattr(litellm, "prometheus_metrics_config", None)
    litellm.prometheus_metrics_config = test_config

    try:
        logger = PrometheusLogger()

        # Get filtered labels
        labels = logger.get_labels_for_metric("litellm_deployment_failure_responses")

        # Verify only configured labels are returned
        assert "litellm_model_name" in labels
        assert "api_provider" in labels

        # Everything else should be filtered out even if it is in the
        # default labels
        unexpected_labels = [label for label in labels if label not in allowed_labels]
        assert not unexpected_labels, f"Unexpected labels returned: {unexpected_labels}"
    finally:
        # Restore original configuration
        litellm.prometheus_metrics_config = original_config
|
|
|
|
|
|
def test_no_prometheus_config():
    """
    Test behavior when no prometheus config is set.

    With ``litellm.prometheus_metrics_config`` set to None,
    ``get_labels_for_metric`` must still return a non-empty list of labels.
    """
    # Clear registry before test
    clear_prometheus_registry()

    # Clear any existing config, remembering it so it can be restored and
    # this test does not alter the global state seen by other tests
    original_config = getattr(litellm, "prometheus_metrics_config", None)
    litellm.prometheus_metrics_config = None

    try:
        logger = PrometheusLogger()

        # Should return default labels when no config is set
        labels = logger.get_labels_for_metric("litellm_deployment_failure_responses")

        # Should return some labels (the default ones)
        assert isinstance(labels, list)
        # Should have more than 0 labels (the default ones)
        assert len(labels) > 0
    finally:
        # Restore original configuration
        litellm.prometheus_metrics_config = original_config
|
|
|
|
|
|
def test_prometheus_metrics_config_type():
    """
    Exercise construction of ``PrometheusMetricsConfig``.

    Builds one instance with every field populated and one with
    ``include_labels=None``, then checks the attributes read back as given.
    """
    # Fully populated configuration
    populated = PrometheusMetricsConfig(
        group="service_metrics",
        metrics=["litellm_deployment_failure_responses"],
        include_labels=["litellm_model_name"],
    )
    assert populated.group == "service_metrics"
    assert populated.metrics == ["litellm_deployment_failure_responses"]
    assert populated.include_labels == ["litellm_model_name"]

    # include_labels may be None
    without_labels = PrometheusMetricsConfig(
        group="service_metrics",
        metrics=["litellm_deployment_failure_responses"],
        include_labels=None,
    )
    assert without_labels.include_labels is None

    print("PrometheusMetricsConfig type validation passed!")
|
|
|
|
|
|
def test_basic_functionality():
    """
    Test basic functionality without creating multiple instances.

    Only checks that a config assigned to
    ``litellm.prometheus_metrics_config`` can be read back unchanged; no
    ``PrometheusLogger`` is constructed here.
    """
    # Clear registry before test
    clear_prometheus_registry()

    # Set up test configuration
    test_config = [
        {
            "group": "service_metrics",
            "metrics": [
                "litellm_deployment_failure_responses",
                "litellm_deployment_total_requests",
            ],
            "include_labels": ["litellm_model_name", "api_provider"],
        }
    ]

    # Set configuration, remembering the previous value so the global state
    # does not leak into other tests
    original_config = getattr(litellm, "prometheus_metrics_config", None)
    litellm.prometheus_metrics_config = test_config

    try:
        # Test that the configuration is properly set
        assert litellm.prometheus_metrics_config is not None
        assert len(litellm.prometheus_metrics_config) == 1
        assert litellm.prometheus_metrics_config[0]["group"] == "service_metrics"
        assert (
            "litellm_deployment_failure_responses"
            in litellm.prometheus_metrics_config[0]["metrics"]
        )

        print("Basic prometheus configuration test passed!")
    finally:
        # Restore original configuration
        litellm.prometheus_metrics_config = original_config
|
|
|
|
|
|
# ==============================================================================
|
|
# VALIDATION TESTS - Test the new validation logic for metrics and labels
|
|
# ==============================================================================
|
|
|
|
def test_invalid_metric_name_validation():
    """
    Test that invalid metric names are caught and raise ValueError.

    The config mixes one unknown metric name with one valid name;
    constructing ``PrometheusLogger`` must raise ``ValueError`` whose message
    names the unknown metric and contains "Configuration validation failed".
    """
    # Clear registry before test
    clear_prometheus_registry()

    # Set up test configuration with invalid metric name
    test_config = [
        {
            "group": "service_metrics",
            "metrics": [
                "invalid_metric_name_that_does_not_exist",
                "litellm_deployment_total_requests",  # valid metric
            ],
            "include_labels": ["litellm_model_name"],
        }
    ]

    # Remember the previous value: leaving this deliberately invalid config
    # installed would affect any later test that builds a PrometheusLogger
    original_config = getattr(litellm, "prometheus_metrics_config", None)
    litellm.prometheus_metrics_config = test_config

    try:
        # Creating PrometheusLogger should raise ValueError due to invalid metric
        with pytest.raises(ValueError) as exc_info:
            PrometheusLogger()

        # Verify error message contains information about invalid metric
        error_message = str(exc_info.value)
        assert "invalid_metric_name_that_does_not_exist" in error_message
        assert "Configuration validation failed" in error_message
    finally:
        # Restore original configuration
        litellm.prometheus_metrics_config = original_config
|
|
|
|
|
|
def test_invalid_labels_validation():
    """
    Test that invalid labels for metrics are caught and raise ValueError.

    The config lists one valid and two unknown labels for a valid metric;
    constructing ``PrometheusLogger`` must raise ``ValueError`` whose message
    mentions ``invalid_label_name`` and "Configuration validation failed".
    """
    # Clear registry before test
    clear_prometheus_registry()

    # Set up test configuration with invalid labels
    test_config = [
        {
            "group": "service_metrics",
            "metrics": ["litellm_deployment_total_requests"],
            "include_labels": [
                "litellm_model_name",  # valid label
                "invalid_label_name",  # invalid label
                "another_invalid_label",  # another invalid label
            ],
        }
    ]

    # Remember the previous value: leaving this deliberately invalid config
    # installed would affect any later test that builds a PrometheusLogger
    original_config = getattr(litellm, "prometheus_metrics_config", None)
    litellm.prometheus_metrics_config = test_config

    try:
        # Creating PrometheusLogger should raise ValueError due to invalid labels
        with pytest.raises(ValueError) as exc_info:
            PrometheusLogger()

        # Verify error message contains information about invalid labels
        error_message = str(exc_info.value)
        assert "invalid_label_name" in error_message
        assert "Configuration validation failed" in error_message
    finally:
        # Restore original configuration
        litellm.prometheus_metrics_config = original_config
|
|
|
|
|
|
def test_valid_configuration_passes_validation():
    """
    Test that valid configuration passes validation without errors.

    Constructing ``PrometheusLogger`` with a config made only of valid
    metrics and labels must not raise, and both configured metrics must
    appear in ``logger.enabled_metrics``.
    """
    # Clear registry before test
    clear_prometheus_registry()

    # Set up test configuration with all valid metrics and labels
    test_config = [
        {
            "group": "service_metrics",
            "metrics": [
                "litellm_deployment_total_requests",
                "litellm_deployment_failure_responses",
            ],
            "include_labels": [
                "litellm_model_name",
                "api_provider",
                "requested_model",
            ],
        }
    ]

    # Remember the previous value so the global state does not leak into
    # other tests
    original_config = getattr(litellm, "prometheus_metrics_config", None)
    litellm.prometheus_metrics_config = test_config

    try:
        # Only the constructor is guarded: keeping the assertions outside
        # this inner try prevents their AssertionError from being caught and
        # misreported as a constructor failure.
        try:
            logger = PrometheusLogger()
        except Exception as e:
            pytest.fail(f"Valid configuration should not raise exception: {e}")

        # Verify the logger was created successfully
        assert logger is not None
        assert hasattr(logger, "enabled_metrics")
        assert "litellm_deployment_total_requests" in logger.enabled_metrics
        assert "litellm_deployment_failure_responses" in logger.enabled_metrics
    finally:
        # Restore original configuration
        litellm.prometheus_metrics_config = original_config
|
|
|
|
|
|
# ==============================================================================
|
|
# END VALIDATION TESTS
|
|
# ==============================================================================
|
|
|
|
|
|
# ==============================================================================
|
|
# SEMANTIC VALIDATION TESTS - Detect logical errors in metric increments
|
|
# ==============================================================================
|
|
|
|
|
|
class MockCounter:
    """Stand-in for a counter metric that records how it is called."""

    def __init__(self, name: str) -> None:
        self.name = name
        # One entry per labels() call: the keyword arguments it received.
        self.labels_calls: list = []
        # One entry per inc() call: the increment amount.
        self.inc_calls: list = []

    def labels(self, *args, **kwargs) -> "MockCounter":
        """Record the keyword labels and return ``self`` so calls can chain.

        Positional arguments are accepted but not recorded.
        """
        recorded = kwargs
        self.labels_calls.append(recorded)
        return self

    def inc(self, value=1) -> None:
        """Record an increment of ``value`` (1 when omitted)."""
        self.inc_calls.append(value)
|
|
|
|
|
|
class MockHistogram:
    """Stand-in for a histogram metric that records how it is called."""

    def __init__(self, name: str) -> None:
        self.name = name
        # One entry per labels() call: the keyword arguments it received.
        self.labels_calls: list = []
        # One entry per observe() call: the observed value.
        self.observe_calls: list = []

    def labels(self, *args, **kwargs) -> "MockHistogram":
        """Record the keyword labels and return ``self`` so calls can chain.

        Positional arguments are accepted but not recorded.
        """
        recorded = kwargs
        self.labels_calls.append(recorded)
        return self

    def observe(self, value) -> None:
        """Record an observation of ``value``."""
        self.observe_calls.append(value)
|
|
|
|
|
|
@pytest.fixture
def mock_prometheus_logger():
    """
    Create a PrometheusLogger with mocked metrics to test increment logic.

    The global registry is emptied, a ``PrometheusLogger`` is constructed
    while ``litellm.proxy.proxy_server.premium_user`` is patched to True, and
    six of its metric attributes are replaced with ``MockCounter`` instances
    so tests can inspect the recorded ``inc`` calls.

    Returns:
        The ``PrometheusLogger`` with the mocked counters attached.
    """
    # Reuse the module-level helper rather than duplicating its unregister
    # loop; `patch` comes from the module-level unittest.mock import.
    clear_prometheus_registry()

    with patch("litellm.proxy.proxy_server.premium_user", True):
        logger = PrometheusLogger()

        # Replace metrics with mocks to capture increment calls
        logger.litellm_proxy_total_requests_metric = MockCounter(
            "litellm_proxy_total_requests_metric"
        )
        logger.litellm_tokens_metric = MockCounter("litellm_total_tokens")
        logger.litellm_input_tokens_metric = MockCounter("litellm_input_tokens")
        logger.litellm_output_tokens_metric = MockCounter("litellm_output_tokens")
        logger.litellm_spend_metric = MockCounter("litellm_spend_metric")
        logger.litellm_requests_metric = MockCounter("litellm_requests_metric")

        return logger
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_request_counter_semantic_validation(mock_prometheus_logger):
    """
    CRITICAL TEST: Validates that request counters are incremented by 1, not by token count.

    This test specifically catches the bug where litellm_proxy_total_requests_metric
    is incorrectly incremented by total_tokens instead of 1.

    Flow checked here:
    1. ``async_log_success_event`` must leave the request counter untouched.
    2. ``async_post_call_success_hook`` must increment it exactly once, by 1.
    3. The token counter must receive the ``total_tokens`` value (999).
    """
    from datetime import datetime, timedelta
    from unittest.mock import MagicMock

    from litellm.proxy._types import UserAPIKeyAuth

    # Test data with large token count that should NOT affect request counter
    kwargs = {
        "model": "gpt-3.5-turbo",
        "litellm_params": {"metadata": {}},
        "start_time": datetime.now() - timedelta(seconds=1),
        "end_time": datetime.now(),
        "api_call_start_time": datetime.now() - timedelta(seconds=0.5),
        "standard_logging_object": {
            "total_tokens": 999,  # Large number - this should NOT be used for request counter
            "prompt_tokens": 600,
            "completion_tokens": 399,
            "response_cost": 0.005,
            "model_group": "gpt-3.5-turbo",
            "model_id": "test-model-id",
            "api_base": "https://api.openai.com/v1",
            "custom_llm_provider": "openai",
            "stream": False,
            "request_tags": [],
            "metadata": {
                "user_api_key_user_id": "test-user",
                "user_api_key_hash": "test-hash",
                "user_api_key_alias": "test-alias",
                "user_api_key_team_id": "test-team",
                "user_api_key_team_alias": "test-team-alias",
                "user_api_key_user_email": "test@example.com",
            },
            "hidden_params": {
                "additional_headers": {},
            },
        },
    }

    # Call the success event
    await mock_prometheus_logger.async_log_success_event(
        kwargs, None, kwargs["start_time"], kwargs["end_time"]
    )

    # CRITICAL ASSERTION: Request counter should not be incremented
    total_requests_metric = mock_prometheus_logger.litellm_proxy_total_requests_metric
    assert (
        len(total_requests_metric.inc_calls) == 0
    ), "Request metric should not be incremented"

    # Call the post-call logging hook
    await mock_prometheus_logger.async_post_call_success_hook(
        data={},
        user_api_key_dict=UserAPIKeyAuth(
            end_user="test-user",
            hashed_api_key="test-hash",
            api_key_alias="test-alias",
            team="test-team",
            model="gpt-4",
        ),
        response=MagicMock(),
    )

    # CRITICAL ASSERTION: Request counter should be incremented exactly once
    total_requests_metric = mock_prometheus_logger.litellm_proxy_total_requests_metric
    assert (
        len(total_requests_metric.inc_calls) == 1
    ), "Request metric should be incremented exactly once by the post-call success hook"

    # Check that ALL request counter increments are by 1 (not by token count)
    for inc_value in total_requests_metric.inc_calls:
        assert inc_value == 1, (
            f"SEMANTIC BUG DETECTED: Request counter incremented by {inc_value} instead of 1. "
            f"This indicates the bug where request counters are incremented by token counts."
        )

    # Verify token counters ARE incremented by token counts (this should work correctly)
    tokens_metric = mock_prometheus_logger.litellm_tokens_metric
    assert (
        999 in tokens_metric.inc_calls
    ), "Token metric should be incremented by total_tokens (999)"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_multiple_requests_counter_semantics(mock_prometheus_logger):
    """
    Test counter totals after several success events with large token counts.

    ``async_log_success_event`` is called ``num_requests`` times. The proxy
    request counter must stay at 0 after these calls (a total equal to the
    token sum would reveal it being incremented by token counts), while the
    token counter must sum to ``num_requests * tokens_per_request``.
    """
    from datetime import datetime, timedelta

    num_requests = 3
    tokens_per_request = 500  # High token count to make the bug obvious

    for _ in range(num_requests):
        kwargs = {
            "model": "gpt-3.5-turbo",
            "litellm_params": {"metadata": {}},
            "start_time": datetime.now() - timedelta(seconds=1),
            "end_time": datetime.now(),
            "api_call_start_time": datetime.now() - timedelta(seconds=0.5),
            "standard_logging_object": {
                "total_tokens": tokens_per_request,
                "prompt_tokens": tokens_per_request // 2,
                "completion_tokens": tokens_per_request // 2,
                "response_cost": 0.001,
                "model_group": "gpt-3.5-turbo",
                "model_id": "test-model-id",
                "api_base": "https://api.openai.com/v1",
                "custom_llm_provider": "openai",
                "stream": False,
                "request_tags": [],
                "metadata": {
                    "user_api_key_user_id": "test-user",
                    "user_api_key_hash": "test-hash",
                    "user_api_key_alias": "test-alias",
                    "user_api_key_team_id": "test-team",
                    "user_api_key_team_alias": "test-team-alias",
                    "user_api_key_user_email": "test@example.com",
                },
                "hidden_params": {
                    "additional_headers": {},
                },
            },
        }

        await mock_prometheus_logger.async_log_success_event(
            kwargs, None, kwargs["start_time"], kwargs["end_time"]
        )

    # Calculate total increments
    total_request_increments = sum(
        mock_prometheus_logger.litellm_proxy_total_requests_metric.inc_calls
    )
    total_token_increments = sum(mock_prometheus_logger.litellm_tokens_metric.inc_calls)

    expected_total_tokens = num_requests * tokens_per_request  # 3 * 500 = 1500

    # CRITICAL ASSERTION: the success event must not touch the request
    # counter at all. With the bug, total_request_increments would be 1500.
    assert total_request_increments == 0, (
        f"SEMANTIC BUG: Request counter total increments = {total_request_increments}, "
        f"expected 0 after {num_requests} success events. A total of "
        f"{expected_total_tokens} would mean request counters are being incremented "
        f"by token counts instead of request counts."
    )

    # Token counter should correctly equal total tokens
    assert (
        total_token_increments == expected_total_tokens
    ), f"Token counter should sum to {expected_total_tokens}, got {total_token_increments}"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_streaming_request_counter_semantics(mock_prometheus_logger):
    """
    Check that a streaming success event never bumps the request counter by
    anything other than 1.

    A payload with ``"stream": True`` and 750 total tokens is passed to
    ``async_log_success_event``; every increment recorded on the mocked
    request counter must equal 1.
    """
    from datetime import datetime, timedelta

    kwargs = {
        "model": "gpt-3.5-turbo",
        "litellm_params": {"metadata": {}},
        "start_time": datetime.now() - timedelta(seconds=1),
        "end_time": datetime.now(),
        "api_call_start_time": datetime.now() - timedelta(seconds=0.5),
        "standard_logging_object": {
            "total_tokens": 750,  # deliberately large so a mix-up is obvious
            "prompt_tokens": 300,
            "completion_tokens": 450,
            "response_cost": 0.003,
            "model_group": "gpt-3.5-turbo",
            "model_id": "test-model-id",
            "api_base": "https://api.openai.com/v1",
            "custom_llm_provider": "openai",
            "stream": True,  # marks the request as streaming
            "request_tags": [],
            "metadata": {
                "user_api_key_user_id": "test-user",
                "user_api_key_hash": "test-hash",
                "user_api_key_alias": "test-alias",
                "user_api_key_team_id": "test-team",
                "user_api_key_team_alias": "test-team-alias",
                "user_api_key_user_email": "test@example.com",
            },
            "hidden_params": {
                "additional_headers": {},
            },
        },
    }

    started_at = kwargs["start_time"]
    ended_at = kwargs["end_time"]
    await mock_prometheus_logger.async_log_success_event(
        kwargs, None, started_at, ended_at
    )

    # NOTE(review): if async_log_success_event records no increments on this
    # counter, the loop body never runs - confirm the intended coverage.
    recorded_increments = (
        mock_prometheus_logger.litellm_proxy_total_requests_metric.inc_calls
    )
    for inc_value in recorded_increments:
        assert (
            inc_value == 1
        ), f"SEMANTIC BUG: Streaming request counter incremented by {inc_value} instead of 1"
|
|
|
|
|
|
def test_metric_increment_invariants():
    """
    Test invariants that should always hold for different metric types.

    This test only inspects hard-coded sample values; it does not call into
    ``PrometheusLogger``. It documents which magnitudes are considered
    token-count-sized versus reasonable per-request costs.
    """
    # Invariant 1: Request counters should never be incremented by large values.
    # A request counter moves by 1; anything above this threshold looks like
    # a token count.
    max_plausible_request_increment = 10
    suspicious_request_increments = [
        100,
        500,
        1000,
        1500,
    ]  # These look like token counts
    for increment in suspicious_request_increments:
        # Each sample must sit above the threshold to count as suspicious.
        assert increment > max_plausible_request_increment, (
            f"Sample value {increment} is not above {max_plausible_request_increment}, "
            f"so it does not represent a token-count-sized request increment"
        )

    # Invariant 2: Token counters should never be incremented by 1 (unless it's a 1-token response)
    # This would indicate the reverse bug (using request count for token counter)

    # Invariant 3: Cost increments should be small positive floats
    reasonable_costs = [0.001, 0.01, 0.1, 1.0]
    for cost in reasonable_costs:
        assert 0 < cost < 100, f"Cost {cost} should be in reasonable range"
|
|
|
|
|
|
def test_token_counter_semantics():
    """
    Test that token counters should be incremented by actual token values, not by 1.

    This test only inspects hard-coded sample values and documents the
    expected behavior; it does not call into ``PrometheusLogger``.
    """
    # These are correct patterns for token counters
    correct_token_increments = [50, 100, 250, 500, 1000, 2000]

    for tokens in correct_token_increments:
        # Token counters should be incremented by actual token counts
        assert tokens > 1, (
            f"Token increment of {tokens} is not greater than 1, "
            f"so it looks like a request count rather than a token count"
        )

    # An increment of exactly 1 would be incorrect for a token counter
    # (it suggests using the request count for tokens), unless the response
    # really contained a single token.

    # This test documents the expected behavior - token counters should use token values
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_spend_counter_semantics(mock_prometheus_logger):
    """
    Check that the spend counter receives the response cost.

    A payload with ``response_cost`` 0.0015 is passed to
    ``async_log_success_event``; the mocked spend counter must have recorded
    at least one increment, and 0.0015 must be among the recorded values.
    """
    from datetime import datetime, timedelta

    kwargs = {
        "model": "gpt-3.5-turbo",
        "litellm_params": {"metadata": {}},
        "start_time": datetime.now() - timedelta(seconds=1),
        "end_time": datetime.now(),
        "api_call_start_time": datetime.now() - timedelta(seconds=0.5),
        "standard_logging_object": {
            "total_tokens": 100,
            "prompt_tokens": 60,
            "completion_tokens": 40,
            "response_cost": 0.0015,  # the value the spend counter should see
            "model_group": "gpt-3.5-turbo",
            "model_id": "test-model-id",
            "api_base": "https://api.openai.com/v1",
            "custom_llm_provider": "openai",
            "stream": False,
            "request_tags": [],
            "metadata": {
                "user_api_key_user_id": "test-user",
                "user_api_key_hash": "test-hash",
                "user_api_key_alias": "test-alias",
                "user_api_key_team_id": "test-team",
                "user_api_key_team_alias": "test-team-alias",
                "user_api_key_user_email": "test@example.com",
            },
            "hidden_params": {
                "additional_headers": {},
            },
        },
    }

    started_at = kwargs["start_time"]
    ended_at = kwargs["end_time"]
    await mock_prometheus_logger.async_log_success_event(
        kwargs, None, started_at, ended_at
    )

    # The spend counter must have been touched, and with the cost amount.
    recorded_spend = mock_prometheus_logger.litellm_spend_metric.inc_calls
    assert len(recorded_spend) > 0, "Spend metric should be incremented"
    assert (
        0.0015 in recorded_spend
    ), "Spend metric should be incremented by response_cost (0.0015)"
|
|
|
|
|
|
# ==============================================================================
|
|
# END SEMANTIC VALIDATION TESTS
|
|
# ==============================================================================
|