mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 20:48:32 +00:00
92db2df2f6
Bedrock: move native structured output model list to cost JSON, add Sonnet 4.6
3867 lines
148 KiB
Python
3867 lines
148 KiB
Python
import json
|
|
import os
|
|
import sys
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
from jsonschema import validate
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../..")
|
|
) # Adds the parent directory to the system path
|
|
|
|
import litellm
|
|
from litellm.proxy.utils import is_valid_api_key
|
|
from litellm.types.utils import (
|
|
CallTypes,
|
|
Delta,
|
|
LlmProviders,
|
|
ModelResponseStream,
|
|
StreamingChoices,
|
|
)
|
|
from litellm.utils import (
|
|
ProviderConfigManager,
|
|
TextCompletionStreamWrapper,
|
|
_check_provider_match,
|
|
_is_streaming_request,
|
|
get_llm_provider,
|
|
get_optional_params_image_gen,
|
|
is_cached_message,
|
|
)
|
|
|
|
# Adds the parent directory to the system path
|
|
|
|
|
|
def test_check_provider_match_azure_ai_allows_openai_and_azure():
    """
    Check which upstream providers the `azure_ai` provider is matched against.

    This is needed for Azure Model Router, which can route to OpenAI models:
    model info tagged `openai` or `azure` must match, anything else must not.
    """
    expected_by_upstream = {
        "openai": True,  # azure_ai should match openai models
        "azure": True,  # azure_ai should match azure models
        "anthropic": False,  # azure_ai should NOT match other providers
    }

    for upstream_provider, expected in expected_by_upstream.items():
        matched = _check_provider_match(
            model_info={"litellm_provider": upstream_provider},
            custom_llm_provider="azure_ai",
        )
        assert matched is expected
|
|
|
|
|
|
def test_check_provider_match_github_allows_upstream_provider_metadata():
    """
    Check that the `github` provider matches upstream provider metadata.

    GitHub Models can provide models from multiple providers, so model info
    tagged `openai`, `github` or `anthropic` is expected to match.
    """
    for upstream_provider in ("openai", "github", "anthropic"):
        matched = _check_provider_match(
            model_info={"litellm_provider": upstream_provider},
            custom_llm_provider="github",
        )
        assert matched is True
|
|
|
|
|
|
def test_supports_function_calling_github_openai_alias():
    """`gpt-4o-mini` on `github` reports function calling, with the provider given as prefix or as kwarg."""
    via_prefix = litellm.utils.supports_function_calling(model="github/gpt-4o-mini")
    assert via_prefix is True

    via_kwarg = litellm.utils.supports_function_calling(
        model="gpt-4o-mini", custom_llm_provider="github"
    )
    assert via_kwarg is True
|
|
|
|
|
|
def test_supports_function_calling_github_anthropic_alias():
    """A Claude model addressed through the `github/` prefix reports function calling support."""
    model_name = "github/claude-3-7-sonnet-20250219"
    supported = litellm.utils.supports_function_calling(model=model_name)
    assert supported is True
|
|
|
|
|
|
def test_supports_function_calling_deepinfra_llama():
    """Check that a deepinfra Llama model reports function calling support.

    Regression test for https://github.com/BerriAI/litellm/issues/22619
    """
    model_name = "deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo"
    supported = litellm.utils.supports_function_calling(model=model_name)
    assert supported is True
|
|
|
|
|
|
def test_supports_function_calling_unknown_github_alias_returns_false():
    """A `github/` model name that is not expected to exist reports no function calling support."""
    model_name = "github/non-existent-model-for-capability-check"
    supported = litellm.utils.supports_function_calling(model=model_name)
    assert supported is False
|
|
|
|
|
|
def test_get_optional_params_image_gen():
    """Azure `gpt-image-1` with `drop_params=True`: `response_format` must be absent and `n` kept."""
    from litellm.llms.azure.image_generation import AzureGPTImageGenerationConfig

    request_kwargs = dict(
        model="gpt-image-1",
        response_format="b64_json",
        n=3,
        custom_llm_provider="azure",
        drop_params=True,
        provider_config=AzureGPTImageGenerationConfig(),
    )
    mapped_params = get_optional_params_image_gen(**request_kwargs)

    assert mapped_params is not None
    assert "response_format" not in mapped_params
    assert mapped_params["n"] == 3
|
|
|
|
|
|
def test_get_optional_params_image_gen_vertex_ai_size():
    """Check that Vertex AI image generation maps the `size` parameter to `aspectRatio`."""
    vertex_model = "vertex_ai/imagegeneration@006"

    # Requested size -> aspect ratio this test expects in the mapped params.
    expected_ratio_by_size = {
        "1024x1024": "1:1",  # Square aspect ratio
        "256x256": "1:1",  # Square aspect ratio
        "512x512": "1:1",  # Square aspect ratio
        "1792x1024": "16:9",  # Landscape aspect ratio
        "1024x1792": "9:16",  # Portrait aspect ratio
        "unsupported": "1:1",  # Default to square for unsupported sizes
    }

    for requested_size, expected_ratio in expected_ratio_by_size.items():
        mapped = get_optional_params_image_gen(
            model=vertex_model,
            size=requested_size,
            n=2,
            custom_llm_provider="vertex_ai",
            drop_params=True,
        )
        assert mapped is not None
        assert mapped["aspectRatio"] == expected_ratio
        assert mapped["sampleCount"] == 2
        # size should be converted to aspectRatio, not passed through
        assert "size" not in mapped

    # Without a size parameter no aspectRatio is expected at all.
    mapped_without_size = get_optional_params_image_gen(
        model=vertex_model,
        n=1,
        custom_llm_provider="vertex_ai",
        drop_params=True,
    )
    assert mapped_without_size is not None
    assert "aspectRatio" not in mapped_without_size
    assert mapped_without_size["sampleCount"] == 1
|
|
|
|
|
|
def test_get_optional_params_image_gen_filters_empty_values():
    """An empty `extra_body` must not appear in the mapped optional params."""
    request_kwargs = {
        "model": "gpt-image-1",
        "custom_llm_provider": "openai",
        "extra_body": {},
    }
    mapped_params = get_optional_params_image_gen(**request_kwargs)
    assert mapped_params == {}
|
|
|
|
|
|
def test_all_model_configs():
    """
    Check `max_completion_tokens` handling across provider configs.

    For every provider config below, assert that `max_completion_tokens` is
    listed by `get_supported_openai_params` and that `map_openai_params`
    translates `{"max_completion_tokens": 10}` into the parameter name this
    test expects for that provider.

    Each provider is checked exactly once; the repeated Codestral and
    VertexGeminiConfig sections were identical copies and have been removed.
    """
    from litellm.llms.vertex_ai.vertex_ai_partner_models.ai21.transformation import (
        VertexAIAi21Config,
    )
    from litellm.llms.vertex_ai.vertex_ai_partner_models.llama3.transformation import (
        VertexAILlama3Config,
    )

    # Vertex AI partner models: Llama 3
    assert (
        "max_completion_tokens"
        in VertexAILlama3Config().get_supported_openai_params(model="llama3")
    )
    assert VertexAILlama3Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3", drop_params=False
    ) == {"max_tokens": 10}

    # Vertex AI partner models: AI21
    assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params(
        model="jamba-1.5-mini@001"
    )
    assert VertexAIAi21Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "jamba-1.5-mini@001", drop_params=False
    ) == {"max_tokens": 10}

    # Fireworks AI
    from litellm.llms.fireworks_ai.chat.transformation import FireworksAIConfig

    assert "max_completion_tokens" in FireworksAIConfig().get_supported_openai_params(
        model="llama3"
    )
    assert FireworksAIConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # Nvidia NIM
    from litellm.llms.nvidia_nim.chat.transformation import NvidiaNimConfig

    assert "max_completion_tokens" in NvidiaNimConfig().get_supported_openai_params(
        model="llama3"
    )
    assert NvidiaNimConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # Ollama chat (expected target name: num_predict)
    from litellm.llms.ollama.chat.transformation import OllamaChatConfig

    assert "max_completion_tokens" in OllamaChatConfig().get_supported_openai_params(
        model="llama3"
    )
    assert OllamaChatConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"num_predict": 10}

    # Predibase (expected target name: max_new_tokens)
    from litellm.llms.predibase.chat.transformation import PredibaseConfig

    assert "max_completion_tokens" in PredibaseConfig().get_supported_openai_params(
        model="llama3"
    )
    assert PredibaseConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_new_tokens": 10}

    # Codestral text completion
    from litellm.llms.codestral.completion.transformation import (
        CodestralTextCompletionConfig,
    )

    assert (
        "max_completion_tokens"
        in CodestralTextCompletionConfig().get_supported_openai_params(model="llama3")
    )
    assert CodestralTextCompletionConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # VolcEngine
    from litellm.llms.volcengine.chat.transformation import (
        VolcEngineChatConfig as VolcEngineConfig,
    )

    assert "max_completion_tokens" in VolcEngineConfig().get_supported_openai_params(
        model="llama3"
    )
    assert VolcEngineConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # AI21 chat
    from litellm.llms.ai21.chat.transformation import AI21ChatConfig

    assert "max_completion_tokens" in AI21ChatConfig().get_supported_openai_params(
        "jamba-1.5-mini@001"
    )
    assert AI21ChatConfig().map_openai_params(
        model="jamba-1.5-mini@001",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # Azure OpenAI (expected to pass max_completion_tokens through unchanged)
    from litellm.llms.azure.chat.gpt_transformation import AzureOpenAIConfig

    assert "max_completion_tokens" in AzureOpenAIConfig().get_supported_openai_params(
        model="gpt-3.5-turbo"
    )
    assert AzureOpenAIConfig().map_openai_params(
        model="gpt-3.5-turbo",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        api_version="2022-12-01",
        drop_params=False,
    ) == {"max_completion_tokens": 10}

    # Bedrock Converse (expected target name: maxTokens)
    from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig

    assert (
        "max_completion_tokens"
        in AmazonConverseConfig().get_supported_openai_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0"
        )
    )
    assert AmazonConverseConfig().map_openai_params(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"maxTokens": 10}

    # Bedrock Anthropic configs
    from litellm import AmazonAnthropicClaudeConfig, AmazonAnthropicConfig

    assert (
        "max_completion_tokens"
        in AmazonAnthropicClaudeConfig().get_supported_openai_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0"
        )
    )
    assert AmazonAnthropicClaudeConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        drop_params=False,
    ) == {"max_tokens": 10}

    assert (
        "max_completion_tokens"
        in AmazonAnthropicConfig().get_supported_openai_params(model="")
    )
    # expected target name for this config: max_tokens_to_sample
    assert AmazonAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        model="",
        drop_params=False,
    ) == {"max_tokens_to_sample": 10}

    # Databricks
    from litellm.llms.databricks.chat.transformation import DatabricksConfig

    assert "max_completion_tokens" in DatabricksConfig().get_supported_openai_params()
    assert DatabricksConfig().map_openai_params(
        model="databricks/llama-3-70b-instruct",
        drop_params=False,
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    # Vertex AI partner models: Anthropic
    from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation import (
        VertexAIAnthropicConfig,
    )

    assert (
        "max_completion_tokens"
        in VertexAIAnthropicConfig().get_supported_openai_params(
            model="claude-sonnet-4-6"
        )
    )
    assert VertexAIAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        model="claude-sonnet-4-6",
        drop_params=False,
    ) == {"max_tokens": 10}

    # Gemini via Vertex AI and Google AI Studio (expected target name: max_output_tokens)
    from litellm.llms.gemini.chat.transformation import GoogleAIStudioGeminiConfig
    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
        VertexGeminiConfig,
    )

    assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params(
        model="gemini-1.0-pro"
    )
    assert VertexGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_output_tokens": 10}

    assert (
        "max_completion_tokens"
        in GoogleAIStudioGeminiConfig().get_supported_openai_params(
            model="gemini-1.0-pro"
        )
    )
    assert GoogleAIStudioGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_output_tokens": 10}
|
|
|
|
|
|
def test_anthropic_web_search_in_model_info():
    """The listed Anthropic models must report web search support and a per-query search cost."""
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    from litellm.utils import get_model_info

    web_search_models = (
        "anthropic/claude-4-sonnet-20250514",
        "anthropic/claude-sonnet-4-5-20250929",
    )

    for model in web_search_models:
        info = get_model_info(model)
        assert info is not None

        has_web_search = info["supports_web_search"]
        assert has_web_search is True, f"Model {model} should support web search"

        per_query_cost = info["search_context_cost_per_query"]
        assert (
            per_query_cost is not None
        ), f"Model {model} should have a search context cost per query"
|
|
|
|
|
|
def test_cohere_embedding_optional_params():
    """Building embedding params for Cohere `embed-v4.0` with `input_type` and `dimensions` returns a result."""
    from litellm import get_optional_params_embeddings

    request_kwargs = {
        "model": "embed-v4.0",
        "custom_llm_provider": "cohere",
        "input": "Hello, world!",
        "input_type": "search_query",
        "dimensions": 512,
    }
    mapped_params = get_optional_params_embeddings(**request_kwargs)
    assert mapped_params is not None
|
|
|
|
|
|
def validate_model_cost_values(model_data, exceptions=None):
    """
    Validates that cost values in model data do not exceed 1.

    Both the flat cost fields and the values nested under
    `search_context_cost_per_query` are checked. Numeric strings are converted
    to float before the comparison; values that are neither numbers nor
    numeric strings are ignored. Model entries that are not dicts carry no
    cost fields and are skipped instead of raising.

    Args:
        model_data (dict): The model data dictionary, keyed by model ID
        exceptions (list, optional): List of model IDs that are allowed to have costs > 1

    Returns:
        tuple: (is_valid, violations) where is_valid is a boolean and violations is a list of error messages
    """
    if exceptions is None:
        exceptions = []

    # All flat cost-related fields to check
    cost_fields = [
        "input_cost_per_token",
        "output_cost_per_token",
        "input_cost_per_character",
        "output_cost_per_character",
        "input_cost_per_image",
        "output_cost_per_image",
        "input_cost_per_pixel",
        "output_cost_per_pixel",
        "input_cost_per_second",
        "output_cost_per_second",
        "input_cost_per_query",
        "input_cost_per_request",
        "input_cost_per_audio_token",
        "output_cost_per_audio_token",
        "output_cost_per_image_token",
        "output_cost_per_image_token_batches",
        "input_cost_per_audio_per_second",
        "input_cost_per_video_per_second",
        "input_cost_per_token_above_128k_tokens",
        "output_cost_per_token_above_128k_tokens",
        "input_cost_per_token_above_200k_tokens",
        "output_cost_per_token_above_200k_tokens",
        "input_cost_per_token_above_272k_tokens",
        "output_cost_per_token_above_272k_tokens",
        "input_cost_per_character_above_128k_tokens",
        "output_cost_per_character_above_128k_tokens",
        "input_cost_per_image_above_128k_tokens",
        "input_cost_per_video_per_second_above_8s_interval",
        "input_cost_per_video_per_second_above_15s_interval",
        "input_cost_per_video_per_second_above_128k_tokens",
        "input_cost_per_token_batch_requests",
        "input_cost_per_token_batches",
        "output_cost_per_token_batches",
        "input_cost_per_token_cache_hit",
        "cache_creation_input_token_cost",
        "cache_creation_input_audio_token_cost",
        "cache_read_input_token_cost",
        "cache_read_input_audio_token_cost",
        "input_dbu_cost_per_token",
        "output_db_cost_per_token",
        "output_dbu_cost_per_token",
        "output_cost_per_reasoning_token",
        "citation_cost_per_token",
    ]

    # Fields whose value is a dict of costs
    nested_cost_fields = [
        "search_context_cost_per_query",
    ]

    violations = []

    for model_id, model_info in model_data.items():
        # Skip if this model is in exceptions
        if model_id in exceptions:
            continue

        # A malformed (non-dict) entry has no cost fields to inspect
        if not isinstance(model_info, dict):
            continue

        # Check direct cost fields
        for field in cost_fields:
            cost_value = _coerce_cost_value(model_info.get(field))
            if cost_value is not None and cost_value > 1:
                violations.append(
                    f"Model '{model_id}' has {field} = {cost_value} which exceeds 1"
                )

        # Check nested cost fields
        for field in nested_cost_fields:
            nested_costs = model_info.get(field)
            if not isinstance(nested_costs, dict):
                continue
            for nested_field, raw_value in nested_costs.items():
                nested_value = _coerce_cost_value(raw_value)
                if nested_value is not None and nested_value > 1:
                    violations.append(
                        f"Model '{model_id}' has {field}.{nested_field} = {nested_value} which exceeds 1"
                    )

    return len(violations) == 0, violations


def _coerce_cost_value(raw_value):
    """Return `raw_value` as a number (numeric strings become float), or None if it is not comparable."""
    if isinstance(raw_value, str):
        try:
            return float(raw_value)
        except (ValueError, TypeError):
            # Not a numeric string: nothing to compare
            return None
    if isinstance(raw_value, (int, float)):
        return raw_value
    return None
|
|
|
|
|
|
def test_aaamodel_prices_and_context_window_json_is_valid():
    """
    Validates the `model_prices_and_context_window.json` file.

    Two checks are run on the file's content (minus the `sample_spec` entry):
    it must conform to INTENDED_SCHEMA, and no cost value may exceed 1 (see
    `validate_model_cost_values`).

    If this test fails after you update the json, you need to update the schema or correct the change you made.
    """

    # Every key must appear only once in each dict literal: a repeated key is
    # silently overridden by its last occurrence.
    INTENDED_SCHEMA = {
        "type": "object",
        "additionalProperties": {
            "type": "object",
            "properties": {
                "supports_computer_use": {"type": "boolean"},
                "cache_creation_input_audio_token_cost": {"type": "number"},
                "cache_creation_input_token_cost": {"type": "number"},
                "cache_creation_input_token_cost_above_1hr": {"type": "number"},
                "cache_creation_input_token_cost_above_200k_tokens": {"type": "number"},
                "cache_read_input_token_cost": {"type": "number"},
                "cache_read_input_token_cost_above_200k_tokens": {"type": "number"},
                "cache_read_input_token_cost_above_272k_tokens": {"type": "number"},
                "cache_read_input_token_cost_batches": {"type": "number"},
                "cache_creation_input_token_cost_above_1hr_above_200k_tokens": {
                    "type": "number"
                },
                "cache_read_input_audio_token_cost": {"type": "number"},
                "cache_read_input_token_cost_per_audio_token": {"type": "number"},
                "cache_read_input_image_token_cost": {"type": "number"},
                "deprecation_date": {"type": "string"},
                "input_cost_per_audio_per_second": {"type": "number"},
                "input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
                "input_cost_per_audio_token": {"type": "number"},
                "input_cost_per_image_token": {"type": "number"},
                "input_cost_per_character": {"type": "number"},
                "input_cost_per_character_above_128k_tokens": {"type": "number"},
                "input_cost_per_image": {"type": "number"},
                "input_cost_per_image_above_128k_tokens": {"type": "number"},
                "input_cost_per_token_above_200k_tokens": {"type": "number"},
                "input_cost_per_token_above_256k_tokens": {"type": "number"},
                "input_cost_per_token_above_272k_tokens": {"type": "number"},
                "cache_read_input_token_cost_flex": {"type": "number"},
                "cache_read_input_token_cost_priority": {"type": "number"},
                "cache_read_input_token_cost_above_200k_tokens_priority": {
                    "type": "number"
                },
                "cache_read_input_token_cost_above_272k_tokens_priority": {
                    "type": "number"
                },
                "input_cost_per_token_flex": {"type": "number"},
                "input_cost_per_token_priority": {"type": "number"},
                "input_cost_per_token_above_200k_tokens_priority": {"type": "number"},
                "input_cost_per_token_above_272k_tokens_priority": {"type": "number"},
                "input_cost_per_audio_token_priority": {"type": "number"},
                "output_cost_per_token_flex": {"type": "number"},
                "output_cost_per_token_priority": {"type": "number"},
                "output_cost_per_token_above_200k_tokens_priority": {"type": "number"},
                "output_cost_per_token_above_272k_tokens_priority": {"type": "number"},
                "input_cost_per_pixel": {"type": "number"},
                "input_cost_per_query": {"type": "number"},
                "input_cost_per_request": {"type": "number"},
                "input_cost_per_second": {"type": "number"},
                "input_cost_per_token": {"type": "number"},
                "input_cost_per_token_above_128k_tokens": {"type": "number"},
                "input_cost_per_token_batch_requests": {"type": "number"},
                "input_cost_per_token_batches": {"type": "number"},
                "input_cost_per_token_cache_hit": {"type": "number"},
                "input_cost_per_video_per_second": {"type": "number"},
                "input_cost_per_video_per_second_above_8s_interval": {"type": "number"},
                "input_cost_per_video_per_second_above_15s_interval": {
                    "type": "number"
                },
                "input_cost_per_video_per_second_above_128k_tokens": {"type": "number"},
                "input_dbu_cost_per_token": {"type": "number"},
                "annotation_cost_per_page": {"type": "number"},
                "ocr_cost_per_page": {"type": "number"},
                "code_interpreter_cost_per_session": {"type": "number"},
                "inference_geo": {"type": "string"},
                "litellm_provider": {"type": "string"},
                "max_audio_length_hours": {"type": "number"},
                "max_audio_per_prompt": {"type": "number"},
                "max_document_chunks_per_query": {"type": "number"},
                "max_images_per_prompt": {"type": "number"},
                "max_input_tokens": {"type": "number"},
                "max_output_tokens": {"type": "number"},
                "max_pdf_size_mb": {"type": "number"},
                "max_query_tokens": {"type": "number"},
                "max_tokens": {"type": "number"},
                "max_tokens_per_document_chunk": {"type": "number"},
                "max_video_length": {"type": "number"},
                "max_videos_per_prompt": {"type": "number"},
                "metadata": {"type": "object"},
                "provider_specific_entry": {"type": "object"},
                "mode": {
                    "type": "string",
                    "enum": [
                        "audio_speech",
                        "audio_transcription",
                        "chat",
                        "completion",
                        "container",
                        "image_edit",
                        "embedding",
                        "image_generation",
                        "video_generation",
                        "moderation",
                        "rerank",
                        "realtime",
                        "responses",
                        "ocr",
                        "search",
                        "vector_store",
                    ],
                },
                "output_cost_per_audio_token": {"type": "number"},
                "output_cost_per_character": {"type": "number"},
                "output_cost_per_character_above_128k_tokens": {"type": "number"},
                "output_cost_per_image": {"type": "number"},
                "output_cost_per_image_token": {"type": "number"},
                "output_cost_per_image_token_batches": {"type": "number"},
                "output_cost_per_pixel": {"type": "number"},
                "output_cost_per_second": {"type": "number"},
                "output_cost_per_token": {"type": "number"},
                "output_cost_per_token_above_128k_tokens": {"type": "number"},
                "output_cost_per_token_above_200k_tokens": {"type": "number"},
                "output_cost_per_token_above_256k_tokens": {"type": "number"},
                "output_cost_per_token_above_272k_tokens": {"type": "number"},
                "output_cost_per_image_above_1024_and_1024_pixels": {"type": "number"},
                "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image": {
                    "type": "number"
                },
                "output_cost_per_image_above_512_and_512_pixels": {"type": "number"},
                "output_cost_per_image_above_512_and_512_pixels_and_premium_image": {
                    "type": "number"
                },
                "output_cost_per_image_premium_image": {"type": "number"},
                "output_cost_per_token_batches": {"type": "number"},
                "output_cost_per_reasoning_token": {"type": "number"},
                "output_cost_per_video_per_second": {"type": "number"},
                "output_db_cost_per_token": {"type": "number"},
                "output_dbu_cost_per_token": {"type": "number"},
                "output_vector_size": {"type": "number"},
                "rpd": {"type": "number"},
                "rpm": {"type": "number"},
                "source": {"type": "string"},
                "comment": {"type": "string"},
                "supports_assistant_prefill": {"type": "boolean"},
                "supports_audio_input": {"type": "boolean"},
                "supports_audio_output": {"type": "boolean"},
                "supports_embedding_image_input": {"type": "boolean"},
                "supports_code_execution": {"type": "boolean"},
                "supports_file_search": {"type": "boolean"},
                "supports_function_calling": {"type": "boolean"},
                "supports_image_input": {"type": "boolean"},
                "supports_parallel_function_calling": {"type": "boolean"},
                "supports_pdf_input": {"type": "boolean"},
                "supports_prompt_caching": {"type": "boolean"},
                "supports_response_schema": {"type": "boolean"},
                "supports_system_messages": {"type": "boolean"},
                "supports_tool_choice": {"type": "boolean"},
                "supports_video_input": {"type": "boolean"},
                "supports_vision": {"type": "boolean"},
                "supports_web_search": {"type": "boolean"},
                "supports_url_context": {"type": "boolean"},
                "supports_multimodal": {"type": "boolean"},
                "uses_embed_content": {"type": "boolean"},
                "supports_reasoning": {"type": "boolean"},
                "supports_minimal_reasoning_effort": {"type": "boolean"},
                "supports_none_reasoning_effort": {"type": "boolean"},
                "supports_xhigh_reasoning_effort": {"type": "boolean"},
                "supports_service_tier": {"type": "boolean"},
                "supports_preset": {"type": "boolean"},
                "tool_use_system_prompt_tokens": {"type": "number"},
                "tpm": {"type": "number"},
                "supported_endpoints": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": [
                            "/v1/responses",
                            "/v1/embeddings",
                            "/v1/chat/completions",
                            "/v1/completions",
                            "/v1/images/generations",
                            "/v1/realtime",
                            "/v1/images/variations",
                            "/v1/images/edits",
                            "/v1/batch",
                            "/v1/audio/transcriptions",
                            "/v1/audio/speech",
                            "/v1/ocr",
                            "/vertex_ai/live",
                        ],
                    },
                },
                "supported_regions": {
                    "type": "array",
                    "items": {
                        "type": "string",
                    },
                },
                "search_context_cost_per_query": {
                    "type": "object",
                    "properties": {
                        "search_context_size_low": {"type": "number"},
                        "search_context_size_medium": {"type": "number"},
                        "search_context_size_high": {"type": "number"},
                    },
                    "additionalProperties": False,
                },
                "citation_cost_per_token": {"type": "number"},
                "supported_modalities": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["text", "audio", "image", "video"],
                    },
                },
                "supported_output_modalities": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["text", "image", "audio", "code", "video"],
                    },
                },
                "supported_resolutions": {
                    "type": "array",
                    "items": {
                        "type": "string",
                    },
                },
                "supports_native_streaming": {"type": "boolean"},
                "supports_native_structured_output": {"type": "boolean"},
                "tiered_pricing": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "range": {
                                "type": "array",
                                "items": {"type": "number"},
                                "minItems": 2,
                                "maxItems": 2,
                            },
                            "input_cost_per_token": {"type": "number"},
                            "output_cost_per_token": {"type": "number"},
                            "cache_read_input_token_cost": {"type": "number"},
                            "output_cost_per_reasoning_token": {"type": "number"},
                            "max_results_range": {
                                "type": "array",
                                "items": {"type": "number"},
                                "minItems": 2,
                                "maxItems": 2,
                            },
                            "input_cost_per_query": {"type": "number"},
                        },
                        "additionalProperties": False,
                    },
                },
            },
            "additionalProperties": False,
        },
    }

    prod_json = os.path.join(
        os.path.dirname(__file__), "..", "..", "model_prices_and_context_window.json"
    )
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # locale-default encoding.
    with open(prod_json, "r", encoding="utf-8") as model_prices_file:
        actual_json = json.load(model_prices_file)
    assert isinstance(actual_json, dict)
    actual_json.pop(
        "sample_spec", None
    )  # remove the sample, whose schema is inconsistent with the real data

    # Validate schema
    validate(actual_json, INTENDED_SCHEMA)

    # Validate cost values
    # Define exceptions for models that are allowed to have costs > 1
    # Add model IDs here if they legitimately have costs > 1
    exceptions = [
        # Add any model IDs that should be exempt from the cost validation
        # Example: "expensive-model-id",
    ]

    is_valid, violations = validate_model_cost_values(actual_json, exceptions)

    if not is_valid:
        error_message = "Cost validation failed:\n" + "\n".join(violations)
        error_message += "\n\nTo add exceptions, add the model ID to the 'exceptions' list in the test function."
        raise AssertionError(error_message)
|
|
|
|
|
|
def test_max_tokens_consistency():
    """
    Test that max_tokens == max_output_tokens for all models.

    According to the spec in model_prices_and_context_window.json:
    - max_tokens is a LEGACY parameter
    - It should be set to max_output_tokens if the provider specifies it

    Only entries that define both values are compared; `sample_spec` and
    non-dict entries are skipped. On failure the assertion message lists the
    first 10 mismatching models.
    """
    import json
    from pathlib import Path

    # Load the model configuration
    config_path = (
        Path(__file__).parent.parent.parent / "model_prices_and_context_window.json"
    )
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # locale-default encoding.
    with open(config_path, "r", encoding="utf-8") as f:
        models = json.load(f)

    inconsistencies = []

    for model_name, config in models.items():
        # Skip the sample_spec
        if model_name == "sample_spec":
            continue
        if not isinstance(config, dict):
            continue

        max_tokens = config.get("max_tokens")
        max_output_tokens = config.get("max_output_tokens")

        # Only validate if both exist
        if max_tokens is None or max_output_tokens is None:
            continue

        if max_tokens != max_output_tokens:
            inconsistencies.append(
                {
                    "model": model_name,
                    "max_tokens": max_tokens,
                    "max_output_tokens": max_output_tokens,
                }
            )

    if inconsistencies:
        error_msg = f"\n\n❌ Found {len(inconsistencies)} models with max_tokens != max_output_tokens:\n\n"
        for item in inconsistencies[:10]:  # Show first 10
            error_msg += f" {item['model']}: max_tokens={item['max_tokens']}, max_output_tokens={item['max_output_tokens']}\n"

        if len(inconsistencies) > 10:
            error_msg += f"\n ... and {len(inconsistencies) - 10} more\n"

        error_msg += "\nTo fix these inconsistencies, run: poetry run python fix_max_tokens_inconsistencies.py"
        raise AssertionError(error_msg)
|
|
|
|
|
|
def test_get_model_info_gemini():
    """
    Check that every `gemini/` entry in the local model cost map defines both
    'tpm' and 'rpm', apart from entries whose name contains one of the
    excluded family markers below.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    # Name fragments of gemini/ entries that are exempt from the check
    exempt_markers = ("gemma", "learnlm", "imagen", "veo", "lyria", "robotics")

    for model, info in litellm.model_cost.items():
        if not model.startswith("gemini/"):
            continue
        if any(marker in model for marker in exempt_markers):
            continue

        assert info.get("tpm") is not None, f"{model} does not have tpm"
        assert info.get("rpm") is not None, f"{model} does not have rpm"
|
|
|
|
|
|
def test_openai_models_in_model_info():
    """
    Every cost-map entry whose provider is "openai" and whose
    `supports_vision` is True must also have `supports_pdf_input` set to True.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    # Collect the offending model names so the failure message lists them all
    violated_models = [
        name
        for name, details in litellm.model_cost.items()
        if details.get("litellm_provider") == "openai"
        and details.get("supports_vision") is True
        and details.get("supports_pdf_input") is not True
    ]

    assert (
        len(violated_models) == 0
    ), f"The following models should support pdf input: {violated_models}"
|
|
|
|
|
|
def test_supports_tool_choice_simple_tests():
    """
    Simple sanity checks for litellm.utils.supports_tool_choice.

    Each model is checked in the forms used below: bare name, with a
    "bedrock/" prefix, and with an explicit custom_llm_provider. All checks
    use identity comparison so that only a real bool satisfies them.
    """
    supports_tool_choice = litellm.utils.supports_tool_choice

    # Models expected to report tool_choice support
    assert supports_tool_choice(model="gpt-4o") is True
    assert (
        supports_tool_choice(model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0")
        is True
    )
    assert (
        supports_tool_choice(model="anthropic.claude-3-sonnet-20240229-v1:0") is True
    )
    assert (
        supports_tool_choice(
            model="anthropic.claude-3-sonnet-20240229-v1:0",
            custom_llm_provider="bedrock_converse",
        )
        is True
    )

    # Models expected to report no tool_choice support
    assert supports_tool_choice(model="us.amazon.nova-micro-v1:0") is False
    assert supports_tool_choice(model="bedrock/us.amazon.nova-micro-v1:0") is False
    assert (
        supports_tool_choice(
            model="us.amazon.nova-micro-v1:0", custom_llm_provider="bedrock_converse"
        )
        is False
    )

    assert supports_tool_choice(model="perplexity/sonar") is False
|
|
|
|
|
|
def test_check_provider_match():
    """
    Exercise litellm.utils._check_provider_match for the bedrock family:
    "bedrock" and "bedrock_converse" are accepted interchangeably, while an
    unrelated provider is rejected.
    """
    check = litellm.utils._check_provider_match

    # Model info registered under plain "bedrock"
    bedrock_info = {"litellm_provider": "bedrock"}
    assert check(bedrock_info, "bedrock") is True
    assert check(bedrock_info, "bedrock_converse") is True

    # Model info registered under "bedrock_converse"
    converse_info = {"litellm_provider": "bedrock_converse"}
    assert check(converse_info, "bedrock") is True
    assert check(converse_info, "bedrock_converse") is True

    # A provider outside the bedrock family must not match
    assert check({"litellm_provider": "bedrock"}, "openai") is False
|
|
|
|
|
|
def test_get_provider_rerank_config():
    """
    ProviderConfigManager.get_provider_rerank_config should hand back a
    HostedVLLMRerankConfig instance when asked for the hosted_vllm provider.
    """
    from litellm import HostedVLLMRerankConfig
    from litellm.utils import LlmProviders, ProviderConfigManager

    rerank_config = ProviderConfigManager.get_provider_rerank_config(
        "my_model",
        LlmProviders.HOSTED_VLLM,
        "http://localhost",
        [],
    )

    assert isinstance(rerank_config, HostedVLLMRerankConfig)
|
|
|
|
|
|
# Providers and model-name fragments that should be skipped during testing
OLD_PROVIDERS = ["aleph_alpha", "palm"]
SKIP_MODELS = [
    "azure/mistral",
    "azure/command-r",
    "jamba",
    "deepinfra",
    "mistral.",
]

# Building blocks for the Bedrock entries to block
BEDROCK_REGIONS = ["ap-northeast-1", "eu-central-1", "us-east-1", "us-west-2"]
BEDROCK_COMMITMENTS = ["1-month-commitment", "6-month-commitment"]
BEDROCK_MODELS = {
    "anthropic.claude-v1",
    "anthropic.claude-v2",
    "anthropic.claude-v2:1",
    "anthropic.claude-instant-v1",
}

# block_list holds, for every region/model pair, the plain
# "bedrock/<region>/<model>" name and one name per commitment term.
block_list = set()
for region in BEDROCK_REGIONS:
    for commitment in BEDROCK_COMMITMENTS:
        for model in BEDROCK_MODELS:
            block_list.update(
                (
                    f"bedrock/{region}/{commitment}/{model}",
                    f"bedrock/{region}/{model}",
                )
            )

# Cohere entries use a "*" wildcard in place of the region
for commitment in BEDROCK_COMMITMENTS:
    block_list.update(
        (
            f"bedrock/*/{commitment}/cohere.command-text-v14",
            f"bedrock/*/{commitment}/cohere.command-light-text-v14",
        )
    )

print("block_list", block_list)
|
|
|
|
|
|
def test_supports_computer_use_utility():
    """
    Tests the litellm.utils.supports_computer_use utility function.

    The local model cost map is loaded for the duration of the test. The
    LITELLM_LOCAL_MODEL_COST_MAP variable and litellm.model_cost are put back
    to their previous state afterwards, including when loading the cost map
    or an assertion fails.
    """
    from litellm.utils import supports_computer_use

    # Snapshot the state this test is about to change
    original_env_var = os.getenv("LITELLM_LOCAL_MODEL_COST_MAP")
    original_model_cost = getattr(litellm, "model_cost", None)

    try:
        # Setup happens inside the try so that a failure while loading the
        # cost map still triggers the restore below.
        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
        litellm.model_cost = litellm.get_model_cost_map(url="")

        # A model expected to have the computer-use flag set
        supports_cu_anthropic = supports_computer_use(
            model="anthropic/claude-4-sonnet-20250514"
        )
        assert supports_cu_anthropic is True

        # A model expected to report no computer-use support
        supports_cu_gpt = supports_computer_use(model="gpt-3.5-turbo")
        assert supports_cu_gpt is False
    finally:
        # Restore original environment and model_cost to avoid side effects
        if original_env_var is None:
            # pop() tolerates the key already being absent
            os.environ.pop("LITELLM_LOCAL_MODEL_COST_MAP", None)
        else:
            os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = original_env_var

        if original_model_cost is not None:
            litellm.model_cost = original_model_cost
        elif hasattr(litellm, "model_cost"):
            delattr(litellm, "model_cost")
|
|
|
|
|
|
def test_get_model_info_shows_supports_computer_use():
    """
    Check the 'supports_computer_use' key returned by litellm.get_model_info.

    'claude-4-sonnet-20250514' is expected to report True, while
    'gpt-3.5-turbo' is expected to report None for the same key.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    # Reload the cost map so the lookups below use the local copy
    litellm.model_cost = litellm.get_model_cost_map(url="")

    # Positive case
    model_known_to_support_computer_use = "claude-4-sonnet-20250514"
    supporting_info = litellm.get_model_info(model_known_to_support_computer_use)
    print(f"Info for {model_known_to_support_computer_use}: {supporting_info}")
    assert supporting_info.get("supports_computer_use") is True

    # Negative case: the key is expected to come back as None, not False
    model_known_not_to_support_computer_use = "gpt-3.5-turbo"
    non_supporting_info = litellm.get_model_info(
        model_known_not_to_support_computer_use
    )
    print(
        f"Info for {model_known_not_to_support_computer_use}: {non_supporting_info}"
    )
    assert non_supporting_info.get("supports_computer_use") is None
|
|
|
|
|
|
@pytest.mark.parametrize(
    "model, custom_llm_provider",
    [
        ("gpt-3.5-turbo", "openai"),
        ("anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock"),
        ("gemini-2.5-pro", "vertex_ai"),
    ],
)
def test_pre_process_non_default_params(model, custom_llm_provider):
    """
    pre_process_non_default_params should turn a Pydantic model passed as
    `response_format` into a strict `json_schema` response format.

    The expected schema carries `additionalProperties: False` for every
    provider except vertex_ai / vertex_ai_beta / gemini.
    """
    from pydantic import BaseModel

    from litellm.utils import ProviderConfigManager, pre_process_non_default_params

    class ResponseFormat(BaseModel):
        x: str
        y: str

    provider_config = ProviderConfigManager.get_provider_chat_config(
        model=model, provider=LlmProviders(custom_llm_provider)
    )

    processed_non_default_params = pre_process_non_default_params(
        model=model,
        passed_params={
            "model": "gpt-3.5-turbo",
            "response_format": ResponseFormat,
        },
        special_params={},
        custom_llm_provider=custom_llm_provider,
        additional_drop_params=None,
        provider_config=provider_config,
    )
    print(processed_non_default_params)

    expected_schema = {
        "properties": {
            "x": {"title": "X", "type": "string"},
            "y": {"title": "Y", "type": "string"},
        },
        "required": ["x", "y"],
        "title": "ResponseFormat",
        "type": "object",
    }
    # Providers listed here are expected to get a schema without
    # additionalProperties; all others get additionalProperties: False.
    providers_without_additional_properties = ("vertex_ai", "vertex_ai_beta", "gemini")
    if custom_llm_provider not in providers_without_additional_properties:
        expected_schema["additionalProperties"] = False

    expected_params = {
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "schema": expected_schema,
                "name": "ResponseFormat",
                "strict": True,
            },
        }
    }
    assert processed_non_default_params == expected_params
|
|
|
|
|
|
from litellm.utils import supports_function_calling
|
|
|
|
|
|
class TestProxyFunctionCalling:
|
|
"""Test class for proxy function calling capabilities."""
|
|
|
|
@pytest.fixture(autouse=True)
def reset_mock_cache(self):
    """Autouse fixture: flush litellm.utils._model_cache ahead of each test."""
    from litellm.utils import _model_cache as model_cache

    model_cache.flush_cache()
|
|
|
|
@pytest.mark.parametrize(
    "direct_model,proxy_model,expected_result",
    [
        # OpenAI
        ("gpt-3.5-turbo", "litellm_proxy/gpt-3.5-turbo", True),
        ("gpt-4", "litellm_proxy/gpt-4", True),
        ("gpt-4o", "litellm_proxy/gpt-4o", True),
        ("gpt-4o-mini", "litellm_proxy/gpt-4o-mini", True),
        ("gpt-4-turbo", "litellm_proxy/gpt-4-turbo", True),
        ("gpt-4-1106-preview", "litellm_proxy/gpt-4-1106-preview", True),
        # Azure OpenAI
        ("azure/gpt-4", "litellm_proxy/azure/gpt-4", True),
        ("azure/gpt-3.5-turbo", "litellm_proxy/azure/gpt-3.5-turbo", True),
        (
            "azure/gpt-4-1106-preview",
            "litellm_proxy/azure/gpt-4-1106-preview",
            True,
        ),
        # Anthropic
        (
            "claude-sonnet-4-6",
            "litellm_proxy/claude-sonnet-4-6",
            True,
        ),
        # Google
        ("gemini-2.5-pro", "litellm_proxy/gemini-2.5-pro", True),
        ("gemini/gemini-2.5-pro", "litellm_proxy/gemini/gemini-2.5-pro", True),
        ("gemini/gemini-2.5-flash", "litellm_proxy/gemini/gemini-2.5-flash", True),
        # Groq
        ("groq/gemma-7b-it", "litellm_proxy/groq/gemma-7b-it", True),
        (
            "groq/llama-3.3-70b-versatile",
            "litellm_proxy/groq/llama-3.3-70b-versatile",
            True,
        ),
        # Cohere (expected to report no function calling support)
        ("command-nightly", "litellm_proxy/command-nightly", False),
    ],
)
def test_proxy_function_calling_support_consistency(
    self, direct_model, proxy_model, expected_result
):
    """
    A model addressed through the `litellm_proxy/` prefix must report the
    same function calling support as the same model addressed directly, and
    both must equal the expected value.
    """
    direct_result, proxy_result = (
        supports_function_calling(name) for name in (direct_model, proxy_model)
    )

    # Each lookup on its own has to match the expectation ...
    assert (
        direct_result == expected_result
    ), f"Direct model {direct_model} should return {expected_result}"
    assert (
        proxy_result == expected_result
    ), f"Proxy model {proxy_model} should return {expected_result}"

    # ... and the two lookups have to agree with each other
    assert (
        direct_result == proxy_result
    ), f"Mismatch: {direct_model}={direct_result} vs {proxy_model}={proxy_result}"
|
|
|
|
@pytest.mark.parametrize(
    "proxy_model_name,underlying_model,expected_proxy_result",
    [
        # Custom names: nothing in the name identifies the underlying model,
        # so every case below expects False.
        (
            "litellm_proxy/bedrock-claude-3-haiku",
            "bedrock/anthropic.claude-3-haiku-20240307-v1:0",
            False,
        ),
        (
            "litellm_proxy/bedrock-claude-3-sonnet",
            "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
        ),
        (
            "litellm_proxy/bedrock-claude-3-opus",
            "bedrock/anthropic.claude-3-opus-20240229-v1:0",
            False,
        ),
        (
            "litellm_proxy/bedrock-claude-instant",
            "bedrock/anthropic.claude-instant-v1",
            False,
        ),
        (
            "litellm_proxy/bedrock-titan-text",
            "bedrock/amazon.titan-text-express-v1",
            False,
        ),
        # Azure with custom deployment names
        ("litellm_proxy/my-gpt4-deployment", "azure/gpt-4", False),
        ("litellm_proxy/production-gpt35", "azure/gpt-3.5-turbo", False),
        ("litellm_proxy/dev-gpt4o", "azure/gpt-4o", False),
        # Custom OpenAI deployments
        ("litellm_proxy/company-gpt4", "gpt-4", False),
        ("litellm_proxy/internal-gpt35", "gpt-3.5-turbo", False),
        # Vertex AI with custom names
        ("litellm_proxy/vertex-gemini-pro", "vertex_ai/gemini-1.5-pro", False),
        ("litellm_proxy/vertex-gemini-flash", "vertex_ai/gemini-1.5-flash", False),
        # Anthropic with custom names
        ("litellm_proxy/claude-prod", "anthropic/claude-3-sonnet-20240229", False),
        ("litellm_proxy/claude-dev", "anthropic/claude-3-haiku-20240307", False),
        # Groq with custom names
        ("litellm_proxy/fast-llama", "groq/llama-3.1-8b-instant", False),
        ("litellm_proxy/groq-gemma", "groq/gemma-7b-it", False),
        # Cohere with custom names
        ("litellm_proxy/cohere-command", "cohere/command-r", False),
        ("litellm_proxy/cohere-command-plus", "cohere/command-r-plus", False),
        # Together AI with custom names
        (
            "litellm_proxy/together-llama",
            "together_ai/meta-llama/Llama-2-70b-chat-hf",
            False,
        ),
        (
            "litellm_proxy/together-mistral",
            "together_ai/mistralai/Mistral-7B-Instruct-v0.1",
            False,
        ),
        # Ollama with custom names
        ("litellm_proxy/local-llama", "ollama/llama2", False),
        ("litellm_proxy/local-mistral", "ollama/mistral", False),
    ],
)
def test_proxy_custom_model_names_without_config(
    self, proxy_model_name, underlying_model, expected_proxy_result
):
    """
    Proxy model names that differ from the underlying model name.

    The underlying model is looked up first purely for information (its
    result is printed, never asserted). The proxy name is then checked
    against the expected value, which is False for every case here.
    """
    # Informational lookup of the underlying model; failures only warn
    try:
        info_result = supports_function_calling(underlying_model)
        print(
            f"Underlying model {underlying_model} supports function calling: {info_result}"
        )
    except Exception as e:
        print(f"Warning: Could not test underlying model {underlying_model}: {e}")

    # The actual check: the custom proxy name on its own
    observed = supports_function_calling(proxy_model_name)
    assert (
        observed == expected_proxy_result
    ), f"Proxy model {proxy_model_name} should return {expected_proxy_result} (without config context)"
|
|
|
|
def test_proxy_model_resolution_with_custom_names_documentation(self):
    """
    Pin down and document how custom proxy model names are handled.

    Two cases are asserted: a made-up proxy name reports False, and a proxy
    name that embeds a known model name reports True. A summary of the
    behavior is printed afterwards.
    """
    # Case 1: a name with no recognizable model in it
    unresolvable = supports_function_calling("litellm_proxy/my-custom-claude")
    assert (
        unresolvable is False
    ), "Custom model names return False without proxy config context"

    # Case 2: a name that carries a known model name after the prefix
    resolvable = supports_function_calling("litellm_proxy/claude-sonnet-4-5-20250929")
    assert resolvable is True, "Resolvable model names work with fallback logic"

    # Printed notes for whoever reads the test output
    print(
        """
        PROXY MODEL RESOLUTION BEHAVIOR:

        ✅ WORKS (with current fallback logic):
        - litellm_proxy/gpt-4
        - litellm_proxy/claude-sonnet-4-5-20250929
        - litellm_proxy/anthropic/claude-3-haiku-20240307

        ❌ DOESN'T WORK (requires proxy server config):
        - litellm_proxy/my-custom-gpt4
        - litellm_proxy/bedrock-claude-3-haiku
        - litellm_proxy/production-model

        💡 SOLUTION: Use LiteLLM proxy server with proper model_list configuration
        that maps custom names to underlying models.
        """
    )
|
|
|
|
@pytest.mark.parametrize(
    "proxy_model_with_hints,expected_result",
    [
        # Names that hint at the underlying model; expected_result is the
        # desired outcome, not what is asserted today.
        ("litellm_proxy/gpt-4-with-functions", True),  # Hints at GPT-4
        ("litellm_proxy/claude-3-haiku-prod", True),  # Hints at Claude 3 Haiku
        (
            "litellm_proxy/bedrock-anthropic-claude-3-sonnet",
            True,
        ),  # Hints at Bedrock Claude 3 Sonnet
    ],
)
def test_proxy_models_with_naming_hints(
    self, proxy_model_with_hints, expected_result
):
    """
    Proxy names that merely hint at the underlying model.

    `expected_result` records the desired outcome and is only printed. The
    assertion pins the present behavior, which is False for every case.
    """
    current = supports_function_calling(proxy_model_with_hints)

    # Show the gap between present and desired behavior in the test output
    print(
        f"Model {proxy_model_with_hints}: current={current}, desired={expected_result}"
    )

    # Present behavior: no inference from the name, so the answer is False
    assert (
        current is False
    ), f"Current limitation: {proxy_model_with_hints} returns False without inference"
|
|
|
|
@pytest.mark.parametrize(
    "proxy_model,expected_result",
    [
        # Proxy models expected to report function calling support
        ("litellm_proxy/gpt-3.5-turbo", True),
        ("litellm_proxy/gpt-4", True),
        ("litellm_proxy/gpt-4o", True),
        ("litellm_proxy/claude-sonnet-4-6", True),
        ("litellm_proxy/gemini/gemini-2.5-pro", True),
        # Proxy models expected to report no function calling support
        ("litellm_proxy/command-nightly", False),
        ("litellm_proxy/anthropic.claude-instant-v1", False),
    ],
)
def test_proxy_only_function_calling_support(self, proxy_model, expected_result):
    """
    Check proxy model names on their own, with no comparison against the
    directly addressed model.

    Any exception raised inside the try block, a failed assertion included,
    is reported through pytest.fail.
    """
    try:
        observed = supports_function_calling(model=proxy_model)
        assert (
            observed == expected_result
        ), f"Proxy model {proxy_model} returned {observed}, expected {expected_result}"
    except Exception as e:
        pytest.fail(f"Error testing proxy model {proxy_model}: {e}")
|
|
|
|
def test_litellm_utils_supports_function_calling_import(self):
    """supports_function_calling must be importable from litellm.utils and callable."""
    try:
        from litellm.utils import supports_function_calling as imported_fn

        assert callable(imported_fn)
    except ImportError as e:
        pytest.fail(f"Failed to import supports_function_calling: {e}")
|
|
|
|
def test_litellm_supports_function_calling_import(self):
    """supports_function_calling must be exposed on the litellm package as a callable."""
    try:
        import litellm

        has_attribute = hasattr(litellm, "supports_function_calling")
        assert has_attribute
        assert callable(litellm.supports_function_calling)
    except Exception as e:
        pytest.fail(f"Failed to access litellm.supports_function_calling: {e}")
|
|
|
|
@pytest.mark.parametrize(
    "model_name",
    [
        "litellm_proxy/gpt-3.5-turbo",
        "litellm_proxy/gpt-4",
        "litellm_proxy/claude-sonnet-4-6",
        "litellm_proxy/gemini/gemini-2.5-pro",
    ],
)
def test_proxy_model_with_custom_llm_provider_none(self, model_name):
    """
    Call supports_function_calling with custom_llm_provider passed
    explicitly as None.

    Every model in the parameter list is expected to report True. Any
    exception raised inside the try block, a failed assertion included, is
    reported through pytest.fail.
    """
    try:
        observed = supports_function_calling(
            model=model_name, custom_llm_provider=None
        )
        assert (
            observed is True
        ), f"Model {model_name} should support function calling but returned {observed}"
    except Exception as e:
        pytest.fail(
            f"Error testing {model_name} with custom_llm_provider=None: {e}"
        )
|
|
|
|
def test_edge_cases_and_malformed_proxy_models(self):
    """
    Test edge cases and malformed proxy model names.

    For each name, supports_function_calling may either raise (tolerated)
    or return a value, which must then equal the expected result. The
    assertion is kept outside the try block so that a wrong return value
    actually fails the test instead of being swallowed with the tolerated
    exceptions.
    """
    test_cases = [
        ("litellm_proxy/", False),  # Empty model name after proxy prefix
        ("litellm_proxy", False),  # Just the proxy prefix without slash
        ("litellm_proxy//gpt-3.5-turbo", False),  # Double slash
        ("litellm_proxy/nonexistent-model", False),  # Non-existent model
    ]

    for model_name, expected_result in test_cases:
        try:
            result = supports_function_calling(model=model_name)
        except Exception:
            # It's acceptable for malformed model names to raise exceptions
            # rather than returning False.
            continue

        assert (
            result == expected_result
        ), f"Edge case {model_name} returned {result}, expected {expected_result}"
|
|
|
|
def test_proxy_model_resolution_demonstration(self):
    """
    Compare "gpt-3.5-turbo" with its `litellm_proxy/` counterpart.

    Both results are printed. If they differ the test is skipped with an
    explanatory message; otherwise the equality is asserted.
    """
    direct_model = "gpt-3.5-turbo"
    proxy_model = "litellm_proxy/gpt-3.5-turbo"

    direct_result = supports_function_calling(model=direct_model)
    proxy_result = supports_function_calling(model=proxy_model)

    print("\nDemonstration of proxy model resolution:")
    print(
        f"Direct model '{direct_model}' supports function calling: {direct_result}"
    )
    print(f"Proxy model '{proxy_model}' supports function calling: {proxy_result}")

    results_differ = direct_result != proxy_result
    if results_differ:
        # A mismatch is treated as a known issue, not as a failure
        skip_reason = (
            "Known issue: Proxy model resolution inconsistency. "
            f"Direct: {direct_result}, Proxy: {proxy_result}. "
            "This test will pass when the issue is resolved."
        )
        pytest.skip(skip_reason)

    assert direct_result == proxy_result, (
        f"Proxy model resolution issue: {direct_model} -> {direct_result}, "
        f"{proxy_model} -> {proxy_result}"
    )
|
|
|
|
@pytest.mark.parametrize(
    "proxy_model_name,underlying_bedrock_model,expected_proxy_result,description",
    [
        # Bedrock Converse API mappings - these are the real-world scenarios
        (
            "litellm_proxy/bedrock-claude-3-haiku",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "Bedrock Claude 3 Haiku via Converse API",
        ),
        (
            "litellm_proxy/bedrock-claude-3-sonnet",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
            "Bedrock Claude 3 Sonnet via Converse API",
        ),
        (
            "litellm_proxy/bedrock-claude-3-opus",
            "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
            False,
            "Bedrock Claude 3 Opus via Converse API",
        ),
        (
            "litellm_proxy/bedrock-claude-3-5-sonnet",
            "bedrock/converse/anthropic.claude-3-5-sonnet-20240620-v1:0",
            False,
            "Bedrock Claude 3.5 Sonnet via Converse API",
        ),
        # Bedrock Legacy API mappings (non-converse)
        (
            "litellm_proxy/bedrock-claude-instant",
            "bedrock/anthropic.claude-instant-v1",
            False,
            "Bedrock Claude Instant Legacy API",
        ),
        (
            "litellm_proxy/bedrock-claude-v2",
            "bedrock/anthropic.claude-v2",
            False,
            "Bedrock Claude v2 Legacy API",
        ),
        (
            "litellm_proxy/bedrock-claude-v2-1",
            "bedrock/anthropic.claude-v2:1",
            False,
            "Bedrock Claude v2.1 Legacy API",
        ),
        # Bedrock other model providers via Converse API
        (
            "litellm_proxy/bedrock-titan-text",
            "bedrock/converse/amazon.titan-text-express-v1",
            False,
            "Bedrock Titan Text Express via Converse API",
        ),
        (
            "litellm_proxy/bedrock-titan-text-premier",
            "bedrock/converse/amazon.titan-text-premier-v1:0",
            False,
            "Bedrock Titan Text Premier via Converse API",
        ),
        (
            "litellm_proxy/bedrock-llama3-8b",
            "bedrock/converse/meta.llama3-8b-instruct-v1:0",
            False,
            "Bedrock Llama 3 8B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-llama3-70b",
            "bedrock/converse/meta.llama3-70b-instruct-v1:0",
            False,
            "Bedrock Llama 3 70B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-mistral-7b",
            "bedrock/converse/mistral.mistral-7b-instruct-v0:2",
            False,
            "Bedrock Mistral 7B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-mistral-8x7b",
            "bedrock/converse/mistral.mixtral-8x7b-instruct-v0:1",
            False,
            "Bedrock Mistral 8x7B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-mistral-large",
            "bedrock/converse/mistral.mistral-large-2402-v1:0",
            False,
            "Bedrock Mistral Large via Converse API",
        ),
        # Company-specific naming patterns (real-world examples)
        (
            "litellm_proxy/prod-claude-haiku",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "Production Claude Haiku",
        ),
        (
            "litellm_proxy/dev-claude-sonnet",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
            "Development Claude Sonnet",
        ),
        (
            "litellm_proxy/staging-claude-opus",
            "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
            False,
            "Staging Claude Opus",
        ),
        (
            "litellm_proxy/cost-optimized-claude",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "Cost-optimized Claude deployment",
        ),
        (
            "litellm_proxy/high-performance-claude",
            "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
            False,
            "High-performance Claude deployment",
        ),
        # Regional deployment examples
        (
            "litellm_proxy/us-east-claude",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
            "US East Claude deployment",
        ),
        (
            "litellm_proxy/eu-west-claude",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "EU West Claude deployment",
        ),
        (
            "litellm_proxy/ap-south-llama",
            "bedrock/converse/meta.llama3-70b-instruct-v1:0",
            False,
            "Asia Pacific Llama deployment",
        ),
    ],
)
def test_bedrock_converse_api_proxy_mappings(
    self,
    proxy_model_name,
    underlying_bedrock_model,
    expected_proxy_result,
    description,
):
    """
    Test real-world Bedrock Converse API proxy model mappings.

    This test covers the specific scenario where proxy model names like
    'bedrock-claude-3-haiku' map to underlying Bedrock Converse API models like
    'bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0'.

    Two things are checked per case:
    - if the underlying model can be looked up and its name contains
      "anthropic.claude-3", it must report function calling support;
    - the custom proxy name on its own must equal `expected_proxy_result`
      (False for every case here).

    A lookup error on the underlying model is only reported as a warning.
    The Claude 3 assertion runs outside the try block so that its failure
    is not mistaken for such a lookup error.
    """
    print(f"\nTesting: {description}")
    print(f" Proxy model: {proxy_model_name}")
    print(f" Underlying model: {underlying_bedrock_model}")

    # Test the underlying model directly to verify it supports function calling
    try:
        underlying_result = supports_function_calling(underlying_bedrock_model)
    except Exception as e:
        print(
            f" Warning: Could not test underlying model {underlying_bedrock_model}: {e}"
        )
    else:
        print(f" Underlying model function calling support: {underlying_result}")

        # Anthropic Claude 3 models are expected to support function calling
        if "anthropic.claude-3" in underlying_bedrock_model:
            assert (
                underlying_result is True
            ), f"Claude 3 models should support function calling: {underlying_bedrock_model}"

    # Test the proxy model - expected to match expected_proxy_result
    proxy_result = supports_function_calling(proxy_model_name)
    print(f" Proxy model function calling support: {proxy_result}")

    assert proxy_result == expected_proxy_result, (
        f"Proxy model {proxy_model_name} should return {expected_proxy_result} "
        f"(without config context). Description: {description}"
    )
|
|
|
|
def test_real_world_proxy_config_documentation(self):
    """
    Document how real-world proxy configurations would handle model mappings.

    Prints an example proxy server configuration together with explanatory
    notes, then verifies that three Bedrock Converse Claude 3 models report
    function calling support when addressed directly.

    A lookup error for a model is only printed. The assertion runs outside
    the try block so that a wrong result fails the test instead of being
    reported as a lookup error.
    """
    print(
        """

        REAL-WORLD PROXY SERVER CONFIGURATION EXAMPLE:
        ===============================================

        In a proxy_server_config.yaml file, you would define:

        model_list:
          - model_name: bedrock-claude-3-haiku
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
              aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
              aws_region_name: us-east-1

          - model_name: bedrock-claude-3-sonnet
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0
              aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
              aws_region_name: us-east-1

          - model_name: prod-claude-haiku
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
              aws_access_key_id: os.environ/PROD_AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/PROD_AWS_SECRET_ACCESS_KEY
              aws_region_name: us-west-2


        FUNCTION CALLING WITH PROXY SERVER:
        ===================================

        When using the proxy server with this configuration:

        1. Client calls: supports_function_calling("bedrock-claude-3-haiku")
        2. Proxy server resolves to: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
        3. LiteLLM evaluates the underlying model's capabilities
        4. Returns: True (because Claude 3 Haiku supports function calling)

        Without the proxy server configuration context, LiteLLM cannot resolve
        the custom model name and returns False.


        BEDROCK CONVERSE API BENEFITS:
        ==============================

        The Bedrock Converse API provides:
        - Standardized function calling interface across providers
        - Better tool use capabilities compared to legacy APIs
        - Consistent request/response format
        - Enhanced streaming support for function calls

        """
    )

    # Verify that direct underlying models work as expected
    bedrock_models = [
        "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
        "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
        "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
    ]

    for model in bedrock_models:
        try:
            result = supports_function_calling(model)
        except Exception as e:
            # A failed lookup is reported but does not fail the test
            print(f"Could not test {model}: {e}")
            continue

        print(f"Direct test - {model}: {result}")
        # Claude 3 models should support function calling
        assert (
            result is True
        ), f"Claude 3 model should support function calling: {model}"
|
|
|
|
@pytest.mark.parametrize(
    "proxy_model_name,underlying_bedrock_model,expected_proxy_result,description",
    [
        # Bedrock Converse API mappings - these are the real-world scenarios
        (
            "litellm_proxy/bedrock-claude-3-haiku",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "Bedrock Claude 3 Haiku via Converse API",
        ),
        (
            "litellm_proxy/bedrock-claude-3-sonnet",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
            "Bedrock Claude 3 Sonnet via Converse API",
        ),
        (
            "litellm_proxy/bedrock-claude-3-opus",
            "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
            False,
            "Bedrock Claude 3 Opus via Converse API",
        ),
        (
            "litellm_proxy/bedrock-claude-3-5-sonnet",
            "bedrock/converse/anthropic.claude-3-5-sonnet-20240620-v1:0",
            False,
            "Bedrock Claude 3.5 Sonnet via Converse API",
        ),
        # Bedrock Legacy API mappings (non-converse)
        (
            "litellm_proxy/bedrock-claude-instant",
            "bedrock/anthropic.claude-instant-v1",
            False,
            "Bedrock Claude Instant Legacy API",
        ),
        (
            "litellm_proxy/bedrock-claude-v2",
            "bedrock/anthropic.claude-v2",
            False,
            "Bedrock Claude v2 Legacy API",
        ),
        (
            "litellm_proxy/bedrock-claude-v2-1",
            "bedrock/anthropic.claude-v2:1",
            False,
            "Bedrock Claude v2.1 Legacy API",
        ),
        # Bedrock other model providers via Converse API
        (
            "litellm_proxy/bedrock-titan-text",
            "bedrock/converse/amazon.titan-text-express-v1",
            False,
            "Bedrock Titan Text Express via Converse API",
        ),
        (
            "litellm_proxy/bedrock-titan-text-premier",
            "bedrock/converse/amazon.titan-text-premier-v1:0",
            False,
            "Bedrock Titan Text Premier via Converse API",
        ),
        (
            "litellm_proxy/bedrock-llama3-8b",
            "bedrock/converse/meta.llama3-8b-instruct-v1:0",
            False,
            "Bedrock Llama 3 8B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-llama3-70b",
            "bedrock/converse/meta.llama3-70b-instruct-v1:0",
            False,
            "Bedrock Llama 3 70B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-mistral-7b",
            "bedrock/converse/mistral.mistral-7b-instruct-v0:2",
            False,
            "Bedrock Mistral 7B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-mistral-8x7b",
            "bedrock/converse/mistral.mixtral-8x7b-instruct-v0:1",
            False,
            "Bedrock Mistral 8x7B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-mistral-large",
            "bedrock/converse/mistral.mistral-large-2402-v1:0",
            False,
            "Bedrock Mistral Large via Converse API",
        ),
        # Company-specific naming patterns (real-world examples)
        (
            "litellm_proxy/prod-claude-haiku",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "Production Claude Haiku",
        ),
        (
            "litellm_proxy/dev-claude-sonnet",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
            "Development Claude Sonnet",
        ),
        (
            "litellm_proxy/staging-claude-opus",
            "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
            False,
            "Staging Claude Opus",
        ),
        (
            "litellm_proxy/cost-optimized-claude",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "Cost-optimized Claude deployment",
        ),
        (
            "litellm_proxy/high-performance-claude",
            "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
            False,
            "High-performance Claude deployment",
        ),
        # Regional deployment examples
        (
            "litellm_proxy/us-east-claude",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
            "US East Claude deployment",
        ),
        (
            "litellm_proxy/eu-west-claude",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "EU West Claude deployment",
        ),
        (
            "litellm_proxy/ap-south-llama",
            "bedrock/converse/meta.llama3-70b-instruct-v1:0",
            False,
            "Asia Pacific Llama deployment",
        ),
    ],
)
def test_bedrock_converse_api_proxy_mappings(
    self,
    proxy_model_name,
    underlying_bedrock_model,
    expected_proxy_result,
    description,
):
    """
    Test real-world Bedrock Converse API proxy model mappings.

    This test covers the specific scenario where proxy model names like
    'bedrock-claude-3-haiku' map to underlying Bedrock Converse API models like
    'bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0'.

    These mappings are typically defined in proxy server configuration files
    and cannot be resolved by LiteLLM without that context.

    Args:
        proxy_model_name: The `litellm_proxy/...` alias passed to
            `supports_function_calling`.
        underlying_bedrock_model: The Bedrock model the alias would map to.
        expected_proxy_result: Expected result for the alias lookup.
        description: Human-readable label used in output and failure messages.
    """
    print(f"\nTesting: {description}")
    print(f" Proxy model: {proxy_model_name}")
    print(f" Underlying model: {underlying_bedrock_model}")

    # Test the underlying model directly to verify it supports function calling.
    # Only the lookup itself is best-effort; the assertion lives in the `else`
    # branch so an AssertionError is not swallowed by `except Exception`.
    try:
        underlying_result = supports_function_calling(underlying_bedrock_model)
    except Exception as e:
        print(
            f" Warning: Could not test underlying model {underlying_bedrock_model}: {e}"
        )
    else:
        print(f" Underlying model function calling support: {underlying_result}")

        # Most Bedrock Converse API models with Anthropic Claude should support function calling
        if "anthropic.claude-3" in underlying_bedrock_model:
            assert (
                underlying_result is True
            ), f"Claude 3 models should support function calling: {underlying_bedrock_model}"

    # Test the proxy model - should return False due to lack of configuration context
    proxy_result = supports_function_calling(proxy_model_name)
    print(f" Proxy model function calling support: {proxy_result}")

    assert proxy_result == expected_proxy_result, (
        f"Proxy model {proxy_model_name} should return {expected_proxy_result} "
        f"(without config context). Description: {description}"
    )
|
|
|
|
def test_real_world_proxy_config_documentation(self):
    """
    Document how real-world proxy configurations would handle model mappings.

    This test prints an example proxy server configuration that maps custom
    model names to underlying models, then checks that the underlying
    Claude 3 Bedrock Converse models report function calling support.
    """
    print(
        """

    REAL-WORLD PROXY SERVER CONFIGURATION EXAMPLE:
    ===============================================

    In a proxy_server_config.yaml file, you would define:

    model_list:
      - model_name: bedrock-claude-3-haiku
        litellm_params:
          model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
          aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
          aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
          aws_region_name: us-east-1

      - model_name: bedrock-claude-3-sonnet
        litellm_params:
          model: bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0
          aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
          aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
          aws_region_name: us-east-1

      - model_name: prod-claude-haiku
        litellm_params:
          model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
          aws_access_key_id: os.environ/PROD_AWS_ACCESS_KEY_ID
          aws_secret_access_key: os.environ/PROD_AWS_SECRET_ACCESS_KEY
          aws_region_name: us-west-2


    FUNCTION CALLING WITH PROXY SERVER:
    ===================================

    When using the proxy server with this configuration:

    1. Client calls: supports_function_calling("bedrock-claude-3-haiku")
    2. Proxy server resolves to: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
    3. LiteLLM evaluates the underlying model's capabilities
    4. Returns: True (because Claude 3 Haiku supports function calling)

    Without the proxy server configuration context, LiteLLM cannot resolve
    the custom model name and returns False.


    BEDROCK CONVERSE API BENEFITS:
    ==============================

    The Bedrock Converse API provides:
    - Standardized function calling interface across providers
    - Better tool use capabilities compared to legacy APIs
    - Consistent request/response format
    - Enhanced streaming support for function calls

    """
    )

    # Verify that direct underlying models work as expected
    bedrock_models = [
        "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
        "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
        "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
    ]

    for model in bedrock_models:
        # Only the lookup is best-effort. The assertion stays outside the
        # `try` so an AssertionError is not caught by `except Exception`.
        try:
            result = supports_function_calling(model)
        except Exception as e:
            print(f"Could not test {model}: {e}")
            continue

        print(f"Direct test - {model}: {result}")
        # Claude 3 models should support function calling
        assert (
            result is True
        ), f"Claude 3 model should support function calling: {model}"
|
|
|
|
@pytest.mark.parametrize(
    "proxy_model_name,underlying_bedrock_model,expected_proxy_result,description",
    [
        # Bedrock Converse API mappings - these are the real-world scenarios
        (
            "litellm_proxy/bedrock-claude-3-haiku",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "Bedrock Claude 3 Haiku via Converse API",
        ),
        (
            "litellm_proxy/bedrock-claude-3-sonnet",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
            "Bedrock Claude 3 Sonnet via Converse API",
        ),
        (
            "litellm_proxy/bedrock-claude-3-opus",
            "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
            False,
            "Bedrock Claude 3 Opus via Converse API",
        ),
        (
            "litellm_proxy/bedrock-claude-3-5-sonnet",
            "bedrock/converse/anthropic.claude-3-5-sonnet-20240620-v1:0",
            False,
            "Bedrock Claude 3.5 Sonnet via Converse API",
        ),
        # Bedrock Legacy API mappings (non-converse)
        (
            "litellm_proxy/bedrock-claude-instant",
            "bedrock/anthropic.claude-instant-v1",
            False,
            "Bedrock Claude Instant Legacy API",
        ),
        (
            "litellm_proxy/bedrock-claude-v2",
            "bedrock/anthropic.claude-v2",
            False,
            "Bedrock Claude v2 Legacy API",
        ),
        (
            "litellm_proxy/bedrock-claude-v2-1",
            "bedrock/anthropic.claude-v2:1",
            False,
            "Bedrock Claude v2.1 Legacy API",
        ),
        # Bedrock other model providers via Converse API
        (
            "litellm_proxy/bedrock-titan-text",
            "bedrock/converse/amazon.titan-text-express-v1",
            False,
            "Bedrock Titan Text Express via Converse API",
        ),
        (
            "litellm_proxy/bedrock-titan-text-premier",
            "bedrock/converse/amazon.titan-text-premier-v1:0",
            False,
            "Bedrock Titan Text Premier via Converse API",
        ),
        (
            "litellm_proxy/bedrock-llama3-8b",
            "bedrock/converse/meta.llama3-8b-instruct-v1:0",
            False,
            "Bedrock Llama 3 8B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-llama3-70b",
            "bedrock/converse/meta.llama3-70b-instruct-v1:0",
            False,
            "Bedrock Llama 3 70B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-mistral-7b",
            "bedrock/converse/mistral.mistral-7b-instruct-v0:2",
            False,
            "Bedrock Mistral 7B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-mistral-8x7b",
            "bedrock/converse/mistral.mixtral-8x7b-instruct-v0:1",
            False,
            "Bedrock Mistral 8x7B via Converse API",
        ),
        (
            "litellm_proxy/bedrock-mistral-large",
            "bedrock/converse/mistral.mistral-large-2402-v1:0",
            False,
            "Bedrock Mistral Large via Converse API",
        ),
        # Company-specific naming patterns (real-world examples)
        (
            "litellm_proxy/prod-claude-haiku",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "Production Claude Haiku",
        ),
        (
            "litellm_proxy/dev-claude-sonnet",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
            "Development Claude Sonnet",
        ),
        (
            "litellm_proxy/staging-claude-opus",
            "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
            False,
            "Staging Claude Opus",
        ),
        (
            "litellm_proxy/cost-optimized-claude",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "Cost-optimized Claude deployment",
        ),
        (
            "litellm_proxy/high-performance-claude",
            "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
            False,
            "High-performance Claude deployment",
        ),
        # Regional deployment examples
        (
            "litellm_proxy/us-east-claude",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            False,
            "US East Claude deployment",
        ),
        (
            "litellm_proxy/eu-west-claude",
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            False,
            "EU West Claude deployment",
        ),
        (
            "litellm_proxy/ap-south-llama",
            "bedrock/converse/meta.llama3-70b-instruct-v1:0",
            False,
            "Asia Pacific Llama deployment",
        ),
    ],
)
def test_bedrock_converse_api_proxy_mappings(
    self,
    proxy_model_name,
    underlying_bedrock_model,
    expected_proxy_result,
    description,
):
    """
    Test real-world Bedrock Converse API proxy model mappings.

    This test covers the specific scenario where proxy model names like
    'bedrock-claude-3-haiku' map to underlying Bedrock Converse API models like
    'bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0'.

    These mappings are typically defined in proxy server configuration files
    and cannot be resolved by LiteLLM without that context.

    Args:
        proxy_model_name: The `litellm_proxy/...` alias passed to
            `supports_function_calling`.
        underlying_bedrock_model: The Bedrock model the alias would map to.
        expected_proxy_result: Expected result for the alias lookup.
        description: Human-readable label used in output and failure messages.
    """
    print(f"\nTesting: {description}")
    print(f" Proxy model: {proxy_model_name}")
    print(f" Underlying model: {underlying_bedrock_model}")

    # Test the underlying model directly to verify it supports function calling.
    # Only the lookup itself is best-effort; the assertion lives in the `else`
    # branch so an AssertionError is not swallowed by `except Exception`.
    try:
        underlying_result = supports_function_calling(underlying_bedrock_model)
    except Exception as e:
        print(
            f" Warning: Could not test underlying model {underlying_bedrock_model}: {e}"
        )
    else:
        print(f" Underlying model function calling support: {underlying_result}")

        # Most Bedrock Converse API models with Anthropic Claude should support function calling
        if "anthropic.claude-3" in underlying_bedrock_model:
            assert (
                underlying_result is True
            ), f"Claude 3 models should support function calling: {underlying_bedrock_model}"

    # Test the proxy model - should return False due to lack of configuration context
    proxy_result = supports_function_calling(proxy_model_name)
    print(f" Proxy model function calling support: {proxy_result}")

    assert proxy_result == expected_proxy_result, (
        f"Proxy model {proxy_model_name} should return {expected_proxy_result} "
        f"(without config context). Description: {description}"
    )
|
|
|
|
def test_real_world_proxy_config_documentation(self):
    """
    Document how real-world proxy configurations would handle model mappings.

    This test prints an example proxy server configuration that maps custom
    model names to underlying models, then checks that the underlying
    Claude 3 Bedrock Converse models report function calling support.
    """
    print(
        """

    REAL-WORLD PROXY SERVER CONFIGURATION EXAMPLE:
    ===============================================

    In a proxy_server_config.yaml file, you would define:

    model_list:
      - model_name: bedrock-claude-3-haiku
        litellm_params:
          model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
          aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
          aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
          aws_region_name: us-east-1

      - model_name: bedrock-claude-3-sonnet
        litellm_params:
          model: bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0
          aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
          aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
          aws_region_name: us-east-1

      - model_name: prod-claude-haiku
        litellm_params:
          model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
          aws_access_key_id: os.environ/PROD_AWS_ACCESS_KEY_ID
          aws_secret_access_key: os.environ/PROD_AWS_SECRET_ACCESS_KEY
          aws_region_name: us-west-2


    FUNCTION CALLING WITH PROXY SERVER:
    ===================================

    When using the proxy server with this configuration:

    1. Client calls: supports_function_calling("bedrock-claude-3-haiku")
    2. Proxy server resolves to: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
    3. LiteLLM evaluates the underlying model's capabilities
    4. Returns: True (because Claude 3 Haiku supports function calling)

    Without the proxy server configuration context, LiteLLM cannot resolve
    the custom model name and returns False.


    BEDROCK CONVERSE API BENEFITS:
    ==============================

    The Bedrock Converse API provides:
    - Standardized function calling interface across providers
    - Better tool use capabilities compared to legacy APIs
    - Consistent request/response format
    - Enhanced streaming support for function calls

    """
    )

    # Verify that direct underlying models work as expected
    bedrock_models = [
        "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
        "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
        "bedrock/converse/anthropic.claude-3-opus-20240229-v1:0",
    ]

    for model in bedrock_models:
        # Only the lookup is best-effort. The assertion stays outside the
        # `try` so an AssertionError is not caught by `except Exception`.
        try:
            result = supports_function_calling(model)
        except Exception as e:
            print(f"Could not test {model}: {e}")
            continue

        print(f"Direct test - {model}: {result}")
        # Claude 3 models should support function calling
        assert (
            result is True
        ), f"Claude 3 model should support function calling: {model}"
|
|
|
|
|
|
def test_register_model_with_scientific_notation():
    """
    Test that register_model handles costs given as scientific-notation strings.

    The per-token costs are registered as the strings "3e-07" / "6e-07" and
    are expected to compare equal to the floats 3e-07 / 6e-07 afterwards.
    """
    import uuid

    from litellm.utils import (
        _invalidate_model_cost_lowercase_map,
    )

    # Use a truly unique model name with uuid to avoid conflicts when tests run in parallel
    test_model_name = f"test-scientific-notation-model-{uuid.uuid4().hex[:12]}"

    # Clear LRU caches that might have stale data
    _invalidate_model_cost_lowercase_map()

    model_cost_dict = {
        test_model_name: {
            "max_tokens": 8192,
            "input_cost_per_token": "3e-07",
            "output_cost_per_token": "6e-07",
            "litellm_provider": "openai",
            "mode": "chat",
        },
    }

    try:
        litellm.register_model(model_cost_dict)

        registered_model = litellm.model_cost[test_model_name]
        print(registered_model)
        assert registered_model["input_cost_per_token"] == 3e-07
        assert registered_model["output_cost_per_token"] == 6e-07
        assert registered_model["litellm_provider"] == "openai"
        assert registered_model["mode"] == "chat"
    finally:
        # Clean up even when an assertion fails, so the temporary entry
        # never leaks into other tests sharing litellm.model_cost.
        litellm.model_cost.pop(test_model_name, None)
        _invalidate_model_cost_lowercase_map()
|
|
|
|
|
|
def test_register_model_openrouter_without_slash():
    """
    Test that register_model handles openrouter models without '/' in the name.

    Fixes https://github.com/BerriAI/litellm/issues/18936

    Previously, the code did `split_string[1]` which would fail with IndexError
    when the model name didn't contain '/'. Now it uses `split_string[-1]` which
    always works.
    """
    # Clear exactly the entries asserted below, so each assertion proves that
    # register_model added the name rather than passing on a pre-existing entry.
    litellm.openrouter_models.discard("my-custom-alias")
    litellm.openrouter_models.discard("gpt-4")
    litellm.openrouter_models.discard("openai/gpt-4-turbo")

    # Test 1: Model name without '/' (this was the bug - would raise IndexError)
    litellm.register_model(
        {
            "my-custom-alias": {
                "max_tokens": 8192,
                "input_cost_per_token": 0.00001,
                "output_cost_per_token": 0.00002,
                "litellm_provider": "openrouter",
                "mode": "chat",
            },
        }
    )
    assert "my-custom-alias" in litellm.openrouter_models

    # Test 2: Model name with single '/' (openrouter/model format)
    litellm.register_model(
        {
            "openrouter/gpt-4": {
                "max_tokens": 8192,
                "input_cost_per_token": 0.00001,
                "output_cost_per_token": 0.00002,
                "litellm_provider": "openrouter",
                "mode": "chat",
            },
        }
    )
    assert "gpt-4" in litellm.openrouter_models

    # Test 3: Model name with double '/' (openrouter/provider/model format)
    litellm.register_model(
        {
            "openrouter/openai/gpt-4-turbo": {
                "max_tokens": 8192,
                "input_cost_per_token": 0.00001,
                "output_cost_per_token": 0.00002,
                "litellm_provider": "openrouter",
                "mode": "chat",
            },
        }
    )
    assert "openai/gpt-4-turbo" in litellm.openrouter_models
|
|
|
|
|
|
def test_reasoning_content_preserved_in_text_completion_wrapper():
    """Check that convert_to_text_completion_object carries reasoning_content over.

    A single streaming chat chunk whose delta has both `content` and
    `reasoning_content` is converted; the resulting text choice must expose
    both values.
    """
    delta = Delta(
        content="Some answer text",
        role="assistant",
        reasoning_content="Here's my chain of thought...",
    )
    streaming_choice = StreamingChoices(finish_reason=None, index=0, delta=delta)
    chunk = ModelResponseStream(
        id="test-id",
        created=1234567890,
        model="test-model",
        object="chat.completion.chunk",
        choices=[streaming_choice],
    )

    # completion_stream is not used by convert_to_text_completion_object
    wrapper = TextCompletionStreamWrapper(
        completion_stream=None,
        model="test-model",
        stream_options=None,
    )

    transformed = wrapper.convert_to_text_completion_object(chunk)

    assert "choices" in transformed
    text_choices = transformed["choices"]
    assert len(text_choices) == 1
    first_choice = text_choices[0]
    assert first_choice["text"] == "Some answer text"
    assert first_choice["reasoning_content"] == "Here's my chain of thought..."
|
|
|
|
|
|
def test_anthropic_claude_4_invoke_chat_provider_config():
    """Check the chat config resolved for a Bedrock invoke-route Claude Sonnet 4 model.

    The provider config manager is expected to return an
    AmazonAnthropicClaudeConfig instance for this model id.
    """
    from litellm.llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation import (
        AmazonAnthropicClaudeConfig,
    )
    from litellm.utils import ProviderConfigManager

    model_id = "invoke/us.anthropic.claude-sonnet-4-20250514-v1:0"
    provider_config = ProviderConfigManager.get_provider_chat_config(
        model=model_id,
        provider=LlmProviders.BEDROCK,
    )
    print(provider_config)
    assert isinstance(provider_config, AmazonAnthropicClaudeConfig)
|
|
|
|
|
|
def test_bedrock_application_inference_profile():
    """
    Test tool-choice support lookup for a Bedrock application inference profile ARN.

    The lookup must return True and give the same answer for both the
    "bedrock" and "bedrock_converse" provider names.
    """
    from litellm.utils import supports_tool_choice

    model = "arn:aws:bedrock:us-east-2:<AWS-ACCOUNT-ID>:inference-profile/us.anthropic.claude-3-5-haiku-20241022-v1:0"

    result = supports_tool_choice(model, custom_llm_provider="bedrock")
    result_2 = supports_tool_choice(model, custom_llm_provider="bedrock_converse")
    print(result)
    assert result == result_2
    assert result is True
|
|
|
|
|
|
def test_image_response_utils():
    """Smoke test: ImageResponse can be constructed from a raw provider payload.

    The payload carries extra per-image keys ("timings", "index") and a
    top-level "hidden_params"; the test passes as long as construction
    does not raise.
    """
    from litellm.utils import ImageResponse

    image_entry = {
        "b64_json": "/9j/.../2Q==",
        "revised_prompt": None,
        "url": None,
        "timings": {"inference": 0.9612685777246952},
        "index": 0,
    }
    payload = {
        "created": None,
        "data": [image_entry],
        "id": "91559891cxxx-PDX",
        "model": "black-forest-labs/FLUX.1-schnell-Free",
        "object": "list",
        "hidden_params": {"additional_headers": {}},
    }
    ImageResponse(**payload)
|
|
|
|
|
|
def test_is_valid_api_key():
    """Check is_valid_api_key against accepted and rejected key shapes."""
    import hashlib

    # Real SHA-256 hash
    sha256_hex = hashlib.sha256(b"my_secret_key").hexdigest()
    assert len(sha256_hex) == 64

    accepted_keys = [
        # Valid sk- keys
        "sk-abc123",
        "sk-ABC_123-xyz",
        # Valid hashed key (64 hex chars)
        "a" * 64,
        "0123456789abcdef" * 4,  # 16*4 = 64
        sha256_hex,
    ]
    for candidate in accepted_keys:
        assert is_valid_api_key(candidate)

    rejected_keys = [
        # Invalid: too short
        "sk-",
        "",
        # Invalid: too long
        "sk-" + "a" * 200,
        # Invalid: wrong prefix
        "pk-abc123",
        # Invalid: wrong chars in sk- key
        "sk-abc$%#@!",
        # Invalid: not a string
        None,
        12345,
        # Invalid: wrong length for hash
        "a" * 63,
        "a" * 65,
    ]
    for candidate in rejected_keys:
        assert not is_valid_api_key(candidate)
|
|
|
|
|
|
def test_block_key_hashing_logic():
    """
    Test the rule that only keys starting with "sk-" are hashed.

    NOTE: this re-implements the `startswith("sk-")` branch inline rather
    than calling block_key() itself, so it checks the rule and hash_token's
    output format, not the block_key() implementation.
    """
    from litellm.proxy.utils import hash_token

    # Test cases: (input_key, should_be_hashed, expected_output)
    test_cases = [
        ("sk-1234567890abcdef", True, hash_token("sk-1234567890abcdef")),
        ("sk-test-key", True, hash_token("sk-test-key")),
        ("abc123", False, "abc123"),  # Should not be hashed
        ("hashed_key_123", False, "hashed_key_123"),  # Should not be hashed
        ("", False, ""),  # Empty string should not be hashed
        ("sk-", True, hash_token("sk-")),  # Edge case: just "sk-"
    ]

    for input_key, should_be_hashed, expected_output in test_cases:
        # Simulate the logic from block_key() function
        if input_key.startswith("sk-"):
            hashed_token = hash_token(token=input_key)
        else:
            hashed_token = input_key

        assert hashed_token == expected_output, f"Failed for input: {input_key}"

        # Additional verification: if it should be hashed, verify it's actually a hash
        if should_be_hashed:
            # SHA-256 hashes are 64 characters long and contain only hex digits
            assert (
                len(hashed_token) == 64
            ), f"Hash length should be 64, got {len(hashed_token)} for {input_key}"
            assert all(
                c in "0123456789abcdef" for c in hashed_token
            ), f"Hash should contain only hex digits for {input_key}"
        else:
            # If not hashed, it should be the original string
            assert (
                hashed_token == input_key
            ), f"Non-hashed key should remain unchanged: {input_key}"

    print("✅ All block_key hashing logic tests passed!")
|
|
|
|
|
|
def test_generate_gcp_iam_access_token():
    """
    Exercise _generate_gcp_iam_access_token against a mocked GCP IAM client.

    A stand-in for `google.cloud.iam_credentials_v1` is placed in
    sys.modules; the function must return the mocked access token and build
    its request with the service account name and the cloud-platform scope.
    """
    from unittest.mock import Mock, patch

    service_account = "projects/-/serviceAccounts/test@project.iam.gserviceaccount.com"
    expected_token = "test-access-token-12345"

    # Fake client whose generate_access_token() yields the expected token
    token_response = Mock()
    token_response.access_token = expected_token
    iam_client = Mock()
    iam_client.generate_access_token.return_value = token_response

    # Fake iam_credentials_v1 module exposing the client and request classes
    fake_iam_module = Mock()
    fake_iam_module.IAMCredentialsClient = Mock(return_value=iam_client)
    fake_iam_module.GenerateAccessTokenRequest = Mock()

    # Test successful token generation by mocking sys.modules
    module_overrides = {"google.cloud.iam_credentials_v1": fake_iam_module}
    with patch.dict("sys.modules", module_overrides):
        from litellm._redis import _generate_gcp_iam_access_token

        result = _generate_gcp_iam_access_token(service_account)

        assert result == expected_token
        fake_iam_module.IAMCredentialsClient.assert_called_once()
        iam_client.generate_access_token.assert_called_once()

        # Verify the request was created with correct parameters
        fake_iam_module.GenerateAccessTokenRequest.assert_called_once_with(
            name=service_account,
            scope=["https://www.googleapis.com/auth/cloud-platform"],
        )
|
|
|
|
|
|
def test_generate_gcp_iam_access_token_import_error():
    """
    Test that _generate_gcp_iam_access_token raises ImportError when google-cloud-iam is not available.

    The built-in import hook is patched so that importing
    `google.cloud.iam_credentials_v1` fails; the raised error message must
    name the missing package and the pip command to install it.
    """
    # `__builtins__` may be a dict or a module depending on how this file is
    # loaded (a CPython implementation detail), so use the builtins module.
    import builtins

    # Import the function first, before mocking
    from litellm._redis import _generate_gcp_iam_access_token

    # Mock the import to fail when the function tries to import google.cloud.iam_credentials_v1
    original_import = builtins.__import__

    def mock_import(name, *args, **kwargs):
        if name == "google.cloud.iam_credentials_v1":
            raise ImportError("No module named 'google.cloud.iam_credentials_v1'")
        return original_import(name, *args, **kwargs)

    with patch("builtins.__import__", side_effect=mock_import):
        with pytest.raises(ImportError) as exc_info:
            _generate_gcp_iam_access_token("test-service-account")

    assert "google-cloud-iam is required" in str(exc_info.value)
    assert "pip install google-cloud-iam" in str(exc_info.value)
|
|
|
|
|
|
if __name__ == "__main__":
    # Debugging convenience: run this file's tests verbosely when executed directly.
    pytest.main(args=[__file__, "-v"])
|
|
|
|
|
|
def test_model_info_for_vertex_ai_deepseek_model():
    """Check get_model_info for the Vertex AI DeepSeek R1 MaaS model.

    The entry must exist, report the vertex_ai-deepseek_models provider in
    chat mode, and carry both per-token costs.
    """
    deepseek_info = litellm.get_model_info(
        model="vertex_ai/deepseek-ai/deepseek-r1-0528-maas"
    )
    assert deepseek_info is not None
    assert deepseek_info["litellm_provider"] == "vertex_ai-deepseek_models"
    assert deepseek_info["mode"] == "chat"

    for cost_field in ("input_cost_per_token", "output_cost_per_token"):
        assert deepseek_info[cost_field] is not None
    print("vertex deepseek model info", deepseek_info)
|
|
|
|
|
|
def test_model_info_for_openrouter_kimi_k2_5():
    """
    Test that openrouter/moonshotai/kimi-k2.5 model info is correctly configured
    in model_prices_and_context_window.json.

    Model properties from OpenRouter API:
    - context_length: 262144
    - pricing: prompt=$0.0000006, completion=$0.000003, input_cache_read=$0.0000001
    - modality: text+image->text (supports vision)
    - supports: tool_choice, tools (function calling)
    """
    import json
    from pathlib import Path

    # Load directly from the local JSON file. The encoding is explicit so the
    # read does not depend on the platform's default locale encoding.
    json_path = Path(__file__).parents[2] / "model_prices_and_context_window.json"
    model_cost = json.loads(json_path.read_text(encoding="utf-8"))

    model_info = model_cost.get("openrouter/moonshotai/kimi-k2.5")
    assert (
        model_info is not None
    ), "Model not found in model_prices_and_context_window.json"
    assert model_info["litellm_provider"] == "openrouter"
    assert model_info["mode"] == "chat"

    # Verify context window
    assert model_info["max_input_tokens"] == 262144
    assert model_info["max_output_tokens"] == 262144
    assert model_info["max_tokens"] == 262144

    # Verify pricing
    assert model_info["input_cost_per_token"] == 6e-07
    assert model_info["output_cost_per_token"] == 3e-06
    assert model_info["cache_read_input_token_cost"] == 1e-07

    # Verify capabilities
    assert model_info["supports_vision"] is True
    assert model_info["supports_function_calling"] is True
    assert model_info["supports_tool_choice"] is True

    print("openrouter kimi-k2.5 model info", model_info)
|
|
|
|
|
|
def test_gemini_lyria_3_preview_models_in_cost_map():
    """
    Test that the Gemini Lyria 3 preview models are present in
    model_prices_and_context_window.json with the expected provider,
    input-token limit and (for the clip model) per-image output cost.
    """
    import json
    from pathlib import Path

    # Explicit encoding keeps the read independent of the platform default.
    json_path = Path(__file__).parents[2] / "model_prices_and_context_window.json"
    model_cost = json.loads(json_path.read_text(encoding="utf-8"))

    clip = model_cost.get("gemini/lyria-3-clip-preview")
    pro = model_cost.get("gemini/lyria-3-pro-preview")

    # One assertion per model so a failure names the missing/incorrect entry.
    assert clip is not None, "gemini/lyria-3-clip-preview not found in cost map"
    assert pro is not None, "gemini/lyria-3-pro-preview not found in cost map"
    assert clip["litellm_provider"] == "gemini"
    assert pro["litellm_provider"] == "gemini"
    assert clip["max_input_tokens"] == 131072
    assert pro["max_input_tokens"] == 131072
    assert clip["output_cost_per_image"] == 0.04
|
|
|
|
|
|
def test_model_info_for_fireworks_short_form_models():
    """
    Test that fireworks_ai short-form model entries (fireworks_ai/<model>)
    are correctly configured in model_prices_and_context_window.json.

    These entries enable cost attribution for models called via short-form
    names (e.g., fireworks_ai/glm-4p7 instead of
    fireworks_ai/accounts/fireworks/models/glm-4p7).
    """
    import json
    from pathlib import Path

    # Explicit encoding keeps the read independent of the platform default.
    json_path = Path(__file__).parents[2] / "model_prices_and_context_window.json"
    model_cost = json.loads(json_path.read_text(encoding="utf-8"))

    glm_4p7 = {
        "input_cost_per_token": 6e-07,
        "output_cost_per_token": 2.2e-06,
        "max_input_tokens": 202800,
        "supports_reasoning": True,
    }
    minimax_m2p1 = {
        "input_cost_per_token": 3e-07,
        "output_cost_per_token": 1.2e-06,
        "max_input_tokens": 204800,
    }
    kimi_k2p5 = {
        "input_cost_per_token": 6e-07,
        "output_cost_per_token": 3e-06,
        "max_input_tokens": 262144,
    }

    # (cost-map key, expected field values) in the order they are checked
    expectations = [
        # glm-4p7: short-form and long-form
        ("fireworks_ai/glm-4p7", glm_4p7),
        ("fireworks_ai/accounts/fireworks/models/glm-4p7", glm_4p7),
        # minimax-m2p1: short-form and long-form
        ("fireworks_ai/minimax-m2p1", minimax_m2p1),
        ("fireworks_ai/accounts/fireworks/models/minimax-m2p1", minimax_m2p1),
        # kimi-k2p5: short-form only (long-form already existed)
        ("fireworks_ai/kimi-k2p5", kimi_k2p5),
    ]

    for key, expected_fields in expectations:
        info = model_cost.get(key)
        assert (
            info is not None
        ), f"{key} not found in model_prices_and_context_window.json"
        assert info["litellm_provider"] == "fireworks_ai"
        assert info["mode"] == "chat"
        for field, expected_value in expected_fields.items():
            if expected_value is True:
                # Boolean capability flags must be exactly True, not merely truthy.
                assert info[field] is True, f"{key}: {field} should be True"
            else:
                assert (
                    info[field] == expected_value
                ), f"{key}: {field} should be {expected_value}, got {info[field]}"
|
|
|
|
|
|
class TestGetValidModelsWithCLI:
    """Exercise ``get_valid_models`` the way the CLI token-usage code calls it."""

    def test_get_valid_models_with_cli_pattern(self):
        """Query a mocked ``litellm_proxy`` endpoint and check names and request.

        The HTTP ``get`` on ``litellm.module_level_client`` is patched, so the
        test inspects both the returned model names and the keyword arguments
        the request was issued with.
        """
        listed_ids = [
            "gpt-3.5-turbo",
            "gpt-4",
            "litellm_proxy/gemini/gemini-2.5-flash",
            "claude-3-sonnet",
        ]

        # Stand-in for the response the proxy returns when listing models.
        fake_response = MagicMock()
        fake_response.status_code = 200
        fake_response.json.return_value = {
            "data": [{"id": model_id, "object": "model"} for model_id in listed_ids]
        }

        with patch.object(
            litellm.module_level_client, "get", return_value=fake_response
        ) as mock_get:
            # Same call shape as used in cli_token_usage.py.
            result = litellm.get_valid_models(
                check_provider_endpoint=True,
                custom_llm_provider="litellm_proxy",
                api_key="sk-test-cli-key-123",
                api_base="http://localhost:4000/",
            )

        # The mock keeps its call record after the patch is undone, so the
        # remaining checks can run outside the ``with`` block.
        assert isinstance(result, list)
        assert len(result) == 4

        # Every id comes back prefixed with "litellm_proxy/"; the id that
        # already carried the prefix therefore ends up double-prefixed.
        expected_names = [
            "litellm_proxy/gpt-3.5-turbo",
            "litellm_proxy/gpt-4",
            "litellm_proxy/litellm_proxy/gemini/gemini-2.5-flash",
            "litellm_proxy/claude-3-sonnet",
        ]
        for expected_name in expected_names:
            assert expected_name in result

        # Exactly one HTTP request, addressed to the models endpoint.
        mock_get.assert_called_once()
        request_kwargs = mock_get.call_args[1]
        request_url = request_kwargs["url"]
        assert request_url.startswith("http://localhost:4000/")
        assert request_url.endswith("/v1/models")

        # The API key must be forwarded as a bearer token.
        assert "headers" in request_kwargs
        request_headers = request_kwargs["headers"]
        assert request_headers.get("Authorization") == "Bearer sk-test-cli-key-123"
|
|
|
|
|
|
class TestIsCachedMessage:
    """Cover ``is_cached_message``, used to detect context-caching markers.

    Includes regression cases for GitHub issue #17821 (TypeError when
    ``content`` is a string instead of a list).
    """

    def test_string_content_returns_false(self):
        """Plain string content is reported as not cached and must not raise."""
        assert is_cached_message({"role": "user", "content": "Hello world"}) is False

    def test_none_content_returns_false(self):
        """``content=None`` is reported as not cached."""
        assert is_cached_message({"role": "user", "content": None}) is False

    def test_missing_content_returns_false(self):
        """A message with no ``content`` key is reported as not cached."""
        assert is_cached_message({"role": "user"}) is False

    def test_list_content_without_cache_control_returns_false(self):
        """A content list whose items lack ``cache_control`` is not cached."""
        plain_part = {"type": "text", "text": "Hello"}
        assert is_cached_message({"role": "user", "content": [plain_part]}) is False

    def test_list_content_with_cache_control_returns_true(self):
        """A content item with ephemeral ``cache_control`` marks the message cached."""
        cached_part = {
            "type": "text",
            "text": "Hello",
            "cache_control": {"type": "ephemeral"},
        }
        assert is_cached_message({"role": "user", "content": [cached_part]}) is True

    def test_list_with_non_dict_items_skips_them(self):
        """Non-dict entries in the content list are tolerated."""
        mixed_content = ["string_item", 123, {"type": "text", "text": "Hello"}]
        assert is_cached_message({"role": "user", "content": mixed_content}) is False

    def test_list_with_mixed_items_finds_cached(self):
        """A cached item is found even when it follows uncached/non-dict items."""
        cached_part = {
            "type": "text",
            "text": "cached",
            "cache_control": {"type": "ephemeral"},
        }
        mixed_content = [
            "string_item",
            {"type": "image", "url": "..."},
            cached_part,
        ]
        assert is_cached_message({"role": "user", "content": mixed_content}) is True

    def test_wrong_cache_control_type_returns_false(self):
        """A ``cache_control`` type other than ``ephemeral`` does not count."""
        permanent_part = {
            "type": "text",
            "text": "Hello",
            "cache_control": {"type": "permanent"},
        }
        assert is_cached_message({"role": "user", "content": [permanent_part]}) is False

    def test_empty_list_content_returns_false(self):
        """An empty content list is reported as not cached."""
        assert is_cached_message({"role": "user", "content": []}) is False

    def test_message_level_cache_control_returns_true(self):
        """String content plus message-level ephemeral ``cache_control`` is cached.

        This is the shape produced by the cache_control_injection_points hook
        for string content (common for system messages). Regression case for
        GitHub issue #18519 (Gemini models ignoring
        cache_control_injection_points).
        """
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant.",
            "cache_control": {"type": "ephemeral"},
        }
        outcome = is_cached_message(system_message)
        assert outcome is True

    def test_message_level_cache_control_wrong_type_returns_false(self):
        """Message-level ``cache_control`` with a non-ephemeral type is not cached."""
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant.",
            "cache_control": {"type": "permanent"},
        }
        outcome = is_cached_message(system_message)
        assert outcome is False

    def test_message_level_cache_control_non_dict_returns_false(self):
        """Message-level ``cache_control`` given as a bare string is not cached."""
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant.",
            "cache_control": "ephemeral",
        }
        outcome = is_cached_message(system_message)
        assert outcome is False
|
|
|
|
|
|
@pytest.mark.asyncio
class TestProxyLoggingBudgetAlerts:
    """Cover how ``ProxyLogging.budget_alerts`` dispatches to Slack and email."""

    @staticmethod
    def _make_proxy_logging(alerting, **instances):
        """Build a ``ProxyLogging`` with ``alerting`` set and instances attached.

        ``instances`` maps attribute names (``slack_alerting_instance`` /
        ``email_logging_instance``) to replacement objects, applied in the
        order given. Attributes that are not passed keep whatever the
        ``ProxyLogging`` constructor assigned.
        """
        from litellm.caching.caching import DualCache
        from litellm.proxy.utils import ProxyLogging

        proxy_logging = ProxyLogging(user_api_key_cache=DualCache())
        proxy_logging.alerting = alerting
        for attribute, instance in instances.items():
            setattr(proxy_logging, attribute, instance)
        return proxy_logging

    @staticmethod
    def _make_team_call_info(alert_emails):
        """Build the team-level ``CallInfo`` shared by the soft-budget tests."""
        from litellm.proxy._types import CallInfo, Litellm_EntityType

        return CallInfo(
            token="test-token",
            spend=100.0,
            soft_budget=50.0,
            user_id="test-user",
            team_id="test-team",
            team_alias="test-team-alias",
            event_group=Litellm_EntityType.TEAM,
            alert_emails=alert_emails,
        )

    async def test_budget_alerts_when_alerting_is_none(self):
        """With ``alerting=None`` neither alerting instance is invoked."""
        logger = self._make_proxy_logging(
            None,
            slack_alerting_instance=AsyncMock(),
            email_logging_instance=AsyncMock(),
        )
        info = MagicMock()

        await logger.budget_alerts(type="user_budget", user_info=info)

        logger.slack_alerting_instance.budget_alerts.assert_not_called()
        logger.email_logging_instance.budget_alerts.assert_not_called()

    async def test_budget_alerts_with_slack_only(self):
        """With only ``slack`` configured, the Slack instance receives the alert."""
        logger = self._make_proxy_logging(
            ["slack"], slack_alerting_instance=AsyncMock()
        )
        info = MagicMock()

        await logger.budget_alerts(type="token_budget", user_info=info)

        logger.slack_alerting_instance.budget_alerts.assert_called_once_with(
            type="token_budget", user_info=info
        )

    async def test_budget_alerts_with_email_only(self):
        """With only ``email`` configured, the email instance receives the alert."""
        logger = self._make_proxy_logging(
            ["email"], email_logging_instance=AsyncMock()
        )
        info = MagicMock()

        await logger.budget_alerts(type="team_budget", user_info=info)

        logger.email_logging_instance.budget_alerts.assert_called_once_with(
            type="team_budget", user_info=info
        )

    async def test_budget_alerts_with_email_when_instance_is_none(self):
        """``email`` alerting with no email instance must complete without raising."""
        logger = self._make_proxy_logging(["email"], email_logging_instance=None)
        info = MagicMock()

        # The test passes as long as this call does not raise.
        await logger.budget_alerts(type="organization_budget", user_info=info)

    async def test_budget_alerts_with_both_slack_and_email(self):
        """With both channels configured, each instance is called exactly once."""
        logger = self._make_proxy_logging(
            ["slack", "email"],
            slack_alerting_instance=AsyncMock(),
            email_logging_instance=AsyncMock(),
        )
        info = MagicMock()

        await logger.budget_alerts(type="proxy_budget", user_info=info)

        for channel in (logger.slack_alerting_instance, logger.email_logging_instance):
            channel.budget_alerts.assert_called_once_with(
                type="proxy_budget", user_info=info
            )

    @pytest.mark.parametrize(
        "alert_type",
        [
            "token_budget",
            "user_budget",
            "soft_budget",
            "team_budget",
            "organization_budget",
            "proxy_budget",
            "projected_limit_exceeded",
        ],
    )
    async def test_budget_alerts_with_all_alert_types(self, alert_type):
        """Each listed alert type is forwarded unchanged to both instances."""
        logger = self._make_proxy_logging(
            ["slack", "email"],
            slack_alerting_instance=AsyncMock(),
            email_logging_instance=AsyncMock(),
        )
        info = MagicMock()

        await logger.budget_alerts(type=alert_type, user_info=info)

        for channel in (logger.slack_alerting_instance, logger.email_logging_instance):
            channel.budget_alerts.assert_called_once_with(
                type=alert_type, user_info=info
            )

    async def test_budget_alerts_soft_budget_with_alert_emails_bypasses_alerting_none(
        self,
    ):
        """
        A soft_budget alert carrying ``alert_emails`` is emailed even when
        global alerting is ``None``.

        This covers team-specific soft budget email alerts configured via
        metadata.soft_budget_alerting_emails, which are expected to work while
        global alerting is disabled.
        """
        # Global alerting is disabled for this scenario.
        logger = self._make_proxy_logging(
            None,
            slack_alerting_instance=AsyncMock(),
            email_logging_instance=AsyncMock(),
        )
        # CallInfo with alert_emails set, simulating team metadata extraction.
        info = self._make_team_call_info(["team1@example.com", "team2@example.com"])

        await logger.budget_alerts(type="soft_budget", user_info=info)

        # Slack stays silent because alerting is None ...
        logger.slack_alerting_instance.budget_alerts.assert_not_called()
        # ... while the email path bypasses the alerting=None check.
        logger.email_logging_instance.budget_alerts.assert_called_once_with(
            type="soft_budget", user_info=info
        )

    async def test_budget_alerts_soft_budget_without_alert_emails_respects_alerting_none(
        self,
    ):
        """
        A soft_budget alert with ``alert_emails=None`` sends nothing when
        alerting is ``None``.
        """
        logger = self._make_proxy_logging(
            None,
            slack_alerting_instance=AsyncMock(),
            email_logging_instance=AsyncMock(),
        )
        info = self._make_team_call_info(None)  # no alert emails

        await logger.budget_alerts(type="soft_budget", user_info=info)

        logger.slack_alerting_instance.budget_alerts.assert_not_called()
        logger.email_logging_instance.budget_alerts.assert_not_called()

    async def test_budget_alerts_soft_budget_with_empty_alert_emails_respects_alerting_none(
        self,
    ):
        """
        A soft_budget alert with an empty ``alert_emails`` list sends nothing
        when alerting is ``None``.
        """
        logger = self._make_proxy_logging(
            None,
            slack_alerting_instance=AsyncMock(),
            email_logging_instance=AsyncMock(),
        )
        info = self._make_team_call_info([])  # empty list

        await logger.budget_alerts(type="soft_budget", user_info=info)

        logger.slack_alerting_instance.budget_alerts.assert_not_called()
        logger.email_logging_instance.budget_alerts.assert_not_called()
|
|
|
|
|
|
def test_azure_ai_claude_provider_config():
    """Check which chat config ``ProviderConfigManager`` picks for Azure AI models.

    Claude model names (matched case-insensitively) are expected to yield
    ``AzureAnthropicConfig``; other models yield ``AzureAIStudioConfig``.
    """
    from litellm import AzureAIStudioConfig, AzureAnthropicConfig
    from litellm.utils import ProviderConfigManager

    expected_config_by_model = [
        # Claude models should return AzureAnthropicConfig.
        ("claude-sonnet-4-5", AzureAnthropicConfig),
        # Mixed-case name: matching must be case-insensitive.
        ("Claude-Opus-4", AzureAnthropicConfig),
        # Non-Claude models should return AzureAIStudioConfig.
        ("mistral-large", AzureAIStudioConfig),
    ]

    for model_name, expected_config in expected_config_by_model:
        chat_config = ProviderConfigManager.get_provider_chat_config(
            model=model_name,
            provider=LlmProviders.AZURE_AI,
        )
        assert isinstance(chat_config, expected_config)
|
|
|
|
|
|
# Tests for thinking blocks helper functions
|
|
# Related to issue: https://github.com/BerriAI/litellm/issues/18926
|
|
|
|
|
|
def test_any_assistant_message_has_thinking_blocks_with_thinking():
    """Expect True when at least one assistant message carries thinking_blocks."""
    from litellm.utils import any_assistant_message_has_thinking_blocks

    assistant_with_thinking = {
        "role": "assistant",
        "thinking_blocks": [{"type": "thinking", "thinking": "Let me think..."}],
        "tool_calls": [{"id": "123", "function": {"name": "test"}}],
    }
    # A later assistant turn without thinking_blocks - Claude sometimes
    # doesn't include them.
    assistant_without_thinking = {
        "role": "assistant",
        "tool_calls": [{"id": "456", "function": {"name": "test2"}}],
    }
    conversation = [
        {"role": "user", "content": "Hello"},
        assistant_with_thinking,
        {"role": "tool", "tool_call_id": "123", "content": "result"},
        assistant_without_thinking,
    ]

    outcome = any_assistant_message_has_thinking_blocks(conversation)
    assert outcome is True
|
|
|
|
|
|
def test_any_assistant_message_has_thinking_blocks_without_thinking():
    """Expect False when no assistant message carries thinking_blocks."""
    from litellm.utils import any_assistant_message_has_thinking_blocks

    assistant_turn = {
        "role": "assistant",
        "tool_calls": [{"id": "123", "function": {"name": "test"}}],
    }
    conversation = [
        {"role": "user", "content": "Hello"},
        assistant_turn,
        {"role": "tool", "tool_call_id": "123", "content": "result"},
    ]

    outcome = any_assistant_message_has_thinking_blocks(conversation)
    assert outcome is False
|
|
|
|
|
|
def test_any_assistant_message_has_thinking_blocks_empty_list():
    """Expect False when the only thinking_blocks value is an empty list."""
    from litellm.utils import any_assistant_message_has_thinking_blocks

    assistant_turn = {
        "role": "assistant",
        "thinking_blocks": [],  # present but empty
        "tool_calls": [{"id": "123", "function": {"name": "test"}}],
    }
    conversation = [{"role": "user", "content": "Hello"}, assistant_turn]

    outcome = any_assistant_message_has_thinking_blocks(conversation)
    assert outcome is False
|
|
|
|
|
|
def test_last_assistant_with_tool_calls_has_no_thinking_blocks_issue_18926():
    """
    Reproduce the conversation shape from issue #18926:

    - the first assistant message HAS thinking_blocks
    - the second assistant message has NO thinking_blocks

    Deciding on the LAST tool-call message alone would drop thinking, which is
    wrong here because an earlier message in the conversation still carries
    thinking blocks. The decision must combine both helper checks.
    """
    from litellm.utils import (
        any_assistant_message_has_thinking_blocks,
        last_assistant_with_tool_calls_has_no_thinking_blocks,
    )

    first_assistant_turn = {
        "role": "assistant",
        "thinking_blocks": [
            {"type": "thinking", "thinking": "Let me analyze the requirements..."}
        ],
        "tool_calls": [
            {
                "id": "toolu_1",
                "function": {"name": "file_editor", "arguments": "{}"},
            }
        ],
    }
    first_tool_result = {
        "role": "tool",
        "tool_call_id": "toolu_1",
        "content": "File contents here...",
    }
    # NO thinking_blocks on this turn - Claude sometimes doesn't include them.
    second_assistant_turn = {
        "role": "assistant",
        "content": [{"type": "text", "text": "Let me explore more..."}],
        "tool_calls": [
            {
                "id": "toolu_2",
                "function": {"name": "file_editor", "arguments": "{}"},
            }
        ],
    }
    conversation = [
        {"role": "user", "content": "Build a feature"},
        first_assistant_turn,
        first_tool_result,
        second_assistant_turn,
    ]

    # The last assistant message with tool_calls has no thinking_blocks ...
    assert last_assistant_with_tool_calls_has_no_thinking_blocks(conversation) is True

    # ... but some assistant message in the conversation does.
    assert any_assistant_message_has_thinking_blocks(conversation) is True

    # Thinking is dropped only if the last tool-call message has none AND no
    # message has any; here the second condition fails, so it must be kept.
    drop_thinking = last_assistant_with_tool_calls_has_no_thinking_blocks(
        conversation
    ) and not any_assistant_message_has_thinking_blocks(conversation)
    assert drop_thinking is False
|
|
|
|
|
|
class TestAdditionalDropParamsForNonOpenAIProviders:
    """
    Cover ``additional_drop_params`` handling for non-OpenAI providers.

    Regression tests for https://github.com/BerriAI/litellm/issues/19225

    The reported bug: additional_drop_params only filtered params for
    OpenAI/Azure providers, not for others such as Bedrock, so OpenAI-specific
    params like prompt_cache_key were passed to Bedrock and caused errors.
    """

    def test_additional_drop_params_filters_for_bedrock(self):
        """
        ``prompt_cache_key`` listed in additional_drop_params is removed for Bedrock.

        Before the fix the param was passed through to Bedrock, causing:
        'BedrockException - {"message":"The model returned the following errors:
        prompt_cache_key: Extra inputs are not permitted"}'
        """
        from litellm.utils import add_provider_specific_params_to_optional_params

        request_params = {
            "prompt_cache_key": "test_key_123",
            "temperature": 0.7,
            "model": "bedrock/anthropic.claude-v2",
        }
        supported_openai_params = ["temperature", "max_tokens", "top_p", "model"]

        result = add_provider_specific_params_to_optional_params(
            optional_params={},
            passed_params=request_params,
            custom_llm_provider="bedrock",
            openai_params=supported_openai_params,
            additional_drop_params=["prompt_cache_key"],
        )

        # prompt_cache_key should be filtered out.
        assert "prompt_cache_key" not in result
        # temperature is deliberately not asserted: per the original author's
        # note it is in openai_params, and this function only adds params that
        # are NOT in openai_params.

    def test_additional_drop_params_filters_multiple_params_for_non_openai(self):
        """Several dropped params are removed while the remaining ones are kept."""
        from litellm.utils import add_provider_specific_params_to_optional_params

        request_params = {
            "prompt_cache_key": "test_key",
            "some_openai_only_param": "value1",
            "another_openai_param": "value2",
            "keep_this_param": "keep_me",
        }
        dropped_names = ["prompt_cache_key", "some_openai_only_param"]

        result = add_provider_specific_params_to_optional_params(
            optional_params={},
            passed_params=request_params,
            custom_llm_provider="anthropic",
            openai_params=["temperature", "max_tokens"],
            additional_drop_params=dropped_names,
        )

        # Dropped params must be absent ...
        assert "prompt_cache_key" not in result
        assert "some_openai_only_param" not in result
        # ... and everything else must survive with its value.
        assert result.get("another_openai_param") == "value2"
        assert result.get("keep_this_param") == "keep_me"

    def test_additional_drop_params_none_keeps_all_params(self):
        """``additional_drop_params=None`` leaves every passed param in place."""
        from litellm.utils import add_provider_specific_params_to_optional_params

        request_params = {
            "prompt_cache_key": "test_key",
            "custom_param": "value",
        }

        result = add_provider_specific_params_to_optional_params(
            optional_params={},
            passed_params=request_params,
            custom_llm_provider="bedrock",
            openai_params=["temperature"],
            additional_drop_params=None,
        )

        assert result.get("prompt_cache_key") == "test_key"
        assert result.get("custom_param") == "value"

    def test_additional_drop_params_empty_list_keeps_all_params(self):
        """An empty ``additional_drop_params`` list leaves every passed param in place."""
        from litellm.utils import add_provider_specific_params_to_optional_params

        request_params = {
            "prompt_cache_key": "test_key",
            "custom_param": "value",
        }

        result = add_provider_specific_params_to_optional_params(
            optional_params={},
            passed_params=request_params,
            custom_llm_provider="bedrock",
            openai_params=["temperature"],
            additional_drop_params=[],
        )

        assert result.get("prompt_cache_key") == "test_key"
        assert result.get("custom_param") == "value"
|
|
|
|
|
|
class TestDropParamsWithPromptCacheKey:
    """
    Check that ``drop_params=True`` drops prompt_cache_key for non-OpenAI providers.

    Regression tests for https://github.com/BerriAI/litellm/issues/19225

    prompt_cache_key is an OpenAI-specific parameter; it is expected to be
    dropped automatically for providers like Bedrock that don't support it.
    """

    def test_prompt_cache_key_in_default_params(self):
        """Both prompt-cache params are listed in DEFAULT_CHAT_COMPLETION_PARAM_VALUES."""
        from litellm.constants import DEFAULT_CHAT_COMPLETION_PARAM_VALUES

        for param_name in ("prompt_cache_key", "prompt_cache_retention"):
            assert param_name in DEFAULT_CHAT_COMPLETION_PARAM_VALUES

    def test_drop_params_removes_prompt_cache_key_for_bedrock(self):
        """
        ``get_optional_params`` with ``drop_params=True`` removes prompt_cache_key
        for Bedrock while keeping ``temperature``.
        """
        from litellm.utils import get_optional_params

        # prompt_cache_key is passed alongside a param Bedrock does support.
        mapped_params = get_optional_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0",
            custom_llm_provider="bedrock",
            prompt_cache_key="test_cache_key",
            temperature=0.7,
            drop_params=True,
        )

        # Unsupported param is dropped ...
        assert "prompt_cache_key" not in mapped_params
        # ... the supported one is passed through unchanged.
        assert mapped_params.get("temperature") == 0.7
|
|
|
|
|
|
class TestGetOptionalParamsDeepSeek:
    """Check that the deepseek provider maps params via DeepSeekChatConfig."""

    def test_deepseek_supports_thinking_param(self):
        """
        ``get_optional_params`` for deepseek passes the 'thinking' param through.

        Per the original note, 'thinking' is only supported by
        DeepSeekChatConfig, not OpenAIConfig.
        """
        from litellm.utils import get_optional_params

        mapped_params = get_optional_params(
            model="deepseek-reasoner",
            custom_llm_provider="deepseek",
            thinking={"type": "enabled"},
        )

        expected_thinking = {"type": "enabled"}
        assert mapped_params.get("thinking") == expected_thinking

    def test_deepseek_supports_reasoning_effort_param(self):
        """
        ``get_optional_params`` for deepseek accepts 'reasoning_effort' and the
        result carries an enabled 'thinking' entry.

        Per the original note, 'reasoning_effort' is only supported by
        DeepSeekChatConfig, not OpenAIConfig.
        """
        from litellm.utils import get_optional_params

        mapped_params = get_optional_params(
            model="deepseek-reasoner",
            custom_llm_provider="deepseek",
            reasoning_effort="high",
        )

        expected_thinking = {"type": "enabled"}
        assert mapped_params.get("thinking") == expected_thinking

    def test_deepseek_thinking_strips_budget_tokens(self):
        """
        ``budget_tokens`` is removed from the 'thinking' param for deepseek.

        Per the original note, this stripping is done by DeepSeekChatConfig and
        would not happen with OpenAIConfig.
        """
        from litellm.utils import get_optional_params

        mapped_params = get_optional_params(
            model="deepseek-reasoner",
            custom_llm_provider="deepseek",
            thinking={"type": "enabled", "budget_tokens": 5000},
        )

        thinking_or_empty = mapped_params.get("thinking", {})
        assert "budget_tokens" not in thinking_or_empty
        assert mapped_params.get("thinking") == {"type": "enabled"}
|
|
|
|
|
|
class TestIsStreamingRequest:
    """Check ``_is_streaming_request`` for the ``stream`` kwarg and stream call types."""

    def test_stream_true_in_kwargs(self):
        """``stream=True`` in kwargs marks the request as streaming."""
        streaming = _is_streaming_request(
            kwargs={"stream": True}, call_type="acompletion"
        )
        assert streaming is True

    def test_stream_false_in_kwargs(self):
        """``stream=False`` in kwargs marks the request as non-streaming."""
        streaming = _is_streaming_request(
            kwargs={"stream": False}, call_type="acompletion"
        )
        assert streaming is False

    def test_no_stream_in_kwargs(self):
        """Without a ``stream`` kwarg, ``acompletion`` is non-streaming."""
        streaming = _is_streaming_request(kwargs={}, call_type="acompletion")
        assert streaming is False

    def test_generate_content_stream_string(self):
        """The ``generate_content_stream`` call type, given as a string, is streaming."""
        call_type = CallTypes.generate_content_stream.value
        streaming = _is_streaming_request(kwargs={}, call_type=call_type)
        assert streaming is True

    def test_agenerate_content_stream_string(self):
        """The ``agenerate_content_stream`` call type, given as a string, is streaming."""
        call_type = CallTypes.agenerate_content_stream.value
        streaming = _is_streaming_request(kwargs={}, call_type=call_type)
        assert streaming is True

    def test_generate_content_stream_enum(self):
        """The ``generate_content_stream`` enum member is streaming."""
        call_type = CallTypes.generate_content_stream
        streaming = _is_streaming_request(kwargs={}, call_type=call_type)
        assert streaming is True

    def test_agenerate_content_stream_enum(self):
        """The ``agenerate_content_stream`` enum member is streaming."""
        call_type = CallTypes.agenerate_content_stream
        streaming = _is_streaming_request(kwargs={}, call_type=call_type)
        assert streaming is True

    def test_non_streaming_call_type_string(self):
        """A non-stream call type given as a string is non-streaming."""
        streaming = _is_streaming_request(kwargs={}, call_type="acompletion")
        assert streaming is False

    def test_non_streaming_call_type_enum(self):
        """A non-stream call type given as an enum member is non-streaming."""
        streaming = _is_streaming_request(kwargs={}, call_type=CallTypes.acompletion)
        assert streaming is False

    def test_stream_true_overrides_non_streaming_call_type(self):
        """``stream=True`` wins even when the call type itself is non-streaming."""
        streaming = _is_streaming_request(
            kwargs={"stream": True}, call_type=CallTypes.acompletion
        )
        assert streaming is True
|
|
|
|
|
|
class TestCallbackAsyncSyncSeparation:
    """Test that LoggingCallbackManager auto-routes async callbacks to async lists.

    Each test registers one callback through ``litellm.logging_callback_manager``
    and checks which module-level list on ``litellm`` it lands in. Because
    those lists are global state, they are snapshotted before every test and
    restored afterwards so registrations made here do not leak into other
    tests.
    """

    # Module-level callback lists on ``litellm`` that these tests reset and inspect.
    _CALLBACK_LIST_NAMES = (
        "input_callback",
        "success_callback",
        "failure_callback",
        "_async_input_callback",
        "_async_success_callback",
        "_async_failure_callback",
    )

    def setup_method(self):
        """Snapshot the global callback lists, then reset each one to an empty list."""
        # Only names that currently exist are saved; the reset below still
        # assigns every name.
        self._saved_callback_lists = {
            name: getattr(litellm, name)
            for name in self._CALLBACK_LIST_NAMES
            if hasattr(litellm, name)
        }
        for name in self._CALLBACK_LIST_NAMES:
            setattr(litellm, name, [])

    def teardown_method(self):
        """Restore the callback lists captured in ``setup_method``."""
        # ``getattr`` with a default keeps teardown safe even if setup did not
        # get as far as taking the snapshot.
        for name, saved in getattr(self, "_saved_callback_lists", {}).items():
            setattr(litellm, name, saved)

    def test_async_success_callback_routed_to_async_list(self):
        """A coroutine-function success callback lands only in the async list."""

        async def my_async_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_success_callback(my_async_cb)
        assert my_async_cb in litellm._async_success_callback
        assert my_async_cb not in litellm.success_callback

    def test_sync_success_callback_stays_in_sync_list(self):
        """A plain-function success callback lands only in the sync list."""

        def my_sync_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_success_callback(my_sync_cb)
        assert my_sync_cb in litellm.success_callback
        assert my_sync_cb not in litellm._async_success_callback

    def test_string_callback_stays_in_sync_list(self):
        """The string callback "langfuse" lands only in the sync success list."""
        litellm.logging_callback_manager.add_litellm_success_callback("langfuse")
        assert "langfuse" in litellm.success_callback
        assert "langfuse" not in litellm._async_success_callback

    def test_async_failure_callback_routed_to_async_list(self):
        """A coroutine-function failure callback lands only in the async list."""

        async def my_async_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_failure_callback(my_async_cb)
        assert my_async_cb in litellm._async_failure_callback
        assert my_async_cb not in litellm.failure_callback

    def test_sync_failure_callback_stays_in_sync_list(self):
        """A plain-function failure callback lands only in the sync list."""

        def my_sync_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_failure_callback(my_sync_cb)
        assert my_sync_cb in litellm.failure_callback
        assert my_sync_cb not in litellm._async_failure_callback

    def test_dynamodb_routed_to_async_success(self):
        """The string callback "dynamodb" lands only in the async success list."""
        litellm.logging_callback_manager.add_litellm_success_callback("dynamodb")
        assert "dynamodb" in litellm._async_success_callback
        assert "dynamodb" not in litellm.success_callback

    def test_openmeter_routed_to_async_success(self):
        """The string callback "openmeter" lands only in the async success list."""
        litellm.logging_callback_manager.add_litellm_success_callback("openmeter")
        assert "openmeter" in litellm._async_success_callback
        assert "openmeter" not in litellm.success_callback

    def test_async_input_callback_routed_to_async_list(self):
        """A coroutine-function input callback lands only in the async list."""

        async def my_async_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_input_callback(my_async_cb)
        assert my_async_cb in litellm._async_input_callback
        assert my_async_cb not in litellm.input_callback

    def test_sync_input_callback_stays_in_sync_list(self):
        """A plain-function input callback lands only in the sync list."""

        def my_sync_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_input_callback(my_sync_cb)
        assert my_sync_cb in litellm.input_callback
        assert my_sync_cb not in litellm._async_input_callback
|
|
|
|
|
|
class TestMetadataNoneHandling:
    """
    Test that metadata=None in kwargs doesn't cause TypeError.

    When metadata key exists with value None (e.g., from Azure OpenAI streaming),
    dict.get("metadata", {}) returns None (key exists, so default is ignored).
    The fix uses (kwargs.get("metadata") or {}) which handles both missing key
    and explicit None value.

    Related: #20871
    """

    def test_metadata_none_get_previous_models(self):
        """kwargs.get("metadata") or {} should return {} when metadata is None."""
        kwargs = {"metadata": None}
        previous_models = (kwargs.get("metadata") or {}).get("previous_models", None)
        assert previous_models is None

    def test_metadata_none_model_group_check(self):
        """'model_group' in (kwargs.get("metadata") or {}) should not raise TypeError."""
        kwargs = {"metadata": None}
        _is_litellm_router_call = "model_group" in (kwargs.get("metadata") or {})
        assert _is_litellm_router_call is False

    def test_metadata_missing_key(self):
        """Should work when metadata key is completely absent."""
        kwargs = {}
        previous_models = (kwargs.get("metadata") or {}).get("previous_models", None)
        assert previous_models is None

    def test_metadata_present_with_values(self):
        """Should work when metadata has actual values."""
        kwargs = {"metadata": {"previous_models": ["model1"], "model_group": "test"}}
        previous_models = (kwargs.get("metadata") or {}).get("previous_models", None)
        assert previous_models == ["model1"]
        _is_litellm_router_call = "model_group" in (kwargs.get("metadata") or {})
        assert _is_litellm_router_call is True

    def test_metadata_none_causes_error_with_old_pattern(self):
        """Demonstrate the bug: dict.get('metadata', {}) returns None when key exists with None value."""
        kwargs = {"metadata": None}

        # Old pattern: kwargs.get("metadata", {}) returns None because key exists
        result = kwargs.get("metadata", {})
        assert result is None  # This is the root cause of the bug

        # Attribute lookup on None is always an AttributeError, never a
        # TypeError, so expect exactly that instead of accepting either.
        with pytest.raises(AttributeError):
            kwargs.get("metadata", {}).get("previous_models", None)

        # A membership test against None raises TypeError. Binding the result
        # to `_` makes it explicit that only the raised exception matters.
        with pytest.raises(TypeError):
            _ = "model_group" in kwargs.get("metadata", {})

    def test_litellm_params_metadata_none(self):
        """litellm_params.get("metadata") or {} should handle None value."""
        litellm_params = {"metadata": None}
        metadata = litellm_params.get("metadata") or {}
        assert metadata == {}
|
|
|
|
|
|
class TestValidateAndFixThinkingParam:
|
|
"""Tests for validate_and_fix_thinking_param."""
|
|
|
|
def test_none_returns_none(self):
    """Passing `thinking=None` yields None."""
    from litellm.utils import validate_and_fix_thinking_param

    outcome = validate_and_fix_thinking_param(thinking=None)
    assert outcome is None
|
|
|
|
def test_already_snake_case(self):
    """A dict that already uses `budget_tokens` comes back with equal contents."""
    from litellm.utils import validate_and_fix_thinking_param

    expected = {"type": "enabled", "budget_tokens": 32000}
    normalized = validate_and_fix_thinking_param(
        thinking={"type": "enabled", "budget_tokens": 32000}
    )
    assert normalized == expected
|
|
|
|
def test_camel_case_normalized(self):
    """The camelCase `budgetTokens` key is returned as `budget_tokens`."""
    from litellm.utils import validate_and_fix_thinking_param

    camel_case_input = {"type": "enabled", "budgetTokens": 32000}
    normalized = validate_and_fix_thinking_param(thinking=camel_case_input)

    expected = {"type": "enabled", "budget_tokens": 32000}
    assert normalized == expected
    assert "budgetTokens" not in normalized
|
|
|
|
def test_both_keys_snake_case_wins(self):
    """With both spellings present, the `budget_tokens` value is kept and `budgetTokens` is dropped."""
    from litellm.utils import validate_and_fix_thinking_param

    conflicting_input = {
        "type": "enabled",
        "budget_tokens": 10000,
        "budgetTokens": 50000,
    }
    normalized = validate_and_fix_thinking_param(thinking=conflicting_input)

    expected = {"type": "enabled", "budget_tokens": 10000}
    assert normalized == expected
    assert "budgetTokens" not in normalized
|
|
|
|
def test_original_dict_not_mutated(self):
    """The caller's dict keeps `budgetTokens` and gains no `budget_tokens` key."""
    from litellm.utils import validate_and_fix_thinking_param

    caller_dict = {"type": "enabled", "budgetTokens": 32000}
    # The return value is deliberately ignored; only the input dict is inspected.
    validate_and_fix_thinking_param(thinking=caller_dict)

    assert "budgetTokens" in caller_dict
    assert "budget_tokens" not in caller_dict
|