litellm/tests/test_litellm/test_utils.py

import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from jsonschema import validate

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm
from litellm.proxy.utils import is_valid_api_key
from litellm.types.utils import (
    CallTypes,
    Delta,
    LlmProviders,
    ModelResponseStream,
    StreamingChoices,
)
from litellm.utils import (
    ProviderConfigManager,
    TextCompletionStreamWrapper,
    _check_provider_match,
    _is_streaming_request,
    get_llm_provider,
    get_optional_params_image_gen,
    is_cached_message,
)

# Adds the parent directory to the system path


def test_check_provider_match_azure_ai_allows_openai_and_azure():
    """
    Verify which model-metadata providers the azure_ai provider may match.

    Azure Model Router can route to OpenAI models, so metadata tagged
    "openai" or "azure" is expected to match azure_ai; any other provider
    (here "anthropic") is expected not to match.
    """
    # (litellm_provider stored in the model metadata, expected match result)
    expectations = (
        ("openai", True),
        ("azure", True),
        ("anthropic", False),
    )

    for metadata_provider, should_match in expectations:
        matched = _check_provider_match(
            model_info={"litellm_provider": metadata_provider},
            custom_llm_provider="azure_ai",
        )
        assert matched is should_match


def test_check_provider_match_github_allows_upstream_provider_metadata():
    """
    Verify that the github provider matches metadata from upstream providers.

    GitHub Models can serve models from multiple providers, so metadata
    tagged "openai", "github" or "anthropic" is expected to match.
    """
    for metadata_provider in ("openai", "github", "anthropic"):
        matched = _check_provider_match(
            model_info={"litellm_provider": metadata_provider},
            custom_llm_provider="github",
        )
        assert matched is True


def test_supports_function_calling_github_openai_alias():
    """Check function-calling support for gpt-4o-mini via github, addressed both ways."""
    # Provider given as a prefix on the model name.
    via_prefix = litellm.utils.supports_function_calling(model="github/gpt-4o-mini")
    assert via_prefix is True

    # Provider given as a separate argument.
    via_provider_arg = litellm.utils.supports_function_calling(
        model="gpt-4o-mini", custom_llm_provider="github"
    )
    assert via_provider_arg is True


def test_supports_function_calling_github_anthropic_alias():
    """Check function-calling support is reported for a github-prefixed Claude model."""
    model_name = "github/claude-3-7-sonnet-20250219"
    supported = litellm.utils.supports_function_calling(model=model_name)
    assert supported is True


def test_supports_function_calling_deepinfra_llama():
    """Check that a deepinfra Llama model reports function calling support.

    Regression test for https://github.com/BerriAI/litellm/issues/22619
    """
    model_name = "deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo"
    supported = litellm.utils.supports_function_calling(model=model_name)
    assert supported is True


def test_supports_function_calling_unknown_github_alias_returns_false():
    """Check that an unknown github-prefixed model reports no function calling support."""
    model_name = "github/non-existent-model-for-capability-check"
    supported = litellm.utils.supports_function_calling(model=model_name)
    assert supported is False


def test_get_optional_params_image_gen():
    """
    Check optional params produced for an Azure gpt-image-1 request.

    With ``drop_params=True`` and an Azure GPT image provider config, the
    result is expected to omit ``response_format`` and keep ``n``.
    """
    from litellm.llms.azure.image_generation import AzureGPTImageGenerationConfig

    request_kwargs = {
        "model": "gpt-image-1",
        "response_format": "b64_json",
        "n": 3,
        "custom_llm_provider": "azure",
        "drop_params": True,
        "provider_config": AzureGPTImageGenerationConfig(),
    }
    result = get_optional_params_image_gen(**request_kwargs)

    assert result is not None
    assert "response_format" not in result
    assert result["n"] == 3


def test_get_optional_params_image_gen_vertex_ai_size():
    """Check that Vertex AI image generation maps ``size`` to ``aspectRatio`` and ``n`` to ``sampleCount``."""
    # Arguments shared by every call in this test.
    vertex_kwargs = {
        "model": "vertex_ai/imagegeneration@006",
        "custom_llm_provider": "vertex_ai",
        "drop_params": True,
    }

    # Requested size -> aspect ratio expected in the translated params.
    aspect_ratio_by_size = {
        "1024x1024": "1:1",  # square
        "256x256": "1:1",  # square
        "512x512": "1:1",  # square
        "1792x1024": "16:9",  # landscape
        "1024x1792": "9:16",  # portrait
        "unsupported": "1:1",  # unsupported sizes fall back to square
    }

    for requested_size, wanted_ratio in aspect_ratio_by_size.items():
        translated = get_optional_params_image_gen(
            size=requested_size, n=2, **vertex_kwargs
        )
        assert translated is not None
        assert translated["aspectRatio"] == wanted_ratio
        assert translated["sampleCount"] == 2
        # size must have been converted to aspectRatio, not passed through
        assert "size" not in translated

    # When no size is supplied, no aspectRatio should be set at all.
    translated_without_size = get_optional_params_image_gen(n=1, **vertex_kwargs)
    assert translated_without_size is not None
    assert "aspectRatio" not in translated_without_size
    assert translated_without_size["sampleCount"] == 1


def test_get_optional_params_image_gen_filters_empty_values():
    """Check that supplying only an empty ``extra_body`` yields empty optional params."""
    request_kwargs = {
        "model": "gpt-image-1",
        "custom_llm_provider": "openai",
        "extra_body": {},
    }
    result = get_optional_params_image_gen(**request_kwargs)
    assert result == {}


def test_all_model_configs():
    """
    Check ``max_completion_tokens`` handling across provider configs.

    For each provider config this asserts that:
      1. ``max_completion_tokens`` is listed by ``get_supported_openai_params``.
      2. ``map_openai_params`` translates ``{"max_completion_tokens": 10}``
         into the provider-specific token-limit key (e.g. ``max_tokens``,
         ``num_predict``, ``maxTokens``).

    Each config is checked exactly once; imports are kept next to the section
    that uses them.
    """
    # --- Vertex AI partner models: Llama3 and AI21 ---
    from litellm.llms.vertex_ai.vertex_ai_partner_models.ai21.transformation import (
        VertexAIAi21Config,
    )
    from litellm.llms.vertex_ai.vertex_ai_partner_models.llama3.transformation import (
        VertexAILlama3Config,
    )

    assert (
        "max_completion_tokens"
        in VertexAILlama3Config().get_supported_openai_params(model="llama3")
    )
    assert VertexAILlama3Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3", drop_params=False
    ) == {"max_tokens": 10}

    assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params(
        model="jamba-1.5-mini@001"
    )
    assert VertexAIAi21Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "jamba-1.5-mini@001", drop_params=False
    ) == {"max_tokens": 10}

    # --- Fireworks AI ---
    from litellm.llms.fireworks_ai.chat.transformation import FireworksAIConfig

    assert "max_completion_tokens" in FireworksAIConfig().get_supported_openai_params(
        model="llama3"
    )
    assert FireworksAIConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # --- Nvidia NIM ---
    from litellm.llms.nvidia_nim.chat.transformation import NvidiaNimConfig

    assert "max_completion_tokens" in NvidiaNimConfig().get_supported_openai_params(
        model="llama3"
    )
    assert NvidiaNimConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # --- Ollama chat (maps to num_predict) ---
    from litellm.llms.ollama.chat.transformation import OllamaChatConfig

    assert "max_completion_tokens" in OllamaChatConfig().get_supported_openai_params(
        model="llama3"
    )
    assert OllamaChatConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"num_predict": 10}

    # --- Predibase (maps to max_new_tokens) ---
    from litellm.llms.predibase.chat.transformation import PredibaseConfig

    assert "max_completion_tokens" in PredibaseConfig().get_supported_openai_params(
        model="llama3"
    )
    assert PredibaseConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_new_tokens": 10}

    # --- Codestral text completion ---
    from litellm.llms.codestral.completion.transformation import (
        CodestralTextCompletionConfig,
    )

    assert (
        "max_completion_tokens"
        in CodestralTextCompletionConfig().get_supported_openai_params(model="llama3")
    )
    assert CodestralTextCompletionConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # --- VolcEngine ---
    from litellm.llms.volcengine.chat.transformation import (
        VolcEngineChatConfig as VolcEngineConfig,
    )

    assert "max_completion_tokens" in VolcEngineConfig().get_supported_openai_params(
        model="llama3"
    )
    assert VolcEngineConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # --- AI21 chat ---
    from litellm.llms.ai21.chat.transformation import AI21ChatConfig

    assert "max_completion_tokens" in AI21ChatConfig().get_supported_openai_params(
        "jamba-1.5-mini@001"
    )
    assert AI21ChatConfig().map_openai_params(
        model="jamba-1.5-mini@001",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    # --- Azure OpenAI (key is passed through unchanged) ---
    from litellm.llms.azure.chat.gpt_transformation import AzureOpenAIConfig

    assert "max_completion_tokens" in AzureOpenAIConfig().get_supported_openai_params(
        model="gpt-3.5-turbo"
    )
    assert AzureOpenAIConfig().map_openai_params(
        model="gpt-3.5-turbo",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        api_version="2022-12-01",
        drop_params=False,
    ) == {"max_completion_tokens": 10}

    # --- Bedrock Converse (maps to maxTokens) ---
    from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig

    assert (
        "max_completion_tokens"
        in AmazonConverseConfig().get_supported_openai_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0"
        )
    )
    assert AmazonConverseConfig().map_openai_params(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"maxTokens": 10}

    # --- Bedrock Anthropic configs ---
    from litellm import AmazonAnthropicClaudeConfig, AmazonAnthropicConfig

    assert (
        "max_completion_tokens"
        in AmazonAnthropicClaudeConfig().get_supported_openai_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0"
        )
    )

    assert AmazonAnthropicClaudeConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        drop_params=False,
    ) == {"max_tokens": 10}

    assert (
        "max_completion_tokens"
        in AmazonAnthropicConfig().get_supported_openai_params(model="")
    )

    # This config maps to max_tokens_to_sample rather than max_tokens.
    assert AmazonAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        model="",
        drop_params=False,
    ) == {"max_tokens_to_sample": 10}

    # --- Databricks ---
    from litellm.llms.databricks.chat.transformation import DatabricksConfig

    assert "max_completion_tokens" in DatabricksConfig().get_supported_openai_params()

    assert DatabricksConfig().map_openai_params(
        model="databricks/llama-3-70b-instruct",
        drop_params=False,
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    # --- Vertex AI Anthropic ---
    from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation import (
        VertexAIAnthropicConfig,
    )

    assert (
        "max_completion_tokens"
        in VertexAIAnthropicConfig().get_supported_openai_params(
            model="claude-sonnet-4-6"
        )
    )

    assert VertexAIAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        model="claude-sonnet-4-6",
        drop_params=False,
    ) == {"max_tokens": 10}

    # --- Gemini: Vertex AI and Google AI Studio (map to max_output_tokens) ---
    from litellm.llms.gemini.chat.transformation import GoogleAIStudioGeminiConfig
    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
        VertexGeminiConfig,
    )

    assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params(
        model="gemini-1.0-pro"
    )

    assert VertexGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_output_tokens": 10}

    assert (
        "max_completion_tokens"
        in GoogleAIStudioGeminiConfig().get_supported_openai_params(
            model="gemini-1.0-pro"
        )
    )

    assert GoogleAIStudioGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_output_tokens": 10}


def test_anthropic_web_search_in_model_info():
    """
    Check web-search metadata for selected Anthropic models.

    Reloads ``litellm.model_cost`` with ``LITELLM_LOCAL_MODEL_COST_MAP`` set
    to "True", then asserts that each listed model reports
    ``supports_web_search`` and has a ``search_context_cost_per_query`` entry.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    from litellm.utils import get_model_info

    web_search_models = (
        "anthropic/claude-4-sonnet-20250514",
        "anthropic/claude-sonnet-4-5-20250929",
    )
    for model in web_search_models:
        info = get_model_info(model)
        assert info is not None

        web_search_flag = info["supports_web_search"]
        assert web_search_flag is True, f"Model {model} should support web search"

        per_query_cost = info["search_context_cost_per_query"]
        assert (
            per_query_cost is not None
        ), f"Model {model} should have a search context cost per query"


def test_cohere_embedding_optional_params():
    """Check that embedding optional params are produced for a Cohere embed-v4.0 request."""
    from litellm import get_optional_params_embeddings

    embedding_kwargs = {
        "model": "embed-v4.0",
        "custom_llm_provider": "cohere",
        "input": "Hello, world!",
        "input_type": "search_query",
        "dimensions": 512,
    }
    result = get_optional_params_embeddings(**embedding_kwargs)
    assert result is not None


def _cost_above_one(raw_value):
    """
    Return the numeric cost if ``raw_value`` is a number (or numeric string) above 1.

    Strings are parsed with ``float``; unparseable strings, non-numeric values
    and values that do not exceed 1 all yield ``None``.
    """
    if isinstance(raw_value, str):
        try:
            raw_value = float(raw_value)
        except (ValueError, TypeError):
            # Not a numeric string - nothing to validate.
            return None

    if isinstance(raw_value, (int, float)) and raw_value > 1:
        return raw_value
    return None


def validate_model_cost_values(model_data, exceptions=None):
    """
    Validates that cost values in model data do not exceed 1.

    Both top-level cost fields and the entries of nested cost dictionaries
    (currently ``search_context_cost_per_query``) are checked. Numeric strings
    are converted to float before comparison; values that are missing, ``None``
    or not numeric are ignored. Model entries that are not mappings (for
    example ``None``) are skipped instead of raising.

    Args:
        model_data (dict): The model data dictionary, keyed by model ID
        exceptions (list, optional): List of model IDs that are allowed to have costs > 1

    Returns:
        tuple: (is_valid, violations) where is_valid is a boolean and violations is a list of error messages
    """
    from collections.abc import Mapping

    if exceptions is None:
        exceptions = []

    violations = []

    # Define all cost-related fields to check
    cost_fields = [
        "input_cost_per_token",
        "output_cost_per_token",
        "input_cost_per_character",
        "output_cost_per_character",
        "input_cost_per_image",
        "output_cost_per_image",
        "input_cost_per_pixel",
        "output_cost_per_pixel",
        "input_cost_per_second",
        "output_cost_per_second",
        "output_cost_per_second_1080p",
        "input_cost_per_query",
        "input_cost_per_request",
        "input_cost_per_audio_token",
        "output_cost_per_audio_token",
        "output_cost_per_image_token",
        "output_cost_per_image_token_batches",
        "input_cost_per_audio_per_second",
        "input_cost_per_video_per_second",
        "input_cost_per_token_above_128k_tokens",
        "output_cost_per_token_above_128k_tokens",
        "input_cost_per_token_above_200k_tokens",
        "output_cost_per_token_above_200k_tokens",
        "input_cost_per_token_above_272k_tokens",
        "output_cost_per_token_above_272k_tokens",
        "input_cost_per_character_above_128k_tokens",
        "output_cost_per_character_above_128k_tokens",
        "input_cost_per_image_above_128k_tokens",
        "input_cost_per_video_per_second_above_8s_interval",
        "input_cost_per_video_per_second_above_15s_interval",
        "input_cost_per_video_per_second_above_128k_tokens",
        "input_cost_per_token_batch_requests",
        "input_cost_per_token_batches",
        "output_cost_per_token_batches",
        "input_cost_per_token_cache_hit",
        "cache_creation_input_token_cost",
        "cache_creation_input_audio_token_cost",
        "cache_read_input_token_cost",
        "cache_read_input_audio_token_cost",
        "input_dbu_cost_per_token",
        "output_db_cost_per_token",
        "output_dbu_cost_per_token",
        "output_cost_per_reasoning_token",
        "citation_cost_per_token",
    ]

    # Also check nested cost fields (dicts of sub-field -> cost)
    nested_cost_fields = [
        "search_context_cost_per_query",
    ]

    for model_id, model_info in model_data.items():
        # Skip if this model is in exceptions
        if model_id in exceptions:
            continue

        # A malformed entry (e.g. null in the JSON) has no cost fields to check.
        if not isinstance(model_info, Mapping):
            continue

        # Check direct cost fields; .get() covers both "missing" and "None".
        for field in cost_fields:
            offending = _cost_above_one(model_info.get(field))
            if offending is not None:
                violations.append(
                    f"Model '{model_id}' has {field} = {offending} which exceeds 1"
                )

        # Check nested cost fields
        for field in nested_cost_fields:
            nested_costs = model_info.get(field)
            if not isinstance(nested_costs, dict):
                continue
            for nested_field, nested_value in nested_costs.items():
                offending = _cost_above_one(nested_value)
                if offending is not None:
                    violations.append(
                        f"Model '{model_id}' has {field}.{nested_field} = {offending} which exceeds 1"
                    )

    return len(violations) == 0, violations


def test_aaamodel_prices_and_context_window_json_is_valid():
    """
    Validates the `model_prices_and_context_window.json` file.

    Two checks are performed on the file located two directories above this
    test module:
      1. Every entry (except ``sample_spec``) conforms to ``INTENDED_SCHEMA``,
         which rejects unknown properties.
      2. No cost value exceeds 1 (see ``validate_model_cost_values``), unless
         the model ID is listed in ``exceptions`` below.

    If this test fails after you update the json, you need to update the schema or correct the change you made.
    """

    # NOTE: keep every property key unique - a repeated key in a dict literal
    # silently overrides the earlier definition.
    INTENDED_SCHEMA = {
        "type": "object",
        "additionalProperties": {
            "type": "object",
            "properties": {
                "supports_computer_use": {"type": "boolean"},
                "cache_creation_input_audio_token_cost": {"type": "number"},
                "cache_creation_input_token_cost": {"type": "number"},
                "cache_creation_input_token_cost_above_1hr": {"type": "number"},
                "cache_creation_input_token_cost_above_200k_tokens": {"type": "number"},
                "cache_read_input_token_cost": {"type": "number"},
                "cache_read_input_token_cost_above_200k_tokens": {"type": "number"},
                "cache_read_input_token_cost_above_272k_tokens": {"type": "number"},
                "cache_read_input_token_cost_batches": {"type": "number"},
                "cache_creation_input_token_cost_above_1hr_above_200k_tokens": {
                    "type": "number"
                },
                "cache_read_input_audio_token_cost": {"type": "number"},
                "cache_read_input_token_cost_per_audio_token": {"type": "number"},
                "cache_read_input_image_token_cost": {"type": "number"},
                "deprecation_date": {"type": "string"},
                "input_cost_per_audio_per_second": {"type": "number"},
                "input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
                "input_cost_per_audio_token": {"type": "number"},
                "input_cost_per_image_token": {"type": "number"},
                "input_cost_per_character": {"type": "number"},
                "input_cost_per_character_above_128k_tokens": {"type": "number"},
                "input_cost_per_image": {"type": "number"},
                "input_cost_per_image_above_128k_tokens": {"type": "number"},
                "input_cost_per_token_above_200k_tokens": {"type": "number"},
                "input_cost_per_token_above_256k_tokens": {"type": "number"},
                "input_cost_per_token_above_272k_tokens": {"type": "number"},
                "cache_read_input_token_cost_flex": {"type": "number"},
                "cache_read_input_token_cost_priority": {"type": "number"},
                "cache_read_input_token_cost_above_200k_tokens_priority": {
                    "type": "number"
                },
                "cache_read_input_token_cost_above_272k_tokens_priority": {
                    "type": "number"
                },
                "input_cost_per_token_flex": {"type": "number"},
                "input_cost_per_token_priority": {"type": "number"},
                "input_cost_per_token_above_200k_tokens_priority": {"type": "number"},
                "input_cost_per_token_above_272k_tokens_priority": {"type": "number"},
                "input_cost_per_audio_token_priority": {"type": "number"},
                "output_cost_per_token_flex": {"type": "number"},
                "output_cost_per_token_priority": {"type": "number"},
                "output_cost_per_token_above_200k_tokens_priority": {"type": "number"},
                "output_cost_per_token_above_272k_tokens_priority": {"type": "number"},
                "input_cost_per_pixel": {"type": "number"},
                "input_cost_per_query": {"type": "number"},
                "input_cost_per_request": {"type": "number"},
                "input_cost_per_second": {"type": "number"},
                "input_cost_per_token": {"type": "number"},
                "input_cost_per_token_above_128k_tokens": {"type": "number"},
                "input_cost_per_token_batch_requests": {"type": "number"},
                "input_cost_per_token_batches": {"type": "number"},
                "input_cost_per_token_cache_hit": {"type": "number"},
                "input_cost_per_video_per_second": {"type": "number"},
                "input_cost_per_video_per_second_above_8s_interval": {"type": "number"},
                "input_cost_per_video_per_second_above_15s_interval": {
                    "type": "number"
                },
                "input_cost_per_video_per_second_above_128k_tokens": {"type": "number"},
                "input_dbu_cost_per_token": {"type": "number"},
                "annotation_cost_per_page": {"type": "number"},
                "ocr_cost_per_page": {"type": "number"},
                "code_interpreter_cost_per_session": {"type": "number"},
                "inference_geo": {"type": "string"},
                "litellm_provider": {"type": "string"},
                "max_audio_length_hours": {"type": "number"},
                "max_audio_per_prompt": {"type": "number"},
                "max_document_chunks_per_query": {"type": "number"},
                "max_images_per_prompt": {"type": "number"},
                "max_input_tokens": {"type": "number"},
                "max_output_tokens": {"type": "number"},
                "max_pdf_size_mb": {"type": "number"},
                "max_query_tokens": {"type": "number"},
                "max_tokens": {"type": "number"},
                "max_tokens_per_document_chunk": {"type": "number"},
                "max_video_length": {"type": "number"},
                "max_videos_per_prompt": {"type": "number"},
                "metadata": {"type": "object"},
                "provider_specific_entry": {"type": "object"},
                "mode": {
                    "type": "string",
                    "enum": [
                        "audio_speech",
                        "audio_transcription",
                        "chat",
                        "completion",
                        "container",
                        "image_edit",
                        "embedding",
                        "image_generation",
                        "video_generation",
                        "moderation",
                        "rerank",
                        "realtime",
                        "responses",
                        "ocr",
                        "search",
                        "vector_store",
                    ],
                },
                "output_cost_per_audio_token": {"type": "number"},
                "output_cost_per_character": {"type": "number"},
                "output_cost_per_character_above_128k_tokens": {"type": "number"},
                "output_cost_per_image": {"type": "number"},
                "output_cost_per_image_token": {"type": "number"},
                "output_cost_per_image_token_batches": {"type": "number"},
                "output_cost_per_pixel": {"type": "number"},
                "output_cost_per_second": {"type": "number"},
                "output_cost_per_second_1080p": {"type": "number"},
                "output_cost_per_token": {"type": "number"},
                "output_cost_per_token_above_128k_tokens": {"type": "number"},
                "output_cost_per_token_above_200k_tokens": {"type": "number"},
                "output_cost_per_token_above_256k_tokens": {"type": "number"},
                "output_cost_per_token_above_272k_tokens": {"type": "number"},
                "output_cost_per_image_above_1024_and_1024_pixels": {"type": "number"},
                "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image": {
                    "type": "number"
                },
                "output_cost_per_image_above_512_and_512_pixels": {"type": "number"},
                "output_cost_per_image_above_512_and_512_pixels_and_premium_image": {
                    "type": "number"
                },
                "output_cost_per_image_premium_image": {"type": "number"},
                "output_cost_per_token_batches": {"type": "number"},
                "output_cost_per_reasoning_token": {"type": "number"},
                "output_cost_per_video_per_second": {"type": "number"},
                "output_db_cost_per_token": {"type": "number"},
                "output_dbu_cost_per_token": {"type": "number"},
                "output_vector_size": {"type": "number"},
                "rpd": {"type": "number"},
                "rpm": {"type": "number"},
                "source": {"type": "string"},
                "comment": {"type": "string"},
                "supports_assistant_prefill": {"type": "boolean"},
                "supports_audio_input": {"type": "boolean"},
                "supports_audio_output": {"type": "boolean"},
                "supports_embedding_image_input": {"type": "boolean"},
                "supports_code_execution": {"type": "boolean"},
                "supports_file_search": {"type": "boolean"},
                "supports_function_calling": {"type": "boolean"},
                "supports_image_input": {"type": "boolean"},
                "supports_nova_canvas_image_edit": {"type": "boolean"},
                "supports_parallel_function_calling": {"type": "boolean"},
                "supports_pdf_input": {"type": "boolean"},
                "supports_prompt_caching": {"type": "boolean"},
                "supports_response_schema": {"type": "boolean"},
                "supports_system_messages": {"type": "boolean"},
                "supports_tool_choice": {"type": "boolean"},
                "supports_video_input": {"type": "boolean"},
                "supports_vision": {"type": "boolean"},
                "supports_web_search": {"type": "boolean"},
                "supports_url_context": {"type": "boolean"},
                "supports_multimodal": {"type": "boolean"},
                "uses_embed_content": {"type": "boolean"},
                "supports_reasoning": {"type": "boolean"},
                "supports_minimal_reasoning_effort": {"type": "boolean"},
                "supports_none_reasoning_effort": {"type": "boolean"},
                "supports_xhigh_reasoning_effort": {"type": "boolean"},
                "supports_max_reasoning_effort": {"type": "boolean"},
                "supports_service_tier": {"type": "boolean"},
                "supports_preset": {"type": "boolean"},
                "tool_use_system_prompt_tokens": {"type": "number"},
                "tpm": {"type": "number"},
                "supported_endpoints": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": [
                            "/v1/responses",
                            "/v1/embeddings",
                            "/v1/chat/completions",
                            "/v1/completions",
                            "/v1/images/generations",
                            "/v1/realtime",
                            "/v1/images/variations",
                            "/v1/images/edits",
                            "/v1/batch",
                            "/v1/audio/transcriptions",
                            "/v1/audio/speech",
                            "/v1/ocr",
                            "/vertex_ai/live",
                        ],
                    },
                },
                "supported_regions": {
                    "type": "array",
                    "items": {
                        "type": "string",
                    },
                },
                "search_context_cost_per_query": {
                    "type": "object",
                    "properties": {
                        "search_context_size_low": {"type": "number"},
                        "search_context_size_medium": {"type": "number"},
                        "search_context_size_high": {"type": "number"},
                    },
                    "additionalProperties": False,
                },
                "citation_cost_per_token": {"type": "number"},
                "supported_modalities": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["text", "audio", "image", "video"],
                    },
                },
                "supported_output_modalities": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["text", "image", "audio", "code", "video"],
                    },
                },
                "supported_resolutions": {
                    "type": "array",
                    "items": {
                        "type": "string",
                    },
                },
                "supports_native_streaming": {"type": "boolean"},
                "supports_native_structured_output": {"type": "boolean"},
                "tiered_pricing": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "range": {
                                "type": "array",
                                "items": {"type": "number"},
                                "minItems": 2,
                                "maxItems": 2,
                            },
                            "input_cost_per_token": {"type": "number"},
                            "output_cost_per_token": {"type": "number"},
                            "cache_read_input_token_cost": {"type": "number"},
                            "output_cost_per_reasoning_token": {"type": "number"},
                            "max_results_range": {
                                "type": "array",
                                "items": {"type": "number"},
                                "minItems": 2,
                                "maxItems": 2,
                            },
                            "input_cost_per_query": {"type": "number"},
                        },
                        "additionalProperties": False,
                    },
                },
            },
            "additionalProperties": False,
        },
    }

    prod_json = os.path.join(
        os.path.dirname(__file__), "..", "..", "model_prices_and_context_window.json"
    )
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default locale encoding.
    with open(prod_json, "r", encoding="utf-8") as model_prices_file:
        actual_json = json.load(model_prices_file)
    assert isinstance(actual_json, dict)
    actual_json.pop(
        "sample_spec", None
    )  # remove the sample, whose schema is inconsistent with the real data

    # Validate schema
    validate(actual_json, INTENDED_SCHEMA)

    # Validate cost values
    # Define exceptions for models that are allowed to have costs > 1
    # Add model IDs here if they legitimately have costs > 1
    exceptions = [
        # Add any model IDs that should be exempt from the cost validation
        # Example: "expensive-model-id",
    ]

    is_valid, violations = validate_model_cost_values(actual_json, exceptions)

    if not is_valid:
        error_message = "Cost validation failed:\n" + "\n".join(violations)
        error_message += "\n\nTo add exceptions, add the model ID to the 'exceptions' list in the test function."
        raise AssertionError(error_message)


def test_max_tokens_consistency():
    """
    Verify that ``max_tokens`` equals ``max_output_tokens`` for every model entry.

    The spec in model_prices_and_context_window.json describes ``max_tokens``
    as a LEGACY parameter that should mirror ``max_output_tokens`` when the
    provider specifies it. Entries missing either field are not compared, and
    the ``sample_spec`` entry is ignored.
    """
    import json
    from pathlib import Path

    # The pricing file is located three ``.parent`` steps up from this module.
    base_dir = Path(__file__).parent.parent.parent
    with open(base_dir / "model_prices_and_context_window.json", "r") as handle:
        model_map = json.load(handle)

    # Collect one record per entry whose two limits are both set but differ.
    mismatches = [
        {
            "model": name,
            "max_tokens": entry.get("max_tokens"),
            "max_output_tokens": entry.get("max_output_tokens"),
        }
        for name, entry in model_map.items()
        if name != "sample_spec"
        and isinstance(entry, dict)
        and entry.get("max_tokens") is not None
        and entry.get("max_output_tokens") is not None
        and entry.get("max_tokens") != entry.get("max_output_tokens")
    ]

    if not mismatches:
        return

    # Build the failure report; only the first 10 mismatches are listed.
    parts = [
        f"\n\n❌ Found {len(mismatches)} models with max_tokens != max_output_tokens:\n\n"
    ]
    parts.extend(
        f"  {m['model']}: max_tokens={m['max_tokens']}, max_output_tokens={m['max_output_tokens']}\n"
        for m in mismatches[:10]
    )

    overflow = len(mismatches) - 10
    if overflow > 0:
        parts.append(f"\n  ... and {overflow} more\n")

    parts.append(
        "\nTo fix these inconsistencies, run: poetry run python fix_max_tokens_inconsistencies.py"
    )
    raise AssertionError("".join(parts))


def test_get_model_info_gemini():
    """
    Check that every ``gemini/`` model in the cost map defines both 'tpm' and 'rpm'.

    Models whose name contains one of the exempt family markers are skipped.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    # Name fragments of model families that are exempt from the tpm/rpm check
    exempt_markers = ("gemma", "learnlm", "imagen", "veo", "lyria", "robotics")

    for name, entry in litellm.model_cost.items():
        if not name.startswith("gemini/"):
            continue
        if any(marker in name for marker in exempt_markers):
            continue

        assert entry.get("tpm") is not None, f"{name} does not have tpm"
        assert entry.get("rpm") is not None, f"{name} does not have rpm"


def test_openai_models_in_model_info():
    """
    Every OpenAI model that sets ``supports_vision`` must also set ``supports_pdf_input``.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    def _lacks_pdf_support(entry):
        # An OpenAI vision model whose pdf flag is anything other than True.
        return (
            entry.get("litellm_provider") == "openai"
            and entry.get("supports_vision") is True
            and entry.get("supports_pdf_input") is not True
        )

    violated_models = [
        name for name, entry in litellm.model_cost.items() if _lacks_pdf_support(entry)
    ]
    assert (
        not violated_models
    ), f"The following models should support pdf input: {violated_models}"


def test_supports_tool_choice_simple_tests():
    """
    Sanity checks for ``litellm.utils.supports_tool_choice`` on a handful of models.
    """
    supports = litellm.utils.supports_tool_choice

    # These two cases are compared by equality rather than identity.
    assert supports(model="gpt-4o") == True
    assert supports(model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0") == True

    # Calls that must return exactly True
    expected_true = [
        {"model": "anthropic.claude-3-sonnet-20240229-v1:0"},
        {
            "model": "anthropic.claude-3-sonnet-20240229-v1:0",
            "custom_llm_provider": "bedrock_converse",
        },
    ]
    for call_kwargs in expected_true:
        assert supports(**call_kwargs) is True, call_kwargs

    # Calls that must return exactly False
    expected_false = [
        {"model": "us.amazon.nova-micro-v1:0"},
        {"model": "bedrock/us.amazon.nova-micro-v1:0"},
        {
            "model": "us.amazon.nova-micro-v1:0",
            "custom_llm_provider": "bedrock_converse",
        },
        {"model": "perplexity/sonar"},
    ]
    for call_kwargs in expected_false:
        assert supports(**call_kwargs) is False, call_kwargs


def test_check_provider_match():
    """
    Exercise ``_check_provider_match`` for bedrock / bedrock_converse pairings
    and for a provider that does not match.
    """
    check = litellm.utils._check_provider_match

    # (litellm_provider stored in model_info, requested provider, expected result)
    scenarios = [
        ("bedrock", "bedrock", True),
        ("bedrock", "bedrock_converse", True),
        ("bedrock_converse", "bedrock", True),
        ("bedrock_converse", "bedrock_converse", True),
        ("bedrock", "openai", False),
    ]

    for info_provider, requested_provider, expected in scenarios:
        outcome = check({"litellm_provider": info_provider}, requested_provider)
        assert outcome is expected, (info_provider, requested_provider)


def test_get_provider_rerank_config():
    """
    ``ProviderConfigManager.get_provider_rerank_config`` should hand back a
    ``HostedVLLMRerankConfig`` when asked for the hosted_vllm provider.
    """
    from litellm import HostedVLLMRerankConfig
    from litellm.utils import LlmProviders, ProviderConfigManager

    # Positional arguments, passed in the order the original call used them
    lookup_args = ("my_model", LlmProviders.HOSTED_VLLM, "http://localhost", [])
    rerank_config = ProviderConfigManager.get_provider_rerank_config(*lookup_args)

    assert isinstance(rerank_config, HostedVLLMRerankConfig)


# Providers and model-name fragments that should be skipped during testing
OLD_PROVIDERS = ["aleph_alpha", "palm"]
SKIP_MODELS = ["azure/mistral", "azure/command-r", "jamba", "deepinfra", "mistral."]

# Building blocks for the Bedrock entries to block
BEDROCK_REGIONS = ["ap-northeast-1", "eu-central-1", "us-east-1", "us-west-2"]
BEDROCK_COMMITMENTS = ["1-month-commitment", "6-month-commitment"]
BEDROCK_MODELS = {
    "anthropic.claude-v1",
    "anthropic.claude-v2",
    "anthropic.claude-v2:1",
    "anthropic.claude-instant-v1",
}

# Assemble block_list: every region/model pair is blocked both without and
# with each commitment segment in its path.
block_list = set()
for model in BEDROCK_MODELS:
    for region in BEDROCK_REGIONS:
        block_list.add(f"bedrock/{region}/{model}")
        for commitment in BEDROCK_COMMITMENTS:
            block_list.add(f"bedrock/{region}/{commitment}/{model}")

# Cohere entries use a wildcard region and always include a commitment
for commitment in BEDROCK_COMMITMENTS:
    block_list.update(
        (
            f"bedrock/*/{commitment}/cohere.command-text-v14",
            f"bedrock/*/{commitment}/cohere.command-light-text-v14",
        )
    )

print("block_list", block_list)


def test_supports_computer_use_utility():
    """
    Exercise ``litellm.utils.supports_computer_use`` for one model expected to
    support computer use and one expected not to.

    ``LITELLM_LOCAL_MODEL_COST_MAP`` and ``litellm.model_cost`` are overridden
    for the assertions and put back afterwards.
    """
    from litellm.utils import supports_computer_use

    env_key = "LITELLM_LOCAL_MODEL_COST_MAP"

    # Remember the current state so the finally-block can put it back.
    saved_env = os.getenv(env_key)
    saved_model_cost = getattr(litellm, "model_cost", None)

    os.environ[env_key] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    try:
        # Expected to carry the computer-use flag
        assert (
            supports_computer_use(model="anthropic/claude-4-sonnet-20250514") is True
        )

        # Expected to report False
        assert supports_computer_use(model="gpt-3.5-turbo") is False
    finally:
        # Undo the environment override
        if saved_env is not None:
            os.environ[env_key] = saved_env
        else:
            del os.environ[env_key]

        # Undo the model_cost override
        if saved_model_cost is not None:
            litellm.model_cost = saved_model_cost
        elif hasattr(litellm, "model_cost"):
            delattr(litellm, "model_cost")


def test_get_model_info_shows_supports_computer_use():
    """
    Check the 'supports_computer_use' value reported by ``litellm.get_model_info``.

    'claude-4-sonnet-20250514' is expected to report True, while
    'gpt-3.5-turbo' is expected to report None.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    # Model expected to have the flag set to True
    supporting_model = "claude-4-sonnet-20250514"
    supporting_info = litellm.get_model_info(supporting_model)
    print(f"Info for {supporting_model}: {supporting_info}")
    assert supporting_info.get("supports_computer_use") is True

    # Model expected to have no value for the flag
    other_model = "gpt-3.5-turbo"
    other_info = litellm.get_model_info(other_model)
    print(f"Info for {other_model}: {other_info}")
    assert other_info.get("supports_computer_use") is None


@pytest.mark.parametrize(
    "model, custom_llm_provider",
    [
        ("gpt-3.5-turbo", "openai"),
        ("anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock"),
        ("gemini-2.5-pro", "vertex_ai"),
    ],
)
def test_pre_process_non_default_params(model, custom_llm_provider):
    """
    A Pydantic model passed as ``response_format`` should be converted by
    ``pre_process_non_default_params`` into a ``json_schema`` response format.
    """
    from pydantic import BaseModel

    from litellm.utils import ProviderConfigManager, pre_process_non_default_params

    class ResponseFormat(BaseModel):
        x: str
        y: str

    chat_config = ProviderConfigManager.get_provider_chat_config(
        model=model, provider=LlmProviders(custom_llm_provider)
    )

    processed = pre_process_non_default_params(
        model=model,
        passed_params={
            "model": "gpt-3.5-turbo",
            "response_format": ResponseFormat,
        },
        special_params={},
        custom_llm_provider=custom_llm_provider,
        additional_drop_params=None,
        provider_config=chat_config,
    )
    print(processed)

    # Vertex AI / Gemini uses Pydantic's model_json_schema(), which omits
    # additionalProperties: False (Gemini rejects it). Other providers use
    # OpenAI's to_strict_json_schema(), which includes it.
    schema = {
        "properties": {
            "x": {"title": "X", "type": "string"},
            "y": {"title": "Y", "type": "string"},
        },
        "required": ["x", "y"],
        "title": "ResponseFormat",
        "type": "object",
    }
    gemini_style_providers = ("vertex_ai", "vertex_ai_beta", "gemini")
    if custom_llm_provider not in gemini_style_providers:
        schema["additionalProperties"] = False

    expected = {
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "schema": schema,
                "name": "ResponseFormat",
                "strict": True,
            },
        }
    }
    assert processed == expected


from litellm.utils import supports_function_calling


class TestProxyFunctionCalling:
    """Test class for proxy function calling capabilities."""

    @pytest.fixture(autouse=True)
    def reset_mock_cache(self):
        """Flush litellm's model cache ahead of every test in this class."""
        from litellm.utils import _model_cache as model_cache

        model_cache.flush_cache()

    @pytest.mark.parametrize(
        "direct_model,proxy_model,expected_result",
        [
            # OpenAI models
            ("gpt-3.5-turbo", "litellm_proxy/gpt-3.5-turbo", True),
            ("gpt-4", "litellm_proxy/gpt-4", True),
            ("gpt-4o", "litellm_proxy/gpt-4o", True),
            ("gpt-4o-mini", "litellm_proxy/gpt-4o-mini", True),
            ("gpt-4-turbo", "litellm_proxy/gpt-4-turbo", True),
            ("gpt-4-1106-preview", "litellm_proxy/gpt-4-1106-preview", True),
            # Azure OpenAI models
            ("azure/gpt-4", "litellm_proxy/azure/gpt-4", True),
            ("azure/gpt-3.5-turbo", "litellm_proxy/azure/gpt-3.5-turbo", True),
            (
                "azure/gpt-4-1106-preview",
                "litellm_proxy/azure/gpt-4-1106-preview",
                True,
            ),
            # Anthropic models (Claude supports function calling)
            (
                "claude-sonnet-4-6",
                "litellm_proxy/claude-sonnet-4-6",
                True,
            ),
            # Google models
            ("gemini-2.5-pro", "litellm_proxy/gemini-2.5-pro", True),
            ("gemini/gemini-2.5-pro", "litellm_proxy/gemini/gemini-2.5-pro", True),
            ("gemini/gemini-2.5-flash", "litellm_proxy/gemini/gemini-2.5-flash", True),
            # Groq models (mixed support)
            ("groq/gemma-7b-it", "litellm_proxy/groq/gemma-7b-it", True),
            (
                "groq/llama-3.3-70b-versatile",
                "litellm_proxy/groq/llama-3.3-70b-versatile",
                True,
            ),
            # Cohere models (generally don't support function calling)
            ("command-nightly", "litellm_proxy/command-nightly", False),
        ],
    )
    def test_proxy_function_calling_support_consistency(
        self, direct_model, proxy_model, expected_result
    ):
        """A proxy-prefixed model must report the same function calling support as the direct model."""
        direct_support = supports_function_calling(direct_model)
        proxy_support = supports_function_calling(proxy_model)

        # Each lookup has to agree with the parametrized expectation...
        checks = (
            ("Direct", direct_model, direct_support),
            ("Proxy", proxy_model, proxy_support),
        )
        for label, name, actual in checks:
            assert (
                actual == expected_result
            ), f"{label} model {name} should return {expected_result}"

        # ...and the two lookups have to agree with each other.
        assert (
            direct_support == proxy_support
        ), f"Mismatch: {direct_model}={direct_support} vs {proxy_model}={proxy_support}"

    @pytest.mark.parametrize(
        "proxy_model_name,underlying_model,expected_proxy_result",
        [
            # Custom model names that cannot be resolved without proxy configuration context
            # These will return False because LiteLLM cannot determine the underlying model
            (
                "litellm_proxy/bedrock-claude-3-haiku",
                "bedrock/anthropic.claude-3-haiku-20240307-v1:0",
                False,
            ),
            (
                "litellm_proxy/bedrock-claude-3-sonnet",
                "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
            ),
            (
                "litellm_proxy/bedrock-claude-3-opus",
                "bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
            ),
            (
                "litellm_proxy/bedrock-claude-instant",
                "bedrock/anthropic.claude-instant-v1",
                False,
            ),
            (
                "litellm_proxy/bedrock-titan-text",
                "bedrock/amazon.titan-text-express-v1",
                False,
            ),
            # Azure with custom deployment names (cannot be resolved)
            ("litellm_proxy/my-gpt4-deployment", "azure/gpt-4", False),
            ("litellm_proxy/production-gpt35", "azure/gpt-3.5-turbo", False),
            ("litellm_proxy/dev-gpt4o", "azure/gpt-4o", False),
            # Custom OpenAI deployments (cannot be resolved)
            ("litellm_proxy/company-gpt4", "gpt-4", False),
            ("litellm_proxy/internal-gpt35", "gpt-3.5-turbo", False),
            # Vertex AI with custom names (cannot be resolved)
            ("litellm_proxy/vertex-gemini-pro", "vertex_ai/gemini-1.5-pro", False),
            ("litellm_proxy/vertex-gemini-flash", "vertex_ai/gemini-1.5-flash", False),
            # Anthropic with custom names (cannot be resolved)
            ("litellm_proxy/claude-prod", "anthropic/claude-3-sonnet-20240229", False),
            ("litellm_proxy/claude-dev", "anthropic/claude-3-haiku-20240307", False),
            # Groq with custom names (cannot be resolved)
            ("litellm_proxy/fast-llama", "groq/llama-3.1-8b-instant", False),
            ("litellm_proxy/groq-gemma", "groq/gemma-7b-it", False),
            # Cohere with custom names (cannot be resolved)
            ("litellm_proxy/cohere-command", "cohere/command-r", False),
            ("litellm_proxy/cohere-command-plus", "cohere/command-r-plus", False),
            # Together AI with custom names (cannot be resolved)
            (
                "litellm_proxy/together-llama",
                "together_ai/meta-llama/Llama-2-70b-chat-hf",
                False,
            ),
            (
                "litellm_proxy/together-mistral",
                "together_ai/mistralai/Mistral-7B-Instruct-v0.1",
                False,
            ),
            # Ollama with custom names (cannot be resolved)
            ("litellm_proxy/local-llama", "ollama/llama2", False),
            ("litellm_proxy/local-mistral", "ollama/mistral", False),
        ],
    )
    def test_proxy_custom_model_names_without_config(
        self, proxy_model_name, underlying_model, expected_proxy_result
    ):
        """
        Check proxy models whose custom name differs from the underlying model.

        Without proxy configuration context, LiteLLM cannot map a custom name
        back to its underlying model, so each case is expected to report False.
        The underlying model is queried only to print its result for reference;
        a failure while doing so is printed as a warning and does not fail the test.
        """
        # Informational only: show what the underlying model reports.
        try:
            reference = supports_function_calling(underlying_model)
            print(
                f"Underlying model {underlying_model} supports function calling: {reference}"
            )
        except Exception as err:
            print(f"Warning: Could not test underlying model {underlying_model}: {err}")

        # The actual check: the custom proxy name on its own.
        observed = supports_function_calling(proxy_model_name)
        assert (
            observed == expected_proxy_result
        ), f"Proxy model {proxy_model_name} should return {expected_proxy_result} (without config context)"

    def test_proxy_model_resolution_with_custom_names_documentation(self):
        """
        Document how custom proxy model names are handled.

        Two cases are asserted: a custom name that cannot be resolved is
        expected to report False, and a proxy name that matches a known model
        is expected to report True. A summary of the behavior is printed at the end.
        """
        # Case 1: custom name with nothing to resolve it against
        unresolvable = "litellm_proxy/my-custom-claude"
        unresolvable_support = supports_function_calling(unresolvable)
        assert (
            unresolvable_support is False
        ), "Custom model names return False without proxy config context"

        # Case 2: proxy name that matches a known model name
        resolvable = "litellm_proxy/claude-sonnet-4-5-20250929"
        resolvable_support = supports_function_calling(resolvable)
        assert resolvable_support is True, "Resolvable model names work with fallback logic"

        # Printed summary of the behavior documented by this test
        notes = """
        PROXY MODEL RESOLUTION BEHAVIOR:

        ✅ WORKS (with current fallback logic):
           - litellm_proxy/gpt-4
           - litellm_proxy/claude-sonnet-4-5-20250929
           - litellm_proxy/anthropic/claude-3-haiku-20240307

        ❌ DOESN'T WORK (requires proxy server config):
           - litellm_proxy/my-custom-gpt4
           - litellm_proxy/bedrock-claude-3-haiku
           - litellm_proxy/production-model

        💡 SOLUTION: Use LiteLLM proxy server with proper model_list configuration
           that maps custom names to underlying models.
        """
        print(notes)

    @pytest.mark.parametrize(
        "proxy_model_with_hints,expected_result",
        [
            # These are proxy models where we can infer the underlying model from the name
            ("litellm_proxy/gpt-4-with-functions", True),  # Hints at GPT-4
            ("litellm_proxy/claude-3-haiku-prod", True),  # Hints at Claude 3 Haiku
            (
                "litellm_proxy/bedrock-anthropic-claude-3-sonnet",
                True,
            ),  # Hints at Bedrock Claude 3 Sonnet
        ],
    )
    def test_proxy_models_with_naming_hints(
        self, proxy_model_with_hints, expected_result
    ):
        """
        Check proxy model names that merely hint at the underlying model.

        ``expected_result`` is the desired outcome if hint-based inference
        existed; it is only printed. The assertion pins the current behavior,
        which is that these names report False.
        """
        current = supports_function_calling(proxy_model_with_hints)

        # Show current versus desired behavior side by side.
        print(
            f"Model {proxy_model_with_hints}: current={current}, desired={expected_result}"
        )

        # Pin today's behavior: no inference from naming hints.
        assert (
            current is False
        ), f"Current limitation: {proxy_model_with_hints} returns False without inference"

    @pytest.mark.parametrize(
        "proxy_model,expected_result",
        [
            # Test specific proxy models that should support function calling
            ("litellm_proxy/gpt-3.5-turbo", True),
            ("litellm_proxy/gpt-4", True),
            ("litellm_proxy/gpt-4o", True),
            ("litellm_proxy/claude-sonnet-4-6", True),
            ("litellm_proxy/gemini/gemini-2.5-pro", True),
            # Test proxy models that should not support function calling
            ("litellm_proxy/command-nightly", False),
            ("litellm_proxy/anthropic.claude-instant-v1", False),
        ],
    )
    def test_proxy_only_function_calling_support(self, proxy_model, expected_result):
        """
        Check the function calling support reported for proxy models on their own.

        No comparison against a direct model is made here. Any exception,
        including a failed assertion, is reported through ``pytest.fail``.
        """
        try:
            observed = supports_function_calling(model=proxy_model)
            assert (
                observed == expected_result
            ), f"Proxy model {proxy_model} returned {observed}, expected {expected_result}"
        except Exception as err:
            pytest.fail(f"Error testing proxy model {proxy_model}: {err}")

    def test_litellm_utils_supports_function_calling_import(self):
        """``supports_function_calling`` must be importable from ``litellm.utils`` and callable."""
        try:
            from litellm.utils import supports_function_calling as imported_fn
        except ImportError as err:
            pytest.fail(f"Failed to import supports_function_calling: {err}")
        else:
            assert callable(imported_fn)

    def test_litellm_supports_function_calling_import(self):
        """``supports_function_calling`` must be exposed on the ``litellm`` package and callable."""
        try:
            import litellm

            # Sentinel lookup distinguishes "missing" from any real attribute value.
            missing = object()
            exposed = getattr(litellm, "supports_function_calling", missing)
            assert exposed is not missing
            assert callable(exposed)
        except Exception as err:
            pytest.fail(f"Failed to access litellm.supports_function_calling: {err}")

    @pytest.mark.parametrize(
        "model_name",
        [
            "litellm_proxy/gpt-3.5-turbo",
            "litellm_proxy/gpt-4",
            "litellm_proxy/claude-sonnet-4-6",
            "litellm_proxy/gemini/gemini-2.5-pro",
        ],
    )
    def test_proxy_model_with_custom_llm_provider_none(self, model_name):
        """
        Call ``supports_function_calling`` with ``custom_llm_provider=None`` passed explicitly.

        Every parametrized proxy model is expected to report True. Any
        exception, including a failed assertion, is reported through ``pytest.fail``.
        """
        try:
            supported = supports_function_calling(
                model=model_name, custom_llm_provider=None
            )
            assert (
                supported is True
            ), f"Model {model_name} should support function calling but returned {supported}"
        except Exception as err:
            pytest.fail(
                f"Error testing {model_name} with custom_llm_provider=None: {err}"
            )

    def test_edge_cases_and_malformed_proxy_models(self):
        """Test edge cases and malformed proxy model names.

        For each name, ``supports_function_calling`` may either raise (which is
        tolerated) or return a value, in which case the value must equal the
        expected result listed alongside it.
        """
        test_cases = [
            ("litellm_proxy/", False),  # Empty model name after proxy prefix
            ("litellm_proxy", False),  # Just the proxy prefix without slash
            ("litellm_proxy//gpt-3.5-turbo", False),  # Double slash
            ("litellm_proxy/nonexistent-model", False),  # Non-existent model
        ]

        for model_name, expected_result in test_cases:
            try:
                result = supports_function_calling(model=model_name)
            except Exception:
                # It's acceptable for malformed model names to raise exceptions
                # rather than returning False, so only the lookup is guarded.
                continue

            # Checked outside the ``try``: AssertionError is an Exception
            # subclass, so asserting inside the guarded block would let a wrong
            # result be silently swallowed and the test could never fail.
            assert (
                result == expected_result
            ), f"Edge case {model_name} returned {result}, expected {expected_result}"

    def test_proxy_model_resolution_demonstration(self):
        """
        Demonstrate proxy model resolution for a single direct/proxy pair.

        Both results are printed. If they differ the test is skipped with a
        "known issue" message; otherwise the equality is asserted.
        """
        direct_model = "gpt-3.5-turbo"
        proxy_model = "litellm_proxy/gpt-3.5-turbo"

        direct_support = supports_function_calling(model=direct_model)
        proxy_support = supports_function_calling(model=proxy_model)

        print("\nDemonstration of proxy model resolution:")
        print(
            f"Direct model '{direct_model}' supports function calling: {direct_support}"
        )
        print(f"Proxy model '{proxy_model}' supports function calling: {proxy_support}")

        # A mismatch is treated as a known issue and skips instead of failing.
        results_agree = direct_support == proxy_support
        if not results_agree:
            skip_reason = (
                "Known issue: Proxy model resolution inconsistency. "
                f"Direct: {direct_support}, Proxy: {proxy_support}. "
                "This test will pass when the issue is resolved."
            )
            pytest.skip(skip_reason)

        assert direct_support == proxy_support, (
            f"Proxy model resolution issue: {direct_model} -> {direct_support}, "
            f"{proxy_model} -> {proxy_support}"
        )

    @pytest.mark.parametrize(
        "proxy_model_name,underlying_bedrock_model,expected_proxy_result,description",
        [
            # Bedrock Converse API mappings - these are the real-world scenarios
            (
                "litellm_proxy/bedrock-claude-3-haiku",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "Bedrock Claude 3 Haiku via Converse API",
            ),
            (
                "litellm_proxy/bedrock-claude-3-sonnet",
                "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
                "Bedrock Claude 3 Sonnet via Converse API",
            ),
            (
                "litellm_proxy/bedrock-claude-3-opus",
                "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
                "Bedrock Claude 3 Opus via Converse API",
            ),
            (
                "litellm_proxy/bedrock-claude-3-5-sonnet",
                "bedrock/converse/anthropic.claude-haiku-4-5-20251001-v1:0",
                False,
                "Bedrock Claude 3.5 Sonnet via Converse API",
            ),
            # Bedrock Legacy API mappings (non-converse)
            (
                "litellm_proxy/bedrock-claude-instant",
                "bedrock/anthropic.claude-instant-v1",
                False,
                "Bedrock Claude Instant Legacy API",
            ),
            (
                "litellm_proxy/bedrock-claude-v2",
                "bedrock/anthropic.claude-v2",
                False,
                "Bedrock Claude v2 Legacy API",
            ),
            (
                "litellm_proxy/bedrock-claude-v2-1",
                "bedrock/anthropic.claude-v2:1",
                False,
                "Bedrock Claude v2.1 Legacy API",
            ),
            # Bedrock other model providers via Converse API
            (
                "litellm_proxy/bedrock-titan-text",
                "bedrock/converse/amazon.titan-text-express-v1",
                False,
                "Bedrock Titan Text Express via Converse API",
            ),
            (
                "litellm_proxy/bedrock-titan-text-premier",
                "bedrock/converse/amazon.titan-text-premier-v1:0",
                False,
                "Bedrock Titan Text Premier via Converse API",
            ),
            (
                "litellm_proxy/bedrock-llama3-8b",
                "bedrock/converse/meta.llama3-8b-instruct-v1:0",
                False,
                "Bedrock Llama 3 8B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-llama3-70b",
                "bedrock/converse/meta.llama3-70b-instruct-v1:0",
                False,
                "Bedrock Llama 3 70B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-mistral-7b",
                "bedrock/converse/mistral.mistral-7b-instruct-v0:2",
                False,
                "Bedrock Mistral 7B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-mistral-8x7b",
                "bedrock/converse/mistral.mixtral-8x7b-instruct-v0:1",
                False,
                "Bedrock Mistral 8x7B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-mistral-large",
                "bedrock/converse/mistral.mistral-large-2402-v1:0",
                False,
                "Bedrock Mistral Large via Converse API",
            ),
            # Company-specific naming patterns (real-world examples)
            (
                "litellm_proxy/prod-claude-haiku",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "Production Claude Haiku",
            ),
            (
                "litellm_proxy/dev-claude-sonnet",
                "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
                "Development Claude Sonnet",
            ),
            (
                "litellm_proxy/staging-claude-opus",
                "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
                "Staging Claude Opus",
            ),
            (
                "litellm_proxy/cost-optimized-claude",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "Cost-optimized Claude deployment",
            ),
            (
                "litellm_proxy/high-performance-claude",
                "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
                "High-performance Claude deployment",
            ),
            # Regional deployment examples
            (
                "litellm_proxy/us-east-claude",
                "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
                "US East Claude deployment",
            ),
            (
                "litellm_proxy/eu-west-claude",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "EU West Claude deployment",
            ),
            (
                "litellm_proxy/ap-south-llama",
                "bedrock/converse/meta.llama3-70b-instruct-v1:0",
                False,
                "Asia Pacific Llama deployment",
            ),
        ],
    )
    def test_bedrock_converse_api_proxy_mappings(
        self,
        proxy_model_name,
        underlying_bedrock_model,
        expected_proxy_result,
        description,
    ):
        """
        Test real-world Bedrock Converse API proxy model mappings.

        This test covers the specific scenario where proxy model names like
        'bedrock-claude-3-haiku' map to underlying Bedrock Converse API models like
        'bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0'.

        These mappings are typically defined in proxy server configuration files
        and cannot be resolved by LiteLLM without that context.

        Args:
            proxy_model_name: ``litellm_proxy/``-prefixed alias passed to
                ``supports_function_calling``.
            underlying_bedrock_model: Bedrock model id the alias stands for;
                queried directly as a sanity check.
            expected_proxy_result: Value ``supports_function_calling`` is
                expected to return for the alias.
            description: Human-readable label used in output and failure messages.
        """
        # NOTE(review): a method with this exact name is defined more than once
        # in this class; Python keeps only the last definition, so the duplicate
        # copies should be removed.
        print(f"\nTesting: {description}")
        print(f"  Proxy model: {proxy_model_name}")
        print(f"  Underlying model: {underlying_bedrock_model}")

        # Test the underlying model directly to verify it supports function calling.
        # Only the lookup itself is best-effort: the assertion lives in the `else`
        # branch because AssertionError is an Exception subclass and would
        # otherwise be swallowed by the handler below.
        try:
            underlying_result = supports_function_calling(underlying_bedrock_model)
        except Exception as e:
            print(
                f"  Warning: Could not test underlying model {underlying_bedrock_model}: {e}"
            )
        else:
            print(f"  Underlying model function calling support: {underlying_result}")

            # Most Bedrock Converse API models with Anthropic Claude should support function calling
            if "anthropic.claude-3" in underlying_bedrock_model:
                assert (
                    underlying_result is True
                ), f"Claude 3 models should support function calling: {underlying_bedrock_model}"

        # Test the proxy model - should return False due to lack of configuration context
        proxy_result = supports_function_calling(proxy_model_name)
        print(f"  Proxy model function calling support: {proxy_result}")

        assert proxy_result == expected_proxy_result, (
            f"Proxy model {proxy_model_name} should return {expected_proxy_result} "
            f"(without config context). Description: {description}"
        )

    def test_real_world_proxy_config_documentation(self):
        """
        Document how real-world proxy configurations would handle model mappings.

        This test provides documentation on how the proxy server configuration
        would typically map custom model names to underlying models, then checks
        that the underlying Bedrock Claude 3 models report function calling
        support when queried directly.
        """
        # NOTE(review): a method with this exact name is defined more than once
        # in this class; Python keeps only the last definition, so the duplicate
        # copies should be removed.
        print(
            """

        REAL-WORLD PROXY SERVER CONFIGURATION EXAMPLE:
        ===============================================

        In a proxy_server_config.yaml file, you would define:

        model_list:
          - model_name: bedrock-claude-3-haiku
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
              aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
              aws_region_name: us-east-1

          - model_name: bedrock-claude-3-sonnet
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0
              aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
              aws_region_name: us-east-1

          - model_name: prod-claude-haiku
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
              aws_access_key_id: os.environ/PROD_AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/PROD_AWS_SECRET_ACCESS_KEY
              aws_region_name: us-west-2


        FUNCTION CALLING WITH PROXY SERVER:
        ===================================

        When using the proxy server with this configuration:

        1. Client calls: supports_function_calling("bedrock-claude-3-haiku")
        2. Proxy server resolves to: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
        3. LiteLLM evaluates the underlying model's capabilities
        4. Returns: True (because Claude 3 Haiku supports function calling)

        Without the proxy server configuration context, LiteLLM cannot resolve
        the custom model name and returns False.


        BEDROCK CONVERSE API BENEFITS:
        ==============================

        The Bedrock Converse API provides:
        - Standardized function calling interface across providers
        - Better tool use capabilities compared to legacy APIs
        - Consistent request/response format
        - Enhanced streaming support for function calls

        """
        )

        # Verify that direct underlying models work as expected
        bedrock_models = [
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
        ]

        for model in bedrock_models:
            # Only the lookup is best-effort. The assertion runs in the `else`
            # branch so a failure is not swallowed by `except Exception`
            # (AssertionError is an Exception subclass).
            try:
                result = supports_function_calling(model)
            except Exception as e:
                print(f"Could not test {model}: {e}")
            else:
                print(f"Direct test - {model}: {result}")
                # Claude 3 models should support function calling
                assert (
                    result is True
                ), f"Claude 3 model should support function calling: {model}"

    @pytest.mark.parametrize(
        "proxy_model_name,underlying_bedrock_model,expected_proxy_result,description",
        [
            # Bedrock Converse API mappings - these are the real-world scenarios
            (
                "litellm_proxy/bedrock-claude-3-haiku",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "Bedrock Claude 3 Haiku via Converse API",
            ),
            (
                "litellm_proxy/bedrock-claude-3-sonnet",
                "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
                "Bedrock Claude 3 Sonnet via Converse API",
            ),
            (
                "litellm_proxy/bedrock-claude-3-opus",
                "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
                "Bedrock Claude 3 Opus via Converse API",
            ),
            (
                "litellm_proxy/bedrock-claude-3-5-sonnet",
                "bedrock/converse/anthropic.claude-haiku-4-5-20251001-v1:0",
                False,
                "Bedrock Claude 3.5 Sonnet via Converse API",
            ),
            # Bedrock Legacy API mappings (non-converse)
            (
                "litellm_proxy/bedrock-claude-instant",
                "bedrock/anthropic.claude-instant-v1",
                False,
                "Bedrock Claude Instant Legacy API",
            ),
            (
                "litellm_proxy/bedrock-claude-v2",
                "bedrock/anthropic.claude-v2",
                False,
                "Bedrock Claude v2 Legacy API",
            ),
            (
                "litellm_proxy/bedrock-claude-v2-1",
                "bedrock/anthropic.claude-v2:1",
                False,
                "Bedrock Claude v2.1 Legacy API",
            ),
            # Bedrock other model providers via Converse API
            (
                "litellm_proxy/bedrock-titan-text",
                "bedrock/converse/amazon.titan-text-express-v1",
                False,
                "Bedrock Titan Text Express via Converse API",
            ),
            (
                "litellm_proxy/bedrock-titan-text-premier",
                "bedrock/converse/amazon.titan-text-premier-v1:0",
                False,
                "Bedrock Titan Text Premier via Converse API",
            ),
            (
                "litellm_proxy/bedrock-llama3-8b",
                "bedrock/converse/meta.llama3-8b-instruct-v1:0",
                False,
                "Bedrock Llama 3 8B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-llama3-70b",
                "bedrock/converse/meta.llama3-70b-instruct-v1:0",
                False,
                "Bedrock Llama 3 70B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-mistral-7b",
                "bedrock/converse/mistral.mistral-7b-instruct-v0:2",
                False,
                "Bedrock Mistral 7B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-mistral-8x7b",
                "bedrock/converse/mistral.mixtral-8x7b-instruct-v0:1",
                False,
                "Bedrock Mistral 8x7B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-mistral-large",
                "bedrock/converse/mistral.mistral-large-2402-v1:0",
                False,
                "Bedrock Mistral Large via Converse API",
            ),
            # Company-specific naming patterns (real-world examples)
            (
                "litellm_proxy/prod-claude-haiku",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "Production Claude Haiku",
            ),
            (
                "litellm_proxy/dev-claude-sonnet",
                "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
                "Development Claude Sonnet",
            ),
            (
                "litellm_proxy/staging-claude-opus",
                "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
                "Staging Claude Opus",
            ),
            (
                "litellm_proxy/cost-optimized-claude",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "Cost-optimized Claude deployment",
            ),
            (
                "litellm_proxy/high-performance-claude",
                "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
                "High-performance Claude deployment",
            ),
            # Regional deployment examples
            (
                "litellm_proxy/us-east-claude",
                "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
                "US East Claude deployment",
            ),
            (
                "litellm_proxy/eu-west-claude",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "EU West Claude deployment",
            ),
            (
                "litellm_proxy/ap-south-llama",
                "bedrock/converse/meta.llama3-70b-instruct-v1:0",
                False,
                "Asia Pacific Llama deployment",
            ),
        ],
    )
    def test_bedrock_converse_api_proxy_mappings(
        self,
        proxy_model_name,
        underlying_bedrock_model,
        expected_proxy_result,
        description,
    ):
        """
        Test real-world Bedrock Converse API proxy model mappings.

        This test covers the specific scenario where proxy model names like
        'bedrock-claude-3-haiku' map to underlying Bedrock Converse API models like
        'bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0'.

        These mappings are typically defined in proxy server configuration files
        and cannot be resolved by LiteLLM without that context.

        Args:
            proxy_model_name: ``litellm_proxy/``-prefixed alias passed to
                ``supports_function_calling``.
            underlying_bedrock_model: Bedrock model id the alias stands for;
                queried directly as a sanity check.
            expected_proxy_result: Value ``supports_function_calling`` is
                expected to return for the alias.
            description: Human-readable label used in output and failure messages.
        """
        # NOTE(review): a method with this exact name is defined more than once
        # in this class; Python keeps only the last definition, so the duplicate
        # copies should be removed.
        print(f"\nTesting: {description}")
        print(f"  Proxy model: {proxy_model_name}")
        print(f"  Underlying model: {underlying_bedrock_model}")

        # Test the underlying model directly to verify it supports function calling.
        # Only the lookup itself is best-effort: the assertion lives in the `else`
        # branch because AssertionError is an Exception subclass and would
        # otherwise be swallowed by the handler below.
        try:
            underlying_result = supports_function_calling(underlying_bedrock_model)
        except Exception as e:
            print(
                f"  Warning: Could not test underlying model {underlying_bedrock_model}: {e}"
            )
        else:
            print(f"  Underlying model function calling support: {underlying_result}")

            # Most Bedrock Converse API models with Anthropic Claude should support function calling
            if "anthropic.claude-3" in underlying_bedrock_model:
                assert (
                    underlying_result is True
                ), f"Claude 3 models should support function calling: {underlying_bedrock_model}"

        # Test the proxy model - should return False due to lack of configuration context
        proxy_result = supports_function_calling(proxy_model_name)
        print(f"  Proxy model function calling support: {proxy_result}")

        assert proxy_result == expected_proxy_result, (
            f"Proxy model {proxy_model_name} should return {expected_proxy_result} "
            f"(without config context). Description: {description}"
        )

    def test_real_world_proxy_config_documentation(self):
        """
        Document how real-world proxy configurations would handle model mappings.

        This test provides documentation on how the proxy server configuration
        would typically map custom model names to underlying models, then checks
        that the underlying Bedrock Claude 3 models report function calling
        support when queried directly.
        """
        # NOTE(review): a method with this exact name is defined more than once
        # in this class; Python keeps only the last definition, so the duplicate
        # copies should be removed.
        print(
            """

        REAL-WORLD PROXY SERVER CONFIGURATION EXAMPLE:
        ===============================================

        In a proxy_server_config.yaml file, you would define:

        model_list:
          - model_name: bedrock-claude-3-haiku
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
              aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
              aws_region_name: us-east-1

          - model_name: bedrock-claude-3-sonnet
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0
              aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
              aws_region_name: us-east-1

          - model_name: prod-claude-haiku
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
              aws_access_key_id: os.environ/PROD_AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/PROD_AWS_SECRET_ACCESS_KEY
              aws_region_name: us-west-2


        FUNCTION CALLING WITH PROXY SERVER:
        ===================================

        When using the proxy server with this configuration:

        1. Client calls: supports_function_calling("bedrock-claude-3-haiku")
        2. Proxy server resolves to: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
        3. LiteLLM evaluates the underlying model's capabilities
        4. Returns: True (because Claude 3 Haiku supports function calling)

        Without the proxy server configuration context, LiteLLM cannot resolve
        the custom model name and returns False.


        BEDROCK CONVERSE API BENEFITS:
        ==============================

        The Bedrock Converse API provides:
        - Standardized function calling interface across providers
        - Better tool use capabilities compared to legacy APIs
        - Consistent request/response format
        - Enhanced streaming support for function calls

        """
        )

        # Verify that direct underlying models work as expected
        bedrock_models = [
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
        ]

        for model in bedrock_models:
            # Only the lookup is best-effort. The assertion runs in the `else`
            # branch so a failure is not swallowed by `except Exception`
            # (AssertionError is an Exception subclass).
            try:
                result = supports_function_calling(model)
            except Exception as e:
                print(f"Could not test {model}: {e}")
            else:
                print(f"Direct test - {model}: {result}")
                # Claude 3 models should support function calling
                assert (
                    result is True
                ), f"Claude 3 model should support function calling: {model}"

    @pytest.mark.parametrize(
        "proxy_model_name,underlying_bedrock_model,expected_proxy_result,description",
        [
            # Bedrock Converse API mappings - these are the real-world scenarios
            (
                "litellm_proxy/bedrock-claude-3-haiku",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "Bedrock Claude 3 Haiku via Converse API",
            ),
            (
                "litellm_proxy/bedrock-claude-3-sonnet",
                "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
                "Bedrock Claude 3 Sonnet via Converse API",
            ),
            (
                "litellm_proxy/bedrock-claude-3-opus",
                "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
                "Bedrock Claude 3 Opus via Converse API",
            ),
            (
                "litellm_proxy/bedrock-claude-3-5-sonnet",
                "bedrock/converse/anthropic.claude-haiku-4-5-20251001-v1:0",
                False,
                "Bedrock Claude 3.5 Sonnet via Converse API",
            ),
            # Bedrock Legacy API mappings (non-converse)
            (
                "litellm_proxy/bedrock-claude-instant",
                "bedrock/anthropic.claude-instant-v1",
                False,
                "Bedrock Claude Instant Legacy API",
            ),
            (
                "litellm_proxy/bedrock-claude-v2",
                "bedrock/anthropic.claude-v2",
                False,
                "Bedrock Claude v2 Legacy API",
            ),
            (
                "litellm_proxy/bedrock-claude-v2-1",
                "bedrock/anthropic.claude-v2:1",
                False,
                "Bedrock Claude v2.1 Legacy API",
            ),
            # Bedrock other model providers via Converse API
            (
                "litellm_proxy/bedrock-titan-text",
                "bedrock/converse/amazon.titan-text-express-v1",
                False,
                "Bedrock Titan Text Express via Converse API",
            ),
            (
                "litellm_proxy/bedrock-titan-text-premier",
                "bedrock/converse/amazon.titan-text-premier-v1:0",
                False,
                "Bedrock Titan Text Premier via Converse API",
            ),
            (
                "litellm_proxy/bedrock-llama3-8b",
                "bedrock/converse/meta.llama3-8b-instruct-v1:0",
                False,
                "Bedrock Llama 3 8B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-llama3-70b",
                "bedrock/converse/meta.llama3-70b-instruct-v1:0",
                False,
                "Bedrock Llama 3 70B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-mistral-7b",
                "bedrock/converse/mistral.mistral-7b-instruct-v0:2",
                False,
                "Bedrock Mistral 7B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-mistral-8x7b",
                "bedrock/converse/mistral.mixtral-8x7b-instruct-v0:1",
                False,
                "Bedrock Mistral 8x7B via Converse API",
            ),
            (
                "litellm_proxy/bedrock-mistral-large",
                "bedrock/converse/mistral.mistral-large-2402-v1:0",
                False,
                "Bedrock Mistral Large via Converse API",
            ),
            # Company-specific naming patterns (real-world examples)
            (
                "litellm_proxy/prod-claude-haiku",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "Production Claude Haiku",
            ),
            (
                "litellm_proxy/dev-claude-sonnet",
                "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
                "Development Claude Sonnet",
            ),
            (
                "litellm_proxy/staging-claude-opus",
                "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
                "Staging Claude Opus",
            ),
            (
                "litellm_proxy/cost-optimized-claude",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "Cost-optimized Claude deployment",
            ),
            (
                "litellm_proxy/high-performance-claude",
                "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
                False,
                "High-performance Claude deployment",
            ),
            # Regional deployment examples
            (
                "litellm_proxy/us-east-claude",
                "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
                False,
                "US East Claude deployment",
            ),
            (
                "litellm_proxy/eu-west-claude",
                "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
                False,
                "EU West Claude deployment",
            ),
            (
                "litellm_proxy/ap-south-llama",
                "bedrock/converse/meta.llama3-70b-instruct-v1:0",
                False,
                "Asia Pacific Llama deployment",
            ),
        ],
    )
    def test_bedrock_converse_api_proxy_mappings(
        self,
        proxy_model_name,
        underlying_bedrock_model,
        expected_proxy_result,
        description,
    ):
        """
        Test real-world Bedrock Converse API proxy model mappings.

        This test covers the specific scenario where proxy model names like
        'bedrock-claude-3-haiku' map to underlying Bedrock Converse API models like
        'bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0'.

        These mappings are typically defined in proxy server configuration files
        and cannot be resolved by LiteLLM without that context.

        Args:
            proxy_model_name: ``litellm_proxy/``-prefixed alias passed to
                ``supports_function_calling``.
            underlying_bedrock_model: Bedrock model id the alias stands for;
                queried directly as a sanity check.
            expected_proxy_result: Value ``supports_function_calling`` is
                expected to return for the alias.
            description: Human-readable label used in output and failure messages.
        """
        # NOTE(review): a method with this exact name is defined more than once
        # in this class; Python keeps only the last definition, so the duplicate
        # copies should be removed.
        print(f"\nTesting: {description}")
        print(f"  Proxy model: {proxy_model_name}")
        print(f"  Underlying model: {underlying_bedrock_model}")

        # Test the underlying model directly to verify it supports function calling.
        # Only the lookup itself is best-effort: the assertion lives in the `else`
        # branch because AssertionError is an Exception subclass and would
        # otherwise be swallowed by the handler below.
        try:
            underlying_result = supports_function_calling(underlying_bedrock_model)
        except Exception as e:
            print(
                f"  Warning: Could not test underlying model {underlying_bedrock_model}: {e}"
            )
        else:
            print(f"  Underlying model function calling support: {underlying_result}")

            # Most Bedrock Converse API models with Anthropic Claude should support function calling
            if "anthropic.claude-3" in underlying_bedrock_model:
                assert (
                    underlying_result is True
                ), f"Claude 3 models should support function calling: {underlying_bedrock_model}"

        # Test the proxy model - should return False due to lack of configuration context
        proxy_result = supports_function_calling(proxy_model_name)
        print(f"  Proxy model function calling support: {proxy_result}")

        assert proxy_result == expected_proxy_result, (
            f"Proxy model {proxy_model_name} should return {expected_proxy_result} "
            f"(without config context). Description: {description}"
        )

    def test_real_world_proxy_config_documentation(self):
        """
        Document how real-world proxy configurations would handle model mappings.

        This test provides documentation on how the proxy server configuration
        would typically map custom model names to underlying models, then checks
        that the underlying Bedrock Claude 3 models report function calling
        support when queried directly.
        """
        # NOTE(review): a method with this exact name is defined more than once
        # in this class; Python keeps only the last definition, so the duplicate
        # copies should be removed.
        print(
            """

        REAL-WORLD PROXY SERVER CONFIGURATION EXAMPLE:
        ===============================================

        In a proxy_server_config.yaml file, you would define:

        model_list:
          - model_name: bedrock-claude-3-haiku
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
              aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
              aws_region_name: us-east-1

          - model_name: bedrock-claude-3-sonnet
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0
              aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
              aws_region_name: us-east-1

          - model_name: prod-claude-haiku
            litellm_params:
              model: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
              aws_access_key_id: os.environ/PROD_AWS_ACCESS_KEY_ID
              aws_secret_access_key: os.environ/PROD_AWS_SECRET_ACCESS_KEY
              aws_region_name: us-west-2


        FUNCTION CALLING WITH PROXY SERVER:
        ===================================

        When using the proxy server with this configuration:

        1. Client calls: supports_function_calling("bedrock-claude-3-haiku")
        2. Proxy server resolves to: bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0
        3. LiteLLM evaluates the underlying model's capabilities
        4. Returns: True (because Claude 3 Haiku supports function calling)

        Without the proxy server configuration context, LiteLLM cannot resolve
        the custom model name and returns False.


        BEDROCK CONVERSE API BENEFITS:
        ==============================

        The Bedrock Converse API provides:
        - Standardized function calling interface across providers
        - Better tool use capabilities compared to legacy APIs
        - Consistent request/response format
        - Enhanced streaming support for function calls

        """
        )

        # Verify that direct underlying models work as expected
        bedrock_models = [
            "bedrock/converse/anthropic.claude-3-haiku-20240307-v1:0",
            "bedrock/converse/anthropic.claude-3-sonnet-20240229-v1:0",
            "bedrock/converse/anthropic.claude-3-7-sonnet-20250219-v1:0",
        ]

        for model in bedrock_models:
            # Only the lookup is best-effort. The assertion runs in the `else`
            # branch so a failure is not swallowed by `except Exception`
            # (AssertionError is an Exception subclass).
            try:
                result = supports_function_calling(model)
            except Exception as e:
                print(f"Could not test {model}: {e}")
            else:
                print(f"Direct test - {model}: {result}")
                # Claude 3 models should support function calling
                assert (
                    result is True
                ), f"Claude 3 model should support function calling: {model}"


def test_register_model_with_scientific_notation():
    """
    Test that register_model handles cost values given in scientific notation.

    The per-token costs are registered as the strings "3e-07" and "6e-07"; the
    test checks that the entry stored in ``litellm.model_cost`` compares equal
    to the corresponding float values.
    """
    import uuid

    # Clear LRU caches that might have stale data
    from litellm.utils import (
        _invalidate_model_cost_lowercase_map,
    )

    # Use a truly unique model name with uuid to avoid conflicts when tests run in parallel
    test_model_name = f"test-scientific-notation-model-{uuid.uuid4().hex[:12]}"

    _invalidate_model_cost_lowercase_map()

    model_cost_dict = {
        test_model_name: {
            "max_tokens": 8192,
            "input_cost_per_token": "3e-07",
            "output_cost_per_token": "6e-07",
            "litellm_provider": "openai",
            "mode": "chat",
        },
    }

    try:
        litellm.register_model(model_cost_dict)

        registered_model = litellm.model_cost[test_model_name]
        print(registered_model)
        assert registered_model["input_cost_per_token"] == 3e-07
        assert registered_model["output_cost_per_token"] == 6e-07
        assert registered_model["litellm_provider"] == "openai"
        assert registered_model["mode"] == "chat"
    finally:
        # Clean up even when an assertion above fails, so the temporary entry
        # does not leak into litellm.model_cost for later tests.
        if test_model_name in litellm.model_cost:
            del litellm.model_cost[test_model_name]
        _invalidate_model_cost_lowercase_map()


def test_register_model_openrouter_without_slash():
    """
    Test that register_model handles openrouter models without '/' in the name.

    Fixes https://github.com/BerriAI/litellm/issues/18936

    Previously, the code did `split_string[1]` which would fail with IndexError
    when the model name didn't contain '/'. Now it uses `split_string[-1]` which
    always works.

    Each case registers one model key with provider "openrouter" and checks
    which alias ends up in ``litellm.openrouter_models``.
    """
    # (registered model key, alias expected in litellm.openrouter_models)
    cases = [
        # Test 1: Model name without '/' (this was the bug - would raise IndexError)
        ("my-custom-alias", "my-custom-alias"),
        # Test 2: Model name with single '/' (openrouter/model format)
        ("openrouter/gpt-4", "gpt-4"),
        # Test 3: Model name with double '/' (openrouter/provider/model format)
        ("openrouter/openai/gpt-4-turbo", "openai/gpt-4-turbo"),
    ]
    expected_aliases = [alias for _, alias in cases]

    # Remember which aliases were already present so they can be restored, then
    # clear exactly the aliases asserted below. Without this a pre-existing
    # entry would let an assertion pass even if register_model added nothing.
    preexisting = {
        alias for alias in expected_aliases if alias in litellm.openrouter_models
    }
    for alias in expected_aliases:
        litellm.openrouter_models.discard(alias)

    try:
        for model_key, expected_alias in cases:
            # Build a fresh params dict per call so the cases share no state.
            litellm.register_model(
                {
                    model_key: {
                        "max_tokens": 8192,
                        "input_cost_per_token": 0.00001,
                        "output_cost_per_token": 0.00002,
                        "litellm_provider": "openrouter",
                        "mode": "chat",
                    },
                }
            )
            assert (
                expected_alias in litellm.openrouter_models
            ), f"Registering {model_key!r} should add {expected_alias!r} to openrouter_models"
    finally:
        # Put litellm.openrouter_models back the way it was before the test.
        for alias in expected_aliases:
            if alias in preexisting:
                litellm.openrouter_models.add(alias)
            else:
                litellm.openrouter_models.discard(alias)


def test_reasoning_content_preserved_in_text_completion_wrapper():
    """Check that a delta's reasoning_content is carried into the text-completion choice."""
    answer_text = "Some answer text"
    reasoning_text = "Here's my chain of thought..."

    streamed_delta = Delta(
        content=answer_text,
        role="assistant",
        reasoning_content=reasoning_text,
    )
    streamed_choice = StreamingChoices(
        finish_reason=None,
        index=0,
        delta=streamed_delta,
    )
    chat_chunk = ModelResponseStream(
        id="test-id",
        created=1234567890,
        model="test-model",
        object="chat.completion.chunk",
        choices=[streamed_choice],
    )

    # No real stream is supplied: only the per-chunk conversion is exercised.
    stream_wrapper = TextCompletionStreamWrapper(
        completion_stream=None,
        model="test-model",
        stream_options=None,
    )
    text_chunk = stream_wrapper.convert_to_text_completion_object(chat_chunk)

    assert "choices" in text_chunk
    assert len(text_chunk["choices"]) == 1

    only_choice = text_chunk["choices"][0]
    assert only_choice["text"] == answer_text
    assert only_choice["reasoning_content"] == reasoning_text


def test_anthropic_claude_4_invoke_chat_provider_config():
    """Check that a Bedrock invoke/ Claude Sonnet 4 model resolves to AmazonAnthropicClaudeConfig."""
    from litellm.llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation import (
        AmazonAnthropicClaudeConfig,
    )
    from litellm.utils import ProviderConfigManager

    invoke_model = "invoke/us.anthropic.claude-sonnet-4-20250514-v1:0"

    resolved_config = ProviderConfigManager.get_provider_chat_config(
        model=invoke_model,
        provider=LlmProviders.BEDROCK,
    )

    # Printed to help debugging when the resolved config is of the wrong type.
    print(resolved_config)
    assert isinstance(resolved_config, AmazonAnthropicClaudeConfig)


def test_bedrock_application_inference_profile():
    """
    Check supports_tool_choice for a Bedrock inference-profile ARN.

    The same ARN is queried under both the "bedrock" and "bedrock_converse"
    providers; the two answers must agree and must be True.
    """
    from litellm.utils import supports_tool_choice

    model = "arn:aws:bedrock:us-east-2:<AWS-ACCOUNT-ID>:inference-profile/us.anthropic.claude-3-5-haiku-20241022-v1:0"

    result = supports_tool_choice(model, custom_llm_provider="bedrock")
    result_2 = supports_tool_choice(model, custom_llm_provider="bedrock_converse")
    print(result)
    assert result == result_2
    assert result is True


def test_image_response_utils():
    """Check that ImageResponse can be built from a raw image-generation payload."""
    from litellm.utils import ImageResponse

    image_entry = {
        "b64_json": "/9j/.../2Q==",
        "revised_prompt": None,
        "url": None,
        "timings": {"inference": 0.9612685777246952},
        "index": 0,
    }
    raw_payload = {
        "created": None,
        "data": [image_entry],
        "id": "91559891cxxx-PDX",
        "model": "black-forest-labs/FLUX.1-schnell-Free",
        "object": "list",
        "hidden_params": {"additional_headers": {}},
    }

    # Constructing without raising is the whole check; nothing is asserted on
    # the resulting object.
    constructed = ImageResponse(**raw_payload)


def test_is_valid_api_key():
    """Check is_valid_api_key against a table of accepted and rejected keys."""
    import hashlib

    # Real SHA-256 hash
    real_hash = hashlib.sha256(b"my_secret_key").hexdigest()
    assert len(real_hash) == 64

    accepted_keys = [
        # Valid sk- keys
        "sk-abc123",
        "sk-ABC_123-xyz",
        # Valid hashed key (64 hex chars)
        "a" * 64,
        "0123456789abcdef" * 4,  # 16*4 = 64
        real_hash,
    ]
    rejected_keys = [
        # Invalid: too short
        "sk-",
        "",
        # Invalid: too long
        "sk-" + "a" * 200,
        # Invalid: wrong prefix
        "pk-abc123",
        # Invalid: wrong chars in sk- key
        "sk-abc$%#@!",
        # Invalid: not a string
        None,
        12345,
        # Invalid: wrong length for hash
        "a" * 63,
        "a" * 65,
    ]

    for candidate in accepted_keys:
        assert is_valid_api_key(candidate)

    for candidate in rejected_keys:
        assert not is_valid_api_key(candidate)


def test_block_key_hashing_logic():
    """
    Check the rule that only keys starting with "sk-" are hashed.

    The rule is re-implemented inline here (mirroring block_key()) rather than
    exercised through block_key() itself.
    """
    import hashlib

    from litellm.proxy.utils import hash_token

    hex_digits = "0123456789abcdef"

    # Each row: (input_key, should_be_hashed, expected_output)
    scenarios = [
        ("sk-1234567890abcdef", True, hash_token("sk-1234567890abcdef")),
        ("sk-test-key", True, hash_token("sk-test-key")),
        ("abc123", False, "abc123"),  # left as-is
        ("hashed_key_123", False, "hashed_key_123"),  # left as-is
        ("", False, ""),  # empty string is left as-is
        ("sk-", True, hash_token("sk-")),  # edge case: bare prefix
    ]

    for input_key, should_be_hashed, expected_output in scenarios:
        # Inline copy of the block_key() decision.
        hashed_token = (
            hash_token(token=input_key) if input_key.startswith("sk-") else input_key
        )

        assert hashed_token == expected_output, f"Failed for input: {input_key}"

        if not should_be_hashed:
            # Unhashed keys must come back exactly as given.
            assert (
                hashed_token == input_key
            ), f"Non-hashed key should remain unchanged: {input_key}"
            continue

        # SHA-256 hashes are 64 characters long and contain only hex digits
        assert (
            len(hashed_token) == 64
        ), f"Hash length should be 64, got {len(hashed_token)} for {input_key}"
        assert all(
            c in hex_digits for c in hashed_token
        ), f"Hash should contain only hex digits for {input_key}"

    print("✅ All block_key hashing logic tests passed!")


def test_generate_gcp_iam_access_token():
    """
    Check _generate_gcp_iam_access_token against a stubbed GCP IAM credentials module.
    """
    from unittest.mock import Mock, patch

    expected_token = "test-access-token-12345"
    service_account = "projects/-/serviceAccounts/test@project.iam.gserviceaccount.com"

    # Client stub whose generate_access_token() yields a response carrying the token.
    token_response = Mock(access_token=expected_token)
    iam_client = Mock()
    iam_client.generate_access_token.return_value = token_response

    # Stand-in for the iam_credentials_v1 module itself.
    fake_iam_module = Mock()
    fake_iam_module.IAMCredentialsClient = Mock(return_value=iam_client)
    fake_iam_module.GenerateAccessTokenRequest = Mock()

    # Swap the stub into sys.modules for the duration of the call.
    module_overrides = {"google.cloud.iam_credentials_v1": fake_iam_module}
    with patch.dict("sys.modules", module_overrides):
        from litellm._redis import _generate_gcp_iam_access_token

        issued_token = _generate_gcp_iam_access_token(service_account)

        assert issued_token == expected_token
        fake_iam_module.IAMCredentialsClient.assert_called_once()
        iam_client.generate_access_token.assert_called_once()

        # The request object must be built with the account name and scope.
        fake_iam_module.GenerateAccessTokenRequest.assert_called_once_with(
            name=service_account,
            scope=["https://www.googleapis.com/auth/cloud-platform"],
        )


def test_generate_gcp_iam_access_token_import_error():
    """
    Test that _generate_gcp_iam_access_token raises ImportError when google-cloud-iam is not available.
    """
    import builtins

    # Import the function first, before mocking
    from litellm._redis import _generate_gcp_iam_access_token

    # Use the builtins module rather than the `__builtins__` global: the latter
    # is an implementation detail that is a dict in imported modules but a
    # module object when this file runs as __main__.
    original_import = builtins.__import__

    def mock_import(name, *args, **kwargs):
        """Fail imports of google.cloud.iam_credentials_v1; delegate all others."""
        # The import statement calls __import__(name, globals, locals, fromlist,
        # level) positionally, so fromlist is the third extra positional arg.
        fromlist = kwargs.get("fromlist")
        if fromlist is None and len(args) >= 3:
            fromlist = args[2]

        # `import google.cloud.iam_credentials_v1` arrives with the full dotted
        # name; `from google.cloud import iam_credentials_v1` arrives as
        # name="google.cloud" with the submodule in fromlist. Block both forms.
        is_direct_import = name == "google.cloud.iam_credentials_v1"
        is_from_import = name == "google.cloud" and "iam_credentials_v1" in (
            fromlist or ()
        )
        if is_direct_import or is_from_import:
            raise ImportError("No module named 'google.cloud.iam_credentials_v1'")
        return original_import(name, *args, **kwargs)

    with patch("builtins.__import__", side_effect=mock_import):
        with pytest.raises(ImportError) as exc_info:
            _generate_gcp_iam_access_token("test-service-account")

    # Inspect the message after the import hook has been restored.
    error_message = str(exc_info.value)
    assert "google-cloud-iam is required" in error_message
    assert "pip install google-cloud-iam" in error_message


if __name__ == "__main__":
    # Debugging aid: when executed as a script, run this file through pytest
    # in verbose mode.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)


def test_model_info_for_vertex_ai_deepseek_model():
    """Check provider, mode and pricing fields returned for the Vertex AI DeepSeek R1 model."""
    deepseek_info = litellm.get_model_info(
        model="vertex_ai/deepseek-ai/deepseek-r1-0528-maas"
    )
    assert deepseek_info is not None

    expected_fields = {
        "litellm_provider": "vertex_ai-deepseek_models",
        "mode": "chat",
    }
    for field_name, expected_value in expected_fields.items():
        assert deepseek_info[field_name] == expected_value

    # Both per-token prices must be populated.
    for cost_field in ("input_cost_per_token", "output_cost_per_token"):
        assert deepseek_info[cost_field] is not None

    print("vertex deepseek model info", deepseek_info)


def test_model_info_for_openrouter_kimi_k2_5():
    """
    Test that openrouter/moonshotai/kimi-k2.5 model info is correctly configured
    in model_prices_and_context_window.json.

    Model properties from OpenRouter API:
    - context_length: 262144
    - pricing: prompt=$0.0000006, completion=$0.000003, input_cache_read=$0.0000001
    - modality: text+image->text (supports vision)
    - supports: tool_choice, tools (function calling)
    """
    import json
    from pathlib import Path

    # Load directly from the local JSON file. Decode explicitly as UTF-8 so the
    # result does not depend on the platform's locale default encoding.
    json_path = Path(__file__).parents[2] / "model_prices_and_context_window.json"
    with open(json_path, encoding="utf-8") as f:
        model_cost = json.load(f)

    model_info = model_cost.get("openrouter/moonshotai/kimi-k2.5")
    assert (
        model_info is not None
    ), "Model not found in model_prices_and_context_window.json"
    assert model_info["litellm_provider"] == "openrouter"
    assert model_info["mode"] == "chat"

    # Verify context window
    assert model_info["max_input_tokens"] == 262144
    assert model_info["max_output_tokens"] == 262144
    assert model_info["max_tokens"] == 262144

    # Verify pricing
    assert model_info["input_cost_per_token"] == 6e-07
    assert model_info["output_cost_per_token"] == 3e-06
    assert model_info["cache_read_input_token_cost"] == 1e-07

    # Verify capabilities
    assert model_info["supports_vision"] is True
    assert model_info["supports_function_calling"] is True
    assert model_info["supports_tool_choice"] is True

    print("openrouter kimi-k2.5 model info", model_info)


def test_gemini_lyria_3_preview_models_in_cost_map():
    """
    Check the Gemini Lyria 3 preview entries in model_prices_and_context_window.json.

    Both the clip and pro preview models must be present with provider "gemini"
    and a 131072-token input limit; the clip model must also carry a per-image
    output cost of 0.04.
    """
    import json
    from pathlib import Path

    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # locale default encoding.
    json_path = Path(__file__).parents[2] / "model_prices_and_context_window.json"
    with open(json_path, encoding="utf-8") as f:
        model_cost = json.load(f)

    clip = model_cost.get("gemini/lyria-3-clip-preview")
    pro = model_cost.get("gemini/lyria-3-pro-preview")

    # One assertion per fact so a failure points at the exact entry and field.
    assert clip is not None
    assert pro is not None
    assert clip["litellm_provider"] == "gemini"
    assert pro["litellm_provider"] == "gemini"
    assert clip["max_input_tokens"] == 131072
    assert pro["max_input_tokens"] == 131072
    assert clip["output_cost_per_image"] == 0.04


def test_model_info_for_fireworks_short_form_models():
    """
    Test that fireworks_ai short-form model entries (fireworks_ai/<model>)
    are correctly configured in model_prices_and_context_window.json.

    These entries enable cost attribution for models called via short-form
    names (e.g., fireworks_ai/glm-4p7 instead of
    fireworks_ai/accounts/fireworks/models/glm-4p7).
    """
    import json
    from pathlib import Path

    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # locale default encoding.
    json_path = Path(__file__).parents[2] / "model_prices_and_context_window.json"
    with open(json_path, encoding="utf-8") as f:
        model_cost = json.load(f)

    # glm-4p7: short-form and long-form
    for key in [
        "fireworks_ai/glm-4p7",
        "fireworks_ai/accounts/fireworks/models/glm-4p7",
    ]:
        info = model_cost.get(key)
        assert (
            info is not None
        ), f"{key} not found in model_prices_and_context_window.json"
        assert info["litellm_provider"] == "fireworks_ai"
        assert info["mode"] == "chat"
        assert info["input_cost_per_token"] == 6e-07
        assert info["output_cost_per_token"] == 2.2e-06
        assert info["max_input_tokens"] == 202800
        assert info["supports_reasoning"] is True

    # minimax-m2p1: short-form and long-form
    for key in [
        "fireworks_ai/minimax-m2p1",
        "fireworks_ai/accounts/fireworks/models/minimax-m2p1",
    ]:
        info = model_cost.get(key)
        assert (
            info is not None
        ), f"{key} not found in model_prices_and_context_window.json"
        assert info["litellm_provider"] == "fireworks_ai"
        assert info["mode"] == "chat"
        assert info["input_cost_per_token"] == 3e-07
        assert info["output_cost_per_token"] == 1.2e-06
        assert info["max_input_tokens"] == 204800

    # kimi-k2p5: short-form only (long-form already existed)
    info = model_cost.get("fireworks_ai/kimi-k2p5")
    assert (
        info is not None
    ), "fireworks_ai/kimi-k2p5 not found in model_prices_and_context_window.json"
    assert info["litellm_provider"] == "fireworks_ai"
    assert info["mode"] == "chat"
    assert info["input_cost_per_token"] == 6e-07
    assert info["output_cost_per_token"] == 3e-06
    assert info["max_input_tokens"] == 262144


class TestGetValidModelsWithCLI:
    """Covers get_valid_models as it is invoked for CLI token usage."""

    def test_get_valid_models_with_cli_pattern(self):
        """Call get_valid_models for the litellm_proxy provider with a stubbed HTTP client."""
        # Model ids the stubbed proxy endpoint reports.
        reported_model_ids = [
            "gpt-3.5-turbo",
            "gpt-4",
            "litellm_proxy/gemini/gemini-2.5-flash",
            "claude-3-sonnet",
        ]

        fake_http_response = MagicMock()
        fake_http_response.status_code = 200
        fake_http_response.json.return_value = {
            "data": [
                {"id": model_id, "object": "model"} for model_id in reported_model_ids
            ]
        }

        with patch.object(
            litellm.module_level_client, "get", return_value=fake_http_response
        ) as http_get:
            # Same argument pattern as cli_token_usage.py
            models = litellm.get_valid_models(
                check_provider_endpoint=True,
                custom_llm_provider="litellm_proxy",
                api_key="sk-test-cli-key-123",
                api_base="http://localhost:4000/",
            )

            # A list with one name per reported model comes back.
            assert isinstance(models, list)
            assert len(models) == 4

            # Every id is returned with a "litellm_proxy/" prefix; the id that
            # already carried the prefix therefore ends up with it twice.
            for expected_name in (
                "litellm_proxy/gpt-3.5-turbo",
                "litellm_proxy/gpt-4",
                "litellm_proxy/litellm_proxy/gemini/gemini-2.5-flash",
                "litellm_proxy/claude-3-sonnet",
            ):
                assert expected_name in models

            # Exactly one HTTP request, inspected through its keyword arguments.
            http_get.assert_called_once()
            _, request_kwargs = http_get.call_args

            # The request targets the models endpoint under the given base URL.
            assert request_kwargs["url"].startswith("http://localhost:4000/")
            assert request_kwargs["url"].endswith("/v1/models")

            # The API key is sent as a bearer token.
            assert "headers" in request_kwargs
            request_headers = request_kwargs["headers"]
            assert request_headers.get("Authorization") == "Bearer sk-test-cli-key-123"


class TestIsCachedMessage:
    """Covers is_cached_message, the context-caching detector.

    Fixes GitHub issue #17821 - TypeError when content is string instead of list.
    """

    def test_string_content_returns_false(self):
        """Plain string content yields False and does not raise."""
        assert is_cached_message({"role": "user", "content": "Hello world"}) is False

    def test_none_content_returns_false(self):
        """A None content value yields False."""
        assert is_cached_message({"role": "user", "content": None}) is False

    def test_missing_content_returns_false(self):
        """A message with no content key at all yields False."""
        assert is_cached_message({"role": "user"}) is False

    def test_list_content_without_cache_control_returns_false(self):
        """A content list whose items carry no cache_control yields False."""
        text_part = {"type": "text", "text": "Hello"}
        assert is_cached_message({"role": "user", "content": [text_part]}) is False

    def test_list_content_with_cache_control_returns_true(self):
        """A content item with ephemeral cache_control yields True."""
        cached_part = {
            "type": "text",
            "text": "Hello",
            "cache_control": {"type": "ephemeral"},
        }
        assert is_cached_message({"role": "user", "content": [cached_part]}) is True

    def test_list_with_non_dict_items_skips_them(self):
        """Non-dict items in the content list are tolerated and ignored."""
        mixed_parts = ["string_item", 123, {"type": "text", "text": "Hello"}]
        assert is_cached_message({"role": "user", "content": mixed_parts}) is False

    def test_list_with_mixed_items_finds_cached(self):
        """A cached item is still found when it sits among other items."""
        mixed_parts = [
            "string_item",
            {"type": "image", "url": "..."},
            {
                "type": "text",
                "text": "cached",
                "cache_control": {"type": "ephemeral"},
            },
        ]
        assert is_cached_message({"role": "user", "content": mixed_parts}) is True

    def test_wrong_cache_control_type_returns_false(self):
        """A cache_control type other than ephemeral yields False."""
        permanent_part = {
            "type": "text",
            "text": "Hello",
            "cache_control": {"type": "permanent"},
        }
        assert (
            is_cached_message({"role": "user", "content": [permanent_part]}) is False
        )

    def test_empty_list_content_returns_false(self):
        """An empty content list yields False."""
        assert is_cached_message({"role": "user", "content": []}) is False

    def test_message_level_cache_control_returns_true(self):
        """String content plus message-level ephemeral cache_control yields True.

        This is the format injected by the cache_control_injection_points hook
        when the message content is a string (common for system messages).
        Fixes GitHub issue #18519 - Gemini models ignoring cache_control_injection_points.
        """
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant.",
            "cache_control": {"type": "ephemeral"},
        }
        assert is_cached_message(system_message) is True

    def test_message_level_cache_control_wrong_type_returns_false(self):
        """Message-level cache_control of a non-ephemeral type yields False."""
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant.",
            "cache_control": {"type": "permanent"},
        }
        assert is_cached_message(system_message) is False

    def test_message_level_cache_control_non_dict_returns_false(self):
        """Message-level cache_control given as a bare string yields False."""
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant.",
            "cache_control": "ephemeral",
        }
        assert is_cached_message(system_message) is False


@pytest.mark.asyncio
class TestProxyLoggingBudgetAlerts:
    """Covers ProxyLogging.budget_alerts and which alerting instances it calls."""

    @staticmethod
    def _new_proxy_logging(alerting, *, stub_slack=True, stub_email=True):
        """Build a ProxyLogging with the given alerting value.

        The slack and/or email instances are replaced with AsyncMock stand-ins
        when the corresponding flag is set; otherwise they are left as
        ProxyLogging created them.
        """
        from litellm.caching.caching import DualCache
        from litellm.proxy.utils import ProxyLogging

        instance = ProxyLogging(user_api_key_cache=DualCache())
        instance.alerting = alerting
        if stub_slack:
            instance.slack_alerting_instance = AsyncMock()
        if stub_email:
            instance.email_logging_instance = AsyncMock()
        return instance

    @staticmethod
    def _team_call_info(alert_emails):
        """Build the team-scoped CallInfo shared by the soft-budget tests."""
        from litellm.proxy._types import CallInfo, Litellm_EntityType

        return CallInfo(
            token="test-token",
            spend=100.0,
            soft_budget=50.0,
            user_id="test-user",
            team_id="test-team",
            team_alias="test-team-alias",
            event_group=Litellm_EntityType.TEAM,
            alert_emails=alert_emails,
        )

    async def test_budget_alerts_when_alerting_is_none(self):
        """With alerting=None, neither alerting instance is called."""
        logging_obj = self._new_proxy_logging(None)
        caller = MagicMock()

        await logging_obj.budget_alerts(type="user_budget", user_info=caller)

        logging_obj.slack_alerting_instance.budget_alerts.assert_not_called()
        logging_obj.email_logging_instance.budget_alerts.assert_not_called()

    async def test_budget_alerts_with_slack_only(self):
        """With only "slack" in alerting, the slack instance receives the alert."""
        logging_obj = self._new_proxy_logging(["slack"], stub_email=False)
        caller = MagicMock()

        await logging_obj.budget_alerts(type="token_budget", user_info=caller)

        logging_obj.slack_alerting_instance.budget_alerts.assert_called_once_with(
            type="token_budget", user_info=caller
        )

    async def test_budget_alerts_with_email_only(self):
        """With only "email" in alerting, the email instance receives the alert."""
        logging_obj = self._new_proxy_logging(["email"], stub_slack=False)
        caller = MagicMock()

        await logging_obj.budget_alerts(type="team_budget", user_info=caller)

        logging_obj.email_logging_instance.budget_alerts.assert_called_once_with(
            type="team_budget", user_info=caller
        )

    async def test_budget_alerts_with_email_when_instance_is_none(self):
        """With "email" in alerting but no email instance, the call must not raise."""
        logging_obj = self._new_proxy_logging(
            ["email"], stub_slack=False, stub_email=False
        )
        logging_obj.email_logging_instance = None
        caller = MagicMock()

        # Completing without an exception is the whole check.
        await logging_obj.budget_alerts(type="organization_budget", user_info=caller)

    async def test_budget_alerts_with_both_slack_and_email(self):
        """With both channels in alerting, both instances receive the alert."""
        logging_obj = self._new_proxy_logging(["slack", "email"])
        caller = MagicMock()

        await logging_obj.budget_alerts(type="proxy_budget", user_info=caller)

        for channel in (
            logging_obj.slack_alerting_instance,
            logging_obj.email_logging_instance,
        ):
            channel.budget_alerts.assert_called_once_with(
                type="proxy_budget", user_info=caller
            )

    @pytest.mark.parametrize(
        "alert_type",
        [
            "token_budget",
            "user_budget",
            "soft_budget",
            "team_budget",
            "organization_budget",
            "proxy_budget",
            "projected_limit_exceeded",
        ],
    )
    async def test_budget_alerts_with_all_alert_types(self, alert_type):
        """Each listed alert type is forwarded to both the slack and email instances."""
        logging_obj = self._new_proxy_logging(["slack", "email"])
        caller = MagicMock()

        await logging_obj.budget_alerts(type=alert_type, user_info=caller)

        for channel in (
            logging_obj.slack_alerting_instance,
            logging_obj.email_logging_instance,
        ):
            channel.budget_alerts.assert_called_once_with(
                type=alert_type, user_info=caller
            )

    async def test_budget_alerts_soft_budget_with_alert_emails_bypasses_alerting_none(
        self,
    ):
        """
        A soft_budget alert whose CallInfo carries alert_emails is still emailed
        when alerting is None.

        This covers team-specific soft budget email alerts (set via
        metadata.soft_budget_alerting_emails) working while global alerting is
        disabled.
        """
        # Global alerting is disabled.
        logging_obj = self._new_proxy_logging(None)

        # CallInfo with alert_emails set (simulating team metadata extraction).
        team_info = self._team_call_info(["team1@example.com", "team2@example.com"])

        await logging_obj.budget_alerts(type="soft_budget", user_info=team_info)

        # Slack stays silent because alerting is None ...
        logging_obj.slack_alerting_instance.budget_alerts.assert_not_called()

        # ... while email goes out because alert_emails is populated.
        logging_obj.email_logging_instance.budget_alerts.assert_called_once_with(
            type="soft_budget", user_info=team_info
        )

    async def test_budget_alerts_soft_budget_without_alert_emails_respects_alerting_none(
        self,
    ):
        """
        A soft_budget alert with alert_emails=None sends nothing when alerting
        is None.
        """
        logging_obj = self._new_proxy_logging(None)

        # CallInfo without any alert emails.
        team_info = self._team_call_info(None)

        await logging_obj.budget_alerts(type="soft_budget", user_info=team_info)

        # Neither channel may be called.
        logging_obj.slack_alerting_instance.budget_alerts.assert_not_called()
        logging_obj.email_logging_instance.budget_alerts.assert_not_called()

    async def test_budget_alerts_soft_budget_with_empty_alert_emails_respects_alerting_none(
        self,
    ):
        """
        A soft_budget alert with an empty alert_emails list sends nothing when
        alerting is None.
        """
        logging_obj = self._new_proxy_logging(None)

        # CallInfo with an empty alert_emails list.
        team_info = self._team_call_info([])

        await logging_obj.budget_alerts(type="soft_budget", user_info=team_info)

        # Neither channel may be called.
        logging_obj.slack_alerting_instance.budget_alerts.assert_not_called()
        logging_obj.email_logging_instance.budget_alerts.assert_not_called()


def test_azure_ai_claude_provider_config():
    """Check which chat config class the Azure AI provider resolves for Claude and non-Claude models."""
    from litellm import AzureAIStudioConfig, AzureAnthropicConfig
    from litellm.utils import ProviderConfigManager

    # (model name, expected config class)
    expectations = [
        # Claude models resolve to AzureAnthropicConfig
        ("claude-sonnet-4-5", AzureAnthropicConfig),
        # Matching is case-insensitive
        ("Claude-Opus-4", AzureAnthropicConfig),
        # Non-Claude models resolve to AzureAIStudioConfig
        ("mistral-large", AzureAIStudioConfig),
    ]

    for model_name, expected_config_cls in expectations:
        resolved_config = ProviderConfigManager.get_provider_chat_config(
            model=model_name,
            provider=LlmProviders.AZURE_AI,
        )
        assert isinstance(resolved_config, expected_config_cls)


# Tests for thinking blocks helper functions
# Related to issue: https://github.com/BerriAI/litellm/issues/18926


def test_any_assistant_message_has_thinking_blocks_with_thinking():
    """One assistant turn with thinking_blocks is enough for the helper to return True."""
    from litellm.utils import any_assistant_message_has_thinking_blocks

    assistant_with_thinking = {
        "role": "assistant",
        "thinking_blocks": [{"type": "thinking", "thinking": "Let me think..."}],
        "tool_calls": [{"id": "123", "function": {"name": "test"}}],
    }
    # Second assistant turn deliberately has no thinking_blocks - Claude
    # sometimes doesn't include them.
    assistant_without_thinking = {
        "role": "assistant",
        "tool_calls": [{"id": "456", "function": {"name": "test2"}}],
    }
    conversation = [
        {"role": "user", "content": "Hello"},
        assistant_with_thinking,
        {"role": "tool", "tool_call_id": "123", "content": "result"},
        assistant_without_thinking,
    ]

    assert any_assistant_message_has_thinking_blocks(conversation) is True


def test_any_assistant_message_has_thinking_blocks_without_thinking():
    """Expect False when no assistant turn carries a thinking_blocks key."""
    from litellm.utils import any_assistant_message_has_thinking_blocks

    assistant_tool_call_only = {
        "role": "assistant",
        "tool_calls": [{"id": "123", "function": {"name": "test"}}],
    }
    conversation = [
        {"role": "user", "content": "Hello"},
        assistant_tool_call_only,
        {"role": "tool", "tool_call_id": "123", "content": "result"},
    ]

    found_thinking = any_assistant_message_has_thinking_blocks(conversation)
    assert found_thinking is False


def test_any_assistant_message_has_thinking_blocks_empty_list():
    """Expect False when the only thinking_blocks value present is an empty list."""
    from litellm.utils import any_assistant_message_has_thinking_blocks

    assistant_with_empty_thinking = {
        "role": "assistant",
        "thinking_blocks": [],  # Key is present but holds nothing
        "tool_calls": [{"id": "123", "function": {"name": "test"}}],
    }
    conversation = [
        {"role": "user", "content": "Hello"},
        assistant_with_empty_thinking,
    ]

    found_thinking = any_assistant_message_has_thinking_blocks(conversation)
    assert found_thinking is False


def test_last_assistant_with_tool_calls_has_no_thinking_blocks_issue_18926():
    """
    Regression test for the conversation shape reported in issue #18926.

    The first assistant turn carries thinking_blocks while the most recent
    assistant turn (which also has tool_calls) carries none. Looking only at
    the last tool-calling turn would suggest dropping thinking, even though
    thinking blocks are still present earlier in the conversation, so the
    drop decision has to combine both helper checks.
    """
    from litellm.utils import (
        any_assistant_message_has_thinking_blocks,
        last_assistant_with_tool_calls_has_no_thinking_blocks,
    )

    first_assistant_turn = {
        "role": "assistant",
        "thinking_blocks": [
            {"type": "thinking", "thinking": "Let me analyze the requirements..."}
        ],
        "tool_calls": [
            {
                "id": "toolu_1",
                "function": {"name": "file_editor", "arguments": "{}"},
            }
        ],
    }
    first_tool_result = {
        "role": "tool",
        "tool_call_id": "toolu_1",
        "content": "File contents here...",
    }
    # This turn has NO thinking_blocks - Claude sometimes doesn't include them
    second_assistant_turn = {
        "role": "assistant",
        "content": [{"type": "text", "text": "Let me explore more..."}],
        "tool_calls": [
            {
                "id": "toolu_2",
                "function": {"name": "file_editor", "arguments": "{}"},
            }
        ],
    }
    conversation = [
        {"role": "user", "content": "Build a feature"},
        first_assistant_turn,
        first_tool_result,
        second_assistant_turn,
    ]

    # The most recent assistant turn with tool_calls has no thinking_blocks...
    last_turn_lacks_thinking = last_assistant_with_tool_calls_has_no_thinking_blocks(
        conversation
    )
    assert last_turn_lacks_thinking is True

    # ...yet an earlier assistant turn does have them.
    some_turn_has_thinking = any_assistant_message_has_thinking_blocks(conversation)
    assert some_turn_has_thinking is True

    # Thinking must therefore be kept: it is only dropped when the last
    # tool-calling turn has none AND no assistant turn has any.
    should_drop_thinking = last_assistant_with_tool_calls_has_no_thinking_blocks(
        conversation
    ) and not any_assistant_message_has_thinking_blocks(conversation)
    assert should_drop_thinking is False


class TestAdditionalDropParamsForNonOpenAIProviders:
    """
    Checks that additional_drop_params is honoured for non-OpenAI providers.

    Regression coverage for https://github.com/BerriAI/litellm/issues/19225

    The reported bug: additional_drop_params was only applied for OpenAI/Azure
    providers and ignored for others such as Bedrock, so OpenAI-specific params
    like prompt_cache_key were forwarded to Bedrock and rejected with an error.
    """

    def test_additional_drop_params_filters_for_bedrock(self):
        """
        A param named in additional_drop_params must not reach Bedrock.

        Before the fix, prompt_cache_key was passed through to Bedrock even when
        listed in additional_drop_params, which produced:
        'BedrockException - {"message":"The model returned the following errors:
        prompt_cache_key: Extra inputs are not permitted"}'
        """
        from litellm.utils import add_provider_specific_params_to_optional_params

        request_params = {
            "prompt_cache_key": "test_key_123",
            "temperature": 0.7,
            "model": "bedrock/anthropic.claude-v2",
        }

        filtered = add_provider_specific_params_to_optional_params(
            optional_params={},
            passed_params=request_params,
            custom_llm_provider="bedrock",
            openai_params=["temperature", "max_tokens", "top_p", "model"],
            additional_drop_params=["prompt_cache_key"],
        )

        # The dropped param must be absent from the output.
        assert "prompt_cache_key" not in filtered
        # temperature is deliberately not asserted on: it is listed in
        # openai_params and, per the original author's note, this helper only
        # adds params that are NOT in openai_params (not verified here).

    def test_additional_drop_params_filters_multiple_params_for_non_openai(self):
        """Several dropped params are removed while the remaining ones survive."""
        from litellm.utils import add_provider_specific_params_to_optional_params

        request_params = {
            "prompt_cache_key": "test_key",
            "some_openai_only_param": "value1",
            "another_openai_param": "value2",
            "keep_this_param": "keep_me",
        }
        dropped_names = ["prompt_cache_key", "some_openai_only_param"]

        filtered = add_provider_specific_params_to_optional_params(
            optional_params={},
            passed_params=request_params,
            custom_llm_provider="anthropic",
            openai_params=["temperature", "max_tokens"],
            additional_drop_params=dropped_names,
        )

        # Params named in additional_drop_params are gone...
        assert "prompt_cache_key" not in filtered
        assert "some_openai_only_param" not in filtered
        # ...and params that were not named are passed through unchanged.
        assert filtered.get("another_openai_param") == "value2"
        assert filtered.get("keep_this_param") == "keep_me"

    def test_additional_drop_params_none_keeps_all_params(self):
        """With additional_drop_params=None nothing is filtered out."""
        from litellm.utils import add_provider_specific_params_to_optional_params

        request_params = {
            "prompt_cache_key": "test_key",
            "custom_param": "value",
        }

        unfiltered = add_provider_specific_params_to_optional_params(
            optional_params={},
            passed_params=request_params,
            custom_llm_provider="bedrock",
            openai_params=["temperature"],
            additional_drop_params=None,
        )

        # Both params survive because no drop list was supplied.
        assert unfiltered.get("prompt_cache_key") == "test_key"
        assert unfiltered.get("custom_param") == "value"

    def test_additional_drop_params_empty_list_keeps_all_params(self):
        """With additional_drop_params=[] nothing is filtered out."""
        from litellm.utils import add_provider_specific_params_to_optional_params

        request_params = {
            "prompt_cache_key": "test_key",
            "custom_param": "value",
        }

        unfiltered = add_provider_specific_params_to_optional_params(
            optional_params={},
            passed_params=request_params,
            custom_llm_provider="bedrock",
            openai_params=["temperature"],
            additional_drop_params=[],
        )

        # Both params survive because the drop list is empty.
        assert unfiltered.get("prompt_cache_key") == "test_key"
        assert unfiltered.get("custom_param") == "value"


class TestDropParamsWithPromptCacheKey:
    """
    Checks that drop_params=True removes prompt_cache_key for non-OpenAI providers.

    Regression coverage for https://github.com/BerriAI/litellm/issues/19225

    prompt_cache_key is an OpenAI-specific parameter, so it is expected to be
    dropped automatically for providers such as Bedrock that do not support it.
    """

    def test_prompt_cache_key_in_default_params(self):
        """Both prompt-cache params are registered as default chat params."""
        from litellm.constants import DEFAULT_CHAT_COMPLETION_PARAM_VALUES

        for param_name in ("prompt_cache_key", "prompt_cache_retention"):
            assert param_name in DEFAULT_CHAT_COMPLETION_PARAM_VALUES

    def test_drop_params_removes_prompt_cache_key_for_bedrock(self):
        """
        get_optional_params with drop_params=True must strip prompt_cache_key
        for Bedrock while leaving the supported temperature param in place.
        """
        from litellm.utils import get_optional_params

        # Bedrock request that carries the OpenAI-only prompt_cache_key.
        bedrock_params = get_optional_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0",
            custom_llm_provider="bedrock",
            prompt_cache_key="test_cache_key",
            temperature=0.7,
            drop_params=True,
        )

        # The unsupported param is dropped...
        assert "prompt_cache_key" not in bedrock_params
        # ...and the supported one is retained with its value.
        assert bedrock_params.get("temperature") == 0.7


class TestGetOptionalParamsDeepSeek:
    """Checks that the deepseek provider maps params via DeepSeekChatConfig."""

    def test_deepseek_supports_thinking_param(self):
        """
        The 'thinking' param is accepted for deepseek and passed through as-is.
        Per the original author, only DeepSeekChatConfig (not OpenAIConfig)
        supports this param.
        """
        from litellm.utils import get_optional_params

        thinking_enabled = {"type": "enabled"}
        mapped = get_optional_params(
            model="deepseek-reasoner",
            custom_llm_provider="deepseek",
            thinking={"type": "enabled"},
        )
        assert mapped.get("thinking") == thinking_enabled

    def test_deepseek_supports_reasoning_effort_param(self):
        """
        'reasoning_effort' is accepted for deepseek and surfaces as an enabled
        'thinking' param. Per the original author, only DeepSeekChatConfig
        (not OpenAIConfig) supports this param.
        """
        from litellm.utils import get_optional_params

        thinking_enabled = {"type": "enabled"}
        mapped = get_optional_params(
            model="deepseek-reasoner",
            custom_llm_provider="deepseek",
            reasoning_effort="high",
        )
        assert mapped.get("thinking") == thinking_enabled

    def test_deepseek_thinking_strips_budget_tokens(self):
        """
        budget_tokens is removed from the 'thinking' param for deepseek.
        Per the original author, OpenAIConfig would not do this.
        """
        from litellm.utils import get_optional_params

        mapped = get_optional_params(
            model="deepseek-reasoner",
            custom_llm_provider="deepseek",
            thinking={"type": "enabled", "budget_tokens": 5000},
        )
        mapped_thinking = mapped.get("thinking", {})
        assert "budget_tokens" not in mapped_thinking
        assert mapped.get("thinking") == {"type": "enabled"}


class TestIsStreamingRequest:
    """Checks _is_streaming_request against the stream kwarg and the call type.

    The call type is exercised both as a plain string and as a CallTypes member.
    """

    def test_stream_true_in_kwargs(self):
        """stream=True in kwargs marks the request as streaming."""
        streaming = _is_streaming_request(
            kwargs={"stream": True}, call_type="acompletion"
        )
        assert streaming is True

    def test_stream_false_in_kwargs(self):
        """stream=False in kwargs marks the request as non-streaming."""
        streaming = _is_streaming_request(
            kwargs={"stream": False}, call_type="acompletion"
        )
        assert streaming is False

    def test_no_stream_in_kwargs(self):
        """A missing stream kwarg on a non-streaming call type is non-streaming."""
        streaming = _is_streaming_request(kwargs={}, call_type="acompletion")
        assert streaming is False

    def test_generate_content_stream_string(self):
        """generate_content_stream (string value) is streaming without a kwarg."""
        call_type = CallTypes.generate_content_stream.value
        streaming = _is_streaming_request(kwargs={}, call_type=call_type)
        assert streaming is True

    def test_agenerate_content_stream_string(self):
        """agenerate_content_stream (string value) is streaming without a kwarg."""
        call_type = CallTypes.agenerate_content_stream.value
        streaming = _is_streaming_request(kwargs={}, call_type=call_type)
        assert streaming is True

    def test_generate_content_stream_enum(self):
        """generate_content_stream (enum member) is streaming without a kwarg."""
        call_type = CallTypes.generate_content_stream
        streaming = _is_streaming_request(kwargs={}, call_type=call_type)
        assert streaming is True

    def test_agenerate_content_stream_enum(self):
        """agenerate_content_stream (enum member) is streaming without a kwarg."""
        call_type = CallTypes.agenerate_content_stream
        streaming = _is_streaming_request(kwargs={}, call_type=call_type)
        assert streaming is True

    def test_non_streaming_call_type_string(self):
        """A non-streaming call type given as a string is non-streaming."""
        streaming = _is_streaming_request(kwargs={}, call_type="acompletion")
        assert streaming is False

    def test_non_streaming_call_type_enum(self):
        """A non-streaming call type given as an enum member is non-streaming."""
        call_type = CallTypes.acompletion
        streaming = _is_streaming_request(kwargs={}, call_type=call_type)
        assert streaming is False

    def test_stream_true_overrides_non_streaming_call_type(self):
        """stream=True wins even when the call type itself is non-streaming."""
        call_type = CallTypes.acompletion
        streaming = _is_streaming_request(kwargs={"stream": True}, call_type=call_type)
        assert streaming is True


class TestCallbackAsyncSyncSeparation:
    """Test that LoggingCallbackManager auto-routes async callbacks to async lists.

    Every test registers a callback through ``litellm.logging_callback_manager``
    and then checks which module-level litellm callback list it landed in.
    Those lists are process-wide state, so each test runs against empty lists
    and the previous contents are put back afterwards.
    """

    # Names of the module-level litellm callback lists these tests touch.
    _CALLBACK_LIST_NAMES = (
        "input_callback",
        "success_callback",
        "failure_callback",
        "_async_input_callback",
        "_async_success_callback",
        "_async_failure_callback",
    )

    def setup_method(self):
        """Snapshot the callback lists, then reset them before each test."""
        self._saved_callback_lists = {
            name: getattr(litellm, name) for name in self._CALLBACK_LIST_NAMES
        }
        for name in self._CALLBACK_LIST_NAMES:
            setattr(litellm, name, [])

    def teardown_method(self):
        """Restore the callback lists captured in setup_method.

        Without this, entries registered here (e.g. "langfuse", "dynamodb",
        "openmeter") would stay in litellm's global lists and leak into
        whichever tests run next.
        """
        for name, saved_list in self._saved_callback_lists.items():
            setattr(litellm, name, saved_list)

    def test_async_success_callback_routed_to_async_list(self):
        """A coroutine function success callback lands in the async list only."""

        async def my_async_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_success_callback(my_async_cb)
        assert my_async_cb in litellm._async_success_callback
        assert my_async_cb not in litellm.success_callback

    def test_sync_success_callback_stays_in_sync_list(self):
        """A plain function success callback lands in the sync list only."""

        def my_sync_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_success_callback(my_sync_cb)
        assert my_sync_cb in litellm.success_callback
        assert my_sync_cb not in litellm._async_success_callback

    def test_string_callback_stays_in_sync_list(self):
        """The "langfuse" string callback lands in the sync success list only."""
        litellm.logging_callback_manager.add_litellm_success_callback("langfuse")
        assert "langfuse" in litellm.success_callback
        assert "langfuse" not in litellm._async_success_callback

    def test_async_failure_callback_routed_to_async_list(self):
        """A coroutine function failure callback lands in the async list only."""

        async def my_async_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_failure_callback(my_async_cb)
        assert my_async_cb in litellm._async_failure_callback
        assert my_async_cb not in litellm.failure_callback

    def test_sync_failure_callback_stays_in_sync_list(self):
        """A plain function failure callback lands in the sync list only."""

        def my_sync_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_failure_callback(my_sync_cb)
        assert my_sync_cb in litellm.failure_callback
        assert my_sync_cb not in litellm._async_failure_callback

    def test_dynamodb_routed_to_async_success(self):
        """The "dynamodb" string callback lands in the async success list only."""
        litellm.logging_callback_manager.add_litellm_success_callback("dynamodb")
        assert "dynamodb" in litellm._async_success_callback
        assert "dynamodb" not in litellm.success_callback

    def test_openmeter_routed_to_async_success(self):
        """The "openmeter" string callback lands in the async success list only."""
        litellm.logging_callback_manager.add_litellm_success_callback("openmeter")
        assert "openmeter" in litellm._async_success_callback
        assert "openmeter" not in litellm.success_callback

    def test_async_input_callback_routed_to_async_list(self):
        """A coroutine function input callback lands in the async list only."""

        async def my_async_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_input_callback(my_async_cb)
        assert my_async_cb in litellm._async_input_callback
        assert my_async_cb not in litellm.input_callback

    def test_sync_input_callback_stays_in_sync_list(self):
        """A plain function input callback lands in the sync list only."""

        def my_sync_cb(*args, **kwargs):
            pass

        litellm.logging_callback_manager.add_litellm_input_callback(my_sync_cb)
        assert my_sync_cb in litellm.input_callback
        assert my_sync_cb not in litellm._async_input_callback


class TestMetadataNoneHandling:
    """
    Pins the ``kwargs.get("metadata") or {}`` idiom against metadata=None.

    When the metadata key is present with value None (e.g., from Azure OpenAI
    streaming), ``dict.get("metadata", {})`` returns None: the key exists, so
    the default is never used. Writing ``(kwargs.get("metadata") or {})``
    covers both a missing key and an explicit None.

    Related: #20871
    """

    def test_metadata_none_get_previous_models(self):
        """With metadata=None, the ``or {}`` fallback makes .get() safe."""
        call_kwargs = {"metadata": None}
        safe_metadata = call_kwargs.get("metadata") or {}
        previous_models = safe_metadata.get("previous_models", None)
        assert previous_models is None

    def test_metadata_none_model_group_check(self):
        """With metadata=None, the membership test runs without a TypeError."""
        call_kwargs = {"metadata": None}
        safe_metadata = call_kwargs.get("metadata") or {}
        _is_litellm_router_call = "model_group" in safe_metadata
        assert _is_litellm_router_call is False

    def test_metadata_missing_key(self):
        """The idiom also works when there is no metadata key at all."""
        call_kwargs = {}
        safe_metadata = call_kwargs.get("metadata") or {}
        previous_models = safe_metadata.get("previous_models", None)
        assert previous_models is None

    def test_metadata_present_with_values(self):
        """Real metadata values are returned untouched by the idiom."""
        populated_metadata = {"previous_models": ["model1"], "model_group": "test"}
        call_kwargs = {"metadata": populated_metadata}

        previous_models = (call_kwargs.get("metadata") or {}).get(
            "previous_models", None
        )
        assert previous_models == ["model1"]

        _is_litellm_router_call = "model_group" in (call_kwargs.get("metadata") or {})
        assert _is_litellm_router_call is True

    def test_metadata_none_causes_error_with_old_pattern(self):
        """Show the old pattern failing: get('metadata', {}) yields the stored None."""
        call_kwargs = {"metadata": None}

        # Root cause: the key exists, so the {} default is ignored.
        old_pattern_value = call_kwargs.get("metadata", {})
        assert old_pattern_value is None

        # Calling .get() on that None fails.
        with pytest.raises((TypeError, AttributeError)):
            call_kwargs.get("metadata", {}).get("previous_models", None)

        # A membership test against that None fails with TypeError.
        with pytest.raises(TypeError):
            "model_group" in call_kwargs.get("metadata", {})

    def test_litellm_params_metadata_none(self):
        """The same idiom on a litellm_params dict turns None into {}."""
        litellm_params = {"metadata": None}
        safe_metadata = litellm_params.get("metadata") or {}
        assert safe_metadata == {}


class TestValidateAndFixThinkingParam:
    """Checks how validate_and_fix_thinking_param normalizes the thinking dict."""

    def test_none_returns_none(self):
        """A None input is returned as None."""
        from litellm.utils import validate_and_fix_thinking_param

        normalized = validate_and_fix_thinking_param(thinking=None)
        assert normalized is None

    def test_already_snake_case(self):
        """A dict that already uses budget_tokens comes back equal."""
        from litellm.utils import validate_and_fix_thinking_param

        snake_case_input = {"type": "enabled", "budget_tokens": 32000}
        normalized = validate_and_fix_thinking_param(thinking=snake_case_input)
        assert normalized == {"type": "enabled", "budget_tokens": 32000}

    def test_camel_case_normalized(self):
        """The camelCase budgetTokens key is renamed to budget_tokens."""
        from litellm.utils import validate_and_fix_thinking_param

        camel_case_input = {"type": "enabled", "budgetTokens": 32000}
        normalized = validate_and_fix_thinking_param(thinking=camel_case_input)
        assert normalized == {"type": "enabled", "budget_tokens": 32000}
        assert "budgetTokens" not in normalized

    def test_both_keys_snake_case_wins(self):
        """When both spellings are given, the budget_tokens value is the one kept."""
        from litellm.utils import validate_and_fix_thinking_param

        mixed_input = {
            "type": "enabled",
            "budget_tokens": 10000,
            "budgetTokens": 50000,
        }
        normalized = validate_and_fix_thinking_param(thinking=mixed_input)
        assert normalized == {"type": "enabled", "budget_tokens": 10000}
        assert "budgetTokens" not in normalized

    def test_original_dict_not_mutated(self):
        """The caller's dict keeps its original camelCase key after the call."""
        from litellm.utils import validate_and_fix_thinking_param

        caller_dict = {"type": "enabled", "budgetTokens": 32000}
        validate_and_fix_thinking_param(thinking=caller_dict)
        assert "budgetTokens" in caller_dict
        assert "budget_tokens" not in caller_dict