From 0d09c8ec96d1e2b55a2cbdea8fa95cf109db6ef1 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 18 Jun 2025 21:24:36 -0700 Subject: [PATCH] Litellm dev 06 18 2025 p1 (#11872) * fix(spend_tracking_utils.py): add user agent tags from standard logging payload, in spend logs payload * feat(litellm_logging.py): identify user agent tags as `User-Agent: ..` and allow admin to disable storing user agent as tag * fix(azure_ai/): pass content type header in azure ai request Fixes https://github.com/BerriAI/litellm/issues/11227 * test: add unit test * fix(router.py): fix passing dynamic credentials to retrieve batch Fixes batch retrieval when using router * test: add more unit tests --- litellm/__init__.py | 1 + litellm/litellm_core_utils/litellm_logging.py | 6 ++-- litellm/llms/azure_ai/chat/transformation.py | 4 +++ litellm/llms/custom_httpx/llm_http_handler.py | 5 ++- litellm/proxy/_new_secret_config.yaml | 23 +++++++----- .../spend_tracking/spend_tracking_utils.py | 11 ++++-- litellm/router.py | 19 +++++++--- .../litellm_logging_code_coverage.py | 8 ++--- tests/llm_translation/test_azure_ai.py | 2 ++ .../test_litellm_logging.py | 32 +++++++++++++++++ .../chat/test_azure_ai_transformation.py | 12 +++++++ tests/test_litellm/test_router.py | 36 +++++++++++++++++++ 12 files changed, 137 insertions(+), 22 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index 562c8634af..21aa2c8185 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -220,6 +220,7 @@ ssl_certificate: Optional[str] = None disable_streaming_logging: bool = False disable_token_counter: bool = False disable_add_transform_inline_image_block: bool = False +disable_add_user_agent_to_request_tags: bool = False in_memory_llm_clients_cache: LLMClientCache = LLMClientCache() safe_memory_mode: bool = False enable_azure_ad_token_refresh: Optional[bool] = False diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 92d1def3cf..4523342a09 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -3820,6 +3820,8 @@ class StandardLoggingPayloadSetup: """ Return the user agent tags from the proxy server request for spend tracking """ + if litellm.disable_add_user_agent_to_request_tags is True: + return None user_agent_tags: Optional[List[str]] = None headers = proxy_server_request.get("headers", {}) if headers is not None and isinstance(headers, dict): @@ -3832,9 +3834,9 @@ class StandardLoggingPayloadSetup: if "/" in user_agent: user_agent_part = user_agent.split("/")[0] if user_agent_part is not None: - user_agent_tags.append(user_agent_part) + user_agent_tags.append("User-Agent: " + user_agent_part) if user_agent is not None: - user_agent_tags.append(user_agent) + user_agent_tags.append("User-Agent: " + user_agent) return user_agent_tags @staticmethod diff --git a/litellm/llms/azure_ai/chat/transformation.py b/litellm/llms/azure_ai/chat/transformation.py index 1adc56804f..7eb7b767d0 100644 --- a/litellm/llms/azure_ai/chat/transformation.py +++ b/litellm/llms/azure_ai/chat/transformation.py @@ -53,6 +53,10 @@ class AzureAIStudioConfig(OpenAIConfig): else: headers["Authorization"] = f"Bearer {api_key}" + headers["Content-Type"] = ( + "application/json" # tell Azure AI Studio to expect JSON + ) + return headers def _should_use_api_key_header(self, api_base: str) -> bool: diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index bb72211375..3a5490eee4 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -2447,7 +2447,10 @@ class BaseLLMHTTPHandler: _is_async: bool = False, fake_stream: bool = False, litellm_metadata: Optional[Dict[str, Any]] = None, - ) -> Union[ImageResponse, Coroutine[Any, Any, ImageResponse],]: + ) -> Union[ + ImageResponse, + Coroutine[Any, Any, ImageResponse], + ]: """ Handles image edit requests. diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 107769c26a..1f907035a3 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -3,20 +3,17 @@ model_list: litellm_params: model: codex-mini-latest api_key: os.environ/OPENAI_API_KEY - - model_name: "gemini/gemini-2.5-flash-preview-tts" - litellm_params: - model: gemini/gemini-2.5-flash-preview-tts - api_key: os.environ/GEMINI_API_KEY - - model_name: "gpt-4o-mini-tts" - litellm_params: - model: openai/gpt-4o-mini-tts - api_key: os.environ/OPENAI_API_KEY - model_name: "gpt-4o-mini-openai" litellm_params: model: gpt-4o-mini api_key: os.environ/OPENAI_API_KEY model_info: access_groups: ["beta-models"] # 👈 Model Access Group + - model_name: azure_ai/Phi-3-medium + litellm_params: + model: azure_ai/Phi-3-medium + api_key: os.environ/AZURE_AI_PHI_3_MEDIUM_API_KEY + api_base: os.environ/AZURE_AI_PHI_3_MEDIUM_API_BASE - model_name: "bedrock-nova" litellm_params: model: us.amazon.nova-pro-v1:0 @@ -105,6 +102,16 @@ general_settings: token_rate_limit_type: "output" # master_key: os.environ/PROXY_MASTER_KEY +litellm_settings: + # cache: true + # cache_params: + # type: redis + # ttl: 600 + # password: os.environ/REDIS_PASSWORD + # supported_call_types: ["acompletion", "completion"] + callbacks: ["prometheus", "langfuse"] + + diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm/proxy/spend_tracking/spend_tracking_utils.py index c780541eb1..ad82ee7dca 100644 --- a/litellm/proxy/spend_tracking/spend_tracking_utils.py +++ b/litellm/proxy/spend_tracking/spend_tracking_utils.py @@ -91,9 +91,9 @@ def _get_spend_logs_metadata( clean_metadata["applied_guardrails"] = applied_guardrails clean_metadata["batch_models"] = batch_models clean_metadata["mcp_tool_call_metadata"] = mcp_tool_call_metadata - clean_metadata[ - "vector_store_request_metadata" - ] = _get_vector_store_request_for_spend_logs_payload(vector_store_request_metadata) + clean_metadata["vector_store_request_metadata"] = ( + _get_vector_store_request_for_spend_logs_payload(vector_store_request_metadata) + ) clean_metadata["guardrail_information"] = guardrail_information clean_metadata["usage_object"] = usage_object clean_metadata["model_map_information"] = model_map_information @@ -212,6 +212,11 @@ def get_logging_payload( # noqa: PLR0915 if isinstance(metadata.get("tags", []), list) else "[]" ) + if ( + standard_logging_payload is not None + and standard_logging_payload.get("request_tags") is not None + ): # use 'tags' from standard logging payload instead + request_tags = json.dumps(standard_logging_payload["request_tags"]) if ( _is_master_key(api_key=api_key, _master_key=master_key) and general_settings.get("disable_adding_master_key_hash_to_db") is True diff --git a/litellm/router.py b/litellm/router.py index cc71b48110..e7297b0d41 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -3038,15 +3038,18 @@ class Router: async def try_retrieve_batch(model_name: DeploymentTypedDict): try: model = model_name["litellm_params"].get("model") + data = model_name["litellm_params"].copy() + custom_llm_provider = data.get("custom_llm_provider") if model is None: raise Exception( f"Model not found in litellm_params for deployment: {model_name}" ) # Update kwargs with the current model name or any other model-specific adjustments ## SET CUSTOM PROVIDER TO SELECTED DEPLOYMENT ## - _, custom_llm_provider, _, _ = get_llm_provider( # type: ignore - model=model - ) + if not custom_llm_provider: + _, custom_llm_provider, _, _ = get_llm_provider( # type: ignore + model=model + ) new_kwargs = copy.deepcopy(kwargs) self._update_kwargs_with_deployment( deployment=cast(dict, model_name), @@ -3054,10 +3057,18 @@ class Router: function_name="aretrieve_batch", ) new_kwargs.pop("custom_llm_provider", None) + data.pop("custom_llm_provider", None) return await litellm.aretrieve_batch( - custom_llm_provider=custom_llm_provider, **new_kwargs # type: ignore + **{ + **data, + "custom_llm_provider": custom_llm_provider, + **new_kwargs, # type: ignore + }, ) except Exception as e: + import traceback + + traceback.print_exc() receieved_exceptions.append(e) return None diff --git a/tests/code_coverage_tests/litellm_logging_code_coverage.py b/tests/code_coverage_tests/litellm_logging_code_coverage.py index 9825cfba1e..eb927e006e 100644 --- a/tests/code_coverage_tests/litellm_logging_code_coverage.py +++ b/tests/code_coverage_tests/litellm_logging_code_coverage.py @@ -64,12 +64,12 @@ ignored_function_names = [ def main(): - logging_file = "./litellm/litellm_core_utils/litellm_logging.py" - tests_dir = "./tests/" + # logging_file = "./litellm/litellm_core_utils/litellm_logging.py" + # tests_dir = "./tests/" # LOCAL TESTING - # logging_file = "../../litellm/litellm_core_utils/litellm_logging.py" - # tests_dir = "../../tests/" + logging_file = "../../litellm/litellm_core_utils/litellm_logging.py" + tests_dir = "../../tests/" logging_functions = get_function_names_from_file(logging_file) print("logging_functions:", logging_functions) diff --git a/tests/llm_translation/test_azure_ai.py b/tests/llm_translation/test_azure_ai.py index 09596e147f..3873aa7abf 100644 --- a/tests/llm_translation/test_azure_ai.py +++ b/tests/llm_translation/test_azure_ai.py @@ -14,6 +14,7 @@ from litellm.llms.anthropic.chat import ModelResponseIterator import httpx import json from litellm.llms.custom_httpx.http_handler import HTTPHandler + # from base_rerank_unit_tests import BaseLLMRerankTest load_dotenv() @@ -184,6 +185,7 @@ def test_azure_ai_services_with_api_version(): == "https://litellm8397336933.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview" ) + @pytest.mark.skip(reason="Skipping due to cohere ssl issues") def test_completion_azure_ai_command_r(): try: diff --git a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py index 0637594a3d..9d95749827 100644 --- a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py +++ b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py @@ -146,3 +146,35 @@ async def test_logging_non_streaming_request(): "kwargs" ]["standard_logging_object"] assert standard_logging_object["stream"] is not True + + +def test_get_user_agent_tags(): + from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup + + tags = StandardLoggingPayloadSetup._get_user_agent_tags( + proxy_server_request={ + "headers": { + "user-agent": "litellm/0.1.0", + } + } + ) + + assert "User-Agent: litellm" in tags + assert "User-Agent: litellm/0.1.0" in tags + + +def test_get_request_tags(): + from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup + + tags = StandardLoggingPayloadSetup._get_request_tags( + metadata={"tags": ["test-tag"]}, + proxy_server_request={ + "headers": { + "user-agent": "litellm/0.1.0", + } + }, + ) + + assert "test-tag" in tags + assert "User-Agent: litellm" in tags + assert "User-Agent: litellm/0.1.0" in tags diff --git a/tests/test_litellm/llms/azure_ai/chat/test_azure_ai_transformation.py b/tests/test_litellm/llms/azure_ai/chat/test_azure_ai_transformation.py index ffb485c368..7076b24405 100644 --- a/tests/test_litellm/llms/azure_ai/chat/test_azure_ai_transformation.py +++ b/tests/test_litellm/llms/azure_ai/chat/test_azure_ai_transformation.py @@ -31,3 +31,15 @@ async def test_get_openai_compatible_provider_info(): ) assert custom_llm_provider == "azure" + + +def test_azure_ai_validate_environment(): + config = AzureAIStudioConfig() + headers = config.validate_environment( + headers={}, + model="azure_ai/gpt-4o-mini", + messages=[], + optional_params={}, + litellm_params={}, + ) + assert headers["Content-Type"] == "application/json" diff --git a/tests/test_litellm/test_router.py b/tests/test_litellm/test_router.py index bb266dfd4b..eb6c740818 100644 --- a/tests/test_litellm/test_router.py +++ b/tests/test_litellm/test_router.py @@ -348,3 +348,39 @@ def test_router_ignore_invalid_deployments(): ) assert router.get_model_list() == [] + + +@pytest.mark.asyncio +async def test_router_aretrieve_batch(): + """ + Test that router.aretrieve_batch returns the correct response + """ + router = litellm.Router( + model_list=[ + { + "model_name": "gpt-3.5-turbo", + "litellm_params": { + "model": "gpt-3.5-turbo", + "custom_llm_provider": "azure", + "api_key": "my-custom-key", + "api_base": "my-custom-base", + }, + } + ], + ) + + with patch.object( + litellm, "aretrieve_batch", return_value=AsyncMock() + ) as mock_aretrieve_batch: + try: + response = await router.aretrieve_batch( + model="gpt-3.5-turbo", + ) + except Exception as e: + print(f"Error: {e}") + + mock_aretrieve_batch.assert_called_once() + + print(mock_aretrieve_batch.call_args.kwargs) + assert mock_aretrieve_batch.call_args.kwargs["api_key"] == "my-custom-key" + assert mock_aretrieve_batch.call_args.kwargs["api_base"] == "my-custom-base"