From 0d09c8ec96d1e2b55a2cbdea8fa95cf109db6ef1 Mon Sep 17 00:00:00 2001
From: Krish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Jun 2025 21:24:36 -0700
Subject: [PATCH] Litellm dev 06 18 2025 p1 (#11872)

* fix(spend_tracking_utils.py): use the request tags (including user agent tags) from the standard logging payload in the spend logs payload

* feat(litellm_logging.py): prefix user agent tags with `User-Agent: ` and allow admins to disable storing the user agent as a tag (`litellm.disable_add_user_agent_to_request_tags`)

* fix(azure_ai/): pass `Content-Type: application/json` header in Azure AI requests

Fixes https://github.com/BerriAI/litellm/issues/11227

* test: add unit test

* fix(router.py): fix passing dynamic credentials to retrieve batch

Fixes batch retrieval when using router

* test: add more unit tests
---
 litellm/__init__.py                           |  1 +
 litellm/litellm_core_utils/litellm_logging.py |  6 ++--
 litellm/llms/azure_ai/chat/transformation.py  |  4 +++
 litellm/llms/custom_httpx/llm_http_handler.py |  5 ++-
 litellm/proxy/_new_secret_config.yaml         | 23 +++++++-----
 .../spend_tracking/spend_tracking_utils.py    | 11 ++++--
 litellm/router.py                             | 19 +++++++---
 .../litellm_logging_code_coverage.py          |  8 ++---
 tests/llm_translation/test_azure_ai.py        |  2 ++
 .../test_litellm_logging.py                   | 32 +++++++++++++++++
 .../chat/test_azure_ai_transformation.py      | 12 +++++++
 tests/test_litellm/test_router.py             | 36 +++++++++++++++++++
 12 files changed, 137 insertions(+), 22 deletions(-)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index 562c8634af..21aa2c8185 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -220,6 +220,7 @@ ssl_certificate: Optional[str] = None
 disable_streaming_logging: bool = False
 disable_token_counter: bool = False
 disable_add_transform_inline_image_block: bool = False
+disable_add_user_agent_to_request_tags: bool = False
 in_memory_llm_clients_cache: LLMClientCache = LLMClientCache()
 safe_memory_mode: bool = False
 enable_azure_ad_token_refresh: Optional[bool] = False
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 92d1def3cf..4523342a09 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -3820,6 +3820,8 @@ class StandardLoggingPayloadSetup:
         """
         Return the user agent tags from the proxy server request for spend tracking
         """
+        if litellm.disable_add_user_agent_to_request_tags is True:
+            return None
         user_agent_tags: Optional[List[str]] = None
         headers = proxy_server_request.get("headers", {})
         if headers is not None and isinstance(headers, dict):
@@ -3832,9 +3834,9 @@ class StandardLoggingPayloadSetup:
                     if "/" in user_agent:
                         user_agent_part = user_agent.split("/")[0]
                     if user_agent_part is not None:
-                        user_agent_tags.append(user_agent_part)
+                        user_agent_tags.append("User-Agent: " + user_agent_part)
                     if user_agent is not None:
-                        user_agent_tags.append(user_agent)
+                        user_agent_tags.append("User-Agent: " + user_agent)
         return user_agent_tags
 
     @staticmethod
diff --git a/litellm/llms/azure_ai/chat/transformation.py b/litellm/llms/azure_ai/chat/transformation.py
index 1adc56804f..7eb7b767d0 100644
--- a/litellm/llms/azure_ai/chat/transformation.py
+++ b/litellm/llms/azure_ai/chat/transformation.py
@@ -53,6 +53,10 @@ class AzureAIStudioConfig(OpenAIConfig):
         else:
             headers["Authorization"] = f"Bearer {api_key}"
 
+        headers["Content-Type"] = (
+            "application/json"  # tell Azure AI Studio to expect JSON
+        )
+
         return headers
 
     def _should_use_api_key_header(self, api_base: str) -> bool:
diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py
index bb72211375..3a5490eee4 100644
--- a/litellm/llms/custom_httpx/llm_http_handler.py
+++ b/litellm/llms/custom_httpx/llm_http_handler.py
@@ -2447,7 +2447,10 @@ class BaseLLMHTTPHandler:
         _is_async: bool = False,
         fake_stream: bool = False,
         litellm_metadata: Optional[Dict[str, Any]] = None,
-    ) -> Union[ImageResponse, Coroutine[Any, Any, ImageResponse],]:
+    ) -> Union[
+        ImageResponse,
+        Coroutine[Any, Any, ImageResponse],
+    ]:
         """
 
         Handles image edit requests.
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 107769c26a..1f907035a3 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -3,20 +3,17 @@ model_list:
     litellm_params:
       model: codex-mini-latest
       api_key: os.environ/OPENAI_API_KEY
-  - model_name: "gemini/gemini-2.5-flash-preview-tts"
-    litellm_params:
-      model: gemini/gemini-2.5-flash-preview-tts
-      api_key: os.environ/GEMINI_API_KEY
-  - model_name: "gpt-4o-mini-tts"
-    litellm_params:
-      model: openai/gpt-4o-mini-tts
-      api_key: os.environ/OPENAI_API_KEY
   - model_name: "gpt-4o-mini-openai"
     litellm_params:
       model: gpt-4o-mini
       api_key: os.environ/OPENAI_API_KEY
     model_info:
       access_groups: ["beta-models"] # 👈 Model Access Group
+  - model_name: azure_ai/Phi-3-medium
+    litellm_params:
+      model: azure_ai/Phi-3-medium
+      api_key: os.environ/AZURE_AI_PHI_3_MEDIUM_API_KEY
+      api_base: os.environ/AZURE_AI_PHI_3_MEDIUM_API_BASE
   - model_name: "bedrock-nova"
     litellm_params:
       model: us.amazon.nova-pro-v1:0
@@ -105,6 +102,16 @@ general_settings:
   token_rate_limit_type: "output"
   # master_key: os.environ/PROXY_MASTER_KEY
 
+litellm_settings:
+  # cache: true
+  # cache_params:
+  #   type: redis
+  #   ttl: 600
+  #   password: os.environ/REDIS_PASSWORD
+  #   supported_call_types: ["acompletion", "completion"]
+  callbacks: ["prometheus", "langfuse"]
+    
+
 
 
 
diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm/proxy/spend_tracking/spend_tracking_utils.py
index c780541eb1..ad82ee7dca 100644
--- a/litellm/proxy/spend_tracking/spend_tracking_utils.py
+++ b/litellm/proxy/spend_tracking/spend_tracking_utils.py
@@ -91,9 +91,9 @@ def _get_spend_logs_metadata(
     clean_metadata["applied_guardrails"] = applied_guardrails
     clean_metadata["batch_models"] = batch_models
     clean_metadata["mcp_tool_call_metadata"] = mcp_tool_call_metadata
-    clean_metadata[
-        "vector_store_request_metadata"
-    ] = _get_vector_store_request_for_spend_logs_payload(vector_store_request_metadata)
+    clean_metadata["vector_store_request_metadata"] = (
+        _get_vector_store_request_for_spend_logs_payload(vector_store_request_metadata)
+    )
     clean_metadata["guardrail_information"] = guardrail_information
     clean_metadata["usage_object"] = usage_object
     clean_metadata["model_map_information"] = model_map_information
@@ -212,6 +212,11 @@ def get_logging_payload(  # noqa: PLR0915
         if isinstance(metadata.get("tags", []), list)
         else "[]"
     )
+    if (
+        standard_logging_payload is not None
+        and standard_logging_payload.get("request_tags") is not None
+    ):  # use 'tags' from standard logging payload instead
+        request_tags = json.dumps(standard_logging_payload["request_tags"])
     if (
         _is_master_key(api_key=api_key, _master_key=master_key)
         and general_settings.get("disable_adding_master_key_hash_to_db") is True
diff --git a/litellm/router.py b/litellm/router.py
index cc71b48110..e7297b0d41 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3038,15 +3038,18 @@ class Router:
             async def try_retrieve_batch(model_name: DeploymentTypedDict):
                 try:
                     model = model_name["litellm_params"].get("model")
+                    data = model_name["litellm_params"].copy()
+                    custom_llm_provider = data.get("custom_llm_provider")
                     if model is None:
                         raise Exception(
                             f"Model not found in litellm_params for deployment: {model_name}"
                         )
                     # Update kwargs with the current model name or any other model-specific adjustments
                     ## SET CUSTOM PROVIDER TO SELECTED DEPLOYMENT ##
-                    _, custom_llm_provider, _, _ = get_llm_provider(  # type: ignore
-                        model=model
-                    )
+                    if not custom_llm_provider:
+                        _, custom_llm_provider, _, _ = get_llm_provider(  # type: ignore
+                            model=model
+                        )
                     new_kwargs = copy.deepcopy(kwargs)
                     self._update_kwargs_with_deployment(
                         deployment=cast(dict, model_name),
@@ -3054,10 +3057,18 @@ class Router:
                         function_name="aretrieve_batch",
                     )
                     new_kwargs.pop("custom_llm_provider", None)
+                    data.pop("custom_llm_provider", None)
                     return await litellm.aretrieve_batch(
-                        custom_llm_provider=custom_llm_provider, **new_kwargs  # type: ignore
+                        **{
+                            **data,
+                            "custom_llm_provider": custom_llm_provider,
+                            **new_kwargs,  # type: ignore
+                        },
                     )
                 except Exception as e:
+                    import traceback
+
+                    traceback.print_exc()
                     receieved_exceptions.append(e)
                     return None
 
diff --git a/tests/code_coverage_tests/litellm_logging_code_coverage.py b/tests/code_coverage_tests/litellm_logging_code_coverage.py
index 9825cfba1e..eb927e006e 100644
--- a/tests/code_coverage_tests/litellm_logging_code_coverage.py
+++ b/tests/code_coverage_tests/litellm_logging_code_coverage.py
@@ -64,12 +64,12 @@ ignored_function_names = [
 
 
 def main():
-    logging_file = "./litellm/litellm_core_utils/litellm_logging.py"
-    tests_dir = "./tests/"
+    # logging_file = "./litellm/litellm_core_utils/litellm_logging.py"
+    # tests_dir = "./tests/"
 
     # LOCAL TESTING
-    # logging_file = "../../litellm/litellm_core_utils/litellm_logging.py"
-    # tests_dir = "../../tests/"
+    logging_file = "../../litellm/litellm_core_utils/litellm_logging.py"
+    tests_dir = "../../tests/"
 
     logging_functions = get_function_names_from_file(logging_file)
     print("logging_functions:", logging_functions)
diff --git a/tests/llm_translation/test_azure_ai.py b/tests/llm_translation/test_azure_ai.py
index 09596e147f..3873aa7abf 100644
--- a/tests/llm_translation/test_azure_ai.py
+++ b/tests/llm_translation/test_azure_ai.py
@@ -14,6 +14,7 @@ from litellm.llms.anthropic.chat import ModelResponseIterator
 import httpx
 import json
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
 # from base_rerank_unit_tests import BaseLLMRerankTest
 
 load_dotenv()
@@ -184,6 +185,7 @@ def test_azure_ai_services_with_api_version():
             == "https://litellm8397336933.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview"
         )
 
+
 @pytest.mark.skip(reason="Skipping due to cohere ssl issues")
 def test_completion_azure_ai_command_r():
     try:
diff --git a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py
index 0637594a3d..9d95749827 100644
--- a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py
+++ b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py
@@ -146,3 +146,35 @@ async def test_logging_non_streaming_request():
             "kwargs"
         ]["standard_logging_object"]
         assert standard_logging_object["stream"] is not True
+
+
+def test_get_user_agent_tags():  # user agent tags must carry the "User-Agent: " prefix
+    from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
+
+    tags = StandardLoggingPayloadSetup._get_user_agent_tags(
+        proxy_server_request={
+            "headers": {
+                "user-agent": "litellm/0.1.0",
+            }
+        }
+    )
+
+    assert "User-Agent: litellm" in tags  # part of the header before the first "/"
+    assert "User-Agent: litellm/0.1.0" in tags  # full header value
+
+
+def test_get_request_tags():  # request tags should combine metadata tags and user agent tags
+    from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
+
+    tags = StandardLoggingPayloadSetup._get_request_tags(
+        metadata={"tags": ["test-tag"]},
+        proxy_server_request={
+            "headers": {
+                "user-agent": "litellm/0.1.0",
+            }
+        },
+    )
+
+    assert "test-tag" in tags  # tag passed in via metadata["tags"]
+    assert "User-Agent: litellm" in tags  # prefixed user agent, part before the first "/"
+    assert "User-Agent: litellm/0.1.0" in tags  # prefixed user agent, full header value
diff --git a/tests/test_litellm/llms/azure_ai/chat/test_azure_ai_transformation.py b/tests/test_litellm/llms/azure_ai/chat/test_azure_ai_transformation.py
index ffb485c368..7076b24405 100644
--- a/tests/test_litellm/llms/azure_ai/chat/test_azure_ai_transformation.py
+++ b/tests/test_litellm/llms/azure_ai/chat/test_azure_ai_transformation.py
@@ -31,3 +31,15 @@ async def test_get_openai_compatible_provider_info():
     )
 
     assert custom_llm_provider == "azure"
+
+
+def test_azure_ai_validate_environment():  # checks the headers returned by validate_environment
+    config = AzureAIStudioConfig()
+    headers = config.validate_environment(
+        headers={},
+        model="azure_ai/gpt-4o-mini",
+        messages=[],
+        optional_params={},
+        litellm_params={},
+    )
+    assert headers["Content-Type"] == "application/json"  # JSON content type must be set even when no headers are passed in
diff --git a/tests/test_litellm/test_router.py b/tests/test_litellm/test_router.py
index bb266dfd4b..eb6c740818 100644
--- a/tests/test_litellm/test_router.py
+++ b/tests/test_litellm/test_router.py
@@ -348,3 +348,39 @@ def test_router_ignore_invalid_deployments():
     )
 
     assert router.get_model_list() == []
+
+
+@pytest.mark.asyncio
+async def test_router_aretrieve_batch():
+    """
+    Test that router.aretrieve_batch forwards the deployment's api_key and api_base to litellm.aretrieve_batch
+    """
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "custom_llm_provider": "azure",
+                    "api_key": "my-custom-key",
+                    "api_base": "my-custom-base",
+                },
+            }
+        ],
+    )
+
+    with patch.object(
+        litellm, "aretrieve_batch", return_value=AsyncMock()
+    ) as mock_aretrieve_batch:
+        try:
+            response = await router.aretrieve_batch(
+                model="gpt-3.5-turbo",
+            )
+        except Exception as e:  # NOTE(review): errors are only printed, not re-raised; the assertions below still run
+            print(f"Error: {e}")
+
+        mock_aretrieve_batch.assert_called_once()
+
+        print(mock_aretrieve_batch.call_args.kwargs)
+        assert mock_aretrieve_batch.call_args.kwargs["api_key"] == "my-custom-key"  # credentials come from the deployment's litellm_params
+        assert mock_aretrieve_batch.call_args.kwargs["api_base"] == "my-custom-base"