From c402db905725308948d3b4b8bfeb097e73f5c8a5 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Fri, 4 Apr 2025 17:07:43 -0700
Subject: [PATCH 1/4] prometheus: emit llm provider in exception_class label on failure metrics

---
 litellm/integrations/prometheus.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 5ac8c80eb3..205e1f0c6b 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -818,7 +818,7 @@ class PrometheusLogger(CustomLogger):
                 requested_model=request_data.get("model", ""),
                 status_code=str(getattr(original_exception, "status_code", None)),
                 exception_status=str(getattr(original_exception, "status_code", None)),
-                exception_class=str(original_exception.__class__.__name__),
+                exception_class=self._get_exception_class_name(original_exception),
                 tags=_tags,
             )
             _labels = prometheus_label_factory(
@@ -917,7 +917,7 @@ class PrometheusLogger(CustomLogger):
                 api_base=api_base,
                 api_provider=llm_provider,
                 exception_status=str(getattr(exception, "status_code", None)),
-                exception_class=exception.__class__.__name__,
+                exception_class=self._get_exception_class_name(exception),
                 requested_model=model_group,
                 hashed_api_key=standard_logging_payload["metadata"][
                     "user_api_key_hash"
@@ -1146,6 +1146,20 @@ class PrometheusLogger(CustomLogger):
             )
             return
 
+    @staticmethod
+    def _get_exception_class_name(exception: Exception) -> str:
+        exception_class_name = getattr(exception, "llm_provider") or ""
+
+        # pretty print the provider name on prometheus
+        # eg. `openai` -> `Openai.`
+        if len(exception_class_name) >= 1:  # no prefix when provider is empty/None
+            exception_class_name = (
+                exception_class_name[0].upper() + exception_class_name[1:] + "."
+            )
+
+        exception_class_name += exception.__class__.__name__  # provider prefix (if any) + class name
+        return exception_class_name
+
     async def log_success_fallback_event(
         self, original_model_group: str, kwargs: dict, original_exception: Exception
     ):
@@ -1181,7 +1195,7 @@ class PrometheusLogger(CustomLogger):
             team=standard_metadata["user_api_key_team_id"],
             team_alias=standard_metadata["user_api_key_team_alias"],
             exception_status=str(getattr(original_exception, "status_code", None)),
-            exception_class=str(original_exception.__class__.__name__),
+            exception_class=self._get_exception_class_name(original_exception),
             tags=_tags,
         )
         _labels = prometheus_label_factory(
@@ -1225,7 +1239,7 @@ class PrometheusLogger(CustomLogger):
             team=standard_metadata["user_api_key_team_id"],
             team_alias=standard_metadata["user_api_key_team_alias"],
             exception_status=str(getattr(original_exception, "status_code", None)),
-            exception_class=str(original_exception.__class__.__name__),
+            exception_class=self._get_exception_class_name(original_exception),
             tags=_tags,
         )
 

From f402e9bbd1942a182f962da9f09e1138bb343e6f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Fri, 4 Apr 2025 21:23:21 -0700
Subject: [PATCH 2/4] prometheus: handle exceptions without llm_provider in _get_exception_class_name

---
 litellm/integrations/prometheus.py  | 4 +++-
 tests/otel_tests/test_prometheus.py | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 205e1f0c6b..cfa7c368ce 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -1148,7 +1148,9 @@ class PrometheusLogger(CustomLogger):
 
     @staticmethod
     def _get_exception_class_name(exception: Exception) -> str:
-        exception_class_name = getattr(exception, "llm_provider") or ""
+        exception_class_name = ""
+        if hasattr(exception, "llm_provider"):
+            exception_class_name = getattr(exception, "llm_provider") or ""
 
         # pretty print the provider name on prometheus
         # eg. `openai` -> `Openai.`
diff --git a/tests/otel_tests/test_prometheus.py b/tests/otel_tests/test_prometheus.py
index 932ae0bbe7..9cae5c565f 100644
--- a/tests/otel_tests/test_prometheus.py
+++ b/tests/otel_tests/test_prometheus.py
@@ -107,7 +107,7 @@ async def test_proxy_failure_metrics():
         print("/metrics", metrics)
 
         # Check if the failure metric is present and correct
-        expected_metric = 'litellm_proxy_failed_requests_metric_total{api_key_alias="None",end_user="None",exception_class="RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None",user="default_user_id"} 1.0'
+        expected_metric = 'litellm_proxy_failed_requests_metric_total{api_key_alias="None",end_user="None",exception_class="Openai.RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None",user="default_user_id"} 1.0'
 
         assert (
             expected_metric in metrics
@@ -121,7 +121,7 @@ async def test_proxy_failure_metrics():
         )
 
         assert (
-            'litellm_deployment_failure_responses_total{api_base="https://exampleopenaiendpoint-production.up.railway.app",api_key_alias="None",api_provider="openai",exception_class="RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="429",model_id="7499d31f98cd518cf54486d5a00deda6894239ce16d13543398dc8abf870b15f",requested_model="fake-azure-endpoint",team="None",team_alias="None"}'
+            'litellm_deployment_failure_responses_total{api_base="https://exampleopenaiendpoint-production.up.railway.app",api_key_alias="None",api_provider="openai",exception_class="Openai.RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="429",model_id="7499d31f98cd518cf54486d5a00deda6894239ce16d13543398dc8abf870b15f",requested_model="fake-azure-endpoint",team="None",team_alias="None"}'
             in metrics
         )
 
@@ -229,13 +229,13 @@ async def test_proxy_fallback_metrics():
 
         # Check if successful fallback metric is incremented
         assert (
-            'litellm_deployment_successful_fallbacks_total{api_key_alias="None",exception_class="RateLimitError",exception_status="429",fallback_model="fake-openai-endpoint",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
+            'litellm_deployment_successful_fallbacks_total{api_key_alias="None",exception_class="Openai.RateLimitError",exception_status="429",fallback_model="fake-openai-endpoint",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
             in metrics
         )
 
         # Check if failed fallback metric is incremented
         assert (
-            'litellm_deployment_failed_fallbacks_total{api_key_alias="None",exception_class="RateLimitError",exception_status="429",fallback_model="unknown-model",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
+            'litellm_deployment_failed_fallbacks_total{api_key_alias="None",exception_class="Openai.RateLimitError",exception_status="429",fallback_model="unknown-model",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
             in metrics
         )
 

From df4593d58bf5f3047061a3ce7ece2fb89900f3fa Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Fri, 4 Apr 2025 21:30:05 -0700
Subject: [PATCH 3/4] tests: expect provider-prefixed exception_class in prometheus unit tests

---
 tests/logging_callback_tests/test_prometheus_unit_tests.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py
index 6bc5b42c45..c24bb27691 100644
--- a/tests/logging_callback_tests/test_prometheus_unit_tests.py
+++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py
@@ -713,7 +713,7 @@ async def test_async_post_call_failure_hook(prometheus_logger):
         team_alias="test_team_alias",
         user="test_user",
         exception_status="429",
-        exception_class="RateLimitError",
+        exception_class="Openai.RateLimitError",
     )
     prometheus_logger.litellm_proxy_failed_requests_metric.labels().inc.assert_called_once()
 
@@ -948,7 +948,7 @@ async def test_log_success_fallback_event(prometheus_logger):
         team="test_team",
         team_alias="test_team_alias",
         exception_status="429",
-        exception_class="RateLimitError",
+        exception_class="Openai.RateLimitError",
     )
     prometheus_logger.litellm_deployment_successful_fallbacks.labels().inc.assert_called_once()
 
@@ -985,7 +985,7 @@ async def test_log_failure_fallback_event(prometheus_logger):
         team="test_team",
         team_alias="test_team_alias",
         exception_status="429",
-        exception_class="RateLimitError",
+        exception_class="Openai.RateLimitError",
     )
     prometheus_logger.litellm_deployment_failed_fallbacks.labels().inc.assert_called_once()
 

From b7cd4cef07b789a1bf59c1a922aae775f5d6614c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Fri, 4 Apr 2025 21:32:55 -0700
Subject: [PATCH 4/4] tests: add unit test for _get_exception_class_name

---
 .../test_prometheus_unit_tests.py             | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py
index c24bb27691..ddfce710d7 100644
--- a/tests/logging_callback_tests/test_prometheus_unit_tests.py
+++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py
@@ -1500,3 +1500,33 @@ def test_set_team_budget_metrics_with_custom_labels(prometheus_logger, monkeypat
         "metadata_organization": None,
         "metadata_environment": None,
     }
+
+
+def test_get_exception_class_name(prometheus_logger):
+    """
+    Test that _get_exception_class_name returns `<Provider>.<ClassName>`, or the bare class name without a provider
+    """
+    # Test case 1: llm_provider set -> capitalized provider + "." is prepended
+    rate_limit_error = litellm.RateLimitError(
+        message="Rate limit exceeded",
+        llm_provider="openai",
+        model="gpt-3.5-turbo"
+    )
+    assert prometheus_logger._get_exception_class_name(rate_limit_error) == "Openai.RateLimitError"
+
+    # Test case 2: empty-string llm_provider -> bare class name, no "." prefix
+    auth_error = litellm.AuthenticationError(
+        message="Invalid API key",
+        llm_provider="",
+        model="gpt-4"
+    )
+    assert prometheus_logger._get_exception_class_name(auth_error) == "AuthenticationError"
+
+    # Test case 3: llm_provider=None -> bare class name, no "." prefix
+    context_window_error = litellm.ContextWindowExceededError(
+        message="Context length exceeded",
+        llm_provider=None,
+        model="gpt-4"
+    )
+    assert prometheus_logger._get_exception_class_name(context_window_error) == "ContextWindowExceededError"
+