From 02fc507b01e9eaa30dec376d6cb6188a5c4aa105 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 20 May 2024 14:26:30 -0700 Subject: [PATCH 1/3] fix divide by 0 bug --- litellm/integrations/slack_alerting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index 227db079db..bd07e5dd7a 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -875,7 +875,8 @@ Model Info: if isinstance(response_obj, litellm.ModelResponse): completion_tokens = response_obj.usage.completion_tokens - final_value = float(response_s.total_seconds() / completion_tokens) + if completion_tokens is not None and completion_tokens > 0: + final_value = float(response_s.total_seconds() / completion_tokens) await self.async_update_daily_reports( DeploymentMetrics( From f417495b7b062a4b846b4bd8f4827e0b7c9015bd Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 20 May 2024 14:59:06 -0700 Subject: [PATCH 2/3] fix - only adding alerting callbacks when alerting is on --- litellm/proxy/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 09e772e10b..1bafdd89e3 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -131,7 +131,13 @@ class ProxyLogging: alerting_args=alerting_args, ) - if "daily_reports" in self.alert_types: + if ( + self.alerting is not None + and "slack" in self.alerting + and "daily_reports" in self.alert_types + ): + # NOTE: ENSURE we only add callbacks when alerting is on + # We should NOT add callbacks when alerting is off litellm.callbacks.append(self.slack_alerting_instance) # type: ignore if redis_cache is not None: From b5f8c6387535547165c74ccc9cd6fbbe8481f9ba Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 20 May 2024 15:03:04 -0700 Subject: [PATCH 3/3] try/except deployment metrics error --- litellm/integrations/slack_alerting.py | 49 +++++++++++++++----------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index bd07e5dd7a..a5ae97d412 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -864,28 +864,37 @@ Model Info: async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): """Log deployment latency""" - if "daily_reports" in self.alert_types: - model_id = ( - kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "") - ) - response_s: timedelta = end_time - start_time - - final_value = response_s - total_tokens = 0 - - if isinstance(response_obj, litellm.ModelResponse): - completion_tokens = response_obj.usage.completion_tokens - if completion_tokens is not None and completion_tokens > 0: - final_value = float(response_s.total_seconds() / completion_tokens) - - await self.async_update_daily_reports( - DeploymentMetrics( - id=model_id, - failed_request=False, - latency_per_output_token=final_value, - updated_at=litellm.utils.get_utc_datetime(), + try: + if "daily_reports" in self.alert_types: + model_id = ( + kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "") ) + response_s: timedelta = end_time - start_time + + final_value = response_s + total_tokens = 0 + + if isinstance(response_obj, litellm.ModelResponse): + completion_tokens = response_obj.usage.completion_tokens + if completion_tokens is not None and completion_tokens > 0: + final_value = float( + response_s.total_seconds() / completion_tokens + ) + + await self.async_update_daily_reports( + DeploymentMetrics( + id=model_id, + failed_request=False, + latency_per_output_token=final_value, + updated_at=litellm.utils.get_utc_datetime(), + ) + ) + except Exception as e: + verbose_proxy_logger.error( + "[Non-Blocking Error] Slack Alerting: Got error in logging LLM deployment latency: ", + e, ) + pass async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): """Log failure + deployment latency"""