mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-18 05:28:02 +00:00
a1f3a1c5dc
* feat: add async_get_oldest_n_keys in memory cache * fix: add add_request_to_hanging_request_check * test: alerting * feat: v2 hanging request check * fix: HangingRequestData * fix: AlertingHangingRequestCheck * fix: check_for_hanging_requests * fix: use correct metadata location for hanging requests * fix: formatting alert * test hanging request check * fix: add guard flags for background tasks alerting
263 lines
10 KiB
Python
263 lines
10 KiB
Python
import json
|
|
import os
|
|
import sys
|
|
from typing import Optional
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
# Add the directory two levels above the current working directory to sys.path so project modules can be imported
|
|
sys.path.insert(0, os.path.abspath("../.."))
|
|
|
|
from litellm.integrations.SlackAlerting.hanging_request_check import (
|
|
AlertingHangingRequestCheck,
|
|
)
|
|
from litellm.types.integrations.slack_alerting import HangingRequestData
|
|
|
|
|
|
class TestAlertingHangingRequestCheck:
    """Unit tests exercising AlertingHangingRequestCheck against a mocked Slack alerter."""

    @pytest.fixture
    def mock_slack_alerting(self):
        """Provide a stand-in SlackAlerting: 300s threshold and an awaitable ``send_alert``."""
        return MagicMock(alerting_threshold=300, send_alert=AsyncMock())

    @pytest.fixture
    def hanging_request_checker(self, mock_slack_alerting):
        """Provide an AlertingHangingRequestCheck wired to the mocked Slack alerter."""
        checker = AlertingHangingRequestCheck(slack_alerting_object=mock_slack_alerting)
        return checker

    @staticmethod
    def _stub_oldest_keys(checker, request_ids):
        """Replace the cache's ``async_get_oldest_n_keys`` so it yields ``request_ids``."""
        checker.hanging_request_cache.async_get_oldest_n_keys = AsyncMock(
            return_value=request_ids
        )

    @pytest.mark.asyncio
    async def test_init_creates_cache_with_correct_ttl(self, mock_slack_alerting):
        """
        Constructing the checker must yield a cache whose default TTL equals
        the alerting threshold plus the buffer time.
        """
        buffer_seconds = 60  # HANGING_ALERT_BUFFER_TIME_SECONDS
        checker = AlertingHangingRequestCheck(slack_alerting_object=mock_slack_alerting)

        assert (
            checker.hanging_request_cache.default_ttl
            == mock_slack_alerting.alerting_threshold + buffer_seconds
        )

    @pytest.mark.asyncio
    async def test_add_request_to_hanging_request_check_success(
        self, hanging_request_checker
    ):
        """
        A fully populated request should end up in the hanging request cache
        as a HangingRequestData keyed by its litellm_call_id.
        """
        call_id = "test_request_123"
        api_base = "https://api.openai.com/v1"
        payload = {
            "litellm_call_id": call_id,
            "model": "gpt-4",
            "deployment": {"litellm_params": {"api_base": api_base}},
            "metadata": {
                "user_api_key_alias": "test_key",
                "user_api_key_team_alias": "test_team",
            },
        }

        with patch("litellm.get_api_base", return_value=api_base):
            await hanging_request_checker.add_request_to_hanging_request_check(
                payload
            )

        # Read the entry back from the cache under the request's call id
        cache = hanging_request_checker.hanging_request_cache
        entry = await cache.async_get_cache(key=call_id)

        assert entry is not None
        assert isinstance(entry, HangingRequestData)
        assert entry.request_id == call_id
        assert entry.model == "gpt-4"
        assert entry.api_base == api_base

    @pytest.mark.asyncio
    async def test_add_request_to_hanging_request_check_none_request_data(
        self, hanging_request_checker
    ):
        """
        Supplying None instead of request data must not raise and must
        produce a None result.
        """
        outcome = await hanging_request_checker.add_request_to_hanging_request_check(
            None
        )

        assert outcome is None

    @pytest.mark.asyncio
    async def test_add_request_to_hanging_request_check_minimal_data(
        self, hanging_request_checker
    ):
        """
        A request carrying only a call id and a model should still be cached,
        with no api_base and empty key/team aliases.
        """
        call_id = "minimal_request_456"
        payload = {"litellm_call_id": call_id, "model": "gpt-3.5-turbo"}

        await hanging_request_checker.add_request_to_hanging_request_check(payload)

        cache = hanging_request_checker.hanging_request_cache
        entry = await cache.async_get_cache(key=call_id)

        assert entry is not None
        assert entry.request_id == call_id
        assert entry.model == "gpt-3.5-turbo"
        assert entry.api_base is None
        assert entry.key_alias == ""
        assert entry.team_alias == ""

    @pytest.mark.asyncio
    async def test_send_hanging_request_alert(self, hanging_request_checker):
        """
        Alerting on a hanging request should call the Slack alerter exactly
        once with the expected message fragments and a "Medium" level.
        """
        stalled = HangingRequestData(
            request_id="test_hanging_request",
            model="gpt-4",
            api_base="https://api.openai.com/v1",
            key_alias="test_key",
            team_alias="test_team",
        )

        await hanging_request_checker.send_hanging_request_alert(stalled)

        # Exactly one alert must have gone out
        send_alert = hanging_request_checker.slack_alerting_object.send_alert
        send_alert.assert_called_once()

        # Inspect the keyword arguments of that single call
        alert_kwargs = send_alert.call_args.kwargs
        text = alert_kwargs["message"]
        expected_fragments = (
            "Requests are hanging - 300s+ request time",
            "Request Model: `gpt-4`",
            "API Base: `https://api.openai.com/v1`",
            "Key Alias: `test_key`",
            "Team Alias: `test_team`",
        )
        for fragment in expected_fragments:
            assert fragment in text
        assert alert_kwargs["level"] == "Medium"

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_no_proxy_logging(
        self, hanging_request_checker
    ):
        """
        With proxy_logging_obj.internal_usage_cache set to None, the sweep
        should simply return None.
        """
        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as proxy_logging:
            proxy_logging.internal_usage_cache = None
            outcome = await hanging_request_checker.send_alerts_for_hanging_requests()

        assert outcome is None

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_with_completed_request(
        self, hanging_request_checker
    ):
        """
        When the internal usage cache reports a status for a tracked request,
        the request counts as completed and no alert may be sent.
        """
        request_id = "completed_request_789"
        finished = HangingRequestData(
            request_id=request_id,
            model="gpt-4",
            api_base="https://api.openai.com/v1",
        )
        # Seed the hanging cache with the tracked request
        await hanging_request_checker.hanging_request_cache.async_set_cache(
            key=request_id, value=finished, ttl=300
        )
        # Have the cache hand back our request as the oldest key
        self._stub_oldest_keys(hanging_request_checker, [request_id])

        # A non-None status from the internal cache marks the request as completed
        status_cache = AsyncMock()
        status_cache.async_get_cache.return_value = {"status": "success"}

        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as proxy_logging:
            proxy_logging.internal_usage_cache = status_cache
            await hanging_request_checker.send_alerts_for_hanging_requests()

        # A completed request must not trigger an alert
        hanging_request_checker.slack_alerting_object.send_alert.assert_not_called()

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_with_actual_hanging_request(
        self, hanging_request_checker
    ):
        """
        When the internal usage cache has no status for a tracked request,
        the request counts as hanging and exactly one alert must be sent.
        """
        request_id = "hanging_request_999"
        stalled = HangingRequestData(
            request_id=request_id,
            model="gpt-4",
            api_base="https://api.openai.com/v1",
            key_alias="test_key",
            team_alias="test_team",
        )
        # Seed the hanging cache with the tracked request
        await hanging_request_checker.hanging_request_cache.async_set_cache(
            key=request_id, value=stalled, ttl=300
        )
        # Have the cache hand back our request as the oldest key
        self._stub_oldest_keys(hanging_request_checker, [request_id])

        # A None status from the internal cache means the request is still hanging
        status_cache = AsyncMock()
        status_cache.async_get_cache.return_value = None

        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as proxy_logging:
            proxy_logging.internal_usage_cache = status_cache
            await hanging_request_checker.send_alerts_for_hanging_requests()

        # The hanging request must trigger exactly one alert
        hanging_request_checker.slack_alerting_object.send_alert.assert_called_once()

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_with_missing_hanging_data(
        self, hanging_request_checker
    ):
        """
        When the cache lists a request id but returns no data for it, the
        sweep must complete without raising and without sending an alert.
        """
        # The cache reports a key, yet every lookup comes back empty
        self._stub_oldest_keys(hanging_request_checker, ["missing_request_111"])
        hanging_request_checker.hanging_request_cache.async_get_cache = AsyncMock(
            return_value=None
        )

        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as proxy_logging:
            proxy_logging.internal_usage_cache = AsyncMock()
            await hanging_request_checker.send_alerts_for_hanging_requests()

        # Reaching this point means no crash; no alert may have been sent either
        hanging_request_checker.slack_alerting_object.send_alert.assert_not_called()