From 66f97a00a44d096c5ee0e54e9fbab59ea8ed9cd7 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 19 Mar 2026 14:30:29 +0530 Subject: [PATCH] fix(test): rewrite polling pre-call guard test to call responses_api() directly Previously the test called common_processing_pre_call_logic in isolation, making generate_polling_id.assert_not_called() vacuously true. Now the test calls responses_api() end-to-end so it actually verifies that a rate-limited request never receives a polling ID. Co-Authored-By: Claude Sonnet 4.6 --- .../test_response_polling_pre_call_checks.py | 69 ++++++++++++------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/tests/proxy_unit_tests/test_response_polling_pre_call_checks.py b/tests/proxy_unit_tests/test_response_polling_pre_call_checks.py index cdea075d0d..45e4e9e4d3 100644 --- a/tests/proxy_unit_tests/test_response_polling_pre_call_checks.py +++ b/tests/proxy_unit_tests/test_response_polling_pre_call_checks.py @@ -108,7 +108,8 @@ class TestPollingEndpointPreCallGuard: @pytest.mark.asyncio async def test_rate_limit_error_prevents_polling_id_creation(self): - """When pre-call checks raise, generate_polling_id must not be called""" + """responses_api() must raise 429 and never call generate_polling_id when rate-limited""" + from litellm.proxy.response_api_endpoints.endpoints import responses_api from litellm.proxy.response_polling.polling_handler import ResponsePollingHandler rate_limit_exc = litellm.RateLimitError( @@ -116,10 +117,37 @@ class TestPollingEndpointPreCallGuard: llm_provider="", model="gpt-4", ) - generate_polling_id_mock = MagicMock(return_value="litellm_poll_test") + proxy_server_patches = { + "litellm.proxy.proxy_server._read_request_body": AsyncMock( + return_value={"model": "gpt-4", "background": True} + ), + "litellm.proxy.proxy_server.general_settings": {}, + "litellm.proxy.proxy_server.llm_router": MagicMock(), + "litellm.proxy.proxy_server.native_background_mode": None, + "litellm.proxy.proxy_server.polling_cache_ttl": 3600, + "litellm.proxy.proxy_server.polling_via_cache_enabled": True, + "litellm.proxy.proxy_server.proxy_config": MagicMock(), + "litellm.proxy.proxy_server.proxy_logging_obj": AsyncMock(), + "litellm.proxy.proxy_server.redis_usage_cache": AsyncMock(), + "litellm.proxy.proxy_server.select_data_generator": None, + "litellm.proxy.proxy_server.user_api_base": None, + "litellm.proxy.proxy_server.user_max_tokens": None, + "litellm.proxy.proxy_server.user_model": None, + "litellm.proxy.proxy_server.user_request_timeout": None, + "litellm.proxy.proxy_server.user_temperature": None, + "litellm.proxy.proxy_server.version": "1.0.0", + } + with ( + patch.multiple("litellm.proxy.proxy_server", **{ + k.split(".")[-1]: v for k, v in proxy_server_patches.items() + }), + patch( + "litellm.proxy.response_polling.polling_handler.should_use_polling_for_request", + return_value=True, + ), patch.object( ProxyBaseLLMRequestProcessing, "common_processing_pre_call_logic", @@ -133,33 +161,22 @@ class TestPollingEndpointPreCallGuard: return_value=HTTPException(status_code=429, detail="Rate limit exceeded"), ), patch.object(ResponsePollingHandler, "generate_polling_id", generate_polling_id_mock), + # Prevent background task from running (avoids noise from incomplete mocks) + patch("asyncio.create_task"), + patch.object( + ResponsePollingHandler, + "create_initial_state", + new_callable=AsyncMock, + return_value=MagicMock(), + ), ): - # Simulate the endpoint logic directly (avoids proxy_server import complexity) - data = {"model": "gpt-4", "background": True} - processor = ProxyBaseLLMRequestProcessing(data=data) - - raised_exc = None - try: - await processor.common_processing_pre_call_logic( + with pytest.raises(HTTPException) as exc_info: + await responses_api( request=MagicMock(spec=Request), - general_settings={}, - proxy_logging_obj=AsyncMock(), + fastapi_response=MagicMock(spec=Response), user_api_key_dict=MagicMock(spec=UserAPIKeyAuth), - version="1.0.0", - proxy_config=MagicMock(), - user_model=None, - user_temperature=None, - user_request_timeout=None, - user_max_tokens=None, - user_api_base=None, - model=None, - route_type="aresponses", - llm_router=MagicMock(), ) - except litellm.RateLimitError as e: - raised_exc = e - # The exception was raised before generate_polling_id could be called - assert raised_exc is not None - generate_polling_id_mock.assert_not_called() + assert exc_info.value.status_code == 429 + generate_polling_id_mock.assert_not_called()