fix(test): rewrite polling pre-call guard test to call responses_api() directly

Previously the test called common_processing_pre_call_logic in isolation, making generate_polling_id.assert_not_called() vacuously true. Now the test calls responses_api() end-to-end so it actually verifies that a rate-limited request never receives a polling ID. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-08-02 04:21:34 +00:00 · 2026-03-19 14:30:29 +05:30
parent c12717f494
commit 66f97a00a4
1 changed files with 43 additions and 26 deletions
@@ -108,7 +108,8 @@ class TestPollingEndpointPreCallGuard:

    @pytest.mark.asyncio
    async def test_rate_limit_error_prevents_polling_id_creation(self):
-        """When pre-call checks raise, generate_polling_id must not be called"""
+        """responses_api() must raise 429 and never call generate_polling_id when rate-limited"""
+        from litellm.proxy.response_api_endpoints.endpoints import responses_api
        from litellm.proxy.response_polling.polling_handler import ResponsePollingHandler

        rate_limit_exc = litellm.RateLimitError(
@@ -116,10 +117,37 @@ class TestPollingEndpointPreCallGuard:
            llm_provider="",
            model="gpt-4",
        )
-
        generate_polling_id_mock = MagicMock(return_value="litellm_poll_test")

+        proxy_server_patches = {
+            "litellm.proxy.proxy_server._read_request_body": AsyncMock(
+                return_value={"model": "gpt-4", "background": True}
+            ),
+            "litellm.proxy.proxy_server.general_settings": {},
+            "litellm.proxy.proxy_server.llm_router": MagicMock(),
+            "litellm.proxy.proxy_server.native_background_mode": None,
+            "litellm.proxy.proxy_server.polling_cache_ttl": 3600,
+            "litellm.proxy.proxy_server.polling_via_cache_enabled": True,
+            "litellm.proxy.proxy_server.proxy_config": MagicMock(),
+            "litellm.proxy.proxy_server.proxy_logging_obj": AsyncMock(),
+            "litellm.proxy.proxy_server.redis_usage_cache": AsyncMock(),
+            "litellm.proxy.proxy_server.select_data_generator": None,
+            "litellm.proxy.proxy_server.user_api_base": None,
+            "litellm.proxy.proxy_server.user_max_tokens": None,
+            "litellm.proxy.proxy_server.user_model": None,
+            "litellm.proxy.proxy_server.user_request_timeout": None,
+            "litellm.proxy.proxy_server.user_temperature": None,
+            "litellm.proxy.proxy_server.version": "1.0.0",
+        }
+
        with (
+            patch.multiple("litellm.proxy.proxy_server", **{
+                k.split(".")[-1]: v for k, v in proxy_server_patches.items()
+            }),
+            patch(
+                "litellm.proxy.response_polling.polling_handler.should_use_polling_for_request",
+                return_value=True,
+            ),
            patch.object(
                ProxyBaseLLMRequestProcessing,
                "common_processing_pre_call_logic",
@@ -133,33 +161,22 @@ class TestPollingEndpointPreCallGuard:
                return_value=HTTPException(status_code=429, detail="Rate limit exceeded"),
            ),
            patch.object(ResponsePollingHandler, "generate_polling_id", generate_polling_id_mock),
+            # Prevent background task from running (avoids noise from incomplete mocks)
+            patch("asyncio.create_task"),
+            patch.object(
+                ResponsePollingHandler,
+                "create_initial_state",
+                new_callable=AsyncMock,
+                return_value=MagicMock(),
+            ),
        ):
-            # Simulate the endpoint logic directly (avoids proxy_server import complexity)
-            data = {"model": "gpt-4", "background": True}
-            processor = ProxyBaseLLMRequestProcessing(data=data)
-
-            raised_exc = None
-            try:
-                await processor.common_processing_pre_call_logic(
+            with pytest.raises(HTTPException) as exc_info:
+                await responses_api(
                    request=MagicMock(spec=Request),
-                    general_settings={},
-                    proxy_logging_obj=AsyncMock(),
+                    fastapi_response=MagicMock(spec=Response),
                    user_api_key_dict=MagicMock(spec=UserAPIKeyAuth),
-                    version="1.0.0",
-                    proxy_config=MagicMock(),
-                    user_model=None,
-                    user_temperature=None,
-                    user_request_timeout=None,
-                    user_max_tokens=None,
-                    user_api_base=None,
-                    model=None,
-                    route_type="aresponses",
-                    llm_router=MagicMock(),
                )
-            except litellm.RateLimitError as e:
-                raised_exc = e

-            # The exception was raised before generate_polling_id could be called
-            assert raised_exc is not None
-            generate_polling_id_mock.assert_not_called()
+        assert exc_info.value.status_code == 429
+        generate_polling_id_mock.assert_not_called()