From bec3b8e321e097db7d902e5746ecfaebfb68ee22 Mon Sep 17 00:00:00 2001 From: mateo-berri <277851410+mateo-berri@users.noreply.github.com> Date: Fri, 12 Jun 2026 07:51:43 +0000 Subject: [PATCH] fix(proxy): scope passthrough post-call guardrail buffering to the request Buffering the Bedrock event stream into a single non-streaming response was gated on whether any post_call guardrail existed globally, so every converse-stream request lost streaming once any post_call guardrail was registered, even for keys that did not reference it. Mirror the gate used by post_call_success_hook (should_run_guardrail against the request's merged guardrails) so only requests whose key/team actually trigger a post_call guardrail are buffered. --- litellm/proxy/common_request_processing.py | 26 +++++++--- .../test_deferred_guardrail_logging.py | 51 +++++++++++-------- 2 files changed, 48 insertions(+), 29 deletions(-) diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index c4744d61d4..383db6fff5 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -1751,20 +1751,30 @@ class ProxyBaseLLMRequestProcessing: return True return False - @staticmethod - def _has_post_call_guardrails_for_passthrough() -> bool: + def _has_post_call_guardrails_for_passthrough(self) -> bool: """ - True when any guardrail runs at post_call for passthrough responses. + True when a post_call guardrail will actually run for THIS request. - Unlike _has_post_call_guardrails, an event_hook=None guardrail counts: - should_run_guardrail treats it as matching every hook (post_call - included), so skipping the passthrough buffer here would forward the - raw upstream body and bypass that guardrail's output processing. + Mirrors the gate in ProxyLogging.post_call_success_hook + (should_run_guardrail against the request's merged guardrails) so that a + guardrail registered globally but not configured for this key/team does + not force the passthrough stream to be buffered into a single + non-streaming response. An event_hook=None guardrail still counts here + because should_run_guardrail treats it as matching every hook. """ + from litellm.proxy.proxy_server import llm_router + from litellm.proxy.utils import _check_and_merge_model_level_guardrails + + guardrail_data = _check_and_merge_model_level_guardrails( + data=self.data, llm_router=llm_router + ) for cb in litellm.callbacks: if not isinstance(cb, CustomGuardrail): continue - if cb._event_hook_is_event_type(GuardrailEventHooks.post_call): + if cb.should_run_guardrail( + data=guardrail_data, + event_type=GuardrailEventHooks.post_call, + ): return True return False diff --git a/tests/test_litellm/proxy/guardrails/test_deferred_guardrail_logging.py b/tests/test_litellm/proxy/guardrails/test_deferred_guardrail_logging.py index d104f8d7f4..be92b6fc6c 100644 --- a/tests/test_litellm/proxy/guardrails/test_deferred_guardrail_logging.py +++ b/tests/test_litellm/proxy/guardrails/test_deferred_guardrail_logging.py @@ -159,43 +159,52 @@ class TestHasPostCallGuardrailsForPassthrough: Those guardrails run at post_call (should_run_guardrail treats None as matching every hook); skipping the buffer would forward the raw upstream - body and bypass output processing. + body and bypass output processing. The check is scoped to the request via + should_run_guardrail so a guardrail that exists globally but is not + configured for this key/team does not turn the stream non-streaming. """ + @staticmethod + def _has(data: dict) -> bool: + return ProxyBaseLLMRequestProcessing( + data=data + )._has_post_call_guardrails_for_passthrough() + def test_returns_true_for_event_hook_none(self): with patch("litellm.callbacks", [AllEventsGuardrail()]): - assert ( - ProxyBaseLLMRequestProcessing._has_post_call_guardrails_for_passthrough() - is True - ) + assert self._has({}) is True def test_returns_true_for_post_call_guardrail(self): with patch("litellm.callbacks", [PostCallGuardrail()]): - assert ( - ProxyBaseLLMRequestProcessing._has_post_call_guardrails_for_passthrough() - is True - ) + assert self._has({}) is True def test_returns_false_for_pre_call_only(self): with patch("litellm.callbacks", [PreCallGuardrail()]): - assert ( - ProxyBaseLLMRequestProcessing._has_post_call_guardrails_for_passthrough() - is False - ) + assert self._has({}) is False def test_returns_false_for_no_callbacks(self): with patch("litellm.callbacks", []): - assert ( - ProxyBaseLLMRequestProcessing._has_post_call_guardrails_for_passthrough() - is False - ) + assert self._has({}) is False def test_ignores_non_guardrail_callbacks(self): with patch("litellm.callbacks", ["langfuse", CustomLogger()]): - assert ( - ProxyBaseLLMRequestProcessing._has_post_call_guardrails_for_passthrough() - is False - ) + assert self._has({}) is False + + def test_request_scoped_guardrail_not_configured_for_key(self): + """A non-default-on post_call guardrail must not force buffering for a + request whose key/team does not reference it.""" + + class OptInPostCall(CustomGuardrail): + def __init__(self): + super().__init__( + guardrail_name="opt-in-post", + default_on=False, + event_hook=GuardrailEventHooks.post_call, + ) + + with patch("litellm.callbacks", [OptInPostCall()]): + assert self._has({"metadata": {"guardrails": []}}) is False + assert self._has({"metadata": {"guardrails": ["opt-in-post"]}}) is True # ---------------------------------------------------------------------------