From 0cc320cc0a72394da05fb683f2e190861ab9afaa Mon Sep 17 00:00:00 2001 From: Dmitriy Alergant <93501479+DmitriyAlergant@users.noreply.github.com> Date: Fri, 25 Jul 2025 01:07:06 -0400 Subject: [PATCH] GuardrailsAI: use validatedOutput to allow usage of "fix" guards. Previously "fix" guards had no effect in llmOutput mode. (#12891) --- .../docs/proxy/guardrails/guardrails_ai.md | 7 ++- .../guardrails_ai/guardrails_ai.py | 6 +- .../guardrails_ai/test_guardrails_ai.py | 59 +++++++++++++++++++ 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/docs/my-website/docs/proxy/guardrails/guardrails_ai.md b/docs/my-website/docs/proxy/guardrails/guardrails_ai.md index 4dfed2bb69..ddeccaf16d 100644 --- a/docs/my-website/docs/proxy/guardrails/guardrails_ai.md +++ b/docs/my-website/docs/proxy/guardrails/guardrails_ai.md @@ -25,9 +25,10 @@ guardrails: - guardrail_name: "guardrails_ai-guard" litellm_params: guardrail: guardrails_ai - guard_name: "gibberish_guard" # 👈 Guardrail AI guard name - mode: "post_call" - api_base: os.environ/GUARDRAILS_AI_API_BASE # 👈 Guardrails AI API Base. Defaults to "http://0.0.0.0:8000" + guard_name: "detect-secrets-guard" # 👈 Guardrail AI guard name + mode: "pre_call" + guardrails_ai_api_input_format: "llmOutput" # 👈 This is the only option that currently works (and it is a default), use it for both pre_call and post_call hooks + api_base: os.environ/GUARDRAILS_AI_API_BASE # 👈 Guardrails AI API Base. Defaults to "http://0.0.0.0:8000" ``` 2. 
Start LiteLLM Gateway diff --git a/litellm/proxy/guardrails/guardrail_hooks/guardrails_ai/guardrails_ai.py b/litellm/proxy/guardrails/guardrail_hooks/guardrails_ai/guardrails_ai.py index 050617790d..ea5b7641c6 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/guardrails_ai/guardrails_ai.py +++ b/litellm/proxy/guardrails/guardrail_hooks/guardrails_ai/guardrails_ai.py @@ -121,6 +121,10 @@ class GuardrailsAI(CustomGuardrail): ) -> str: from httpx import URL + # This branch of code does not work with current version of GuardrailsAI API (as of July 2025), and it is unclear if it ever worked. + # Use guardrails_ai_api_input_format: "llmOutput" config line for all guardrails (which is the default anyway) + # We can still use the "pre_call" mode to validate the inputs even if the API input format is technically "llmOutput" + data = { "inputs": [ { @@ -180,7 +184,7 @@ class GuardrailsAI(CustomGuardrail): _result = await self.make_guardrails_ai_api_request( llm_output=text, request_data=data ) - updated_text = _result.get("rawLlmOutput") or text + updated_text = _result.get("validatedOutput") or _result.get("rawLlmOutput") or text data["messages"] = set_last_user_message(data["messages"], updated_text) return data diff --git a/tests/test_litellm/proxy/guardrails/guardrail_hooks/guardrails_ai/test_guardrails_ai.py b/tests/test_litellm/proxy/guardrails/guardrail_hooks/guardrails_ai/test_guardrails_ai.py index ba0db9fc53..97b3d5045c 100644 --- a/tests/test_litellm/proxy/guardrails/guardrail_hooks/guardrails_ai/test_guardrails_ai.py +++ b/tests/test_litellm/proxy/guardrails/guardrail_hooks/guardrails_ai/test_guardrails_ai.py @@ -133,3 +133,62 @@ async def test_guardrails_ai_process_input(): assert result["messages"][1]["content"] == "First question" assert result["messages"][2]["content"] == "First answer" assert result["messages"][3]["content"] == "sanitized message" + + # Test case 7: Test validatedOutput preference over rawLlmOutput + with patch.object( +
guardrails_ai_guardrail, + "make_guardrails_ai_api_request", + return_value=GuardrailsAIResponse( + rawLlmOutput="Somtimes I hav spelling errors in my vriting", + validatedOutput="Sometimes I have spelling errors in my writing", + validationPassed=True, + callId="test-123", + ), + ) as mock_api_request: + + data = { + "messages": [ + {"role": "user", "content": "Somtimes I hav spelling errors in my vriting"} + ] + } + + result = await guardrails_ai_guardrail.process_input(data, "completion") + + mock_api_request.assert_called_once_with( + llm_output="Somtimes I hav spelling errors in my vriting", request_data=data + ) + + # Should use validatedOutput when available + assert result["messages"][0]["content"] == "Sometimes I have spelling errors in my writing" + + # Test case 8: Test fallback to rawLlmOutput when validatedOutput is not present + with patch.object( + guardrails_ai_guardrail, + "make_guardrails_ai_api_request", + return_value=GuardrailsAIResponse( + rawLlmOutput="fallback text", + validatedOutput="", # Empty validatedOutput + validationPassed=True, + callId="test-456", + ), + ) as mock_api_request: + + data = {"messages": [{"role": "user", "content": "Test message"}]} + + result = await guardrails_ai_guardrail.process_input(data, "completion") + + assert result["messages"][0]["content"] == "fallback text" + + # Test case 9: Test fallback to original text when neither validatedOutput nor rawLlmOutput is present + with patch.object( + guardrails_ai_guardrail, + "make_guardrails_ai_api_request", + return_value={}, # Empty response + ) as mock_api_request: + + data = {"messages": [{"role": "user", "content": "Original message"}]} + + result = await guardrails_ai_guardrail.process_input(data, "completion") + + # Should keep original content when no output fields are present + assert result["messages"][0]["content"] == "Original message"