GuardrailsAI: use validatedOutput to allow usage of "fix" guards. Previously "fix" guards had no effect in llmOutput mode. (#12891)

This commit is contained in:
Dmitriy Alergant
2025-07-25 01:07:06 -04:00
committed by GitHub
parent eb96fb78bc
commit 0cc320cc0a
3 changed files with 68 additions and 4 deletions
@@ -25,9 +25,10 @@ guardrails:
- guardrail_name: "guardrails_ai-guard"
litellm_params:
guardrail: guardrails_ai
guard_name: "gibberish_guard" # 👈 Guardrail AI guard name
mode: "post_call"
api_base: os.environ/GUARDRAILS_AI_API_BASE # 👈 Guardrails AI API Base. Defaults to "http://0.0.0.0:8000"
guard_name: "detect-secrets-guard" # 👈 Guardrail AI guard name
mode: "pre_call"
guardrails_ai_api_input_format: "llmOutput" # 👈 This is the only option that currently works (and it is the default); use it for both pre_call and post_call hooks
api_base: os.environ/GUARDRAILS_AI_API_BASE # 👈 Guardrails AI API Base. Defaults to "http://0.0.0.0:8000"
```
2. Start LiteLLM Gateway
@@ -121,6 +121,10 @@ class GuardrailsAI(CustomGuardrail):
) -> str:
from httpx import URL
# This branch of code does not work with the current version of the GuardrailsAI API (as of July 2025), and it is unclear whether it ever worked.
# Use the guardrails_ai_api_input_format: "llmOutput" config line for all guardrails (it is the default anyway).
# We can still use the "pre_call" mode to validate the inputs even if the API input format is technically "llmOutput".
data = {
"inputs": [
{
@@ -180,7 +184,7 @@ class GuardrailsAI(CustomGuardrail):
_result = await self.make_guardrails_ai_api_request(
llm_output=text, request_data=data
)
updated_text = _result.get("rawLlmOutput") or text
updated_text = _result.get("validatedOutput") or _result.get("rawLlmOutput") or text
data["messages"] = set_last_user_message(data["messages"], updated_text)
return data
@@ -133,3 +133,62 @@ async def test_guardrails_ai_process_input():
assert result["messages"][1]["content"] == "First question"
assert result["messages"][2]["content"] == "First answer"
assert result["messages"][3]["content"] == "sanitized message"
# Test case 7: Test validatedOutput preference over rawLlmOutput
with patch.object(
guardrails_ai_guardrail,
"make_guardrails_ai_api_request",
return_value=GuardrailsAIResponse(
rawLlmOutput="Somtimes I hav spelling errors in my vriting",
validatedOutput="Sometimes I have spelling errors in my writing",
validationPassed=True,
callId="test-123",
),
) as mock_api_request:
data = {
"messages": [
{"role": "user", "content": "Somtimes I hav spelling errors in my vriting"}
]
}
result = await guardrails_ai_guardrail.process_input(data, "completion")
mock_api_request.assert_called_once_with(
llm_output="Somtimes I hav spelling errors in my vriting", request_data=data
)
# Should use validatedOutput when available
assert result["messages"][0]["content"] == "Sometimes I have spelling errors in my writing"
# Test case 8: Test fallback to rawLlmOutput when validatedOutput is empty
with patch.object(
guardrails_ai_guardrail,
"make_guardrails_ai_api_request",
return_value=GuardrailsAIResponse(
rawLlmOutput="fallback text",
validatedOutput="", # Empty validatedOutput
validationPassed=True,
callId="test-456",
),
) as mock_api_request:
data = {"messages": [{"role": "user", "content": "Test message"}]}
result = await guardrails_ai_guardrail.process_input(data, "completion")
assert result["messages"][0]["content"] == "fallback text"
# Test case 9: Test fallback to original text when neither validatedOutput nor rawLlmOutput is present
with patch.object(
guardrails_ai_guardrail,
"make_guardrails_ai_api_request",
return_value={}, # Empty response
) as mock_api_request:
data = {"messages": [{"role": "user", "content": "Original message"}]}
result = await guardrails_ai_guardrail.process_input(data, "completion")
# Should keep original content when no output fields are present
assert result["messages"][0]["content"] == "Original message"