Files
goclaw/internal/agent/input_guard.go
T
Viet Tran f3f4c67b36 Initial commit: GoClaw AI agent gateway
Multi-agent AI gateway with WebSocket RPC, HTTP API, and messaging channel integrations.
Go port of OpenClaw with multi-tenant PostgreSQL, per-user isolation, security hardening,
and production observability.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 14:58:07 +07:00

99 lines
2.9 KiB
Go

// Package agent — input guard for prompt injection detection.
//
// InputGuard scans user messages for known injection patterns.
// Action is configurable via gateway.injection_action:
// - "log": info-level logging (quiet)
// - "warn": warning-level logging (default)
// - "block": reject the message with an error
// - "off": disable scanning entirely
package agent
import (
"regexp"
"strings"
)
// guardPattern pairs a human-readable name with a compiled regex.
// Scan reports the name whenever the regex matches the scanned message.
type guardPattern struct {
// name identifies the pattern in the results of Scan and PatternNames.
name string
// pattern is the compiled expression tested against each message.
pattern *regexp.Regexp
}
// InputGuard scans user input for known prompt injection patterns.
// It only detects and reports matches via Scan; it does not log or reject
// anything itself.
type InputGuard struct {
// patterns is the list of named regexes that Scan evaluates, in order.
// NewInputGuard fills it with the built-in set from defaultGuardPatterns.
patterns []guardPattern
}
// NewInputGuard builds an InputGuard preloaded with the built-in injection
// detection patterns returned by defaultGuardPatterns.
func NewInputGuard() *InputGuard {
	guard := new(InputGuard)
	guard.patterns = defaultGuardPatterns()
	return guard
}
// Scan tests message against every configured pattern and collects the names
// of those that match, in configuration order.
//
// An empty message is never scanned and yields nil. The result is also nil
// when no pattern matches.
func (g *InputGuard) Scan(message string) []string {
	var hits []string
	if len(message) == 0 {
		return hits
	}
	for i := range g.patterns {
		if p := &g.patterns[i]; p.pattern.MatchString(message) {
			hits = append(hits, p.name)
		}
	}
	return hits
}
// defaultGuardPatterns returns the built-in set of injection detection patterns.
//
// The patterns target common prompt injection phrasing while trying to keep
// false positives on legitimate user messages low. Every call compiles and
// returns a fresh slice.
//
// Multi-word phrases separate their words with \s+ rather than a single
// literal space, so repeated spaces, tabs, or line breaks between the words
// cannot be used to slip past the guard. Matching is case-insensitive for all
// textual patterns.
func defaultGuardPatterns() []guardPattern {
	return []guardPattern{
		{
			// Requests to disregard earlier instructions.
			name:    "ignore_instructions",
			pattern: regexp.MustCompile(`(?i)ignore\s+(all\s+)?(previous|prior|above|earlier|preceding)\s+(instructions?|rules?|prompts?|directives?|guidelines?)`),
		},
		{
			// Attempts to assign a new persona; the trailing \s+ requires
			// that something follows the phrase.
			name:    "role_override",
			pattern: regexp.MustCompile(`(?i)(you\s+are\s+now|from\s+now\s+on\s+you\s+are|pretend\s+you\s+are|act\s+as\s+if\s+you\s+are|imagine\s+you\s+are)\s+`),
		},
		{
			// Chat-template markers that denote a system turn.
			name:    "system_tags",
			pattern: regexp.MustCompile(`(?i)</?system>|\[SYSTEM\]|\[INST\]|<<SYS>>|<\|im_start\|>system`),
		},
		{
			// Inline headers that announce replacement instructions.
			name:    "instruction_injection",
			pattern: regexp.MustCompile(`(?i)(new\s+instructions?:|override:|system\s+prompt:|<\|system\|>)`),
		},
		{
			// Raw NUL bytes embedded in the message.
			name:    "null_bytes",
			pattern: regexp.MustCompile(`\x00`),
		},
		{
			// Text or tags that pretend to close the system section.
			name:    "delimiter_escape",
			pattern: regexp.MustCompile(`(?i)(end\s+of\s+system|begin\s+user\s+input|</?(instructions?|rules|prompt|context)>)`),
		},
	}
}
// HasPatterns reports whether at least one pattern is configured on the guard.
func (g *InputGuard) HasPatterns() bool {
	return len(g.patterns) != 0
}
// PatternNames lists the name of every configured pattern, in configuration
// order. The returned slice is freshly allocated and never nil.
func (g *InputGuard) PatternNames() []string {
	out := make([]string, 0, len(g.patterns))
	for idx := range g.patterns {
		out = append(out, g.patterns[idx].name)
	}
	return out
}
// ContainsNullBytes reports whether s contains at least one NUL (0x00) byte.
// It performs a direct byte search, avoiding regex overhead.
func ContainsNullBytes(s string) bool {
	return strings.IndexByte(s, 0) >= 0
}