Files
Kai (Tam Nhu) Tran 8730a90acb fix(exec): allow uploaded files in active workspaces (#748)
Shell-aware command parsing, dynamic workspace exemptions, and symlink canonicalization for exec path denial. Fixes #739.
2026-04-08 13:35:19 +07:00

688 lines
20 KiB
Go

package tools
import (
"bytes"
"context"
"errors"
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"
shellwords "github.com/mattn/go-shellwords"
"github.com/nextlevelbuilder/goclaw/internal/sandbox"
"github.com/nextlevelbuilder/goclaw/internal/store"
"golang.org/x/text/unicode/norm"
)
// Dangerous command patterns organized into configurable deny groups.
// Defense-in-depth: patterns complement Docker hardening (cap-drop ALL,
// no-new-privileges, pids-limit, memory limit).
// Sources: OWASP Agentic AI Top 10, Claude Code CVE-2025-66032, MITRE ATT&CK,
// PayloadsAllTheThings, Trail of Bits prompt-injection-to-RCE research.
// Groups and patterns defined in shell_deny_groups.go.
// DefaultDenyPatterns returns all patterns from groups where Default=true.
// It resolves the deny patterns with no per-agent overrides (nil), which makes
// it a backward-compatible entry point for callers that predate overrides.
func DefaultDenyPatterns() []*regexp.Regexp {
	patterns := ResolveDenyPatterns(nil)
	return patterns
}
// ExecTool executes shell commands, optionally inside a sandbox container.
// Every command passes the deny-pattern check in Execute before it runs,
// whether it is routed to the host or to a sandbox.
type ExecTool struct {
// workspace is the fallback working directory used when the request
// context does not carry a workspace of its own.
workspace string
// timeout bounds host execution (applied in executeOnHost).
timeout time.Duration
pathDenyPatterns []*regexp.Regexp // always-on path-based denials (DenyPaths)
pathDenyRoots []string // raw deny roots for nested workspace exemptions
denyExemptions []string // path prefixes that exempt a matching argument from deny patterns
// restrict is passed to effectiveRestrict to decide whether a requested
// working_dir must be validated against the workspace.
restrict bool
sandboxMgr sandbox.Manager // nil = no sandbox, execute on host
approvalMgr *ExecApprovalManager // nil = no approval needed
agentID string // for approval request context
secureCLIStore store.SecureCLIStore // nil = no credentialed exec
}
// NewExecTool builds an ExecTool that runs commands directly on the host.
// The tool has no sandbox manager and uses a one-minute execution timeout.
func NewExecTool(workspace string, restrict bool) *ExecTool {
	tool := new(ExecTool)
	tool.workspace = workspace
	tool.restrict = restrict
	tool.timeout = time.Minute
	return tool
}
// NewSandboxedExecTool builds an ExecTool that routes commands through the
// given sandbox manager. Its timeout is five minutes, longer than the
// host-only tool's.
func NewSandboxedExecTool(workspace string, restrict bool, mgr sandbox.Manager) *ExecTool {
	tool := new(ExecTool)
	tool.workspace = workspace
	tool.restrict = restrict
	tool.sandboxMgr = mgr
	tool.timeout = 5 * time.Minute // sandbox allows longer timeout
	return tool
}
// SetSandboxKey does nothing. The sandbox key is read from the request
// context instead (thread-safe), so the argument is ignored.
func (t *ExecTool) SetSandboxKey(key string) {
	// Intentionally empty: see ToolSandboxKeyFromCtx usage in Execute.
}
// DenyPaths adds always-on deny patterns that block commands referencing the
// given paths. These are NOT configurable via deny groups — they always apply
// regardless of group config.
//
// Each path is matched literally (regexp metacharacters are escaped) and is
// also recorded as a raw deny root for nested-workspace exemptions.
//
// Blank paths are skipped: an empty pattern matches every string, so
// registering one would deny every command the tool is asked to run.
func (t *ExecTool) DenyPaths(paths ...string) {
	for _, p := range paths {
		if strings.TrimSpace(p) == "" {
			continue
		}
		// QuoteMeta output is always a valid expression, so MustCompile cannot panic here.
		literal := regexp.MustCompile(regexp.QuoteMeta(p))
		t.pathDenyPatterns = append(t.pathDenyPatterns, literal)
		t.pathDenyRoots = append(t.pathDenyRoots, p)
	}
}
// AllowPathExemptions registers path prefixes that exempt a command from deny
// pattern matches. Exemptions are evaluated per shell argument, so a command
// such as "cat .goclaw/skills-store/tool.py" is exempt because its argument
// ".goclaw/skills-store/tool.py" starts with a registered prefix.
func (t *ExecTool) AllowPathExemptions(prefixes ...string) {
	for _, prefix := range prefixes {
		t.denyExemptions = append(t.denyExemptions, prefix)
	}
}
// zeroWidthStripper deletes zero-width code points that are invisible but can
// fragment tokens. It is built once and reused across calls; a
// strings.Replacer is safe for concurrent use.
var zeroWidthStripper = strings.NewReplacer(
	"\u200b", "", // zero-width space
	"\u200c", "", // zero-width non-joiner
	"\u200d", "", // zero-width joiner
	"\u2060", "", // word joiner
	"\ufeff", "", // BOM / zero-width no-break space
)

// normalizeCommand applies NFKC Unicode normalization and strips zero-width
// characters before deny pattern matching, preventing Unicode-based bypasses.
// NFKC folds compatibility characters (e.g. fullwidth letters) into their
// canonical equivalents. The returned string is used for matching only.
func normalizeCommand(s string) string {
	return zeroWidthStripper.Replace(norm.NFKC.String(s))
}
// dynamicPathExemptions derives per-request deny exemptions from the
// workspaces carried in ctx.
//
// Candidate directories are the team workspace and, when the personal
// workspace differs from it, that workspace's ".uploads" and "uploads"
// subdirectories. A candidate (or one of its path aliases) only becomes an
// exemption when it is nested under a registered deny root. Each exemption is
// returned both with and without a trailing separator, without duplicates.
func (t *ExecTool) dynamicPathExemptions(ctx context.Context) []string {
	personal := ToolWorkspaceFromCtx(ctx)
	team := ToolTeamWorkspaceFromCtx(ctx)

	candidates := make([]string, 0, 3)
	if team != "" {
		candidates = append(candidates, team)
	}
	if personal != "" && filepath.Clean(personal) != filepath.Clean(team) {
		candidates = append(candidates,
			filepath.Join(personal, ".uploads"),
			filepath.Join(personal, "uploads"),
		)
	}

	var result []string
	added := make(map[string]struct{}, 4)
	addOnce := func(p string) {
		if _, dup := added[p]; dup {
			return
		}
		added[p] = struct{}{}
		result = append(result, p)
	}

	sep := string(filepath.Separator)
	for _, candidate := range candidates {
		// Never build an exemption from a path that could traverse upwards.
		if candidate == "" || strings.Contains(candidate, "..") {
			continue
		}
		for _, alias := range pathAliasVariants(filepath.Clean(candidate)) {
			if t.isNestedUnderDeniedRoot(alias) {
				addOnce(alias)
				addOnce(alias + sep)
			}
		}
	}
	return result
}
// pathAliasVariants returns path followed by its alias under the other of the
// two interchangeable roots "/app/workspace" and "/app/.goclaw", when path is
// one of those roots or lies beneath it. Paths outside both roots yield a
// single-element slice.
func pathAliasVariants(path string) []string {
	const (
		workspaceRoot = "/app/workspace"
		dataRoot      = "/app/.goclaw"
	)
	sep := string(filepath.Separator)
	out := []string{path}

	// remap appends the alias of path obtained by swapping the from root for to.
	remap := func(from, to string) {
		switch {
		case path == from:
			out = append(out, to)
		case strings.HasPrefix(path, from+sep):
			out = append(out, to+path[len(from):])
		}
	}
	remap(workspaceRoot, dataRoot)
	remap(dataRoot, workspaceRoot)
	return out
}
// isNestedUnderDeniedRoot reports whether path lies strictly beneath one of
// the registered deny roots.
//
// Roots that clean to "." or to the filesystem root are ignored. A relative
// root matches when it appears in path as a complete directory component
// followed by further components. An absolute root matches when path is a
// proper descendant of it; the root itself does not count as nested.
func (t *ExecTool) isNestedUnderDeniedRoot(path string) bool {
	sep := string(filepath.Separator)
	for _, raw := range t.pathDenyRoots {
		root := filepath.Clean(raw)
		switch {
		case root == "." || root == sep:
			// Too broad to reason about nesting; skip.
		case !filepath.IsAbs(root):
			if strings.Contains(path, sep+root+sep) {
				return true
			}
		case path != root && strings.HasPrefix(path, root+sep):
			return true
		}
	}
	return false
}
// matchesPathExemption reports whether path equals one of the exemptions or
// lies beneath one of them. Matching is component-aware: "/a/b" covers
// "/a/b/c" but not "/a/bc". Empty exemptions are ignored. An exemption that
// already ends in a separator only covers paths below it.
func matchesPathExemption(path string, exemptions []string) bool {
	sep := string(filepath.Separator)
	for _, ex := range exemptions {
		if ex == "" {
			continue
		}
		dirPrefix := ex
		if !strings.HasSuffix(dirPrefix, sep) {
			dirPrefix += sep
		}
		if path == ex || strings.HasPrefix(path, dirPrefix) {
			return true
		}
	}
	return false
}
// parseExecCommandWords splits command into shell words. The command is first
// cut into segments at shell operators, then each segment is parsed with
// shell quoting rules (backtick and environment expansion disabled). A
// segment that fails to parse, or parses to nothing, falls back to plain
// whitespace splitting. So does the whole command when no words were found.
func parseExecCommandWords(command string) []string {
	var all []string
	for _, seg := range splitExecCommandSegments(command) {
		p := shellwords.NewParser()
		p.ParseBacktick, p.ParseEnv = false, false

		parsed, err := p.Parse(seg)
		if err == nil && len(parsed) > 0 {
			all = append(all, parsed...)
		} else {
			all = append(all, strings.Fields(seg)...)
		}
	}
	if len(all) > 0 {
		return all
	}
	return strings.Fields(command)
}
// splitExecCommandSegments cuts command at unquoted shell operators
// (; | & < > and line breaks) and returns the trimmed, non-empty pieces.
//
// Operators inside single or double quotes do not split. A backslash outside
// quotes or inside double quotes protects the following byte. Inside single
// quotes a backslash has no special meaning.
func splitExecCommandSegments(command string) []string {
	const (
		unquoted = iota
		inSingleQuotes
		inDoubleQuotes
	)
	const operators = ";|&<>\n\r"

	var pieces []string
	emit := func(from, to int) {
		if piece := strings.TrimSpace(command[from:to]); piece != "" {
			pieces = append(pieces, piece)
		}
	}

	mode, segStart := unquoted, 0
	for pos := 0; pos < len(command); pos++ {
		c := command[pos]
		hasNext := pos+1 < len(command)

		switch mode {
		case inSingleQuotes:
			if c == '\'' {
				mode = unquoted
			}
		case inDoubleQuotes:
			if c == '\\' && hasNext {
				pos++ // skip the escaped byte
			} else if c == '"' {
				mode = unquoted
			}
		default:
			switch {
			case c == '\\':
				if hasNext {
					pos++ // skip the escaped byte
				}
			case c == '\'':
				mode = inSingleQuotes
			case c == '"':
				mode = inDoubleQuotes
			case strings.IndexByte(operators, c) >= 0:
				emit(segStart, pos)
				segStart = pos + 1
			}
		}
	}
	emit(segStart, len(command))
	return pieces
}
// extractPathCandidates returns every path-like string embedded in word.
// Besides word itself, it examines the text after the first "=" and after the
// first "@", repeating on each remainder. Forms such as "--file=/a/b" and
// "user@host:/a/b" therefore expose their path part. Results come out in
// discovery order with duplicates dropped.
func extractPathCandidates(word string) []string {
	if word == "" {
		return nil
	}

	var found []string
	visited := make(map[string]struct{}, 4)
	pending := []string{word}

	// pending grows while it is being walked, hence the index-based loop.
	for head := 0; head < len(pending); head++ {
		item := pending[head]
		if item == "" {
			continue
		}
		if _, dup := visited[item]; dup {
			continue
		}
		visited[item] = struct{}{}

		if looksLikePathCandidate(item) {
			found = append(found, item)
		}
		for _, delim := range [...]string{"=", "@"} {
			if _, rest, ok := strings.Cut(item, delim); ok && rest != "" {
				pending = append(pending, rest)
			}
		}
	}
	return found
}
// looksLikePathCandidate reports whether s resembles a filesystem path. It is
// true when s is absolute, starts with one of the known relative prefixes, or
// contains the OS path separator.
func looksLikePathCandidate(s string) bool {
	if s == "" {
		return false
	}
	if filepath.IsAbs(s) {
		return true
	}
	knownPrefixes := [...]string{
		"./",
		"../",
		".uploads/",
		".goclaw/",
		"teams/",
		"tenants/",
		"~/",
	}
	for _, prefix := range knownPrefixes {
		if strings.HasPrefix(s, prefix) {
			return true
		}
	}
	return strings.Contains(s, string(filepath.Separator))
}
// canonicalizeExecPath turns path into an absolute, symlink-resolved path.
//
// A leading "~/" is expanded to the current user's home directory, and a
// relative path is interpreted relative to baseDir. If the full path cannot
// be resolved through symlinks (for example because it does not exist yet),
// resolution is delegated to resolveThroughExistingAncestors.
//
// An error is returned when the home directory or the absolute form of the
// path cannot be determined. Callers treat an error as "not exempt", so
// failing here keeps the deny check closed instead of canonicalizing an
// empty path.
func canonicalizeExecPath(path, baseDir string) (string, error) {
	if strings.HasPrefix(path, "~/") {
		homeDir, err := os.UserHomeDir()
		if err != nil {
			return "", fmt.Errorf("expanding home directory for %q: %w", path, err)
		}
		path = filepath.Join(homeDir, strings.TrimPrefix(path, "~/"))
	}
	if !filepath.IsAbs(path) {
		path = filepath.Join(baseDir, path)
	}

	// filepath.Abs cleans its result, so no separate Clean call is needed.
	absPath, err := filepath.Abs(path)
	if err != nil {
		return "", fmt.Errorf("resolving absolute path for %q: %w", path, err)
	}
	if real, err := filepath.EvalSymlinks(absPath); err == nil {
		return real, nil
	}
	return resolveThroughExistingAncestors(absPath)
}
// matchesAnyPathExemption reports whether any path embedded in word resolves
// to a location covered by one of the exemptions.
//
// Candidates and exemptions are both canonicalized relative to baseDir before
// they are compared, so symlinks and relative spellings cannot disguise a
// path. Candidates containing ".." are never exempt. Candidates or
// exemptions that fail to canonicalize are skipped.
//
// Empty exemptions are skipped before canonicalization. Otherwise an empty
// string would canonicalize to baseDir itself and silently exempt every path
// inside the workspace.
func matchesAnyPathExemption(word string, exemptions []string, baseDir string) bool {
	for _, candidate := range extractPathCandidates(word) {
		if strings.Contains(candidate, "..") {
			continue
		}
		realCandidate, err := canonicalizeExecPath(candidate, baseDir)
		if err != nil {
			continue
		}
		for _, exemption := range exemptions {
			if exemption == "" {
				continue
			}
			realExemption, err := canonicalizeExecPath(exemption, baseDir)
			if err != nil {
				continue
			}
			if matchesPathExemption(realCandidate, []string{realExemption}) {
				return true
			}
		}
	}
	return false
}
// SetApprovalManager installs the exec approval manager for this tool along
// with the agent ID that is attached to approval requests.
func (t *ExecTool) SetApprovalManager(mgr *ExecApprovalManager, agentID string) {
	t.approvalMgr, t.agentID = mgr, agentID
}
// SetSecureCLIStore installs the credential store used for credentialed exec.
// A nil store leaves credentialed exec disabled.
func (t *ExecTool) SetSecureCLIStore(s store.SecureCLIStore) {
	t.secureCLIStore = s
	return
}
// Name returns the identifier of this tool, "exec".
func (t *ExecTool) Name() string {
	const toolName = "exec"
	return toolName
}
// Description returns a one-line summary of what the tool does.
func (t *ExecTool) Description() string {
	const summary = "Execute a shell command and return its output"
	return summary
}
// Parameters returns the JSON-schema-style description of the tool's
// arguments: a required string "command" and an optional string
// "working_dir".
func (t *ExecTool) Parameters() map[string]any {
	// stringProp builds the schema entry for one string-typed argument.
	stringProp := func(description string) map[string]any {
		return map[string]any{
			"type":        "string",
			"description": description,
		}
	}

	properties := map[string]any{
		"command":     stringProp("The shell command to execute"),
		"working_dir": stringProp("Working directory for the command (default: workspace root)"),
	}
	return map[string]any{
		"type":       "object",
		"properties": properties,
		"required":   []string{"command"},
	}
}
// Execute validates and runs the shell command given in args["command"].
//
// Steps, in order:
//  1. Reject empty commands and commands containing NUL bytes.
//  2. Normalize the command and check it against the resolved deny patterns
//     plus the always-on path denials. A matching pattern is waived only when
//     every matching argument is covered by a path exemption. Package-install
//     commands are routed to the approval manager instead of being denied.
//  3. Return a hint when MaybeMemoryExecHint produces one.
//  4. Run the command in credentialed (direct exec) mode if it maps to a
//     configured binary.
//  5. Apply the exec approval policy.
//  6. Run the command in the sandbox when a sandbox manager and sandbox key
//     are present, otherwise on the host.
//
// args["working_dir"], when set, overrides the working directory. In restrict
// mode it is validated against the tenant-scoped workspace (with the team
// workspace allowed) on both the credentialed and the regular path, and an
// invalid directory is reported as an error rather than silently ignored.
func (t *ExecTool) Execute(ctx context.Context, args map[string]any) *Result {
	command, _ := args["command"].(string)
	if command == "" {
		return ErrorResult("command is required")
	}

	// Reject NUL bytes — they cause silent shell truncation enabling injection.
	if strings.ContainsRune(command, '\x00') {
		return ErrorResult("command contains invalid NUL byte")
	}

	// resolveCwd picks the directory the command runs in. The per-user
	// workspace from ctx is tenant-scoped; t.workspace is the global (master)
	// workspace and only serves as the fallback. In restrict mode a requested
	// working_dir is validated against the tenant-scoped base so non-master
	// tenants can't escape their scope. The team workspace is also allowed
	// (same as filesystem tools).
	resolveCwd := func() (string, error) {
		base := ToolWorkspaceFromCtx(ctx)
		if base == "" {
			base = t.workspace
		}
		wd, _ := args["working_dir"].(string)
		if wd == "" {
			return base, nil
		}
		if !effectiveRestrict(ctx, t.restrict) {
			return wd, nil
		}
		allowed := allowedWithTeamWorkspace(ctx, nil)
		resolved, err := resolvePathWithAllowed(wd, base, true, allowed)
		if err != nil {
			return "", err
		}
		return resolved, nil
	}

	// Normalize command before all deny checks: NFKC + zero-width strip prevents
	// Unicode-based pattern bypass while preserving functional command content.
	normalizedCommand := normalizeCommand(command)

	// Resolve deny patterns: per-agent overrides from context, fallback to all defaults.
	denyOverrides := store.ShellDenyGroupsFromContext(ctx)
	groupPatterns := ResolveDenyPatterns(denyOverrides)

	// Also resolve package_install patterns separately for approval routing.
	var pkgInstallPatterns []*regexp.Regexp
	if pkgGroup, ok := DenyGroupRegistry["package_install"]; ok && IsGroupDenied(denyOverrides, "package_install") {
		pkgInstallPatterns = pkgGroup.Patterns
	}

	// Combine group-based patterns + always-on path denials.
	allPatterns := make([]*regexp.Regexp, 0, len(groupPatterns)+len(t.pathDenyPatterns))
	allPatterns = append(allPatterns, groupPatterns...)
	allPatterns = append(allPatterns, t.pathDenyPatterns...)

	exemptions := append([]string{}, t.denyExemptions...)
	exemptions = append(exemptions, t.dynamicPathExemptions(ctx)...)

	// Split the command into shell words once; the result does not depend on
	// the pattern being checked.
	fields := parseExecCommandWords(normalizedCommand)
	if len(fields) == 0 {
		fields = strings.Fields(strings.TrimSpace(normalizedCommand))
	}
	pathBaseDir := ToolWorkspaceFromCtx(ctx)
	if pathBaseDir == "" {
		pathBaseDir = t.workspace
	}

	// Check for dangerous commands (applies to both host and sandbox).
	// pkgInstallApproved remembers an approval already granted for this
	// command so that further matching patterns do not prompt the admin again.
	pkgInstallApproved := false
	for _, pattern := range allPatterns {
		if !pattern.MatchString(normalizedCommand) {
			continue
		}

		// Check if exemption applies. Only exempt if EVERY field that
		// individually matches the deny pattern is covered by an exemption.
		// This prevents pipe/comment bypass: "cat /app/data/skills-store/x | cat /app/data/secret"
		// — the second field matches deny but has no exemption → denied.
		// Path traversal ("..") is rejected inside matchesAnyPathExemption to
		// prevent exemption escape.
		matchingFields, exemptFields := 0, 0
		for _, field := range fields {
			clean := strings.TrimSpace(field)
			if !pattern.MatchString(clean) {
				continue // field doesn't trigger this deny pattern
			}
			matchingFields++
			if matchesAnyPathExemption(clean, exemptions, pathBaseDir) {
				exemptFields++
			}
		}
		// Exempt only if at least one field matched AND all matched fields are exempt.
		if matchingFields > 0 && exemptFields == matchingFields {
			continue
		}

		// Package install commands: route through approval flow instead of hard deny.
		// This lets agents "request permission" from admin to install packages.
		if t.approvalMgr != nil && matchesAny(normalizedCommand, pkgInstallPatterns) {
			if pkgInstallApproved {
				continue
			}
			slog.Info("exec: package install requires approval", "command", truncateCmd(command, 100), "agent", t.agentID)
			decision, err := t.approvalMgr.RequestApproval(command, t.agentID, 2*time.Minute)
			if err != nil {
				return ErrorResult(fmt.Sprintf("package install approval: %v", err))
			}
			if decision == ApprovalDeny {
				return ErrorResult("package installation denied by admin")
			}
			// Approved — skip deny, continue to execution.
			pkgInstallApproved = true
			continue
		}

		return ErrorResult(fmt.Sprintf("command denied by safety policy: matches pattern %s", pattern.String()))
	}

	// Memory path hint: shell commands can't access DB-backed memory files.
	if hint := MaybeMemoryExecHint(normalizedCommand); hint != "" {
		return SilentResult(hint)
	}

	// Credentialed exec: if command matches a configured binary, use Direct Exec Mode.
	// This bypasses approval (admin trust) and shell (security).
	if cred, binary, cmdArgs := t.lookupCredentialedBinary(ctx, command); cred != nil {
		cwd, err := resolveCwd()
		if err != nil {
			return ErrorResult(err.Error())
		}
		sandboxKey := ToolSandboxKeyFromCtx(ctx)
		return t.executeCredentialed(ctx, cred, binary, cmdArgs, cwd, sandboxKey, command)
	}

	// Exec approval check (matching TS exec-approval.ts pipeline)
	if t.approvalMgr != nil {
		switch t.approvalMgr.CheckCommand(command) {
		case "deny":
			return ErrorResult("command denied by exec approval policy")
		case "ask":
			decision, err := t.approvalMgr.RequestApproval(command, t.agentID, 2*time.Minute)
			if err != nil {
				return ErrorResult(fmt.Sprintf("exec approval: %v", err))
			}
			if decision == ApprovalDeny {
				return ErrorResult("command denied by user")
			}
		}
	}

	cwd, err := resolveCwd()
	if err != nil {
		return ErrorResult(err.Error())
	}

	// Sandbox routing (sandboxKey from ctx — thread-safe)
	sandboxKey := ToolSandboxKeyFromCtx(ctx)
	if t.sandboxMgr != nil && sandboxKey != "" {
		return t.executeInSandbox(ctx, command, cwd, sandboxKey)
	}

	// Host execution
	return t.executeOnHost(ctx, command, cwd)
}
// matchesAny reports whether command matches at least one of patterns.
// An empty or nil pattern list never matches.
func matchesAny(command string, patterns []*regexp.Regexp) bool {
	for i := range patterns {
		if patterns[i].MatchString(command) {
			return true
		}
	}
	return false
}
// executeOnHost runs command through "sh -c" directly on the host, in cwd,
// bounded by the tool's timeout.
//
// Stdout and stderr are each capped at 1MB. The result contains stdout
// followed, when present, by a "STDERR:" section. A timeout or a failing
// command yields an error result; a command with no output yields a
// placeholder message.
func (t *ExecTool) executeOnHost(ctx context.Context, command, cwd string) *Result {
	// pipeWaitDelay bounds how long Run keeps waiting for the output pipes
	// once the context is done or the shell has exited.
	const pipeWaitDelay = 5 * time.Second

	ctx, cancel := context.WithTimeout(ctx, t.timeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, "sh", "-c", command)
	cmd.Dir = cwd
	// Cancelling the context only kills the shell. A grandchild that inherited
	// the output pipes would otherwise keep Run blocked until it exits on its
	// own, making the timeout ineffective.
	cmd.WaitDelay = pipeWaitDelay

	// Limit output to 1MB to prevent OOM from runaway commands.
	stdout := &limitedBuffer{max: 1 << 20}
	stderr := &limitedBuffer{max: 1 << 20}
	cmd.Stdout = stdout
	cmd.Stderr = stderr

	err := cmd.Run()
	timedOut := errors.Is(ctx.Err(), context.DeadlineExceeded)
	// ErrWaitDelay means the shell itself exited successfully and only a
	// leftover process kept the pipes open; that is not a command failure.
	if !timedOut && errors.Is(err, exec.ErrWaitDelay) {
		err = nil
	}

	var result string
	if stdout.Len() > 0 {
		result = stdout.String()
	}
	if stderr.Len() > 0 {
		if result != "" {
			result += "\n"
		}
		result += "STDERR:\n" + stderr.String()
	}

	if err != nil {
		if timedOut {
			return ErrorResult(fmt.Sprintf("command timed out after %s", t.timeout))
		}
		if result == "" {
			result = err.Error()
		}
		return ErrorResult(result)
	}

	if result == "" {
		result = "(command completed with no output)"
	}
	return SilentResult(capExecOutput(result, execMaxOutputChars))
}
// executeInSandbox runs command through "sh -c" inside the sandbox identified
// by sandboxKey.
//
// When the sandbox manager reports that sandboxing is disabled, the command
// runs on the host in cwd instead. Any other failure to obtain a sandbox
// fails closed with an error result. Output is formatted like host
// execution: stdout followed by an optional "STDERR:" section. A non-zero
// exit code produces an error result with any sandbox hint appended.
func (t *ExecTool) executeInSandbox(ctx context.Context, command, cwd, sandboxKey string) *Result {
	sb, err := t.sandboxMgr.Get(ctx, sandboxKey, t.workspace, SandboxConfigFromCtx(ctx))
	switch {
	case err == nil:
		// Sandbox acquired; continue below.
	case errors.Is(err, sandbox.ErrSandboxDisabled):
		return t.executeOnHost(ctx, command, cwd)
	default:
		// Docker unavailable (binary missing, daemon down) → fail closed.
		// Do NOT silently fallback to host — that defeats the purpose of sandboxing.
		slog.Warn("security.sandbox_unavailable",
			"error", err,
			"command", truncateCmd(command, 80),
		)
		return ErrorResult(fmt.Sprintf("sandbox unavailable: %v (will not fall back to unsandboxed host execution)", err))
	}

	// Map host workdir to container workdir via SandboxCwd helper.
	containerCwd, cwdErr := SandboxCwd(ctx, t.workspace, sandbox.DefaultContainerWorkdir)
	if cwdErr != nil {
		return ErrorResult(fmt.Sprintf("sandbox path mapping: %v", cwdErr))
	}

	result, err := sb.Exec(ctx, []string{"sh", "-c", command}, containerCwd) //nolint: no ExecOption for normal exec
	if err != nil {
		return ErrorResult(fmt.Sprintf("sandbox exec: %v", err))
	}

	// Format output same as host execution.
	combined := result.Stdout
	if result.Stderr != "" {
		if combined != "" {
			combined += "\n"
		}
		combined += "STDERR:\n" + result.Stderr
	}

	if code := result.ExitCode; code != 0 {
		if combined == "" {
			combined = fmt.Sprintf("command exited with code %d", code)
		}
		return ErrorResult(combined + MaybeSandboxHint(code, combined))
	}

	if combined == "" {
		combined = "(command completed with no output)"
	}
	return SilentResult(capExecOutput(combined, execMaxOutputChars))
}
// limitedBuffer is an io.Writer that keeps at most max bytes and silently
// discards the rest, preventing OOM from runaway commands.
type limitedBuffer struct {
	buf       bytes.Buffer
	max       int  // maximum number of bytes retained
	truncated bool // set once any write has been cut short or dropped
}

// Write stores as much of p as still fits and drops the remainder. It always
// reports len(p) bytes written with a nil error, so the producer never sees a
// short write.
func (lb *limitedBuffer) Write(p []byte) (int, error) {
	n := len(p)
	if lb.truncated {
		return n, nil
	}

	room := lb.max - lb.buf.Len()
	switch {
	case room <= 0:
		lb.truncated = true
	case n > room:
		lb.buf.Write(p[:room])
		lb.truncated = true
	default:
		lb.buf.Write(p)
	}
	return n, nil
}

// String returns the retained output, followed by a truncation notice when
// any bytes were discarded.
func (lb *limitedBuffer) String() string {
	if !lb.truncated {
		return lb.buf.String()
	}
	return lb.buf.String() + "\n[output truncated at 1MB]"
}

// Len returns the number of bytes currently retained.
func (lb *limitedBuffer) Len() int {
	return lb.buf.Len()
}