mirror of
https://github.com/tiennm99/goclaw.git
synced 2026-06-10 10:10:49 +00:00
0df619023c
- read_file: reject binary files (images, audio, video, archives) with helpful error pointing to the correct specialized tool - read_image: add optional `path` parameter to analyze workspace/generated images via vision API (with workspace restriction + denied path checks)
471 lines
17 KiB
Go
471 lines
17 KiB
Go
package tools
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"syscall"
|
|
|
|
"github.com/nextlevelbuilder/goclaw/internal/bootstrap"
|
|
"github.com/nextlevelbuilder/goclaw/internal/sandbox"
|
|
"github.com/nextlevelbuilder/goclaw/internal/store"
|
|
)
|
|
|
|
// virtualSystemFiles maps the base name of a file that is injected into the
// system prompt to the hint returned when the model tries to read it.
// These files don't exist on disk, so read_file answers with the hint instead
// of attempting a filesystem read.
var virtualSystemFiles = map[string]string{
	bootstrap.TeamFile:         "TEAM.md is already loaded in your system prompt. Refer to the TEAM.md section in your context above for team member information.",
	bootstrap.AvailabilityFile: "AVAILABILITY.md is already loaded in your system prompt. Refer to the AVAILABILITY.md section in your context above for agent availability information.",
}
|
|
|
|
// ReadFileTool reads file contents, optionally through a sandbox container.
//
// Besides plain filesystem reads it can route selected paths to interceptors
// (context files, memory files) and enforce allow/deny path prefixes.
type ReadFileTool struct {
	workspace        string                  // default workspace root; a per-request workspace from ctx takes precedence on host reads
	restrict         bool                    // default for restricting reads to the workspace (passed through effectiveRestrict)
	allowedPrefixes  []string                // extra allowed path prefixes (e.g. skills dirs)
	deniedPrefixes   []string                // path prefixes to deny access to (e.g. .goclaw)
	sandboxMgr       sandbox.Manager         // nil = direct host access
	contextFileIntc  *ContextFileInterceptor // nil = no virtual FS routing
	memIntc          *MemoryInterceptor      // nil = no memory routing
	groupWriterCache *store.GroupWriterCache // nil = no group read restriction
}
|
|
|
|
// SetContextFileInterceptor enables virtual FS routing for context files.
|
|
func (t *ReadFileTool) SetContextFileInterceptor(intc *ContextFileInterceptor) {
|
|
t.contextFileIntc = intc
|
|
}
|
|
|
|
// SetMemoryInterceptor enables virtual FS routing for memory files.
|
|
func (t *ReadFileTool) SetMemoryInterceptor(intc *MemoryInterceptor) {
|
|
t.memIntc = intc
|
|
}
|
|
|
|
// SetGroupWriterCache enables group read restriction for SOUL.md/AGENTS.md.
|
|
func (t *ReadFileTool) SetGroupWriterCache(c *store.GroupWriterCache) {
|
|
t.groupWriterCache = c
|
|
}
|
|
|
|
func NewReadFileTool(workspace string, restrict bool) *ReadFileTool {
|
|
return &ReadFileTool{workspace: workspace, restrict: restrict}
|
|
}
|
|
|
|
// AllowPaths adds extra path prefixes that read_file is allowed to access
|
|
// even when restrict_to_workspace is true (e.g. skills directories).
|
|
func (t *ReadFileTool) AllowPaths(prefixes ...string) {
|
|
t.allowedPrefixes = append(t.allowedPrefixes, prefixes...)
|
|
}
|
|
|
|
// DenyPaths adds path prefixes that read_file must reject (e.g. hidden dirs).
|
|
func (t *ReadFileTool) DenyPaths(prefixes ...string) {
|
|
t.deniedPrefixes = append(t.deniedPrefixes, prefixes...)
|
|
}
|
|
|
|
func NewSandboxedReadFileTool(workspace string, restrict bool, mgr sandbox.Manager) *ReadFileTool {
|
|
return &ReadFileTool{workspace: workspace, restrict: restrict, sandboxMgr: mgr}
|
|
}
|
|
|
|
// SetSandboxKey is a no-op; sandbox key is now read from ctx (thread-safe).
|
|
func (t *ReadFileTool) SetSandboxKey(key string) {}
|
|
|
|
// Name returns the tool identifier, "read_file".
func (t *ReadFileTool) Name() string {
	return "read_file"
}
|
|
// Description returns the one-line, human-readable summary of the tool.
func (t *ReadFileTool) Description() string {
	return "Read the contents of a file"
}
|
|
func (t *ReadFileTool) Parameters() map[string]any {
|
|
return map[string]any{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"path": map[string]any{
|
|
"type": "string",
|
|
"description": "File path (relative to workspace, or absolute)",
|
|
},
|
|
},
|
|
"required": []string{"path"},
|
|
}
|
|
}
|
|
|
|
func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *Result {
|
|
path, _ := args["path"].(string)
|
|
if path == "" {
|
|
return ErrorResult("path is required")
|
|
}
|
|
|
|
// Group read restriction: block non-writers from reading SOUL.md/AGENTS.md
|
|
if t.groupWriterCache != nil {
|
|
base := filepath.Base(path)
|
|
if base == bootstrap.SoulFile || base == bootstrap.AgentsFile {
|
|
if err := store.CheckGroupWritePermission(ctx, t.groupWriterCache); err != nil {
|
|
return ErrorResult(fmt.Sprintf("permission denied: %s is restricted in this group", base))
|
|
}
|
|
}
|
|
}
|
|
|
|
// Virtual FS: route context files to DB
|
|
if t.contextFileIntc != nil {
|
|
if content, handled, err := t.contextFileIntc.ReadFile(ctx, path); handled {
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("failed to read context file: %v", err))
|
|
}
|
|
if content == "" {
|
|
return ErrorResult(fmt.Sprintf("context file not found: %s", path))
|
|
}
|
|
return SilentResult(content)
|
|
}
|
|
}
|
|
|
|
// Virtual system files: TEAM.md, DELEGATION.md, AVAILABILITY.md are injected
|
|
// into the system prompt and don't exist on disk. Return a helpful hint.
|
|
baseName := filepath.Base(path)
|
|
if hint, ok := virtualSystemFiles[baseName]; ok {
|
|
return SilentResult(hint)
|
|
}
|
|
|
|
// Virtual FS: route memory files to DB
|
|
if t.memIntc != nil {
|
|
if content, handled, err := t.memIntc.ReadFile(ctx, path); handled {
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("failed to read memory file: %v", err))
|
|
}
|
|
if content == "" {
|
|
return SilentResult(fmt.Sprintf("(memory file %s does not exist yet — it will be created when memory is saved)", path))
|
|
}
|
|
return SilentResult(content)
|
|
}
|
|
}
|
|
|
|
// Sandbox routing (sandboxKey from ctx — thread-safe)
|
|
sandboxKey := ToolSandboxKeyFromCtx(ctx)
|
|
if t.sandboxMgr != nil && sandboxKey != "" {
|
|
return t.executeInSandbox(ctx, path, sandboxKey)
|
|
}
|
|
|
|
// Host execution — use per-user workspace from context if available
|
|
workspace := ToolWorkspaceFromCtx(ctx)
|
|
if workspace == "" {
|
|
workspace = t.workspace
|
|
}
|
|
allowed := allowedWithTeamWorkspace(ctx, t.allowedPrefixes)
|
|
resolved, err := resolvePathWithAllowed(path, workspace, effectiveRestrict(ctx, t.restrict), allowed)
|
|
if err != nil {
|
|
return ErrorResult(err.Error())
|
|
}
|
|
if err := checkDeniedPath(resolved, t.workspace, t.deniedPrefixes); err != nil {
|
|
return ErrorResult(err.Error())
|
|
}
|
|
|
|
// Block binary files — reading them wastes context with garbled data.
|
|
if isBinaryFileExt(resolved) {
|
|
ext := strings.ToLower(filepath.Ext(resolved))
|
|
return ErrorResult(fmt.Sprintf("cannot read binary file (%s). Use the appropriate tool: read_image for images, read_document for documents, read_audio for audio, read_video for video.", ext))
|
|
}
|
|
|
|
data, err := os.ReadFile(resolved)
|
|
if err != nil {
|
|
msg := fmt.Sprintf("failed to read file: %v", err)
|
|
if os.IsNotExist(err) {
|
|
if teamWs := ToolTeamWorkspaceFromCtx(ctx); teamWs != "" && !strings.HasPrefix(resolved, teamWs) {
|
|
msg += fmt.Sprintf("\nHint: file may be in the team workspace. Try: read_file(path=\"%s/%s\")", teamWs, path)
|
|
}
|
|
}
|
|
return ErrorResult(msg)
|
|
}
|
|
|
|
return SilentResult(string(data))
|
|
}
|
|
|
|
func (t *ReadFileTool) executeInSandbox(ctx context.Context, path, sandboxKey string) *Result {
|
|
bridge, err := t.getFsBridge(ctx, sandboxKey)
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("sandbox error: %v", err))
|
|
}
|
|
|
|
data, err := bridge.ReadFile(ctx, path)
|
|
if err != nil {
|
|
return ErrorResult(fmt.Sprintf("failed to read file: %v", err))
|
|
}
|
|
|
|
return SilentResult(data)
|
|
}
|
|
|
|
func (t *ReadFileTool) getFsBridge(ctx context.Context, sandboxKey string) (*sandbox.FsBridge, error) {
|
|
sb, err := t.sandboxMgr.Get(ctx, sandboxKey, t.workspace, SandboxConfigFromCtx(ctx))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return sandbox.NewFsBridge(sb.ID(), "/workspace"), nil
|
|
}
|
|
|
|
// allowedWithTeamWorkspace returns the allowed prefixes with team workspace appended
|
|
// if present in context. Thread-safe: creates a new slice per request.
|
|
func allowedWithTeamWorkspace(ctx context.Context, base []string) []string {
|
|
teamWs := ToolTeamWorkspaceFromCtx(ctx)
|
|
if teamWs == "" {
|
|
return base
|
|
}
|
|
out := make([]string, len(base)+1)
|
|
copy(out, base)
|
|
out[len(base)] = teamWs
|
|
return out
|
|
}
|
|
|
|
// resolvePathWithAllowed is like resolvePath but also allows paths under extra prefixes.
|
|
func resolvePathWithAllowed(path, workspace string, restrict bool, allowedPrefixes []string) (string, error) {
|
|
resolved, err := resolvePath(path, workspace, restrict)
|
|
if err == nil {
|
|
return resolved, nil
|
|
}
|
|
// If restricted and denied, check if path falls under an allowed prefix.
|
|
// Resolve symlinks in the candidate path for safe comparison.
|
|
cleaned := filepath.Clean(path)
|
|
absPath, _ := filepath.Abs(cleaned)
|
|
real, evalErr := filepath.EvalSymlinks(absPath)
|
|
if evalErr != nil {
|
|
// Try resolving parent for non-existent files
|
|
parentReal, parentErr := filepath.EvalSymlinks(filepath.Dir(absPath))
|
|
if parentErr != nil {
|
|
return "", err
|
|
}
|
|
real = filepath.Join(parentReal, filepath.Base(absPath))
|
|
}
|
|
for _, prefix := range allowedPrefixes {
|
|
absPrefix, _ := filepath.Abs(prefix)
|
|
prefixReal, prefixErr := filepath.EvalSymlinks(absPrefix)
|
|
if prefixErr != nil {
|
|
prefixReal = absPrefix
|
|
}
|
|
if isPathInside(real, prefixReal) {
|
|
slog.Debug("read_file: allowed by prefix", "path", real, "prefix", prefixReal)
|
|
return real, nil
|
|
}
|
|
}
|
|
slog.Warn("read_file: access denied", "path", cleaned, "workspace", workspace, "allowedPrefixes", allowedPrefixes)
|
|
return "", err
|
|
}
|
|
|
|
// checkDeniedPath returns an error if the resolved path falls under any denied prefix.
|
|
// Denied prefixes are relative to the workspace (e.g. ".goclaw" denies workspace/.goclaw/).
|
|
// The resolved path should already be canonical (from resolvePath with restrict=true).
|
|
func checkDeniedPath(resolved, workspace string, deniedPrefixes []string) error {
|
|
if len(deniedPrefixes) == 0 {
|
|
return nil
|
|
}
|
|
absResolved, _ := filepath.Abs(resolved)
|
|
absWorkspace, _ := filepath.Abs(workspace)
|
|
// Resolve workspace to canonical form for consistent comparison.
|
|
wsReal, err := filepath.EvalSymlinks(absWorkspace)
|
|
if err != nil {
|
|
wsReal = absWorkspace
|
|
}
|
|
for _, prefix := range deniedPrefixes {
|
|
denied := filepath.Join(wsReal, prefix)
|
|
if isPathInside(absResolved, denied) {
|
|
return fmt.Errorf("access denied: path %s is restricted", prefix)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// binaryFileExts is the set of lower-case file extensions (with leading dot)
// that must not be read as text. Reading such files wastes context with
// garbled binary data.
var binaryFileExts = func() map[string]bool {
	groups := [][]string{
		// Images
		{".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".ico", ".tiff", ".tif"},
		// Audio
		{".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a"},
		// Video
		{".mp4", ".avi", ".mov", ".mkv", ".webm"},
		// Archives
		{".zip", ".tar", ".gz", ".bz2", ".7z", ".rar"},
		// Documents (binary)
		{".pdf", ".docx", ".xlsx", ".pptx"},
		// Executables
		{".exe", ".dll", ".so", ".dylib"},
	}
	set := make(map[string]bool)
	for _, group := range groups {
		for _, ext := range group {
			set[ext] = true
		}
	}
	return set
}()

// isBinaryFileExt reports whether the extension of path, compared
// case-insensitively, is listed in binaryFileExts.
func isBinaryFileExt(path string) bool {
	ext := strings.ToLower(filepath.Ext(path))
	return binaryFileExts[ext]
}
|
|
|
|
// resolvePath resolves a path relative to the workspace and validates it.
|
|
// When restrict=true, resolves symlinks to canonical paths and rejects
|
|
// paths that escape the workspace boundary (symlink/hardlink attacks).
|
|
func resolvePath(path, workspace string, restrict bool) (string, error) {
|
|
var resolved string
|
|
if filepath.IsAbs(path) {
|
|
resolved = filepath.Clean(path)
|
|
} else {
|
|
resolved = filepath.Clean(filepath.Join(workspace, path))
|
|
}
|
|
|
|
if !restrict {
|
|
return resolved, nil
|
|
}
|
|
|
|
// Resolve workspace to canonical path (follow symlinks in workspace path itself).
|
|
absWorkspace, _ := filepath.Abs(workspace)
|
|
wsReal, err := filepath.EvalSymlinks(absWorkspace)
|
|
if err != nil {
|
|
wsReal = absWorkspace // workspace doesn't exist yet — use as-is
|
|
}
|
|
|
|
// Resolve the target path to canonical form (follows all symlinks).
|
|
absResolved, _ := filepath.Abs(resolved)
|
|
real, err := filepath.EvalSymlinks(absResolved)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
// Check if the path itself is a symlink (broken/dangling).
|
|
// Lstat doesn't follow symlinks, so it succeeds even for broken ones.
|
|
if linfo, lerr := os.Lstat(absResolved); lerr == nil && linfo.Mode()&os.ModeSymlink != 0 {
|
|
// It's a broken symlink — read target and validate.
|
|
target, readErr := os.Readlink(absResolved)
|
|
if readErr != nil {
|
|
return "", fmt.Errorf("access denied: cannot resolve symlink")
|
|
}
|
|
if !filepath.IsAbs(target) {
|
|
target = filepath.Join(filepath.Dir(absResolved), target)
|
|
}
|
|
target = filepath.Clean(target)
|
|
|
|
// Resolve through existing ancestors to catch chained symlinks
|
|
// (e.g. link1 → link2 → /outside) where intermediate targets escape.
|
|
resolved, resolveErr := resolveThroughExistingAncestors(target)
|
|
if resolveErr != nil {
|
|
slog.Warn("security.broken_symlink_resolve_failed", "path", path, "target", target)
|
|
return "", fmt.Errorf("access denied: cannot resolve broken symlink target")
|
|
}
|
|
if !isPathInside(resolved, wsReal) {
|
|
slog.Warn("security.broken_symlink_escape", "path", path, "target", resolved, "workspace", wsReal)
|
|
return "", fmt.Errorf("access denied: broken symlink target outside workspace")
|
|
}
|
|
real = resolved
|
|
} else {
|
|
// Truly non-existent file (not a symlink): walk up to find the
|
|
// deepest existing ancestor so nested new dirs (e.g. posts/file.md)
|
|
// are allowed as long as an ancestor is inside the workspace.
|
|
ancestorReal, ancestorErr := resolveThroughExistingAncestors(absResolved)
|
|
if ancestorErr != nil {
|
|
return "", fmt.Errorf("access denied: cannot resolve path")
|
|
}
|
|
real = ancestorReal
|
|
}
|
|
} else {
|
|
// Permission error or other — reject.
|
|
slog.Warn("security.path_resolve_failed", "path", path, "error", err)
|
|
return "", fmt.Errorf("access denied: cannot resolve path")
|
|
}
|
|
}
|
|
|
|
// Validate canonical path stays within canonical workspace.
|
|
if !isPathInside(real, wsReal) {
|
|
slog.Warn("security.path_escape", "path", path, "resolved", real, "workspace", wsReal)
|
|
return "", fmt.Errorf("access denied: path outside workspace")
|
|
}
|
|
|
|
// Reject paths with mutable symlink components (TOCTOU symlink rebind risk).
|
|
// A symlink in the path whose parent directory is writable could be replaced
|
|
// between resolution time and actual file operation.
|
|
if hasMutableSymlinkParent(real) {
|
|
slog.Warn("security.mutable_symlink_parent", "path", path, "resolved", real)
|
|
return "", fmt.Errorf("access denied: path contains mutable symlink component")
|
|
}
|
|
|
|
// Reject hardlinked files (nlink > 1) to prevent hardlink-based escapes.
|
|
if err := checkHardlink(real); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return real, nil
|
|
}
|
|
|
|
// isPathInside reports whether child is inside or equal to the parent
// directory. The comparison is purely lexical: both arguments are expected
// to be cleaned, absolute paths, and no filesystem access takes place.
//
// A parent that already ends in a path separator — the filesystem root, or a
// prefix written as "/ws/" — is handled without appending a second separator,
// so every absolute path counts as being inside the root.
func isPathInside(child, parent string) bool {
	if child == parent {
		return true
	}
	sep := string(filepath.Separator)
	if !strings.HasSuffix(parent, sep) {
		// Require a full component match so "/ws2" is not inside "/ws".
		parent += sep
	}
	return strings.HasPrefix(child, parent)
}
|
|
|
|
// resolveThroughExistingAncestors canonicalizes target even when it does not
// (fully) exist. If the whole path resolves, that result is returned.
// Otherwise the path is walked upwards until an ancestor resolves with
// EvalSymlinks; the skipped components are then appended to the canonical
// ancestor. This handles broken symlinks whose targets contain intermediate
// symlinks that escape the workspace. If no ancestor resolves, the cleaned
// target is returned. The error result is always nil.
func resolveThroughExistingAncestors(target string) (string, error) {
	// Fast path: the target exists and every symlink resolves.
	if canonical, err := filepath.EvalSymlinks(target); err == nil {
		return canonical, nil
	}

	// missing collects the unresolved components, innermost first.
	var missing []string
	for dir := target; ; {
		up := filepath.Dir(dir)
		if up == dir {
			// Reached the top without finding an existing directory.
			return filepath.Clean(target), nil
		}
		missing = append(missing, filepath.Base(dir))
		dir = up

		rebuilt, err := filepath.EvalSymlinks(dir)
		if err != nil {
			continue
		}
		// Found an existing ancestor — re-attach the tail, outermost first.
		for i := len(missing) - 1; i >= 0; i-- {
			rebuilt = filepath.Join(rebuilt, missing[i])
		}
		return rebuilt, nil
	}
}
|
|
|
|
// hasMutableSymlinkParent reports whether some component of path is a symlink
// that sits in a directory the current process may write to. Such a symlink
// could be replaced between path resolution and the actual file operation
// (TOCTOU symlink rebind attack). The walk starts at the separator root and
// stops at the first component that cannot be inspected with Lstat.
func hasMutableSymlinkParent(path string) bool {
	const writeOK = 0x2 // W_OK

	sep := string(filepath.Separator)
	walked := sep
	for _, part := range strings.Split(filepath.Clean(path), sep) {
		if part == "" {
			continue
		}
		walked = filepath.Join(walked, part)

		fi, err := os.Lstat(walked)
		if err != nil {
			// Non-existent from here on — nothing further to check.
			return false
		}
		if fi.Mode()&os.ModeSymlink == 0 {
			continue
		}
		// Symlink found — is the directory holding it writable?
		if syscall.Access(filepath.Dir(walked), writeOK) == nil {
			return true
		}
	}
	return false
}
|
|
|
|
// checkHardlink returns an error for non-directory files whose link count is
// greater than one (hardlink attack prevention). Directories naturally have
// nlink > 1 and are exempt. Paths that cannot be inspected with Lstat are
// accepted here; they will fail later at read/write time.
func checkHardlink(path string) error {
	info, err := os.Lstat(path)
	if err != nil || info.IsDir() {
		return nil
	}

	stat, ok := info.Sys().(*syscall.Stat_t)
	if !ok || stat.Nlink <= 1 {
		return nil
	}

	slog.Warn("security.hardlink_rejected", "path", path, "nlink", stat.Nlink)
	return fmt.Errorf("access denied: hardlinked file not allowed")
}
|