package tools

import (
	"context"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"strings"
	"syscall"

	"github.com/nextlevelbuilder/goclaw/internal/bootstrap"
	"github.com/nextlevelbuilder/goclaw/internal/sandbox"
	"github.com/nextlevelbuilder/goclaw/internal/store"
)

// virtualSystemFiles are files dynamically injected into the system prompt.
// They don't exist on disk — if the model tries to read them, return a hint.
// The map is keyed by file base name; the value is the hint text returned to
// the caller in place of file contents.
var virtualSystemFiles = map[string]string{
	bootstrap.TeamFile:         "TEAM.md is already loaded in your system prompt. Refer to the TEAM.md section in your context above for team member information.",
	bootstrap.AvailabilityFile: "AVAILABILITY.md is already loaded in your system prompt. Refer to the AVAILABILITY.md section in your context above for agent availability information.",
}

// ReadFileTool reads file contents, optionally through a sandbox container.
// Optional collaborators (interceptors, sandbox manager, group writer cache)
// are nil by default and are only consulted when set.
type ReadFileTool struct {
	workspace        string                  // default workspace root, used when the request context carries none
	restrict         bool                    // default for confining paths to the workspace
	allowedPrefixes  []string                // extra allowed path prefixes (e.g. skills dirs)
	deniedPrefixes   []string                // path prefixes to deny access to (e.g. .goclaw)
	sandboxMgr       sandbox.Manager         // nil = direct host access
	contextFileIntc  *ContextFileInterceptor // nil = no virtual FS routing
	memIntc          *MemoryInterceptor      // nil = no memory routing
	groupWriterCache *store.GroupWriterCache // nil = no group read restriction
}

// SetContextFileInterceptor enables virtual FS routing for context files.
// Passing nil disables the routing again.
func (t *ReadFileTool) SetContextFileInterceptor(intc *ContextFileInterceptor) {
	t.contextFileIntc = intc
}

// SetMemoryInterceptor enables virtual FS routing for memory files.
// Passing nil disables the routing again.
func (t *ReadFileTool) SetMemoryInterceptor(intc *MemoryInterceptor) {
	t.memIntc = intc
}

// SetGroupWriterCache enables group read restriction for SOUL.md/AGENTS.md.
func (t *ReadFileTool) SetGroupWriterCache(c *store.GroupWriterCache) { t.groupWriterCache = c } func NewReadFileTool(workspace string, restrict bool) *ReadFileTool { return &ReadFileTool{workspace: workspace, restrict: restrict} } // AllowPaths adds extra path prefixes that read_file is allowed to access // even when restrict_to_workspace is true (e.g. skills directories). func (t *ReadFileTool) AllowPaths(prefixes ...string) { t.allowedPrefixes = append(t.allowedPrefixes, prefixes...) } // DenyPaths adds path prefixes that read_file must reject (e.g. hidden dirs). func (t *ReadFileTool) DenyPaths(prefixes ...string) { t.deniedPrefixes = append(t.deniedPrefixes, prefixes...) } func NewSandboxedReadFileTool(workspace string, restrict bool, mgr sandbox.Manager) *ReadFileTool { return &ReadFileTool{workspace: workspace, restrict: restrict, sandboxMgr: mgr} } // SetSandboxKey is a no-op; sandbox key is now read from ctx (thread-safe). func (t *ReadFileTool) SetSandboxKey(key string) {} func (t *ReadFileTool) Name() string { return "read_file" } func (t *ReadFileTool) Description() string { return "Read the contents of a file" } func (t *ReadFileTool) Parameters() map[string]any { return map[string]any{ "type": "object", "properties": map[string]any{ "path": map[string]any{ "type": "string", "description": "File path (relative to workspace, or absolute)", }, }, "required": []string{"path"}, } } func (t *ReadFileTool) Execute(ctx context.Context, args map[string]any) *Result { path, _ := args["path"].(string) if path == "" { return ErrorResult("path is required") } // Group read restriction: block non-writers from reading SOUL.md/AGENTS.md if t.groupWriterCache != nil { base := filepath.Base(path) if base == bootstrap.SoulFile || base == bootstrap.AgentsFile { if err := store.CheckGroupWritePermission(ctx, t.groupWriterCache); err != nil { return ErrorResult(fmt.Sprintf("permission denied: %s is restricted in this group", base)) } } } // Virtual FS: route context files 
to DB if t.contextFileIntc != nil { if content, handled, err := t.contextFileIntc.ReadFile(ctx, path); handled { if err != nil { return ErrorResult(fmt.Sprintf("failed to read context file: %v", err)) } if content == "" { return ErrorResult(fmt.Sprintf("context file not found: %s", path)) } return SilentResult(content) } } // Virtual system files: TEAM.md, DELEGATION.md, AVAILABILITY.md are injected // into the system prompt and don't exist on disk. Return a helpful hint. baseName := filepath.Base(path) if hint, ok := virtualSystemFiles[baseName]; ok { return SilentResult(hint) } // Virtual FS: route memory files to DB if t.memIntc != nil { if content, handled, err := t.memIntc.ReadFile(ctx, path); handled { if err != nil { return ErrorResult(fmt.Sprintf("failed to read memory file: %v", err)) } if content == "" { return SilentResult(fmt.Sprintf("(memory file %s does not exist yet — it will be created when memory is saved)", path)) } return SilentResult(content) } } // Sandbox routing (sandboxKey from ctx — thread-safe) sandboxKey := ToolSandboxKeyFromCtx(ctx) if t.sandboxMgr != nil && sandboxKey != "" { return t.executeInSandbox(ctx, path, sandboxKey) } // Host execution — use per-user workspace from context if available workspace := ToolWorkspaceFromCtx(ctx) if workspace == "" { workspace = t.workspace } allowed := allowedWithTeamWorkspace(ctx, t.allowedPrefixes) resolved, err := resolvePathWithAllowed(path, workspace, effectiveRestrict(ctx, t.restrict), allowed) if err != nil { return ErrorResult(err.Error()) } if err := checkDeniedPath(resolved, t.workspace, t.deniedPrefixes); err != nil { return ErrorResult(err.Error()) } // Block binary files — reading them wastes context with garbled data. if isBinaryFileExt(resolved) { ext := strings.ToLower(filepath.Ext(resolved)) return ErrorResult(fmt.Sprintf("cannot read binary file (%s). 
Use the appropriate tool: read_image for images, read_document for documents, read_audio for audio, read_video for video.", ext)) } data, err := os.ReadFile(resolved) if err != nil { msg := fmt.Sprintf("failed to read file: %v", err) if os.IsNotExist(err) { if teamWs := ToolTeamWorkspaceFromCtx(ctx); teamWs != "" && !strings.HasPrefix(resolved, teamWs) { msg += fmt.Sprintf("\nHint: file may be in the team workspace. Try: read_file(path=\"%s/%s\")", teamWs, path) } } return ErrorResult(msg) } return SilentResult(string(data)) } func (t *ReadFileTool) executeInSandbox(ctx context.Context, path, sandboxKey string) *Result { bridge, err := t.getFsBridge(ctx, sandboxKey) if err != nil { return ErrorResult(fmt.Sprintf("sandbox error: %v", err)) } data, err := bridge.ReadFile(ctx, path) if err != nil { return ErrorResult(fmt.Sprintf("failed to read file: %v", err)) } return SilentResult(data) } func (t *ReadFileTool) getFsBridge(ctx context.Context, sandboxKey string) (*sandbox.FsBridge, error) { sb, err := t.sandboxMgr.Get(ctx, sandboxKey, t.workspace, SandboxConfigFromCtx(ctx)) if err != nil { return nil, err } return sandbox.NewFsBridge(sb.ID(), "/workspace"), nil } // allowedWithTeamWorkspace returns the allowed prefixes with team workspace appended // if present in context. Thread-safe: creates a new slice per request. func allowedWithTeamWorkspace(ctx context.Context, base []string) []string { teamWs := ToolTeamWorkspaceFromCtx(ctx) if teamWs == "" { return base } out := make([]string, len(base)+1) copy(out, base) out[len(base)] = teamWs return out } // resolvePathWithAllowed is like resolvePath but also allows paths under extra prefixes. func resolvePathWithAllowed(path, workspace string, restrict bool, allowedPrefixes []string) (string, error) { resolved, err := resolvePath(path, workspace, restrict) if err == nil { return resolved, nil } // If restricted and denied, check if path falls under an allowed prefix. 
// Resolve symlinks in the candidate path for safe comparison. cleaned := filepath.Clean(path) absPath, _ := filepath.Abs(cleaned) real, evalErr := filepath.EvalSymlinks(absPath) if evalErr != nil { // Try resolving parent for non-existent files parentReal, parentErr := filepath.EvalSymlinks(filepath.Dir(absPath)) if parentErr != nil { return "", err } real = filepath.Join(parentReal, filepath.Base(absPath)) } for _, prefix := range allowedPrefixes { absPrefix, _ := filepath.Abs(prefix) prefixReal, prefixErr := filepath.EvalSymlinks(absPrefix) if prefixErr != nil { prefixReal = absPrefix } if isPathInside(real, prefixReal) { slog.Debug("read_file: allowed by prefix", "path", real, "prefix", prefixReal) return real, nil } } slog.Warn("read_file: access denied", "path", cleaned, "workspace", workspace, "allowedPrefixes", allowedPrefixes) return "", err } // checkDeniedPath returns an error if the resolved path falls under any denied prefix. // Denied prefixes are relative to the workspace (e.g. ".goclaw" denies workspace/.goclaw/). // The resolved path should already be canonical (from resolvePath with restrict=true). func checkDeniedPath(resolved, workspace string, deniedPrefixes []string) error { if len(deniedPrefixes) == 0 { return nil } absResolved, _ := filepath.Abs(resolved) absWorkspace, _ := filepath.Abs(workspace) // Resolve workspace to canonical form for consistent comparison. wsReal, err := filepath.EvalSymlinks(absWorkspace) if err != nil { wsReal = absWorkspace } for _, prefix := range deniedPrefixes { denied := filepath.Join(wsReal, prefix) if isPathInside(absResolved, denied) { return fmt.Errorf("access denied: path %s is restricted", prefix) } } return nil } // binaryFileExts are file extensions that should not be read as text. // Reading these wastes context with garbled binary data. 
var binaryFileExts = map[string]bool{
	// Images
	".jpg": true, ".jpeg": true, ".png": true, ".gif": true, ".webp": true, ".bmp": true, ".ico": true, ".tiff": true, ".tif": true,
	// Audio
	".mp3": true, ".wav": true, ".ogg": true, ".flac": true, ".aac": true, ".m4a": true,
	// Video
	".mp4": true, ".avi": true, ".mov": true, ".mkv": true, ".webm": true,
	// Archives
	".zip": true, ".tar": true, ".gz": true, ".bz2": true, ".7z": true, ".rar": true,
	// Documents (binary)
	".pdf": true, ".docx": true, ".xlsx": true, ".pptx": true,
	// Executables
	".exe": true, ".dll": true, ".so": true, ".dylib": true,
}

// isBinaryFileExt returns true if the file extension indicates a binary file.
// The comparison is case-insensitive and only looks at the final extension.
func isBinaryFileExt(path string) bool {
	return binaryFileExts[strings.ToLower(filepath.Ext(path))]
}

// resolvePath resolves a path relative to the workspace and validates it.
// When restrict=true, resolves symlinks to canonical paths and rejects
// paths that escape the workspace boundary (symlink/hardlink attacks).
//
// With restrict=false the cleaned path is returned without touching the
// filesystem. With restrict=true the returned path is canonical; an error
// whose message starts with "access denied" is returned when the path cannot
// be resolved, lies outside the workspace, passes through a replaceable
// symlink, or names a hardlinked regular file.
func resolvePath(path, workspace string, restrict bool) (string, error) {
	var resolved string
	if filepath.IsAbs(path) {
		resolved = filepath.Clean(path)
	} else {
		resolved = filepath.Clean(filepath.Join(workspace, path))
	}

	if !restrict {
		return resolved, nil
	}

	// Resolve workspace to canonical path (follow symlinks in workspace path itself).
	// Fail closed if it cannot even be made absolute: an empty workspace must
	// never be used as the containment boundary.
	absWorkspace, err := filepath.Abs(workspace)
	if err != nil {
		slog.Warn("security.path_resolve_failed", "path", path, "error", err)
		return "", fmt.Errorf("access denied: cannot resolve path")
	}
	wsReal, err := filepath.EvalSymlinks(absWorkspace)
	if err != nil {
		wsReal = absWorkspace // workspace doesn't exist yet — use as-is
	}

	// Resolve the target path to canonical form (follows all symlinks).
	absResolved, err := filepath.Abs(resolved)
	if err != nil {
		slog.Warn("security.path_resolve_failed", "path", path, "error", err)
		return "", fmt.Errorf("access denied: cannot resolve path")
	}
	real, err := filepath.EvalSymlinks(absResolved)
	if err != nil {
		if !os.IsNotExist(err) {
			// Permission error or other — reject.
			slog.Warn("security.path_resolve_failed", "path", path, "error", err)
			return "", fmt.Errorf("access denied: cannot resolve path")
		}

		// Check if the path itself is a symlink (broken/dangling).
		// Lstat doesn't follow symlinks, so it succeeds even for broken ones.
		if linfo, lerr := os.Lstat(absResolved); lerr == nil && linfo.Mode()&os.ModeSymlink != 0 {
			// It's a broken symlink — read target and validate.
			target, readErr := os.Readlink(absResolved)
			if readErr != nil {
				return "", fmt.Errorf("access denied: cannot resolve symlink")
			}
			if !filepath.IsAbs(target) {
				target = filepath.Join(filepath.Dir(absResolved), target)
			}
			target = filepath.Clean(target)

			// Resolve through existing ancestors to catch chained symlinks
			// (e.g. link1 → link2 → /outside) where intermediate targets escape.
			targetReal, resolveErr := resolveThroughExistingAncestors(target)
			if resolveErr != nil {
				slog.Warn("security.broken_symlink_resolve_failed", "path", path, "target", target)
				return "", fmt.Errorf("access denied: cannot resolve broken symlink target")
			}
			if !isPathInside(targetReal, wsReal) {
				slog.Warn("security.broken_symlink_escape", "path", path, "target", targetReal, "workspace", wsReal)
				return "", fmt.Errorf("access denied: broken symlink target outside workspace")
			}
			real = targetReal
		} else {
			// Truly non-existent file (not a symlink): walk up to find the
			// deepest existing ancestor so nested new dirs (e.g. posts/file.md)
			// are allowed as long as an ancestor is inside the workspace.
			ancestorReal, ancestorErr := resolveThroughExistingAncestors(absResolved)
			if ancestorErr != nil {
				return "", fmt.Errorf("access denied: cannot resolve path")
			}
			real = ancestorReal
		}
	}

	// Validate canonical path stays within canonical workspace.
	if !isPathInside(real, wsReal) {
		slog.Warn("security.path_escape", "path", path, "resolved", real, "workspace", wsReal)
		return "", fmt.Errorf("access denied: path outside workspace")
	}

	// Reject paths with mutable symlink components (TOCTOU symlink rebind risk).
	// A symlink in the path whose parent directory is writable could be replaced
	// between resolution time and actual file operation.
	if hasMutableSymlinkParent(real) {
		slog.Warn("security.mutable_symlink_parent", "path", path, "resolved", real)
		return "", fmt.Errorf("access denied: path contains mutable symlink component")
	}

	// Reject hardlinked files (nlink > 1) to prevent hardlink-based escapes.
	if err := checkHardlink(real); err != nil {
		return "", err
	}

	return real, nil
}

// isPathInside checks whether child is inside or equal to parent directory.
//
// The comparison is purely lexical, so both arguments should already be
// cleaned/canonical. A trailing separator on parent (including the filesystem
// root itself) is tolerated. An empty parent names no directory and contains
// nothing.
func isPathInside(child, parent string) bool {
	if parent == "" {
		return false
	}
	if child == parent {
		return true
	}
	sep := string(filepath.Separator)
	// Drop trailing separators so "/" and "/ws/" yield the prefixes "/" and
	// "/ws/" rather than "//" and "/ws//".
	trimmed := strings.TrimRight(parent, sep)
	if trimmed != "" && child == trimmed {
		return true
	}
	return strings.HasPrefix(child, trimmed+sep)
}

// resolveThroughExistingAncestors resolves a path by finding the deepest
// existing ancestor, canonicalizing it with EvalSymlinks, then appending
// the remaining non-existent components. This handles broken symlinks
// whose targets contain intermediate symlinks that escape the workspace.
//
// The returned error is currently always nil; when no ancestor can be
// resolved the cleaned target is returned unchanged.
func resolveThroughExistingAncestors(target string) (string, error) {
	// Try full resolution first (target exists and all symlinks resolve)
	if real, err := filepath.EvalSymlinks(target); err == nil {
		return real, nil
	}

	// Walk up to find the deepest existing ancestor, remembering the
	// components that were stripped (deepest first).
	current := target
	var missing []string
	for {
		parent := filepath.Dir(current)
		if parent == current {
			// Reached filesystem root without finding existing dir
			break
		}
		missing = append(missing, filepath.Base(current))
		current = parent

		realParent, err := filepath.EvalSymlinks(current)
		if err != nil {
			continue
		}
		// Found existing ancestor — canonicalize and rebuild in path order.
		result := realParent
		for i := len(missing) - 1; i >= 0; i-- {
			result = filepath.Join(result, missing[i])
		}
		return result, nil
	}

	return filepath.Clean(target), nil
}

// hasMutableSymlinkParent checks if any component of the resolved path is a symlink
// whose parent directory is writable by the current process. A writable parent means
// the symlink could be replaced between path resolution and actual file operation
// (TOCTOU symlink rebind attack).
//
// The path is walked from the root; checking stops at the first component
// that does not exist.
func hasMutableSymlinkParent(path string) bool {
	const writeOK = 0x2 // W_OK

	clean := filepath.Clean(path)
	components := strings.Split(clean, string(filepath.Separator))
	current := string(filepath.Separator)

	for _, comp := range components {
		if comp == "" {
			continue
		}
		current = filepath.Join(current, comp)

		info, err := os.Lstat(current)
		if err != nil {
			break // non-existent — stop checking
		}
		if info.Mode()&os.ModeSymlink == 0 {
			continue
		}
		// Symlink found — check if its parent dir is writable
		if syscall.Access(filepath.Dir(current), writeOK) == nil {
			return true
		}
	}
	return false
}

// checkHardlink rejects regular files with nlink > 1 (hardlink attack prevention).
// Directories naturally have nlink > 1 and are exempt.
func checkHardlink(path string) error {
	info, err := os.Lstat(path)
	if err != nil {
		return nil // non-existent files are OK — will fail at read/write
	}
	if info.IsDir() {
		return nil
	}
	stat, ok := info.Sys().(*syscall.Stat_t)
	if !ok {
		// Link count unavailable on this platform/file type; nothing to check.
		return nil
	}
	if stat.Nlink > 1 {
		slog.Warn("security.hardlink_rejected", "path", path, "nlink", stat.Nlink)
		return fmt.Errorf("access denied: hardlinked file not allowed")
	}
	return nil
}