mirror of
https://github.com/tiennm99/goclaw.git
synced 2026-06-10 18:11:00 +00:00
ace07509b7
* feat(infra): add runtime package support for skills Install nodejs, npm, pandoc, github-cli + pre-install Python packages (openpyxl, pandas, python-pptx, markitdown) and Node packages (docx, pptxgenjs). Configure runtime dirs for agent pip/npm installs with PIP_TARGET, NPM_CONFIG_PREFIX, NODE_PATH to enable dynamic package installation in read-only container environment. * feat(infra): add bundled skills with runtime package support - Add 5 bundled skills: docx, pdf, pptx, xlsx, skill-creator from container skills-store - Wire GOCLAW_BUILTIN_SKILLS_DIR env var in gateway and CLI - Support optional runtime packages alongside dynamic skill loading - Update Dockerfile to COPY bundled-skills at /app/bundled-skills/ - Add PIP_CACHE_DIR in docker-entrypoint.sh for clean pip installs - Document bundled skills in 14-skills-runtime.md section 6 * feat(infra): remove ai-multimodal skill directory from bundled skills Remove the ai-multimodal skill package as part of consolidating runtime package support for bundled skills. This directory is no longer needed in the bundled skills structure. * feat(ci): add semantic release and Docker Hub publishing Add go-semantic-release workflow to auto-create semver tags on merge to main. Extend docker-publish to push all variants to both GHCR and Docker Hub (digitop/goclaw). 
* feat(skills): add system skills infrastructure with is_system column, dep scanning, and seeder - Migration 000017: add is_system boolean column with partial index - Store layer: UpsertSystemSkill, delete protection, IsSystemSkill - ListAccessible auto-includes system skills (no grants needed) - ListWithGrantStatus returns is_system field - Dependency scanner: auto-detect deps from scripts/ or skill-manifest.json - Dependency checker: verify system binaries, Python/Node packages - Seeder: seed bundled skills into DB on startup (idempotent via hash) - Gateway wiring: GOCLAW_BUNDLED_SKILLS_DIR env for bundled skills - HTTP: delete guard (403), slug conflict check (409), rescan-deps endpoint - UI: System badge, hide delete for system skills, rescan deps button - Agent skills tab: "Always available" for system skills - i18n: en/vi/zh keys for system skills, deps scanning * feat(skills): conditional system prompt, skill manifests, and Zip Slip fix - System prompt: only show package list when python3/node are available - Add skill-manifest.json for pdf, docx, xlsx, pptx bundled skills - Fix Zip Slip vulnerability in office/unpack.py (all 3 copies) * refactor(skills): extract shared office code to _shared/ and deduplicate Move office scripts (pack, unpack, validate, schemas, validators) from duplicated copies in docx/xlsx/pptx to skills/_shared/office/ with symlinks. Remove soffice.py (non-functional in containers) and update SKILL.md references to use soffice binary directly. Update seeder copyDir to follow symlinks. Removes ~45K lines of duplicate code across 3 skills. 
* fix(skills): address code review findings for system skills integration - H1: Remove dead symlink branch in copyDir (filepath.Walk follows symlinks) - H3: Fix rescan-deps to query ALL skills (including archived) and re-activate when deps become available; add ListAllSkills() + Status field to SkillInfo - H4: Add Status field to SkillCreateParams, stop overloading Visibility - M1: Batch Python/Node dep checks into single subprocess per runtime - M4: Add rows.Err() check in ListSkills to prevent caching partial results * feat(skills): async dep checking with realtime WS events Split Seed() into sync DB upsert + async CheckDepsAsync() goroutine. Gateway startup no longer blocks on Python/Node subprocess dep checks. - Seed() returns seeded skills list, all initially status="active" - CheckDepsAsync() runs in background, emits skill.deps.checked per-skill - skill.deps.complete event emitted when all checks finish - Each failed dep check: archives skill + BumpVersion() for immediate cache invalidation so next agent turn picks up the change - UI: use-query-invalidation listens to skill.deps.* events → auto-refresh skills list in realtime * feat(skills): system skills integration with toggle, dep checking, and per-item install - Add is_system, deps, enabled columns to skills table (migration 017) - Seed bundled core skills (pdf, docx, pptx, xlsx, skill-creator) on startup - PYTHONPATH-based dep detection — eliminates false positives from local modules - Per-item dep install UI with individual status (installing/success/error) - Enable/disable toggle for core and custom skills (independent of dep status) - Re-run dep check when skill is toggled back on - Inline skill thresholds: 40 skills / 5000 tokens before switching to search mode - Fix UpsertSystemSkill: backfill null file_hash without bumping DB version - Remove redundant skill-manifest.json files (replaced by deps JSONB column) - Show author from frontmatter in custom skills tab - Runtime checker for 
python3/pip3/node/npm availability - WS events for dep checking/installing progress - docs: add 15-core-skills-system.md, 16-skill-publishing.md --------- Co-authored-by: Goon <duy@wearetopgroup.com>
531 lines
14 KiB
Go
531 lines
14 KiB
Go
// Package skills loads and manages SKILL.md files from multiple source directories.
|
|
// Skills are injected into the agent's system prompt to provide specialized knowledge.
|
|
//
|
|
// Hierarchy (highest priority wins, matching TS loadSkillEntries):
|
|
// 1. Workspace skills — <workspace>/skills/
|
|
// 2. Project agent skills — <workspace>/.agents/skills/
|
|
// 3. Personal agent skills — ~/.agents/skills/
|
|
// 4. Global/managed skills — ~/.goclaw/skills/
|
|
// 5. DB-managed skills — versioned skills-store directory (set via SetManagedDir)
// 6. Builtin skills — bundled with binary
|
|
package skills
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
)
|
|
|
|
// Metadata is the subset of SKILL.md frontmatter the loader reads:
// the skill's declared name and its description.
type Metadata struct {
	// Name is the skill name declared in the frontmatter.
	Name string `json:"name"`
	// Description is the summary declared in the frontmatter.
	Description string `json:"description"`
}
|
|
|
|
// Info is the loader's record for one discovered skill.
type Info struct {
	// Name is the display name: the frontmatter name when present,
	// otherwise the directory name.
	Name string `json:"name"`
	// Slug is the skill's directory name; it is the unique identifier used
	// for de-duplication, allowlists and cache lookups.
	Slug string `json:"slug"`
	// Path is the location of the skill's SKILL.md file.
	Path string `json:"path"`
	// BaseDir is the skill directory (parent of SKILL.md).
	BaseDir string `json:"baseDir"`
	// Source names where the skill was found: "workspace", "agents-project",
	// "agents-personal", "global", "managed" or "builtin".
	Source string `json:"source"`
	// Description is the description taken from the frontmatter, if any.
	Description string `json:"description"`
}
|
|
|
|
// Loader discovers and loads SKILL.md files from multiple directories.
|
|
type Loader struct {
|
|
// Skill directories in priority order (highest first).
|
|
// Matches TS loadSkillEntries() 5-tier hierarchy.
|
|
workspaceSkills string // <workspace>/skills/
|
|
projectAgentSkills string // <workspace>/.agents/skills/
|
|
personalAgentSkills string // ~/.agents/skills/
|
|
globalSkills string // ~/.goclaw/skills/
|
|
builtinSkills string // bundled with binary
|
|
|
|
// DB-managed skills directory (set via SetManagedDir).
|
|
// Uses versioned subdirectory structure: <dir>/<slug>/<version>/SKILL.md
|
|
managedSkillsDir string
|
|
|
|
mu sync.RWMutex
|
|
cache map[string]*Info // name → info (lazily populated)
|
|
|
|
// Version tracking for hot-reload (matching TS bumpSkillsSnapshotVersion).
|
|
// Bumped by the watcher on SKILL.md changes; consumers compare to detect staleness.
|
|
version atomic.Int64
|
|
}
|
|
|
|
// NewLoader creates a skills loader.
|
|
// workspace: project workspace root (skills dir is workspace/skills/)
|
|
// globalSkills: global skills directory (e.g. ~/.goclaw/skills)
|
|
// builtinSkills: bundled skills directory
|
|
func NewLoader(workspace, globalSkills, builtinSkills string) *Loader {
|
|
wsSkills := ""
|
|
projectAgentSkills := ""
|
|
if workspace != "" {
|
|
wsSkills = filepath.Join(workspace, "skills")
|
|
projectAgentSkills = filepath.Join(workspace, ".agents", "skills")
|
|
}
|
|
|
|
// Personal agent skills: ~/.agents/skills/ (matching TS)
|
|
homeDir, _ := os.UserHomeDir()
|
|
personalAgentSkills := ""
|
|
if homeDir != "" {
|
|
personalAgentSkills = filepath.Join(homeDir, ".agents", "skills")
|
|
}
|
|
|
|
return &Loader{
|
|
workspaceSkills: wsSkills,
|
|
projectAgentSkills: projectAgentSkills,
|
|
personalAgentSkills: personalAgentSkills,
|
|
globalSkills: globalSkills,
|
|
builtinSkills: builtinSkills,
|
|
cache: make(map[string]*Info),
|
|
}
|
|
}
|
|
|
|
// SetManagedDir sets the managed skills directory (skills-store).
|
|
// Managed skills use versioned subdirectories: <dir>/<slug>/<version>/SKILL.md.
|
|
// Called after PG stores are created.
|
|
func (l *Loader) SetManagedDir(dir string) {
|
|
l.managedSkillsDir = dir
|
|
l.BumpVersion() // trigger re-scan
|
|
}
|
|
|
|
// ListSkills returns all available skills, respecting the priority hierarchy.
|
|
// Higher-priority sources override lower ones by name.
|
|
func (l *Loader) ListSkills() []Info {
|
|
l.mu.Lock()
|
|
defer l.mu.Unlock()
|
|
|
|
seen := make(map[string]bool)
|
|
var skills []Info
|
|
|
|
// Priority: workspace > project-agents > personal-agents > global > managed > builtin
|
|
// Managed (DB-seeded) skills take priority over raw bundled files so agents
|
|
// always receive paths within the skills-store (workspace-accessible), not /app/bundled-skills/.
|
|
for _, src := range []struct {
|
|
dir string
|
|
source string
|
|
}{
|
|
{l.workspaceSkills, "workspace"},
|
|
{l.projectAgentSkills, "agents-project"},
|
|
{l.personalAgentSkills, "agents-personal"},
|
|
{l.globalSkills, "global"},
|
|
} {
|
|
if src.dir == "" {
|
|
continue
|
|
}
|
|
dirs, err := os.ReadDir(src.dir)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
for _, d := range dirs {
|
|
if !d.IsDir() || seen[d.Name()] {
|
|
continue
|
|
}
|
|
skillFile := filepath.Join(src.dir, d.Name(), "SKILL.md")
|
|
if _, err := os.Stat(skillFile); err != nil {
|
|
continue
|
|
}
|
|
|
|
info := Info{
|
|
Name: d.Name(),
|
|
Slug: d.Name(),
|
|
Path: skillFile,
|
|
BaseDir: filepath.Join(src.dir, d.Name()),
|
|
Source: src.source,
|
|
}
|
|
if meta := parseMetadata(skillFile); meta != nil {
|
|
info.Description = meta.Description
|
|
if meta.Name != "" {
|
|
info.Name = meta.Name
|
|
}
|
|
}
|
|
skills = append(skills, info)
|
|
seen[d.Name()] = true
|
|
l.cache[d.Name()] = &info
|
|
}
|
|
}
|
|
|
|
// Managed skills (versioned, DB-seeded) come before builtin so their workspace paths win.
|
|
if l.managedSkillsDir != "" {
|
|
for _, info := range l.listManagedSkills() {
|
|
if seen[info.Slug] {
|
|
continue
|
|
}
|
|
skills = append(skills, info)
|
|
seen[info.Slug] = true
|
|
l.cache[info.Slug] = &info
|
|
}
|
|
}
|
|
|
|
// Builtin (raw bundled files) — lowest priority fallback.
|
|
if l.builtinSkills != "" {
|
|
dirs, err := os.ReadDir(l.builtinSkills)
|
|
if err == nil {
|
|
for _, d := range dirs {
|
|
if !d.IsDir() || seen[d.Name()] {
|
|
continue
|
|
}
|
|
skillFile := filepath.Join(l.builtinSkills, d.Name(), "SKILL.md")
|
|
if _, err := os.Stat(skillFile); err != nil {
|
|
continue
|
|
}
|
|
info := Info{
|
|
Name: d.Name(),
|
|
Slug: d.Name(),
|
|
Path: skillFile,
|
|
BaseDir: filepath.Join(l.builtinSkills, d.Name()),
|
|
Source: "builtin",
|
|
}
|
|
if meta := parseMetadata(skillFile); meta != nil {
|
|
info.Description = meta.Description
|
|
if meta.Name != "" {
|
|
info.Name = meta.Name
|
|
}
|
|
}
|
|
skills = append(skills, info)
|
|
seen[d.Name()] = true
|
|
l.cache[d.Name()] = &info
|
|
}
|
|
}
|
|
}
|
|
|
|
return skills
|
|
}
|
|
|
|
// listManagedSkills scans the managed skills directory for versioned skill directories.
|
|
// Structure: <managedSkillsDir>/<slug>/<version>/SKILL.md
|
|
// Returns the latest version of each skill found.
|
|
func (l *Loader) listManagedSkills() []Info {
|
|
dirs, err := os.ReadDir(l.managedSkillsDir)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
var skills []Info
|
|
for _, d := range dirs {
|
|
if !d.IsDir() {
|
|
continue
|
|
}
|
|
slug := d.Name()
|
|
|
|
// Find the latest version subdirectory
|
|
latestVersion, latestDir := l.findLatestVersion(slug)
|
|
if latestVersion < 0 {
|
|
continue
|
|
}
|
|
|
|
skillFile := filepath.Join(latestDir, "SKILL.md")
|
|
if _, err := os.Stat(skillFile); err != nil {
|
|
continue
|
|
}
|
|
|
|
info := Info{
|
|
Name: slug,
|
|
Slug: slug,
|
|
Path: skillFile,
|
|
BaseDir: latestDir,
|
|
Source: "managed",
|
|
}
|
|
if meta := parseMetadata(skillFile); meta != nil {
|
|
info.Description = meta.Description
|
|
if meta.Name != "" {
|
|
info.Name = meta.Name
|
|
}
|
|
}
|
|
skills = append(skills, info)
|
|
}
|
|
return skills
|
|
}
|
|
|
|
// findLatestVersion finds the highest-numbered version subdirectory for a skill slug.
|
|
// Returns (version, path) or (-1, "") if no valid version found.
|
|
func (l *Loader) findLatestVersion(slug string) (int, string) {
|
|
slugDir := filepath.Join(l.managedSkillsDir, slug)
|
|
entries, err := os.ReadDir(slugDir)
|
|
if err != nil {
|
|
return -1, ""
|
|
}
|
|
|
|
var versions []int
|
|
for _, e := range entries {
|
|
if !e.IsDir() {
|
|
continue
|
|
}
|
|
v, err := strconv.Atoi(e.Name())
|
|
if err != nil || v < 1 {
|
|
continue
|
|
}
|
|
versions = append(versions, v)
|
|
}
|
|
if len(versions) == 0 {
|
|
return -1, ""
|
|
}
|
|
|
|
sort.Sort(sort.Reverse(sort.IntSlice(versions)))
|
|
latestVer := versions[0]
|
|
return latestVer, filepath.Join(slugDir, strconv.Itoa(latestVer))
|
|
}
|
|
|
|
// LoadSkill reads and returns the content of a skill by name (frontmatter stripped).
|
|
// The {baseDir} placeholder in SKILL.md is replaced with the skill's absolute directory path.
|
|
// Priority: workspace > agents > global > managed > builtin
|
|
func (l *Loader) LoadSkill(name string) (string, bool) {
|
|
// Check flat skill directories (workspace, agents, global) first
|
|
for _, dir := range []string{l.workspaceSkills, l.projectAgentSkills, l.personalAgentSkills, l.globalSkills} {
|
|
if dir == "" {
|
|
continue
|
|
}
|
|
path := filepath.Join(dir, name, "SKILL.md")
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
content := stripFrontmatter(string(data))
|
|
content = strings.ReplaceAll(content, "{baseDir}", filepath.Join(dir, name))
|
|
return content, true
|
|
}
|
|
|
|
// Managed skills (DB-seeded, versioned) take priority over raw builtin files.
|
|
if l.managedSkillsDir != "" {
|
|
latestVer, latestDir := l.findLatestVersion(name)
|
|
if latestVer >= 0 {
|
|
path := filepath.Join(latestDir, "SKILL.md")
|
|
data, err := os.ReadFile(path)
|
|
if err == nil {
|
|
content := stripFrontmatter(string(data))
|
|
content = strings.ReplaceAll(content, "{baseDir}", latestDir)
|
|
return content, true
|
|
}
|
|
}
|
|
}
|
|
|
|
// Builtin fallback (only if not in managed)
|
|
if l.builtinSkills != "" {
|
|
path := filepath.Join(l.builtinSkills, name, "SKILL.md")
|
|
data, err := os.ReadFile(path)
|
|
if err == nil {
|
|
content := stripFrontmatter(string(data))
|
|
content = strings.ReplaceAll(content, "{baseDir}", filepath.Join(l.builtinSkills, name))
|
|
return content, true
|
|
}
|
|
}
|
|
|
|
return "", false
|
|
}
|
|
|
|
// LoadForContext loads multiple skills and formats them for system prompt injection.
|
|
// If allowList is nil, all skills are loaded. If non-nil, only listed skills are loaded.
|
|
func (l *Loader) LoadForContext(allowList []string) string {
|
|
var names []string
|
|
|
|
if allowList == nil {
|
|
// Load all available skills
|
|
for _, s := range l.ListSkills() {
|
|
names = append(names, s.Name)
|
|
}
|
|
} else {
|
|
names = allowList
|
|
}
|
|
|
|
if len(names) == 0 {
|
|
return ""
|
|
}
|
|
|
|
var parts []string
|
|
for _, name := range names {
|
|
content, ok := l.LoadSkill(name)
|
|
if !ok {
|
|
continue
|
|
}
|
|
parts = append(parts, fmt.Sprintf("### Skill: %s\n\n%s", name, content))
|
|
}
|
|
|
|
if len(parts) == 0 {
|
|
return ""
|
|
}
|
|
|
|
return "## Available Skills\n\n" + strings.Join(parts, "\n\n---\n\n")
|
|
}
|
|
|
|
// BuildSummary returns an XML summary of skills for context injection.
|
|
// If allowList is nil, all skills are included. If non-nil, only listed skills are included.
|
|
// The format matches the TS <available_skills> XML used in system prompts.
|
|
func (l *Loader) BuildSummary(allowList []string) string {
|
|
allSkills := l.ListSkills()
|
|
if len(allSkills) == 0 {
|
|
return ""
|
|
}
|
|
|
|
// Filter by allowList if provided
|
|
var filtered []Info
|
|
if allowList == nil {
|
|
filtered = allSkills
|
|
} else {
|
|
allowed := make(map[string]bool, len(allowList))
|
|
for _, name := range allowList {
|
|
allowed[name] = true
|
|
}
|
|
for _, s := range allSkills {
|
|
if allowed[s.Slug] {
|
|
filtered = append(filtered, s)
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(filtered) == 0 {
|
|
return ""
|
|
}
|
|
|
|
var lines []string
|
|
lines = append(lines, "<available_skills>")
|
|
for _, s := range filtered {
|
|
lines = append(lines, " <skill>")
|
|
lines = append(lines, fmt.Sprintf(" <name>%s</name>", escapeXML(s.Name)))
|
|
lines = append(lines, fmt.Sprintf(" <description>%s</description>", escapeXML(s.Description)))
|
|
lines = append(lines, fmt.Sprintf(" <location>%s</location>", escapeXML(s.Path)))
|
|
lines = append(lines, " </skill>")
|
|
}
|
|
lines = append(lines, "</available_skills>")
|
|
|
|
return strings.Join(lines, "\n")
|
|
}
|
|
|
|
// Version returns the current skill snapshot version.
|
|
// Consumers compare this to their cached version to detect changes.
|
|
func (l *Loader) Version() int64 {
|
|
return l.version.Load()
|
|
}
|
|
|
|
// BumpVersion increments the version counter (called by watcher on changes).
|
|
func (l *Loader) BumpVersion() {
|
|
l.version.Store(time.Now().UnixMilli())
|
|
}
|
|
|
|
// Dirs returns all non-empty skill directories (for the watcher to monitor).
|
|
func (l *Loader) Dirs() []string {
|
|
var dirs []string
|
|
for _, d := range []string{l.workspaceSkills, l.projectAgentSkills, l.personalAgentSkills, l.globalSkills, l.builtinSkills, l.managedSkillsDir} {
|
|
if d != "" {
|
|
dirs = append(dirs, d)
|
|
}
|
|
}
|
|
return dirs
|
|
}
|
|
|
|
// FilterSkills returns skills filtered by an allowlist.
|
|
// If allowList is nil, all skills are returned. If empty slice, none are returned.
|
|
func (l *Loader) FilterSkills(allowList []string) []Info {
|
|
all := l.ListSkills()
|
|
if allowList == nil {
|
|
return all
|
|
}
|
|
if len(allowList) == 0 {
|
|
return nil
|
|
}
|
|
allowed := make(map[string]bool, len(allowList))
|
|
for _, name := range allowList {
|
|
allowed[name] = true
|
|
}
|
|
var filtered []Info
|
|
for _, s := range all {
|
|
if allowed[s.Slug] {
|
|
filtered = append(filtered, s)
|
|
}
|
|
}
|
|
return filtered
|
|
}
|
|
|
|
// GetSkill returns info about a specific skill.
|
|
func (l *Loader) GetSkill(name string) (*Info, bool) {
|
|
// Ensure cache is populated
|
|
l.ListSkills()
|
|
|
|
l.mu.RLock()
|
|
defer l.mu.RUnlock()
|
|
info, ok := l.cache[name]
|
|
return info, ok
|
|
}
|
|
|
|
// --- Frontmatter parsing ---
|
|
|
|
// frontmatterRe matches a leading frontmatter block: a "---" line at the very
// start of the text, the (lazily matched) frontmatter body as capture group 1,
// and a closing "---" line with its optional trailing newline. It expects
// "\n" line endings.
var frontmatterRe = regexp.MustCompile(`(?s)^---\n(.*?)\n---\n?`)
|
|
|
|
func parseMetadata(path string) *Metadata {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
fm := extractFrontmatter(string(data))
|
|
if fm == "" {
|
|
return &Metadata{Name: filepath.Base(filepath.Dir(path))}
|
|
}
|
|
|
|
// Try JSON first
|
|
var jm Metadata
|
|
if json.Unmarshal([]byte(fm), &jm) == nil && jm.Name != "" {
|
|
return &jm
|
|
}
|
|
|
|
// Fall back to simple YAML key: value
|
|
kv := parseSimpleYAML(fm)
|
|
return &Metadata{
|
|
Name: kv["name"],
|
|
Description: kv["description"],
|
|
}
|
|
}
|
|
|
|
// normalizeLineEndings rewrites Windows (\r\n) and bare-\r line endings as \n,
// so the frontmatter regex also matches files created on Windows or uploaded
// via ZIP with CRLF line endings.
func normalizeLineEndings(s string) string {
	// "\r\n" is listed first so a CRLF pair collapses to a single "\n"
	// instead of being treated as a bare "\r" followed by "\n".
	toLF := strings.NewReplacer("\r\n", "\n", "\r", "\n")
	return toLF.Replace(s)
}
|
|
|
|
func extractFrontmatter(content string) string {
|
|
match := frontmatterRe.FindStringSubmatch(normalizeLineEndings(content))
|
|
if len(match) > 1 {
|
|
return match[1]
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func stripFrontmatter(content string) string {
|
|
return frontmatterRe.ReplaceAllString(normalizeLineEndings(content), "")
|
|
}
|
|
|
|
func parseSimpleYAML(content string) map[string]string {
|
|
result := make(map[string]string)
|
|
for line := range strings.SplitSeq(content, "\n") {
|
|
line = strings.TrimSpace(line)
|
|
if line == "" || strings.HasPrefix(line, "#") {
|
|
continue
|
|
}
|
|
parts := strings.SplitN(line, ":", 2)
|
|
if len(parts) == 2 {
|
|
key := strings.TrimSpace(parts[0])
|
|
val := strings.TrimSpace(parts[1])
|
|
val = strings.Trim(val, "\"'")
|
|
result[key] = val
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// escapeXML escapes the characters that are significant in XML element
// content (&, < and >) so arbitrary text can be embedded between tags.
func escapeXML(s string) string {
	// "&" must be escaped first; otherwise the ampersands introduced by the
	// "&lt;" / "&gt;" replacements would themselves be escaped again.
	s = strings.ReplaceAll(s, "&", "&amp;")
	s = strings.ReplaceAll(s, "<", "&lt;")
	s = strings.ReplaceAll(s, ">", "&gt;")
	return s
}
|