package agent import ( "context" "encoding/json" "fmt" "log/slog" "os" "path/filepath" "sort" "strings" "sync" "time" "github.com/google/uuid" "github.com/nextlevelbuilder/goclaw/internal/bootstrap" "github.com/nextlevelbuilder/goclaw/internal/bus" "github.com/nextlevelbuilder/goclaw/internal/i18n" "github.com/nextlevelbuilder/goclaw/internal/config" "github.com/nextlevelbuilder/goclaw/internal/providers" "github.com/nextlevelbuilder/goclaw/internal/store" "github.com/nextlevelbuilder/goclaw/internal/tools" "github.com/nextlevelbuilder/goclaw/pkg/protocol" ) func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error) { // Per-run emit wrapper: enriches every AgentEvent with delegation + routing context. emitRun := func(event AgentEvent) { event.RunKind = req.RunKind event.DelegationID = req.DelegationID event.TeamID = req.TeamID event.TeamTaskID = req.TeamTaskID event.ParentAgentID = req.ParentAgentID event.UserID = req.UserID event.Channel = req.Channel event.ChatID = req.ChatID l.emit(event) } // Inject agent UUID into context for tool routing if l.agentUUID != uuid.Nil { ctx = store.WithAgentID(ctx, l.agentUUID) } // Inject user ID into context for per-user scoping (memory, context files, etc.) if req.UserID != "" { ctx = store.WithUserID(ctx, req.UserID) } // Inject agent type into context for interceptor routing if l.agentType != "" { ctx = store.WithAgentType(ctx, l.agentType) } // Inject self-evolve flag for predefined agents that can update SOUL.md if l.selfEvolve { ctx = store.WithSelfEvolve(ctx, true) } // Inject original sender ID for group file writer permission checks if req.SenderID != "" { ctx = store.WithSenderID(ctx, req.SenderID) } // Inject global builtin tool settings for media tools (provider chain) if l.builtinToolSettings != nil { ctx = tools.WithBuiltinToolSettings(ctx, l.builtinToolSettings) } // Inject channel type into context for tools (e.g. message tool needs it for Zalo group routing) if req.ChannelType != "" { ctx = tools.WithToolChannelType(ctx, req.ChannelType) } // Inject per-agent overrides from DB so tools honor per-agent settings. if l.restrictToWs != nil { ctx = tools.WithRestrictToWorkspace(ctx, *l.restrictToWs) } if l.subagentsCfg != nil { ctx = tools.WithSubagentConfig(ctx, l.subagentsCfg) } // Pass the agent's model and provider so subagents inherit the correct combo. if l.model != "" { ctx = tools.WithParentModel(ctx, l.model) } if l.provider != nil { ctx = tools.WithParentProvider(ctx, l.provider.Name()) } if l.memoryCfg != nil { ctx = tools.WithMemoryConfig(ctx, l.memoryCfg) } if l.sandboxCfg != nil { ctx = tools.WithSandboxConfig(ctx, l.sandboxCfg) } if l.shellDenyGroups != nil { ctx = store.WithShellDenyGroups(ctx, l.shellDenyGroups) } // Workspace scope propagation (delegation origin → workspace tools). if req.WorkspaceChannel != "" { ctx = tools.WithWorkspaceChannel(ctx, req.WorkspaceChannel) } if req.WorkspaceChatID != "" { ctx = tools.WithWorkspaceChatID(ctx, req.WorkspaceChatID) } if req.TeamTaskID != "" { ctx = tools.WithTeamTaskID(ctx, req.TeamTaskID) } // Per-user workspace isolation. // Workspace path comes from user_agent_profiles (includes channel segment // for cross-channel isolation). Cached in userWorkspaces to avoid repeated DB queries. isTeamSession := bootstrap.IsTeamSession(req.SessionKey) if l.workspace != "" && req.UserID != "" { cachedWs, loaded := l.userWorkspaces.Load(req.UserID) if !loaded { // First request for this user: get/create profile → returns stored workspace. // Also seeds per-user context files on first chat. // Team-dispatched sessions skip seeding — members process tasks with full // capabilities, no bootstrap/user onboarding needed. ws := l.workspace if l.ensureUserFiles != nil && !isTeamSession { var err error ws, err = l.ensureUserFiles(ctx, l.agentUUID, req.UserID, l.agentType, l.workspace, req.Channel) if err != nil { slog.Warn("failed to ensure user context files", "error", err) ws = l.workspace } } // Expand ~ and convert to absolute for filesystem operations. ws = config.ExpandHome(ws) if !filepath.IsAbs(ws) { ws, _ = filepath.Abs(ws) } l.userWorkspaces.Store(req.UserID, ws) cachedWs = ws } effectiveWorkspace := cachedWs.(string) if !l.shouldShareWorkspace(req.UserID, req.PeerKind) { effectiveWorkspace = filepath.Join(effectiveWorkspace, sanitizePathSegment(req.UserID)) } if l.shouldShareMemory() { ctx = store.WithSharedMemory(ctx) } if err := os.MkdirAll(effectiveWorkspace, 0755); err != nil { slog.Warn("failed to create user workspace directory", "workspace", effectiveWorkspace, "user", req.UserID, "error", err) } ctx = tools.WithToolWorkspace(ctx, effectiveWorkspace) } else if l.workspace != "" { ctx = tools.WithToolWorkspace(ctx, l.workspace) } // Team workspace handling: // - Dispatched task (req.TeamWorkspace set): override default workspace so // relative paths resolve to team workspace. Agent workspace is accessible // via ToolTeamWorkspace for absolute-path access. // - Direct chat (auto-resolved): keep agent workspace as default, team // workspace accessible via absolute path. if req.TeamWorkspace != "" { if err := os.MkdirAll(req.TeamWorkspace, 0755); err != nil { slog.Warn("failed to create team workspace directory", "workspace", req.TeamWorkspace, "error", err) } ctx = tools.WithToolTeamWorkspace(ctx, req.TeamWorkspace) ctx = tools.WithToolWorkspace(ctx, req.TeamWorkspace) // default for relative paths } if req.TeamID != "" { ctx = tools.WithToolTeamID(ctx, req.TeamID) } // Auto-resolve team workspace for agents not dispatched via team task. // Lead agents default to team workspace (primary job is team coordination). // Non-lead members keep own workspace; team workspace is accessible via absolute path. // resolvedTeamSettings caches team settings from workspace resolution // to avoid re-querying when checking slow_tool notification config. var resolvedTeamSettings json.RawMessage if req.TeamWorkspace == "" && l.teamStore != nil && l.agentUUID != uuid.Nil { if team, _ := l.teamStore.GetTeamForAgent(ctx, l.agentUUID); team != nil { resolvedTeamSettings = team.Settings // Shared workspace: scope by teamID only. Isolated (default): scope by chatID too. wsChat := req.ChatID if wsChat == "" { wsChat = req.UserID } if tools.IsSharedWorkspace(team.Settings) { wsChat = "" } if wsDir, err := tools.WorkspaceDir(l.dataDir, team.ID, wsChat); err == nil { ctx = tools.WithToolTeamWorkspace(ctx, wsDir) if team.LeadAgentID == l.agentUUID { ctx = tools.WithToolWorkspace(ctx, wsDir) } } if req.TeamID == "" { ctx = tools.WithToolTeamID(ctx, team.ID.String()) } } } // Persist agent UUID + user ID on the session (for querying/tracing) if l.agentUUID != uuid.Nil || req.UserID != "" { l.sessions.SetAgentInfo(req.SessionKey, l.agentUUID, req.UserID) } // Security: scan user message for injection patterns. // Action is configurable: "log" (info), "warn" (default), "block" (reject message). if l.inputGuard != nil { if matches := l.inputGuard.Scan(req.Message); len(matches) > 0 { matchStr := strings.Join(matches, ",") switch l.injectionAction { case "block": slog.Warn("security.injection_blocked", "agent", l.id, "user", req.UserID, "patterns", matchStr, "message_len", len(req.Message), ) return nil, fmt.Errorf("message blocked: potential prompt injection detected (%s)", matchStr) case "log": slog.Info("security.injection_detected", "agent", l.id, "user", req.UserID, "patterns", matchStr, "message_len", len(req.Message), ) default: // "warn" slog.Warn("security.injection_detected", "agent", l.id, "user", req.UserID, "patterns", matchStr, "message_len", len(req.Message), ) } } } // Inject agent key into context for tool-level resolution (multiple agents share tool registry) ctx = tools.WithToolAgentKey(ctx, l.id) // Security: truncate oversized user messages gracefully (feed truncation notice into LLM) maxChars := l.maxMessageChars if maxChars <= 0 { maxChars = 32_000 // default ~8-10K tokens } if len(req.Message) > maxChars { originalLen := len(req.Message) req.Message = req.Message[:maxChars] + fmt.Sprintf("\n\n[System: Message was truncated from %d to %d characters due to size limit. "+ "Please ask the user to send shorter messages or use the read_file tool for large content.]", originalLen, maxChars) slog.Warn("security.message_truncated", "agent", l.id, "user", req.UserID, "original_len", originalLen, "truncated_to", maxChars, ) } // 0. Cache agent's context window on the session (first run only). // Enables scheduler's adaptive throttle to use the real value instead of hardcoded 200K. if l.sessions.GetContextWindow(req.SessionKey) <= 0 { l.sessions.SetContextWindow(req.SessionKey, l.contextWindow) } // 0b. Load adaptive tool timing from session metadata. toolTiming := ParseToolTiming(l.sessions.GetSessionMetadata(req.SessionKey)) // Resolve slow_tool notification config from already-loaded team settings (no extra DB query). slowToolEnabled := tools.ParseTeamNotifyConfig(resolvedTeamSettings).SlowTool // 1. Build messages from session history history := l.sessions.GetHistory(req.SessionKey) summary := l.sessions.GetSummary(req.SessionKey) // buildMessages resolves context files once and also detects BOOTSTRAP.md presence // (hadBootstrap) — no extra DB roundtrip needed for bootstrap detection. messages, hadBootstrap := l.buildMessages(ctx, history, summary, req.Message, req.ExtraSystemPrompt, req.SessionKey, req.Channel, req.ChannelType, req.PeerKind, req.UserID, req.HistoryLimit, req.SkillFilter, req.LightContext) // 1b. Determine image routing strategy. // If read_image tool has a dedicated vision provider, images are NOT attached inline // to the main LLM — the agent calls read_image tool instead. This avoids sending // images to providers that don't support vision or have strict content filters. deferToReadImageTool := l.hasReadImageProvider() if !deferToReadImageTool { // Inline mode: reload historical images directly into messages for main provider. l.reloadMediaForMessages(messages, maxMediaReloadMessages) } // 2. Process media: sanitize images, persist to media store. var mediaRefs []providers.MediaRef if len(req.Media) > 0 { mediaRefs = l.persistMedia(req.SessionKey, req.Media) // Load current-turn images from persisted refs. var imageFiles []bus.MediaFile for _, ref := range mediaRefs { if ref.Kind == "image" { if p, err := l.mediaStore.LoadPath(ref.ID); err == nil { imageFiles = append(imageFiles, bus.MediaFile{Path: p, MimeType: ref.MimeType}) } } } if images := loadImages(imageFiles); len(images) > 0 { if deferToReadImageTool { // Tool mode: store in context only — agent calls read_image tool. ctx = tools.WithMediaImages(ctx, images) slog.Info("vision: deferring to read_image tool", "count", len(images), "agent", l.id) } else { // Inline mode: attach to message + context. messages[len(messages)-1].Images = images ctx = tools.WithMediaImages(ctx, images) slog.Info("vision: attached images inline to main provider", "count", len(images), "agent", l.id) } } } // 2a. Tool mode: also load historical images into context for read_image tool. // Without this, read_image can only see current-turn images, not previous turns. if deferToReadImageTool && l.mediaStore != nil { ctx = l.loadHistoricalImagesForTool(ctx, mediaRefs, messages) } // 2b. Collect document MediaRefs (historical + current) for read_document tool. // Historical first, current last — so refs[len-1] is always the most recent file. var docRefs []providers.MediaRef for i := len(messages) - 1; i >= 0; i-- { for _, ref := range messages[i].MediaRefs { if ref.Kind == "document" { docRefs = append(docRefs, ref) } } } for _, ref := range mediaRefs { if ref.Kind == "document" { docRefs = append(docRefs, ref) } } if len(docRefs) > 0 { ctx = tools.WithMediaDocRefs(ctx, docRefs) // Enrich the last user message with persisted file paths so skills can access // documents via exec (e.g. pypdf). Only for current-turn refs (just persisted). l.enrichDocumentPaths(messages, mediaRefs) } // 2c. Collect audio MediaRefs (historical + current) for read_audio tool. var audioRefs []providers.MediaRef for i := len(messages) - 1; i >= 0; i-- { for _, ref := range messages[i].MediaRefs { if ref.Kind == "audio" { audioRefs = append(audioRefs, ref) } } } for _, ref := range mediaRefs { if ref.Kind == "audio" { audioRefs = append(audioRefs, ref) } } if len(audioRefs) > 0 { ctx = tools.WithMediaAudioRefs(ctx, audioRefs) // Embed media IDs into tags so LLM can reference them. l.enrichAudioIDs(messages, mediaRefs) } // 2d. Collect video MediaRefs (historical + current) for read_video tool. var videoRefs []providers.MediaRef for i := len(messages) - 1; i >= 0; i-- { for _, ref := range messages[i].MediaRefs { if ref.Kind == "video" { videoRefs = append(videoRefs, ref) } } } for _, ref := range mediaRefs { if ref.Kind == "video" { videoRefs = append(videoRefs, ref) } } if len(videoRefs) > 0 { ctx = tools.WithMediaVideoRefs(ctx, videoRefs) // Embed media IDs into tags so LLM can reference them. l.enrichVideoIDs(messages, mediaRefs) } // 2e. Enrich tags with persisted media IDs so the LLM // knows images were received and stored (consistent with audio/video enrichment). l.enrichImageIDs(messages, mediaRefs) // 2f. Collect all media file paths for team workspace auto-collect. // When the leader calls team_tasks(create), these paths are copied to the // team workspace so members can access attached files. if len(mediaRefs) > 0 && l.mediaStore != nil { var mediaPaths []string for _, ref := range mediaRefs { if p, err := l.mediaStore.LoadPath(ref.ID); err == nil { mediaPaths = append(mediaPaths, p) } } if len(mediaPaths) > 0 { ctx = tools.WithRunMediaPaths(ctx, mediaPaths) // Extract original filenames from tags // in the last user message (enriched in step 2b above). if lastMsg := messages[len(messages)-1]; lastMsg.Role == "user" { if nameMap := tools.ExtractMediaNameMap(lastMsg.Content); len(nameMap) > 0 { ctx = tools.WithRunMediaNames(ctx, nameMap) } } } } // 2g. Cross-session task reminder: notify team leads about pending and in-progress tasks. // Stale recovery (expired lock → pending) is handled by the background TaskTicker. if l.teamStore != nil && l.agentUUID != uuid.Nil { if team, _ := l.teamStore.GetTeamForAgent(ctx, l.agentUUID); team != nil && team.LeadAgentID == l.agentUUID { if tasks, err := l.teamStore.ListTasks(ctx, team.ID, "newest", "active", req.UserID, "", "", 0, 0); err == nil { var stale []string var inProgress []string for _, t := range tasks { if t.Status == store.TeamTaskStatusPending { age := time.Since(t.CreatedAt).Truncate(time.Minute) stale = append(stale, fmt.Sprintf("- %s: \"%s\" (pending %s)", t.ID, t.Subject, age)) } if t.Status == store.TeamTaskStatusInProgress { age := time.Since(t.UpdatedAt).Truncate(time.Minute) progressInfo := fmt.Sprintf("in progress %s", age) if t.ProgressPercent > 0 { if t.ProgressStep != "" { progressInfo = fmt.Sprintf("%d%% — %s, %s", t.ProgressPercent, t.ProgressStep, age) } else { progressInfo = fmt.Sprintf("%d%%, %s", t.ProgressPercent, age) } } inProgress = append(inProgress, fmt.Sprintf("- %s: \"%s\" (%s)", t.ID, t.Subject, progressInfo)) } } var parts []string if len(stale) > 0 { parts = append(parts, fmt.Sprintf( "You have %d pending team task(s) awaiting dispatch:\n%s\n"+ "These tasks will be auto-dispatched to available team members. If no longer needed, cancel with team_tasks action=cancel.", len(stale), strings.Join(stale, "\n"))) } if len(inProgress) > 0 { parts = append(parts, fmt.Sprintf( "You have %d in-progress team task(s) being handled by team members:\n%s\n"+ "Their results will arrive automatically. Do NOT cancel, re-create, or re-spawn these tasks.", len(inProgress), strings.Join(inProgress, "\n"))) } if len(parts) > 0 { reminder := "[System] " + strings.Join(parts, "\n\n") messages = append(messages, providers.Message{Role: "user", Content: reminder}, providers.Message{Role: "assistant", Content: "I see the task status. Let me handle accordingly."}, ) } } } } // 2g. Member task reminder: inject task context for members working on dispatched tasks. // Caches task subject/number for mid-loop progress nudge (avoids extra DB query). var memberTaskSubject string var memberTaskNumber int if req.TeamTaskID != "" && l.teamStore != nil { if taskUUID, err := uuid.Parse(req.TeamTaskID); err == nil { if task, err := l.teamStore.GetTask(ctx, taskUUID); err == nil && task != nil { memberTaskSubject = task.Subject memberTaskNumber = task.TaskNumber reminder := fmt.Sprintf( "[System] You are working on team task #%d: %q. "+ "Stay focused on this task. Your final response becomes the task result — make it clear and complete. "+ "For long tasks, report progress: team_tasks(action=\"progress\", percent=50, text=\"status\").", task.TaskNumber, task.Subject) messages = append(messages, providers.Message{Role: "user", Content: reminder}, providers.Message{Role: "assistant", Content: "Understood. I'll focus on this task and report progress."}, ) } } } // 3. Buffer new messages — write to session only AFTER the run completes. // This prevents concurrent runs from seeing each other's in-progress messages. // NOTE: pendingMsgs stores text + lightweight MediaRefs (not base64 images). var pendingMsgs []providers.Message if !req.HideInput { pendingMsgs = append(pendingMsgs, providers.Message{ Role: "user", Content: req.Message, MediaRefs: mediaRefs, }) } // 4. Run LLM iteration loop var loopDetector toolLoopState // detects repeated no-progress tool calls var totalUsage providers.Usage iteration := 0 totalToolCalls := 0 var finalContent string var finalThinking string var asyncToolCalls []string // track async spawn tool names for fallback var bootstrapWriteDetected bool // track if write_file was called during bootstrap var mediaResults []MediaResult // media files from tool MEDIA: results var deliverables []string // actual content from tool outputs (for team task results) var blockReplies int // count of block.reply events emitted (for dedup in consumer) var lastBlockReply string // last block reply content // Mid-loop compaction: summarize in-memory messages when context exceeds threshold. // Uses same config as maybeSummarize (contextWindow * historyShare). var midLoopCompacted bool // Team task orphan detection: track team_tasks create vs spawn calls. // If the LLM creates tasks but forgets to spawn, inject a reminder. var teamTaskCreates int // count of team_tasks action=create calls var teamTaskSpawns int // count of spawn calls with team_task_id // Skill evolution: budget pressure nudge state (sent at most once each per run). var skillNudge70Sent, skillNudge90Sent bool var skillPostscriptSent bool // Member progress nudge: remind dispatched members to report progress (every 6 iterations). // Inject retry hook so channels can update placeholder on LLM retries. ctx = providers.WithRetryHook(ctx, func(attempt, maxAttempts int, err error) { emitRun(AgentEvent{ Type: protocol.AgentEventRunRetrying, AgentID: l.id, RunID: req.RunID, Payload: map[string]string{ "attempt": fmt.Sprintf("%d", attempt), "maxAttempts": fmt.Sprintf("%d", maxAttempts), "error": err.Error(), }, }) }) maxIter := l.maxIterations if req.MaxIterations > 0 && req.MaxIterations < maxIter { maxIter = req.MaxIterations } // Budget check: query monthly spent once before starting iterations. if l.budgetMonthlyCents > 0 && l.tracingStore != nil && l.agentUUID != uuid.Nil { now := time.Now().UTC() spent, err := l.tracingStore.GetMonthlyAgentCost(ctx, l.agentUUID, now.Year(), now.Month()) if err == nil { spentCents := int(spent * 100) if spentCents >= l.budgetMonthlyCents { slog.Warn("agent budget exceeded", "agent", l.id, "spent_cents", spentCents, "budget_cents", l.budgetMonthlyCents) return nil, fmt.Errorf("monthly budget exceeded ($%.2f / $%.2f)", spent, float64(l.budgetMonthlyCents)/100) } } } for iteration < maxIter { iteration++ slog.Debug("agent iteration", "agent", l.id, "iteration", iteration, "messages", len(messages)) // Skill evolution: budget pressure nudges at 70% and 90% of iteration budget. // Ephemeral (in-memory only, not persisted to session) — LLM sees them during this run only. if l.skillEvolve && maxIter > 0 { locale := store.LocaleFromContext(ctx) iterPct := float64(iteration) / float64(maxIter) if iterPct >= 0.90 && !skillNudge90Sent { skillNudge90Sent = true messages = append(messages, providers.Message{ Role: "user", Content: i18n.T(locale, i18n.MsgSkillNudge90Pct), }) } else if iterPct >= 0.70 && !skillNudge70Sent { skillNudge70Sent = true messages = append(messages, providers.Message{ Role: "user", Content: i18n.T(locale, i18n.MsgSkillNudge70Pct), }) } } // Member progress nudge: remind to report progress every 6 iterations. // Suggests percent based on iteration ratio — model can adjust but has a baseline. if req.TeamTaskID != "" && memberTaskSubject != "" && iteration > 0 && iteration%6 == 0 { var nudge string if maxIter > 0 { suggestedPct := iteration * 100 / maxIter nudge = fmt.Sprintf( "[System] You are at iteration %d/%d (~%d%% of budget) working on task #%d: %q. "+ "Report your progress now: team_tasks(action=\"progress\", percent=%d, text=\"what you've accomplished so far\"). "+ "Adjust percent based on actual work completed.", iteration, maxIter, suggestedPct, memberTaskNumber, memberTaskSubject, suggestedPct) } else { nudge = fmt.Sprintf( "[System] You are at iteration %d working on task #%d: %q. "+ "Report your progress now: team_tasks(action=\"progress\", percent=50, text=\"what you've accomplished so far\"). "+ "Adjust percent based on actual work completed.", iteration, memberTaskNumber, memberTaskSubject) } messages = append(messages, providers.Message{Role: "user", Content: nudge}) } // Emit activity event: thinking phase emitRun(AgentEvent{ Type: protocol.AgentEventActivity, AgentID: l.id, RunID: req.RunID, Payload: map[string]any{"phase": "thinking", "iteration": iteration}, }) // Build provider request with policy-filtered tools var toolDefs []providers.ToolDefinition var allowedTools map[string]bool if l.toolPolicy != nil { toolDefs = l.toolPolicy.FilterTools(l.tools, l.id, l.provider.Name(), l.agentToolPolicy, req.ToolAllow, false, false) allowedTools = make(map[string]bool, len(toolDefs)) for _, td := range toolDefs { allowedTools[td.Function.Name] = true } } else { toolDefs = l.tools.ProviderDefs() } // Bootstrap mode: restrict API tool definitions to write_file only (open agents). // Predefined agents keep all tools — BOOTSTRAP.md guides behavior. if hadBootstrap && l.agentType != "predefined" { var bootstrapDefs []providers.ToolDefinition for _, td := range toolDefs { if bootstrapToolAllowlist[td.Function.Name] { bootstrapDefs = append(bootstrapDefs, td) } } toolDefs = bootstrapDefs } // Hide skill_manage from LLM when skill_evolve is off. // Tool stays in the registry (shared) but won't appear in API tool definitions. if !l.skillEvolve { filtered := toolDefs[:0:0] for _, td := range toolDefs { if td.Function.Name != "skill_manage" { filtered = append(filtered, td) } } toolDefs = filtered } // Use per-request model override if set (e.g. heartbeat uses cheaper model). model := l.model if req.ModelOverride != "" { model = req.ModelOverride } chatReq := providers.ChatRequest{ Messages: messages, Tools: toolDefs, Model: model, Options: map[string]any{ providers.OptMaxTokens: l.effectiveMaxTokens(), providers.OptTemperature: 0.7, providers.OptSessionKey: req.SessionKey, providers.OptAgentID: l.agentUUID.String(), providers.OptUserID: req.UserID, providers.OptChannel: req.Channel, providers.OptChatID: req.ChatID, providers.OptPeerKind: req.PeerKind, }, } if l.thinkingLevel != "" && l.thinkingLevel != "off" { if tc, ok := l.provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() { chatReq.Options[providers.OptThinkingLevel] = l.thinkingLevel } else { slog.Debug("thinking_level ignored: provider does not support thinking", "provider", l.provider.Name(), "level", l.thinkingLevel) } } // Call LLM (streaming or non-streaming) var resp *providers.ChatResponse var err error llmSpanStart := time.Now().UTC() llmSpanID := l.emitLLMSpanStart(ctx, llmSpanStart, iteration, messages) if req.Stream { resp, err = l.provider.ChatStream(ctx, chatReq, func(chunk providers.StreamChunk) { if chunk.Thinking != "" { emitRun(AgentEvent{ Type: protocol.ChatEventThinking, AgentID: l.id, RunID: req.RunID, Payload: map[string]string{"content": chunk.Thinking}, }) } if chunk.Content != "" { emitRun(AgentEvent{ Type: protocol.ChatEventChunk, AgentID: l.id, RunID: req.RunID, Payload: map[string]string{"content": chunk.Content}, }) } }) } else { resp, err = l.provider.Chat(ctx, chatReq) } if err != nil { l.emitLLMSpanEnd(ctx, llmSpanID, llmSpanStart, nil, err) return nil, fmt.Errorf("LLM call failed (iteration %d): %w", iteration, err) } l.emitLLMSpanEnd(ctx, llmSpanID, llmSpanStart, resp, nil) // For non-streaming responses, emit thinking and content as single events if !req.Stream { if resp.Thinking != "" { emitRun(AgentEvent{ Type: protocol.ChatEventThinking, AgentID: l.id, RunID: req.RunID, Payload: map[string]string{"content": resp.Thinking}, }) } if resp.Content != "" { emitRun(AgentEvent{ Type: protocol.ChatEventChunk, AgentID: l.id, RunID: req.RunID, Payload: map[string]string{"content": resp.Content}, }) } } if resp.Usage != nil { totalUsage.PromptTokens += resp.Usage.PromptTokens totalUsage.CompletionTokens += resp.Usage.CompletionTokens totalUsage.TotalTokens += resp.Usage.TotalTokens totalUsage.ThinkingTokens += resp.Usage.ThinkingTokens } // Mid-loop compaction: same threshold as maybeSummarize (contextWindow * historyShare) // but applied to in-memory messages during the run. Prevents context overflow for // long-running agents (e.g. delegated research tasks that accumulate many tool results). if !midLoopCompacted && l.contextWindow > 0 { historyShare := 0.75 if l.compactionCfg != nil && l.compactionCfg.MaxHistoryShare > 0 { historyShare = l.compactionCfg.MaxHistoryShare } threshold := int(float64(l.contextWindow) * historyShare) promptTokens := 0 if resp.Usage != nil && resp.Usage.PromptTokens > 0 { promptTokens = resp.Usage.PromptTokens } else { promptTokens = EstimateTokens(messages) } if promptTokens >= threshold { midLoopCompacted = true emitRun(AgentEvent{ Type: protocol.AgentEventActivity, AgentID: l.id, RunID: req.RunID, Payload: map[string]any{"phase": "compacting", "iteration": iteration}, }) if compacted := l.compactMessagesInPlace(ctx, messages); compacted != nil { messages = compacted } slog.Info("mid_loop_compaction", "agent", l.id, "prompt_tokens", promptTokens, "threshold", threshold, "context_window", l.contextWindow) } } // Output truncated (max_tokens hit). Tool call args are likely incomplete. // Inject a system hint so the model can retry with shorter output. if resp.FinishReason == "length" && len(resp.ToolCalls) > 0 { slog.Warn("output truncated (max_tokens), tool calls may have incomplete args", "agent", l.id, "iteration", iteration, "max_tokens", l.effectiveMaxTokens()) messages = append(messages, providers.Message{Role: "assistant", Content: resp.Content}, providers.Message{ Role: "user", Content: "[System] Your output was truncated because it exceeded max_tokens. Your tool call arguments were incomplete. Please retry with shorter content — split large writes into multiple smaller calls, or reduce the amount of text.", }, ) continue } // No tool calls → done if len(resp.ToolCalls) == 0 { // Mid-run injection (Point B): drain all buffered user follow-up messages // before exiting. If found, save current assistant response and continue // the loop so the LLM can respond to the injected messages. if forLLM, forSession := l.drainInjectChannel(req.InjectCh, emitRun); len(forLLM) > 0 { messages = append(messages, providers.Message{Role: "assistant", Content: resp.Content}) messages = append(messages, forLLM...) pendingMsgs = append(pendingMsgs, providers.Message{Role: "assistant", Content: resp.Content}) pendingMsgs = append(pendingMsgs, forSession...) continue } finalContent = resp.Content finalThinking = resp.Thinking break } // Build assistant message with tool calls assistantMsg := providers.Message{ Role: "assistant", Content: resp.Content, Thinking: resp.Thinking, // reasoning_content passback for thinking models (Kimi, DeepSeek) ToolCalls: resp.ToolCalls, Phase: resp.Phase, // preserve Codex phase metadata (gpt-5.3-codex) RawAssistantContent: resp.RawAssistantContent, // preserve thinking blocks for Anthropic passback } messages = append(messages, assistantMsg) pendingMsgs = append(pendingMsgs, assistantMsg) // Emit block.reply for intermediate assistant content during tool iterations. // Non-streaming channels (Zalo, Discord, WhatsApp) would otherwise lose this text. if resp.Content != "" { sanitized := SanitizeAssistantContent(resp.Content) if sanitized != "" && !IsSilentReply(sanitized) { blockReplies++ lastBlockReply = sanitized l.emit(AgentEvent{ Type: protocol.AgentEventBlockReply, AgentID: l.id, RunID: req.RunID, Payload: map[string]string{"content": sanitized}, }) } } // Track team_tasks create for orphan detection (argument-based, pre-execution). // Spawn counting is done post-execution so failed spawns don't get counted. for _, tc := range resp.ToolCalls { if tc.Name == "team_tasks" { if action, _ := tc.Arguments["action"].(string); action == "create" { teamTaskCreates++ } } } // Tool budget check: soft stop when total tool calls exceed the per-agent limit. // Same pattern as maxIterations — no error thrown, LLM summarizes and returns. totalToolCalls += len(resp.ToolCalls) if l.maxToolCalls > 0 && totalToolCalls > l.maxToolCalls { slog.Warn("security.tool_budget_exceeded", "agent", l.id, "total", totalToolCalls, "limit", l.maxToolCalls) messages = append(messages, providers.Message{ Role: "user", Content: fmt.Sprintf("[System] Tool call budget reached (%d/%d). Do NOT call any more tools. Summarize results so far and respond to the user.", totalToolCalls, l.maxToolCalls), }) continue // one more LLM call for summarization, then loop exits (no tool calls) } // Emit activity event: tool execution phase if len(resp.ToolCalls) > 0 { toolNames := make([]string, len(resp.ToolCalls)) for i, tc := range resp.ToolCalls { toolNames[i] = tc.Name } emitRun(AgentEvent{ Type: protocol.AgentEventActivity, AgentID: l.id, RunID: req.RunID, Payload: map[string]any{ "phase": "tool_exec", "tool": toolNames[0], "tools": toolNames, "iteration": iteration, }, }) } // Execute tool calls (parallel when multiple, sequential when single) if len(resp.ToolCalls) == 1 { // Single tool: sequential — no goroutine overhead tc := resp.ToolCalls[0] emitRun(AgentEvent{ Type: protocol.AgentEventToolCall, AgentID: l.id, RunID: req.RunID, Payload: map[string]any{"name": tc.Name, "id": tc.ID, "arguments": truncateToolArgs(tc.Arguments, 500)}, }) argsJSON, _ := json.Marshal(tc.Arguments) slog.Info("tool call", "agent", l.id, "tool", tc.Name, "args_len", len(argsJSON)) argsHash := loopDetector.record(tc.Name, tc.Arguments) toolSpanStart := time.Now().UTC() toolSpanID := l.emitToolSpanStart(ctx, toolSpanStart, tc.Name, tc.ID, string(argsJSON)) stopSlowTimer := toolTiming.StartSlowTimer(tc.Name, l.id, req.RunID, slowToolEnabled, emitRun) var result *tools.Result if allowedTools != nil && !allowedTools[tc.Name] { // Attempt lazy activation: deferred MCP tools can be activated on first call // so the LLM can call them by name directly without mcp_tool_search. if l.tools.TryActivateDeferred(tc.Name) { // Verify tool isn't explicitly denied by policy before allowing. if l.toolPolicy != nil && l.toolPolicy.IsDenied(tc.Name, l.agentToolPolicy) { slog.Warn("security.tool_policy_denied_lazy", "agent", l.id, "tool", tc.Name) result = tools.ErrorResult("tool not allowed by policy: " + tc.Name) } else { allowedTools[tc.Name] = true slog.Info("mcp.tool.lazy_activated", "agent", l.id, "tool", tc.Name) } } else { slog.Warn("security.tool_policy_blocked", "agent", l.id, "tool", tc.Name) result = tools.ErrorResult("tool not allowed by policy: " + tc.Name) } } if result == nil { result = l.tools.ExecuteWithContext(ctx, tc.Name, tc.Arguments, req.Channel, req.ChatID, req.PeerKind, req.SessionKey, nil) } stopSlowTimer() l.emitToolSpanEnd(ctx, toolSpanID, toolSpanStart, result) // Record tool execution time for adaptive thresholds. toolTiming.Record(tc.Name, time.Since(toolSpanStart).Milliseconds()) // Record result for loop detection. loopDetector.recordResult(argsHash, result.ForLLM) if result.Async { asyncToolCalls = append(asyncToolCalls, tc.Name) } if result.IsError { errMsg := result.ForLLM if len(errMsg) > 200 { errMsg = errMsg[:200] + "..." } slog.Warn("tool error", "agent", l.id, "tool", tc.Name, "error", errMsg) } // Count successful spawn calls for orphan detection (post-execution). if tc.Name == "spawn" && !result.IsError { if tid, _ := tc.Arguments["team_task_id"].(string); tid != "" { teamTaskSpawns++ } } if hadBootstrap && bootstrapToolAllowlist[tc.Name] { bootstrapWriteDetected = true } toolResultPayload := map[string]any{ "name": tc.Name, "id": tc.ID, "is_error": result.IsError, "arguments": tc.Arguments, "result": truncateStr(result.ForLLM, 1000), } if result.IsError && result.ForLLM != "" { toolResultPayload["content"] = result.ForLLM } emitRun(AgentEvent{ Type: protocol.AgentEventToolResult, AgentID: l.id, RunID: req.RunID, Payload: toolResultPayload, }) l.scanWebToolResult(tc.Name, result) // Collect MEDIA: paths from tool results. // Prefer result.Media (explicit) over ForLLM MEDIA: prefix (legacy) to avoid duplicates. if len(result.Media) > 0 { for _, mf := range result.Media { ct := mf.MimeType if ct == "" { ct = mimeFromExt(filepath.Ext(mf.Path)) } mediaResults = append(mediaResults, MediaResult{Path: mf.Path, ContentType: ct}) } } else if mr := parseMediaResult(result.ForLLM); mr != nil { mediaResults = append(mediaResults, *mr) } if result.Deliverable != "" { deliverables = append(deliverables, result.Deliverable) } toolMsg := providers.Message{ Role: "tool", Content: result.ForLLM, ToolCallID: tc.ID, } messages = append(messages, toolMsg) pendingMsgs = append(pendingMsgs, toolMsg) // Check for tool call loop after recording result. if level, msg := loopDetector.detect(tc.Name, argsHash); level != "" { if level == "critical" { slog.Warn("tool loop critical", "agent", l.id, "tool", tc.Name, "message", msg) finalContent = "I was unable to complete this task — I got stuck repeatedly calling " + tc.Name + " without making progress. Please try rephrasing your request." break } // Warning: inject message so model knows to change strategy. slog.Warn("tool loop warning", "agent", l.id, "tool", tc.Name, "message", msg) messages = append(messages, providers.Message{Role: "user", Content: msg}) } } else { // Multiple tools: parallel execution via goroutines. // Tool instances are immutable (context-based) so concurrent access is safe. // Results are collected then processed sequentially for deterministic ordering. type indexedResult struct { idx int tc providers.ToolCall result *tools.Result argsJSON string spanStart time.Time } // 1. Emit all tool.call events upfront (client sees all calls starting) for _, tc := range resp.ToolCalls { emitRun(AgentEvent{ Type: protocol.AgentEventToolCall, AgentID: l.id, RunID: req.RunID, Payload: map[string]any{"name": tc.Name, "id": tc.ID, "arguments": truncateToolArgs(tc.Arguments, 500)}, }) } // 2. Execute all tools in parallel resultCh := make(chan indexedResult, len(resp.ToolCalls)) var wg sync.WaitGroup for i, tc := range resp.ToolCalls { wg.Add(1) go func(idx int, tc providers.ToolCall) { defer wg.Done() argsJSON, _ := json.Marshal(tc.Arguments) slog.Info("tool call", "agent", l.id, "tool", tc.Name, "args_len", len(argsJSON), "parallel", true) spanStart := time.Now().UTC() // Emit running span inside goroutine — goroutine-safe (channel send only). // End is also emitted here to prevent orphans on ctx cancellation. spanID := l.emitToolSpanStart(ctx, spanStart, tc.Name, tc.ID, string(argsJSON)) stopSlowTimer := toolTiming.StartSlowTimer(tc.Name, l.id, req.RunID, slowToolEnabled, emitRun) var result *tools.Result if allowedTools != nil && !allowedTools[tc.Name] { // Attempt lazy activation for deferred MCP tools. // Note: don't write back to allowedTools — concurrent goroutines share // the map and writes would race. TryActivateDeferred is idempotent. if l.tools.TryActivateDeferred(tc.Name) { // Verify tool isn't explicitly denied by policy before allowing. if l.toolPolicy != nil && l.toolPolicy.IsDenied(tc.Name, l.agentToolPolicy) { slog.Warn("security.tool_policy_denied_lazy", "agent", l.id, "tool", tc.Name) result = tools.ErrorResult("tool not allowed by policy: " + tc.Name) } else { slog.Info("mcp.tool.lazy_activated", "agent", l.id, "tool", tc.Name) } } else { slog.Warn("security.tool_policy_blocked", "agent", l.id, "tool", tc.Name) result = tools.ErrorResult("tool not allowed by policy: " + tc.Name) } } if result == nil { result = l.tools.ExecuteWithContext(ctx, tc.Name, tc.Arguments, req.Channel, req.ChatID, req.PeerKind, req.SessionKey, nil) } stopSlowTimer() l.emitToolSpanEnd(ctx, spanID, spanStart, result) resultCh <- indexedResult{idx: idx, tc: tc, result: result, argsJSON: string(argsJSON), spanStart: spanStart} }(i, tc) } // Close channel after all goroutines complete (run in separate goroutine to avoid deadlock) go func() { wg.Wait(); close(resultCh) }() // 3. Collect results collected := make([]indexedResult, 0, len(resp.ToolCalls)) for r := range resultCh { collected = append(collected, r) } // 4. Sort by original index → deterministic message ordering sort.Slice(collected, func(i, j int) bool { return collected[i].idx < collected[j].idx }) // 5. Process results sequentially: emit events, append messages, save to session // Note: tool span start/end already emitted inside goroutines above. var loopStuck bool for _, r := range collected { // Record tool execution time for adaptive thresholds. toolTiming.Record(r.tc.Name, time.Since(r.spanStart).Milliseconds()) // Record for loop detection. argsHash := loopDetector.record(r.tc.Name, r.tc.Arguments) loopDetector.recordResult(argsHash, r.result.ForLLM) if r.result.Async { asyncToolCalls = append(asyncToolCalls, r.tc.Name) } if r.result.IsError { errMsg := r.result.ForLLM if len(errMsg) > 200 { errMsg = errMsg[:200] + "..." } slog.Warn("tool error", "agent", l.id, "tool", r.tc.Name, "error", errMsg) } // Count successful spawn calls for orphan detection (post-execution). if r.tc.Name == "spawn" && !r.result.IsError { if tid, _ := r.tc.Arguments["team_task_id"].(string); tid != "" { teamTaskSpawns++ } } if hadBootstrap && bootstrapToolAllowlist[r.tc.Name] { bootstrapWriteDetected = true } parToolResultPayload := map[string]any{ "name": r.tc.Name, "id": r.tc.ID, "is_error": r.result.IsError, "arguments": r.tc.Arguments, "result": truncateStr(r.result.ForLLM, 1000), } if r.result.IsError && r.result.ForLLM != "" { parToolResultPayload["content"] = r.result.ForLLM } emitRun(AgentEvent{ Type: protocol.AgentEventToolResult, AgentID: l.id, RunID: req.RunID, Payload: parToolResultPayload, }) l.scanWebToolResult(r.tc.Name, r.result) // Collect MEDIA: paths from tool results. // Prefer result.Media (explicit) over ForLLM MEDIA: prefix (legacy) to avoid duplicates. if len(r.result.Media) > 0 { for _, mf := range r.result.Media { ct := mf.MimeType if ct == "" { ct = mimeFromExt(filepath.Ext(mf.Path)) } mediaResults = append(mediaResults, MediaResult{Path: mf.Path, ContentType: ct}) } } else if mr := parseMediaResult(r.result.ForLLM); mr != nil { mediaResults = append(mediaResults, *mr) } if r.result.Deliverable != "" { deliverables = append(deliverables, r.result.Deliverable) } toolMsg := providers.Message{ Role: "tool", Content: r.result.ForLLM, ToolCallID: r.tc.ID, } messages = append(messages, toolMsg) pendingMsgs = append(pendingMsgs, toolMsg) // Check for tool call loop. if level, msg := loopDetector.detect(r.tc.Name, argsHash); level != "" { if level == "critical" { slog.Warn("tool loop critical", "agent", l.id, "tool", r.tc.Name, "message", msg) finalContent = "I was unable to complete this task — I got stuck repeatedly calling " + r.tc.Name + " without making progress. Please try rephrasing your request." loopStuck = true break } slog.Warn("tool loop warning", "agent", l.id, "tool", r.tc.Name, "message", msg) messages = append(messages, providers.Message{Role: "user", Content: msg}) } } if loopStuck { break } } // Mid-run injection (Point A): drain any user follow-up messages // that arrived during tool execution. Append them after tool results // so the next LLM call sees: [tool results...] + [user follow-ups...]. if forLLM, forSession := l.drainInjectChannel(req.InjectCh, emitRun); len(forLLM) > 0 { messages = append(messages, forLLM...) pendingMsgs = append(pendingMsgs, forSession...) } } // 4. Full sanitization pipeline (matching TS extractAssistantText + sanitizeUserFacingText) finalContent = SanitizeAssistantContent(finalContent) // 4b. Config leak detection — disabled: too many false positives // (e.g. agent explaining public architecture mentioning SOUL.md etc.) // finalContent = StripConfigLeak(finalContent, l.agentType) // 5. Handle NO_REPLY: save to session for context but mark as silent. // Matching TS: NO_REPLY is saved (via resolveSilentReplyFallbackText) but // filtered at the payload level before delivery. isSilent := IsSilentReply(finalContent) // 5b. Skill evolution: postscript suggestion after complex tasks. // Fires when skill_evolve=true AND the run involved enough tool calls to warrant a skill. // Appended to the agent's own final response so the user sees it inline and can explicitly // consent ("save as skill") before anything is created. No mid-loop injection, no async // goroutine, no session contamination — the next user turn naturally triggers skill creation. if l.skillEvolve && l.skillNudgeInterval > 0 && totalToolCalls >= l.skillNudgeInterval && finalContent != "" && !isSilent && !skillPostscriptSent { skillPostscriptSent = true locale := store.LocaleFromContext(ctx) finalContent += "\n\n---\n_" + i18n.T(locale, i18n.MsgSkillNudgePostscript) + "_" } // 6. Fallback for empty content if finalContent == "" { if len(asyncToolCalls) > 0 { finalContent = "..." } else { finalContent = "..." } } // Append content suffix (e.g. image markdown for WS) before saving to session. if req.ContentSuffix != "" && !strings.Contains(finalContent, req.ContentSuffix) { finalContent += req.ContentSuffix } pendingMsgs = append(pendingMsgs, providers.Message{ Role: "assistant", Content: finalContent, Thinking: finalThinking, }) // Bootstrap nudge: if model didn't call write_file on turn 2+, inject reminder // into session history so the next turn sees it. Appended to pendingMsgs so it's // flushed in the single Save below (avoids double I/O). // Note: the nudge counts as a "user" turn in history, which accelerates auto-cleanup // by one turn — acceptable since bootstrap should complete in 2-3 turns anyway. if hadBootstrap && l.bootstrapCleanup != nil { nudgeUserTurns := 1 for _, m := range history { if m.Role == "user" { nudgeUserTurns++ } } if !bootstrapWriteDetected && nudgeUserTurns >= 2 && nudgeUserTurns < bootstrapAutoCleanupTurns { pendingMsgs = append(pendingMsgs, providers.Message{ Role: "user", Content: "[System] You haven't completed onboarding yet. Please update USER.md with the user's details and clear BOOTSTRAP.md as instructed.", }) } } // Flush all buffered messages to session atomically. // This ensures concurrent runs never see each other's in-progress messages. for _, msg := range pendingMsgs { l.sessions.AddMessage(req.SessionKey, msg) } // Persist adaptive tool timing to session metadata. if serialized := toolTiming.Serialize(); serialized != "" { l.sessions.SetSessionMetadata(req.SessionKey, map[string]string{"tool_timing": serialized}) } // Write session metadata (matching TS session entry updates) l.sessions.UpdateMetadata(req.SessionKey, l.model, l.provider.Name(), req.Channel) l.sessions.AccumulateTokens(req.SessionKey, int64(totalUsage.PromptTokens), int64(totalUsage.CompletionTokens)) // Calibrate token estimation: store actual prompt tokens + message count. // Next time EstimateTokensWithCalibration() is called, it uses this as a base // instead of the chars/3 heuristic (more accurate for multilingual content). if totalUsage.PromptTokens > 0 { msgCount := len(history) + len(pendingMsgs) l.sessions.SetLastPromptTokens(req.SessionKey, totalUsage.PromptTokens, msgCount) } l.sessions.Save(req.SessionKey) // Bootstrap auto-cleanup: after enough conversation turns, remove BOOTSTRAP.md // as a safety net in case the LLM didn't clear it itself. // Bootstrap typically completes in 2-3 turns; we auto-cleanup after 3 user messages. // Uses pre-run history (already loaded) + 1 for current message — no extra DB call. if hadBootstrap && l.bootstrapCleanup != nil { userTurns := 1 // current user message for _, m := range history { if m.Role == "user" { userTurns++ } } if userTurns >= bootstrapAutoCleanupTurns { if cleanErr := l.bootstrapCleanup(ctx, l.agentUUID, req.UserID); cleanErr != nil { slog.Warn("bootstrap auto-cleanup failed", "error", cleanErr, "agent", l.id, "user", req.UserID) } else { slog.Info("bootstrap auto-cleanup completed", "agent", l.id, "user", req.UserID, "turns", userTurns) } } } // If silent, return empty content so gateway suppresses delivery. if isSilent { slog.Info("agent loop: NO_REPLY detected, suppressing delivery", "agent", l.id, "session", req.SessionKey) finalContent = "" } // 5. Maybe summarize l.maybeSummarize(ctx, req.SessionKey) // Include forwarded media from delegation results (not cleaned up like req.Media) for _, mf := range req.ForwardMedia { ct := mf.MimeType if ct == "" { ct = mimeFromExt(filepath.Ext(mf.Path)) } mediaResults = append(mediaResults, MediaResult{Path: mf.Path, ContentType: ct}) } // Deduplicate media by path — prevents the same image being sent twice // (e.g. once via ForwardMedia and again when the LLM reads the file). mediaResults = deduplicateMedia(mediaResults) return &RunResult{ Content: finalContent, RunID: req.RunID, Iterations: iteration, Usage: &totalUsage, Media: mediaResults, Deliverables: deliverables, BlockReplies: blockReplies, LastBlockReply: lastBlockReply, }, nil } // truncateToolArgs returns a copy of arguments with string values truncated to maxLen. func truncateToolArgs(args map[string]any, maxLen int) map[string]any { out := make(map[string]any, len(args)) for k, v := range args { if s, ok := v.(string); ok && len(s) > maxLen { out[k] = truncateStr(s, maxLen) } else { out[k] = v } } return out }