Files
goclaw/internal/agent/loop.go
T
viettranx 4e9f155a4c feat(agent): adaptive tool timing with slow tool notification
Track per-tool execution time statistics in session metadata. When a tool
call exceeds its adaptive threshold (2x historical max, min 120s default),
send a direct outbound notification to the user.

- ToolTimingMap: parse/serialize/record/threshold from session metadata
- StartSlowTimer: fires once per tool call, auto-cancels on completion
- Team config: slow_tool toggle (default on, always direct, never leader)
- UI: toggle in team settings with i18n (en/vi/zh)
- Store: add GetSessionMetadata to session store interface
2026-03-19 13:35:57 +07:00

1332 lines
50 KiB
Go

package agent
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"github.com/google/uuid"
"github.com/nextlevelbuilder/goclaw/internal/bootstrap"
"github.com/nextlevelbuilder/goclaw/internal/bus"
"github.com/nextlevelbuilder/goclaw/internal/i18n"
"github.com/nextlevelbuilder/goclaw/internal/config"
"github.com/nextlevelbuilder/goclaw/internal/providers"
"github.com/nextlevelbuilder/goclaw/internal/store"
"github.com/nextlevelbuilder/goclaw/internal/tools"
"github.com/nextlevelbuilder/goclaw/pkg/protocol"
)
func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error) {
// Per-run emit wrapper: enriches every AgentEvent with delegation + routing context.
emitRun := func(event AgentEvent) {
event.RunKind = req.RunKind
event.DelegationID = req.DelegationID
event.TeamID = req.TeamID
event.TeamTaskID = req.TeamTaskID
event.ParentAgentID = req.ParentAgentID
event.UserID = req.UserID
event.Channel = req.Channel
event.ChatID = req.ChatID
l.emit(event)
}
// Inject agent UUID into context for tool routing
if l.agentUUID != uuid.Nil {
ctx = store.WithAgentID(ctx, l.agentUUID)
}
// Inject user ID into context for per-user scoping (memory, context files, etc.)
if req.UserID != "" {
ctx = store.WithUserID(ctx, req.UserID)
}
// Inject agent type into context for interceptor routing
if l.agentType != "" {
ctx = store.WithAgentType(ctx, l.agentType)
}
// Inject self-evolve flag for predefined agents that can update SOUL.md
if l.selfEvolve {
ctx = store.WithSelfEvolve(ctx, true)
}
// Inject original sender ID for group file writer permission checks
if req.SenderID != "" {
ctx = store.WithSenderID(ctx, req.SenderID)
}
// Inject global builtin tool settings for media tools (provider chain)
if l.builtinToolSettings != nil {
ctx = tools.WithBuiltinToolSettings(ctx, l.builtinToolSettings)
}
// Inject channel type into context for tools (e.g. message tool needs it for Zalo group routing)
if req.ChannelType != "" {
ctx = tools.WithToolChannelType(ctx, req.ChannelType)
}
// Inject per-agent overrides from DB so tools honor per-agent settings.
if l.restrictToWs != nil {
ctx = tools.WithRestrictToWorkspace(ctx, *l.restrictToWs)
}
if l.subagentsCfg != nil {
ctx = tools.WithSubagentConfig(ctx, l.subagentsCfg)
}
// Pass the agent's model and provider so subagents inherit the correct combo.
if l.model != "" {
ctx = tools.WithParentModel(ctx, l.model)
}
if l.provider != nil {
ctx = tools.WithParentProvider(ctx, l.provider.Name())
}
if l.memoryCfg != nil {
ctx = tools.WithMemoryConfig(ctx, l.memoryCfg)
}
if l.sandboxCfg != nil {
ctx = tools.WithSandboxConfig(ctx, l.sandboxCfg)
}
if l.shellDenyGroups != nil {
ctx = store.WithShellDenyGroups(ctx, l.shellDenyGroups)
}
// Workspace scope propagation (delegation origin → workspace tools).
if req.WorkspaceChannel != "" {
ctx = tools.WithWorkspaceChannel(ctx, req.WorkspaceChannel)
}
if req.WorkspaceChatID != "" {
ctx = tools.WithWorkspaceChatID(ctx, req.WorkspaceChatID)
}
if req.TeamTaskID != "" {
ctx = tools.WithTeamTaskID(ctx, req.TeamTaskID)
}
// Per-user workspace isolation.
// Workspace path comes from user_agent_profiles (includes channel segment
// for cross-channel isolation). Cached in userWorkspaces to avoid repeated DB queries.
isTeamSession := bootstrap.IsTeamSession(req.SessionKey)
if l.workspace != "" && req.UserID != "" {
cachedWs, loaded := l.userWorkspaces.Load(req.UserID)
if !loaded {
// First request for this user: get/create profile → returns stored workspace.
// Also seeds per-user context files on first chat.
// Team-dispatched sessions skip seeding — members process tasks with full
// capabilities, no bootstrap/user onboarding needed.
ws := l.workspace
if l.ensureUserFiles != nil && !isTeamSession {
var err error
ws, err = l.ensureUserFiles(ctx, l.agentUUID, req.UserID, l.agentType, l.workspace, req.Channel)
if err != nil {
slog.Warn("failed to ensure user context files", "error", err)
ws = l.workspace
}
}
// Expand ~ and convert to absolute for filesystem operations.
ws = config.ExpandHome(ws)
if !filepath.IsAbs(ws) {
ws, _ = filepath.Abs(ws)
}
l.userWorkspaces.Store(req.UserID, ws)
cachedWs = ws
}
effectiveWorkspace := cachedWs.(string)
if !l.shouldShareWorkspace(req.UserID, req.PeerKind) {
effectiveWorkspace = filepath.Join(effectiveWorkspace, sanitizePathSegment(req.UserID))
}
if l.shouldShareMemory() {
ctx = store.WithSharedMemory(ctx)
}
if err := os.MkdirAll(effectiveWorkspace, 0755); err != nil {
slog.Warn("failed to create user workspace directory", "workspace", effectiveWorkspace, "user", req.UserID, "error", err)
}
ctx = tools.WithToolWorkspace(ctx, effectiveWorkspace)
} else if l.workspace != "" {
ctx = tools.WithToolWorkspace(ctx, l.workspace)
}
// Team workspace handling:
// - Dispatched task (req.TeamWorkspace set): override default workspace so
// relative paths resolve to team workspace. Agent workspace is accessible
// via ToolTeamWorkspace for absolute-path access.
// - Direct chat (auto-resolved): keep agent workspace as default, team
// workspace accessible via absolute path.
if req.TeamWorkspace != "" {
if err := os.MkdirAll(req.TeamWorkspace, 0755); err != nil {
slog.Warn("failed to create team workspace directory", "workspace", req.TeamWorkspace, "error", err)
}
ctx = tools.WithToolTeamWorkspace(ctx, req.TeamWorkspace)
ctx = tools.WithToolWorkspace(ctx, req.TeamWorkspace) // default for relative paths
}
if req.TeamID != "" {
ctx = tools.WithToolTeamID(ctx, req.TeamID)
}
// Auto-resolve team workspace for agents not dispatched via team task.
// Lead agents default to team workspace (primary job is team coordination).
// Non-lead members keep own workspace; team workspace is accessible via absolute path.
// resolvedTeamSettings caches team settings from workspace resolution
// to avoid re-querying when checking slow_tool notification config.
var resolvedTeamSettings json.RawMessage
if req.TeamWorkspace == "" && l.teamStore != nil && l.agentUUID != uuid.Nil {
if team, _ := l.teamStore.GetTeamForAgent(ctx, l.agentUUID); team != nil {
resolvedTeamSettings = team.Settings
// Shared workspace: scope by teamID only. Isolated (default): scope by chatID too.
wsChat := req.ChatID
if wsChat == "" {
wsChat = req.UserID
}
if tools.IsSharedWorkspace(team.Settings) {
wsChat = ""
}
if wsDir, err := tools.WorkspaceDir(l.dataDir, team.ID, wsChat); err == nil {
ctx = tools.WithToolTeamWorkspace(ctx, wsDir)
if team.LeadAgentID == l.agentUUID {
ctx = tools.WithToolWorkspace(ctx, wsDir)
}
}
if req.TeamID == "" {
ctx = tools.WithToolTeamID(ctx, team.ID.String())
}
}
}
// Persist agent UUID + user ID on the session (for querying/tracing)
if l.agentUUID != uuid.Nil || req.UserID != "" {
l.sessions.SetAgentInfo(req.SessionKey, l.agentUUID, req.UserID)
}
// Security: scan user message for injection patterns.
// Action is configurable: "log" (info), "warn" (default), "block" (reject message).
if l.inputGuard != nil {
if matches := l.inputGuard.Scan(req.Message); len(matches) > 0 {
matchStr := strings.Join(matches, ",")
switch l.injectionAction {
case "block":
slog.Warn("security.injection_blocked",
"agent", l.id, "user", req.UserID,
"patterns", matchStr, "message_len", len(req.Message),
)
return nil, fmt.Errorf("message blocked: potential prompt injection detected (%s)", matchStr)
case "log":
slog.Info("security.injection_detected",
"agent", l.id, "user", req.UserID,
"patterns", matchStr, "message_len", len(req.Message),
)
default: // "warn"
slog.Warn("security.injection_detected",
"agent", l.id, "user", req.UserID,
"patterns", matchStr, "message_len", len(req.Message),
)
}
}
}
// Inject agent key into context for tool-level resolution (multiple agents share tool registry)
ctx = tools.WithToolAgentKey(ctx, l.id)
// Security: truncate oversized user messages gracefully (feed truncation notice into LLM)
maxChars := l.maxMessageChars
if maxChars <= 0 {
maxChars = 32_000 // default ~8-10K tokens
}
if len(req.Message) > maxChars {
originalLen := len(req.Message)
req.Message = req.Message[:maxChars] +
fmt.Sprintf("\n\n[System: Message was truncated from %d to %d characters due to size limit. "+
"Please ask the user to send shorter messages or use the read_file tool for large content.]",
originalLen, maxChars)
slog.Warn("security.message_truncated",
"agent", l.id, "user", req.UserID,
"original_len", originalLen, "truncated_to", maxChars,
)
}
// 0. Cache agent's context window on the session (first run only).
// Enables scheduler's adaptive throttle to use the real value instead of hardcoded 200K.
if l.sessions.GetContextWindow(req.SessionKey) <= 0 {
l.sessions.SetContextWindow(req.SessionKey, l.contextWindow)
}
// 0b. Load adaptive tool timing from session metadata.
toolTiming := ParseToolTiming(l.sessions.GetSessionMetadata(req.SessionKey))
// Resolve slow_tool notification config from already-loaded team settings (no extra DB query).
slowToolEnabled := tools.ParseTeamNotifyConfig(resolvedTeamSettings).SlowTool
// 1. Build messages from session history
history := l.sessions.GetHistory(req.SessionKey)
summary := l.sessions.GetSummary(req.SessionKey)
// buildMessages resolves context files once and also detects BOOTSTRAP.md presence
// (hadBootstrap) — no extra DB roundtrip needed for bootstrap detection.
messages, hadBootstrap := l.buildMessages(ctx, history, summary, req.Message, req.ExtraSystemPrompt, req.SessionKey, req.Channel, req.ChannelType, req.PeerKind, req.UserID, req.HistoryLimit, req.SkillFilter, req.LightContext)
// 1b. Determine image routing strategy.
// If read_image tool has a dedicated vision provider, images are NOT attached inline
// to the main LLM — the agent calls read_image tool instead. This avoids sending
// images to providers that don't support vision or have strict content filters.
deferToReadImageTool := l.hasReadImageProvider()
if !deferToReadImageTool {
// Inline mode: reload historical images directly into messages for main provider.
l.reloadMediaForMessages(messages, maxMediaReloadMessages)
}
// 2. Process media: sanitize images, persist to media store.
var mediaRefs []providers.MediaRef
if len(req.Media) > 0 {
mediaRefs = l.persistMedia(req.SessionKey, req.Media)
// Load current-turn images from persisted refs.
var imageFiles []bus.MediaFile
for _, ref := range mediaRefs {
if ref.Kind == "image" {
if p, err := l.mediaStore.LoadPath(ref.ID); err == nil {
imageFiles = append(imageFiles, bus.MediaFile{Path: p, MimeType: ref.MimeType})
}
}
}
if images := loadImages(imageFiles); len(images) > 0 {
if deferToReadImageTool {
// Tool mode: store in context only — agent calls read_image tool.
ctx = tools.WithMediaImages(ctx, images)
slog.Info("vision: deferring to read_image tool", "count", len(images), "agent", l.id)
} else {
// Inline mode: attach to message + context.
messages[len(messages)-1].Images = images
ctx = tools.WithMediaImages(ctx, images)
slog.Info("vision: attached images inline to main provider", "count", len(images), "agent", l.id)
}
}
}
// 2a. Tool mode: also load historical images into context for read_image tool.
// Without this, read_image can only see current-turn images, not previous turns.
if deferToReadImageTool && l.mediaStore != nil {
ctx = l.loadHistoricalImagesForTool(ctx, mediaRefs, messages)
}
// 2b. Collect document MediaRefs (historical + current) for read_document tool.
// Historical first, current last — so refs[len-1] is always the most recent file.
var docRefs []providers.MediaRef
for i := len(messages) - 1; i >= 0; i-- {
for _, ref := range messages[i].MediaRefs {
if ref.Kind == "document" {
docRefs = append(docRefs, ref)
}
}
}
for _, ref := range mediaRefs {
if ref.Kind == "document" {
docRefs = append(docRefs, ref)
}
}
if len(docRefs) > 0 {
ctx = tools.WithMediaDocRefs(ctx, docRefs)
// Enrich the last user message with persisted file paths so skills can access
// documents via exec (e.g. pypdf). Only for current-turn refs (just persisted).
l.enrichDocumentPaths(messages, mediaRefs)
}
// 2c. Collect audio MediaRefs (historical + current) for read_audio tool.
var audioRefs []providers.MediaRef
for i := len(messages) - 1; i >= 0; i-- {
for _, ref := range messages[i].MediaRefs {
if ref.Kind == "audio" {
audioRefs = append(audioRefs, ref)
}
}
}
for _, ref := range mediaRefs {
if ref.Kind == "audio" {
audioRefs = append(audioRefs, ref)
}
}
if len(audioRefs) > 0 {
ctx = tools.WithMediaAudioRefs(ctx, audioRefs)
// Embed media IDs into <media:audio> tags so LLM can reference them.
l.enrichAudioIDs(messages, mediaRefs)
}
// 2d. Collect video MediaRefs (historical + current) for read_video tool.
var videoRefs []providers.MediaRef
for i := len(messages) - 1; i >= 0; i-- {
for _, ref := range messages[i].MediaRefs {
if ref.Kind == "video" {
videoRefs = append(videoRefs, ref)
}
}
}
for _, ref := range mediaRefs {
if ref.Kind == "video" {
videoRefs = append(videoRefs, ref)
}
}
if len(videoRefs) > 0 {
ctx = tools.WithMediaVideoRefs(ctx, videoRefs)
// Embed media IDs into <media:video> tags so LLM can reference them.
l.enrichVideoIDs(messages, mediaRefs)
}
// 2e. Enrich <media:image> tags with persisted media IDs so the LLM
// knows images were received and stored (consistent with audio/video enrichment).
l.enrichImageIDs(messages, mediaRefs)
// 2f. Collect all media file paths for team workspace auto-collect.
// When the leader calls team_tasks(create), these paths are copied to the
// team workspace so members can access attached files.
if len(mediaRefs) > 0 && l.mediaStore != nil {
var mediaPaths []string
for _, ref := range mediaRefs {
if p, err := l.mediaStore.LoadPath(ref.ID); err == nil {
mediaPaths = append(mediaPaths, p)
}
}
if len(mediaPaths) > 0 {
ctx = tools.WithRunMediaPaths(ctx, mediaPaths)
// Extract original filenames from <media:document name="X" path="Y"> tags
// in the last user message (enriched in step 2b above).
if lastMsg := messages[len(messages)-1]; lastMsg.Role == "user" {
if nameMap := tools.ExtractMediaNameMap(lastMsg.Content); len(nameMap) > 0 {
ctx = tools.WithRunMediaNames(ctx, nameMap)
}
}
}
}
// 2g. Cross-session task reminder: notify team leads about pending and in-progress tasks.
// Stale recovery (expired lock → pending) is handled by the background TaskTicker.
if l.teamStore != nil && l.agentUUID != uuid.Nil {
if team, _ := l.teamStore.GetTeamForAgent(ctx, l.agentUUID); team != nil && team.LeadAgentID == l.agentUUID {
if tasks, err := l.teamStore.ListTasks(ctx, team.ID, "newest", "active", req.UserID, "", "", 0, 0); err == nil {
var stale []string
var inProgress []string
for _, t := range tasks {
if t.Status == store.TeamTaskStatusPending {
age := time.Since(t.CreatedAt).Truncate(time.Minute)
stale = append(stale, fmt.Sprintf("- %s: \"%s\" (pending %s)", t.ID, t.Subject, age))
}
if t.Status == store.TeamTaskStatusInProgress {
age := time.Since(t.UpdatedAt).Truncate(time.Minute)
progressInfo := fmt.Sprintf("in progress %s", age)
if t.ProgressPercent > 0 {
if t.ProgressStep != "" {
progressInfo = fmt.Sprintf("%d%% — %s, %s", t.ProgressPercent, t.ProgressStep, age)
} else {
progressInfo = fmt.Sprintf("%d%%, %s", t.ProgressPercent, age)
}
}
inProgress = append(inProgress, fmt.Sprintf("- %s: \"%s\" (%s)", t.ID, t.Subject, progressInfo))
}
}
var parts []string
if len(stale) > 0 {
parts = append(parts, fmt.Sprintf(
"You have %d pending team task(s) awaiting dispatch:\n%s\n"+
"These tasks will be auto-dispatched to available team members. If no longer needed, cancel with team_tasks action=cancel.",
len(stale), strings.Join(stale, "\n")))
}
if len(inProgress) > 0 {
parts = append(parts, fmt.Sprintf(
"You have %d in-progress team task(s) being handled by team members:\n%s\n"+
"Their results will arrive automatically. Do NOT cancel, re-create, or re-spawn these tasks.",
len(inProgress), strings.Join(inProgress, "\n")))
}
if len(parts) > 0 {
reminder := "[System] " + strings.Join(parts, "\n\n")
messages = append(messages,
providers.Message{Role: "user", Content: reminder},
providers.Message{Role: "assistant", Content: "I see the task status. Let me handle accordingly."},
)
}
}
}
}
// 2h. Member task reminder: inject task context for members working on dispatched tasks.
// Caches task subject/number for mid-loop progress nudge (avoids extra DB query).
var memberTaskSubject string
var memberTaskNumber int
if req.TeamTaskID != "" && l.teamStore != nil {
if taskUUID, err := uuid.Parse(req.TeamTaskID); err == nil {
if task, err := l.teamStore.GetTask(ctx, taskUUID); err == nil && task != nil {
memberTaskSubject = task.Subject
memberTaskNumber = task.TaskNumber
reminder := fmt.Sprintf(
"[System] You are working on team task #%d: %q. "+
"Stay focused on this task. Your final response becomes the task result — make it clear and complete. "+
"For long tasks, report progress: team_tasks(action=\"progress\", percent=50, text=\"status\").",
task.TaskNumber, task.Subject)
messages = append(messages,
providers.Message{Role: "user", Content: reminder},
providers.Message{Role: "assistant", Content: "Understood. I'll focus on this task and report progress."},
)
}
}
}
// 3. Buffer new messages — write to session only AFTER the run completes.
// This prevents concurrent runs from seeing each other's in-progress messages.
// NOTE: pendingMsgs stores text + lightweight MediaRefs (not base64 images).
var pendingMsgs []providers.Message
if !req.HideInput {
pendingMsgs = append(pendingMsgs, providers.Message{
Role: "user",
Content: req.Message,
MediaRefs: mediaRefs,
})
}
// 4. Run LLM iteration loop
var loopDetector toolLoopState // detects repeated no-progress tool calls
var totalUsage providers.Usage
iteration := 0
totalToolCalls := 0
var finalContent string
var finalThinking string
var asyncToolCalls []string // track async spawn tool names for fallback
var bootstrapWriteDetected bool // track if write_file was called during bootstrap
var mediaResults []MediaResult // media files from tool MEDIA: results
var deliverables []string // actual content from tool outputs (for team task results)
var blockReplies int // count of block.reply events emitted (for dedup in consumer)
var lastBlockReply string // last block reply content
// Mid-loop compaction: summarize in-memory messages when context exceeds threshold.
// Uses same config as maybeSummarize (contextWindow * historyShare).
var midLoopCompacted bool
// Team task orphan detection: track team_tasks create vs spawn calls.
// If the LLM creates tasks but forgets to spawn, inject a reminder.
var teamTaskCreates int // count of team_tasks action=create calls
var teamTaskSpawns int // count of spawn calls with team_task_id
// Skill evolution: budget pressure nudge state (sent at most once each per run).
var skillNudge70Sent, skillNudge90Sent bool
var skillPostscriptSent bool
// Member progress nudge: dispatched members are reminded to report progress every 6 iterations; the nudge is injected inside the iteration loop using the task subject/number cached above.
// Inject retry hook so channels can update placeholder on LLM retries.
ctx = providers.WithRetryHook(ctx, func(attempt, maxAttempts int, err error) {
emitRun(AgentEvent{
Type: protocol.AgentEventRunRetrying,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]string{
"attempt": fmt.Sprintf("%d", attempt),
"maxAttempts": fmt.Sprintf("%d", maxAttempts),
"error": err.Error(),
},
})
})
maxIter := l.maxIterations
if req.MaxIterations > 0 && req.MaxIterations < maxIter {
maxIter = req.MaxIterations
}
// Budget check: query monthly spent once before starting iterations.
if l.budgetMonthlyCents > 0 && l.tracingStore != nil && l.agentUUID != uuid.Nil {
now := time.Now().UTC()
spent, err := l.tracingStore.GetMonthlyAgentCost(ctx, l.agentUUID, now.Year(), now.Month())
if err == nil {
spentCents := int(spent * 100)
if spentCents >= l.budgetMonthlyCents {
slog.Warn("agent budget exceeded", "agent", l.id, "spent_cents", spentCents, "budget_cents", l.budgetMonthlyCents)
return nil, fmt.Errorf("monthly budget exceeded ($%.2f / $%.2f)", spent, float64(l.budgetMonthlyCents)/100)
}
}
}
for iteration < maxIter {
iteration++
slog.Debug("agent iteration", "agent", l.id, "iteration", iteration, "messages", len(messages))
// Skill evolution: budget pressure nudges at 70% and 90% of iteration budget.
// Ephemeral (in-memory only, not persisted to session) — LLM sees them during this run only.
if l.skillEvolve && maxIter > 0 {
locale := store.LocaleFromContext(ctx)
iterPct := float64(iteration) / float64(maxIter)
if iterPct >= 0.90 && !skillNudge90Sent {
skillNudge90Sent = true
messages = append(messages, providers.Message{
Role: "user",
Content: i18n.T(locale, i18n.MsgSkillNudge90Pct),
})
} else if iterPct >= 0.70 && !skillNudge70Sent {
skillNudge70Sent = true
messages = append(messages, providers.Message{
Role: "user",
Content: i18n.T(locale, i18n.MsgSkillNudge70Pct),
})
}
}
// Member progress nudge: remind to report progress every 6 iterations.
// Suggests percent based on iteration ratio — model can adjust but has a baseline.
if req.TeamTaskID != "" && memberTaskSubject != "" && iteration > 0 && iteration%6 == 0 {
var nudge string
if maxIter > 0 {
suggestedPct := iteration * 100 / maxIter
nudge = fmt.Sprintf(
"[System] You are at iteration %d/%d (~%d%% of budget) working on task #%d: %q. "+
"Report your progress now: team_tasks(action=\"progress\", percent=%d, text=\"what you've accomplished so far\"). "+
"Adjust percent based on actual work completed.",
iteration, maxIter, suggestedPct, memberTaskNumber, memberTaskSubject, suggestedPct)
} else {
nudge = fmt.Sprintf(
"[System] You are at iteration %d working on task #%d: %q. "+
"Report your progress now: team_tasks(action=\"progress\", percent=50, text=\"what you've accomplished so far\"). "+
"Adjust percent based on actual work completed.",
iteration, memberTaskNumber, memberTaskSubject)
}
messages = append(messages, providers.Message{Role: "user", Content: nudge})
}
// Emit activity event: thinking phase
emitRun(AgentEvent{
Type: protocol.AgentEventActivity,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]any{"phase": "thinking", "iteration": iteration},
})
// Build provider request with policy-filtered tools
var toolDefs []providers.ToolDefinition
var allowedTools map[string]bool
if l.toolPolicy != nil {
toolDefs = l.toolPolicy.FilterTools(l.tools, l.id, l.provider.Name(), l.agentToolPolicy, req.ToolAllow, false, false)
allowedTools = make(map[string]bool, len(toolDefs))
for _, td := range toolDefs {
allowedTools[td.Function.Name] = true
}
} else {
toolDefs = l.tools.ProviderDefs()
}
// Bootstrap mode: restrict API tool definitions to write_file only (open agents).
// Predefined agents keep all tools — BOOTSTRAP.md guides behavior.
if hadBootstrap && l.agentType != "predefined" {
var bootstrapDefs []providers.ToolDefinition
for _, td := range toolDefs {
if bootstrapToolAllowlist[td.Function.Name] {
bootstrapDefs = append(bootstrapDefs, td)
}
}
toolDefs = bootstrapDefs
}
// Hide skill_manage from LLM when skill_evolve is off.
// Tool stays in the registry (shared) but won't appear in API tool definitions.
if !l.skillEvolve {
filtered := toolDefs[:0:0]
for _, td := range toolDefs {
if td.Function.Name != "skill_manage" {
filtered = append(filtered, td)
}
}
toolDefs = filtered
}
// Use per-request model override if set (e.g. heartbeat uses cheaper model).
model := l.model
if req.ModelOverride != "" {
model = req.ModelOverride
}
chatReq := providers.ChatRequest{
Messages: messages,
Tools: toolDefs,
Model: model,
Options: map[string]any{
providers.OptMaxTokens: l.effectiveMaxTokens(),
providers.OptTemperature: 0.7,
providers.OptSessionKey: req.SessionKey,
providers.OptAgentID: l.agentUUID.String(),
providers.OptUserID: req.UserID,
providers.OptChannel: req.Channel,
providers.OptChatID: req.ChatID,
providers.OptPeerKind: req.PeerKind,
},
}
if l.thinkingLevel != "" && l.thinkingLevel != "off" {
if tc, ok := l.provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() {
chatReq.Options[providers.OptThinkingLevel] = l.thinkingLevel
} else {
slog.Debug("thinking_level ignored: provider does not support thinking",
"provider", l.provider.Name(), "level", l.thinkingLevel)
}
}
// Call LLM (streaming or non-streaming)
var resp *providers.ChatResponse
var err error
llmSpanStart := time.Now().UTC()
llmSpanID := l.emitLLMSpanStart(ctx, llmSpanStart, iteration, messages)
if req.Stream {
resp, err = l.provider.ChatStream(ctx, chatReq, func(chunk providers.StreamChunk) {
if chunk.Thinking != "" {
emitRun(AgentEvent{
Type: protocol.ChatEventThinking,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]string{"content": chunk.Thinking},
})
}
if chunk.Content != "" {
emitRun(AgentEvent{
Type: protocol.ChatEventChunk,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]string{"content": chunk.Content},
})
}
})
} else {
resp, err = l.provider.Chat(ctx, chatReq)
}
if err != nil {
l.emitLLMSpanEnd(ctx, llmSpanID, llmSpanStart, nil, err)
return nil, fmt.Errorf("LLM call failed (iteration %d): %w", iteration, err)
}
l.emitLLMSpanEnd(ctx, llmSpanID, llmSpanStart, resp, nil)
// For non-streaming responses, emit thinking and content as single events
if !req.Stream {
if resp.Thinking != "" {
emitRun(AgentEvent{
Type: protocol.ChatEventThinking,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]string{"content": resp.Thinking},
})
}
if resp.Content != "" {
emitRun(AgentEvent{
Type: protocol.ChatEventChunk,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]string{"content": resp.Content},
})
}
}
if resp.Usage != nil {
totalUsage.PromptTokens += resp.Usage.PromptTokens
totalUsage.CompletionTokens += resp.Usage.CompletionTokens
totalUsage.TotalTokens += resp.Usage.TotalTokens
totalUsage.ThinkingTokens += resp.Usage.ThinkingTokens
}
// Mid-loop compaction: same threshold as maybeSummarize (contextWindow * historyShare)
// but applied to in-memory messages during the run. Prevents context overflow for
// long-running agents (e.g. delegated research tasks that accumulate many tool results).
if !midLoopCompacted && l.contextWindow > 0 {
historyShare := 0.75
if l.compactionCfg != nil && l.compactionCfg.MaxHistoryShare > 0 {
historyShare = l.compactionCfg.MaxHistoryShare
}
threshold := int(float64(l.contextWindow) * historyShare)
promptTokens := 0
if resp.Usage != nil && resp.Usage.PromptTokens > 0 {
promptTokens = resp.Usage.PromptTokens
} else {
promptTokens = EstimateTokens(messages)
}
if promptTokens >= threshold {
midLoopCompacted = true
emitRun(AgentEvent{
Type: protocol.AgentEventActivity,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]any{"phase": "compacting", "iteration": iteration},
})
if compacted := l.compactMessagesInPlace(ctx, messages); compacted != nil {
messages = compacted
}
slog.Info("mid_loop_compaction",
"agent", l.id,
"prompt_tokens", promptTokens,
"threshold", threshold,
"context_window", l.contextWindow)
}
}
// Output truncated (max_tokens hit). Tool call args are likely incomplete.
// Inject a system hint so the model can retry with shorter output.
if resp.FinishReason == "length" && len(resp.ToolCalls) > 0 {
slog.Warn("output truncated (max_tokens), tool calls may have incomplete args",
"agent", l.id, "iteration", iteration, "max_tokens", l.effectiveMaxTokens())
messages = append(messages,
providers.Message{Role: "assistant", Content: resp.Content},
providers.Message{
Role: "user",
Content: "[System] Your output was truncated because it exceeded max_tokens. Your tool call arguments were incomplete. Please retry with shorter content — split large writes into multiple smaller calls, or reduce the amount of text.",
},
)
continue
}
// No tool calls → done
if len(resp.ToolCalls) == 0 {
// Mid-run injection (Point B): drain all buffered user follow-up messages
// before exiting. If found, save current assistant response and continue
// the loop so the LLM can respond to the injected messages.
if forLLM, forSession := l.drainInjectChannel(req.InjectCh, emitRun); len(forLLM) > 0 {
messages = append(messages, providers.Message{Role: "assistant", Content: resp.Content})
messages = append(messages, forLLM...)
pendingMsgs = append(pendingMsgs, providers.Message{Role: "assistant", Content: resp.Content})
pendingMsgs = append(pendingMsgs, forSession...)
continue
}
finalContent = resp.Content
finalThinking = resp.Thinking
break
}
// Build assistant message with tool calls
assistantMsg := providers.Message{
Role: "assistant",
Content: resp.Content,
Thinking: resp.Thinking, // reasoning_content passback for thinking models (Kimi, DeepSeek)
ToolCalls: resp.ToolCalls,
Phase: resp.Phase, // preserve Codex phase metadata (gpt-5.3-codex)
RawAssistantContent: resp.RawAssistantContent, // preserve thinking blocks for Anthropic passback
}
messages = append(messages, assistantMsg)
pendingMsgs = append(pendingMsgs, assistantMsg)
// Emit block.reply for intermediate assistant content during tool iterations.
// Non-streaming channels (Zalo, Discord, WhatsApp) would otherwise lose this text.
if resp.Content != "" {
sanitized := SanitizeAssistantContent(resp.Content)
if sanitized != "" && !IsSilentReply(sanitized) {
blockReplies++
lastBlockReply = sanitized
l.emit(AgentEvent{
Type: protocol.AgentEventBlockReply,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]string{"content": sanitized},
})
}
}
// Track team_tasks create for orphan detection (argument-based, pre-execution).
// Spawn counting is done post-execution so failed spawns don't get counted.
for _, tc := range resp.ToolCalls {
if tc.Name == "team_tasks" {
if action, _ := tc.Arguments["action"].(string); action == "create" {
teamTaskCreates++
}
}
}
// Tool budget check: soft stop when total tool calls exceed the per-agent limit.
// Same pattern as maxIterations — no error thrown, LLM summarizes and returns.
totalToolCalls += len(resp.ToolCalls)
if l.maxToolCalls > 0 && totalToolCalls > l.maxToolCalls {
slog.Warn("security.tool_budget_exceeded",
"agent", l.id, "total", totalToolCalls, "limit", l.maxToolCalls)
messages = append(messages, providers.Message{
Role: "user",
Content: fmt.Sprintf("[System] Tool call budget reached (%d/%d). Do NOT call any more tools. Summarize results so far and respond to the user.", totalToolCalls, l.maxToolCalls),
})
continue // one more LLM call for summarization, then loop exits (no tool calls)
}
// Emit activity event: tool execution phase
if len(resp.ToolCalls) > 0 {
toolNames := make([]string, len(resp.ToolCalls))
for i, tc := range resp.ToolCalls {
toolNames[i] = tc.Name
}
emitRun(AgentEvent{
Type: protocol.AgentEventActivity,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]any{
"phase": "tool_exec",
"tool": toolNames[0],
"tools": toolNames,
"iteration": iteration,
},
})
}
// Execute tool calls (parallel when multiple, sequential when single)
if len(resp.ToolCalls) == 1 {
// Single tool: sequential — no goroutine overhead
tc := resp.ToolCalls[0]
emitRun(AgentEvent{
Type: protocol.AgentEventToolCall,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]any{"name": tc.Name, "id": tc.ID, "arguments": truncateToolArgs(tc.Arguments, 500)},
})
argsJSON, _ := json.Marshal(tc.Arguments)
slog.Info("tool call", "agent", l.id, "tool", tc.Name, "args_len", len(argsJSON))
argsHash := loopDetector.record(tc.Name, tc.Arguments)
toolSpanStart := time.Now().UTC()
toolSpanID := l.emitToolSpanStart(ctx, toolSpanStart, tc.Name, tc.ID, string(argsJSON))
stopSlowTimer := toolTiming.StartSlowTimer(tc.Name, l.id, req.RunID, slowToolEnabled, emitRun)
var result *tools.Result
if allowedTools != nil && !allowedTools[tc.Name] {
// Attempt lazy activation: deferred MCP tools can be activated on first call
// so the LLM can call them by name directly without mcp_tool_search.
if l.tools.TryActivateDeferred(tc.Name) {
// Verify tool isn't explicitly denied by policy before allowing.
if l.toolPolicy != nil && l.toolPolicy.IsDenied(tc.Name, l.agentToolPolicy) {
slog.Warn("security.tool_policy_denied_lazy", "agent", l.id, "tool", tc.Name)
result = tools.ErrorResult("tool not allowed by policy: " + tc.Name)
} else {
allowedTools[tc.Name] = true
slog.Info("mcp.tool.lazy_activated", "agent", l.id, "tool", tc.Name)
}
} else {
slog.Warn("security.tool_policy_blocked", "agent", l.id, "tool", tc.Name)
result = tools.ErrorResult("tool not allowed by policy: " + tc.Name)
}
}
if result == nil {
result = l.tools.ExecuteWithContext(ctx, tc.Name, tc.Arguments, req.Channel, req.ChatID, req.PeerKind, req.SessionKey, nil)
}
stopSlowTimer()
l.emitToolSpanEnd(ctx, toolSpanID, toolSpanStart, result)
// Record tool execution time for adaptive thresholds.
toolTiming.Record(tc.Name, time.Since(toolSpanStart).Milliseconds())
// Record result for loop detection.
loopDetector.recordResult(argsHash, result.ForLLM)
if result.Async {
asyncToolCalls = append(asyncToolCalls, tc.Name)
}
if result.IsError {
errMsg := result.ForLLM
if len(errMsg) > 200 {
errMsg = errMsg[:200] + "..."
}
slog.Warn("tool error", "agent", l.id, "tool", tc.Name, "error", errMsg)
}
// Count successful spawn calls for orphan detection (post-execution).
if tc.Name == "spawn" && !result.IsError {
if tid, _ := tc.Arguments["team_task_id"].(string); tid != "" {
teamTaskSpawns++
}
}
if hadBootstrap && bootstrapToolAllowlist[tc.Name] {
bootstrapWriteDetected = true
}
toolResultPayload := map[string]any{
"name": tc.Name,
"id": tc.ID,
"is_error": result.IsError,
"arguments": tc.Arguments,
"result": truncateStr(result.ForLLM, 1000),
}
if result.IsError && result.ForLLM != "" {
toolResultPayload["content"] = result.ForLLM
}
emitRun(AgentEvent{
Type: protocol.AgentEventToolResult,
AgentID: l.id,
RunID: req.RunID,
Payload: toolResultPayload,
})
l.scanWebToolResult(tc.Name, result)
// Collect MEDIA: paths from tool results.
// Prefer result.Media (explicit) over ForLLM MEDIA: prefix (legacy) to avoid duplicates.
if len(result.Media) > 0 {
for _, mf := range result.Media {
ct := mf.MimeType
if ct == "" {
ct = mimeFromExt(filepath.Ext(mf.Path))
}
mediaResults = append(mediaResults, MediaResult{Path: mf.Path, ContentType: ct})
}
} else if mr := parseMediaResult(result.ForLLM); mr != nil {
mediaResults = append(mediaResults, *mr)
}
if result.Deliverable != "" {
deliverables = append(deliverables, result.Deliverable)
}
toolMsg := providers.Message{
Role: "tool",
Content: result.ForLLM,
ToolCallID: tc.ID,
}
messages = append(messages, toolMsg)
pendingMsgs = append(pendingMsgs, toolMsg)
// Check for tool call loop after recording result.
if level, msg := loopDetector.detect(tc.Name, argsHash); level != "" {
if level == "critical" {
slog.Warn("tool loop critical", "agent", l.id, "tool", tc.Name, "message", msg)
finalContent = "I was unable to complete this task — I got stuck repeatedly calling " + tc.Name + " without making progress. Please try rephrasing your request."
break
}
// Warning: inject message so model knows to change strategy.
slog.Warn("tool loop warning", "agent", l.id, "tool", tc.Name, "message", msg)
messages = append(messages, providers.Message{Role: "user", Content: msg})
}
} else {
// Multiple tools: parallel execution via goroutines.
// Tool instances are immutable (context-based) so concurrent access is safe.
// Results are collected then processed sequentially for deterministic ordering.
type indexedResult struct {
idx int
tc providers.ToolCall
result *tools.Result
argsJSON string
spanStart time.Time
}
// 1. Emit all tool.call events upfront (client sees all calls starting)
for _, tc := range resp.ToolCalls {
emitRun(AgentEvent{
Type: protocol.AgentEventToolCall,
AgentID: l.id,
RunID: req.RunID,
Payload: map[string]any{"name": tc.Name, "id": tc.ID, "arguments": truncateToolArgs(tc.Arguments, 500)},
})
}
// 2. Execute all tools in parallel
resultCh := make(chan indexedResult, len(resp.ToolCalls))
var wg sync.WaitGroup
for i, tc := range resp.ToolCalls {
wg.Add(1)
go func(idx int, tc providers.ToolCall) {
defer wg.Done()
argsJSON, _ := json.Marshal(tc.Arguments)
slog.Info("tool call", "agent", l.id, "tool", tc.Name, "args_len", len(argsJSON), "parallel", true)
spanStart := time.Now().UTC()
// Emit running span inside goroutine — goroutine-safe (channel send only).
// End is also emitted here to prevent orphans on ctx cancellation.
spanID := l.emitToolSpanStart(ctx, spanStart, tc.Name, tc.ID, string(argsJSON))
stopSlowTimer := toolTiming.StartSlowTimer(tc.Name, l.id, req.RunID, slowToolEnabled, emitRun)
var result *tools.Result
if allowedTools != nil && !allowedTools[tc.Name] {
// Attempt lazy activation for deferred MCP tools.
// Note: don't write back to allowedTools — concurrent goroutines share
// the map and writes would race. TryActivateDeferred is idempotent.
if l.tools.TryActivateDeferred(tc.Name) {
// Verify tool isn't explicitly denied by policy before allowing.
if l.toolPolicy != nil && l.toolPolicy.IsDenied(tc.Name, l.agentToolPolicy) {
slog.Warn("security.tool_policy_denied_lazy", "agent", l.id, "tool", tc.Name)
result = tools.ErrorResult("tool not allowed by policy: " + tc.Name)
} else {
slog.Info("mcp.tool.lazy_activated", "agent", l.id, "tool", tc.Name)
}
} else {
slog.Warn("security.tool_policy_blocked", "agent", l.id, "tool", tc.Name)
result = tools.ErrorResult("tool not allowed by policy: " + tc.Name)
}
}
if result == nil {
result = l.tools.ExecuteWithContext(ctx, tc.Name, tc.Arguments, req.Channel, req.ChatID, req.PeerKind, req.SessionKey, nil)
}
stopSlowTimer()
l.emitToolSpanEnd(ctx, spanID, spanStart, result)
resultCh <- indexedResult{idx: idx, tc: tc, result: result, argsJSON: string(argsJSON), spanStart: spanStart}
}(i, tc)
}
// Close channel after all goroutines complete (run in separate goroutine to avoid deadlock)
go func() { wg.Wait(); close(resultCh) }()
// 3. Collect results
collected := make([]indexedResult, 0, len(resp.ToolCalls))
for r := range resultCh {
collected = append(collected, r)
}
// 4. Sort by original index → deterministic message ordering
sort.Slice(collected, func(i, j int) bool {
return collected[i].idx < collected[j].idx
})
// 5. Process results sequentially: emit events, append messages, save to session
// Note: tool span start/end already emitted inside goroutines above.
var loopStuck bool
for _, r := range collected {
// Record tool execution time for adaptive thresholds.
toolTiming.Record(r.tc.Name, time.Since(r.spanStart).Milliseconds())
// Record for loop detection.
argsHash := loopDetector.record(r.tc.Name, r.tc.Arguments)
loopDetector.recordResult(argsHash, r.result.ForLLM)
if r.result.Async {
asyncToolCalls = append(asyncToolCalls, r.tc.Name)
}
if r.result.IsError {
errMsg := r.result.ForLLM
if len(errMsg) > 200 {
errMsg = errMsg[:200] + "..."
}
slog.Warn("tool error", "agent", l.id, "tool", r.tc.Name, "error", errMsg)
}
// Count successful spawn calls for orphan detection (post-execution).
if r.tc.Name == "spawn" && !r.result.IsError {
if tid, _ := r.tc.Arguments["team_task_id"].(string); tid != "" {
teamTaskSpawns++
}
}
if hadBootstrap && bootstrapToolAllowlist[r.tc.Name] {
bootstrapWriteDetected = true
}
parToolResultPayload := map[string]any{
"name": r.tc.Name,
"id": r.tc.ID,
"is_error": r.result.IsError,
"arguments": r.tc.Arguments,
"result": truncateStr(r.result.ForLLM, 1000),
}
if r.result.IsError && r.result.ForLLM != "" {
parToolResultPayload["content"] = r.result.ForLLM
}
emitRun(AgentEvent{
Type: protocol.AgentEventToolResult,
AgentID: l.id,
RunID: req.RunID,
Payload: parToolResultPayload,
})
l.scanWebToolResult(r.tc.Name, r.result)
// Collect MEDIA: paths from tool results.
// Prefer result.Media (explicit) over ForLLM MEDIA: prefix (legacy) to avoid duplicates.
if len(r.result.Media) > 0 {
for _, mf := range r.result.Media {
ct := mf.MimeType
if ct == "" {
ct = mimeFromExt(filepath.Ext(mf.Path))
}
mediaResults = append(mediaResults, MediaResult{Path: mf.Path, ContentType: ct})
}
} else if mr := parseMediaResult(r.result.ForLLM); mr != nil {
mediaResults = append(mediaResults, *mr)
}
if r.result.Deliverable != "" {
deliverables = append(deliverables, r.result.Deliverable)
}
toolMsg := providers.Message{
Role: "tool",
Content: r.result.ForLLM,
ToolCallID: r.tc.ID,
}
messages = append(messages, toolMsg)
pendingMsgs = append(pendingMsgs, toolMsg)
// Check for tool call loop.
if level, msg := loopDetector.detect(r.tc.Name, argsHash); level != "" {
if level == "critical" {
slog.Warn("tool loop critical", "agent", l.id, "tool", r.tc.Name, "message", msg)
finalContent = "I was unable to complete this task — I got stuck repeatedly calling " + r.tc.Name + " without making progress. Please try rephrasing your request."
loopStuck = true
break
}
slog.Warn("tool loop warning", "agent", l.id, "tool", r.tc.Name, "message", msg)
messages = append(messages, providers.Message{Role: "user", Content: msg})
}
}
if loopStuck {
break
}
}
// Mid-run injection (Point A): drain any user follow-up messages
// that arrived during tool execution. Append them after tool results
// so the next LLM call sees: [tool results...] + [user follow-ups...].
if forLLM, forSession := l.drainInjectChannel(req.InjectCh, emitRun); len(forLLM) > 0 {
messages = append(messages, forLLM...)
pendingMsgs = append(pendingMsgs, forSession...)
}
}
// 4. Full sanitization pipeline (matching TS extractAssistantText + sanitizeUserFacingText)
finalContent = SanitizeAssistantContent(finalContent)
// 4b. Config leak detection — disabled: too many false positives
// (e.g. agent explaining public architecture mentioning SOUL.md etc.)
// finalContent = StripConfigLeak(finalContent, l.agentType)
// 5. Handle NO_REPLY: save to session for context but mark as silent.
// Matching TS: NO_REPLY is saved (via resolveSilentReplyFallbackText) but
// filtered at the payload level before delivery.
isSilent := IsSilentReply(finalContent)
// 5b. Skill evolution: postscript suggestion after complex tasks.
// Fires when skill_evolve=true AND the run involved enough tool calls to warrant a skill.
// Appended to the agent's own final response so the user sees it inline and can explicitly
// consent ("save as skill") before anything is created. No mid-loop injection, no async
// goroutine, no session contamination — the next user turn naturally triggers skill creation.
if l.skillEvolve && l.skillNudgeInterval > 0 &&
totalToolCalls >= l.skillNudgeInterval &&
finalContent != "" && !isSilent && !skillPostscriptSent {
skillPostscriptSent = true
locale := store.LocaleFromContext(ctx)
finalContent += "\n\n---\n_" + i18n.T(locale, i18n.MsgSkillNudgePostscript) + "_"
}
// 6. Fallback for empty content
if finalContent == "" {
if len(asyncToolCalls) > 0 {
finalContent = "..."
} else {
finalContent = "..."
}
}
// Append content suffix (e.g. image markdown for WS) before saving to session.
if req.ContentSuffix != "" && !strings.Contains(finalContent, req.ContentSuffix) {
finalContent += req.ContentSuffix
}
pendingMsgs = append(pendingMsgs, providers.Message{
Role: "assistant",
Content: finalContent,
Thinking: finalThinking,
})
// Bootstrap nudge: if model didn't call write_file on turn 2+, inject reminder
// into session history so the next turn sees it. Appended to pendingMsgs so it's
// flushed in the single Save below (avoids double I/O).
// Note: the nudge counts as a "user" turn in history, which accelerates auto-cleanup
// by one turn — acceptable since bootstrap should complete in 2-3 turns anyway.
if hadBootstrap && l.bootstrapCleanup != nil {
nudgeUserTurns := 1
for _, m := range history {
if m.Role == "user" {
nudgeUserTurns++
}
}
if !bootstrapWriteDetected && nudgeUserTurns >= 2 && nudgeUserTurns < bootstrapAutoCleanupTurns {
pendingMsgs = append(pendingMsgs, providers.Message{
Role: "user",
Content: "[System] You haven't completed onboarding yet. Please update USER.md with the user's details and clear BOOTSTRAP.md as instructed.",
})
}
}
// Flush all buffered messages to session atomically.
// This ensures concurrent runs never see each other's in-progress messages.
for _, msg := range pendingMsgs {
l.sessions.AddMessage(req.SessionKey, msg)
}
// Persist adaptive tool timing to session metadata.
if serialized := toolTiming.Serialize(); serialized != "" {
l.sessions.SetSessionMetadata(req.SessionKey, map[string]string{"tool_timing": serialized})
}
// Write session metadata (matching TS session entry updates)
l.sessions.UpdateMetadata(req.SessionKey, l.model, l.provider.Name(), req.Channel)
l.sessions.AccumulateTokens(req.SessionKey, int64(totalUsage.PromptTokens), int64(totalUsage.CompletionTokens))
// Calibrate token estimation: store actual prompt tokens + message count.
// Next time EstimateTokensWithCalibration() is called, it uses this as a base
// instead of the chars/3 heuristic (more accurate for multilingual content).
if totalUsage.PromptTokens > 0 {
msgCount := len(history) + len(pendingMsgs)
l.sessions.SetLastPromptTokens(req.SessionKey, totalUsage.PromptTokens, msgCount)
}
l.sessions.Save(req.SessionKey)
// Bootstrap auto-cleanup: after enough conversation turns, remove BOOTSTRAP.md
// as a safety net in case the LLM didn't clear it itself.
// Bootstrap typically completes in 2-3 turns; we auto-cleanup after 3 user messages.
// Uses pre-run history (already loaded) + 1 for current message — no extra DB call.
if hadBootstrap && l.bootstrapCleanup != nil {
userTurns := 1 // current user message
for _, m := range history {
if m.Role == "user" {
userTurns++
}
}
if userTurns >= bootstrapAutoCleanupTurns {
if cleanErr := l.bootstrapCleanup(ctx, l.agentUUID, req.UserID); cleanErr != nil {
slog.Warn("bootstrap auto-cleanup failed", "error", cleanErr, "agent", l.id, "user", req.UserID)
} else {
slog.Info("bootstrap auto-cleanup completed", "agent", l.id, "user", req.UserID, "turns", userTurns)
}
}
}
// If silent, return empty content so gateway suppresses delivery.
if isSilent {
slog.Info("agent loop: NO_REPLY detected, suppressing delivery",
"agent", l.id, "session", req.SessionKey)
finalContent = ""
}
// 5. Maybe summarize
l.maybeSummarize(ctx, req.SessionKey)
// Include forwarded media from delegation results (not cleaned up like req.Media)
for _, mf := range req.ForwardMedia {
ct := mf.MimeType
if ct == "" {
ct = mimeFromExt(filepath.Ext(mf.Path))
}
mediaResults = append(mediaResults, MediaResult{Path: mf.Path, ContentType: ct})
}
// Deduplicate media by path — prevents the same image being sent twice
// (e.g. once via ForwardMedia and again when the LLM reads the file).
mediaResults = deduplicateMedia(mediaResults)
return &RunResult{
Content: finalContent,
RunID: req.RunID,
Iterations: iteration,
Usage: &totalUsage,
Media: mediaResults,
Deliverables: deliverables,
BlockReplies: blockReplies,
LastBlockReply: lastBlockReply,
}, nil
}
// truncateToolArgs builds a shallow copy of args in which every string value
// whose byte length exceeds maxLen is shortened via truncateStr. Non-string
// values, and strings already within the limit, are carried over unchanged.
// The input map is never modified and the returned map is always non-nil.
func truncateToolArgs(args map[string]any, maxLen int) map[string]any {
	trimmed := make(map[string]any, len(args))
	for key, val := range args {
		str, isString := val.(string)
		if !isString || len(str) <= maxLen {
			// Nothing to shorten: keep the value exactly as supplied.
			trimmed[key] = val
			continue
		}
		trimmed[key] = truncateStr(str, maxLen)
	}
	return trimmed
}