feat(agent): adaptive tool timing with slow tool notification

Track per-tool execution-time statistics in session metadata. When a tool
call exceeds its adaptive threshold (2x the historical maximum, with a
default minimum of 120s), send a direct outbound notification to the user.

- ToolTimingMap: parse/serialize/record/threshold from session metadata
- StartSlowTimer: fires once per tool call, auto-cancels on completion
- Team config: slow_tool toggle (default on, always direct, never leader)
- UI: toggle in team settings with i18n (en/vi/zh)
- Store: add GetSessionMetadata to session store interface
This commit is contained in:
viettranx
2026-03-19 11:17:32 +07:00
parent 0df619023c
commit 4e9f155a4c
12 changed files with 236 additions and 1 deletions
+26
View File
@@ -164,8 +164,12 @@ func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error)
// Auto-resolve team workspace for agents not dispatched via team task.
// Lead agents default to team workspace (primary job is team coordination).
// Non-lead members keep own workspace; team workspace is accessible via absolute path.
// resolvedTeamSettings caches team settings from workspace resolution
// to avoid re-querying when checking slow_tool notification config.
var resolvedTeamSettings json.RawMessage
if req.TeamWorkspace == "" && l.teamStore != nil && l.agentUUID != uuid.Nil {
if team, _ := l.teamStore.GetTeamForAgent(ctx, l.agentUUID); team != nil {
resolvedTeamSettings = team.Settings
// Shared workspace: scope by teamID only. Isolated (default): scope by chatID too.
wsChat := req.ChatID
if wsChat == "" {
@@ -243,6 +247,12 @@ func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error)
l.sessions.SetContextWindow(req.SessionKey, l.contextWindow)
}
// 0b. Load adaptive tool timing from session metadata.
toolTiming := ParseToolTiming(l.sessions.GetSessionMetadata(req.SessionKey))
// Resolve slow_tool notification config from already-loaded team settings (no extra DB query).
slowToolEnabled := tools.ParseTeamNotifyConfig(resolvedTeamSettings).SlowTool
// 1. Build messages from session history
history := l.sessions.GetHistory(req.SessionKey)
summary := l.sessions.GetSummary(req.SessionKey)
@@ -869,6 +879,8 @@ func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error)
toolSpanStart := time.Now().UTC()
toolSpanID := l.emitToolSpanStart(ctx, toolSpanStart, tc.Name, tc.ID, string(argsJSON))
stopSlowTimer := toolTiming.StartSlowTimer(tc.Name, l.id, req.RunID, slowToolEnabled, emitRun)
var result *tools.Result
if allowedTools != nil && !allowedTools[tc.Name] {
// Attempt lazy activation: deferred MCP tools can be activated on first call
@@ -890,9 +902,13 @@ func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error)
if result == nil {
result = l.tools.ExecuteWithContext(ctx, tc.Name, tc.Arguments, req.Channel, req.ChatID, req.PeerKind, req.SessionKey, nil)
}
stopSlowTimer()
l.emitToolSpanEnd(ctx, toolSpanID, toolSpanStart, result)
// Record tool execution time for adaptive thresholds.
toolTiming.Record(tc.Name, time.Since(toolSpanStart).Milliseconds())
// Record result for loop detection.
loopDetector.recordResult(argsHash, result.ForLLM)
@@ -1009,6 +1025,8 @@ func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error)
// Emit running span inside goroutine — goroutine-safe (channel send only).
// End is also emitted here to prevent orphans on ctx cancellation.
spanID := l.emitToolSpanStart(ctx, spanStart, tc.Name, tc.ID, string(argsJSON))
stopSlowTimer := toolTiming.StartSlowTimer(tc.Name, l.id, req.RunID, slowToolEnabled, emitRun)
var result *tools.Result
if allowedTools != nil && !allowedTools[tc.Name] {
// Attempt lazy activation for deferred MCP tools.
@@ -1030,6 +1048,7 @@ func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error)
if result == nil {
result = l.tools.ExecuteWithContext(ctx, tc.Name, tc.Arguments, req.Channel, req.ChatID, req.PeerKind, req.SessionKey, nil)
}
stopSlowTimer()
l.emitToolSpanEnd(ctx, spanID, spanStart, result)
resultCh <- indexedResult{idx: idx, tc: tc, result: result, argsJSON: string(argsJSON), spanStart: spanStart}
}(i, tc)
@@ -1053,6 +1072,8 @@ func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error)
// Note: tool span start/end already emitted inside goroutines above.
var loopStuck bool
for _, r := range collected {
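// Record tool execution time for adaptive thresholds.
// NOTE(review): the duration is measured here, after results are collected,
// not when the tool returned inside its goroutine — so it appears to include
// any time spent waiting for other parallel tools to finish. Confirm whether
// this inflation of the adaptive threshold is intended.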
toolTiming.Record(r.tc.Name, time.Since(r.spanStart).Milliseconds())
// Record for loop detection.
argsHash := loopDetector.record(r.tc.Name, r.tc.Arguments)
@@ -1222,6 +1243,11 @@ func (l *Loop) runLoop(ctx context.Context, req RunRequest) (*RunResult, error)
l.sessions.AddMessage(req.SessionKey, msg)
}
// Persist adaptive tool timing to session metadata.
if serialized := toolTiming.Serialize(); serialized != "" {
l.sessions.SetSessionMetadata(req.SessionKey, map[string]string{"tool_timing": serialized})
}
// Write session metadata (matching TS session entry updates)
l.sessions.UpdateMetadata(req.SessionKey, l.model, l.provider.Name(), req.Channel)
l.sessions.AccumulateTokens(req.SessionKey, int64(totalUsage.PromptTokens), int64(totalUsage.CompletionTokens))