mirror of
https://github.com/tiennm99/goclaw.git
synced 2026-06-09 22:12:07 +00:00
4e9f155a4c
Track per-tool execution time statistics in session metadata. When a tool call exceeds its adaptive threshold (2x historical max, min 120s default), send a direct outbound notification to the user.
- ToolTimingMap: parse/serialize/record/threshold from session metadata
- StartSlowTimer: fires once per tool call, auto-cancels on completion
- Team config: slow_tool toggle (default on, always direct, never leader)
- UI: toggle in team settings with i18n (en/vi/zh)
- Store: add GetSessionMetadata to session store interface
124 lines
3.7 KiB
Go
124 lines
3.7 KiB
Go
package agent
|
|
|
|
import (
|
|
"encoding/json"
|
|
"log/slog"
|
|
"time"
|
|
|
|
"github.com/nextlevelbuilder/goclaw/pkg/protocol"
|
|
)
|
|
|
|
// Tuning knobs for slow-tool detection.
const (
	// defaultSlowToolThreshold is the slow-call cutoff applied when a tool
	// has no usable timing history.
	defaultSlowToolThreshold = 120 * time.Second

	// toolTimingMultiplier is the factor applied to a tool's historical
	// maximum duration: a call has to run longer than max × multiplier
	// before it counts as abnormally slow.
	toolTimingMultiplier = 2.0

	// minTimingSamples is how many recorded samples a tool needs before its
	// adaptive threshold is used in place of the default.
	minTimingSamples = 3
)
|
|
|
|
// ToolTimingStat aggregates the observed execution times of a single tool.
// All durations are expressed in milliseconds.
type ToolTimingStat struct {
	// Min is the shortest recorded duration, in ms.
	Min int64 `json:"min"`
	// Max is the longest recorded duration, in ms.
	Max int64 `json:"max"`
	// Sum is the total of all recorded durations, in ms; dividing it by
	// Count yields the average.
	Sum int64 `json:"sum"`
	// Count is the number of recorded samples (serialized as "n").
	Count int `json:"n"`
}
|
|
|
|
// ToolTimingMap maps tool names to their timing statistics.
|
|
// Concurrency contract: SlowThreshold (read) may be called from goroutines,
|
|
// but Record (write) must only be called sequentially after parallel tools complete.
|
|
type ToolTimingMap map[string]*ToolTimingStat
|
|
|
|
// ParseToolTiming reads tool timing data from session metadata.
|
|
// Returns an empty map if the key is missing or malformed.
|
|
func ParseToolTiming(metadata map[string]string) ToolTimingMap {
|
|
raw, ok := metadata["tool_timing"]
|
|
if !ok || raw == "" {
|
|
return make(ToolTimingMap)
|
|
}
|
|
var m ToolTimingMap
|
|
if err := json.Unmarshal([]byte(raw), &m); err != nil {
|
|
return make(ToolTimingMap)
|
|
}
|
|
return m
|
|
}
|
|
|
|
// Serialize returns the JSON string for storage in session metadata.
|
|
func (m ToolTimingMap) Serialize() string {
|
|
if len(m) == 0 {
|
|
return ""
|
|
}
|
|
data, err := json.Marshal(m)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
return string(data)
|
|
}
|
|
|
|
// Record adds a new timing sample for the given tool.
|
|
func (m ToolTimingMap) Record(toolName string, durationMs int64) {
|
|
stat, ok := m[toolName]
|
|
if !ok {
|
|
m[toolName] = &ToolTimingStat{
|
|
Min: durationMs,
|
|
Max: durationMs,
|
|
Sum: durationMs,
|
|
Count: 1,
|
|
}
|
|
return
|
|
}
|
|
if durationMs < stat.Min {
|
|
stat.Min = durationMs
|
|
}
|
|
if durationMs > stat.Max {
|
|
stat.Max = durationMs
|
|
}
|
|
stat.Sum += durationMs
|
|
stat.Count++
|
|
}
|
|
|
|
// SlowThreshold returns the duration after which a tool call is considered
|
|
// abnormally slow. Uses adaptive threshold if enough samples exist,
|
|
// otherwise falls back to the default.
|
|
func (m ToolTimingMap) SlowThreshold(toolName string) time.Duration {
|
|
stat, ok := m[toolName]
|
|
if !ok || stat.Count < minTimingSamples {
|
|
return defaultSlowToolThreshold
|
|
}
|
|
threshold := time.Duration(float64(stat.Max)*toolTimingMultiplier) * time.Millisecond
|
|
// Never go below the default — short tools shouldn't trigger on tiny spikes.
|
|
if threshold < defaultSlowToolThreshold {
|
|
return defaultSlowToolThreshold
|
|
}
|
|
return threshold
|
|
}
|
|
|
|
// StartSlowTimer starts a timer that emits a tool_slow activity event if the
|
|
// tool call exceeds the adaptive threshold. Returns a stop function that MUST
|
|
// be called after tool execution to cancel the timer.
|
|
// If enabled is false, returns a no-op stop function (no timer started).
|
|
func (m ToolTimingMap) StartSlowTimer(toolName, agentID, runID string, enabled bool, emitRun func(AgentEvent)) func() {
|
|
if !enabled {
|
|
return func() {}
|
|
}
|
|
threshold := m.SlowThreshold(toolName)
|
|
timer := time.AfterFunc(threshold, func() {
|
|
slog.Warn("tool.slow", "agent", agentID, "tool", toolName, "threshold_ms", threshold.Milliseconds())
|
|
emitRun(AgentEvent{
|
|
Type: protocol.AgentEventActivity,
|
|
AgentID: agentID,
|
|
RunID: runID,
|
|
Payload: map[string]any{
|
|
"phase": "tool_slow",
|
|
"tool": toolName,
|
|
"threshold_ms": threshold.Milliseconds(),
|
|
},
|
|
})
|
|
})
|
|
return func() { timer.Stop() }
|
|
}
|
|
|