mirror of
https://github.com/tiennm99/goclaw.git
synced 2026-06-10 10:10:49 +00:00
454 lines
12 KiB
Go
454 lines
12 KiB
Go
package scheduler
|
|
|
|
import (
|
|
"context"
|
|
"log/slog"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/nextlevelbuilder/goclaw/internal/agent"
|
|
)
|
|
|
|
// QueueMode determines how incoming messages are handled when an agent
|
|
// is already processing a message for the same session.
|
|
type QueueMode string
|
|
|
|
const (
|
|
// QueueModeQueue is simple FIFO: new messages wait until current finishes.
|
|
QueueModeQueue QueueMode = "queue"
|
|
|
|
// QueueModeFollowup queues as a follow-up after the current run completes.
|
|
QueueModeFollowup QueueMode = "followup"
|
|
|
|
// QueueModeInterrupt cancels the current run and starts the new message.
|
|
QueueModeInterrupt QueueMode = "interrupt"
|
|
)
|
|
|
|
// DropPolicy determines which messages to drop when the queue is full.
|
|
type DropPolicy string
|
|
|
|
const (
|
|
DropOld DropPolicy = "old" // drop oldest message
|
|
DropNew DropPolicy = "new" // reject incoming message
|
|
)
|
|
|
|
// QueueConfig configures per-session message queuing.
|
|
type QueueConfig struct {
|
|
Mode QueueMode `json:"mode"`
|
|
Cap int `json:"cap"`
|
|
Drop DropPolicy `json:"drop"`
|
|
DebounceMs int `json:"debounce_ms"`
|
|
MaxConcurrent int `json:"max_concurrent"` // 0 or 1 = serial (default)
|
|
}
|
|
|
|
// DefaultQueueConfig returns sensible defaults.
|
|
func DefaultQueueConfig() QueueConfig {
|
|
return QueueConfig{
|
|
Mode: QueueModeQueue,
|
|
Cap: 10,
|
|
Drop: DropOld,
|
|
DebounceMs: 800,
|
|
MaxConcurrent: 1,
|
|
}
|
|
}
|
|
|
|
// RunFunc is the callback that executes an agent run.
|
|
// The scheduler calls this when it's the request's turn.
|
|
type RunFunc func(ctx context.Context, req agent.RunRequest) (*agent.RunResult, error)
|
|
|
|
// TokenEstimateFunc returns token estimate and context window for a session.
|
|
// Used by adaptive throttle to reduce concurrency near the summary threshold.
|
|
type TokenEstimateFunc func(sessionKey string) (tokens int, contextWindow int)
|
|
|
|
// PendingRequest is a queued agent run awaiting execution.
|
|
type PendingRequest struct {
|
|
Req agent.RunRequest
|
|
ResultCh chan RunOutcome
|
|
EnqueuedAt time.Time // timestamp when enqueued, used for stale message detection
|
|
}
|
|
|
|
// RunOutcome is the result of a scheduled agent run.
|
|
type RunOutcome struct {
|
|
Result *agent.RunResult
|
|
Err error
|
|
}
|
|
|
|
// activeRunEntry tracks a running agent execution with its generation.
|
|
type activeRunEntry struct {
|
|
cancel context.CancelFunc
|
|
generation uint64
|
|
}
|
|
|
|
// SessionQueue manages agent runs for a single session key.
|
|
// Supports configurable concurrency: 1 (serial) or N (concurrent).
|
|
type SessionQueue struct {
|
|
key string
|
|
config QueueConfig
|
|
runFn RunFunc
|
|
laneMgr *LaneManager
|
|
lane string
|
|
|
|
mu sync.Mutex
|
|
queue []*PendingRequest
|
|
activeRuns map[string]activeRunEntry // runID → entry (with generation)
|
|
activeOrder []string // FIFO order of active runIDs
|
|
maxConcurrent int // effective limit (from config or per-session override)
|
|
timer *time.Timer // debounce timer
|
|
parentCtx context.Context // stored from first Enqueue call
|
|
abortCutoffTime time.Time // messages enqueued before this are stale
|
|
generation uint64 // bumped on Reset() to ignore stale completions
|
|
|
|
tokenEstimateFn TokenEstimateFunc // optional: for adaptive throttle
|
|
}
|
|
|
|
// NewSessionQueue creates a queue for a specific session.
|
|
func NewSessionQueue(key, lane string, cfg QueueConfig, laneMgr *LaneManager, runFn RunFunc) *SessionQueue {
|
|
maxC := cfg.MaxConcurrent
|
|
if maxC <= 0 {
|
|
maxC = 1
|
|
}
|
|
return &SessionQueue{
|
|
key: key,
|
|
config: cfg,
|
|
runFn: runFn,
|
|
laneMgr: laneMgr,
|
|
lane: lane,
|
|
activeRuns: make(map[string]activeRunEntry),
|
|
maxConcurrent: maxC,
|
|
}
|
|
}
|
|
|
|
// SetMaxConcurrent overrides the per-session max concurrent runs.
|
|
// Typically called from the consumer when it knows the peer kind (group vs DM).
|
|
func (sq *SessionQueue) SetMaxConcurrent(n int) {
|
|
sq.mu.Lock()
|
|
defer sq.mu.Unlock()
|
|
if n <= 0 {
|
|
n = 1
|
|
}
|
|
sq.maxConcurrent = n
|
|
}
|
|
|
|
// effectiveMaxConcurrent returns the current concurrency limit,
|
|
// reduced to 1 when near the summary threshold (adaptive throttle).
|
|
// Must be called with sq.mu held.
|
|
func (sq *SessionQueue) effectiveMaxConcurrent() int {
|
|
max := sq.maxConcurrent
|
|
if max <= 0 {
|
|
max = 1
|
|
}
|
|
if sq.tokenEstimateFn == nil {
|
|
return max
|
|
}
|
|
tokens, contextWindow := sq.tokenEstimateFn(sq.key)
|
|
if contextWindow > 0 && float64(tokens)/float64(contextWindow) >= 0.6 {
|
|
return 1 // near summary threshold → serialize
|
|
}
|
|
return max
|
|
}
|
|
|
|
// hasCapacity returns whether a new run can start.
|
|
// Must be called with sq.mu held.
|
|
func (sq *SessionQueue) hasCapacity() bool {
|
|
return len(sq.activeRuns) < sq.effectiveMaxConcurrent()
|
|
}
|
|
|
|
// Enqueue adds a request to the session queue.
|
|
// If capacity is available, it starts immediately (after debounce).
|
|
// Returns a channel that receives the result when the run completes.
|
|
func (sq *SessionQueue) Enqueue(ctx context.Context, req agent.RunRequest) <-chan RunOutcome {
|
|
outcome := make(chan RunOutcome, 1)
|
|
pending := &PendingRequest{Req: req, ResultCh: outcome, EnqueuedAt: time.Now()}
|
|
|
|
sq.mu.Lock()
|
|
defer sq.mu.Unlock()
|
|
|
|
// Store parent context for spawning future runs
|
|
if sq.parentCtx == nil {
|
|
sq.parentCtx = ctx
|
|
}
|
|
|
|
switch sq.config.Mode {
|
|
case QueueModeInterrupt:
|
|
// Cancel all active runs
|
|
for runID, entry := range sq.activeRuns {
|
|
entry.cancel()
|
|
delete(sq.activeRuns, runID)
|
|
}
|
|
sq.activeOrder = nil
|
|
// Clear existing queue and enqueue this one
|
|
sq.drainQueue(RunOutcome{Err: context.Canceled})
|
|
sq.queue = append(sq.queue, pending)
|
|
if sq.hasCapacity() {
|
|
sq.scheduleNext(ctx)
|
|
}
|
|
|
|
default: // queue, followup
|
|
if len(sq.queue) >= sq.config.Cap {
|
|
sq.applyDropPolicy(pending)
|
|
} else {
|
|
sq.queue = append(sq.queue, pending)
|
|
}
|
|
|
|
if sq.hasCapacity() {
|
|
sq.scheduleNext(ctx)
|
|
}
|
|
}
|
|
|
|
return outcome
|
|
}
|
|
|
|
// scheduleNext starts the next queued request(s), applying debounce.
|
|
// Must be called with sq.mu held.
|
|
func (sq *SessionQueue) scheduleNext(ctx context.Context) {
|
|
if len(sq.queue) == 0 {
|
|
return
|
|
}
|
|
|
|
debounce := time.Duration(sq.config.DebounceMs) * time.Millisecond
|
|
if debounce <= 0 {
|
|
sq.startAvailable(ctx)
|
|
return
|
|
}
|
|
|
|
// Reset debounce timer: collapses rapid messages
|
|
if sq.timer != nil {
|
|
sq.timer.Stop()
|
|
}
|
|
sq.timer = time.AfterFunc(debounce, func() {
|
|
sq.mu.Lock()
|
|
defer sq.mu.Unlock()
|
|
if sq.hasCapacity() && len(sq.queue) > 0 {
|
|
sq.startAvailable(ctx)
|
|
}
|
|
})
|
|
}
|
|
|
|
// startAvailable starts as many queued requests as capacity allows.
|
|
// Must be called with sq.mu held.
|
|
func (sq *SessionQueue) startAvailable(ctx context.Context) {
|
|
for sq.hasCapacity() && len(sq.queue) > 0 {
|
|
sq.startOne(ctx)
|
|
}
|
|
}
|
|
|
|
// startOne picks the first queued request and runs it in the lane.
|
|
// Skips stale messages that were enqueued before the last abort cutoff.
|
|
// Must be called with sq.mu held.
|
|
func (sq *SessionQueue) startOne(ctx context.Context) {
|
|
// Skip stale messages enqueued before the last /stopall abort cutoff.
|
|
for len(sq.queue) > 0 {
|
|
head := sq.queue[0]
|
|
if !sq.abortCutoffTime.IsZero() && head.EnqueuedAt.Before(sq.abortCutoffTime) {
|
|
sq.queue = sq.queue[1:]
|
|
head.ResultCh <- RunOutcome{Err: ErrMessageStale}
|
|
close(head.ResultCh)
|
|
slog.Debug("scheduler: skipped stale message",
|
|
"session", sq.key,
|
|
"enqueued", head.EnqueuedAt,
|
|
"cutoff", sq.abortCutoffTime,
|
|
)
|
|
continue
|
|
}
|
|
// Clear cutoff once a non-stale message is found
|
|
sq.abortCutoffTime = time.Time{}
|
|
break
|
|
}
|
|
|
|
if len(sq.queue) == 0 {
|
|
return
|
|
}
|
|
|
|
pending := sq.queue[0]
|
|
sq.queue = sq.queue[1:]
|
|
|
|
runID := pending.Req.RunID
|
|
runCtx, cancel := context.WithCancel(ctx)
|
|
sq.activeRuns[runID] = activeRunEntry{cancel: cancel, generation: sq.generation}
|
|
sq.activeOrder = append(sq.activeOrder, runID)
|
|
|
|
lane := sq.laneMgr.Get(sq.lane)
|
|
if lane == nil {
|
|
lane = sq.laneMgr.Get(LaneMain)
|
|
}
|
|
|
|
gen := sq.generation // capture generation under lock
|
|
|
|
if lane == nil {
|
|
// No lane available — run directly
|
|
go sq.executeRun(runCtx, runID, gen, pending)
|
|
return
|
|
}
|
|
|
|
err := lane.Submit(ctx, func() {
|
|
sq.executeRun(runCtx, runID, gen, pending)
|
|
})
|
|
if err != nil {
|
|
pending.ResultCh <- RunOutcome{Err: err}
|
|
close(pending.ResultCh)
|
|
// caller already holds sq.mu — clean up
|
|
delete(sq.activeRuns, runID)
|
|
sq.removeFromOrder(runID)
|
|
}
|
|
}
|
|
|
|
// executeRun runs the agent and then starts the next queued message(s) if capacity allows.
|
|
func (sq *SessionQueue) executeRun(ctx context.Context, runID string, runGeneration uint64, pending *PendingRequest) {
|
|
result, err := sq.runFn(ctx, pending.Req)
|
|
pending.ResultCh <- RunOutcome{Result: result, Err: err}
|
|
close(pending.ResultCh)
|
|
|
|
sq.mu.Lock()
|
|
// Check generation: ignore stale completions from a previous generation.
|
|
if entry, ok := sq.activeRuns[runID]; ok && entry.generation == sq.generation {
|
|
delete(sq.activeRuns, runID)
|
|
sq.removeFromOrder(runID)
|
|
} else if runGeneration != sq.generation {
|
|
// Stale completion from old generation — skip cleanup.
|
|
sq.mu.Unlock()
|
|
return
|
|
}
|
|
|
|
if sq.hasCapacity() && len(sq.queue) > 0 {
|
|
// Use parentCtx (not the per-run ctx which may be cancelled)
|
|
sq.scheduleNext(sq.parentCtx)
|
|
}
|
|
sq.mu.Unlock()
|
|
}
|
|
|
|
// removeFromOrder removes a runID from the activeOrder slice.
|
|
// Must be called with sq.mu held.
|
|
func (sq *SessionQueue) removeFromOrder(runID string) {
|
|
for i, id := range sq.activeOrder {
|
|
if id == runID {
|
|
sq.activeOrder = append(sq.activeOrder[:i], sq.activeOrder[i+1:]...)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// applyDropPolicy handles a full queue.
|
|
// Must be called with sq.mu held.
|
|
func (sq *SessionQueue) applyDropPolicy(incoming *PendingRequest) {
|
|
switch sq.config.Drop {
|
|
case DropOld:
|
|
// Drop the oldest queued message
|
|
if len(sq.queue) > 0 {
|
|
old := sq.queue[0]
|
|
old.ResultCh <- RunOutcome{Err: ErrQueueDropped}
|
|
close(old.ResultCh)
|
|
sq.queue = sq.queue[1:]
|
|
}
|
|
sq.queue = append(sq.queue, incoming)
|
|
|
|
case DropNew:
|
|
// Reject the incoming message
|
|
incoming.ResultCh <- RunOutcome{Err: ErrQueueFull}
|
|
close(incoming.ResultCh)
|
|
|
|
default:
|
|
// Default to drop old
|
|
if len(sq.queue) > 0 {
|
|
old := sq.queue[0]
|
|
old.ResultCh <- RunOutcome{Err: ErrQueueDropped}
|
|
close(old.ResultCh)
|
|
sq.queue = sq.queue[1:]
|
|
}
|
|
sq.queue = append(sq.queue, incoming)
|
|
}
|
|
}
|
|
|
|
// drainQueue cancels all pending requests with the given outcome.
|
|
// Must be called with sq.mu held.
|
|
func (sq *SessionQueue) drainQueue(outcome RunOutcome) {
|
|
for _, p := range sq.queue {
|
|
p.ResultCh <- outcome
|
|
close(p.ResultCh)
|
|
}
|
|
sq.queue = nil
|
|
}
|
|
|
|
// CancelOne stops the oldest active run (FIFO).
|
|
// Does NOT drain the pending queue or set abort cutoff. Used by /stop command.
|
|
// Returns true if an active run was actually cancelled.
|
|
func (sq *SessionQueue) CancelOne() bool {
|
|
sq.mu.Lock()
|
|
defer sq.mu.Unlock()
|
|
|
|
if len(sq.activeOrder) == 0 {
|
|
return false
|
|
}
|
|
|
|
// Cancel the oldest active run
|
|
runID := sq.activeOrder[0]
|
|
if entry, ok := sq.activeRuns[runID]; ok {
|
|
entry.cancel()
|
|
delete(sq.activeRuns, runID)
|
|
sq.activeOrder = sq.activeOrder[1:]
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// CancelAll stops all active runs and drains all pending requests.
|
|
// Sets abort cutoff so stale queued messages are skipped on next schedule.
|
|
// Used by /stopall command.
|
|
// Returns true if any active run was actually cancelled.
|
|
func (sq *SessionQueue) CancelAll() bool {
|
|
sq.mu.Lock()
|
|
defer sq.mu.Unlock()
|
|
|
|
sq.abortCutoffTime = time.Now() // mark cutoff for stale message skipping
|
|
|
|
cancelled := false
|
|
for runID, entry := range sq.activeRuns {
|
|
entry.cancel()
|
|
delete(sq.activeRuns, runID)
|
|
cancelled = true
|
|
}
|
|
sq.activeOrder = nil
|
|
sq.drainQueue(RunOutcome{Err: context.Canceled})
|
|
return cancelled
|
|
}
|
|
|
|
// Cancel is an alias for CancelAll (backward compat with /stop command).
|
|
func (sq *SessionQueue) Cancel() bool {
|
|
return sq.CancelAll()
|
|
}
|
|
|
|
// IsActive returns whether any run is currently executing.
|
|
func (sq *SessionQueue) IsActive() bool {
|
|
sq.mu.Lock()
|
|
defer sq.mu.Unlock()
|
|
return len(sq.activeRuns) > 0
|
|
}
|
|
|
|
// ActiveCount returns the number of currently executing runs.
|
|
func (sq *SessionQueue) ActiveCount() int {
|
|
sq.mu.Lock()
|
|
defer sq.mu.Unlock()
|
|
return len(sq.activeRuns)
|
|
}
|
|
|
|
// QueueLen returns the number of pending messages.
|
|
func (sq *SessionQueue) QueueLen() int {
|
|
sq.mu.Lock()
|
|
defer sq.mu.Unlock()
|
|
return len(sq.queue)
|
|
}
|
|
|
|
// Reset bumps the generation counter, cancels all active runs, and drains
|
|
// the pending queue. Stale completions from the old generation are ignored.
|
|
// Used during in-process restart (e.g. SIGUSR1).
|
|
func (sq *SessionQueue) Reset() {
|
|
sq.mu.Lock()
|
|
defer sq.mu.Unlock()
|
|
sq.generation++
|
|
for _, entry := range sq.activeRuns {
|
|
entry.cancel()
|
|
}
|
|
sq.activeRuns = make(map[string]activeRunEntry)
|
|
sq.activeOrder = nil
|
|
sq.drainQueue(RunOutcome{Err: ErrLaneCleared})
|
|
}
|