Files
goclaw/internal/tools/evaluate_loop_tool.go
T
viettranx bdb60de7ae chore: upgrade Go 1.25 → 1.26 and apply go fix modernizations
- Update go.mod and Dockerfile to Go 1.26
- Apply `go fix ./...` stdlib modernizations across 170+ files
- Add `go fix` to post-implementation checklist in CLAUDE.md
- Fix go fix misapplied rewrite in loop_history.go
2026-03-10 00:09:15 +07:00

199 lines
6.1 KiB
Go

package tools
import (
"context"
"fmt"
"strings"
"github.com/google/uuid"
"github.com/nextlevelbuilder/goclaw/internal/hooks"
)
// Round limits for the generate-evaluate loop run by EvaluateLoopTool.Execute.
const (
// defaultMaxRounds is the round budget used when the caller does not supply
// a "max_rounds" argument of at least 1.
defaultMaxRounds = 3
// maxAllowedRounds is the ceiling applied to a caller-supplied "max_rounds".
maxAllowedRounds = 5
)
// EvaluateLoopTool orchestrates a generator-evaluator feedback loop.
// One agent generates output, a second agent evaluates it, and the generator
// revises until the evaluator approves or the round limit is reached.
type EvaluateLoopTool struct {
// manager performs the agent delegations; Execute also reads its teamStore
// to complete a team task after an approval.
manager *DelegateManager
}
// NewEvaluateLoopTool returns an EvaluateLoopTool that delegates through manager.
func NewEvaluateLoopTool(manager *DelegateManager) *EvaluateLoopTool {
	tool := new(EvaluateLoopTool)
	tool.manager = manager
	return tool
}
// Name returns the tool's name, "evaluate_loop".
func (t *EvaluateLoopTool) Name() string {
	const toolName = "evaluate_loop"
	return toolName
}
// Description returns a one-paragraph, human-readable summary of what the
// tool does: a generate, evaluate, revise cycle between two agents.
func (t *EvaluateLoopTool) Description() string {
	const (
		summary = "Run a generate-evaluate-revise loop between two agents. "
		flow    = "Generator produces output, evaluator approves or rejects with feedback, "
		ending  = "generator revises until approved or max rounds reached."
	)
	return summary + flow + ending
}
// Parameters returns the JSON-schema-style description of the arguments
// Execute accepts: an object with seven properties, of which "generator",
// "evaluator" and "task" are required.
func (t *EvaluateLoopTool) Parameters() map[string]any {
	// property builds the schema entry for a single argument.
	property := func(kind, description string) map[string]any {
		return map[string]any{
			"type":        kind,
			"description": description,
		}
	}

	properties := make(map[string]any, 7)
	properties["generator"] = property("string", "Agent key for the content generator")
	properties["evaluator"] = property("string", "Agent key for the quality evaluator")
	properties["task"] = property("string", "Initial task for the generator")
	properties["max_rounds"] = property("number", "Maximum generate-evaluate rounds (default 3, max 5)")
	properties["pass_criteria"] = property("string", "Criteria the evaluator uses to approve/reject output")
	properties["context"] = property("string", "Optional additional context for both agents")
	properties["team_task_id"] = property("string", "Optional team task ID for auto-completion on success")

	schema := map[string]any{
		"type":       "object",
		"properties": properties,
		"required":   []string{"generator", "evaluator", "task"},
	}
	return schema
}
// Execute runs the generate-evaluate-revise loop described by args and returns
// its outcome as a tool result.
//
// Arguments read from args:
//   - "generator", "evaluator", "task": required non-empty strings; if any is
//     missing an error result is returned before any delegation happens.
//   - "max_rounds": optional number. Values below 1 and non-numeric values
//     select defaultMaxRounds; larger values are capped at maxAllowedRounds.
//   - "pass_criteria", "context": optional strings woven into the prompts.
//   - "team_task_id": optional UUID string. When it parses and the manager has
//     a team store, the task is claimed and completed after an approval.
//
// Each round delegates synchronously to the generator and then to the
// evaluator. The loop stops at the first approval. A delegation error aborts
// the loop with an error result. If no round is approved, the last evaluator
// feedback and the last generator output are returned.
func (t *EvaluateLoopTool) Execute(ctx context.Context, args map[string]any) *Result {
	generatorKey, _ := args["generator"].(string)
	evaluatorKey, _ := args["evaluator"].(string)
	task, _ := args["task"].(string)
	if generatorKey == "" || evaluatorKey == "" || task == "" {
		return ErrorResult("generator, evaluator, and task are required")
	}

	// Range-check and clamp while the value is still a float. Converting NaN,
	// ±Inf or an out-of-range float64 to int is implementation-dependent in
	// Go, so converting first could turn a huge request into the default
	// instead of the cap. Fractions still truncate toward zero.
	maxRounds := defaultMaxRounds
	if v, ok := args["max_rounds"].(float64); ok && v >= 1 {
		maxRounds = int(min(v, float64(maxAllowedRounds)))
	}

	passCriteria, _ := args["pass_criteria"].(string)
	extraContext, _ := args["context"].(string)

	// Auto-completion is best-effort: a missing or malformed team_task_id
	// leaves teamTaskID as uuid.Nil, which disables it. The parsed value is
	// only trusted when Parse reports success.
	var teamTaskID uuid.UUID
	if v, _ := args["team_task_id"].(string); v != "" {
		if parsed, parseErr := uuid.Parse(v); parseErr == nil {
			teamTaskID = parsed
		}
	}

	// Skip quality gates for all internal delegations (prevent recursion).
	loopCtx := hooks.WithSkipHooks(ctx, true)

	var lastOutput, lastFeedback string
	for round := 1; round <= maxRounds; round++ {
		// --- Generate ---
		// The first round (and any round without usable feedback) sends the
		// original task; later rounds send a revision request that carries
		// the evaluator's feedback. Extra context, if any, is prepended.
		var genTask string
		switch {
		case round > 1 && lastFeedback != "":
			genTask = fmt.Sprintf(
				"[Revision — Round %d/%d]\n"+
					"Your previous output was reviewed and needs improvement.\n\n"+
					"Original task: %s\n"+
					"Evaluator feedback: %s\n\n"+
					"Please revise your output addressing all feedback points.",
				round, maxRounds, task, lastFeedback)
			if extraContext != "" {
				genTask = fmt.Sprintf("[Additional Context]\n%s\n\n%s", extraContext, genTask)
			}
		case extraContext != "":
			genTask = fmt.Sprintf("[Additional Context]\n%s\n\n[Task]\n%s", extraContext, task)
		default:
			genTask = task
		}

		genResult, err := t.manager.Delegate(loopCtx, DelegateOpts{
			TargetAgentKey: generatorKey,
			Task:           genTask,
			Mode:           "sync",
		})
		if err != nil {
			return ErrorResult(fmt.Sprintf("generator %q failed in round %d: %s", generatorKey, round, err))
		}
		lastOutput = genResult.Content

		// --- Evaluate ---
		evalPrompt := buildEvalLoopPrompt(lastOutput, passCriteria, round, maxRounds)
		evalResult, err := t.manager.Delegate(loopCtx, DelegateOpts{
			TargetAgentKey: evaluatorKey,
			Task:           evalPrompt,
			Mode:           "sync",
		})
		if err != nil {
			return ErrorResult(fmt.Sprintf("evaluator %q failed in round %d: %s", evaluatorKey, round, err))
		}

		// Check approval
		if isApproved(evalResult.Content) {
			// Auto-complete team task on the final successful round. The
			// store calls use the caller's ctx (not loopCtx) and their
			// errors are deliberately ignored: a failed completion does not
			// change the loop's result.
			if teamTaskID != uuid.Nil && t.manager.teamStore != nil {
				if teamTask, getErr := t.manager.teamStore.GetTask(ctx, teamTaskID); getErr == nil {
					_ = t.manager.teamStore.ClaimTask(ctx, teamTaskID, uuid.Nil, teamTask.TeamID)
					_ = t.manager.teamStore.CompleteTask(ctx, teamTaskID, teamTask.TeamID, lastOutput)
				}
			}
			return NewResult(fmt.Sprintf(
				"Evaluate-optimize loop completed in %d round(s).\n"+
					"Generator: %s | Evaluator: %s\n\n"+
					"Final output:\n%s",
				round, generatorKey, evaluatorKey, lastOutput))
		}

		// Extract feedback for next round
		lastFeedback = extractFeedback(evalResult.Content)
	}

	// Max rounds exceeded
	return NewResult(fmt.Sprintf(
		"Evaluate-optimize loop reached max rounds (%d) without evaluator approval.\n\n"+
			"Last evaluator feedback: %s\n\n"+
			"Last generator output:\n%s",
		maxRounds, lastFeedback, lastOutput))
}
// buildEvalLoopPrompt assembles the prompt sent to the evaluator for one round.
// It contains a round header, an optional "Criteria:" section (omitted when
// criteria is empty), the output under review, and the instruction to answer
// with either APPROVED or REJECTED plus feedback.
func buildEvalLoopPrompt(output, criteria string, round, maxRounds int) string {
	prompt := fmt.Sprintf("[Quality Evaluation — Round %d/%d]\n", round, maxRounds)
	prompt += "Evaluate this output against the criteria below.\n"
	if criteria != "" {
		prompt += "\nCriteria: " + criteria + "\n"
	}
	prompt += "\nOutput to evaluate:\n" + output + "\n\n"
	prompt += "Respond with EXACTLY one of:\n"
	prompt += "- \"APPROVED\" if the output meets ALL criteria (optionally followed by comments)\n"
	prompt += "- \"REJECTED: <specific feedback>\" with actionable improvement suggestions"
	return prompt
}
// isApproved reports whether an evaluator response, ignoring surrounding
// whitespace and letter case, begins with the verdict "APPROVED".
func isApproved(response string) bool {
	const verdict = "APPROVED"
	trimmed := strings.TrimSpace(response)
	if len(trimmed) < len(verdict) {
		return false
	}
	return strings.EqualFold(trimmed[:len(verdict)], verdict)
}
// extractFeedback returns the text that follows the first "REJECTED:" marker
// in response (matched case-insensitively), with surrounding whitespace
// removed. If the marker is absent the response is returned unchanged.
func extractFeedback(response string) string {
	const marker = "REJECTED:"
	// Search the original string rather than an upper-cased copy: upper-casing
	// can change the byte length of non-ASCII text, which would make an index
	// found in the copy point at the wrong place (or out of range) here.
	for i := 0; i+len(marker) <= len(response); i++ {
		if strings.EqualFold(response[i:i+len(marker)], marker) {
			return strings.TrimSpace(response[i+len(marker):])
		}
	}
	return response
}