diff options
Diffstat (limited to 'internal/executor')
| -rw-r--r-- | internal/executor/claude.go | 5 | ||||
| -rw-r--r-- | internal/executor/executor.go | 42 | ||||
| -rw-r--r-- | internal/executor/executor_test.go | 106 |
3 files changed, 152 insertions, 1 deletions
diff --git a/internal/executor/claude.go b/internal/executor/claude.go index 683de87..d8032ab 100644 --- a/internal/executor/claude.go +++ b/internal/executor/claude.go @@ -204,7 +204,10 @@ func teardownSandbox(projectDir, sandboxDir string, logger *slog.Logger) error { } // Fetch new commits from sandbox into project_dir and fast-forward merge. - if out, err := exec.Command("git", "-C", projectDir, "fetch", sandboxDir, "HEAD").CombinedOutput(); err != nil { + // Use file:// prefix to force pack-protocol transfer instead of the local + // optimization that hard-links objects — hard-linking fails across devices + // and can fail with permission errors when the repo has mixed-owner objects. + if out, err := exec.Command("git", "-C", projectDir, "fetch", "file://"+sandboxDir, "HEAD").CombinedOutput(); err != nil { return fmt.Errorf("git fetch from sandbox: %w\n%s", err, out) } if out, err := exec.Command("git", "-C", projectDir, "merge", "--ff-only", "FETCH_HEAD").CombinedOutput(); err != nil { diff --git a/internal/executor/executor.go b/internal/executor/executor.go index d1c8e72..df222f8 100644 --- a/internal/executor/executor.go +++ b/internal/executor/executor.go @@ -6,6 +6,7 @@ import ( "fmt" "log/slog" "path/filepath" + "strings" "sync" "time" @@ -416,6 +417,10 @@ func (p *Pool) execute(ctx context.Context, t *task.Task) { p.mu.Unlock() }() + // Inject prior failure history so the agent knows what went wrong before. + priorExecs, priorErr := p.store.ListExecutions(t.ID) + t = withFailureHistory(t, priorExecs, priorErr) + // Run the task. err = runner.Run(ctx, t, exec) exec.EndTime = time.Now().UTC() @@ -478,6 +483,43 @@ var terminalFailureStates = map[task.State]bool{ task.StateBudgetExceeded: true, } +// withFailureHistory returns a shallow copy of t with prior failed execution +// error messages prepended to SystemPromptAppend so the agent knows what went +// wrong in previous attempts. 
+func withFailureHistory(t *task.Task, execs []*storage.Execution, err error) *task.Task { + if err != nil || len(execs) == 0 { + return t + } + + var failures []storage.Execution + for _, e := range execs { + if (e.Status == "FAILED" || e.Status == "TIMED_OUT") && e.ErrorMsg != "" { + failures = append(failures, *e) + } + } + if len(failures) == 0 { + return t + } + + var sb strings.Builder + sb.WriteString("## Prior Attempt History\n\n") + sb.WriteString("This task has failed before. Do not repeat the same mistakes.\n\n") + for i, f := range failures { + fmt.Fprintf(&sb, "**Attempt %d** (%s) — %s:\n%s\n\n", + i+1, f.StartTime.Format("2006-01-02 15:04 UTC"), f.Status, f.ErrorMsg) + } + sb.WriteString("---\n\n") + + copy := *t + copy.Agent = t.Agent + if copy.Agent.SystemPromptAppend != "" { + copy.Agent.SystemPromptAppend = sb.String() + copy.Agent.SystemPromptAppend + } else { + copy.Agent.SystemPromptAppend = sb.String() + } + return &copy +} + // waitForDependencies polls storage until all tasks in t.DependsOn reach COMPLETED, // or until a dependency enters a terminal failure state or the context is cancelled. 
func (p *Pool) waitForDependencies(ctx context.Context, t *task.Task) error { diff --git a/internal/executor/executor_test.go b/internal/executor/executor_test.go index 028e5cf..e6b8f0b 100644 --- a/internal/executor/executor_test.go +++ b/internal/executor/executor_test.go @@ -22,11 +22,13 @@ type mockRunner struct { delay time.Duration err error exitCode int + onRun func(*task.Task, *storage.Execution) error } func (m *mockRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error { m.mu.Lock() m.calls++ + cb := m.onRun m.mu.Unlock() if m.delay > 0 { @@ -36,6 +38,9 @@ func (m *mockRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution return ctx.Err() } } + if cb != nil { + return cb(t, e) + } if m.err != nil { e.ExitCode = m.exitCode return m.err @@ -355,6 +360,107 @@ func TestPool_ConcurrentExecution(t *testing.T) { } } +func TestWithFailureHistory_NoFailures_ReturnsUnchanged(t *testing.T) { + tk := makeTask("no-fail") + result := withFailureHistory(tk, nil, nil) + if result != tk { + t.Error("expected same pointer when no prior executions") + } +} + +func TestWithFailureHistory_WithError_ReturnsUnchanged(t *testing.T) { + tk := makeTask("err-case") + result := withFailureHistory(tk, nil, fmt.Errorf("db error")) + if result != tk { + t.Error("expected same pointer when ListExecutions errors") + } +} + +func TestWithFailureHistory_InjectsFailedHistory(t *testing.T) { + tk := makeTask("with-fail") + tk.Agent.Instructions = "do the work" + + execs := []*storage.Execution{ + {ID: "e1", Status: "FAILED", ErrorMsg: "sandbox: uncommitted changes", StartTime: time.Now()}, + {ID: "e2", Status: "COMPLETED", ErrorMsg: "", StartTime: time.Now()}, // not a failure, should be ignored + } + result := withFailureHistory(tk, execs, nil) + + if result == tk { + t.Fatal("expected a new task copy, got same pointer") + } + if !strings.Contains(result.Agent.SystemPromptAppend, "Prior Attempt History") { + t.Errorf("expected history header in 
SystemPromptAppend, got: %q", result.Agent.SystemPromptAppend) + } + if !strings.Contains(result.Agent.SystemPromptAppend, "sandbox: uncommitted changes") { + t.Errorf("expected error message in SystemPromptAppend, got: %q", result.Agent.SystemPromptAppend) + } + // COMPLETED execution should not appear + if strings.Contains(result.Agent.SystemPromptAppend, "e2") { + t.Errorf("COMPLETED execution should not appear in history") + } + // Original instructions unchanged + if result.Agent.Instructions != "do the work" { + t.Errorf("instructions should be unchanged, got: %q", result.Agent.Instructions) + } +} + +func TestWithFailureHistory_PreservesExistingSystemPrompt(t *testing.T) { + tk := makeTask("with-prompt") + tk.Agent.SystemPromptAppend = "existing prompt" + + execs := []*storage.Execution{ + {ID: "e1", Status: "FAILED", ErrorMsg: "some error", StartTime: time.Now()}, + } + result := withFailureHistory(tk, execs, nil) + + if !strings.Contains(result.Agent.SystemPromptAppend, "Prior Attempt History") { + t.Error("expected history prepended") + } + if !strings.Contains(result.Agent.SystemPromptAppend, "existing prompt") { + t.Error("expected existing prompt preserved") + } + // History must come BEFORE the existing prompt + histIdx := strings.Index(result.Agent.SystemPromptAppend, "Prior Attempt History") + existIdx := strings.Index(result.Agent.SystemPromptAppend, "existing prompt") + if histIdx > existIdx { + t.Error("history should be prepended before existing system prompt") + } +} + +func TestPool_FailureHistoryInjectedOnRetry(t *testing.T) { + store := testStore(t) + + var capturedPrompt string + runner := &mockRunner{} + runner.onRun = func(t *task.Task, _ *storage.Execution) error { + capturedPrompt = t.Agent.SystemPromptAppend + return nil + } + runners := map[string]Runner{"claude": runner} + logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) + pool := NewPool(2, runners, store, logger) + + tk := 
makeTask("retry-hist") + store.CreateTask(tk) + + // Simulate a prior failed execution. + store.CreateExecution(&storage.Execution{ + ID: "prior-exec", TaskID: tk.ID, + StartTime: time.Now(), EndTime: time.Now(), + Status: "FAILED", ErrorMsg: "prior failure reason", + }) + + if err := pool.Submit(context.Background(), tk); err != nil { + t.Fatalf("submit: %v", err) + } + <-pool.Results() + + if !strings.Contains(capturedPrompt, "prior failure reason") { + t.Errorf("expected prior failure in system prompt, got: %q", capturedPrompt) + } +} + func TestPool_UnsupportedAgent(t *testing.T) { store := testStore(t) runners := map[string]Runner{"claude": &mockRunner{}} |
