diff options
| author | Peter Stone <thepeterstone@gmail.com> | 2026-03-08 22:24:12 +0000 |
|---|---|---|
| committer | Peter Stone <thepeterstone@gmail.com> | 2026-03-08 22:24:12 +0000 |
| commit | f135ab89ce6710a4f20049e6d0d8e914d8e2e402 (patch) | |
| tree | 67b492274a239a1e0d4fc579a2daf3a70cb9be65 /internal/executor/executor_test.go | |
| parent | fab59cf8e669fe9ec34b30586f07b7478e897c31 (diff) | |
executor: fix sandbox git fetch + inject prior failure history
Fix: use file:// prefix in git fetch during sandbox teardown to force
pack-protocol transfer. The local optimization uses hard links, which
fail across devices and with mixed-owner object stores.
Feature: before running a task, query prior failed/timed-out executions
and prepend their error messages to the agent's --append-system-prompt.
This tells the agent what went wrong in previous attempts so it doesn't
repeat the same mistakes.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal/executor/executor_test.go')
| -rw-r--r-- | internal/executor/executor_test.go | 106 |
1 file changed, 106 insertions, 0 deletions
diff --git a/internal/executor/executor_test.go b/internal/executor/executor_test.go index 028e5cf..e6b8f0b 100644 --- a/internal/executor/executor_test.go +++ b/internal/executor/executor_test.go @@ -22,11 +22,13 @@ type mockRunner struct { delay time.Duration err error exitCode int + onRun func(*task.Task, *storage.Execution) error } func (m *mockRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error { m.mu.Lock() m.calls++ + cb := m.onRun m.mu.Unlock() if m.delay > 0 { @@ -36,6 +38,9 @@ func (m *mockRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution return ctx.Err() } } + if cb != nil { + return cb(t, e) + } if m.err != nil { e.ExitCode = m.exitCode return m.err @@ -355,6 +360,107 @@ func TestPool_ConcurrentExecution(t *testing.T) { } } +func TestWithFailureHistory_NoFailures_ReturnsUnchanged(t *testing.T) { + tk := makeTask("no-fail") + result := withFailureHistory(tk, nil, nil) + if result != tk { + t.Error("expected same pointer when no prior executions") + } +} + +func TestWithFailureHistory_WithError_ReturnsUnchanged(t *testing.T) { + tk := makeTask("err-case") + result := withFailureHistory(tk, nil, fmt.Errorf("db error")) + if result != tk { + t.Error("expected same pointer when ListExecutions errors") + } +} + +func TestWithFailureHistory_InjectsFailedHistory(t *testing.T) { + tk := makeTask("with-fail") + tk.Agent.Instructions = "do the work" + + execs := []*storage.Execution{ + {ID: "e1", Status: "FAILED", ErrorMsg: "sandbox: uncommitted changes", StartTime: time.Now()}, + {ID: "e2", Status: "COMPLETED", ErrorMsg: "", StartTime: time.Now()}, // not a failure, should be ignored + } + result := withFailureHistory(tk, execs, nil) + + if result == tk { + t.Fatal("expected a new task copy, got same pointer") + } + if !strings.Contains(result.Agent.SystemPromptAppend, "Prior Attempt History") { + t.Errorf("expected history header in SystemPromptAppend, got: %q", result.Agent.SystemPromptAppend) + } + if 
!strings.Contains(result.Agent.SystemPromptAppend, "sandbox: uncommitted changes") { + t.Errorf("expected error message in SystemPromptAppend, got: %q", result.Agent.SystemPromptAppend) + } + // COMPLETED execution should not appear + if strings.Contains(result.Agent.SystemPromptAppend, "e2") { + t.Errorf("COMPLETED execution should not appear in history") + } + // Original instructions unchanged + if result.Agent.Instructions != "do the work" { + t.Errorf("instructions should be unchanged, got: %q", result.Agent.Instructions) + } +} + +func TestWithFailureHistory_PreservesExistingSystemPrompt(t *testing.T) { + tk := makeTask("with-prompt") + tk.Agent.SystemPromptAppend = "existing prompt" + + execs := []*storage.Execution{ + {ID: "e1", Status: "FAILED", ErrorMsg: "some error", StartTime: time.Now()}, + } + result := withFailureHistory(tk, execs, nil) + + if !strings.Contains(result.Agent.SystemPromptAppend, "Prior Attempt History") { + t.Error("expected history prepended") + } + if !strings.Contains(result.Agent.SystemPromptAppend, "existing prompt") { + t.Error("expected existing prompt preserved") + } + // History must come BEFORE the existing prompt + histIdx := strings.Index(result.Agent.SystemPromptAppend, "Prior Attempt History") + existIdx := strings.Index(result.Agent.SystemPromptAppend, "existing prompt") + if histIdx > existIdx { + t.Error("history should be prepended before existing system prompt") + } +} + +func TestPool_FailureHistoryInjectedOnRetry(t *testing.T) { + store := testStore(t) + + var capturedPrompt string + runner := &mockRunner{} + runner.onRun = func(t *task.Task, _ *storage.Execution) error { + capturedPrompt = t.Agent.SystemPromptAppend + return nil + } + runners := map[string]Runner{"claude": runner} + logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) + pool := NewPool(2, runners, store, logger) + + tk := makeTask("retry-hist") + store.CreateTask(tk) + + // Simulate a prior failed 
execution. + store.CreateExecution(&storage.Execution{ + ID: "prior-exec", TaskID: tk.ID, + StartTime: time.Now(), EndTime: time.Now(), + Status: "FAILED", ErrorMsg: "prior failure reason", + }) + + if err := pool.Submit(context.Background(), tk); err != nil { + t.Fatalf("submit: %v", err) + } + <-pool.Results() + + if !strings.Contains(capturedPrompt, "prior failure reason") { + t.Errorf("expected prior failure in system prompt, got: %q", capturedPrompt) + } +} + func TestPool_UnsupportedAgent(t *testing.T) { store := testStore(t) runners := map[string]Runner{"claude": &mockRunner{}} |
