diff options
| author | Peter Stone <thepeterstone@gmail.com> | 2026-03-14 07:37:20 +0000 |
|---|---|---|
| committer | Peter Stone <thepeterstone@gmail.com> | 2026-03-14 07:37:20 +0000 |
| commit | 4029fdd82bdd657ed862c89f20eb03ff2594cde9 (patch) | |
| tree | 5725975ffa6825018605ee336ebe8a7e3f02b1d4 /internal/executor/executor_test.go | |
| parent | 02b35218d9aadcaa6a3b52f218b71577ab72c811 (diff) | |
fix: surface agent stderr, auto-retry restart-killed tasks, handle stale sandboxes
#1 - Diagnostics: tailFile() reads the last 20 lines of subprocess stderr and
appends them to the error message when claude/gemini exits non-zero. Previously
all exit-1 failures were opaque; now the error_msg carries the actual subprocess
output.
#4 - Restart recovery: RecoverStaleRunning() now re-queues tasks after
marking them FAILED, so tasks killed by a server restart automatically
retry on the next boot rather than staying permanently FAILED.
#2 - Stale sandbox: If a resumed execution's preserved SandboxDir no longer
exists (e.g. /tmp was purged after a reboot), clone a fresh sandbox instead of
failing immediately with "no such file or directory".
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal/executor/executor_test.go')
| -rw-r--r-- | internal/executor/executor_test.go | 28 |
1 file changed, 20 insertions, 8 deletions
diff --git a/internal/executor/executor_test.go b/internal/executor/executor_test.go index f6d0179..a6c4ad8 100644 --- a/internal/executor/executor_test.go +++ b/internal/executor/executor_test.go @@ -596,15 +596,9 @@ func TestPool_RecoverStaleRunning(t *testing.T) { Status: "RUNNING", }) - pool.RecoverStaleRunning() + pool.RecoverStaleRunning(context.Background()) - recovered, err := store.GetTask(tk.ID) - if err != nil { - t.Fatalf("get task: %v", err) - } - if recovered.State != task.StateFailed { - t.Errorf("state: want FAILED, got %q", recovered.State) - } + // Execution record should be closed as FAILED. execs, _ := store.ListExecutions(tk.ID) if len(execs) == 0 || execs[0].Status != "FAILED" { t.Errorf("execution status: want FAILED, got %+v", execs) @@ -612,6 +606,24 @@ func TestPool_RecoverStaleRunning(t *testing.T) { if execs[0].ErrorMsg == "" { t.Error("expected non-empty error message on recovered execution") } + + // Task should be re-queued for retry and complete. + select { + case result := <-pool.Results(): + if result.TaskID != tk.ID { + t.Errorf("unexpected task in results: %s", result.TaskID) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for stale RUNNING task to be re-queued and run") + } + recovered, err := store.GetTask(tk.ID) + if err != nil { + t.Fatalf("get task: %v", err) + } + // Top-level tasks (no parent) go to READY after a successful run. + if recovered.State != task.StateReady { + t.Errorf("state after re-queue: want READY, got %q", recovered.State) + } } func TestPool_RecoverStaleQueued_ResubmitsToPool(t *testing.T) { |
