From 4029fdd82bdd657ed862c89f20eb03ff2594cde9 Mon Sep 17 00:00:00 2001
From: Peter Stone
Date: Sat, 14 Mar 2026 07:37:20 +0000
Subject: fix: surface agent stderr, auto-retry restart-killed tasks, handle stale sandboxes

#1 - Diagnostics: tailFile() reads last 20 lines of subprocess stderr and
appends to error message when claude/gemini exits non-zero. Previously all
exit-1 failures were opaque; now the error_msg carries the actual
subprocess output.

#4 - Restart recovery: RecoverStaleRunning() now re-queues tasks after
marking them FAILED, so tasks killed by a server restart automatically
retry on the next boot rather than staying permanently FAILED.

#2 - Stale sandbox: If a resume execution's preserved SandboxDir no longer
exists (e.g. /tmp purge after reboot), clone a fresh sandbox instead of
failing immediately with "no such file or directory".

Co-Authored-By: Claude Sonnet 4.6
---
 internal/executor/gemini.go | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'internal/executor/gemini.go')

diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go
index c30cd66..2db3218 100644
--- a/internal/executor/gemini.go
+++ b/internal/executor/gemini.go
@@ -150,6 +150,9 @@ func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir s
 		if exitErr, ok := waitErr.(*exec.ExitError); ok {
 			e.ExitCode = exitErr.ExitCode()
 		}
+		if tail := tailFile(e.StderrPath, 20); tail != "" {
+			return fmt.Errorf("gemini exited with error: %w\nstderr:\n%s", waitErr, tail)
+		}
 		return fmt.Errorf("gemini exited with error: %w", waitErr)
 	}
 
-- 
cgit v1.2.3