From 4029fdd82bdd657ed862c89f20eb03ff2594cde9 Mon Sep 17 00:00:00 2001
From: Peter Stone
Date: Sat, 14 Mar 2026 07:37:20 +0000
Subject: fix: surface agent stderr, auto-retry restart-killed tasks, handle stale sandboxes

#1 - Diagnostics: tailFile() reads last 20 lines of subprocess stderr and
appends to error message when claude/gemini exits non-zero. Previously all
exit-1 failures were opaque; now the error_msg carries the actual subprocess
output.

#4 - Restart recovery: RecoverStaleRunning() now re-queues tasks after marking
them FAILED, so tasks killed by a server restart automatically retry on the
next boot rather than staying permanently FAILED.

#2 - Stale sandbox: If a resume execution's preserved SandboxDir no longer
exists (e.g. /tmp purge after reboot), clone a fresh sandbox instead of
failing immediately with "no such file or directory".

Co-Authored-By: Claude Sonnet 4.6
---
 internal/cli/serve.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'internal/cli')

diff --git a/internal/cli/serve.go b/internal/cli/serve.go
index fd9fda8..2d47630 100644
--- a/internal/cli/serve.go
+++ b/internal/cli/serve.go
@@ -76,7 +76,7 @@ func serve(addr string) error {
 	if cfg.GeminiBinaryPath != "" {
 		pool.Classifier = &executor.Classifier{GeminiBinaryPath: cfg.GeminiBinaryPath}
 	}
-	pool.RecoverStaleRunning()
+	pool.RecoverStaleRunning(context.Background())
 	pool.RecoverStaleQueued(context.Background())
 
 	srv := api.NewServer(store, pool, logger, cfg.ClaudeBinaryPath, cfg.GeminiBinaryPath)
-- 
cgit v1.2.3