diff options
| author | Peter Stone <thepeterstone@gmail.com> | 2026-03-14 07:37:20 +0000 |
|---|---|---|
| committer | Peter Stone <thepeterstone@gmail.com> | 2026-03-14 07:37:20 +0000 |
| commit | 4029fdd82bdd657ed862c89f20eb03ff2594cde9 (patch) | |
| tree | 5725975ffa6825018605ee336ebe8a7e3f02b1d4 /internal/executor/claude.go | |
| parent | 02b35218d9aadcaa6a3b52f218b71577ab72c811 (diff) | |
fix: surface agent stderr, auto-retry restart-killed tasks, handle stale sandboxes
#1 - Diagnostics: tailFile() reads last 20 lines of subprocess stderr and
appends to error message when claude/gemini exits non-zero. Previously all
exit-1 failures were opaque; now the error_msg carries the actual subprocess
output.
#4 - Restart recovery: RecoverStaleRunning() now re-queues tasks after
marking them FAILED, so tasks killed by a server restart automatically
retry on the next boot rather than staying permanently FAILED.
#2 - Stale sandbox: If a resume execution's preserved SandboxDir no longer
exists (e.g. /tmp purge after reboot), clone a fresh sandbox instead of
failing immediately with "no such file or directory".
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal/executor/claude.go')
| -rw-r--r-- | internal/executor/claude.go | 41 |
1 files changed, 40 insertions, 1 deletions
diff --git a/internal/executor/claude.go b/internal/executor/claude.go index 626a854..5a5b35e 100644 --- a/internal/executor/claude.go +++ b/internal/executor/claude.go @@ -106,7 +106,23 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi effectiveWorkingDir := projectDir if e.ResumeSessionID != "" { if e.SandboxDir != "" { - effectiveWorkingDir = e.SandboxDir + if _, statErr := os.Stat(e.SandboxDir); statErr == nil { + effectiveWorkingDir = e.SandboxDir + } else { + // Preserved sandbox was cleaned up (e.g. /tmp purge after reboot). + // Clone a fresh sandbox so the task can run rather than fail immediately. + r.Logger.Warn("preserved sandbox missing, cloning fresh", "sandbox", e.SandboxDir, "project_dir", projectDir) + e.SandboxDir = "" + if projectDir != "" { + var err error + sandboxDir, err = setupSandbox(projectDir) + if err != nil { + return fmt.Errorf("setting up sandbox: %w", err) + } + effectiveWorkingDir = sandboxDir + r.Logger.Info("fresh sandbox created for resume", "sandbox", sandboxDir, "project_dir", projectDir) + } + } } } else if projectDir != "" { var err error @@ -399,6 +415,9 @@ func (r *ClaudeRunner) execOnce(ctx context.Context, args []string, workingDir s if isRateLimitError(streamErr) || isQuotaExhausted(streamErr) { return streamErr } + if tail := tailFile(e.StderrPath, 20); tail != "" { + return fmt.Errorf("claude exited with error: %w\nstderr:\n%s", waitErr, tail) + } return fmt.Errorf("claude exited with error: %w", waitErr) } @@ -578,3 +597,23 @@ func permissionDenialError(msg map[string]interface{}) error { } return nil } + +// tailFile returns the last n lines of the file at path, or empty string if +// the file cannot be read. Used to surface subprocess stderr on failure. 
// tailFile returns the last n lines of the file at path, joined with "\n".
//
// It is a best-effort diagnostic helper used to surface subprocess stderr in
// error messages, so it never fails: it returns the empty string when n is
// not positive, when the file cannot be opened, or when the file is empty.
// If a read error occurs part-way through, the lines collected so far are
// returned.
//
// Line terminators ("\n" or "\r\n") are stripped from each line. A final line
// without a trailing newline is still included.
func tailFile(path string, n int) string {
	if n <= 0 {
		return ""
	}

	f, err := os.Open(path)
	if err != nil {
		return ""
	}
	defer f.Close()

	// bufio.Reader.ReadString is used instead of bufio.Scanner because the
	// scanner aborts with ErrTooLong on lines above its 64 KiB token limit,
	// which would silently return lines from before the long line instead of
	// the real tail of the file.
	var lines []string
	reader := bufio.NewReader(f)
	for {
		line, readErr := reader.ReadString('\n')
		if line != "" {
			line = strings.TrimSuffix(line, "\n")
			line = strings.TrimSuffix(line, "\r")
			lines = append(lines, line)
			if len(lines) > n {
				// Shift in place so the backing array stays bounded at n+1
				// entries instead of sliding forward and reallocating.
				copy(lines, lines[1:])
				lines = lines[:n]
			}
		}
		if readErr != nil {
			// io.EOF ends the file normally; any other error ends the scan
			// early and we return what was gathered (best effort).
			break
		}
	}
	return strings.Join(lines, "\n")
}
