diff options
| author | Peter Stone <thepeterstone@gmail.com> | 2026-03-26 08:02:09 +0000 |
|---|---|---|
| committer | Peter Stone <thepeterstone@gmail.com> | 2026-03-26 08:02:09 +0000 |
| commit | 5f3e9900358649f1356d0a242e643790e29e3701 (patch) | |
| tree | dd75fd4707e1dc4e5cef5822372997a47cea2830 /internal/api/server.go | |
| parent | 34b4f397b1f46cb5027ba910983021a68f3e7333 (diff) | |
feat: cascade retry deps when running a task with failed dependencies
When /run is called on a CANCELLED/FAILED task that has deps in a terminal
failure state, automatically reset and resubmit those deps so the task
isn't immediately re-cancelled by the pool's dep check.
Also update reset-failed-tasks script to handle CANCELLED tasks and clean
up preserved sandbox workspaces.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal/api/server.go')
| -rw-r--r-- | internal/api/server.go | 39 |
1 file changed, 39 insertions, 0 deletions
diff --git a/internal/api/server.go b/internal/api/server.go index 38108de..8eba829 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -571,6 +571,41 @@ func (s *Server) handleGetTask(w http.ResponseWriter, r *http.Request) { } writeJSON(w, http.StatusOK, s.enrichTask(t)) } +// retryableDepStates are the states from which a dependency can be retried +// when cascading a retry from a dependent task. +var retryableDepStates = map[task.State]bool{ + task.StateFailed: true, + task.StateTimedOut: true, + task.StateCancelled: true, + task.StateBudgetExceeded: true, +} + +// cascadeRetryDeps resets any dependency (recursively) that is in a retryable +// terminal state, and submits it to the pool. This ensures that retrying a +// CANCELLED task that was blocked by a failed dep will also restart that dep. +func (s *Server) cascadeRetryDeps(ctx context.Context, t *task.Task) { + for _, depID := range t.DependsOn { + dep, err := s.store.GetTask(depID) + if err != nil { + s.logger.Warn("cascadeRetryDeps: dep not found", "depID", depID) + continue + } + if !retryableDepStates[dep.State] { + continue + } + // Recursively cascade first (depth-first so root deps go first). + s.cascadeRetryDeps(ctx, dep) + reset, err := s.store.ResetTaskForRetry(depID) + if err != nil { + s.logger.Warn("cascadeRetryDeps: reset failed", "depID", depID, "error", err) + continue + } + if submitErr := s.pool.Submit(ctx, reset); submitErr != nil { + s.logger.Warn("cascadeRetryDeps: submit failed", "depID", depID, "error", submitErr) + } + } +} + func (s *Server) handleRunTask(w http.ResponseWriter, r *http.Request) { id := r.PathValue("id") agentParam := r.URL.Query().Get("agent") // Use a different name to avoid confusion @@ -619,6 +654,10 @@ func (s *Server) handleRunTask(w http.ResponseWriter, r *http.Request) { } // The task `t` now has the correct agent configuration. + // 6. 
Cascade-retry any deps that are in a terminal failure state so the + // task isn't immediately re-cancelled by checkDepsReady. + s.cascadeRetryDeps(r.Context(), originalTask) + if err := s.pool.Submit(context.Background(), t); err != nil { writeJSON(w, http.StatusServiceUnavailable, map[string]string{"error": fmt.Sprintf("executor pool: %v", err)}) return |
