summary | refs | log | tree | commit | diff
path: root/internal/executor/claude.go
diff options
context:
space:
mode:
author Peter Stone <thepeterstone@gmail.com> 2026-03-06 00:07:18 +0000
committer Peter Stone <thepeterstone@gmail.com> 2026-03-06 00:07:18 +0000
commit 7466b1751c4126735769a3304e1db80dab166a9e (patch)
tree c5d0fe9d1018e62e3857480d471a0f6f8ebee104 /internal/executor/claude.go
parent a33211d0ad07f5aaf2d8bb51ba18e6790a153bb4 (diff)
feat: blocked task state for agent questions via session resume
When an agent needs user input it writes a question to $CLAUDOMATOR_QUESTION_FILE and exits. The runner detects the file and returns BlockedError; the pool transitions the task to BLOCKED and stores the question JSON on the task record.

The user answers via POST /api/tasks/{id}/answer. The server looks up the claude session_id from the most recent execution and submits a resume execution (claude --resume <session-id> "<answer>"), freeing the executor slot entirely while waiting.

Changes:
- task: add StateBlocked, transitions RUNNING→BLOCKED, BLOCKED→QUEUED
- storage: add session_id to executions, question_json to tasks; add GetLatestExecution and UpdateTaskQuestion methods
- executor: BlockedError type; ClaudeRunner pre-assigns --session-id, sets CLAUDOMATOR_QUESTION_FILE env var, detects question file on exit; buildArgs handles --resume mode; Pool.SubmitResume for resume path
- api: handleAnswerQuestion rewritten to create resume execution
- preamble: add question protocol instructions for agents
- web: BLOCKED state badge (indigo), question text + option buttons or free-text input with Submit on the task card footer

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal/executor/claude.go')
-rw-r--r-- internal/executor/claude.go | 73
1 file changed, 60 insertions, 13 deletions
diff --git a/internal/executor/claude.go b/internal/executor/claude.go
index d3f5751..b97f202 100644
--- a/internal/executor/claude.go
+++ b/internal/executor/claude.go
@@ -27,6 +27,15 @@ type ClaudeRunner struct {
APIURL string // base URL of the Claudomator API, passed to subprocesses
}
+// BlockedError is returned by Run when the agent wrote a question file and exited.
+// The pool transitions the task to BLOCKED and stores the question for the user.
+type BlockedError struct {
+ QuestionJSON string // raw JSON from the question file
+ SessionID string // claude session to resume once the user answers
+}
+
+func (e *BlockedError) Error() string { return fmt.Sprintf("task blocked: %s", e.QuestionJSON) }
+
// ExecLogDir returns the log directory for the given execution ID.
// Implements LogPather so the pool can persist paths before execution starts.
func (r *ClaudeRunner) ExecLogDir(execID string) string {
@@ -45,9 +54,8 @@ func (r *ClaudeRunner) binaryPath() string {
// Run executes a claude -p invocation, streaming output to log files.
// It retries up to 3 times on rate-limit errors using exponential backoff.
+// If the agent writes a question file and exits, Run returns *BlockedError.
func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
- args := r.buildArgs(t)
-
if t.Claude.WorkingDir != "" {
if _, err := os.Stat(t.Claude.WorkingDir); err != nil {
return fmt.Errorf("working_dir %q: %w", t.Claude.WorkingDir, err)
@@ -55,11 +63,9 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
}
// Setup log directory once; retries overwrite the log files.
- // Use pre-set paths if the pool already populated them via LogPather;
- // otherwise fall back to computing from LogDir + execID.
logDir := r.ExecLogDir(e.ID)
if logDir == "" {
- logDir = e.ID // fallback: use execID as relative dir (tests without LogDir set)
+ logDir = e.ID // fallback for tests without LogDir set
}
if err := os.MkdirAll(logDir, 0700); err != nil {
return fmt.Errorf("creating log dir: %w", err)
@@ -70,8 +76,17 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
e.ArtifactDir = logDir
}
+ // Pre-assign session ID so we can resume after a BLOCKED state.
+ // If this is a resume execution the session ID is already set.
+ if e.SessionID == "" {
+ e.SessionID = e.ID // reuse execution UUID as session UUID (both are UUIDs)
+ }
+
+ questionFile := filepath.Join(logDir, "question.json")
+ args := r.buildArgs(t, e, questionFile)
+
attempt := 0
- return runWithBackoff(ctx, 3, 5*time.Second, func() error {
+ err := runWithBackoff(ctx, 3, 5*time.Second, func() error {
if attempt > 0 {
delay := 5 * time.Second * (1 << (attempt - 1))
r.Logger.Warn("rate-limited by Claude API, retrying",
@@ -80,22 +95,34 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
)
}
attempt++
- return r.execOnce(ctx, t, args, e)
+ return r.execOnce(ctx, args, t.Claude.WorkingDir, e)
})
+ if err != nil {
+ return err
+ }
+
+ // Check whether the agent left a question before exiting.
+ data, readErr := os.ReadFile(questionFile)
+ if readErr == nil {
+ os.Remove(questionFile) // consumed
+ return &BlockedError{QuestionJSON: strings.TrimSpace(string(data)), SessionID: e.SessionID}
+ }
+ return nil
}
// execOnce runs the claude subprocess once, streaming output to e's log paths.
-func (r *ClaudeRunner) execOnce(ctx context.Context, t *task.Task, args []string, e *storage.Execution) error {
+func (r *ClaudeRunner) execOnce(ctx context.Context, args []string, workingDir string, e *storage.Execution) error {
cmd := exec.CommandContext(ctx, r.binaryPath(), args...)
cmd.Env = append(os.Environ(),
"CLAUDOMATOR_API_URL="+r.APIURL,
- "CLAUDOMATOR_TASK_ID="+t.ID,
+ "CLAUDOMATOR_TASK_ID="+e.TaskID,
+ "CLAUDOMATOR_QUESTION_FILE="+filepath.Join(e.ArtifactDir, "question.json"),
)
// Put the subprocess in its own process group so we can SIGKILL the entire
// group (MCP servers, bash children, etc.) on cancellation.
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
- if t.Claude.WorkingDir != "" {
- cmd.Dir = t.Claude.WorkingDir
+ if workingDir != "" {
+ cmd.Dir = workingDir
}
stdoutFile, err := os.Create(e.StdoutPath)
@@ -172,13 +199,32 @@ func (r *ClaudeRunner) execOnce(ctx context.Context, t *task.Task, args []string
return nil
}
-func (r *ClaudeRunner) buildArgs(t *task.Task) []string {
+func (r *ClaudeRunner) buildArgs(t *task.Task, e *storage.Execution, questionFile string) []string {
+ // Resume execution: the agent already has context; just deliver the answer.
+ if e.ResumeSessionID != "" {
+ args := []string{
+ "-p", e.ResumeAnswer,
+ "--resume", e.ResumeSessionID,
+ "--output-format", "stream-json",
+ "--verbose",
+ }
+ permMode := t.Claude.PermissionMode
+ if permMode == "" {
+ permMode = "bypassPermissions"
+ }
+ args = append(args, "--permission-mode", permMode)
+ if t.Claude.Model != "" {
+ args = append(args, "--model", t.Claude.Model)
+ }
+ return args
+ }
+
instructions := t.Claude.Instructions
allowedTools := t.Claude.AllowedTools
if !t.Claude.SkipPlanning {
instructions = withPlanningPreamble(instructions)
- // Ensure Bash is available so the agent can POST subtasks.
+ // Ensure Bash is available so the agent can POST subtasks and ask questions.
hasBash := false
for _, tool := range allowedTools {
if tool == "Bash" {
@@ -193,6 +239,7 @@ func (r *ClaudeRunner) buildArgs(t *task.Task) []string {
args := []string{
"-p", instructions,
+ "--session-id", e.SessionID,
"--output-format", "stream-json",
"--verbose",
}