summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Stone <thepeterstone@gmail.com>2026-03-18 23:56:34 +0000
committerPeter Stone <thepeterstone@gmail.com>2026-03-18 23:56:34 +0000
commit599a26d556df52b364b5b540762a521d22eb5b7b (patch)
tree740c141c52764604fc8d4c036733e5f47368b26a
parent0db05b0fa6de318f164a1d73ddc55db9c59f1fc3 (diff)
parent7df4f06ae0e3ae80bd967bf53cbec36e58b4a3bd (diff)
Merge feat/container-execution into master
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
-rw-r--r--CLAUDE.md52
-rw-r--r--docs/adr/005-sandbox-execution-model.md25
-rw-r--r--docs/adr/006-containerized-execution.md51
-rw-r--r--images/agent-base/Dockerfile58
-rw-r--r--images/agent-base/tools/ct210
-rw-r--r--internal/api/server.go7
-rw-r--r--internal/api/server_test.go82
-rw-r--r--internal/api/webhook.go17
-rw-r--r--internal/api/webhook_test.go8
-rw-r--r--internal/cli/run.go32
-rw-r--r--internal/cli/serve.go46
-rw-r--r--internal/config/config.go6
-rw-r--r--internal/executor/claude.go714
-rw-r--r--internal/executor/claude_test.go882
-rw-r--r--internal/executor/container.go380
-rw-r--r--internal/executor/container_test.go244
-rw-r--r--internal/executor/executor_test.go11
-rw-r--r--internal/executor/gemini.go228
-rw-r--r--internal/executor/gemini_test.go179
-rw-r--r--internal/executor/helpers.go174
-rw-r--r--internal/executor/stream_test.go25
-rw-r--r--internal/notify/vapid.go16
-rw-r--r--internal/notify/vapid_test.go21
-rw-r--r--internal/storage/db.go19
-rw-r--r--internal/task/task.go7
-rw-r--r--scripts/drain-failed-tasks22
-rw-r--r--web/app.js350
-rw-r--r--web/index.html13
-rw-r--r--web/test/tab-persistence.test.mjs58
-rw-r--r--web/test/task-panel-summary.test.mjs143
30 files changed, 1777 insertions, 2303 deletions
diff --git a/CLAUDE.md b/CLAUDE.md
index 2cb37a8..d804a96 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -53,14 +53,14 @@ Config defaults to `~/.claudomator/config.toml`. Data is stored in `~/.claudomat
## Architecture
-**Pipeline:** CLI/API → `executor.Pool` → `executor.ClaudeRunner` → `claude -p` subprocess → SQLite + log files
+**Pipeline:** CLI/API → `executor.Pool` → `executor.ContainerRunner` → Docker container → SQLite + log files
### Packages
| Package | Role |
|---|---|
| `internal/task` | `Task` struct, YAML parsing, state machine, validation |
-| `internal/executor` | `Pool` (bounded goroutine pool) + `ClaudeRunner` (subprocess manager) |
+| `internal/executor` | `Pool` (bounded goroutine pool) + `ContainerRunner` (Docker-based executor) |
| `internal/storage` | SQLite wrapper; stores tasks and execution records |
| `internal/api` | HTTP server (REST + WebSocket via `internal/api.Hub`) |
| `internal/reporter` | Formats and emits execution results |
@@ -72,9 +72,9 @@ Config defaults to `~/.claudomator/config.toml`. Data is stored in `~/.claudomat
**Task execution:**
1. Task created via `POST /api/tasks` or YAML file (`task.ParseFile`)
2. `POST /api/tasks/{id}/run` → `executor.Pool.Submit()` → goroutine in pool
-3. `ClaudeRunner.Run()` invokes `claude -p <instructions> --output-format stream-json`
-4. stdout streamed to `~/.claudomator/executions/<exec-id>/stdout.log`; cost parsed from stream-json
-5. Execution result written to SQLite; broadcast via WebSocket to connected clients
+3. `ContainerRunner.Run()` clones `repository_url`, runs `docker run claudomator-agent:latest`
+4. Agent runs `claude -p` inside the container; stdout streamed to `executions/<exec-id>/stdout.log`
+5. On success, runner pushes commits back to the remote; execution result written to SQLite + WebSocket broadcast
**State machine** (`task.ValidTransition`):
`PENDING` → `QUEUED` → `RUNNING` → `COMPLETED | FAILED | TIMED_OUT | CANCELLED | BUDGET_EXCEEDED`
@@ -166,6 +166,48 @@ A task is created for:
Tasks are tagged `["ci", "auto"]`, capped at $3 USD, and use tools: Read, Edit, Bash, Glob, Grep.
+## Agent Tooling (`ct` CLI)
+
+Agents running inside containers have access to `ct`, a pre-built CLI for interacting with the Claudomator API. It is installed at `/usr/local/bin/ct` in the container image. **Use `ct` to create and manage subtasks — do not attempt raw `curl` API calls.**
+
+### Environment (injected automatically)
+
+| Variable | Purpose |
+|---|---|
+| `CLAUDOMATOR_API_URL` | Base URL of the Claudomator API (e.g. `http://host.docker.internal:8484`) |
+| `CLAUDOMATOR_TASK_ID` | ID of the currently-running task; used as the default `parent_task_id` for new subtasks |
+
+### Commands
+
+```bash
+# Create a subtask and immediately queue it (returns task ID)
+ct task submit --name "Fix tests" --instructions "Run tests and fix any failures." [--model sonnet] [--budget 3.0]
+
+# Create, queue, and wait for completion (exits 0=COMPLETED, 2=BLOCKED, 1=any other terminal state or timeout)
+ct task submit --name "Fix tests" --instructions "..." --wait
+
+# Read instructions from a file instead of inline
+ct task submit --name "Fix tests" --file /workspace/subtask-instructions.txt --wait
+
+# Lower-level: create only (returns task ID), then run separately
+TASK_ID=$(ct task create --name "..." --instructions "...")
+ct task run "$TASK_ID"
+ct task wait "$TASK_ID" --timeout 600
+
+# Check status of any task
+ct task status <task-id>
+
+# List recent tasks
+ct task list
+```
+
+### Notes
+
+- Default model is `sonnet`; default budget is `$3.00 USD`. Override with `--model` / `--budget`.
+- `ct task wait` polls every 5 seconds, prints the task's terminal state on stdout, and exits with a matching status code (0=COMPLETED, 2=BLOCKED, 1 otherwise).
+- Subtasks inherit the current task as their parent automatically (via `$CLAUDOMATOR_TASK_ID`).
+- Override parent with `--parent <task-id>` if needed.
+
## ADRs
See `docs/adr/001-language-and-architecture.md` for the Go + SQLite + WebSocket rationale.
diff --git a/docs/adr/005-sandbox-execution-model.md b/docs/adr/005-sandbox-execution-model.md
index b374561..0c9ef14 100644
--- a/docs/adr/005-sandbox-execution-model.md
+++ b/docs/adr/005-sandbox-execution-model.md
@@ -1,7 +1,7 @@
# ADR-005: Git Sandbox Execution Model
## Status
-Accepted
+Superseded by [ADR-006](006-containerized-execution.md)
## Context
@@ -69,9 +69,13 @@ state), the sandbox is **not** torn down. The preserved sandbox allows the
resumed execution to pick up the same working tree state, including any
in-progress file changes made before the agent asked its question.
-Resume executions (`SubmitResume`) skip sandbox setup entirely and run
-directly in `project_dir`, passing `--resume <session-id>` to the agent
-so Claude can continue its previous conversation.
+**Known Risk: Resume skips sandbox.** Current implementation of
+Resume executions (`SubmitResume`) skips sandbox setup entirely and runs
+directly in `project_dir`. This is a significant behavioral divergence: if a
+resumed task makes further changes, they land directly in the canonical working
+copy, reintroducing the concurrent corruption and partial-work leak risks
+identified in the Context section. A future iteration should ensure resumed
+tasks pick up the preserved sandbox instead.
### Session ID propagation on resume
@@ -113,10 +117,15 @@ The fix is in `ClaudeRunner.Run`: if `e.ResumeSessionID != ""`, use it as
directory the server process inherited.
- If a sandbox's push repeatedly fails (e.g. due to a bare repo that is
itself broken), the task is failed with the sandbox preserved.
-- If `/tmp` runs out of space (many large sandboxes), tasks will fail at
- clone time. This is a known operational risk with no current mitigation.
-- The `project_dir` field in task YAML must point to a git repository with
- a configured `"local"` or `"origin"` remote that accepts pushes.
+- **If `/tmp` runs out of space** (many large sandboxes), tasks will fail at
+ clone time. This is a known operational risk. Mitigations such as periodic
+ cleanup of old sandboxes (cron) or pre-clone disk space checks are required
+ as follow-up items.
+- **The `project_dir` field in task YAML** must point to a git repository with
+ a configured `"local"` or `"origin"` remote that accepts pushes. If neither
+ remote exists or the push is rejected for other reasons, the task will be
+ marked as `FAILED` and the sandbox will be preserved for manual recovery.
+
## Relevant Code Locations
diff --git a/docs/adr/006-containerized-execution.md b/docs/adr/006-containerized-execution.md
new file mode 100644
index 0000000..cdd1cc2
--- /dev/null
+++ b/docs/adr/006-containerized-execution.md
@@ -0,0 +1,51 @@
+# ADR-006: Containerized Repository-Based Execution Model
+
+## Status
+Accepted (Supersedes ADR-005)
+
+## Context
+ADR-005 introduced a sandbox execution model based on local git clones and pushes back to a local project directory. While this provided isolation, it had several flaws identified during early adoption:
+1. **Host pollution**: Build dependencies (Node, Go, etc.) had to be installed on the host and were subject to permission issues (e.g., `/root/.nvm` access for `www-data`).
+2. **Fragile Pushes**: Pushing to a checked-out local branch is non-standard and requires risky git configs.
+3. **Resume Divergence**: Resumed tasks bypassed the sandbox, reintroducing corruption risks.
+4. **Scale**: Local directory-based "project selection" is a hack that doesn't scale to multiple repos or environments.
+
+## Decision
+We will move to a containerized execution model where projects are defined by canonical repository URLs and executed in isolated containers.
+
+### 1. Repository-Based Projects
+- The `Task` model now uses `RepositoryURL` as the source of truth for the codebase.
+- This replaces the fragile reliance on local `ProjectDir` paths.
+
+### 2. Containerized Sandboxes
+- Each task execution runs in a fresh container (Docker/Podman).
+- The runner clones the repository into a host-side temporary workspace and mounts it into the container.
+- The container provides a self-contained environment with the full build stack (Node, Go, etc.) pre-installed, isolating the host from build dependencies.
+
+### 3. Unified Workspace Management (including RESUME)
+- Unlike ADR-005, the containerized model is designed to handle **Resume** by re-attaching to or re-mounting the same host-side workspace.
+- This ensures that resumed tasks **do not** bypass the sandbox and never land directly in a production directory.
+
+### 4. Push to Actual Remotes
+- Agents commit changes within the sandbox.
+- The runner pushes these commits directly to the `RepositoryURL` (actual remote).
+- If the remote is missing or the push fails, the task is marked `FAILED` and the host-side workspace is preserved for inspection.
+
+## Rationale
+- **Isolation**: Containers prevent host pollution and ensure a consistent build environment.
+- **Safety**: Repository URLs provide a standard way to manage codebases across environments.
+- **Consistency**: Unified workspace management for initial runs and resumes eliminates the behavioral divergence found in ADR-005.
+
+## Consequences
+- Requires a container runtime (Docker) on the host.
+- Requires pre-built agent images (e.g., `claudomator-agent:latest`).
+- **Disk Space Risk**: Host-side clones still consume `/tmp` space. Mitigation requires periodic cleanup of old workspaces or disk-space monitoring.
+- **Git Config**: Repositories no longer require `receive.denyCurrentBranch = updateInstead` because we push to the remote, not a local worktree.
+
+## Relevant Code Locations
+| Concern | File |
+|---|---|
+| Container Lifecycle | `internal/executor/container.go` |
+| Runner Registration | `internal/cli/serve.go` |
+| Task Model | `internal/task/task.go` |
+| API Integration | `internal/api/server.go` |
diff --git a/images/agent-base/Dockerfile b/images/agent-base/Dockerfile
new file mode 100644
index 0000000..0e8057c
--- /dev/null
+++ b/images/agent-base/Dockerfile
@@ -0,0 +1,58 @@
+# Claudomator Agent Base Image
+FROM ubuntu:24.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Base system tools
+RUN apt-get update && apt-get install -y \
+ git \
+ curl \
+ make \
+ wget \
+ sqlite3 \
+ jq \
+ sudo \
+ ca-certificates \
+ && rm -rf /var/lib/apt/lists/*
+
+# Node.js 22 via NodeSource
+RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
+ && apt-get install -y nodejs \
+ && rm -rf /var/lib/apt/lists/*
+
+# Go 1.24
+RUN wget -q https://go.dev/dl/go1.24.1.linux-amd64.tar.gz && \
+ tar -C /usr/local -xzf go1.24.1.linux-amd64.tar.gz && \
+ rm go1.24.1.linux-amd64.tar.gz
+ENV PATH=$PATH:/usr/local/go/bin
+
+# Claude Code CLI
+RUN npm install -g @anthropic-ai/claude-code
+
+# Gemini CLI
+RUN npm install -g @google/gemini-cli
+
+# CSS build tools (for claudomator itself)
+RUN npm install -g postcss-cli tailwindcss autoprefixer
+
+# Git: allow operations on any directory (agents clone into /workspace/*)
+RUN git config --system safe.directory '*'
+
+# Claudomator agent CLI tools (ct)
+COPY tools/ct /usr/local/bin/ct
+RUN chmod +x /usr/local/bin/ct
+
+# Setup workspace
+WORKDIR /workspace
+
+# Agent user with passwordless sudo
+RUN useradd -m claudomator-agent && \
+ echo "claudomator-agent ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
+
+USER claudomator-agent
+
+# Create a default empty config to satisfy the CLI if no mount is provided
+RUN mkdir -p /home/claudomator-agent/.claude && \
+ echo '{}' > /home/claudomator-agent/.claude.json
+
+CMD ["/bin/bash"]
diff --git a/images/agent-base/tools/ct b/images/agent-base/tools/ct
new file mode 100644
index 0000000..46d9613
--- /dev/null
+++ b/images/agent-base/tools/ct
@@ -0,0 +1,210 @@
+#!/bin/bash
+# ct - Claudomator CLI for agents running inside containers
+#
+# Usage:
+# ct task create --name "..." --instructions "..." # create subtask (parent auto-set)
+# ct task run <task-id> # queue a task for execution
+# ct task wait <task-id> [--timeout 300] # poll until done, print status
+# ct task status <task-id> # print current state
+# ct task list # list recent tasks
+#
+# Environment (injected by ContainerRunner):
+# CLAUDOMATOR_API_URL base URL of the Claudomator API
+# CLAUDOMATOR_TASK_ID ID of the currently running task (used as default parent)
+
+set -euo pipefail
+
+API="${CLAUDOMATOR_API_URL:-http://host.docker.internal:8484}"
+PARENT="${CLAUDOMATOR_TASK_ID:-}"
+
+_api() {
+ local method="$1"; shift
+ local path="$1"; shift
+ curl -sf -X "$method" "${API}${path}" \
+ -H "Content-Type: application/json" \
+ "$@"
+}
+
+_require() {
+ if ! command -v "$1" &>/dev/null; then
+ echo "ct: required tool '$1' not found" >&2
+ exit 1
+ fi
+}
+
+_require curl
+_require jq
+
+cmd_task_create() {
+ local name="" instructions="" instructions_file="" model="" budget="" parent="$PARENT"
+
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --name) name="$2"; shift 2 ;;
+ --instructions) instructions="$2"; shift 2 ;;
+ --file) instructions_file="$2"; shift 2 ;;
+ --model) model="$2"; shift 2 ;;
+ --budget) budget="$2"; shift 2 ;;
+ --parent) parent="$2"; shift 2 ;;
+ *) echo "ct task create: unknown flag $1" >&2; exit 1 ;;
+ esac
+ done
+
+ if [[ -z "$name" ]]; then
+ echo "ct task create: --name is required" >&2; exit 1
+ fi
+
+ if [[ -n "$instructions_file" ]]; then
+ instructions=$(cat "$instructions_file")
+ fi
+
+ if [[ -z "$instructions" ]]; then
+ echo "ct task create: --instructions or --file is required" >&2; exit 1
+ fi
+
+ local payload
+ payload=$(jq -n \
+ --arg name "$name" \
+ --arg instructions "$instructions" \
+ --arg parent "$parent" \
+ --arg model "${model:-sonnet}" \
+ --argjson budget "${budget:-3.0}" \
+ '{
+ name: $name,
+ parent_task_id: $parent,
+ agent: {
+ type: "claude",
+ model: $model,
+ instructions: $instructions,
+ max_budget_usd: $budget
+ }
+ }')
+
+ local response
+ response=$(_api POST /api/tasks -d "$payload")
+ local task_id
+ task_id=$(echo "$response" | jq -r '.id // empty')
+
+ if [[ -z "$task_id" ]]; then
+ echo "ct task create: API error: $(echo "$response" | jq -r '.error // .')" >&2
+ exit 1
+ fi
+
+ echo "$task_id"
+}
+
+cmd_task_run() {
+ local task_id="${1:-}"
+ if [[ -z "$task_id" ]]; then
+ echo "ct task run: task-id required" >&2; exit 1
+ fi
+
+ local response
+ response=$(_api POST "/api/tasks/${task_id}/run")
+ echo "$response" | jq -r '.message // .error // .'
+}
+
+cmd_task_wait() {
+ local task_id="${1:-}"
+ local timeout=300
+ shift || true
+
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --timeout) timeout="$2"; shift 2 ;;
+ *) echo "ct task wait: unknown flag $1" >&2; exit 1 ;;
+ esac
+ done
+
+ if [[ -z "$task_id" ]]; then
+ echo "ct task wait: task-id required" >&2; exit 1
+ fi
+
+ local deadline=$(( $(date +%s) + timeout ))
+ local interval=5
+
+ while true; do
+ local response
+ response=$(_api GET "/api/tasks/${task_id}" 2>/dev/null) || true
+
+ local state
+ state=$(echo "$response" | jq -r '.state // "UNKNOWN"')
+
+ case "$state" in
+ COMPLETED|FAILED|TIMED_OUT|CANCELLED|BUDGET_EXCEEDED)
+ echo "$state"
+ [[ "$state" == "COMPLETED" ]] && exit 0 || exit 1
+ ;;
+ BLOCKED)
+ echo "BLOCKED"
+ exit 2
+ ;;
+ esac
+
+ if [[ $(date +%s) -ge $deadline ]]; then
+ echo "ct task wait: timed out after ${timeout}s (state: $state)" >&2
+ exit 1
+ fi
+
+ sleep "$interval"
+ done
+}
+
+cmd_task_status() {
+ local task_id="${1:-}"
+ if [[ -z "$task_id" ]]; then
+ echo "ct task status: task-id required" >&2; exit 1
+ fi
+ _api GET "/api/tasks/${task_id}" | jq -r '.state'
+}
+
+cmd_task_list() {
+ _api GET "/api/tasks" | jq -r '.[] | "\(.state)\t\(.id)\t\(.name)"' | sort
+}
+
+# create-and-run shorthand: create a subtask and immediately queue it, then optionally wait
+cmd_task_submit() {
+ local wait=false
+ local args=()
+
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --wait) wait=true; shift ;;
+ *) args+=("$1"); shift ;;
+ esac
+ done
+
+ local task_id
+ task_id=$(cmd_task_create "${args[@]}")
+ cmd_task_run "$task_id" >/dev/null
+ echo "$task_id"
+
+ if $wait; then
+ cmd_task_wait "$task_id"
+ fi
+}
+
+# Dispatch
+if [[ $# -lt 2 ]]; then
+ echo "Usage: ct <resource> <command> [args...]"
+ echo " ct task create --name NAME --instructions TEXT [--file FILE] [--model MODEL] [--budget N]"
+ echo " ct task submit --name NAME --instructions TEXT [--wait]"
+ echo " ct task run <id>"
+ echo " ct task wait <id> [--timeout 300]"
+ echo " ct task status <id>"
+ echo " ct task list"
+ exit 1
+fi
+
+resource="$1"; shift
+command="$1"; shift
+
+case "${resource}/${command}" in
+ task/create) cmd_task_create "$@" ;;
+ task/run) cmd_task_run "$@" ;;
+ task/wait) cmd_task_wait "$@" ;;
+ task/status) cmd_task_status "$@" ;;
+ task/list) cmd_task_list ;;
+ task/submit) cmd_task_submit "$@" ;;
+ *) echo "ct: unknown command: ${resource} ${command}" >&2; exit 1 ;;
+esac
diff --git a/internal/api/server.go b/internal/api/server.go
index 48440e1..e5d0ba6 100644
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -424,6 +424,7 @@ func (s *Server) handleCreateTask(w http.ResponseWriter, r *http.Request) {
Description string `json:"description"`
ElaborationInput string `json:"elaboration_input"`
Project string `json:"project"`
+ RepositoryURL string `json:"repository_url"`
Agent task.AgentConfig `json:"agent"`
Claude task.AgentConfig `json:"claude"` // legacy alias
Timeout string `json:"timeout"`
@@ -448,6 +449,7 @@ func (s *Server) handleCreateTask(w http.ResponseWriter, r *http.Request) {
Description: input.Description,
ElaborationInput: input.ElaborationInput,
Project: input.Project,
+ RepositoryURL: input.RepositoryURL,
Agent: input.Agent,
Priority: task.Priority(input.Priority),
Tags: input.Tags,
@@ -458,6 +460,11 @@ func (s *Server) handleCreateTask(w http.ResponseWriter, r *http.Request) {
UpdatedAt: now,
ParentTaskID: input.ParentTaskID,
}
+
+ // Fallback for repository_url if only provided in Agent config
+ if t.RepositoryURL == "" && input.Agent.ProjectDir != "" {
+ t.RepositoryURL = input.Agent.ProjectDir
+ }
if t.Agent.Type == "" {
t.Agent.Type = "claude"
}
diff --git a/internal/api/server_test.go b/internal/api/server_test.go
index 696aca3..8ff4227 100644
--- a/internal/api/server_test.go
+++ b/internal/api/server_test.go
@@ -16,6 +16,7 @@ import (
"context"
+ "github.com/google/uuid"
"github.com/thepeterstone/claudomator/internal/executor"
"github.com/thepeterstone/claudomator/internal/notify"
"github.com/thepeterstone/claudomator/internal/storage"
@@ -89,6 +90,9 @@ func testServerWithRunner(t *testing.T, runner executor.Runner) (*Server, *stora
t.Cleanup(func() { store.Close() })
logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError}))
+ if mr, ok := runner.(*mockRunner); ok {
+ mr.logDir = t.TempDir()
+ }
runners := map[string]executor.Runner{
"claude": runner,
"gemini": runner,
@@ -99,11 +103,39 @@ func testServerWithRunner(t *testing.T, runner executor.Runner) (*Server, *stora
}
type mockRunner struct {
- err error
- sleep time.Duration
+ err error
+ sleep time.Duration
+ logDir string
+ onRun func(*task.Task, *storage.Execution) error
}
-func (m *mockRunner) Run(ctx context.Context, _ *task.Task, _ *storage.Execution) error {
+func (m *mockRunner) ExecLogDir(execID string) string {
+ if m.logDir == "" {
+ return ""
+ }
+ return filepath.Join(m.logDir, execID)
+}
+
+func (m *mockRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
+ if e.ID == "" {
+ e.ID = uuid.New().String()
+ }
+ if m.logDir != "" {
+ dir := m.ExecLogDir(e.ID)
+ if err := os.MkdirAll(dir, 0755); err != nil {
+ return err
+ }
+ e.StdoutPath = filepath.Join(dir, "stdout.log")
+ e.StderrPath = filepath.Join(dir, "stderr.log")
+ e.ArtifactDir = dir
+ // Create an empty file at least
+ os.WriteFile(e.StdoutPath, []byte(""), 0644)
+ }
+ if m.onRun != nil {
+ if err := m.onRun(t, e); err != nil {
+ return err
+ }
+ }
if m.sleep > 0 {
select {
case <-time.After(m.sleep):
@@ -143,40 +175,26 @@ func testServerWithGeminiMockRunner(t *testing.T) (*Server, *storage.DB) {
logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
- // Create the mock gemini binary script.
- mockBinDir := t.TempDir()
- mockGeminiPath := filepath.Join(mockBinDir, "mock-gemini-binary.sh")
- mockScriptContent := `#!/bin/bash
-OUTPUT_FILE=$(mktemp)
-echo "` + "```json" + `" > "$OUTPUT_FILE"
-echo "{\"type\":\"content_block_start\",\"content_block\":{\"text\":\"Hello, Gemini!\",\"type\":\"text\"}}" >> "$OUTPUT_FILE"
-echo "{\"type\":\"content_block_delta\",\"content_block\":{\"text\":\" How are you?\"}}" >> "$OUTPUT_FILE"
-echo "{\"type\":\"content_block_end\"}" >> "$OUTPUT_FILE"
-echo "{\"type\":\"message_delta\",\"message\":{\"role\":\"model\"}}" >> "$OUTPUT_FILE"
-echo "{\"type\":\"message_end\"}" >> "$OUTPUT_FILE"
-echo "` + "```" + `" >> "$OUTPUT_FILE"
-cat "$OUTPUT_FILE"
-rm "$OUTPUT_FILE"
-exit 0
-`
- if err := os.WriteFile(mockGeminiPath, []byte(mockScriptContent), 0755); err != nil {
- t.Fatalf("writing mock gemini script: %v", err)
- }
-
- // Configure GeminiRunner to use the mock script.
- geminiRunner := &executor.GeminiRunner{
- BinaryPath: mockGeminiPath,
- Logger: logger,
- LogDir: t.TempDir(), // Ensure log directory is temporary for test
- APIURL: "http://localhost:8080", // Placeholder, not used by this mock
+ mr := &mockRunner{
+ logDir: t.TempDir(),
+ onRun: func(t *task.Task, e *storage.Execution) error {
+ lines := []string{
+ `{"type":"content_block_start","content_block":{"text":"Hello, Gemini!","type":"text"}}`,
+ `{"type":"content_block_delta","content_block":{"text":" How are you?"}}`,
+ `{"type":"content_block_end"}`,
+ `{"type":"message_delta","message":{"role":"model"}}`,
+ `{"type":"message_end"}`,
+ }
+ return os.WriteFile(e.StdoutPath, []byte(strings.Join(lines, "\n")), 0644)
+ },
}
runners := map[string]executor.Runner{
- "claude": &mockRunner{}, // Keep mock for claude to not interfere
- "gemini": geminiRunner,
+ "claude": mr,
+ "gemini": mr,
}
pool := executor.NewPool(2, runners, store, logger)
- srv := NewServer(store, pool, logger, "claude", "gemini") // Pass original binary paths
+ srv := NewServer(store, pool, logger, "claude", "gemini")
return srv, store
}
diff --git a/internal/api/webhook.go b/internal/api/webhook.go
index 0530f3e..141224f 100644
--- a/internal/api/webhook.go
+++ b/internal/api/webhook.go
@@ -210,16 +210,17 @@ func (s *Server) createCIFailureTask(w http.ResponseWriter, repoName, fullName,
MaxBudgetUSD: 3.0,
AllowedTools: []string{"Read", "Edit", "Bash", "Glob", "Grep"},
},
- Priority: task.PriorityNormal,
- Tags: []string{"ci", "auto"},
- DependsOn: []string{},
- Retry: task.RetryConfig{MaxAttempts: 1, Backoff: "exponential"},
- State: task.StatePending,
- CreatedAt: now,
- UpdatedAt: now,
+ Priority: task.PriorityNormal,
+ Tags: []string{"ci", "auto"},
+ DependsOn: []string{},
+ Retry: task.RetryConfig{MaxAttempts: 1, Backoff: "exponential"},
+ State: task.StatePending,
+ CreatedAt: now,
+ UpdatedAt: now,
+ RepositoryURL: fmt.Sprintf("https://github.com/%s.git", fullName),
}
if project != nil {
- t.Agent.ProjectDir = project.Dir
+ t.Project = project.Name
}
if err := s.store.CreateTask(t); err != nil {
diff --git a/internal/api/webhook_test.go b/internal/api/webhook_test.go
index 1bc4aaa..0fc9664 100644
--- a/internal/api/webhook_test.go
+++ b/internal/api/webhook_test.go
@@ -124,8 +124,8 @@ func TestGitHubWebhook_CheckRunFailure_CreatesTask(t *testing.T) {
if !strings.Contains(tk.Name, "main") {
t.Errorf("task name %q does not contain branch", tk.Name)
}
- if tk.Agent.ProjectDir != "/workspace/myrepo" {
- t.Errorf("task project dir = %q, want /workspace/myrepo", tk.Agent.ProjectDir)
+ if tk.RepositoryURL != "https://github.com/owner/myrepo.git" {
+ t.Errorf("task repository url = %q, want https://github.com/owner/myrepo.git", tk.RepositoryURL)
}
if !contains(tk.Tags, "ci") || !contains(tk.Tags, "auto") {
t.Errorf("task tags %v missing expected ci/auto tags", tk.Tags)
@@ -375,8 +375,8 @@ func TestGitHubWebhook_FallbackToSingleProject(t *testing.T) {
if err != nil {
t.Fatalf("task not found: %v", err)
}
- if tk.Agent.ProjectDir != "/workspace/someapp" {
- t.Errorf("expected fallback to /workspace/someapp, got %q", tk.Agent.ProjectDir)
+ if tk.RepositoryURL != "https://github.com/owner/myrepo.git" {
+ t.Errorf("expected fallback repository url, got %q", tk.RepositoryURL)
}
}
diff --git a/internal/cli/run.go b/internal/cli/run.go
index 49aa28e..cfac893 100644
--- a/internal/cli/run.go
+++ b/internal/cli/run.go
@@ -72,18 +72,34 @@ func runTasks(file string, parallel int, dryRun bool) error {
logger := newLogger(verbose)
+ apiURL := "http://localhost" + cfg.ServerAddr
+ if len(cfg.ServerAddr) > 0 && cfg.ServerAddr[0] != ':' {
+ apiURL = "http://" + cfg.ServerAddr
+ }
+
runners := map[string]executor.Runner{
- "claude": &executor.ClaudeRunner{
- BinaryPath: cfg.ClaudeBinaryPath,
- Logger: logger,
- LogDir: cfg.LogDir,
+ "claude": &executor.ContainerRunner{
+ Image: cfg.ClaudeImage,
+ Logger: logger,
+ LogDir: cfg.LogDir,
+ APIURL: apiURL,
+ DropsDir: cfg.DropsDir,
+ SSHAuthSock: cfg.SSHAuthSock,
+ ClaudeBinary: cfg.ClaudeBinaryPath,
+ GeminiBinary: cfg.GeminiBinaryPath,
},
- "gemini": &executor.GeminiRunner{
- BinaryPath: cfg.GeminiBinaryPath,
- Logger: logger,
- LogDir: cfg.LogDir,
+ "gemini": &executor.ContainerRunner{
+ Image: cfg.GeminiImage,
+ Logger: logger,
+ LogDir: cfg.LogDir,
+ APIURL: apiURL,
+ DropsDir: cfg.DropsDir,
+ SSHAuthSock: cfg.SSHAuthSock,
+ ClaudeBinary: cfg.ClaudeBinaryPath,
+ GeminiBinary: cfg.GeminiBinaryPath,
},
}
+
pool := executor.NewPool(parallel, runners, store, logger)
if cfg.GeminiBinaryPath != "" {
pool.Classifier = &executor.Classifier{GeminiBinaryPath: cfg.GeminiBinaryPath}
diff --git a/internal/cli/serve.go b/internal/cli/serve.go
index efac719..98e7524 100644
--- a/internal/cli/serve.go
+++ b/internal/cli/serve.go
@@ -35,6 +35,8 @@ func newServeCmd() *cobra.Command {
cmd.Flags().StringVar(&addr, "addr", ":8484", "listen address")
cmd.Flags().StringVar(&workspaceRoot, "workspace-root", "/workspace", "root directory for listing workspaces")
+ cmd.Flags().StringVar(&cfg.ClaudeImage, "claude-image", cfg.ClaudeImage, "docker image for claude agents")
+ cmd.Flags().StringVar(&cfg.GeminiImage, "gemini-image", cfg.GeminiImage, "docker image for gemini agents")
return cmd
}
@@ -54,7 +56,7 @@ func serve(addr string) error {
if cfg.VAPIDPublicKey == "" || cfg.VAPIDPrivateKey == "" {
pub, _ := store.GetSetting("vapid_public_key")
priv, _ := store.GetSetting("vapid_private_key")
- if pub == "" || priv == "" {
+ if pub == "" || priv == "" || !notify.ValidateVAPIDPublicKey(pub) {
pub, priv, err = notify.GenerateVAPIDKeys()
if err != nil {
return fmt.Errorf("generating VAPID keys: %w", err)
@@ -73,20 +75,38 @@ func serve(addr string) error {
apiURL = "http://" + addr
}
+ // Resolve the claude config dir from HOME so the container can mount credentials.
+ claudeConfigDir := filepath.Join(os.Getenv("HOME"), ".claude")
+
runners := map[string]executor.Runner{
- "claude": &executor.ClaudeRunner{
- BinaryPath: cfg.ClaudeBinaryPath,
- Logger: logger,
- LogDir: cfg.LogDir,
- APIURL: apiURL,
- DropsDir: cfg.DropsDir,
+ // ContainerRunner: binaries are resolved via PATH inside the container image,
+ // so ClaudeBinary/GeminiBinary are left empty (host paths would not exist inside).
+ "claude": &executor.ContainerRunner{
+ Image: cfg.ClaudeImage,
+ Logger: logger,
+ LogDir: cfg.LogDir,
+ APIURL: apiURL,
+ DropsDir: cfg.DropsDir,
+ SSHAuthSock: cfg.SSHAuthSock,
+ ClaudeConfigDir: claudeConfigDir,
+ },
+ "gemini": &executor.ContainerRunner{
+ Image: cfg.GeminiImage,
+ Logger: logger,
+ LogDir: cfg.LogDir,
+ APIURL: apiURL,
+ DropsDir: cfg.DropsDir,
+ SSHAuthSock: cfg.SSHAuthSock,
+ ClaudeConfigDir: claudeConfigDir,
},
- "gemini": &executor.GeminiRunner{
- BinaryPath: cfg.GeminiBinaryPath,
- Logger: logger,
- LogDir: cfg.LogDir,
- APIURL: apiURL,
- DropsDir: cfg.DropsDir,
+ "container": &executor.ContainerRunner{
+ Image: "claudomator-agent:latest",
+ Logger: logger,
+ LogDir: cfg.LogDir,
+ APIURL: apiURL,
+ DropsDir: cfg.DropsDir,
+ SSHAuthSock: cfg.SSHAuthSock,
+ ClaudeConfigDir: claudeConfigDir,
},
}
diff --git a/internal/config/config.go b/internal/config/config.go
index a3c37fb..fa76b1b 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -20,8 +20,11 @@ type Config struct {
DBPath string `toml:"-"`
LogDir string `toml:"-"`
DropsDir string `toml:"-"`
+ SSHAuthSock string `toml:"ssh_auth_sock"`
ClaudeBinaryPath string `toml:"claude_binary_path"`
GeminiBinaryPath string `toml:"gemini_binary_path"`
+ ClaudeImage string `toml:"claude_image"`
+ GeminiImage string `toml:"gemini_image"`
MaxConcurrent int `toml:"max_concurrent"`
DefaultTimeout string `toml:"default_timeout"`
ServerAddr string `toml:"server_addr"`
@@ -48,8 +51,11 @@ func Default() (*Config, error) {
DBPath: filepath.Join(dataDir, "claudomator.db"),
LogDir: filepath.Join(dataDir, "executions"),
DropsDir: filepath.Join(dataDir, "drops"),
+ SSHAuthSock: os.Getenv("SSH_AUTH_SOCK"),
ClaudeBinaryPath: "claude",
GeminiBinaryPath: "gemini",
+ ClaudeImage: "claudomator-agent:latest",
+ GeminiImage: "claudomator-agent:latest",
MaxConcurrent: 3,
DefaultTimeout: "15m",
ServerAddr: ":8484",
diff --git a/internal/executor/claude.go b/internal/executor/claude.go
deleted file mode 100644
index 6346aa8..0000000
--- a/internal/executor/claude.go
+++ /dev/null
@@ -1,714 +0,0 @@
-package executor
-
-import (
- "bufio"
- "context"
- "encoding/json"
- "fmt"
- "io"
- "log/slog"
- "os"
- "os/exec"
- "path/filepath"
- "strings"
- "sync"
- "syscall"
- "time"
-
- "github.com/thepeterstone/claudomator/internal/storage"
- "github.com/thepeterstone/claudomator/internal/task"
-)
-
-// ClaudeRunner spawns the `claude` CLI in non-interactive mode.
-type ClaudeRunner struct {
- BinaryPath string // defaults to "claude"
- Logger *slog.Logger
- LogDir string // base directory for execution logs
- APIURL string // base URL of the Claudomator API, passed to subprocesses
- DropsDir string // path to the drops directory, passed to subprocesses
-}
-
-// BlockedError is returned by Run when the agent wrote a question file and exited.
-// The pool transitions the task to BLOCKED and stores the question for the user.
-type BlockedError struct {
- QuestionJSON string // raw JSON from the question file
- SessionID string // claude session to resume once the user answers
- SandboxDir string // preserved sandbox path; resume must run here so Claude finds its session files
-}
-
-func (e *BlockedError) Error() string { return fmt.Sprintf("task blocked: %s", e.QuestionJSON) }
-
-// ExecLogDir returns the log directory for the given execution ID.
-// Implements LogPather so the pool can persist paths before execution starts.
-func (r *ClaudeRunner) ExecLogDir(execID string) string {
- if r.LogDir == "" {
- return ""
- }
- return filepath.Join(r.LogDir, execID)
-}
-
-func (r *ClaudeRunner) binaryPath() string {
- if r.BinaryPath != "" {
- return r.BinaryPath
- }
- return "claude"
-}
-
-// Run executes a claude -p invocation, streaming output to log files.
-// It retries up to 3 times on rate-limit errors using exponential backoff.
-// If the agent writes a question file and exits, Run returns *BlockedError.
-//
-// When project_dir is set and this is not a resume execution, Run clones the
-// project into a temp sandbox, runs the agent there, then merges committed
-// changes back to project_dir. On failure the sandbox is preserved and its
-// path is included in the error.
-func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
- projectDir := t.Agent.ProjectDir
-
- // Validate project_dir exists when set.
- if projectDir != "" {
- if _, err := os.Stat(projectDir); err != nil {
- return fmt.Errorf("project_dir %q: %w", projectDir, err)
- }
- }
-
- // Setup log directory once; retries overwrite the log files.
- logDir := r.ExecLogDir(e.ID)
- if logDir == "" {
- logDir = e.ID // fallback for tests without LogDir set
- }
- if err := os.MkdirAll(logDir, 0700); err != nil {
- return fmt.Errorf("creating log dir: %w", err)
- }
- if e.StdoutPath == "" {
- e.StdoutPath = filepath.Join(logDir, "stdout.log")
- e.StderrPath = filepath.Join(logDir, "stderr.log")
- e.ArtifactDir = logDir
- }
-
- // Pre-assign session ID so we can resume after a BLOCKED state.
- // For resume executions, the claude session continues under the original
- // session ID (the one passed to --resume). Using the new exec's own UUID
- // would cause a second block-and-resume cycle to pass the wrong --resume
- // argument.
- if e.SessionID == "" {
- if e.ResumeSessionID != "" {
- e.SessionID = e.ResumeSessionID
- } else {
- e.SessionID = e.ID // reuse execution UUID as session UUID (both are UUIDs)
- }
- }
-
- // For new (non-resume) executions with a project_dir, clone into a sandbox.
- // Resume executions run in the preserved sandbox (e.SandboxDir) so Claude
- // finds its session files under the same project slug. If no sandbox was
- // preserved (e.g. task had no project_dir), fall back to project_dir.
- var sandboxDir string
- var startHEAD string
- effectiveWorkingDir := projectDir
- if e.ResumeSessionID != "" {
- if e.SandboxDir != "" {
- if _, statErr := os.Stat(e.SandboxDir); statErr == nil {
- effectiveWorkingDir = e.SandboxDir
- } else {
- // Preserved sandbox was cleaned up (e.g. /tmp purge after reboot).
- // Clone a fresh sandbox so the task can run rather than fail immediately.
- r.Logger.Warn("preserved sandbox missing, cloning fresh", "sandbox", e.SandboxDir, "project_dir", projectDir)
- e.SandboxDir = ""
- if projectDir != "" {
- var err error
- sandboxDir, err = setupSandbox(t.Agent.ProjectDir, r.Logger)
- if err != nil {
- return fmt.Errorf("setting up sandbox: %w", err)
- }
-
- effectiveWorkingDir = sandboxDir
- r.Logger.Info("fresh sandbox created for resume", "sandbox", sandboxDir, "project_dir", projectDir)
- }
- }
- }
- } else if projectDir != "" {
- var err error
- sandboxDir, err = setupSandbox(t.Agent.ProjectDir, r.Logger)
- if err != nil {
- return fmt.Errorf("setting up sandbox: %w", err)
- }
-
- effectiveWorkingDir = sandboxDir
- r.Logger.Info("sandbox created", "sandbox", sandboxDir, "project_dir", projectDir)
- }
-
- if effectiveWorkingDir != "" {
- // Capture the initial HEAD so we can identify new commits later.
- headOut, _ := exec.Command("git", gitSafe("-C", effectiveWorkingDir, "rev-parse", "HEAD")...).Output()
- startHEAD = strings.TrimSpace(string(headOut))
- }
-
- questionFile := filepath.Join(logDir, "question.json")
- args := r.buildArgs(t, e, questionFile)
-
- attempt := 0
- err := runWithBackoff(ctx, 3, 5*time.Second, func() error {
- if attempt > 0 {
- delay := 5 * time.Second * (1 << (attempt - 1))
- r.Logger.Warn("rate-limited by Claude API, retrying",
- "attempt", attempt,
- "delay", delay,
- )
- }
- attempt++
- return r.execOnce(ctx, args, effectiveWorkingDir, projectDir, e)
- })
- if err != nil {
- if sandboxDir != "" {
- return fmt.Errorf("%w (sandbox preserved at %s)", err, sandboxDir)
- }
- return err
- }
-
- // Check whether the agent left a question before exiting.
- data, readErr := os.ReadFile(questionFile)
- if readErr == nil {
- os.Remove(questionFile) // consumed
- questionJSON := strings.TrimSpace(string(data))
- // If the agent wrote a completion report instead of a real question,
- // extract the text as the summary and fall through to normal completion.
- if isCompletionReport(questionJSON) {
- r.Logger.Info("treating question file as completion report", "taskID", e.TaskID)
- e.Summary = extractQuestionText(questionJSON)
- } else {
- // Preserve sandbox on BLOCKED — agent may have partial work and its
- // Claude session files are stored under the sandbox's project slug.
- // The resume execution must run in the same directory.
- return &BlockedError{QuestionJSON: questionJSON, SessionID: e.SessionID, SandboxDir: sandboxDir}
- }
- }
-
- // Read agent summary if written.
- summaryFile := filepath.Join(logDir, "summary.txt")
- if summaryData, readErr := os.ReadFile(summaryFile); readErr == nil {
- os.Remove(summaryFile) // consumed
- e.Summary = strings.TrimSpace(string(summaryData))
- }
-
- // Merge sandbox back to project_dir and clean up.
- if sandboxDir != "" {
- if mergeErr := teardownSandbox(projectDir, sandboxDir, startHEAD, r.Logger, e); mergeErr != nil {
- return fmt.Errorf("sandbox teardown: %w (sandbox preserved at %s)", mergeErr, sandboxDir)
- }
- }
- return nil
-}
-
-// isCompletionReport returns true when a question-file JSON looks like a
-// completion report rather than a real user question. Heuristic: no options
-// (or empty options) and no "?" anywhere in the text.
-func isCompletionReport(questionJSON string) bool {
- var q struct {
- Text string `json:"text"`
- Options []string `json:"options"`
- }
- if err := json.Unmarshal([]byte(questionJSON), &q); err != nil {
- return false
- }
- return len(q.Options) == 0 && !strings.Contains(q.Text, "?")
-}
-
-// extractQuestionText returns the "text" field from a question-file JSON, or
-// the raw string if parsing fails.
-func extractQuestionText(questionJSON string) string {
- var q struct {
- Text string `json:"text"`
- }
- if err := json.Unmarshal([]byte(questionJSON), &q); err != nil {
- return questionJSON
- }
- return strings.TrimSpace(q.Text)
-}
-
-// gitSafe returns git arguments that prepend "-c safe.directory=*" so that
-// commands succeed regardless of the repository owner. This is needed when
-// claudomator operates on project directories owned by a different OS user.
-func gitSafe(args ...string) []string {
- return append([]string{"-c", "safe.directory=*"}, args...)
-}
-
-// sandboxCloneSource returns the URL to clone the sandbox from. It prefers a
-// remote named "local" (a local bare repo that accepts pushes cleanly), then
-// falls back to "origin", then to the working copy path itself.
-func sandboxCloneSource(projectDir string) string {
- // Prefer "local" remote, but only if it points to a local path (accepts pushes).
- if out, err := exec.Command("git", gitSafe("-C", projectDir, "remote", "get-url", "local")...).Output(); err == nil {
- u := strings.TrimSpace(string(out))
- if u != "" && (strings.HasPrefix(u, "/") || strings.HasPrefix(u, "file://")) {
- return u
- }
- }
- // Fall back to "origin" — any URL scheme is acceptable for cloning.
- if out, err := exec.Command("git", gitSafe("-C", projectDir, "remote", "get-url", "origin")...).Output(); err == nil {
- if u := strings.TrimSpace(string(out)); u != "" {
- return u
- }
- }
- return projectDir
-}
-
-// setupSandbox prepares a temporary git clone of projectDir.
-// If projectDir is not a git repo it is initialised with an initial commit first.
-func setupSandbox(projectDir string, logger *slog.Logger) (string, error) {
- // Ensure projectDir is a git repo; initialise if not.
- if err := exec.Command("git", gitSafe("-C", projectDir, "rev-parse", "--git-dir")...).Run(); err != nil {
- cmds := [][]string{
- gitSafe("-C", projectDir, "init"),
- gitSafe("-C", projectDir, "add", "-A"),
- gitSafe("-C", projectDir, "commit", "--allow-empty", "-m", "chore: initial commit"),
- }
- for _, args := range cmds {
- if out, err := exec.Command("git", args...).CombinedOutput(); err != nil { //nolint:gosec
- return "", fmt.Errorf("git init %s: %w\n%s", projectDir, err, out)
- }
- }
- }
-
- src := sandboxCloneSource(projectDir)
-
- tempDir, err := os.MkdirTemp("", "claudomator-sandbox-*")
- if err != nil {
- return "", fmt.Errorf("creating sandbox dir: %w", err)
- }
- // git clone requires the target to not exist; remove the placeholder first.
- if err := os.Remove(tempDir); err != nil {
- return "", fmt.Errorf("removing temp dir placeholder: %w", err)
- }
- out, err := exec.Command("git", gitSafe("clone", "--no-hardlinks", src, tempDir)...).CombinedOutput()
- if err != nil {
- return "", fmt.Errorf("git clone: %w\n%s", err, out)
- }
- return tempDir, nil
-}
-
-// teardownSandbox verifies the sandbox is clean and pushes new commits to the
-// canonical bare repo. If the push is rejected because another task pushed
-// concurrently, it fetches and rebases then retries once.
-//
-// The working copy (projectDir) is NOT updated automatically — it is the
-// developer's workspace and is pulled manually. This avoids permission errors
-// from mixed-owner .git/objects directories.
-func teardownSandbox(projectDir, sandboxDir, startHEAD string, logger *slog.Logger, execRecord *storage.Execution) error {
- // Automatically commit uncommitted changes.
- out, err := exec.Command("git", "-C", sandboxDir, "status", "--porcelain").Output()
- if err != nil {
- return fmt.Errorf("git status: %w", err)
- }
- if len(strings.TrimSpace(string(out))) > 0 {
- logger.Info("autocommitting uncommitted changes", "sandbox", sandboxDir)
-
- // Run build before autocommitting.
- if _, err := os.Stat(filepath.Join(sandboxDir, "Makefile")); err == nil {
- logger.Info("running 'make build' before autocommit", "sandbox", sandboxDir)
- if buildOut, buildErr := exec.Command("make", "-C", sandboxDir, "build").CombinedOutput(); buildErr != nil {
- return fmt.Errorf("build failed before autocommit: %w\n%s", buildErr, buildOut)
- }
- } else if _, err := os.Stat(filepath.Join(sandboxDir, "gradlew")); err == nil {
- logger.Info("running './gradlew build' before autocommit", "sandbox", sandboxDir)
- cmd := exec.Command("./gradlew", "build")
- cmd.Dir = sandboxDir
- if buildOut, buildErr := cmd.CombinedOutput(); buildErr != nil {
- return fmt.Errorf("build failed before autocommit: %w\n%s", buildErr, buildOut)
- }
- } else if _, err := os.Stat(filepath.Join(sandboxDir, "go.mod")); err == nil {
- logger.Info("running 'go build ./...' before autocommit", "sandbox", sandboxDir)
- cmd := exec.Command("go", "build", "./...")
- cmd.Dir = sandboxDir
- if buildOut, buildErr := cmd.CombinedOutput(); buildErr != nil {
- return fmt.Errorf("build failed before autocommit: %w\n%s", buildErr, buildOut)
- }
- }
-
- cmds := [][]string{
- gitSafe("-C", sandboxDir, "add", "-A"),
- gitSafe("-C", sandboxDir, "commit", "-m", "chore: autocommit uncommitted changes"),
- }
- for _, args := range cmds {
- if out, err := exec.Command("git", args...).CombinedOutput(); err != nil {
- return fmt.Errorf("autocommit failed (%v): %w\n%s", args, err, out)
- }
- }
- }
-
- // Capture commits before pushing/deleting.
- // Use startHEAD..HEAD to find all commits made during this execution.
- logRange := "origin/HEAD..HEAD"
- if startHEAD != "" && startHEAD != "HEAD" {
- logRange = startHEAD + "..HEAD"
- }
-
- logCmd := exec.Command("git", gitSafe("-C", sandboxDir, "log", logRange, "--pretty=format:%H|%s")...)
- logOut, logErr := logCmd.CombinedOutput()
- if logErr == nil {
- lines := strings.Split(strings.TrimSpace(string(logOut)), "\n")
- logger.Debug("captured commits", "count", len(lines), "range", logRange)
- for _, line := range lines {
- if line == "" {
- continue
- }
- parts := strings.SplitN(line, "|", 2)
- if len(parts) == 2 {
- execRecord.Commits = append(execRecord.Commits, task.GitCommit{
- Hash: parts[0],
- Message: parts[1],
- })
- }
- }
- } else {
- logger.Warn("failed to capture commits", "err", logErr, "range", logRange, "output", string(logOut))
- }
-
- // Check whether there are any new commits to push.
- ahead, err := exec.Command("git", gitSafe("-C", sandboxDir, "rev-list", "--count", logRange)...).Output()
- if err != nil {
- logger.Warn("could not determine commits ahead of origin; proceeding", "err", err, "range", logRange)
- }
- if strings.TrimSpace(string(ahead)) == "0" {
- os.RemoveAll(sandboxDir)
- return nil
- }
-
- // Push from sandbox → bare repo (sandbox's origin is the bare repo).
- if out, err := exec.Command("git", "-C", sandboxDir, "push", "origin", "HEAD").CombinedOutput(); err != nil {
- // If rejected due to concurrent push, fetch+rebase and retry once.
- if strings.Contains(string(out), "fetch first") || strings.Contains(string(out), "non-fast-forward") {
- logger.Info("push rejected (concurrent task); rebasing and retrying", "sandbox", sandboxDir)
- if out2, err2 := exec.Command("git", "-C", sandboxDir, "pull", "--rebase", "origin", "master").CombinedOutput(); err2 != nil {
- return fmt.Errorf("git rebase before retry push: %w\n%s", err2, out2)
- }
- // Re-capture commits after rebase (hashes might have changed)
- execRecord.Commits = nil
- logOut, logErr = exec.Command("git", "-C", sandboxDir, "log", logRange, "--pretty=format:%H|%s").Output()
- if logErr == nil {
- lines := strings.Split(strings.TrimSpace(string(logOut)), "\n")
- for _, line := range lines {
- parts := strings.SplitN(line, "|", 2)
- if len(parts) == 2 {
- execRecord.Commits = append(execRecord.Commits, task.GitCommit{
- Hash: parts[0],
- Message: parts[1],
- })
- }
- }
- }
-
- if out3, err3 := exec.Command("git", "-C", sandboxDir, "push", "origin", "HEAD").CombinedOutput(); err3 != nil {
- return fmt.Errorf("git push to origin (after rebase): %w\n%s", err3, out3)
- }
- } else {
- return fmt.Errorf("git push to origin: %w\n%s", err, out)
- }
- }
-
- logger.Info("sandbox pushed to bare repo", "sandbox", sandboxDir)
- os.RemoveAll(sandboxDir)
- return nil
-}
-
-// execOnce runs the claude subprocess once, streaming output to e's log paths.
-func (r *ClaudeRunner) execOnce(ctx context.Context, args []string, workingDir, projectDir string, e *storage.Execution) error {
- cmd := exec.CommandContext(ctx, r.binaryPath(), args...)
- cmd.Env = append(os.Environ(),
- "CLAUDOMATOR_API_URL="+r.APIURL,
- "CLAUDOMATOR_TASK_ID="+e.TaskID,
- "CLAUDOMATOR_PROJECT_DIR="+projectDir,
- "CLAUDOMATOR_QUESTION_FILE="+filepath.Join(e.ArtifactDir, "question.json"),
- "CLAUDOMATOR_SUMMARY_FILE="+filepath.Join(e.ArtifactDir, "summary.txt"),
- "CLAUDOMATOR_DROP_DIR="+r.DropsDir,
- )
- // Put the subprocess in its own process group so we can SIGKILL the entire
- // group (MCP servers, bash children, etc.) on cancellation.
- cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
- if workingDir != "" {
- cmd.Dir = workingDir
- }
-
- stdoutFile, err := os.Create(e.StdoutPath)
- if err != nil {
- return fmt.Errorf("creating stdout log: %w", err)
- }
- defer stdoutFile.Close()
-
- stderrFile, err := os.Create(e.StderrPath)
- if err != nil {
- return fmt.Errorf("creating stderr log: %w", err)
- }
- defer stderrFile.Close()
-
- // Use os.Pipe for stdout so we own the read-end lifetime.
- // cmd.StdoutPipe() would add the read-end to closeAfterWait, causing
- // cmd.Wait() to close it before our goroutine finishes reading.
- stdoutR, stdoutW, err := os.Pipe()
- if err != nil {
- return fmt.Errorf("creating stdout pipe: %w", err)
- }
- cmd.Stdout = stdoutW // *os.File — not added to closeAfterStart/Wait
- cmd.Stderr = stderrFile
-
- if err := cmd.Start(); err != nil {
- stdoutW.Close()
- stdoutR.Close()
- return fmt.Errorf("starting claude: %w", err)
- }
- // Close our write-end immediately; the subprocess holds its own copy.
- // The goroutine below gets EOF when the subprocess exits.
- stdoutW.Close()
-
- // killDone is closed when cmd.Wait() returns, stopping the pgid-kill goroutine.
- //
- // Safety: this goroutine cannot block indefinitely. The select has two arms:
- // • ctx.Done() — fires if the caller cancels (e.g. timeout, user cancel).
- // The goroutine sends SIGKILL and exits immediately.
- // • killDone — closed by close(killDone) below, immediately after cmd.Wait()
- // returns. This fires when the process exits for any reason (natural exit,
- // SIGKILL from the ctx arm, or any other signal). The goroutine exits without
- // doing anything.
- //
- // Therefore: for a task that completes normally with a long-lived (non-cancelled)
- // context, the killDone arm fires and the goroutine exits. There is no path where
- // this goroutine outlives execOnce().
- killDone := make(chan struct{})
- go func() {
- select {
- case <-ctx.Done():
- // SIGKILL the entire process group to reap orphan children.
- syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
- case <-killDone:
- }
- }()
-
- // Stream stdout to the log file and parse cost/errors.
- // wg ensures costUSD and streamErr are fully written before we read them after cmd.Wait().
- var costUSD float64
- var streamErr error
- var wg sync.WaitGroup
- wg.Add(1)
- go func() {
- defer wg.Done()
- costUSD, streamErr = parseStream(stdoutR, stdoutFile, r.Logger)
- stdoutR.Close()
- }()
-
- waitErr := cmd.Wait()
- close(killDone) // stop the pgid-kill goroutine
- wg.Wait() // drain remaining stdout before reading costUSD/streamErr
-
- e.CostUSD = costUSD
-
- if waitErr != nil {
- if exitErr, ok := waitErr.(*exec.ExitError); ok {
- e.ExitCode = exitErr.ExitCode()
- }
- // If the stream captured a rate-limit or quota message, return it
- // so callers can distinguish it from a generic exit-status failure.
- if isRateLimitError(streamErr) || isQuotaExhausted(streamErr) {
- return streamErr
- }
- if tail := tailFile(e.StderrPath, 20); tail != "" {
- return fmt.Errorf("claude exited with error: %w\nstderr:\n%s", waitErr, tail)
- }
- return fmt.Errorf("claude exited with error: %w", waitErr)
- }
-
- e.ExitCode = 0
- if streamErr != nil {
- return streamErr
- }
- return nil
-}
-
-func (r *ClaudeRunner) buildArgs(t *task.Task, e *storage.Execution, questionFile string) []string {
- // Resume execution: the agent already has context; just deliver the answer.
- if e.ResumeSessionID != "" {
- args := []string{
- "-p", e.ResumeAnswer,
- "--resume", e.ResumeSessionID,
- "--output-format", "stream-json",
- "--verbose",
- }
- permMode := t.Agent.PermissionMode
- if permMode == "" {
- permMode = "bypassPermissions"
- }
- args = append(args, "--permission-mode", permMode)
- if t.Agent.Model != "" {
- args = append(args, "--model", t.Agent.Model)
- }
- return args
- }
-
- instructions := t.Agent.Instructions
- allowedTools := t.Agent.AllowedTools
-
- if !t.Agent.SkipPlanning {
- instructions = withPlanningPreamble(instructions)
- // Ensure Bash is available so the agent can POST subtasks and ask questions.
- hasBash := false
- for _, tool := range allowedTools {
- if tool == "Bash" {
- hasBash = true
- break
- }
- }
- if !hasBash {
- allowedTools = append(allowedTools, "Bash")
- }
- }
-
- args := []string{
- "-p", instructions,
- "--session-id", e.SessionID,
- "--output-format", "stream-json",
- "--verbose",
- }
-
- if t.Agent.Model != "" {
- args = append(args, "--model", t.Agent.Model)
- }
- if t.Agent.MaxBudgetUSD > 0 {
- args = append(args, "--max-budget-usd", fmt.Sprintf("%.2f", t.Agent.MaxBudgetUSD))
- }
- // Default to bypassPermissions — claudomator runs tasks unattended, so
- // prompting for write access would always stall execution. Tasks that need
- // a more restrictive mode can set permission_mode explicitly.
- permMode := t.Agent.PermissionMode
- if permMode == "" {
- permMode = "bypassPermissions"
- }
- args = append(args, "--permission-mode", permMode)
- if t.Agent.SystemPromptAppend != "" {
- args = append(args, "--append-system-prompt", t.Agent.SystemPromptAppend)
- }
- for _, tool := range allowedTools {
- args = append(args, "--allowedTools", tool)
- }
- for _, tool := range t.Agent.DisallowedTools {
- args = append(args, "--disallowedTools", tool)
- }
- for _, f := range t.Agent.ContextFiles {
- args = append(args, "--add-dir", f)
- }
- args = append(args, t.Agent.AdditionalArgs...)
-
- return args
-}
-
-// parseStream reads streaming JSON from claude, writes to w, and returns
-// (costUSD, error). error is non-nil if the stream signals task failure:
-// - result message has is_error:true
-// - a tool_result was denied due to missing permissions
-func parseStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64, error) {
- tee := io.TeeReader(r, w)
- scanner := bufio.NewScanner(tee)
- scanner.Buffer(make([]byte, 1024*1024), 1024*1024) // 1MB buffer for large lines
-
- var totalCost float64
- var streamErr error
-
- for scanner.Scan() {
- line := scanner.Bytes()
- var msg map[string]interface{}
- if err := json.Unmarshal(line, &msg); err != nil {
- continue
- }
-
- msgType, _ := msg["type"].(string)
- switch msgType {
- case "rate_limit_event":
- if info, ok := msg["rate_limit_info"].(map[string]interface{}); ok {
- status, _ := info["status"].(string)
- if status == "rejected" {
- streamErr = fmt.Errorf("claude rate limit reached (rejected): %v", msg)
- // Immediately break since we can't continue anyway
- break
- }
- }
- case "assistant":
- if errStr, ok := msg["error"].(string); ok && errStr == "rate_limit" {
- streamErr = fmt.Errorf("claude rate limit reached: %v", msg)
- }
- case "result":
- if isErr, _ := msg["is_error"].(bool); isErr {
- result, _ := msg["result"].(string)
- if result != "" {
- streamErr = fmt.Errorf("claude task failed: %s", result)
- } else {
- streamErr = fmt.Errorf("claude task failed (is_error=true in result)")
- }
- }
- // Prefer total_cost_usd from result message; fall through to legacy check below.
- if cost, ok := msg["total_cost_usd"].(float64); ok {
- totalCost = cost
- }
- case "user":
- // Detect permission-denial tool_results. These occur when permission_mode
- // is not bypassPermissions and claude exits 0 without completing its task.
- if err := permissionDenialError(msg); err != nil && streamErr == nil {
- streamErr = err
- }
- }
-
- // Legacy cost field used by older claude versions.
- if cost, ok := msg["cost_usd"].(float64); ok {
- totalCost = cost
- }
- }
-
- return totalCost, streamErr
-}
-
-// permissionDenialError inspects a "user" stream message for tool_result entries
-// that were denied due to missing permissions. Returns an error if found.
-func permissionDenialError(msg map[string]interface{}) error {
- message, ok := msg["message"].(map[string]interface{})
- if !ok {
- return nil
- }
- content, ok := message["content"].([]interface{})
- if !ok {
- return nil
- }
- for _, item := range content {
- itemMap, ok := item.(map[string]interface{})
- if !ok {
- continue
- }
- if itemMap["type"] != "tool_result" {
- continue
- }
- if isErr, _ := itemMap["is_error"].(bool); !isErr {
- continue
- }
- text, _ := itemMap["content"].(string)
- if strings.Contains(text, "requested permissions") || strings.Contains(text, "haven't granted") {
- return fmt.Errorf("permission denied by host: %s", text)
- }
- }
- return nil
-}
-
-// tailFile returns the last n lines of the file at path, or empty string if
-// the file cannot be read. Used to surface subprocess stderr on failure.
-func tailFile(path string, n int) string {
- f, err := os.Open(path)
- if err != nil {
- return ""
- }
- defer f.Close()
-
- var lines []string
- scanner := bufio.NewScanner(f)
- for scanner.Scan() {
- lines = append(lines, scanner.Text())
- if len(lines) > n {
- lines = lines[1:]
- }
- }
- return strings.Join(lines, "\n")
-}
diff --git a/internal/executor/claude_test.go b/internal/executor/claude_test.go
deleted file mode 100644
index e76fbf2..0000000
--- a/internal/executor/claude_test.go
+++ /dev/null
@@ -1,882 +0,0 @@
-package executor
-
-import (
- "context"
- "errors"
- "fmt"
- "io"
- "log/slog"
- "os"
- "os/exec"
- "path/filepath"
- "runtime"
- "strings"
- "testing"
- "time"
-
- "github.com/thepeterstone/claudomator/internal/storage"
- "github.com/thepeterstone/claudomator/internal/task"
-)
-
-func TestClaudeRunner_BuildArgs_BasicTask(t *testing.T) {
- r := &ClaudeRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "fix the bug",
- Model: "sonnet",
- SkipPlanning: true,
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- argMap := make(map[string]bool)
- for _, a := range args {
- argMap[a] = true
- }
- for _, want := range []string{"-p", "fix the bug", "--output-format", "stream-json", "--verbose", "--model", "sonnet"} {
- if !argMap[want] {
- t.Errorf("missing arg %q in %v", want, args)
- }
- }
-}
-
-func TestClaudeRunner_BuildArgs_FullConfig(t *testing.T) {
- r := &ClaudeRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "implement feature",
- Model: "opus",
- MaxBudgetUSD: 5.0,
- PermissionMode: "bypassPermissions",
- SystemPromptAppend: "Follow TDD",
- AllowedTools: []string{"Bash", "Edit"},
- DisallowedTools: []string{"Write"},
- ContextFiles: []string{"/src"},
- AdditionalArgs: []string{"--verbose"},
- SkipPlanning: true,
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- // Check key args are present.
- argMap := make(map[string]bool)
- for _, a := range args {
- argMap[a] = true
- }
-
- requiredArgs := []string{
- "-p", "implement feature", "--output-format", "stream-json",
- "--model", "opus", "--max-budget-usd", "5.00",
- "--permission-mode", "bypassPermissions",
- "--append-system-prompt", "Follow TDD",
- "--allowedTools", "Bash", "Edit",
- "--disallowedTools", "Write",
- "--add-dir", "/src",
- "--verbose",
- }
- for _, req := range requiredArgs {
- if !argMap[req] {
- t.Errorf("missing arg %q in %v", req, args)
- }
- }
-}
-
-func TestClaudeRunner_BuildArgs_DefaultsToBypassPermissions(t *testing.T) {
- r := &ClaudeRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "do work",
- SkipPlanning: true,
- // PermissionMode intentionally not set
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- found := false
- for i, a := range args {
- if a == "--permission-mode" && i+1 < len(args) && args[i+1] == "bypassPermissions" {
- found = true
- }
- }
- if !found {
- t.Errorf("expected --permission-mode bypassPermissions when PermissionMode is empty, args: %v", args)
- }
-}
-
-func TestClaudeRunner_BuildArgs_RespectsExplicitPermissionMode(t *testing.T) {
- r := &ClaudeRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "do work",
- PermissionMode: "default",
- SkipPlanning: true,
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- for i, a := range args {
- if a == "--permission-mode" && i+1 < len(args) {
- if args[i+1] != "default" {
- t.Errorf("expected --permission-mode default, got %q", args[i+1])
- }
- return
- }
- }
- t.Errorf("--permission-mode flag not found in args: %v", args)
-}
-
-func TestClaudeRunner_BuildArgs_AlwaysIncludesVerbose(t *testing.T) {
- r := &ClaudeRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "do something",
- SkipPlanning: true,
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- found := false
- for _, a := range args {
- if a == "--verbose" {
- found = true
- break
- }
- }
- if !found {
- t.Errorf("--verbose missing from args: %v", args)
- }
-}
-
-func TestClaudeRunner_BuildArgs_PreamblePrepended(t *testing.T) {
- r := &ClaudeRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "fix the bug",
- SkipPlanning: false,
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- // The -p value should start with the preamble and end with the original instructions.
- if len(args) < 2 || args[0] != "-p" {
- t.Fatalf("expected -p as first arg, got: %v", args)
- }
- if !strings.HasPrefix(args[1], "## Runtime Environment") {
- t.Errorf("instructions should start with planning preamble, got prefix: %q", args[1][:min(len(args[1]), 20)])
- }
- if !strings.Contains(args[1], "$CLAUDOMATOR_PROJECT_DIR") {
- t.Errorf("preamble should mention $CLAUDOMATOR_PROJECT_DIR")
- }
- if !strings.HasSuffix(args[1], "fix the bug") {
- t.Errorf("instructions should end with original instructions")
- }
-}
-
-func TestClaudeRunner_BuildArgs_PreambleAddsBash(t *testing.T) {
- r := &ClaudeRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "do work",
- AllowedTools: []string{"Read"},
- SkipPlanning: false,
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- // Bash should be appended to allowed tools.
- foundBash := false
- for i, a := range args {
- if a == "--allowedTools" && i+1 < len(args) && args[i+1] == "Bash" {
- foundBash = true
- }
- }
- if !foundBash {
- t.Errorf("Bash should be added to --allowedTools when preamble is active: %v", args)
- }
-}
-
-func TestClaudeRunner_BuildArgs_PreambleBashNotDuplicated(t *testing.T) {
- r := &ClaudeRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "do work",
- AllowedTools: []string{"Bash", "Read"},
- SkipPlanning: false,
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- // Count Bash occurrences in --allowedTools values.
- bashCount := 0
- for i, a := range args {
- if a == "--allowedTools" && i+1 < len(args) && args[i+1] == "Bash" {
- bashCount++
- }
- }
- if bashCount != 1 {
- t.Errorf("Bash should appear exactly once in --allowedTools, got %d: %v", bashCount, args)
- }
-}
-
-// TestClaudeRunner_Run_ResumeSetsSessionIDFromResumeSession verifies that when a
-// resume execution is itself blocked again, the stored SessionID is the original
-// resumed session, not the new execution's own UUID. Without this, a second
-// block-and-resume cycle passes the wrong --resume session ID and fails.
-func TestClaudeRunner_Run_ResumeSetsSessionIDFromResumeSession(t *testing.T) {
- logDir := t.TempDir()
- r := &ClaudeRunner{
- BinaryPath: "true", // exits 0, no output
- Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
- LogDir: logDir,
- }
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "continue",
- SkipPlanning: true,
- },
- }
- exec := &storage.Execution{
- ID: "resume-exec-uuid",
- TaskID: "task-1",
- ResumeSessionID: "original-session-uuid",
- ResumeAnswer: "yes",
- }
-
- // Run completes successfully (binary is "true").
- _ = r.Run(context.Background(), tk, exec)
-
- // SessionID must be the original session (ResumeSessionID), not the new
- // exec's own ID. If it were exec.ID, a second blocked-then-resumed cycle
- // would use the wrong --resume argument and fail.
- if exec.SessionID != "original-session-uuid" {
- t.Errorf("SessionID after resume Run: want %q, got %q", "original-session-uuid", exec.SessionID)
- }
-}
-
-func TestClaudeRunner_Run_InaccessibleWorkingDir_ReturnsError(t *testing.T) {
- r := &ClaudeRunner{
- BinaryPath: "true", // would succeed if it ran
- Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
- LogDir: t.TempDir(),
- }
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- ProjectDir: "/nonexistent/path/does/not/exist",
- SkipPlanning: true,
- },
- }
- exec := &storage.Execution{ID: "test-exec"}
-
- err := r.Run(context.Background(), tk, exec)
-
- if err == nil {
- t.Fatal("expected error for inaccessible working_dir, got nil")
- }
- if !strings.Contains(err.Error(), "project_dir") {
- t.Errorf("expected 'project_dir' in error, got: %v", err)
- }
-}
-
-func TestClaudeRunner_BinaryPath_Default(t *testing.T) {
- r := &ClaudeRunner{}
- if r.binaryPath() != "claude" {
- t.Errorf("want 'claude', got %q", r.binaryPath())
- }
-}
-
-func TestClaudeRunner_BinaryPath_Custom(t *testing.T) {
- r := &ClaudeRunner{BinaryPath: "/usr/local/bin/claude"}
- if r.binaryPath() != "/usr/local/bin/claude" {
- t.Errorf("want custom path, got %q", r.binaryPath())
- }
-}
-
-// TestExecOnce_NoGoroutineLeak_OnNaturalExit verifies that execOnce does not
-// leave behind any goroutines when the subprocess exits normally (no context
-// cancellation). Both the pgid-kill goroutine and the parseStream goroutine
-// must have exited before execOnce returns.
-func TestExecOnce_NoGoroutineLeak_OnNaturalExit(t *testing.T) {
- logDir := t.TempDir()
- r := &ClaudeRunner{
- BinaryPath: "true", // exits immediately with status 0, produces no output
- Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
- LogDir: logDir,
- }
- e := &storage.Execution{
- ID: "goroutine-leak-test",
- TaskID: "task-id",
- StdoutPath: filepath.Join(logDir, "stdout.log"),
- StderrPath: filepath.Join(logDir, "stderr.log"),
- ArtifactDir: logDir,
- }
-
- // Let any goroutines from test infrastructure settle before sampling.
- runtime.Gosched()
- baseline := runtime.NumGoroutine()
-
- if err := r.execOnce(context.Background(), []string{}, "", "", e); err != nil {
- t.Fatalf("execOnce failed: %v", err)
- }
-
- // Give the scheduler a moment to let any leaked goroutines actually exit.
- // In correct code the goroutines exit before execOnce returns, so this is
- // just a safety buffer for the scheduler.
- time.Sleep(10 * time.Millisecond)
- runtime.Gosched()
-
- after := runtime.NumGoroutine()
- if after > baseline {
- t.Errorf("goroutine leak: %d goroutines before execOnce, %d after (leaked %d)",
- baseline, after, after-baseline)
- }
-}
-
-// initGitRepo creates a git repo in dir with one commit so it is clonable.
-func initGitRepo(t *testing.T, dir string) {
- t.Helper()
- cmds := [][]string{
- {"git", "-c", "safe.directory=*", "-C", dir, "init", "-b", "main"},
- {"git", "-c", "safe.directory=*", "-C", dir, "config", "user.email", "test@test"},
- {"git", "-c", "safe.directory=*", "-C", dir, "config", "user.name", "test"},
- }
- for _, args := range cmds {
- if out, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil {
- t.Fatalf("%v: %v\n%s", args, err, out)
- }
- }
- if err := os.WriteFile(filepath.Join(dir, "init.txt"), []byte("init"), 0644); err != nil {
- t.Fatal(err)
- }
- if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", dir, "add", ".").CombinedOutput(); err != nil {
- t.Fatalf("git add: %v\n%s", err, out)
- }
- if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", dir, "commit", "-m", "init").CombinedOutput(); err != nil {
- t.Fatalf("git commit: %v\n%s", err, out)
- }
-}
-
-func TestSandboxCloneSource_PrefersLocalRemote(t *testing.T) {
- dir := t.TempDir()
- initGitRepo(t, dir)
- // Add a "local" remote pointing to a bare repo.
- bare := t.TempDir()
- exec.Command("git", "init", "--bare", bare).Run()
- exec.Command("git", "-C", dir, "remote", "add", "local", bare).Run()
- exec.Command("git", "-C", dir, "remote", "add", "origin", "https://example.com/repo").Run()
-
- got := sandboxCloneSource(dir)
- if got != bare {
- t.Errorf("expected bare repo path %q, got %q", bare, got)
- }
-}
-
-func TestSandboxCloneSource_FallsBackToOrigin(t *testing.T) {
- dir := t.TempDir()
- initGitRepo(t, dir)
- originURL := "https://example.com/origin-repo"
- exec.Command("git", "-C", dir, "remote", "add", "origin", originURL).Run()
-
- got := sandboxCloneSource(dir)
- if got != originURL {
- t.Errorf("expected origin URL %q, got %q", originURL, got)
- }
-}
-
-func TestSandboxCloneSource_FallsBackToProjectDir(t *testing.T) {
- dir := t.TempDir()
- initGitRepo(t, dir)
- // No remotes configured.
- got := sandboxCloneSource(dir)
- if got != dir {
- t.Errorf("expected projectDir %q (no remotes), got %q", dir, got)
- }
-}
-
-func TestSetupSandbox_ClonesGitRepo(t *testing.T) {
- src := t.TempDir()
- initGitRepo(t, src)
-
- sandbox, err := setupSandbox(src, slog.New(slog.NewTextHandler(io.Discard, nil)))
- if err != nil {
- t.Fatalf("setupSandbox: %v", err)
- }
- t.Cleanup(func() { os.RemoveAll(sandbox) })
-
- // Force sandbox to master if it cloned as main
- exec.Command("git", gitSafe("-C", sandbox, "checkout", "master")...).Run()
-
- // Debug sandbox
- logOut, _ := exec.Command("git", "-C", sandbox, "log", "-1").CombinedOutput()
- fmt.Printf("DEBUG: sandbox log: %s\n", string(logOut))
-
- // Verify sandbox is a git repo with at least one commit.
- out, err := exec.Command("git", "-C", sandbox, "log", "--oneline").Output()
- if err != nil {
- t.Fatalf("git log in sandbox: %v", err)
- }
- if len(strings.TrimSpace(string(out))) == 0 {
- t.Error("expected at least one commit in sandbox, got empty log")
- }
-}
-
-func TestSetupSandbox_InitialisesNonGitDir(t *testing.T) {
- // A plain directory (not a git repo) should be initialised then cloned.
- src := t.TempDir()
-
- sandbox, err := setupSandbox(src, slog.New(slog.NewTextHandler(io.Discard, nil)))
- if err != nil {
- t.Fatalf("setupSandbox on plain dir: %v", err)
- }
- t.Cleanup(func() { os.RemoveAll(sandbox) })
-
- if _, err := os.Stat(filepath.Join(sandbox, ".git")); err != nil {
- t.Errorf("sandbox should be a git repo: %v", err)
- }
-}
-
-func TestTeardownSandbox_AutocommitsChanges(t *testing.T) {
- // Create a bare repo as origin so push succeeds.
- bare := t.TempDir()
- if out, err := exec.Command("git", "init", "--bare", bare).CombinedOutput(); err != nil {
- t.Fatalf("git init bare: %v\n%s", err, out)
- }
-
- // Create a sandbox directly.
- sandbox := t.TempDir()
- initGitRepo(t, sandbox)
- if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "remote", "add", "origin", bare).CombinedOutput(); err != nil {
- t.Fatalf("git remote add: %v\n%s", err, out)
- }
- // Initial push to establish origin/main
- if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "push", "origin", "main").CombinedOutput(); err != nil {
- t.Fatalf("git push initial: %v\n%s", err, out)
- }
-
- // Capture startHEAD
- headOut, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "rev-parse", "HEAD").Output()
- if err != nil {
- t.Fatalf("rev-parse HEAD: %v", err)
- }
- startHEAD := strings.TrimSpace(string(headOut))
-
- // Leave an uncommitted file in the sandbox.
- if err := os.WriteFile(filepath.Join(sandbox, "dirty.txt"), []byte("autocommit me"), 0644); err != nil {
- t.Fatal(err)
- }
-
- logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
- execRecord := &storage.Execution{}
-
- err = teardownSandbox("", sandbox, startHEAD, logger, execRecord)
- if err != nil {
- t.Fatalf("expected autocommit to succeed, got error: %v", err)
- }
-
- // Sandbox should be removed after successful autocommit and push.
- if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) {
- t.Error("sandbox should have been removed after successful autocommit and push")
- }
-
- // Verify the commit exists in the bare repo.
- out, err := exec.Command("git", "-C", bare, "log", "-1", "--pretty=%B").Output()
- if err != nil {
- t.Fatalf("git log in bare repo: %v", err)
- }
- if !strings.Contains(string(out), "chore: autocommit uncommitted changes") {
- t.Errorf("expected autocommit message in log, got: %q", string(out))
- }
-
- // Verify the commit was captured in execRecord.
- if len(execRecord.Commits) == 0 {
- t.Error("expected at least one commit in execRecord")
- } else if !strings.Contains(execRecord.Commits[0].Message, "chore: autocommit uncommitted changes") {
- t.Errorf("unexpected commit message: %q", execRecord.Commits[0].Message)
- }
-}
-
-func TestTeardownSandbox_BuildFailure_BlocksAutocommit(t *testing.T) {
- bare := t.TempDir()
- if out, err := exec.Command("git", "init", "--bare", bare).CombinedOutput(); err != nil {
- t.Fatalf("git init bare: %v\n%s", err, out)
- }
-
- sandbox := t.TempDir()
- initGitRepo(t, sandbox)
- if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "remote", "add", "origin", bare).CombinedOutput(); err != nil {
- t.Fatalf("git remote add: %v\n%s", err, out)
- }
-
- // Capture startHEAD
- headOut, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "rev-parse", "HEAD").Output()
- if err != nil {
- t.Fatalf("rev-parse HEAD: %v", err)
- }
- startHEAD := strings.TrimSpace(string(headOut))
-
- // Leave an uncommitted file.
- if err := os.WriteFile(filepath.Join(sandbox, "dirty.txt"), []byte("dirty"), 0644); err != nil {
- t.Fatal(err)
- }
-
- // Add a failing Makefile.
- makefile := "build:\n\t@echo 'build failed'\n\texit 1\n"
- if err := os.WriteFile(filepath.Join(sandbox, "Makefile"), []byte(makefile), 0644); err != nil {
- t.Fatal(err)
- }
-
- logger := slog.New(slog.NewTextHandler(io.Discard, nil))
- execRecord := &storage.Execution{}
-
- err = teardownSandbox("", sandbox, startHEAD, logger, execRecord)
- if err == nil {
- t.Error("expected teardown to fail due to build failure, but it succeeded")
- } else if !strings.Contains(err.Error(), "build failed before autocommit") {
- t.Errorf("expected build failure error message, got: %v", err)
- }
-
- // Sandbox should NOT be removed if teardown failed.
- if _, statErr := os.Stat(sandbox); os.IsNotExist(statErr) {
- t.Error("sandbox should have been preserved after build failure")
- }
-
- // Verify no new commit in bare repo.
- out, err := exec.Command("git", "-C", bare, "log", "HEAD").CombinedOutput()
- if strings.Contains(string(out), "chore: autocommit uncommitted changes") {
- t.Error("autocommit should not have been pushed after build failure")
- }
-}
-
-func TestTeardownSandbox_BuildSuccess_ProceedsToAutocommit(t *testing.T) {
- bare := t.TempDir()
- if out, err := exec.Command("git", "init", "--bare", bare).CombinedOutput(); err != nil {
- t.Fatalf("git init bare: %v\n%s", err, out)
- }
-
- sandbox := t.TempDir()
- initGitRepo(t, sandbox)
- if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "remote", "add", "origin", bare).CombinedOutput(); err != nil {
- t.Fatalf("git remote add: %v\n%s", err, out)
- }
-
- // Capture startHEAD
- headOut, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "rev-parse", "HEAD").Output()
- if err != nil {
- t.Fatalf("rev-parse HEAD: %v", err)
- }
- startHEAD := strings.TrimSpace(string(headOut))
-
- // Leave an uncommitted file.
- if err := os.WriteFile(filepath.Join(sandbox, "dirty.txt"), []byte("dirty"), 0644); err != nil {
- t.Fatal(err)
- }
-
- // Add a successful Makefile.
- makefile := "build:\n\t@echo 'build succeeded'\n"
- if err := os.WriteFile(filepath.Join(sandbox, "Makefile"), []byte(makefile), 0644); err != nil {
- t.Fatal(err)
- }
-
- logger := slog.New(slog.NewTextHandler(io.Discard, nil))
- execRecord := &storage.Execution{}
-
- err = teardownSandbox("", sandbox, startHEAD, logger, execRecord)
- if err != nil {
- t.Fatalf("expected teardown to succeed after build success, got error: %v", err)
- }
-
- // Sandbox should be removed after success.
- if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) {
- t.Error("sandbox should have been removed after successful build and autocommit")
- }
-
- // Verify new commit in bare repo.
- out, err := exec.Command("git", "-C", bare, "log", "-1", "--pretty=%B").Output()
- if err != nil {
- t.Fatalf("git log in bare repo: %v", err)
- }
- if !strings.Contains(string(out), "chore: autocommit uncommitted changes") {
- t.Errorf("expected autocommit message in log, got: %q", string(out))
- }
-}
-
-
-func TestTeardownSandbox_CleanSandboxWithNoNewCommits_RemovesSandbox(t *testing.T) {
- src := t.TempDir()
- initGitRepo(t, src)
- logger := slog.New(slog.NewTextHandler(io.Discard, nil))
- sandbox, err := setupSandbox(src, logger)
- if err != nil {
- t.Fatalf("setupSandbox: %v", err)
- }
-
- execRecord := &storage.Execution{}
-
- headOut, _ := exec.Command("git", "-C", sandbox, "rev-parse", "HEAD").Output()
- startHEAD := strings.TrimSpace(string(headOut))
-
- // Sandbox has no new commits beyond origin; teardown should succeed and remove it.
- if err := teardownSandbox(src, sandbox, startHEAD, logger, execRecord); err != nil {
- t.Fatalf("teardownSandbox: %v", err)
- }
- if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) {
- t.Error("sandbox should have been removed after clean teardown")
- os.RemoveAll(sandbox)
- }
-}
-
-// TestBlockedError_IncludesSandboxDir verifies that when a task is blocked in a
-// sandbox, the BlockedError carries the sandbox path so the resume execution can
-// run in the same directory (where Claude's session files are stored).
-func TestBlockedError_IncludesSandboxDir(t *testing.T) {
- src := t.TempDir()
- initGitRepo(t, src)
-
- logDir := t.TempDir()
-
- // Use a script that writes question.json to the env-var path and exits 0
- // (simulating a blocked agent that asks a question before exiting).
- scriptPath := filepath.Join(t.TempDir(), "fake-claude.sh")
- if err := os.WriteFile(scriptPath, []byte(`#!/bin/sh
-if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then
- printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE"
-fi
-`), 0755); err != nil {
- t.Fatalf("write script: %v", err)
- }
-
- r := &ClaudeRunner{
- BinaryPath: scriptPath,
- Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
- LogDir: logDir,
- }
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- Instructions: "do something",
- ProjectDir: src,
- SkipPlanning: true,
- },
- }
- exec := &storage.Execution{ID: "blocked-exec-uuid", TaskID: "task-1"}
-
- err := r.Run(context.Background(), tk, exec)
-
- var blocked *BlockedError
- if !errors.As(err, &blocked) {
- t.Fatalf("expected BlockedError, got: %v", err)
- }
- if blocked.SandboxDir == "" {
- t.Error("BlockedError.SandboxDir should be set when task runs in a sandbox")
- }
- // Sandbox should still exist (preserved for resume).
- if _, statErr := os.Stat(blocked.SandboxDir); os.IsNotExist(statErr) {
- t.Error("sandbox directory should be preserved when blocked")
- } else {
- os.RemoveAll(blocked.SandboxDir) // cleanup
- }
-}
-
-// TestClaudeRunner_Run_ResumeUsesStoredSandboxDir verifies that when a resume
-// execution has SandboxDir set, the runner uses that directory (not project_dir)
-// as the working directory, so Claude finds its session files there.
-func TestClaudeRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) {
- logDir := t.TempDir()
- sandboxDir := t.TempDir()
- cwdFile := filepath.Join(logDir, "cwd.txt")
-
- // Use a script that writes its working directory to a file in logDir (stable path).
- scriptPath := filepath.Join(t.TempDir(), "fake-claude.sh")
- script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n"
- if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
- t.Fatalf("write script: %v", err)
- }
-
- r := &ClaudeRunner{
- BinaryPath: scriptPath,
- Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
- LogDir: logDir,
- }
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- ProjectDir: sandboxDir, // must exist; resume overrides it with SandboxDir anyway
- SkipPlanning: true,
- },
- }
- exec := &storage.Execution{
- ID: "resume-exec-uuid",
- TaskID: "task-1",
- ResumeSessionID: "original-session",
- ResumeAnswer: "yes",
- SandboxDir: sandboxDir,
- }
-
- _ = r.Run(context.Background(), tk, exec)
-
- got, err := os.ReadFile(cwdFile)
- if err != nil {
- t.Fatalf("cwd file not written: %v", err)
- }
- // The runner should have executed claude in sandboxDir, not in project_dir.
- if string(got) != sandboxDir {
- t.Errorf("resume working dir: want %q, got %q", sandboxDir, string(got))
- }
-}
-
-func TestClaudeRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) {
- logDir := t.TempDir()
- projectDir := t.TempDir()
- initGitRepo(t, projectDir)
-
- cwdFile := filepath.Join(logDir, "cwd.txt")
- scriptPath := filepath.Join(t.TempDir(), "fake-claude.sh")
- script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n"
- if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
- t.Fatalf("write script: %v", err)
- }
-
- r := &ClaudeRunner{
- BinaryPath: scriptPath,
- Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
- LogDir: logDir,
- }
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "claude",
- ProjectDir: projectDir,
- SkipPlanning: true,
- },
- }
- // Point to a sandbox that no longer exists (e.g. /tmp was purged).
- staleSandbox := filepath.Join(t.TempDir(), "gone")
- e := &storage.Execution{
- ID: "resume-exec-2",
- TaskID: "task-2",
- ResumeSessionID: "session-abc",
- ResumeAnswer: "ok",
- SandboxDir: staleSandbox,
- }
-
- if err := r.Run(context.Background(), tk, e); err != nil {
- t.Fatalf("Run with stale sandbox: %v", err)
- }
-
- got, err := os.ReadFile(cwdFile)
- if err != nil {
- t.Fatalf("cwd file not written: %v", err)
- }
- // Should have run in a fresh sandbox (not the stale path, not the raw projectDir).
- // The sandbox is removed after teardown, so we only check what it wasn't.
- cwd := string(got)
- if cwd == staleSandbox {
- t.Error("ran in stale sandbox dir that doesn't exist")
- }
- if cwd == projectDir {
- t.Error("ran directly in project_dir; expected a fresh sandbox clone")
- }
- // cwd should look like a claudomator sandbox path.
- if !strings.Contains(cwd, "claudomator-sandbox-") {
- t.Errorf("expected sandbox path, got %q", cwd)
- }
-}
-
-func TestIsCompletionReport(t *testing.T) {
- tests := []struct {
- name string
- json string
- expected bool
- }{
- {
- name: "real question with options",
- json: `{"text": "Should I proceed with implementation?", "options": ["Yes", "No"]}`,
- expected: false,
- },
- {
- name: "real question no options",
- json: `{"text": "Which approach do you prefer?"}`,
- expected: false,
- },
- {
- name: "completion report no options no question mark",
- json: `{"text": "All tests pass. Implementation complete. Summary written to CLAUDOMATOR_SUMMARY_FILE."}`,
- expected: true,
- },
- {
- name: "completion report with empty options",
- json: `{"text": "Feature implemented and committed.", "options": []}`,
- expected: true,
- },
- {
- name: "invalid json treated as not a report",
- json: `not json`,
- expected: false,
- },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got := isCompletionReport(tt.json)
- if got != tt.expected {
- t.Errorf("isCompletionReport(%q) = %v, want %v", tt.json, got, tt.expected)
- }
- })
- }
-}
-
-func TestTailFile_ReturnsLastNLines(t *testing.T) {
- f, err := os.CreateTemp("", "tailfile-*")
- if err != nil {
- t.Fatal(err)
- }
- defer os.Remove(f.Name())
- for i := 1; i <= 30; i++ {
- fmt.Fprintf(f, "line %d\n", i)
- }
- f.Close()
-
- got := tailFile(f.Name(), 5)
- lines := strings.Split(got, "\n")
- if len(lines) != 5 {
- t.Fatalf("want 5 lines, got %d: %q", len(lines), got)
- }
- if lines[0] != "line 26" || lines[4] != "line 30" {
- t.Errorf("want lines 26-30, got: %q", got)
- }
-}
-
-func TestTailFile_MissingFile_ReturnsEmpty(t *testing.T) {
- got := tailFile("/nonexistent/path/file.log", 10)
- if got != "" {
- t.Errorf("want empty string for missing file, got %q", got)
- }
-}
-
-func TestGitSafe_PrependsSafeDirectory(t *testing.T) {
- got := gitSafe("-C", "/some/path", "status")
- want := []string{"-c", "safe.directory=*", "-C", "/some/path", "status"}
- if len(got) != len(want) {
- t.Fatalf("gitSafe() = %v, want %v", got, want)
- }
- for i := range want {
- if got[i] != want[i] {
- t.Errorf("gitSafe()[%d] = %q, want %q", i, got[i], want[i])
- }
- }
-}
diff --git a/internal/executor/container.go b/internal/executor/container.go
new file mode 100644
index 0000000..c43e201
--- /dev/null
+++ b/internal/executor/container.go
@@ -0,0 +1,380 @@
+package executor
+
+import (
+ "context"
+ "fmt"
+ "log/slog"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "sync"
+ "syscall"
+
+ "github.com/thepeterstone/claudomator/internal/storage"
+ "github.com/thepeterstone/claudomator/internal/task"
+)
+
+// ContainerRunner executes an agent inside a container. It clones the task's
+// repository into a temporary host workspace, mounts it at /workspace, runs
+// the agent CLI via `docker run`, and pushes resulting commits on success.
+type ContainerRunner struct {
+	Image string // default image if not specified in task
+	Logger *slog.Logger
+	LogDir string // base dir for per-execution log dirs (see ExecLogDir)
+	APIURL string // host API URL; "localhost" is rewritten to host.docker.internal for the container
+	DropsDir string // exported to the container as CLAUDOMATOR_DROP_DIR
+	SSHAuthSock string // optional path to host SSH agent
+	ClaudeBinary string // optional path to claude binary in container
+	GeminiBinary string // optional path to gemini binary in container
+	ClaudeConfigDir string // host path to ~/.claude; its credentials are COPIED into the per-run agent home (not mounted)
+	// Command allows mocking exec.CommandContext for tests.
+	Command func(ctx context.Context, name string, arg ...string) *exec.Cmd
+}
+
+// command builds an *exec.Cmd for the given program and arguments, honouring
+// the injected Command hook when tests provide one and falling back to
+// exec.CommandContext otherwise.
+func (r *ContainerRunner) command(ctx context.Context, name string, arg ...string) *exec.Cmd {
+	if r.Command == nil {
+		return exec.CommandContext(ctx, name, arg...)
+	}
+	return r.Command(ctx, name, arg...)
+}
+
+// ExecLogDir returns the per-execution log directory under LogDir, or the
+// empty string when no base log directory is configured.
+func (r *ContainerRunner) ExecLogDir(execID string) string {
+	if r.LogDir != "" {
+		return filepath.Join(r.LogDir, execID)
+	}
+	return ""
+}
+
+// Run executes the task's agent inside a container.
+//
+// Flow: resolve the repository URL and image, set up a host workspace (a
+// fresh `git clone`, or the preserved sandbox when resuming), stage the env
+// file / instructions / credentials into it, run `docker run` while streaming
+// stdout through parseStream, then push any new commits back to the remote.
+//
+// The workspace is removed only on clean success. On failure it is preserved
+// for debugging; when the agent leaves a question file a *BlockedError is
+// returned and the workspace is preserved for resumption.
+func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
+	var err error
+	// Repo URL precedence: task-level > agent-level > legacy project_dir fallback.
+	repoURL := t.RepositoryURL
+	if repoURL == "" {
+		repoURL = t.Agent.RepositoryURL
+	}
+	if repoURL == "" {
+		// Fallback to project_dir if repository_url is not set (legacy support).
+		// Prefer the 'local' bare remote so that git push succeeds after execution
+		// (pushing to a non-bare working copy on a checked-out branch is rejected by git).
+		if t.Agent.ProjectDir != "" {
+			repoURL = t.Agent.ProjectDir
+			if out, err2 := exec.Command("git", "-C", t.Agent.ProjectDir, "remote", "get-url", "local").Output(); err2 == nil {
+				repoURL = strings.TrimSpace(string(out))
+			}
+		} else {
+			return fmt.Errorf("task %s has no repository_url or project_dir", t.ID)
+		}
+	}
+
+	// Image precedence: task override > runner default > hard-coded fallback.
+	image := t.Agent.ContainerImage
+	if image == "" {
+		image = r.Image
+	}
+	if image == "" {
+		image = "claudomator-agent:latest"
+	}
+
+	// 1. Setup workspace on host
+	var workspace string
+	isResume := false
+	if e.SandboxDir != "" {
+		if _, err = os.Stat(e.SandboxDir); err == nil {
+			workspace = e.SandboxDir
+			isResume = true
+			r.Logger.Info("resuming in preserved workspace", "path", workspace)
+		}
+	}
+
+	if workspace == "" {
+		workspace, err = os.MkdirTemp("", "claudomator-workspace-*")
+		if err != nil {
+			return fmt.Errorf("creating workspace: %w", err)
+		}
+		// chmod applied after clone; see step 2.
+	}
+
+	// Note: workspace is only removed on success. On failure, it's preserved for debugging.
+	// If the task becomes BLOCKED, it's also preserved for resumption.
+	success := false
+	isBlocked := false
+	defer func() {
+		if success && !isBlocked {
+			os.RemoveAll(workspace)
+		} else {
+			r.Logger.Warn("preserving workspace", "path", workspace, "success", success, "blocked", isBlocked)
+		}
+	}()
+
+	// 2. Clone repo into workspace if not resuming.
+	// git clone requires the target directory to not exist; remove the MkdirTemp-created dir first.
+	if !isResume {
+		if err := os.Remove(workspace); err != nil {
+			return fmt.Errorf("removing workspace before clone: %w", err)
+		}
+		r.Logger.Info("cloning repository", "url", repoURL, "workspace", workspace)
+		if out, err := r.command(ctx, "git", "clone", repoURL, workspace).CombinedOutput(); err != nil {
+			return fmt.Errorf("git clone failed: %w\n%s", err, string(out))
+		}
+		if err = os.Chmod(workspace, 0755); err != nil {
+			return fmt.Errorf("chmod cloned workspace: %w", err)
+		}
+	}
+	// Record the workspace so a later blocked/failed execution can resume in it.
+	e.SandboxDir = workspace
+
+	// 3. Prepare logs
+	logDir := r.ExecLogDir(e.ID)
+	if logDir == "" {
+		logDir = filepath.Join(workspace, ".claudomator-logs")
+	}
+	if err := os.MkdirAll(logDir, 0700); err != nil {
+		return fmt.Errorf("creating log dir: %w", err)
+	}
+	e.StdoutPath = filepath.Join(logDir, "stdout.log")
+	e.StderrPath = filepath.Join(logDir, "stderr.log")
+	e.ArtifactDir = logDir
+
+	stdoutFile, err := os.Create(e.StdoutPath)
+	if err != nil {
+		return fmt.Errorf("creating stdout log: %w", err)
+	}
+	defer stdoutFile.Close()
+
+	stderrFile, err := os.Create(e.StderrPath)
+	if err != nil {
+		return fmt.Errorf("creating stderr log: %w", err)
+	}
+	defer stderrFile.Close()
+
+	// 4. Run container
+
+	// Write API keys to a temporary env file to avoid exposure in 'ps' or 'docker inspect'
+	// NOTE(review): this file lives inside the cloned repo workspace, which is
+	// mounted at /workspace — the agent can read it and could commit it by
+	// accident. Consider staging it outside the repo or gitignoring it.
+	envFile := filepath.Join(workspace, ".claudomator-env")
+	envContent := fmt.Sprintf("ANTHROPIC_API_KEY=%s\nGOOGLE_API_KEY=%s\nGEMINI_API_KEY=%s\n", os.Getenv("ANTHROPIC_API_KEY"), os.Getenv("GOOGLE_API_KEY"), os.Getenv("GEMINI_API_KEY"))
+	if err := os.WriteFile(envFile, []byte(envContent), 0600); err != nil {
+		return fmt.Errorf("writing env file: %w", err)
+	}
+
+	// Inject custom instructions via file to avoid CLI length limits
+	instructionsFile := filepath.Join(workspace, ".claudomator-instructions.txt")
+	if err := os.WriteFile(instructionsFile, []byte(t.Agent.Instructions), 0644); err != nil {
+		return fmt.Errorf("writing instructions: %w", err)
+	}
+
+	// Set up a writable $HOME staging dir so any agent tool (claude, gemini, etc.)
+	// can freely create subdirs (session-env, .gemini, .cache, …) without hitting
+	// a non-existent or read-only home. We copy only the claude credentials into it.
+	agentHome := filepath.Join(workspace, ".agent-home")
+	if err := os.MkdirAll(filepath.Join(agentHome, ".claude"), 0755); err != nil {
+		return fmt.Errorf("creating agent home staging dir: %w", err)
+	}
+	if err := os.MkdirAll(filepath.Join(agentHome, ".gemini"), 0755); err != nil {
+		return fmt.Errorf("creating .gemini dir: %w", err)
+	}
+	if r.ClaudeConfigDir != "" {
+		// credentials (best-effort: a missing file is silently skipped)
+		if srcData, readErr := os.ReadFile(filepath.Join(r.ClaudeConfigDir, ".credentials.json")); readErr == nil {
+			_ = os.WriteFile(filepath.Join(agentHome, ".claude", ".credentials.json"), srcData, 0600)
+		}
+		// settings (used by claude CLI; copy so it can write updates without hitting the host)
+		if srcData, readErr := os.ReadFile(filepath.Join(filepath.Dir(r.ClaudeConfigDir), ".claude.json")); readErr == nil {
+			_ = os.WriteFile(filepath.Join(agentHome, ".claude.json"), srcData, 0644)
+		}
+	}
+
+	args := r.buildDockerArgs(workspace, agentHome, e.TaskID)
+	innerCmd := r.buildInnerCmd(t, e, isResume)
+
+	fullArgs := append(args, image)
+	fullArgs = append(fullArgs, innerCmd...)
+
+	r.Logger.Info("starting container", "image", image, "taskID", t.ID)
+	cmd := r.command(ctx, "docker", fullArgs...)
+	cmd.Stderr = stderrFile
+	// Own process group so cancellation can signal the whole docker-client tree.
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+
+	// Use os.Pipe for stdout so we can parse it in real-time
+	var stdoutR, stdoutW *os.File
+	stdoutR, stdoutW, err = os.Pipe()
+	if err != nil {
+		return fmt.Errorf("creating stdout pipe: %w", err)
+	}
+	cmd.Stdout = stdoutW
+
+	if err := cmd.Start(); err != nil {
+		stdoutW.Close()
+		stdoutR.Close()
+		return fmt.Errorf("starting container: %w", err)
+	}
+	// Close the parent's copy of the write end so the reader sees EOF once the
+	// child exits.
+	stdoutW.Close()
+
+	// Watch for context cancellation to kill the process group (Issue 1)
+	// NOTE(review): SIGKILL-ing the `docker run` client does not necessarily
+	// stop the container itself (the daemon may keep it running) — confirm
+	// whether a `docker stop` by container name is needed here.
+	done := make(chan struct{})
+	defer close(done)
+	go func() {
+		select {
+		case <-ctx.Done():
+			r.Logger.Info("killing container process group due to context cancellation", "taskID", t.ID)
+			syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+		case <-done:
+		}
+	}()
+
+	// Stream stdout to the log file and parse cost/errors.
+	var costUSD float64
+	var sessionID string
+	var streamErr error
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		costUSD, sessionID, streamErr = parseStream(stdoutR, stdoutFile, r.Logger)
+		stdoutR.Close()
+	}()
+
+	waitErr := cmd.Wait()
+	wg.Wait()
+
+	e.CostUSD = costUSD
+	if sessionID != "" {
+		e.SessionID = sessionID
+	}
+
+	// Check whether the agent left a question before exiting.
+	questionFile := filepath.Join(logDir, "question.json")
+	if data, readErr := os.ReadFile(questionFile); readErr == nil {
+		os.Remove(questionFile) // consumed
+		questionJSON := strings.TrimSpace(string(data))
+		if isCompletionReport(questionJSON) {
+			// Agents sometimes misuse the question file for a final report;
+			// treat that as a summary rather than blocking the task.
+			r.Logger.Info("treating question file as completion report", "taskID", e.TaskID)
+			e.Summary = extractQuestionText(questionJSON)
+		} else {
+			isBlocked = true
+			success = true // We consider BLOCKED as a "success" for workspace preservation
+			if e.SessionID == "" {
+				r.Logger.Warn("missing session ID; resume will start fresh", "taskID", e.TaskID)
+			}
+			return &BlockedError{
+				QuestionJSON: questionJSON,
+				SessionID:    e.SessionID,
+				SandboxDir:   workspace,
+			}
+		}
+	}
+
+	// Read agent summary if written.
+	summaryFile := filepath.Join(logDir, "summary.txt")
+	if summaryData, readErr := os.ReadFile(summaryFile); readErr == nil {
+		os.Remove(summaryFile) // consumed
+		e.Summary = strings.TrimSpace(string(summaryData))
+	}
+
+	// 5. Post-execution: push changes if successful
+	if waitErr == nil && streamErr == nil {
+		// Check if there are any commits to push (HEAD ahead of origin/HEAD).
+		// If origin/HEAD doesn't exist (e.g. fresh clone with no commits), we attempt push anyway.
+		hasCommits := true
+		if out, err := r.command(ctx, "git", "-C", workspace, "rev-list", "origin/HEAD..HEAD").CombinedOutput(); err == nil {
+			if len(strings.TrimSpace(string(out))) == 0 {
+				hasCommits = false
+			}
+		}
+
+		if hasCommits {
+			r.Logger.Info("pushing changes back to remote", "url", repoURL)
+			if out, err := r.command(ctx, "git", "-C", workspace, "push", "origin", "HEAD").CombinedOutput(); err != nil {
+				r.Logger.Warn("git push failed", "error", err, "output", string(out))
+				return fmt.Errorf("git push failed: %w\n%s", err, string(out))
+			}
+		} else {
+			r.Logger.Info("no new commits to push", "taskID", t.ID)
+		}
+		success = true
+	}
+
+	if waitErr != nil {
+		return fmt.Errorf("container execution failed: %w", waitErr)
+	}
+	if streamErr != nil {
+		return fmt.Errorf("stream parsing failed: %w", streamErr)
+	}
+
+	return nil
+}
+
+// buildDockerArgs assembles the `docker run` argument list up to — but not
+// including — the image name and the inner agent command. The exact ordering
+// is pinned by TestContainerRunner_BuildDockerArgs.
+func (r *ContainerRunner) buildDockerArgs(workspace, claudeHome, taskID string) []string {
+	// --env-file takes a HOST path.
+	hostEnvFile := filepath.Join(workspace, ".claudomator-env")
+
+	// Rewrite localhost so the containerized agent can reach the host API.
+	apiURL := strings.ReplaceAll(r.APIURL, "localhost", "host.docker.internal")
+
+	args := []string{"run", "--rm"}
+	// Allow container to reach the host via host.docker.internal, and run as
+	// the current UID:GID so the container can read host-owned files.
+	args = append(args,
+		"--add-host=host.docker.internal:host-gateway",
+		fmt.Sprintf("--user=%d:%d", os.Getuid(), os.Getgid()),
+	)
+	args = append(args, "-v", workspace+":/workspace")
+	args = append(args, "-v", claudeHome+":/home/agent")
+	args = append(args, "-w", "/workspace")
+	args = append(args, "--env-file", hostEnvFile)
+	args = append(args,
+		"-e", "HOME=/home/agent",
+		"-e", "CLAUDOMATOR_API_URL="+apiURL,
+		"-e", "CLAUDOMATOR_TASK_ID="+taskID,
+		"-e", "CLAUDOMATOR_DROP_DIR="+r.DropsDir,
+	)
+	// Optionally forward the host SSH agent for authenticated git pushes.
+	if sock := r.SSHAuthSock; sock != "" {
+		args = append(args, "-v", sock+":/tmp/ssh-auth.sock", "-e", "SSH_AUTH_SOCK=/tmp/ssh-auth.sock")
+	}
+	return args
+}
+
+// buildInnerCmd builds the command executed inside the container. The agent
+// instructions are injected via a file catted into a shell variable to avoid
+// argv length limits and quoting issues with the instructions' contents.
+func (r *ContainerRunner) buildInnerCmd(t *task.Task, e *storage.Execution, isResume bool) []string {
+	claudeBin := r.ClaudeBinary
+	if claudeBin == "" {
+		claudeBin = "claude"
+	}
+	geminiBin := r.GeminiBinary
+	if geminiBin == "" {
+		geminiBin = "gemini"
+	}
+
+	if t.Agent.Type == "gemini" {
+		return []string{"sh", "-c", fmt.Sprintf("INST=$(cat /workspace/.claudomator-instructions.txt); %s -p \"$INST\"", geminiBin)}
+	}
+
+	// Claude
+	var claudeCmd strings.Builder
+	claudeCmd.WriteString(fmt.Sprintf("INST=$(cat /workspace/.claudomator-instructions.txt); %s -p \"$INST\"", claudeBin))
+	if isResume && e.ResumeSessionID != "" {
+		// Single-quote the session ID (stripping any embedded single quotes)
+		// so an unexpected value cannot inject shell syntax into the `sh -c`
+		// string. Normal session IDs are UUIDs, for which this is inert.
+		sid := strings.ReplaceAll(e.ResumeSessionID, "'", "")
+		claudeCmd.WriteString(fmt.Sprintf(" --resume '%s'", sid))
+	}
+	claudeCmd.WriteString(" --output-format stream-json --verbose --permission-mode bypassPermissions")
+
+	return []string{"sh", "-c", claudeCmd.String()}
+}
+
+
+// fallbackGitInit initialises workspace as a git repository with a single
+// empty commit, staging any existing files.
+//
+// NOTE(review): this helper is not called anywhere in this file and the
+// repoURL parameter is unused — it appears to be dead code left from an
+// earlier clone-fallback design. Confirm before relying on it, or remove it.
+func (r *ContainerRunner) fallbackGitInit(repoURL, workspace string) error {
+	// Ensure directory exists
+	if err := os.MkdirAll(workspace, 0755); err != nil {
+		return err
+	}
+	// If it's a local directory but not a repo, init it.
+	cmds := [][]string{
+		gitSafe("-C", workspace, "init"),
+		gitSafe("-C", workspace, "add", "-A"),
+		gitSafe("-C", workspace, "commit", "--allow-empty", "-m", "chore: initial commit"),
+	}
+	// If it was a local path, maybe we should have copied it?
+	// git clone handle local paths fine if they are repos.
+	// This fallback is only if it's NOT a repo.
+	for _, args := range cmds {
+		if out, err := r.command(context.Background(), "git", args...).CombinedOutput(); err != nil {
+			return fmt.Errorf("git init failed: %w\n%s", err, out)
+		}
+	}
+	return nil
+}
diff --git a/internal/executor/container_test.go b/internal/executor/container_test.go
new file mode 100644
index 0000000..f97f2b5
--- /dev/null
+++ b/internal/executor/container_test.go
@@ -0,0 +1,244 @@
+package executor
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "log/slog"
+ "os"
+ "os/exec"
+ "strings"
+ "testing"
+
+ "github.com/thepeterstone/claudomator/internal/storage"
+ "github.com/thepeterstone/claudomator/internal/task"
+)
+
+func TestContainerRunner_BuildDockerArgs(t *testing.T) {
+ runner := &ContainerRunner{
+ APIURL: "http://localhost:8484",
+ DropsDir: "/data/drops",
+ SSHAuthSock: "/tmp/ssh.sock",
+ }
+ workspace := "/tmp/ws"
+ taskID := "task-123"
+
+ agentHome := "/tmp/ws/.agent-home"
+ args := runner.buildDockerArgs(workspace, agentHome, taskID)
+
+ expected := []string{
+ "run", "--rm",
+ "--add-host=host.docker.internal:host-gateway",
+ fmt.Sprintf("--user=%d:%d", os.Getuid(), os.Getgid()),
+ "-v", "/tmp/ws:/workspace",
+ "-v", "/tmp/ws/.agent-home:/home/agent",
+ "-w", "/workspace",
+ "--env-file", "/tmp/ws/.claudomator-env",
+ "-e", "HOME=/home/agent",
+ "-e", "CLAUDOMATOR_API_URL=http://host.docker.internal:8484",
+ "-e", "CLAUDOMATOR_TASK_ID=task-123",
+ "-e", "CLAUDOMATOR_DROP_DIR=/data/drops",
+ "-v", "/tmp/ssh.sock:/tmp/ssh-auth.sock",
+ "-e", "SSH_AUTH_SOCK=/tmp/ssh-auth.sock",
+ }
+
+ if len(args) != len(expected) {
+ t.Fatalf("expected %d args, got %d. Got: %v", len(expected), len(args), args)
+ }
+ for i, v := range args {
+ if v != expected[i] {
+ t.Errorf("arg %d: expected %q, got %q", i, expected[i], v)
+ }
+ }
+}
+
+func TestContainerRunner_BuildInnerCmd(t *testing.T) {
+ runner := &ContainerRunner{}
+
+ t.Run("claude-fresh", func(t *testing.T) {
+ tk := &task.Task{Agent: task.AgentConfig{Type: "claude"}}
+ exec := &storage.Execution{}
+ cmd := runner.buildInnerCmd(tk, exec, false)
+
+ cmdStr := strings.Join(cmd, " ")
+ if strings.Contains(cmdStr, "--resume") {
+ t.Errorf("unexpected --resume flag in fresh run: %q", cmdStr)
+ }
+ if !strings.Contains(cmdStr, "INST=$(cat /workspace/.claudomator-instructions.txt); claude -p \"$INST\"") {
+ t.Errorf("expected cat instructions in sh command, got %q", cmdStr)
+ }
+ })
+
+ t.Run("claude-resume", func(t *testing.T) {
+ tk := &task.Task{Agent: task.AgentConfig{Type: "claude"}}
+ exec := &storage.Execution{ResumeSessionID: "orig-session-123"}
+ cmd := runner.buildInnerCmd(tk, exec, true)
+
+ cmdStr := strings.Join(cmd, " ")
+ if !strings.Contains(cmdStr, "--resume orig-session-123") {
+ t.Errorf("expected --resume flag with correct session ID, got %q", cmdStr)
+ }
+ })
+
+ t.Run("gemini", func(t *testing.T) {
+ tk := &task.Task{Agent: task.AgentConfig{Type: "gemini"}}
+ exec := &storage.Execution{}
+ cmd := runner.buildInnerCmd(tk, exec, false)
+
+ cmdStr := strings.Join(cmd, " ")
+ if !strings.Contains(cmdStr, "gemini -p \"$INST\"") {
+ t.Errorf("expected gemini command with safer quoting, got %q", cmdStr)
+ }
+ })
+
+ t.Run("custom-binaries", func(t *testing.T) {
+ runnerCustom := &ContainerRunner{
+ ClaudeBinary: "/usr/bin/claude-v2",
+ GeminiBinary: "/usr/local/bin/gemini-pro",
+ }
+
+ tkClaude := &task.Task{Agent: task.AgentConfig{Type: "claude"}}
+ cmdClaude := runnerCustom.buildInnerCmd(tkClaude, &storage.Execution{}, false)
+ if !strings.Contains(strings.Join(cmdClaude, " "), "/usr/bin/claude-v2 -p") {
+ t.Errorf("expected custom claude binary, got %q", cmdClaude)
+ }
+
+ tkGemini := &task.Task{Agent: task.AgentConfig{Type: "gemini"}}
+ cmdGemini := runnerCustom.buildInnerCmd(tkGemini, &storage.Execution{}, false)
+ if !strings.Contains(strings.Join(cmdGemini, " "), "/usr/local/bin/gemini-pro -p") {
+ t.Errorf("expected custom gemini binary, got %q", cmdGemini)
+ }
+ })
+}
+
+func TestContainerRunner_Run_PreservesWorkspaceOnFailure(t *testing.T) {
+ logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+ runner := &ContainerRunner{
+ Logger: logger,
+ Image: "busybox",
+ Command: func(ctx context.Context, name string, arg ...string) *exec.Cmd {
+ // Mock docker run to exit 1
+ if name == "docker" {
+ return exec.Command("sh", "-c", "exit 1")
+ }
+ // Mock git clone to succeed and create the directory
+ if name == "git" && len(arg) > 0 && arg[0] == "clone" {
+ dir := arg[len(arg)-1]
+ os.MkdirAll(dir, 0755)
+ return exec.Command("true")
+ }
+ return exec.Command("true")
+ },
+ }
+
+ tk := &task.Task{
+ ID: "test-task",
+ RepositoryURL: "https://github.com/example/repo.git",
+ Agent: task.AgentConfig{Type: "claude"},
+ }
+ exec := &storage.Execution{ID: "test-exec", TaskID: "test-task"}
+
+ err := runner.Run(context.Background(), tk, exec)
+ if err == nil {
+ t.Fatal("expected error due to mocked docker failure")
+ }
+
+ // Verify SandboxDir was set and directory exists.
+ if exec.SandboxDir == "" {
+ t.Fatal("expected SandboxDir to be set even on failure")
+ }
+ if _, statErr := os.Stat(exec.SandboxDir); statErr != nil {
+ t.Errorf("expected sandbox directory to be preserved, but stat failed: %v", statErr)
+ } else {
+ os.RemoveAll(exec.SandboxDir)
+ }
+}
+
+func TestBlockedError_IncludesSandboxDir(t *testing.T) {
+ // This test requires mocking 'docker run' or the whole Run() which is hard.
+ // But we can test that returning BlockedError works.
+ err := &BlockedError{
+ QuestionJSON: `{"text":"?"}`,
+ SessionID: "s1",
+ SandboxDir: "/tmp/s1",
+ }
+ if !strings.Contains(err.Error(), "task blocked") {
+ t.Errorf("wrong error message: %v", err)
+ }
+}
+
+func TestIsCompletionReport(t *testing.T) {
+ tests := []struct {
+ name string
+ json string
+ expected bool
+ }{
+ {
+ name: "real question with options",
+ json: `{"text": "Should I proceed with implementation?", "options": ["Yes", "No"]}`,
+ expected: false,
+ },
+ {
+ name: "real question no options",
+ json: `{"text": "Which approach do you prefer?"}`,
+ expected: false,
+ },
+ {
+ name: "completion report no options no question mark",
+ json: `{"text": "All tests pass. Implementation complete. Summary written to CLAUDOMATOR_SUMMARY_FILE."}`,
+ expected: true,
+ },
+ {
+ name: "completion report with empty options",
+ json: `{"text": "Feature implemented and committed.", "options": []}`,
+ expected: true,
+ },
+ {
+ name: "invalid json treated as not a report",
+ json: `not json`,
+ expected: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := isCompletionReport(tt.json)
+ if got != tt.expected {
+ t.Errorf("isCompletionReport(%q) = %v, want %v", tt.json, got, tt.expected)
+ }
+ })
+ }
+}
+
+func TestTailFile_ReturnsLastNLines(t *testing.T) {
+ f, err := os.CreateTemp("", "tailfile-*")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.Remove(f.Name())
+ for i := 1; i <= 30; i++ {
+ fmt.Fprintf(f, "line %d\n", i)
+ }
+ f.Close()
+
+ got := tailFile(f.Name(), 5)
+ lines := strings.Split(strings.TrimSpace(got), "\n")
+ if len(lines) != 5 {
+ t.Fatalf("want 5 lines, got %d: %q", len(lines), got)
+ }
+ if lines[0] != "line 26" || lines[4] != "line 30" {
+ t.Errorf("want lines 26-30, got: %q", got)
+ }
+}
+
+func TestGitSafe_PrependsSafeDirectory(t *testing.T) {
+ got := gitSafe("-C", "/some/path", "status")
+ want := []string{"-c", "safe.directory=*", "-C", "/some/path", "status"}
+ if len(got) != len(want) {
+ t.Fatalf("gitSafe() = %v, want %v", got, want)
+ }
+ for i := range want {
+ if got[i] != want[i] {
+ t.Errorf("gitSafe()[%d] = %q, want %q", i, got[i], want[i])
+ }
+ }
+}
diff --git a/internal/executor/executor_test.go b/internal/executor/executor_test.go
index 878a32d..e91d435 100644
--- a/internal/executor/executor_test.go
+++ b/internal/executor/executor_test.go
@@ -600,10 +600,17 @@ func TestPool_RecoverStaleRunning(t *testing.T) {
// Execution record should be closed as FAILED.
execs, _ := store.ListExecutions(tk.ID)
- if len(execs) == 0 || execs[0].Status != "FAILED" {
+ var failedExec *storage.Execution
+ for _, e := range execs {
+ if e.ID == "exec-stale-1" {
+ failedExec = e
+ break
+ }
+ }
+ if failedExec == nil || failedExec.Status != "FAILED" {
t.Errorf("execution status: want FAILED, got %+v", execs)
}
- if execs[0].ErrorMsg == "" {
+ if failedExec.ErrorMsg == "" {
t.Error("expected non-empty error message on recovered execution")
}
diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go
deleted file mode 100644
index b1a245c..0000000
--- a/internal/executor/gemini.go
+++ /dev/null
@@ -1,228 +0,0 @@
-package executor
-
-import (
- "context"
- "encoding/json"
- "fmt"
- "io"
- "log/slog"
- "os"
- "path/filepath"
- "strings"
- "sync"
-
- "github.com/thepeterstone/claudomator/internal/storage"
- "github.com/thepeterstone/claudomator/internal/task"
-)
-
-// GeminiRunner spawns the `gemini` CLI in non-interactive mode.
-type GeminiRunner struct {
- BinaryPath string // defaults to "gemini"
- Logger *slog.Logger
- LogDir string // base directory for execution logs
- APIURL string // base URL of the Claudomator API, passed to subprocesses
- DropsDir string // path to the drops directory, passed to subprocesses
-}
-
-// ExecLogDir returns the log directory for the given execution ID.
-func (r *GeminiRunner) ExecLogDir(execID string) string {
- if r.LogDir == "" {
- return ""
- }
- return filepath.Join(r.LogDir, execID)
-}
-
-func (r *GeminiRunner) binaryPath() string {
- if r.BinaryPath != "" {
- return r.BinaryPath
- }
- return "gemini"
-}
-
-// Run executes a gemini <instructions> invocation, streaming output to log files.
-func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
- if t.Agent.ProjectDir != "" {
- if _, err := os.Stat(t.Agent.ProjectDir); err != nil {
- return fmt.Errorf("project_dir %q: %w", t.Agent.ProjectDir, err)
- }
- }
-
- logDir := r.ExecLogDir(e.ID)
- if logDir == "" {
- logDir = e.ID
- }
- if err := os.MkdirAll(logDir, 0700); err != nil {
- return fmt.Errorf("creating log dir: %w", err)
- }
-
- if e.StdoutPath == "" {
- e.StdoutPath = filepath.Join(logDir, "stdout.log")
- e.StderrPath = filepath.Join(logDir, "stderr.log")
- e.ArtifactDir = logDir
- }
-
- if e.SessionID == "" {
- e.SessionID = e.ID
- }
-
- questionFile := filepath.Join(logDir, "question.json")
- args := r.buildArgs(t, e, questionFile)
-
- // Gemini CLI doesn't necessarily have the same rate limiting behavior as Claude,
- // but we'll use a similar execution pattern.
- err := r.execOnce(ctx, args, t.Agent.ProjectDir, t.Agent.ProjectDir, e)
- if err != nil {
- return err
- }
-
- // Check whether the agent left a question before exiting.
- data, readErr := os.ReadFile(questionFile)
- if readErr == nil {
- os.Remove(questionFile) // consumed
- return &BlockedError{QuestionJSON: strings.TrimSpace(string(data)), SessionID: e.SessionID}
- }
- return nil
-}
-
-func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir, projectDir string, e *storage.Execution) error {
- // Temporarily bypass external command execution to debug pipe.
- // We will simulate outputting to stdoutW directly.
-
- stdoutFile, err := os.Create(e.StdoutPath)
- if err != nil {
- return fmt.Errorf("creating stdout log: %w", err)
- }
- defer stdoutFile.Close()
-
- stderrFile, err := os.Create(e.StderrPath)
- if err != nil {
- return fmt.Errorf("creating stderr log: %w", err)
- }
- defer stderrFile.Close()
-
- stdoutR, stdoutW, err := os.Pipe()
- if err != nil {
- return fmt.Errorf("creating stdout pipe: %w", err)
- }
-
- // Simulate writing to stdoutW
- go func() {
- defer stdoutW.Close() // Close the writer when done.
- fmt.Fprintf(stdoutW, "```json\n")
- fmt.Fprintf(stdoutW, "{\"type\":\"content_block_start\",\"content_block\":{\"text\":\"Hello, Gemini!\",\"type\":\"text\"}}\n")
- fmt.Fprintf(stdoutW, "{\"type\":\"content_block_delta\",\"content_block\":{\"text\":\" How are you?\"}}\n")
- fmt.Fprintf(stdoutW, "{\"type\":\"content_block_end\"}\n")
- fmt.Fprintf(stdoutW, "{\"type\":\"message_delta\",\"message\":{\"role\":\"model\"}}\n")
- fmt.Fprintf(stdoutW, "{\"type\":\"message_end\"}\n")
- fmt.Fprintf(stdoutW, "```\n")
- }()
-
-
- var streamErr error
- var wg sync.WaitGroup
- wg.Add(1)
- go func() {
- defer wg.Done()
- _, streamErr = parseGeminiStream(stdoutR, stdoutFile, r.Logger)
- stdoutR.Close()
- }()
-
- wg.Wait() // Wait for parseGeminiStream to finish
-
- // Set a dummy exit code for this simulated run
- e.ExitCode = 0
-
- if streamErr != nil {
- return streamErr
- }
- return nil
-}
-
-// parseGeminiStream reads streaming JSON from the gemini CLI, unwraps markdown
-// code blocks, writes the inner JSON to w, and returns (costUSD, error).
-// For now, it focuses on unwrapping and writing, not detailed parsing of cost/errors.
-func parseGeminiStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64, error) {
- fullOutput, err := io.ReadAll(r)
- if err != nil {
- return 0, fmt.Errorf("reading full gemini output: %w", err)
- }
- logger.Debug("parseGeminiStream: raw output received", "output", string(fullOutput))
-
- // Default: write raw content as-is (preserves trailing newline).
- jsonContent := string(fullOutput)
-
- // Unwrap markdown code fences if present.
- trimmed := strings.TrimSpace(jsonContent)
- if jsonStartIdx := strings.Index(trimmed, "```json"); jsonStartIdx != -1 {
- if jsonEndIdx := strings.LastIndex(trimmed, "```"); jsonEndIdx != -1 && jsonEndIdx > jsonStartIdx {
- inner := trimmed[jsonStartIdx+len("```json") : jsonEndIdx]
- jsonContent = strings.TrimSpace(inner) + "\n"
- } else {
- logger.Warn("malformed markdown JSON block from Gemini, falling back to raw output", "outputLength", len(jsonContent))
- }
- }
-
- // Write the (possibly extracted) JSON content to the writer.
- if _, writeErr := w.Write([]byte(jsonContent)); writeErr != nil {
- return 0, fmt.Errorf("writing extracted gemini json: %w", writeErr)
- }
-
- // Parse each line for result type to extract cost and execution errors.
- var resultErr error
- var costUSD float64
- for _, line := range strings.Split(jsonContent, "\n") {
- line = strings.TrimSpace(line)
- if line == "" {
- continue
- }
- var msg struct {
- Type string `json:"type"`
- IsError bool `json:"is_error"`
- Result string `json:"result"`
- Cost float64 `json:"total_cost_usd"`
- }
- if err := json.Unmarshal([]byte(line), &msg); err != nil {
- continue
- }
- if msg.Type == "result" {
- costUSD = msg.Cost
- if msg.IsError {
- resultErr = fmt.Errorf("gemini execution error: %s", msg.Result)
- }
- }
- }
-
- return costUSD, resultErr
-}
-
-func (r *GeminiRunner) buildArgs(t *task.Task, e *storage.Execution, questionFile string) []string {
- // Gemini CLI uses a different command structure: gemini "instructions" [flags]
-
- instructions := t.Agent.Instructions
- if !t.Agent.SkipPlanning {
- instructions = withPlanningPreamble(instructions)
- }
-
- args := []string{
- "-p", instructions,
- "--output-format", "stream-json",
- "--yolo", // auto-approve all tools (equivalent to Claude's bypassPermissions)
- }
-
- // Note: Gemini CLI flags might differ from Claude CLI.
- // Assuming common flags for now, but these may need adjustment.
- if t.Agent.Model != "" {
- args = append(args, "--model", t.Agent.Model)
- }
-
- // Gemini CLI doesn't use --session-id for the first run in the same way,
- // or it might use it differently. For now we assume compatibility.
- if e.SessionID != "" {
- // If it's a resume, it might use different flags.
- if e.ResumeSessionID != "" {
- // This is a placeholder for Gemini's resume logic
- }
- }
-
- return args
-}
diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go
deleted file mode 100644
index 4b0339e..0000000
--- a/internal/executor/gemini_test.go
+++ /dev/null
@@ -1,179 +0,0 @@
-package executor
-
-import (
- "bytes"
- "context"
- "io"
- "log/slog"
- "strings"
- "testing"
-
- "github.com/thepeterstone/claudomator/internal/storage"
- "github.com/thepeterstone/claudomator/internal/task"
-)
-
-func TestGeminiRunner_BuildArgs_BasicTask(t *testing.T) {
- r := &GeminiRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "gemini",
- Instructions: "fix the bug",
- Model: "gemini-2.5-flash-lite",
- SkipPlanning: true,
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- // Gemini CLI: instructions passed via -p for non-interactive mode
- if len(args) < 2 || args[0] != "-p" || args[1] != "fix the bug" {
- t.Errorf("expected -p <instructions> as first args, got: %v", args)
- }
-
- argMap := make(map[string]bool)
- for _, a := range args {
- argMap[a] = true
- }
- for _, want := range []string{"--output-format", "stream-json", "--model", "gemini-2.5-flash-lite"} {
- if !argMap[want] {
- t.Errorf("missing arg %q in %v", want, args)
- }
- }
-}
-
-func TestGeminiRunner_BuildArgs_PreamblePrepended(t *testing.T) {
- r := &GeminiRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "gemini",
- Instructions: "fix the bug",
- SkipPlanning: false,
- },
- }
-
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
-
- if len(args) < 2 || args[0] != "-p" {
- t.Fatalf("expected -p <instructions> as first args, got: %v", args)
- }
- if !strings.HasPrefix(args[1], planningPreamble) {
- t.Errorf("instructions should start with planning preamble")
- }
- if !strings.HasSuffix(args[1], "fix the bug") {
- t.Errorf("instructions should end with original instructions")
- }
-}
-
-func TestGeminiRunner_BuildArgs_IncludesYolo(t *testing.T) {
- r := &GeminiRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "gemini",
- Instructions: "write a doc",
- SkipPlanning: true,
- },
- }
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
- argMap := make(map[string]bool)
- for _, a := range args {
- argMap[a] = true
- }
- if !argMap["--yolo"] {
- t.Errorf("expected --yolo in gemini args (enables all tools); got: %v", args)
- }
-}
-
-func TestGeminiRunner_BuildArgs_IncludesPromptFlag(t *testing.T) {
- r := &GeminiRunner{}
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "gemini",
- Instructions: "do the thing",
- SkipPlanning: true,
- },
- }
- args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
- // Instructions must be passed via -p/--prompt for non-interactive headless mode,
- // not as a bare positional (which starts interactive mode).
- found := false
- for i, a := range args {
- if (a == "-p" || a == "--prompt") && i+1 < len(args) && args[i+1] == "do the thing" {
- found = true
- break
- }
- }
- if !found {
- t.Errorf("expected instructions passed via -p/--prompt flag; got: %v", args)
- }
-}
-
-func TestGeminiRunner_Run_InaccessibleProjectDir_ReturnsError(t *testing.T) {
- r := &GeminiRunner{
- BinaryPath: "true", // would succeed if it ran
- Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
- LogDir: t.TempDir(),
- }
- tk := &task.Task{
- Agent: task.AgentConfig{
- Type: "gemini",
- ProjectDir: "/nonexistent/path/does/not/exist",
- SkipPlanning: true,
- },
- }
- exec := &storage.Execution{ID: "test-exec"}
-
- err := r.Run(context.Background(), tk, exec)
-
- if err == nil {
- t.Fatal("expected error for inaccessible project_dir, got nil")
- }
- if !strings.Contains(err.Error(), "project_dir") {
- t.Errorf("expected 'project_dir' in error, got: %v", err)
- }
-}
-
-func TestGeminiRunner_BinaryPath_Default(t *testing.T) {
- r := &GeminiRunner{}
- if r.binaryPath() != "gemini" {
- t.Errorf("want 'gemini', got %q", r.binaryPath())
- }
-}
-
-func TestGeminiRunner_BinaryPath_Custom(t *testing.T) {
- r := &GeminiRunner{BinaryPath: "/usr/local/bin/gemini"}
- if r.binaryPath() != "/usr/local/bin/gemini" {
- t.Errorf("want custom path, got %q", r.binaryPath())
- }
-}
-
-
-func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) {
- // Simulate a stream-json input with various message types, including a result with error and cost.
- input := streamLine(`{"type":"content_block_start","content_block":{"text":"Hello,"}}`) +
- streamLine(`{"type":"content_block_delta","content_block":{"text":" World!"}}`) +
- streamLine(`{"type":"content_block_end"}`) +
- streamLine(`{"type":"result","subtype":"error_during_execution","is_error":true,"result":"something went wrong","total_cost_usd":0.123}`)
-
- reader := strings.NewReader(input)
- var writer bytes.Buffer // To capture what's written to the output log
- logger := slog.New(slog.NewTextHandler(io.Discard, nil))
-
- cost, err := parseGeminiStream(reader, &writer, logger)
-
- if err == nil {
- t.Errorf("expected an error, got nil")
- }
- if !strings.Contains(err.Error(), "something went wrong") {
- t.Errorf("expected error message to contain 'something went wrong', got: %v", err)
- }
-
- if cost != 0.123 {
- t.Errorf("expected cost 0.123, got %f", cost)
- }
-
- // Verify that the writer received the content (even if parseGeminiStream isn't fully parsing it yet)
- expectedWriterContent := input
- if writer.String() != expectedWriterContent {
- t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", expectedWriterContent, writer.String())
- }
-}
diff --git a/internal/executor/helpers.go b/internal/executor/helpers.go
new file mode 100644
index 0000000..9e4530b
--- /dev/null
+++ b/internal/executor/helpers.go
@@ -0,0 +1,174 @@
+package executor
+
+import (
+ "bufio"
+ "encoding/json"
+ "fmt"
+ "io"
+ "log/slog"
+ "os"
+ "strings"
+)
+
// BlockedError is returned by Run when the agent wrote a question file and exited.
// The pool transitions the task to BLOCKED and stores the question for the user.
type BlockedError struct {
	QuestionJSON string // raw JSON from the question file
	SessionID    string // claude session to resume once the user answers
	SandboxDir   string // preserved sandbox path; resume must run here so Claude finds its session files
}

// Error renders the raw question JSON behind a fixed "task blocked" prefix.
func (e *BlockedError) Error() string {
	return fmt.Sprintf("task blocked: %s", e.QuestionJSON)
}
+
// parseStream reads streaming JSON lines from claude, tees every byte to w
// (the stdout log), and returns (costUSD, sessionID, error). The error is
// non-nil if the stream signals task failure:
//   - a result message has is_error:true
//   - a tool_result was denied due to missing permissions
//   - a rate_limit_event reports status "rejected" (parsing stops there)
//
// Lines that are not valid JSON are silently skipped. The session ID is taken
// from the system/init message so callers can later --resume the session.
// NOTE(review): the logger parameter is not used in this body — confirm
// whether it is kept for interface symmetry or should be wired up.
func parseStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64, string, error) {
	// Tee so the raw stream is preserved in the log even while we parse it.
	tee := io.TeeReader(r, w)
	scanner := bufio.NewScanner(tee)
	scanner.Buffer(make([]byte, 1024*1024), 1024*1024) // 1MB buffer for large lines

	var totalCost float64
	var sessionID string
	var streamErr error

Loop:
	for scanner.Scan() {
		line := scanner.Bytes()
		var msg map[string]interface{}
		if err := json.Unmarshal(line, &msg); err != nil {
			// Tolerate non-JSON output (stray diagnostics, partial lines).
			continue
		}

		msgType, _ := msg["type"].(string)
		switch msgType {
		case "system":
			// The init message carries the session ID needed for resume.
			if subtype, ok := msg["subtype"].(string); ok && subtype == "init" {
				if sid, ok := msg["session_id"].(string); ok {
					sessionID = sid
				}
			}
		case "rate_limit_event":
			if info, ok := msg["rate_limit_info"].(map[string]interface{}); ok {
				status, _ := info["status"].(string)
				if status == "rejected" {
					streamErr = fmt.Errorf("claude rate limit reached (rejected): %v", msg)
					// Immediately break since we can't continue anyway
					break Loop
				}
			}
		case "assistant":
			// Older stream shape: rate-limit surfaced as an assistant error.
			if errStr, ok := msg["error"].(string); ok && errStr == "rate_limit" {
				streamErr = fmt.Errorf("claude rate limit reached: %v", msg)
			}
		case "result":
			if isErr, _ := msg["is_error"].(bool); isErr {
				result, _ := msg["result"].(string)
				if result != "" {
					streamErr = fmt.Errorf("claude task failed: %s", result)
				} else {
					streamErr = fmt.Errorf("claude task failed (is_error=true in result)")
				}
			}
			// Prefer total_cost_usd from result message; fall through to legacy check below.
			if cost, ok := msg["total_cost_usd"].(float64); ok {
				totalCost = cost
			}
		case "user":
			// Detect permission-denial tool_results. These occur when permission_mode
			// is not bypassPermissions and claude exits 0 without completing its task.
			// Only the first such denial is recorded (streamErr is not overwritten).
			if err := permissionDenialError(msg); err != nil && streamErr == nil {
				streamErr = err
			}
		}

		// Legacy cost field used by older claude versions.
		if cost, ok := msg["cost_usd"].(float64); ok {
			totalCost = cost
		}
	}

	return totalCost, sessionID, streamErr
}
+
+
// permissionDenialError inspects a "user" stream message for tool_result
// entries that were denied due to missing permissions. It returns an error
// describing the first denial found, or nil — including when the message does
// not have the expected message/content shape.
func permissionDenialError(msg map[string]interface{}) error {
	inner, ok := msg["message"].(map[string]interface{})
	if !ok {
		return nil
	}
	entries, ok := inner["content"].([]interface{})
	if !ok {
		return nil
	}
	for _, raw := range entries {
		entry, isMap := raw.(map[string]interface{})
		if !isMap || entry["type"] != "tool_result" {
			continue
		}
		if failed, _ := entry["is_error"].(bool); !failed {
			continue
		}
		body, _ := entry["content"].(string)
		// Only permission-related failures count; ordinary tool errors
		// (compile failures etc.) must not fail the task.
		if strings.Contains(body, "requested permissions") || strings.Contains(body, "haven't granted") {
			return fmt.Errorf("permission denied by host: %s", body)
		}
	}
	return nil
}
+
+// tailFile returns the last n lines of the file at path, or empty string if
+// the file cannot be read. Used to surface subprocess stderr on failure.
+func tailFile(path string, n int) string {
+ f, err := os.Open(path)
+ if err != nil {
+ return ""
+ }
+ defer f.Close()
+
+ var lines []string
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ lines = append(lines, scanner.Text())
+ if len(lines) > n {
+ lines = lines[1:]
+ }
+ }
+ return strings.Join(lines, "\n")
+}
+
// gitSafe prefixes the given git arguments with a config override marking
// every directory as safe, so git commands work on workspaces whose files are
// owned by a different uid (e.g. created inside the container).
func gitSafe(args ...string) []string {
	prefixed := make([]string, 0, len(args)+2)
	prefixed = append(prefixed, "-c", "safe.directory=*")
	return append(prefixed, args...)
}
+
// isCompletionReport returns true when a question-file JSON looks like a
// completion report rather than a real user question. Heuristic: no options
// (or empty options) and no "?" anywhere in the text.
func isCompletionReport(questionJSON string) bool {
	var parsed struct {
		Text    string   `json:"text"`
		Options []string `json:"options"`
	}
	if json.Unmarshal([]byte(questionJSON), &parsed) != nil {
		// Unparseable payloads are conservatively treated as real questions.
		return false
	}
	hasOptions := len(parsed.Options) > 0
	asksSomething := strings.Contains(parsed.Text, "?")
	return !hasOptions && !asksSomething
}
+
// extractQuestionText returns the trimmed "text" field from a question-file
// JSON payload; if the payload does not parse as JSON, the raw input string
// is returned unchanged as a fallback.
func extractQuestionText(questionJSON string) string {
	var payload struct {
		Text string `json:"text"`
	}
	if json.Unmarshal([]byte(questionJSON), &payload) != nil {
		return questionJSON
	}
	return strings.TrimSpace(payload.Text)
}
diff --git a/internal/executor/stream_test.go b/internal/executor/stream_test.go
index 10eb858..11a6178 100644
--- a/internal/executor/stream_test.go
+++ b/internal/executor/stream_test.go
@@ -12,7 +12,7 @@ func streamLine(json string) string { return json + "\n" }
func TestParseStream_ResultIsError_ReturnsError(t *testing.T) {
input := streamLine(`{"type":"result","subtype":"error_during_execution","is_error":true,"result":"something went wrong"}`)
- _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
+ _, _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
if err == nil {
t.Fatal("expected error when result.is_error=true, got nil")
}
@@ -27,7 +27,7 @@ func TestParseStream_PermissionDenied_ReturnsError(t *testing.T) {
input := streamLine(`{"type":"user","message":{"role":"user","content":[{"type":"tool_result","is_error":true,"content":"Claude requested permissions to write to /foo/bar.go, but you haven't granted it yet.","tool_use_id":"tu_abc"}]}}`) +
streamLine(`{"type":"result","subtype":"success","is_error":false,"result":"I need permission","total_cost_usd":0.1}`)
- _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
+ _, _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
if err == nil {
t.Fatal("expected error for permission denial, got nil")
}
@@ -40,7 +40,7 @@ func TestParseStream_Success_ReturnsNilError(t *testing.T) {
input := streamLine(`{"type":"assistant","message":{"content":[{"type":"text","text":"Done."}]}}`) +
streamLine(`{"type":"result","subtype":"success","is_error":false,"result":"All tests pass.","total_cost_usd":0.05}`)
- _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
+ _, _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
if err != nil {
t.Fatalf("expected nil error for success stream, got: %v", err)
}
@@ -49,7 +49,7 @@ func TestParseStream_Success_ReturnsNilError(t *testing.T) {
func TestParseStream_ExtractsCostFromResultMessage(t *testing.T) {
input := streamLine(`{"type":"result","subtype":"success","is_error":false,"result":"done","total_cost_usd":1.2345}`)
- cost, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
+ cost, _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -62,7 +62,7 @@ func TestParseStream_ExtractsCostFromLegacyCostUSD(t *testing.T) {
// Some versions emit cost_usd at the top level rather than total_cost_usd.
input := streamLine(`{"type":"result","subtype":"success","is_error":false,"result":"done","cost_usd":0.99}`)
- cost, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
+ cost, _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -78,8 +78,21 @@ func TestParseStream_NonToolResultIsError_DoesNotFail(t *testing.T) {
input := streamLine(`{"type":"user","message":{"role":"user","content":[{"type":"tool_result","is_error":true,"content":"exit status 1","tool_use_id":"tu_xyz"}]}}`) +
streamLine(`{"type":"result","subtype":"success","is_error":false,"result":"Fixed it.","total_cost_usd":0.2}`)
- _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
+ _, _, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
if err != nil {
t.Fatalf("non-permission tool errors should not fail the task, got: %v", err)
}
}
+
+func TestParseStream_ExtractsSessionID(t *testing.T) {
+ input := streamLine(`{"type":"system","subtype":"init","session_id":"sess-999"}`) +
+ streamLine(`{"type":"result","subtype":"success","is_error":false,"result":"ok","total_cost_usd":0.01}`)
+
+ _, sid, err := parseStream(strings.NewReader(input), io.Discard, slog.New(slog.NewTextHandler(io.Discard, nil)))
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if sid != "sess-999" {
+ t.Errorf("want session ID sess-999, got %q", sid)
+ }
+}
diff --git a/internal/notify/vapid.go b/internal/notify/vapid.go
index d93a090..684bf4d 100644
--- a/internal/notify/vapid.go
+++ b/internal/notify/vapid.go
@@ -1,6 +1,10 @@
package notify
-import webpush "github.com/SherClockHolmes/webpush-go"
+import (
+ "encoding/base64"
+
+ webpush "github.com/SherClockHolmes/webpush-go"
+)
// GenerateVAPIDKeys generates a VAPID key pair for web push notifications.
// Returns the base64url-encoded public and private keys.
@@ -9,3 +13,13 @@ func GenerateVAPIDKeys() (publicKey, privateKey string, err error) {
privateKey, publicKey, err = webpush.GenerateVAPIDKeys()
return
}
+
+// ValidateVAPIDPublicKey reports whether key is a valid VAPID public key:
+// a base64url-encoded 65-byte uncompressed P-256 point (starts with 0x04).
+func ValidateVAPIDPublicKey(key string) bool {
+ b, err := base64.RawURLEncoding.DecodeString(key)
+ if err != nil {
+ return false
+ }
+ return len(b) == 65 && b[0] == 0x04
+}
diff --git a/internal/notify/vapid_test.go b/internal/notify/vapid_test.go
index 6157854..a45047d 100644
--- a/internal/notify/vapid_test.go
+++ b/internal/notify/vapid_test.go
@@ -5,6 +5,27 @@ import (
"testing"
)
+// TestValidateVAPIDPublicKey verifies that ValidateVAPIDPublicKey accepts valid
+// public keys and rejects private keys, empty strings, and invalid base64.
+func TestValidateVAPIDPublicKey(t *testing.T) {
+ pub, priv, err := GenerateVAPIDKeys()
+ if err != nil {
+ t.Fatalf("GenerateVAPIDKeys: %v", err)
+ }
+ if !ValidateVAPIDPublicKey(pub) {
+ t.Error("valid public key should pass validation")
+ }
+ if ValidateVAPIDPublicKey(priv) {
+ t.Error("private key (32 bytes) should fail public key validation")
+ }
+ if ValidateVAPIDPublicKey("") {
+ t.Error("empty string should fail validation")
+ }
+ if ValidateVAPIDPublicKey("notbase64!!!") {
+ t.Error("invalid base64 should fail validation")
+ }
+}
+
// TestGenerateVAPIDKeys_PublicKeyIs65Bytes verifies that the public key returned
// by GenerateVAPIDKeys is a 65-byte uncompressed P256 EC point (base64url, no padding = 87 chars)
// and the private key is 32 bytes (43 chars). Previously the return values were swapped.
diff --git a/internal/storage/db.go b/internal/storage/db.go
index 25801b2..8bc9864 100644
--- a/internal/storage/db.go
+++ b/internal/storage/db.go
@@ -87,6 +87,7 @@ func (s *DB) migrate() error {
`ALTER TABLE executions ADD COLUMN commits_json TEXT NOT NULL DEFAULT '[]'`,
`ALTER TABLE tasks ADD COLUMN elaboration_input TEXT`,
`ALTER TABLE tasks ADD COLUMN project TEXT`,
+ `ALTER TABLE tasks ADD COLUMN repository_url TEXT`,
`CREATE TABLE IF NOT EXISTS push_subscriptions (
id TEXT PRIMARY KEY,
endpoint TEXT NOT NULL UNIQUE,
@@ -135,9 +136,9 @@ func (s *DB) CreateTask(t *task.Task) error {
}
_, err = s.db.Exec(`
- INSERT INTO tasks (id, name, description, elaboration_input, project, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at)
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
- t.ID, t.Name, t.Description, t.ElaborationInput, t.Project, string(configJSON), string(t.Priority),
+ INSERT INTO tasks (id, name, description, elaboration_input, project, repository_url, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+ t.ID, t.Name, t.Description, t.ElaborationInput, t.Project, t.RepositoryURL, string(configJSON), string(t.Priority),
t.Timeout.Duration.Nanoseconds(), string(retryJSON), string(tagsJSON), string(depsJSON),
t.ParentTaskID, string(t.State), t.CreatedAt.UTC(), t.UpdatedAt.UTC(),
)
@@ -146,13 +147,13 @@ func (s *DB) CreateTask(t *task.Task) error {
// GetTask retrieves a task by ID.
func (s *DB) GetTask(id string) (*task.Task, error) {
- row := s.db.QueryRow(`SELECT id, name, description, elaboration_input, project, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at, rejection_comment, question_json, summary, interactions_json FROM tasks WHERE id = ?`, id)
+ row := s.db.QueryRow(`SELECT id, name, description, elaboration_input, project, repository_url, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at, rejection_comment, question_json, summary, interactions_json FROM tasks WHERE id = ?`, id)
return scanTask(row)
}
// ListTasks returns tasks matching the given filter.
func (s *DB) ListTasks(filter TaskFilter) ([]*task.Task, error) {
- query := `SELECT id, name, description, elaboration_input, project, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at, rejection_comment, question_json, summary, interactions_json FROM tasks WHERE 1=1`
+ query := `SELECT id, name, description, elaboration_input, project, repository_url, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at, rejection_comment, question_json, summary, interactions_json FROM tasks WHERE 1=1`
var args []interface{}
if filter.State != "" {
@@ -188,7 +189,7 @@ func (s *DB) ListTasks(filter TaskFilter) ([]*task.Task, error) {
// ListSubtasks returns all tasks whose parent_task_id matches the given ID.
func (s *DB) ListSubtasks(parentID string) ([]*task.Task, error) {
- rows, err := s.db.Query(`SELECT id, name, description, elaboration_input, project, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at, rejection_comment, question_json, summary, interactions_json FROM tasks WHERE parent_task_id = ? ORDER BY created_at ASC`, parentID)
+ rows, err := s.db.Query(`SELECT id, name, description, elaboration_input, project, repository_url, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at, rejection_comment, question_json, summary, interactions_json FROM tasks WHERE parent_task_id = ? ORDER BY created_at ASC`, parentID)
if err != nil {
return nil, err
}
@@ -241,7 +242,7 @@ func (s *DB) ResetTaskForRetry(id string) (*task.Task, error) {
}
defer tx.Rollback() //nolint:errcheck
- t, err := scanTask(tx.QueryRow(`SELECT id, name, description, elaboration_input, project, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at, rejection_comment, question_json, summary, interactions_json FROM tasks WHERE id = ?`, id))
+ t, err := scanTask(tx.QueryRow(`SELECT id, name, description, elaboration_input, project, repository_url, config_json, priority, timeout_ns, retry_json, tags_json, depends_on_json, parent_task_id, state, created_at, updated_at, rejection_comment, question_json, summary, interactions_json FROM tasks WHERE id = ?`, id))
if err != nil {
if err == sql.ErrNoRows {
return nil, fmt.Errorf("task %q not found", id)
@@ -688,15 +689,17 @@ func scanTask(row scanner) (*task.Task, error) {
parentTaskID sql.NullString
elaborationInput sql.NullString
project sql.NullString
+ repositoryURL sql.NullString
rejectionComment sql.NullString
questionJSON sql.NullString
summary sql.NullString
interactionsJSON sql.NullString
)
- err := row.Scan(&t.ID, &t.Name, &t.Description, &elaborationInput, &project, &configJSON, &priority, &timeoutNS, &retryJSON, &tagsJSON, &depsJSON, &parentTaskID, &state, &t.CreatedAt, &t.UpdatedAt, &rejectionComment, &questionJSON, &summary, &interactionsJSON)
+ err := row.Scan(&t.ID, &t.Name, &t.Description, &elaborationInput, &project, &repositoryURL, &configJSON, &priority, &timeoutNS, &retryJSON, &tagsJSON, &depsJSON, &parentTaskID, &state, &t.CreatedAt, &t.UpdatedAt, &rejectionComment, &questionJSON, &summary, &interactionsJSON)
t.ParentTaskID = parentTaskID.String
t.ElaborationInput = elaborationInput.String
t.Project = project.String
+ t.RepositoryURL = repositoryURL.String
t.RejectionComment = rejectionComment.String
t.QuestionJSON = questionJSON.String
t.Summary = summary.String
diff --git a/internal/task/task.go b/internal/task/task.go
index 3a04716..465de8b 100644
--- a/internal/task/task.go
+++ b/internal/task/task.go
@@ -32,7 +32,9 @@ type AgentConfig struct {
Model string `yaml:"model" json:"model"`
ContextFiles []string `yaml:"context_files" json:"context_files"`
Instructions string `yaml:"instructions" json:"instructions"`
- ProjectDir string `yaml:"project_dir" json:"project_dir"`
+ RepositoryURL string `yaml:"repository_url" json:"repository_url"`
+ ContainerImage string `yaml:"container_image" json:"container_image"`
+ ProjectDir string `yaml:"project_dir" json:"project_dir"` // Deprecated: use Task.RepositoryURL
MaxBudgetUSD float64 `yaml:"max_budget_usd" json:"max_budget_usd"`
PermissionMode string `yaml:"permission_mode" json:"permission_mode"`
AllowedTools []string `yaml:"allowed_tools" json:"allowed_tools"`
@@ -74,7 +76,8 @@ type Task struct {
ParentTaskID string `yaml:"parent_task_id" json:"parent_task_id"`
Name string `yaml:"name" json:"name"`
Description string `yaml:"description" json:"description"`
- Project string `yaml:"project" json:"project"`
+ Project string `yaml:"project" json:"project"` // Human-readable project name
+ RepositoryURL string `yaml:"repository_url" json:"repository_url"`
Agent AgentConfig `yaml:"agent" json:"agent"`
Timeout Duration `yaml:"timeout" json:"timeout"`
Retry RetryConfig `yaml:"retry" json:"retry"`
diff --git a/scripts/drain-failed-tasks b/scripts/drain-failed-tasks
new file mode 100644
index 0000000..4bb6992
--- /dev/null
+++ b/scripts/drain-failed-tasks
@@ -0,0 +1,22 @@
+#!/bin/bash
+# drain-failed-tasks — retry failed tasks by running start-next-task every 5 minutes
+# Usage: ./scripts/drain-failed-tasks [iterations]
+# Default: 29 iterations (~2.5 hours total at 5-minute intervals)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+ITERATIONS="${1:-29}"
+INTERVAL=300 # 5 minutes
+
+echo "Running start-next-task every ${INTERVAL}s for ${ITERATIONS} iterations"
+
+for ((i=1; i<=ITERATIONS; i++)); do
+ echo "[$(date '+%H:%M:%S')] Iteration ${i}/${ITERATIONS}"
+ "$SCRIPT_DIR/start-next-task" || true
+ if [[ $i -lt $ITERATIONS ]]; then
+ sleep "$INTERVAL"
+ fi
+done
+
+echo "[$(date '+%H:%M:%S')] Done."
diff --git a/web/app.js b/web/app.js
index f785605..6ddb23c 100644
--- a/web/app.js
+++ b/web/app.js
@@ -407,6 +407,14 @@ export function setTaskFilterTab(tab) {
localStorage.setItem('taskFilterTab', tab);
}
+export function getActiveMainTab() {
+ return localStorage.getItem('activeMainTab') ?? 'queue';
+}
+
+export function setActiveMainTab(tab) {
+ localStorage.setItem('activeMainTab', tab);
+}
+
// ── Tab badge counts ───────────────────────────────────────────────────────────
/**
@@ -1470,12 +1478,13 @@ function buildValidatePayload() {
const f = document.getElementById('task-form');
const name = f.querySelector('[name="name"]').value;
const instructions = f.querySelector('[name="instructions"]').value;
- const project_dir = f.querySelector('#project-select').value;
+ const repository_url = document.getElementById('repository-url').value;
+ const container_image = document.getElementById('container-image').value;
const allowedToolsEl = f.querySelector('[name="allowed_tools"]');
const allowed_tools = allowedToolsEl
? allowedToolsEl.value.split(',').map(s => s.trim()).filter(Boolean)
: [];
- return { name, agent: { instructions, project_dir, allowed_tools } };
+ return { name, repository_url, agent: { instructions, container_image, allowed_tools } };
}
function renderValidationResult(result) {
@@ -1533,49 +1542,6 @@ function renderValidationResult(result) {
async function openTaskModal() {
document.getElementById('task-modal').showModal();
- await populateProjectSelect();
-}
-
-async function populateProjectSelect() {
- const select = document.getElementById('project-select');
- const current = select.value;
- try {
- const res = await fetch(`${API_BASE}/api/workspaces`);
- const dirs = await res.json();
- select.innerHTML = '';
- dirs.forEach(dir => {
- const opt = document.createElement('option');
- opt.value = dir;
- opt.textContent = dir;
- if (dir === current || dir === '/workspace/claudomator') opt.selected = true;
- select.appendChild(opt);
- });
- } catch {
- // keep whatever options are already there
- }
- // Ensure "Create new project…" option is always last
- const newOpt = document.createElement('option');
- newOpt.value = '__new__';
- newOpt.textContent = 'Create new project…';
- select.appendChild(newOpt);
-}
-
-function initProjectSelect() {
- const select = document.getElementById('project-select');
- const newRow = document.getElementById('new-project-row');
- const newInput = document.getElementById('new-project-input');
- if (!select) return;
- select.addEventListener('change', () => {
- if (select.value === '__new__') {
- newRow.hidden = false;
- newInput.required = true;
- newInput.focus();
- } else {
- newRow.hidden = true;
- newInput.required = false;
- newInput.value = '';
- }
- });
}
function closeTaskModal() {
@@ -1589,20 +1555,20 @@ function closeTaskModal() {
}
async function createTask(formData) {
- const selectVal = formData.get('project_dir');
- const workingDir = selectVal === '__new__'
- ? document.getElementById('new-project-input').value.trim()
- : selectVal;
+ const repository_url = formData.get('repository_url');
+ const container_image = formData.get('container_image');
const elaboratePromptEl = document.getElementById('elaborate-prompt');
const elaborationInput = elaboratePromptEl ? elaboratePromptEl.value.trim() : '';
const body = {
name: formData.get('name'),
description: '',
elaboration_input: elaborationInput || undefined,
+ repository_url: repository_url,
agent: {
instructions: formData.get('instructions'),
- project_dir: workingDir,
+ container_image: container_image,
max_budget_usd: parseFloat(formData.get('max_budget_usd')),
+ type: 'container',
},
timeout: formData.get('timeout'),
priority: formData.get('priority'),
@@ -1743,7 +1709,7 @@ function makeMetaItem(label, valueText, opts = {}) {
return item;
}
-function renderTaskPanel(task, executions) {
+export function renderTaskPanel(task, executions) {
document.getElementById('task-panel-title').textContent = task.name;
const content = document.getElementById('task-panel-content');
content.innerHTML = '';
@@ -2729,6 +2695,8 @@ async function renderDropsPanel() {
// ── Tab switching ─────────────────────────────────────────────────────────────
function switchTab(name) {
+ setActiveMainTab(name);
+
// Update tab button active state
document.querySelectorAll('.tab').forEach(btn => {
btn.classList.toggle('active', btn.dataset.tab === name);
@@ -2749,169 +2717,161 @@ function switchTab(name) {
// ── Boot ──────────────────────────────────────────────────────────────────────
-if (typeof document !== 'undefined') document.addEventListener('DOMContentLoaded', () => {
- document.getElementById('btn-start-next').addEventListener('click', function() {
- handleStartNextTask(this);
- });
-
- switchTab('queue');
- startPolling();
- connectWebSocket();
+if (typeof document !== 'undefined') {
+ document.addEventListener('DOMContentLoaded', () => {
+ document.getElementById('btn-start-next').addEventListener('click', function() {
+ handleStartNextTask(this);
+ });
- // Side panel close
- document.getElementById('btn-close-panel').addEventListener('click', closeTaskPanel);
- document.getElementById('task-panel-backdrop').addEventListener('click', closeTaskPanel);
+ switchTab(getActiveMainTab());
+ startPolling();
+ connectWebSocket();
- // Execution logs modal close
- document.getElementById('btn-close-logs').addEventListener('click', () => {
- document.getElementById('logs-modal').close();
- });
+ // Side panel close
+ document.getElementById('btn-close-panel').addEventListener('click', closeTaskPanel);
+ document.getElementById('task-panel-backdrop').addEventListener('click', closeTaskPanel);
- // Tab bar
- document.querySelectorAll('.tab').forEach(btn => {
- btn.addEventListener('click', () => switchTab(btn.dataset.tab));
- });
+ // Execution logs modal close
+ document.getElementById('btn-close-logs').addEventListener('click', () => {
+ document.getElementById('logs-modal').close();
+ });
- // Task modal
- document.getElementById('btn-new-task').addEventListener('click', openTaskModal);
- document.getElementById('btn-cancel-task').addEventListener('click', closeTaskModal);
- initProjectSelect();
+ // Tab bar
+ document.querySelectorAll('.tab').forEach(btn => {
+ btn.addEventListener('click', () => switchTab(btn.dataset.tab));
+ });
- // Push notifications button
- const btnNotify = document.getElementById('btn-notifications');
- if (btnNotify) {
- btnNotify.addEventListener('click', () => enableNotifications(btnNotify));
- }
+ // Task modal
+ document.getElementById('btn-new-task').addEventListener('click', openTaskModal);
+ document.getElementById('btn-cancel-task').addEventListener('click', closeTaskModal);
- // Validate button
- document.getElementById('btn-validate').addEventListener('click', async () => {
- const btn = document.getElementById('btn-validate');
- const resultDiv = document.getElementById('validate-result');
- btn.disabled = true;
- btn.textContent = 'Checking…';
- try {
- const payload = buildValidatePayload();
- const result = await validateTask(payload);
- renderValidationResult(result);
- } catch (err) {
- resultDiv.removeAttribute('hidden');
- resultDiv.textContent = 'Validation failed: ' + err.message;
- } finally {
- btn.disabled = false;
- btn.textContent = 'Validate Instructions';
+ // Push notifications button
+ const btnNotify = document.getElementById('btn-notifications');
+ if (btnNotify) {
+ btnNotify.addEventListener('click', () => enableNotifications(btnNotify));
}
- });
- // Draft with AI button
- const btnElaborate = document.getElementById('btn-elaborate');
- btnElaborate.addEventListener('click', async () => {
- const prompt = document.getElementById('elaborate-prompt').value.trim();
- if (!prompt) {
- const form = document.getElementById('task-form');
- // Remove previous error
- const prev = form.querySelector('.form-error');
- if (prev) prev.remove();
- const errEl = document.createElement('p');
- errEl.className = 'form-error';
- errEl.textContent = 'Please enter a description before drafting.';
- form.querySelector('.elaborate-section').appendChild(errEl);
- return;
- }
+ // Validate button
+ document.getElementById('btn-validate').addEventListener('click', async () => {
+ const btn = document.getElementById('btn-validate');
+ const resultDiv = document.getElementById('validate-result');
+ btn.disabled = true;
+ btn.textContent = 'Checking…';
+ try {
+ const payload = buildValidatePayload();
+ const result = await validateTask(payload);
+ renderValidationResult(result);
+ } catch (err) {
+ resultDiv.removeAttribute('hidden');
+ resultDiv.textContent = 'Validation failed: ' + err.message;
+ } finally {
+ btn.disabled = false;
+ btn.textContent = 'Validate Instructions';
+ }
+ });
+
+ // Draft with AI button
+ const btnElaborate = document.getElementById('btn-elaborate');
+ btnElaborate.addEventListener('click', async () => {
+ const prompt = document.getElementById('elaborate-prompt').value.trim();
+ if (!prompt) {
+ const form = document.getElementById('task-form');
+ // Remove previous error
+ const prev = form.querySelector('.form-error');
+ if (prev) prev.remove();
+ const errEl = document.createElement('p');
+ errEl.className = 'form-error';
+ errEl.textContent = 'Please enter a description before drafting.';
+ form.querySelector('.elaborate-section').appendChild(errEl);
+ return;
+ }
- btnElaborate.disabled = true;
- btnElaborate.textContent = 'Drafting…';
+ btnElaborate.disabled = true;
+ btnElaborate.textContent = 'Drafting…';
- // Remove any previous errors or banners
- const form = document.getElementById('task-form');
- form.querySelectorAll('.form-error, .elaborate-banner').forEach(el => el.remove());
+ // Remove any previous errors or banners
+ const form = document.getElementById('task-form');
+ form.querySelectorAll('.form-error, .elaborate-banner').forEach(el => el.remove());
- try {
- const sel = document.getElementById('project-select');
- const workingDir = sel.value === '__new__'
- ? document.getElementById('new-project-input').value.trim()
- : sel.value;
- const result = await elaborateTask(prompt, workingDir);
-
- // Populate form fields
- const f = document.getElementById('task-form');
- if (result.name)
- f.querySelector('[name="name"]').value = result.name;
- if (result.agent && result.agent.instructions)
- f.querySelector('[name="instructions"]').value = result.agent.instructions;
- if (result.agent && (result.agent.project_dir || result.agent.working_dir)) {
- const pDir = result.agent.project_dir || result.agent.working_dir;
- const pSel = document.getElementById('project-select');
- const exists = [...pSel.options].some(o => o.value === pDir);
- if (exists) {
- pSel.value = pDir;
- } else {
- pSel.value = '__new__';
- document.getElementById('new-project-row').hidden = false;
- document.getElementById('new-project-input').value = pDir;
+ try {
+ const repoUrl = document.getElementById('repository-url').value.trim();
+ const result = await elaborateTask(prompt, repoUrl);
+
+ // Populate form fields
+ const f = document.getElementById('task-form');
+ if (result.name)
+ f.querySelector('[name="name"]').value = result.name;
+ if (result.agent && result.agent.instructions)
+ f.querySelector('[name="instructions"]').value = result.agent.instructions;
+ if (result.repository_url || result.agent?.repository_url) {
+ document.getElementById('repository-url').value = result.repository_url || result.agent.repository_url;
}
- }
- if (result.agent && result.agent.max_budget_usd != null)
- f.querySelector('[name="max_budget_usd"]').value = result.agent.max_budget_usd;
- if (result.timeout)
- f.querySelector('[name="timeout"]').value = result.timeout;
- if (result.priority) {
- const sel = f.querySelector('[name="priority"]');
- if ([...sel.options].some(o => o.value === result.priority)) {
- sel.value = result.priority;
+ if (result.agent && result.agent.container_image) {
+ document.getElementById('container-image').value = result.agent.container_image;
+ }
+ if (result.agent && result.agent.max_budget_usd != null)
+ f.querySelector('[name="max_budget_usd"]').value = result.agent.max_budget_usd;
+ if (result.timeout)
+ f.querySelector('[name="timeout"]').value = result.timeout;
+ if (result.priority) {
+ const sel = f.querySelector('[name="priority"]');
+ if ([...sel.options].some(o => o.value === result.priority)) {
+ sel.value = result.priority;
+ }
}
- }
-
- // Show success banner
- const banner = document.createElement('p');
- banner.className = 'elaborate-banner';
- banner.textContent = 'AI draft ready — review and submit.';
- document.getElementById('task-form').querySelector('.elaborate-section').appendChild(banner);
- // Auto-validate after elaboration
- try {
- const result = await validateTask(buildValidatePayload());
- renderValidationResult(result);
- } catch (_) {
- // silent - elaboration already succeeded, validation is bonus
+ // Show success banner
+ const banner = document.createElement('p');
+ banner.className = 'elaborate-banner';
+ banner.textContent = 'AI draft ready — review and submit.';
+ document.getElementById('task-form').querySelector('.elaborate-section').appendChild(banner);
+
+ // Auto-validate after elaboration
+ try {
+ const result = await validateTask(buildValidatePayload());
+ renderValidationResult(result);
+ } catch (_) {
+ // silent - elaboration already succeeded, validation is bonus
+ }
+ } catch (err) {
+ const errEl = document.createElement('p');
+ errEl.className = 'form-error';
+ errEl.textContent = `Elaboration failed: ${err.message}`;
+ document.getElementById('task-form').querySelector('.elaborate-section').appendChild(errEl);
+ } finally {
+ btnElaborate.disabled = false;
+ btnElaborate.textContent = 'Draft with AI ✦';
}
- } catch (err) {
- const errEl = document.createElement('p');
- errEl.className = 'form-error';
- errEl.textContent = `Elaboration failed: ${err.message}`;
- document.getElementById('task-form').querySelector('.elaborate-section').appendChild(errEl);
- } finally {
- btnElaborate.disabled = false;
- btnElaborate.textContent = 'Draft with AI ✦';
- }
- });
+ });
- document.getElementById('task-form').addEventListener('submit', async e => {
- e.preventDefault();
+ document.getElementById('task-form').addEventListener('submit', async e => {
+ e.preventDefault();
- // Remove any previous error
- const prev = e.target.querySelector('.form-error');
- if (prev) prev.remove();
+ // Remove any previous error
+ const prev = e.target.querySelector('.form-error');
+ if (prev) prev.remove();
- const btn = e.submitter;
- btn.disabled = true;
- btn.textContent = 'Creating…';
+ const btn = e.submitter;
+ btn.disabled = true;
+ btn.textContent = 'Creating…';
- try {
- const validateResult = document.getElementById('validate-result');
- if (!validateResult.hasAttribute('hidden') && validateResult.dataset.clarity && validateResult.dataset.clarity !== 'clear') {
- if (!window.confirm('The validator flagged issues. Create task anyway?')) {
- return;
+ try {
+ const validateResult = document.getElementById('validate-result');
+ if (!validateResult.hasAttribute('hidden') && validateResult.dataset.clarity && validateResult.dataset.clarity !== 'clear') {
+ if (!window.confirm('The validator flagged issues. Create task anyway?')) {
+ return;
+ }
}
+ await createTask(new FormData(e.target));
+ } catch (err) {
+ const errEl = document.createElement('p');
+ errEl.className = 'form-error';
+ errEl.textContent = err.message;
+ e.target.appendChild(errEl);
+ } finally {
+ btn.disabled = false;
+ btn.textContent = 'Create & Queue';
}
- await createTask(new FormData(e.target));
- } catch (err) {
- const errEl = document.createElement('p');
- errEl.className = 'form-error';
- errEl.textContent = err.message;
- e.target.appendChild(errEl);
- } finally {
- btn.disabled = false;
- btn.textContent = 'Create & Queue';
- }
+ });
});
-});
+}
diff --git a/web/index.html b/web/index.html
index c17601b..7d52458 100644
--- a/web/index.html
+++ b/web/index.html
@@ -74,15 +74,12 @@
<p class="elaborate-hint">AI will fill in the form fields below. You can edit before submitting.</p>
</div>
<hr class="form-divider">
- <label>Project
- <select name="project_dir" id="project-select">
- <option value="/workspace/claudomator" selected>/workspace/claudomator</option>
- <option value="__new__">Create new project…</option>
- </select>
+ <label>Repository URL
+ <input name="repository_url" id="repository-url" placeholder="https://github.com/user/repo.git" required>
+ </label>
+ <label>Container Image
+ <input name="container_image" id="container-image" placeholder="claudomator-agent:latest" value="claudomator-agent:latest">
</label>
- <div id="new-project-row" hidden>
- <label>New Project Path <input id="new-project-input" placeholder="/workspace/my-new-app"></label>
- </div>
<label>Name <input name="name" required></label>
<label>Instructions <textarea name="instructions" rows="6" required></textarea></label>
<div class="validate-section">
diff --git a/web/test/tab-persistence.test.mjs b/web/test/tab-persistence.test.mjs
new file mode 100644
index 0000000..9311453
--- /dev/null
+++ b/web/test/tab-persistence.test.mjs
@@ -0,0 +1,58 @@
+// tab-persistence.test.mjs — TDD tests for main-tab localStorage persistence
+//
+// Run with: node --test web/test/tab-persistence.test.mjs
+
+import { describe, it, beforeEach } from 'node:test';
+import assert from 'node:assert/strict';
+
+// ── localStorage mock ──────────────────────────────────────────────────────────
+// NOTE: static ESM imports are hoisted, so app.js is evaluated before this assignment runs; the mock only needs to exist before the exported functions are called (app.js defers localStorage access until then).
+const store = new Map();
+globalThis.localStorage = {
+ getItem: (k) => store.has(k) ? store.get(k) : null,
+ setItem: (k, v) => store.set(k, String(v)),
+ removeItem: (k) => store.delete(k),
+ clear: () => store.clear(),
+};
+
+import { getActiveMainTab, setActiveMainTab } from '../app.js';
+
+describe('getActiveMainTab', () => {
+ beforeEach(() => store.clear());
+
+ it('returns "queue" when localStorage has no stored value', () => {
+ assert.equal(getActiveMainTab(), 'queue');
+ });
+
+ it('returns the tab name stored by setActiveMainTab', () => {
+ setActiveMainTab('settings');
+ assert.equal(getActiveMainTab(), 'settings');
+ });
+
+ it('returns "queue" after localStorage value is removed', () => {
+ setActiveMainTab('stats');
+ localStorage.removeItem('activeMainTab');
+ assert.equal(getActiveMainTab(), 'queue');
+ });
+
+ it('reflects the most recent setActiveMainTab call', () => {
+ setActiveMainTab('stats');
+ setActiveMainTab('running');
+ assert.equal(getActiveMainTab(), 'running');
+ });
+});
+
+describe('setActiveMainTab', () => {
+ beforeEach(() => store.clear());
+
+ it('writes the tab name to localStorage under key "activeMainTab"', () => {
+ setActiveMainTab('drops');
+ assert.equal(localStorage.getItem('activeMainTab'), 'drops');
+ });
+
+ it('overwrites a previously stored tab', () => {
+ setActiveMainTab('queue');
+ setActiveMainTab('interrupted');
+ assert.equal(localStorage.getItem('activeMainTab'), 'interrupted');
+ });
+});
diff --git a/web/test/task-panel-summary.test.mjs b/web/test/task-panel-summary.test.mjs
new file mode 100644
index 0000000..63dd483
--- /dev/null
+++ b/web/test/task-panel-summary.test.mjs
@@ -0,0 +1,143 @@
+// task-panel-summary.test.mjs — verifies task summary renders exactly once in panel.
+//
+// Run with: node --test web/test/task-panel-summary.test.mjs
+
+import { describe, it, beforeEach } from 'node:test';
+import assert from 'node:assert/strict';
+
+// ── Minimal DOM mock ──────────────────────────────────────────────────────────
+
/**
 * Builds a tiny stand-in for the subset of the DOM that the panel-rendering
 * code touches: createElement, getElementById, and class-selector-only
 * querySelector / querySelectorAll.
 *
 * Fixes over the previous version:
 *  - addEventListener now records handlers in `_listeners` (the field was
 *    declared on every element but handlers were silently dropped), so tests
 *    can assert on event wiring.
 *  - querySelector/querySelectorAll match descendants only, never the
 *    receiver itself — matching real Element.querySelector(All) semantics.
 *
 * @returns {{doc: object, panelContent: object}} mock `document` plus the
 *   element registered under id "task-panel-content".
 */
function makeMockDOM() {
  const elements = {};

  // True when `el` carries CSS class `cls` in its space-separated className.
  function hasClass(el, cls) {
    return Boolean(el.className) && el.className.split(/\s+/).includes(cls);
  }

  // Extracts the class name from a ".foo" selector; other selector forms are
  // not supported by this mock and never match.
  function classFromSelector(sel) {
    return sel.startsWith('.') ? sel.slice(1) : null;
  }

  function createElement(tag) {
    const el = {
      tag,
      className: '',
      textContent: '',
      innerHTML: '',
      hidden: false,
      children: [],
      _listeners: {},
      appendChild(child) { this.children.push(child); return child; },
      prepend(...nodes) { this.children.unshift(...nodes); },
      append(...nodes) { nodes.forEach((n) => this.children.push(n)); },
      // Depth-first search of descendants (excluding `this`), first match wins.
      querySelector(sel) {
        const cls = classFromSelector(sel);
        if (cls === null) return null;
        function search(node) {
          if (hasClass(node, cls)) return node;
          for (const c of node.children || []) {
            const found = search(c);
            if (found) return found;
          }
          return null;
        }
        for (const child of this.children) {
          const found = search(child);
          if (found) return found;
        }
        return null;
      },
      // All descendant matches (excluding `this`) in document order.
      querySelectorAll(sel) {
        const cls = classFromSelector(sel);
        const results = [];
        if (cls === null) return results;
        function search(node) {
          if (hasClass(node, cls)) results.push(node);
          for (const c of node.children || []) search(c);
        }
        for (const child of this.children) search(child);
        return results;
      },
      // Record handlers per event name so tests can inspect the wiring.
      addEventListener(ev, fn) {
        (this._listeners[ev] ??= []).push(fn);
      },
    };
    return el;
  }

  // Named panel elements referenced by getElementById
  const panelTitle = createElement('h2');
  const panelContent = createElement('div');
  elements['task-panel-title'] = panelTitle;
  elements['task-panel-content'] = panelContent;

  const doc = {
    createElement,
    getElementById(id) { return elements[id] || null; },
  };

  return { doc, panelContent };
}
+
+// ── Import renderTaskPanel ────────────────────────────────────────────────────
+
+import { renderTaskPanel } from '../app.js';
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
describe('renderTaskPanel summary rendering', () => {
  // Shared scaffolding: install a fresh mock document (renderTaskPanel reads
  // the `document` global) and hand back the panel-content node to assert on.
  function mountPanel() {
    const { doc, panelContent } = makeMockDOM();
    global.document = doc;
    return panelContent;
  }

  it('renders task summary exactly once for a COMPLETED task', () => {
    const panelContent = mountPanel();

    const task = {
      id: 'task-1',
      name: 'Fix the bug',
      state: 'COMPLETED',
      summary: 'Resolved the nil pointer in the payment handler.',
      priority: 'normal',
      created_at: '2026-03-17T10:00:00Z',
      updated_at: '2026-03-17T10:05:00Z',
      tags: [],
    };

    renderTaskPanel(task, []);

    // Count every element carrying either summary class — the panel must
    // contain exactly one in total.
    const summaryEls = panelContent.querySelectorAll('.task-summary');
    const summaryTextEls = panelContent.querySelectorAll('.task-summary-text');
    const total = summaryEls.length + summaryTextEls.length;
    assert.equal(total, 1, `Expected exactly 1 summary element, got ${total} (task-summary: ${summaryEls.length}, task-summary-text: ${summaryTextEls.length})`);
  });

  it('uses task-summary class (not task-summary-text) for good contrast', () => {
    const panelContent = mountPanel();

    const task = {
      id: 'task-2',
      name: 'Another task',
      state: 'COMPLETED',
      summary: 'All done.',
      priority: 'high',
      created_at: '2026-03-17T10:00:00Z',
      updated_at: '2026-03-17T10:05:00Z',
      tags: [],
    };

    renderTaskPanel(task, []);

    const matches = panelContent.querySelectorAll('.task-summary');
    assert.equal(matches.length, 1, 'Expected .task-summary element');
    assert.equal(matches[0].textContent, task.summary);
  });

  it('renders no summary section when task has no summary', () => {
    const panelContent = mountPanel();

    const task = {
      id: 'task-3',
      name: 'Pending task',
      state: 'PENDING',
      summary: null,
      priority: 'normal',
      created_at: '2026-03-17T10:00:00Z',
      updated_at: '2026-03-17T10:00:00Z',
      tags: [],
    };

    renderTaskPanel(task, []);

    const count =
      panelContent.querySelectorAll('.task-summary').length +
      panelContent.querySelectorAll('.task-summary-text').length;
    assert.equal(count, 0, 'Expected no summary when task.summary is null');
  });
});