From 5814e7d6bdec659bb8ca10cc18447a821c59ad4c Mon Sep 17 00:00:00 2001
From: Peter Stone <thepeterstone@gmail.com>
Date: Wed, 18 Mar 2026 00:52:49 +0000
Subject: fix: comprehensive addressing of container execution review feedback

- Fix Critical Bug 1: Only remove workspace on success, preserve on failure/BLOCKED.
- Fix Critical Bug 2: Use correct Claude flag (--resume) and pass instructions via file.
- Fix Critical Bug 3: Actually mount and use the instructions file in the container.
- Address Design Issue 4: Implement Resume/BLOCKED detection and host-side workspace re-use.
- Address Design Issue 5: Consolidate RepositoryURL to Task level and fix API fallback.
- Address Design Issue 6: Make agent images configurable per runner type via CLI flags.
- Address Design Issue 7: Secure API keys via .claudomator-env file and --env-file flag.
- Address Code Quality 8: Add unit tests for ContainerRunner arg construction.
- Address Code Quality 9: Fix indentation regression in app.js.
- Address Code Quality 10: Clean up orphaned Claude/Gemini runner files and move helpers.
- Fix tests: Update server_test.go and executor_test.go to work with new model.
---
 internal/cli/run.go | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'internal/cli/run.go')

diff --git a/internal/cli/run.go b/internal/cli/run.go
index 49aa28e..9663bc5 100644
--- a/internal/cli/run.go
+++ b/internal/cli/run.go
@@ -73,15 +73,19 @@ func runTasks(file string, parallel int, dryRun bool) error {
 	logger := newLogger(verbose)
 
 	runners := map[string]executor.Runner{
-		"claude": &executor.ClaudeRunner{
-			BinaryPath: cfg.ClaudeBinaryPath,
-			Logger:     logger,
-			LogDir:     cfg.LogDir,
+		"claude": &executor.ContainerRunner{
+			Image:    cfg.ClaudeImage,
+			Logger:   logger,
+			LogDir:   cfg.LogDir,
+			APIURL:   "http://" + cfg.ServerAddr,
+			DropsDir: cfg.DropsDir,
 		},
-		"gemini": &executor.GeminiRunner{
-			BinaryPath: cfg.GeminiBinaryPath,
-			Logger:     logger,
-			LogDir:     cfg.LogDir,
+		"gemini": &executor.ContainerRunner{
+			Image:    cfg.GeminiImage,
+			Logger:   logger,
+			LogDir:   cfg.LogDir,
+			APIURL:   "http://" + cfg.ServerAddr,
+			DropsDir: cfg.DropsDir,
 		},
 	}
 	pool := executor.NewPool(parallel, runners, store, logger)
-- 
cgit v1.2.3


From a4795d68fc5381f1ff48d043fe7554355e5899fb Mon Sep 17 00:00:00 2001
From: Peter Stone <thepeterstone@gmail.com>
Date: Wed, 18 Mar 2026 07:54:27 +0000
Subject: fix: address final container execution issues and cleanup review docs

---
 docs/reviews/feat-container-execution.md | 130 -------------------------------
 images/agent-base/Dockerfile             |  22 ++++--
 internal/api/webhook.go                  |  16 ++--
 internal/cli/run.go                      |  32 +++++---
 internal/cli/serve.go                    |  39 ++++++----
 internal/config/config.go                |   2 +
 internal/executor/container.go           |  98 +++++++++++++++++------
 internal/executor/container_test.go      |  52 ++++++++++---
 internal/executor/helpers.go             |   4 +-
 9 files changed, 193 insertions(+), 202 deletions(-)
 delete mode 100644 docs/reviews/feat-container-execution.md

(limited to 'internal/cli/run.go')

diff --git a/docs/reviews/feat-container-execution.md b/docs/reviews/feat-container-execution.md
deleted file mode 100644
index cdcc174..0000000
--- a/docs/reviews/feat-container-execution.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# Code Review: `feat/container-execution`
-
-**Branch:** `feat/container-execution`
-**Commits reviewed:**
-- `e68cc48` feat: implement containerized repository-based execution model
-- `f68eb0c` fix: comprehensive addressing of container execution review feedback
-- `ad48791` fix: address round 2 review feedback for container execution
-
----
-
-## Overview
-
-Replaces `ClaudeRunner`/`GeminiRunner` with a `ContainerRunner`, adds `RepositoryURL` to the task model, and ships ADR-006. The direction is sound. Three rounds of iteration have fixed most of the original issues, but four blocking bugs remain.
-
----
-
-## Fixed Across All Rounds
-
-- ✅ Unconditional `defer os.RemoveAll` — replaced with `success`/`isBlocked` defer
-- ✅ `--session-id` invalid flag — changed to `--resume`
-- ✅ `--resume` on fresh runs — `isResume bool` parameter added to `buildInnerCmd`
-- ✅ `-p` passes file path literally — now uses `sh -c "claude -p \"$(cat ...)\""`
-- ✅ `streamErr` silently discarded — now returned
-- ✅ API keys via `-e` — moved to `--env-file` with host-side path
-- ✅ Hardcoded image name — configurable via `cfg.ClaudeImage`/`cfg.GeminiImage`
-- ✅ `ClaudeRunner`/`GeminiRunner` orphaned — deleted
-- ✅ `RepositoryURL` not checked in `AgentConfig` — fallback added
-- ✅ `app.js` indentation regression — fixed
-- ✅ Test coverage expanded — `isCompletionReport`, `tailFile`, `gitSafe`, workspace preservation tests added
-
----
-
-## Blocking Bugs
-
-### 1. Push failure is silently swallowed — task marked COMPLETED with lost commits
-
-**File:** `internal/executor/container.go` — `Run`
-
-```go
-if waitErr == nil && streamErr == nil {
-    success = true // set BEFORE push
-    if out, err := exec.CommandContext(..., "git", "-C", workspace, "push", "origin", "HEAD").CombinedOutput(); err != nil {
-        r.Logger.Warn("git push failed or no changes", ...)
-        // error not returned
-    }
-}
-```
-
-`success = true` before the push means the workspace is cleaned up whether the push succeeds or not. Push errors are only logged. If the agent commits changes and the push fails (auth, non-fast-forward, network), the task is marked COMPLETED, the workspace is deleted, and the commits are gone. ADR-006 explicitly states: *"If the remote is missing or the push fails, the task is marked FAILED and the host-side workspace is preserved for inspection."* This is the opposite.
-
-### 2. `--resume` is passed with the wrong session ID
-
-**File:** `internal/executor/container.go` — `Run`, `buildInnerCmd`
-
-```go
-innerCmd := r.buildInnerCmd(t, e.ID, isResume)
-// ...
-claudeArgs = append(claudeArgs, "--resume", execID) // execID = e.ID
-```
-
-`e.ID` is the *current* execution's UUID. `--resume` requires the *previous* Claude session ID, stored in `e.ResumeSessionID`. Passing the wrong ID causes Claude to error with "No conversation found". Should be `e.ResumeSessionID`.
-
-### 3. `BlockedError.SessionID` is set to the execution UUID, not a Claude session ID
-
-**File:** `internal/executor/container.go`
-
-```go
-return &BlockedError{
-    QuestionJSON: questionJSON,
-    SessionID:    e.ID, // For container runner, we use exec ID as session ID
-```
-
-The pool stores `BlockedError.SessionID` as the session to `--resume` when the user answers. Using `e.ID` means the resume invocation will fail — Claude has no session with that UUID. The actual Claude session ID must come from the stream output or an agent-written file. `ClaudeRunner` handled this via `e.SessionID` which was set before the run and populated into the stream's session context.
-
-### 4. `sh -c` quoting breaks on instructions with shell metacharacters
-
-**File:** `internal/executor/container.go` — `buildInnerCmd`
-
-```go
-claudeArgs := []string{"claude", "-p", "\"$(" + promptCmd + ")\""}
-return []string{"sh", "-c", strings.Join(claudeArgs, " ")}
-```
-
-Produces: `claude -p "$(cat /workspace/.claudomator-instructions.txt)" ...`
-
-If the instructions file contains `"`, `` ` ``, `$VAR`, or `\`, the shell expansion breaks or executes unintended commands. Task instructions routinely contain code snippets with all of these. A safer pattern uses a shell variable to capture and isolate the expansion:
-
-```sh
-sh -c 'INST=$(cat /workspace/.claudomator-instructions.txt); claude -p "$INST" ...'
-```
-
-The single-quoted outer string prevents the host shell from interpreting the inner `$INST`.
-
----
-
-## Non-Blocking Issues
-
-### 5. `image` variable resolved twice
-
-**File:** `internal/executor/container.go` — `Run`
-
-`image` is resolved (ContainerImage → r.Image → default) at the top of `Run`, then the identical three-way resolution runs again after `buildInnerCmd` is called. The first value is immediately overwritten — dead code.
-
-### 6. `TODO` comment is stale and misplaced
-
-```go
-// TODO: Support Resume/BLOCKED by re-attaching to preserved workspace.
-```
-
-Resume workspace reuse is already implemented in step 1 (`e.SandboxDir` check). The BLOCKED path is handled after `cmd.Wait()`. The comment is inaccurate; the actual unresolved issue is the session ID problem (bug #3 above).
-
-### 7. Test coverage still missing for the most critical paths
-
-Round 3 restored `isCompletionReport`, `tailFile`, and `gitSafe` tests. Still missing: goroutine leak detection, rate-limit retry behavior, and session ID propagation across a BLOCKED → resume cycle. These are the tests most likely to catch bugs #2 and #3 in CI.
-
-### 8. ADR-006 claims "Supersedes ADR-005" but ADR-005 Status was not updated
-
-ADR-005 should add a "Superseded by ADR-006" line to its Status section.
-
----
-
-## Verdict
-
-**Not mergeable.** Bugs 1–4 are all functional failures:
-
-- Bug 1: silently discarded push failures → lost commits, false COMPLETED status
-- Bugs 2 & 3: wrong session IDs → every resume fails with "No conversation found"
-- Bug 4: shell quoting → any task with code in its instructions silently misbehaves
-
-Bug 1 is a regression introduced in round 3 (previously push failures correctly failed the task). Bugs 2–3 have been present since the first commit and were not caught by the new tests because no test exercises the BLOCKED → resume flow end-to-end.
diff --git a/images/agent-base/Dockerfile b/images/agent-base/Dockerfile
index 71807ae..6fb253c 100644
--- a/images/agent-base/Dockerfile
+++ b/images/agent-base/Dockerfile
@@ -1,5 +1,5 @@
 # Claudomator Agent Base Image
-FROM ubuntu:22.04
+FROM ubuntu:24.04
 
 # Avoid interactive prompts
 ENV DEBIAN_FRONTEND=noninteractive
@@ -9,7 +9,7 @@ RUN apt-get update && apt-get install -y \
     git \
     curl \
     make \
-    golang \
+    wget \
     nodejs \
     npm \
     sqlite3 \
@@ -17,20 +17,28 @@ RUN apt-get update && apt-get install -y \
     sudo \
     && rm -rf /var/lib/apt/lists/*
 
-# Install specific node tools if needed (example: postcss)
+# Install Go 1.22.1 from the upstream tarball (replaces the distro golang package)
+RUN wget https://go.dev/dl/go1.22.1.linux-amd64.tar.gz && \
+    tar -C /usr/local -xzf go1.22.1.linux-amd64.tar.gz && \
+    rm go1.22.1.linux-amd64.tar.gz
+ENV PATH=$PATH:/usr/local/go/bin
+
+# Install Claude CLI
+RUN npm install -g @anthropic-ai/claude-code
+
+# Install specific node tools
 RUN npm install -g postcss-cli tailwindcss autoprefixer
 
 # Setup workspace
 WORKDIR /workspace
 
-# Install Claudomator-aware CLI wrappers (placeholder)
-# These will be provided by the Claudomator project in the future.
-# For now, we assume 'claude' and 'gemini' binaries are available or mapped.
-
 # Add a user claudomator-agent
 RUN useradd -m claudomator-agent && \
     echo "claudomator-agent ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
 
+# NOTE: the global npm installs above run before the USER switch below.
+# TODO confirm @anthropic-ai/claude-code runs as claudomator-agent without extra setup.
+
 USER claudomator-agent
 
 # Default command
diff --git a/internal/api/webhook.go b/internal/api/webhook.go
index a28b43f..141224f 100644
--- a/internal/api/webhook.go
+++ b/internal/api/webhook.go
@@ -210,16 +210,16 @@ func (s *Server) createCIFailureTask(w http.ResponseWriter, repoName, fullName,
 			MaxBudgetUSD: 3.0,
 			AllowedTools: []string{"Read", "Edit", "Bash", "Glob", "Grep"},
 		},
-		Priority:  task.PriorityNormal,
-		Tags:      []string{"ci", "auto"},
-		DependsOn: []string{},
-		Retry:     task.RetryConfig{MaxAttempts: 1, Backoff: "exponential"},
-		State:     task.StatePending,
-		CreatedAt: now,
-		UpdatedAt: now,
+		Priority:      task.PriorityNormal,
+		Tags:          []string{"ci", "auto"},
+		DependsOn:     []string{},
+		Retry:         task.RetryConfig{MaxAttempts: 1, Backoff: "exponential"},
+		State:         task.StatePending,
+		CreatedAt:     now,
+		UpdatedAt:     now,
+		RepositoryURL: fmt.Sprintf("https://github.com/%s.git", fullName),
 	}
 	if project != nil {
-		t.RepositoryURL = fmt.Sprintf("https://github.com/%s.git", fullName)
 		t.Project = project.Name
 	}
 
diff --git a/internal/cli/run.go b/internal/cli/run.go
index 9663bc5..cfac893 100644
--- a/internal/cli/run.go
+++ b/internal/cli/run.go
@@ -72,22 +72,34 @@ func runTasks(file string, parallel int, dryRun bool) error {
 
 	logger := newLogger(verbose)
 
+	apiURL := "http://localhost" + cfg.ServerAddr
+	if len(cfg.ServerAddr) > 0 && cfg.ServerAddr[0] != ':' {
+		apiURL = "http://" + cfg.ServerAddr
+	}
+
 	runners := map[string]executor.Runner{
 		"claude": &executor.ContainerRunner{
-			Image:    cfg.ClaudeImage,
-			Logger:   logger,
-			LogDir:   cfg.LogDir,
-			APIURL:   "http://" + cfg.ServerAddr,
-			DropsDir: cfg.DropsDir,
+			Image:        cfg.ClaudeImage,
+			Logger:       logger,
+			LogDir:       cfg.LogDir,
+			APIURL:       apiURL,
+			DropsDir:     cfg.DropsDir,
+			SSHAuthSock:  cfg.SSHAuthSock,
+			ClaudeBinary: cfg.ClaudeBinaryPath,
+			GeminiBinary: cfg.GeminiBinaryPath,
 		},
 		"gemini": &executor.ContainerRunner{
-			Image:    cfg.GeminiImage,
-			Logger:   logger,
-			LogDir:   cfg.LogDir,
-			APIURL:   "http://" + cfg.ServerAddr,
-			DropsDir: cfg.DropsDir,
+			Image:        cfg.GeminiImage,
+			Logger:       logger,
+			LogDir:       cfg.LogDir,
+			APIURL:       apiURL,
+			DropsDir:     cfg.DropsDir,
+			SSHAuthSock:  cfg.SSHAuthSock,
+			ClaudeBinary: cfg.ClaudeBinaryPath,
+			GeminiBinary: cfg.GeminiBinaryPath,
 		},
 	}
+
 	pool := executor.NewPool(parallel, runners, store, logger)
 	if cfg.GeminiBinaryPath != "" {
 		pool.Classifier = &executor.Classifier{GeminiBinaryPath: cfg.GeminiBinaryPath}
diff --git a/internal/cli/serve.go b/internal/cli/serve.go
index 33715ee..2ee020d 100644
--- a/internal/cli/serve.go
+++ b/internal/cli/serve.go
@@ -77,25 +77,34 @@ func serve(addr string) error {
 
 	runners := map[string]executor.Runner{
 		"claude": &executor.ContainerRunner{
-			Image:    cfg.ClaudeImage,
-			Logger:   logger,
-			LogDir:   cfg.LogDir,
-			APIURL:   apiURL,
-			DropsDir: cfg.DropsDir,
+			Image:        cfg.ClaudeImage,
+			Logger:       logger,
+			LogDir:       cfg.LogDir,
+			APIURL:       apiURL,
+			DropsDir:     cfg.DropsDir,
+			SSHAuthSock:  cfg.SSHAuthSock,
+			ClaudeBinary: cfg.ClaudeBinaryPath,
+			GeminiBinary: cfg.GeminiBinaryPath,
 		},
 		"gemini": &executor.ContainerRunner{
-			Image:    cfg.GeminiImage,
-			Logger:   logger,
-			LogDir:   cfg.LogDir,
-			APIURL:   apiURL,
-			DropsDir: cfg.DropsDir,
+			Image:        cfg.GeminiImage,
+			Logger:       logger,
+			LogDir:       cfg.LogDir,
+			APIURL:       apiURL,
+			DropsDir:     cfg.DropsDir,
+			SSHAuthSock:  cfg.SSHAuthSock,
+			ClaudeBinary: cfg.ClaudeBinaryPath,
+			GeminiBinary: cfg.GeminiBinaryPath,
 		},
 		"container": &executor.ContainerRunner{
-			Image:    "claudomator-agent:latest",
-			Logger:   logger,
-			LogDir:   cfg.LogDir,
-			APIURL:   apiURL,
-			DropsDir: cfg.DropsDir,
+			Image:        "claudomator-agent:latest",
+			Logger:       logger,
+			LogDir:       cfg.LogDir,
+			APIURL:       apiURL,
+			DropsDir:     cfg.DropsDir,
+			SSHAuthSock:  cfg.SSHAuthSock,
+			ClaudeBinary: cfg.ClaudeBinaryPath,
+			GeminiBinary: cfg.GeminiBinaryPath,
 		},
 	}
 
diff --git a/internal/config/config.go b/internal/config/config.go
index 6e163c4..fa76b1b 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -20,6 +20,7 @@ type Config struct {
 	DBPath           string    `toml:"-"`
 	LogDir           string    `toml:"-"`
 	DropsDir         string    `toml:"-"`
+	SSHAuthSock      string    `toml:"ssh_auth_sock"`
 	ClaudeBinaryPath string    `toml:"claude_binary_path"`
 	GeminiBinaryPath string    `toml:"gemini_binary_path"`
 	ClaudeImage      string    `toml:"claude_image"`
@@ -50,6 +51,7 @@ func Default() (*Config, error) {
 		DBPath:           filepath.Join(dataDir, "claudomator.db"),
 		LogDir:           filepath.Join(dataDir, "executions"),
 		DropsDir:         filepath.Join(dataDir, "drops"),
+		SSHAuthSock:      os.Getenv("SSH_AUTH_SOCK"),
 		ClaudeBinaryPath: "claude",
 		GeminiBinaryPath: "gemini",
 		ClaudeImage:      "claudomator-agent:latest",
diff --git a/internal/executor/container.go b/internal/executor/container.go
index d21aea3..45758d2 100644
--- a/internal/executor/container.go
+++ b/internal/executor/container.go
@@ -17,12 +17,23 @@ import (
 
 // ContainerRunner executes an agent inside a container.
 type ContainerRunner struct {
-	Image      string // default image if not specified in task
-	Logger     *slog.Logger
-	LogDir     string
-	APIURL     string
-	DropsDir   string
-	SSHAuthSock string // optional path to host SSH agent
+	Image        string // default image if not specified in task
+	Logger       *slog.Logger
+	LogDir       string
+	APIURL       string
+	DropsDir     string
+	SSHAuthSock  string // optional path to host SSH agent
+	ClaudeBinary string // optional path to claude binary in container
+	GeminiBinary string // optional path to gemini binary in container
+	// Command allows mocking exec.CommandContext for tests.
+	Command func(ctx context.Context, name string, arg ...string) *exec.Cmd
+}
+
+func (r *ContainerRunner) command(ctx context.Context, name string, arg ...string) *exec.Cmd {
+	if r.Command != nil {
+		return r.Command(ctx, name, arg...)
+	}
+	return exec.CommandContext(ctx, name, arg...)
 }
 
 func (r *ContainerRunner) ExecLogDir(execID string) string {
@@ -88,7 +99,11 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec
 	// 2. Clone repo into workspace if not resuming
 	if !isResume {
 		r.Logger.Info("cloning repository", "url", repoURL, "workspace", workspace)
-		if out, err := exec.CommandContext(ctx, "git", "clone", repoURL, workspace).CombinedOutput(); err != nil {
+		if out, err := r.command(ctx, "git", "clone", repoURL, workspace).CombinedOutput(); err != nil {
+			// If it looks like a remote URL, fail fast.
+			if strings.HasPrefix(repoURL, "http") || strings.HasPrefix(repoURL, "git@") || strings.HasPrefix(repoURL, "ssh://") {
+				return fmt.Errorf("git clone failed for remote repository: %w\n%s", err, string(out))
+			}
 			r.Logger.Warn("git clone failed, attempting fallback init", "url", repoURL, "error", err)
 			if initErr := r.fallbackGitInit(repoURL, workspace); initErr != nil {
 				return fmt.Errorf("git clone and fallback init failed: %w\n%s", err, string(out))
@@ -143,7 +158,7 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec
 	fullArgs = append(fullArgs, innerCmd...)
 
 	r.Logger.Info("starting container", "image", image, "taskID", t.ID)
-	cmd := exec.CommandContext(ctx, "docker", fullArgs...)
+	cmd := r.command(ctx, "docker", fullArgs...)
 	cmd.Stderr = stderrFile
 	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
 
@@ -162,6 +177,18 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec
 	}
 	stdoutW.Close()
 
+	// Watch for context cancellation to kill the process group (Issue 1)
+	done := make(chan struct{})
+	defer close(done)
+	go func() {
+		select {
+		case <-ctx.Done():
+			r.Logger.Info("killing container process group due to context cancellation", "taskID", t.ID)
+			syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+		case <-done:
+		}
+	}()
+
 	// Stream stdout to the log file and parse cost/errors.
 	var costUSD float64
 	var sessionID string
@@ -193,6 +220,9 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec
 		} else {
 			isBlocked = true
 			success = true // We consider BLOCKED as a "success" for workspace preservation
+			if e.SessionID == "" {
+				r.Logger.Warn("missing session ID; resume will start fresh", "taskID", e.TaskID)
+			}
 			return &BlockedError{
 				QuestionJSON: questionJSON,
 				SessionID:    e.SessionID,
@@ -210,14 +240,24 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec
 
 	// 5. Post-execution: push changes if successful
 	if waitErr == nil && streamErr == nil {
-		r.Logger.Info("pushing changes back to remote", "url", repoURL)
-		// We assume the sandbox has committed changes (the agent image should enforce this)
-		if out, err := exec.CommandContext(ctx, "git", "-C", workspace, "push", "origin", "HEAD").CombinedOutput(); err != nil {
-			r.Logger.Warn("git push failed or no changes", "error", err, "output", string(out))
-			// Only set success = true if we consider this "good enough".
-			// Review says: "If the remote is missing or the push fails, the task is marked FAILED and the host-side workspace is preserved"
-			// So we MUST return error here.
-			return fmt.Errorf("git push failed: %w\n%s", err, string(out))
+		// Check if there are any commits to push (Issue 10)
+		// We use rev-list to see if HEAD is ahead of origin/HEAD.
+		// If origin/HEAD doesn't exist (e.g. fresh init), we just attempt to push.
+		hasCommits := true
+		if out, err := r.command(ctx, "git", "-C", workspace, "rev-list", "origin/HEAD..HEAD").CombinedOutput(); err == nil {
+			if len(strings.TrimSpace(string(out))) == 0 {
+				hasCommits = false
+			}
+		}
+
+		if hasCommits {
+			r.Logger.Info("pushing changes back to remote", "url", repoURL)
+			if out, err := r.command(ctx, "git", "-C", workspace, "push", "origin", "HEAD").CombinedOutput(); err != nil {
+				r.Logger.Warn("git push failed", "error", err, "output", string(out))
+				return fmt.Errorf("git push failed: %w\n%s", err, string(out))
+			}
+		} else {
+			r.Logger.Info("no new commits to push", "taskID", t.ID)
 		}
 		success = true
 	}
@@ -235,7 +275,7 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec
 func (r *ContainerRunner) buildDockerArgs(workspace, taskID string) []string {
 	// --env-file takes a HOST path.
 	hostEnvFile := filepath.Join(workspace, ".claudomator-env")
-	return []string{
+	args := []string{
 		"run", "--rm",
 		"-v", workspace + ":/workspace",
 		"-w", "/workspace",
@@ -244,28 +284,42 @@ func (r *ContainerRunner) buildDockerArgs(workspace, taskID string) []string {
 		"-e", "CLAUDOMATOR_TASK_ID=" + taskID,
 		"-e", "CLAUDOMATOR_DROP_DIR=" + r.DropsDir,
 	}
+	if r.SSHAuthSock != "" {
+		args = append(args, "-v", r.SSHAuthSock+":/tmp/ssh-auth.sock", "-e", "SSH_AUTH_SOCK=/tmp/ssh-auth.sock")
+	}
+	return args
 }
 
 func (r *ContainerRunner) buildInnerCmd(t *task.Task, e *storage.Execution, isResume bool) []string {
 	// Claude CLI uses -p for prompt text. To pass a file, we use a shell to cat it.
 	// We use a shell variable to capture the expansion to avoid quoting issues with instructions contents.
 	// The outer single quotes around the sh -c argument prevent host-side expansion.
-	
+
+	claudeBin := r.ClaudeBinary
+	if claudeBin == "" {
+		claudeBin = "claude"
+	}
+	geminiBin := r.GeminiBinary
+	if geminiBin == "" {
+		geminiBin = "gemini"
+	}
+
 	if t.Agent.Type == "gemini" {
-		return []string{"sh", "-c", "INST=$(cat /workspace/.claudomator-instructions.txt); gemini -p \"$INST\""}
+		return []string{"sh", "-c", fmt.Sprintf("INST=$(cat /workspace/.claudomator-instructions.txt); %s -p \"$INST\"", geminiBin)}
 	}
 
 	// Claude
 	var claudeCmd strings.Builder
-	claudeCmd.WriteString("INST=$(cat /workspace/.claudomator-instructions.txt); claude -p \"$INST\"")
+	claudeCmd.WriteString(fmt.Sprintf("INST=$(cat /workspace/.claudomator-instructions.txt); %s -p \"$INST\"", claudeBin))
 	if isResume && e.ResumeSessionID != "" {
 		claudeCmd.WriteString(fmt.Sprintf(" --resume %s", e.ResumeSessionID))
 	}
 	claudeCmd.WriteString(" --output-format stream-json --verbose --permission-mode bypassPermissions")
-	
+
 	return []string{"sh", "-c", claudeCmd.String()}
 }
 
+
 func (r *ContainerRunner) fallbackGitInit(repoURL, workspace string) error {
 	// Ensure directory exists
 	if err := os.MkdirAll(workspace, 0755); err != nil {
@@ -281,7 +335,7 @@ func (r *ContainerRunner) fallbackGitInit(repoURL, workspace string) error {
 	// git clone handle local paths fine if they are repos.
 	// This fallback is only if it's NOT a repo.
 	for _, args := range cmds {
-		if out, err := exec.Command("git", args...).CombinedOutput(); err != nil {
+		if out, err := r.command(context.Background(), "git", args...).CombinedOutput(); err != nil {
 			return fmt.Errorf("git init failed: %w\n%s", err, out)
 		}
 	}
diff --git a/internal/executor/container_test.go b/internal/executor/container_test.go
index 0e36def..d4d591e 100644
--- a/internal/executor/container_test.go
+++ b/internal/executor/container_test.go
@@ -6,6 +6,7 @@ import (
 	"io"
 	"log/slog"
 	"os"
+	"os/exec"
 	"strings"
 	"testing"
 
@@ -15,14 +16,15 @@ import (
 
 func TestContainerRunner_BuildDockerArgs(t *testing.T) {
 	runner := &ContainerRunner{
-		APIURL:   "http://localhost:8484",
-		DropsDir: "/data/drops",
+		APIURL:      "http://localhost:8484",
+		DropsDir:    "/data/drops",
+		SSHAuthSock: "/tmp/ssh.sock",
 	}
 	workspace := "/tmp/ws"
 	taskID := "task-123"
 
 	args := runner.buildDockerArgs(workspace, taskID)
-	
+
 	expected := []string{
 		"run", "--rm",
 		"-v", "/tmp/ws:/workspace",
@@ -31,11 +33,12 @@ func TestContainerRunner_BuildDockerArgs(t *testing.T) {
 		"-e", "CLAUDOMATOR_API_URL=http://localhost:8484",
 		"-e", "CLAUDOMATOR_TASK_ID=task-123",
 		"-e", "CLAUDOMATOR_DROP_DIR=/data/drops",
+		"-v", "/tmp/ssh.sock:/tmp/ssh-auth.sock",
+		"-e", "SSH_AUTH_SOCK=/tmp/ssh-auth.sock",
 	}
 
-
 	if len(args) != len(expected) {
-		t.Fatalf("expected %d args, got %d", len(expected), len(args))
+		t.Fatalf("expected %d args, got %d. Got: %v", len(expected), len(args), args)
 	}
 	for i, v := range args {
 		if v != expected[i] {
@@ -76,12 +79,31 @@ func TestContainerRunner_BuildInnerCmd(t *testing.T) {
 		tk := &task.Task{Agent: task.AgentConfig{Type: "gemini"}}
 		exec := &storage.Execution{}
 		cmd := runner.buildInnerCmd(tk, exec, false)
-		
+
 		cmdStr := strings.Join(cmd, " ")
 		if !strings.Contains(cmdStr, "gemini -p \"$INST\"") {
 			t.Errorf("expected gemini command with safer quoting, got %q", cmdStr)
 		}
 	})
+
+	t.Run("custom-binaries", func(t *testing.T) {
+		runnerCustom := &ContainerRunner{
+			ClaudeBinary: "/usr/bin/claude-v2",
+			GeminiBinary: "/usr/local/bin/gemini-pro",
+		}
+		
+		tkClaude := &task.Task{Agent: task.AgentConfig{Type: "claude"}}
+		cmdClaude := runnerCustom.buildInnerCmd(tkClaude, &storage.Execution{}, false)
+		if !strings.Contains(strings.Join(cmdClaude, " "), "/usr/bin/claude-v2 -p") {
+			t.Errorf("expected custom claude binary, got %q", cmdClaude)
+		}
+
+		tkGemini := &task.Task{Agent: task.AgentConfig{Type: "gemini"}}
+		cmdGemini := runnerCustom.buildInnerCmd(tkGemini, &storage.Execution{}, false)
+		if !strings.Contains(strings.Join(cmdGemini, " "), "/usr/local/bin/gemini-pro -p") {
+			t.Errorf("expected custom gemini binary, got %q", cmdGemini)
+		}
+	})
 }
 
 func TestContainerRunner_Run_PreservesWorkspaceOnFailure(t *testing.T) {
@@ -89,19 +111,31 @@ func TestContainerRunner_Run_PreservesWorkspaceOnFailure(t *testing.T) {
 	runner := &ContainerRunner{
 		Logger: logger,
 		Image:  "busybox",
+		Command: func(ctx context.Context, name string, arg ...string) *exec.Cmd {
+			// Mock docker run to exit 1
+			if name == "docker" {
+				return exec.Command("sh", "-c", "exit 1")
+			}
+			// Mock git clone to succeed and create the directory
+			if name == "git" && len(arg) > 0 && arg[0] == "clone" {
+				dir := arg[len(arg)-1]
+				os.MkdirAll(dir, 0755)
+				return exec.Command("true")
+			}
+			return exec.Command("true")
+		},
 	}
 
-	// Use an invalid repo URL to trigger failure.
 	tk := &task.Task{
 		ID:            "test-task",
-		RepositoryURL: "/nonexistent/repo",
+		RepositoryURL: "https://github.com/example/repo.git",
 		Agent:         task.AgentConfig{Type: "claude"},
 	}
 	exec := &storage.Execution{ID: "test-exec", TaskID: "test-task"}
 
 	err := runner.Run(context.Background(), tk, exec)
 	if err == nil {
-		t.Fatal("expected error due to invalid repo")
+		t.Fatal("expected error due to mocked docker failure")
 	}
 
 	// Verify SandboxDir was set and directory exists.
diff --git a/internal/executor/helpers.go b/internal/executor/helpers.go
index 36cd050..9e4530b 100644
--- a/internal/executor/helpers.go
+++ b/internal/executor/helpers.go
@@ -33,6 +33,7 @@ func parseStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64, string
 	var sessionID string
 	var streamErr error
 
+Loop:
 	for scanner.Scan() {
 		line := scanner.Bytes()
 		var msg map[string]interface{}
@@ -54,7 +55,7 @@ func parseStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64, string
 				if status == "rejected" {
 					streamErr = fmt.Errorf("claude rate limit reached (rejected): %v", msg)
 					// Immediately break since we can't continue anyway
-					break
+					break Loop
 				}
 			}
 		case "assistant":
@@ -91,6 +92,7 @@ func parseStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64, string
 	return totalCost, sessionID, streamErr
 }
 
+
 // permissionDenialError inspects a "user" stream message for tool_result entries
 // that were denied due to missing permissions. Returns an error if found.
 func permissionDenialError(msg map[string]interface{}) error {
-- 
cgit v1.2.3