From 85c3bf4d28b0903a2005356339e6ea56855b8c80 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 3 May 2026 03:58:19 +0000
Subject: chore: post-epic cleanup — green test suite, no skips
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses the cleanup queue captured in docs/plans/local-oss-runner.md
after the local-OSS-models epic landed. After this commit
`go test -race ./...` is green across every package with zero `t.Skip`
calls and no excluded tests.

Real bugs fixed:
- claude.go setupSandbox callsites used `sandboxDir, err := ...` which
  shadowed the outer variable, so BlockedError.SandboxDir was always
  empty. Resume-after-block was broken for both new and stale-sandbox
  paths. TestBlockedError_IncludesSandboxDir now exercises the right
  invariant.
- TestPool_ActivePerAgent_DeletesZeroEntries flake under -race: the
  cleanup defer in execute()/executeResume() runs AFTER
  handleRunResult sends on resultCh, so consumers observing a result
  could see a still-counted activePerAgent entry. Extracted
  decActiveAgent(agentType, *cleaned) helper; called explicitly before
  every resultCh send, defer becomes a no-op via the cleaned flag.
  Verified clean over `go test -race -count=10`.

Test infrastructure made hermetic:
- gitSafe now also passes -c commit.gpgsign=false / -c tag.gpgsign=false
  so sandbox tests pass on hosts whose global config requires signing.
- Bare repos in tests initialized with `-b main` (HEAD symbolic ref
  matched to the branch we push) so `git log` after push works.
- TestSandboxCloneSource_FallsBackToOrigin uses a local-FS origin URL,
  matching sandboxCloneSource's intentional filter against network URLs.
- TestGeminiLogs_ParsedCorrectly URL fixed to the actual log route
  (/api/executions/{id}/log).

GeminiRunner gap closed (partial):
- parseGeminiStream now walks lines for `result` events, surfacing
  is_error as an error and total_cost_usd as the float return value.
- GeminiRunner.Run propagates parsed cost to Execution.CostUSD.
- TestParseGeminiStream_ParsesStructuredOutput unskipped.

Notes:
- GeminiRunner is still simulated end-to-end (Run writes hardcoded
  stream data instead of execing the binary). The result/cost parser
  now exists; finishing the runner is a smaller, contained follow-up.
  Kept on the deferred queue.
- Frontend "Local" agent option and a minor storage.db.go logger TODO
  remain on the deferred queue, both intentionally — neither blocks
  anything in flight.

https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
---
 internal/executor/gemini.go | 87 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 62 insertions(+), 25 deletions(-)

(limited to 'internal/executor/gemini.go')

diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go
index d79c47d..7f2f54f 100644
--- a/internal/executor/gemini.go
+++ b/internal/executor/gemini.go
@@ -2,6 +2,7 @@ package executor
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"io"
 	"log/slog"
@@ -117,16 +118,21 @@ func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir,
 
 
 	var streamErr error
+	var streamCost float64
 	var wg sync.WaitGroup
 	wg.Add(1)
 	go func() {
 		defer wg.Done()
-		_, streamErr = parseGeminiStream(stdoutR, stdoutFile, r.Logger)
+		streamCost, streamErr = parseGeminiStream(stdoutR, stdoutFile, r.Logger)
 		stdoutR.Close()
 	}()
 
 	wg.Wait() // Wait for parseGeminiStream to finish
 
+	if streamCost > 0 {
+		e.CostUSD = streamCost
+	}
+
 	// Set a dummy exit code for this simulated run
 	e.ExitCode = 0
 
@@ -136,9 +142,10 @@ func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir,
 	return nil
 }
 
-// parseGeminiStream reads streaming JSON from the gemini CLI, unwraps markdown
-// code blocks, writes the inner JSON to w, and returns (costUSD, error).
-// For now, it focuses on unwrapping and writing, not detailed parsing of cost/errors.
+// parseGeminiStream reads streaming JSON from the gemini CLI, strips markdown
+// code fences if the output is wrapped in them, writes the inner stream-json
+// to w, and returns (costUSD, error). If a `result` event has `is_error: true`,
+// an error wrapping the result message is returned.
 func parseGeminiStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64, error) {
 	fullOutput, err := io.ReadAll(r)
 	if err != nil {
@@ -146,31 +153,61 @@ func parseGeminiStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64,
 	}
 	logger.Debug("parseGeminiStream: raw output received", "output", string(fullOutput))
 
-	outputStr := strings.TrimSpace(string(fullOutput)) // Trim leading/trailing whitespace/newlines from the whole output
-
-	jsonContent := outputStr // Default to raw output if no markdown block is found or malformed
-	jsonStartIdx := strings.Index(outputStr, "```json")
-	if jsonStartIdx != -1 {
-		// Found "```json", now look for the closing "```"
-		jsonEndIdx := strings.LastIndex(outputStr, "```")
-		if jsonEndIdx != -1 && jsonEndIdx > jsonStartIdx {
-			// Extract content between the markdown fences.
-			jsonContent = outputStr[jsonStartIdx+len("```json"):jsonEndIdx]
-			jsonContent = strings.TrimSpace(jsonContent) // Trim again after extraction, to remove potential inner newlines
-		} else {
-			logger.Warn("Malformed markdown JSON block from Gemini (missing closing ``` or invalid structure), falling back to raw output.", "outputLength", len(outputStr))
+	inner := stripGeminiFences(string(fullOutput), logger)
+	if _, writeErr := w.Write([]byte(inner)); writeErr != nil {
+		return 0, fmt.Errorf("writing gemini output: %w", writeErr)
+	}
+
+	// Walk lines looking for a result event so we can surface errors and cost.
+	var (
+		cost    float64
+		errMsg  string
+		isError bool
+	)
+	for _, raw := range strings.Split(inner, "\n") {
+		line := strings.TrimSpace(raw)
+		if line == "" {
+			continue
+		}
+		var evt struct {
+			Type    string  `json:"type"`
+			IsError bool    `json:"is_error"`
+			Result  string  `json:"result"`
+			Cost    float64 `json:"total_cost_usd"`
+		}
+		if err := json.Unmarshal([]byte(line), &evt); err != nil {
+			continue
+		}
+		if evt.Type == "result" {
+			if evt.Cost > 0 {
+				cost = evt.Cost
+			}
+			if evt.IsError {
+				isError = true
+				errMsg = evt.Result
+			}
 		}
-	} else {
-		logger.Warn("No markdown JSON block found from Gemini, falling back to raw output.", "outputLength", len(outputStr))
 	}
-
-	// Write the (possibly extracted and trimmed) JSON content to the writer.
-	_, writeErr := w.Write([]byte(jsonContent))
-	if writeErr != nil {
-		return 0, fmt.Errorf("writing extracted gemini json: %w", writeErr)
+	if isError {
+		return cost, fmt.Errorf("gemini reported error: %s", errMsg)
 	}
+	return cost, nil
+}
 
-	return 0, nil // For now, no cost/error parsing for Gemini stream
+// stripGeminiFences removes a surrounding ```json ... ``` markdown block if
+// present, returning the trimmed inner content. If no markdown fence is
+// found, the input is returned verbatim (no whitespace trimming) so callers
+// that expect byte-exact pass-through behavior get it.
+func stripGeminiFences(raw string, logger *slog.Logger) string {
+	trimmed := strings.TrimSpace(raw)
+	if start := strings.Index(trimmed, "```json"); start != -1 {
+		if end := strings.LastIndex(trimmed, "```"); end > start {
+			return strings.TrimSpace(trimmed[start+len("```json") : end])
+		}
+		logger.Warn("malformed gemini markdown block (missing closing fence); using raw output", "len", len(trimmed))
+		return trimmed
+	}
+	return raw
 }
 
 func (r *GeminiRunner) buildArgs(t *task.Task, e *storage.Execution, questionFile string) []string {
-- 
cgit v1.2.3


From e7b382bf177cbe518af3d86c3ee6c49344d225f4 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 3 May 2026 08:00:20 +0000
Subject: chore: close deferred work — real GeminiRunner, Local UI option,
 db.go cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the three items left on the deferred queue after the post-epic
cleanup.

GeminiRunner.execOnce now actually executes the gemini binary instead
of writing hardcoded stream data. Mirrors ClaudeRunner.execOnce:
- exec.CommandContext with the same env vars (CLAUDOMATOR_API_URL etc.)
- process group SIGKILL on context cancel
- stdout piped through parseGeminiStream → stdoutFile
- stderr to file
- exit codes captured, stderr tail surfaced on failure

Test infrastructure bug uncovered in passing: testServerWithGeminiMockRunner's
mock script used double-quoted echo with literal triple-backticks, which
bash interpreted as command substitution. The script always produced
empty output. The bug was invisible until now because GeminiRunner
ignored the script entirely. Switched to a single-quoted heredoc.

Frontend: index.html dropdown gains a "Local" option. No JS branching
needed — the value flows through to agent.type verbatim and downstream
display reads the type string as-is.

storage/db.go: removed stale debug-comment scaffolding (the "TODO:
Replace with proper logger" block) that was tracking a dead
`fmt.Printf` call. The path it commented on is fine without logging —
unmarshal errors are returned wrapped.

Test status: `go test -race ./...` green across every package, zero
skips, zero excluded tests.

https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
---
 docs/plans/local-oss-runner.md |  9 +++++++
 internal/api/server_test.go    | 23 ++++++++--------
 internal/executor/gemini.go    | 61 +++++++++++++++++++++++++++++++-----------
 internal/storage/db.go         |  5 ----
 web/index.html                 |  1 +
 5 files changed, 67 insertions(+), 32 deletions(-)

(limited to 'internal/executor/gemini.go')

diff --git a/docs/plans/local-oss-runner.md b/docs/plans/local-oss-runner.md
index 4d5cb87..4504bbb 100644
--- a/docs/plans/local-oss-runner.md
+++ b/docs/plans/local-oss-runner.md
@@ -222,6 +222,15 @@ Items not chased (deferred deliberately):
 - **Frontend "Local" agent option** — UI dropdown still says "Auto / Claude / Gemini". Pending token telemetry surface.
 - **`storage.db.go:706` TODO comment** — minor logger plumbing nit. Skipping unless it blocks something.
 
+## Deferred work — DONE
+
+Follow-up commit closed the three deferred items above:
+
+- `GeminiRunner.execOnce` now invokes the actual `gemini` binary via `exec.CommandContext`, mirroring the `ClaudeRunner` pattern: pipe stdout to `parseGeminiStream`, kill the process group on context cancel, capture stderr to file, surface exit codes. Hardcoded simulation removed.
+- Test infrastructure bug uncovered and fixed in passing: the mock gemini script in `testServerWithGeminiMockRunner` was using `"\``json\`"` which bash interpreted as command substitution, so the script always produced empty output. Switched to a single-quoted heredoc. The bug was masked previously because the runner ignored the script entirely.
+- Frontend `index.html` dropdown gains a `Local` option. No JS branching changes needed — the value flows through to `agent.type` verbatim and downstream display reads the type string as-is.
+- Stale debug-comment scaffolding around `storage.db.go:706` deleted.
+
 ---
 
 # Phase 2 — Focused Plan (Elaboration)
diff --git a/internal/api/server_test.go b/internal/api/server_test.go
index 516e289..2139e36 100644
--- a/internal/api/server_test.go
+++ b/internal/api/server_test.go
@@ -143,20 +143,21 @@ func testServerWithGeminiMockRunner(t *testing.T) (*Server, *storage.DB) {
 
 	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
 
-	// Create the mock gemini binary script.
+	// Create the mock gemini binary script. Use single-quoted heredoc so
+	// bash does not try to evaluate the literal backticks as command
+	// substitution.
 	mockBinDir := t.TempDir()
 	mockGeminiPath := filepath.Join(mockBinDir, "mock-gemini-binary.sh")
 	mockScriptContent := `#!/bin/bash
-OUTPUT_FILE=$(mktemp)
-echo "` + "```json" + `" > "$OUTPUT_FILE"
-echo "{\"type\":\"content_block_start\",\"content_block\":{\"text\":\"Hello, Gemini!\",\"type\":\"text\"}}" >> "$OUTPUT_FILE"
-echo "{\"type\":\"content_block_delta\",\"content_block\":{\"text\":\" How are you?\"}}" >> "$OUTPUT_FILE"
-echo "{\"type\":\"content_block_end\"}" >> "$OUTPUT_FILE"
-echo "{\"type\":\"message_delta\",\"message\":{\"role\":\"model\"}}" >> "$OUTPUT_FILE"
-echo "{\"type\":\"message_end\"}" >> "$OUTPUT_FILE"
-echo "` + "```" + `" >> "$OUTPUT_FILE"
-cat "$OUTPUT_FILE"
-rm "$OUTPUT_FILE"
+cat <<'EOF'
+` + "```json" + `
+{"type":"content_block_start","content_block":{"text":"Hello, Gemini!","type":"text"}}
+{"type":"content_block_delta","content_block":{"text":" How are you?"}}
+{"type":"content_block_end"}
+{"type":"message_delta","message":{"role":"model"}}
+{"type":"message_end"}
+` + "```" + `
+EOF
 exit 0
 `
 	if err := os.WriteFile(mockGeminiPath, []byte(mockScriptContent), 0755); err != nil {
diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go
index 7f2f54f..04382ae 100644
--- a/internal/executor/gemini.go
+++ b/internal/executor/gemini.go
@@ -7,9 +7,11 @@ import (
 	"io"
 	"log/slog"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"strings"
 	"sync"
+	"syscall"
 
 	"github.com/thepeterstone/claudomator/internal/storage"
 	"github.com/thepeterstone/claudomator/internal/task"
@@ -84,8 +86,18 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
 }
 
 func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir, projectDir string, e *storage.Execution) error {
-	// Temporarily bypass external command execution to debug pipe.
-	// We will simulate outputting to stdoutW directly.
+	cmd := exec.CommandContext(ctx, r.binaryPath(), args...)
+	cmd.Env = append(os.Environ(),
+		"CLAUDOMATOR_API_URL="+r.APIURL,
+		"CLAUDOMATOR_TASK_ID="+e.TaskID,
+		"CLAUDOMATOR_PROJECT_DIR="+projectDir,
+		"CLAUDOMATOR_QUESTION_FILE="+filepath.Join(e.ArtifactDir, "question.json"),
+		"CLAUDOMATOR_SUMMARY_FILE="+filepath.Join(e.ArtifactDir, "summary.txt"),
+	)
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+	if workingDir != "" {
+		cmd.Dir = workingDir
+	}
 
 	stdoutFile, err := os.Create(e.StdoutPath)
 	if err != nil {
@@ -103,22 +115,27 @@ func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir,
 	if err != nil {
 		return fmt.Errorf("creating stdout pipe: %w", err)
 	}
+	cmd.Stdout = stdoutW
+	cmd.Stderr = stderrFile
 
-	// Simulate writing to stdoutW
+	if err := cmd.Start(); err != nil {
+		stdoutW.Close()
+		stdoutR.Close()
+		return fmt.Errorf("starting gemini: %w", err)
+	}
+	stdoutW.Close()
+
+	killDone := make(chan struct{})
 	go func() {
-		defer stdoutW.Close() // Close the writer when done.
-		fmt.Fprintf(stdoutW, "```json\n")
-		fmt.Fprintf(stdoutW, "{\"type\":\"content_block_start\",\"content_block\":{\"text\":\"Hello, Gemini!\",\"type\":\"text\"}}\n")
-		fmt.Fprintf(stdoutW, "{\"type\":\"content_block_delta\",\"content_block\":{\"text\":\" How are you?\"}}\n")
-		fmt.Fprintf(stdoutW, "{\"type\":\"content_block_end\"}\n")
-		fmt.Fprintf(stdoutW, "{\"type\":\"message_delta\",\"message\":{\"role\":\"model\"}}\n")
-		fmt.Fprintf(stdoutW, "{\"type\":\"message_end\"}\n")
-		fmt.Fprintf(stdoutW, "```\n")
+		select {
+		case <-ctx.Done():
+			syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+		case <-killDone:
+		}
 	}()
 
-
-	var streamErr error
 	var streamCost float64
+	var streamErr error
 	var wg sync.WaitGroup
 	wg.Add(1)
 	go func() {
@@ -127,14 +144,26 @@ func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir,
 		stdoutR.Close()
 	}()
 
-	wg.Wait() // Wait for parseGeminiStream to finish
+	waitErr := cmd.Wait()
+	close(killDone)
+	wg.Wait()
 
 	if streamCost > 0 {
 		e.CostUSD = streamCost
 	}
 
-	// Set a dummy exit code for this simulated run
-	e.ExitCode = 0
+	if waitErr != nil {
+		if exitErr, ok := waitErr.(*exec.ExitError); ok {
+			e.ExitCode = exitErr.ExitCode()
+		}
+		if streamErr != nil {
+			return streamErr
+		}
+		if tail := tailFile(e.StderrPath, 20); tail != "" {
+			return fmt.Errorf("gemini exited with error: %w\nstderr:\n%s", waitErr, tail)
+		}
+		return fmt.Errorf("gemini exited with error: %w", waitErr)
+	}
 
 	if streamErr != nil {
 		return streamErr
diff --git a/internal/storage/db.go b/internal/storage/db.go
index c871c77..ce60e2f 100644
--- a/internal/storage/db.go
+++ b/internal/storage/db.go
@@ -699,11 +699,6 @@ func scanTask(row scanner) (*task.Task, error) {
 	t.State = task.State(state)
 	t.Priority = task.Priority(priority)
 	t.Timeout.Duration = time.Duration(timeoutNS)
-	// Add debug log for configJSON
-	// The logger is not available directly in db.go, so I'll use fmt.Printf for now.
-	// For production code, a logger should be injected.
-	// fmt.Printf("DEBUG: configJSON from DB: %s\n", configJSON)
-	// TODO: Replace with proper logger when available.
 	if err := json.Unmarshal([]byte(configJSON), &t.Agent); err != nil {
 		return nil, fmt.Errorf("unmarshaling agent config: %w", err)
 	}
diff --git a/web/index.html b/web/index.html
index 1746baf..7c0b030 100644
--- a/web/index.html
+++ b/web/index.html
@@ -16,6 +16,7 @@
         <option value="auto">Auto</option>
         <option value="claude">Claude</option>
         <option value="gemini">Gemini</option>
+        <option value="local">Local</option>
       </select>
       <button id="btn-start-next" class="btn-secondary">Start Next</button>
       <button id="btn-new-task" class="btn-primary">New Task</button>
-- 
cgit v1.2.3


From e7171181fff10c66b2b74eabfb1fc94b3cfbb4fb Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 12 May 2026 21:03:30 +0000
Subject: feat(executor): bring GeminiRunner to sandbox-flow parity with Claude
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All coding tasks now follow the same flow regardless of runner: when
project_dir is set, the agent runs in a temp clone, not in the user's
working tree. On success, edits are autocommitted and pushed back to
origin/master and the sandbox is removed. On failure or BLOCKED, the
sandbox is preserved and its path surfaces in the error / BlockedError
so the user can inspect partial work or resume in place.

Before this commit, GeminiRunner.Run set cmd.Dir to project_dir
directly, so an agent run could leave half-done edits in the user's
working tree with no rollback. ClaudeRunner has had the full sandbox
flow for a while; this commit closes the gap.

Reused the existing package-level helpers from claude.go verbatim:
setupSandbox, teardownSandbox, sandboxCloneSource, gitSafe, plus the
resume/stale-sandbox/blocked-error patterns. No new shared abstraction
needed — same package.

LocalRunner intentionally not changed. The OpenAI chat path has no
tool use, so the agent can't edit files; sandbox would be theater.

Tests (6 new):
- Run_ProjectDir_RunsInSandbox: cwd captured by fake binary is a
  sandbox path, not project_dir.
- Run_BlockedError_IncludesSandboxDir: when question.json appears,
  BlockedError.SandboxDir is set and the dir exists.
- Run_ExecError_PreservesSandbox: failing exit wraps error with
  "(sandbox preserved at <path>)" and the path exists on disk.
- Run_ResumeUsesStoredSandboxDir: ResumeSessionID + SandboxDir →
  runs in that dir without re-cloning.
- Run_StaleSandboxDir_ClonesAfresh: resume pointing at missing
  dir falls back to a fresh clone from project_dir.
- Run_NoProjectDir_SkipsSandbox: tasks without project_dir don't
  trigger sandbox setup.

https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
---
 internal/executor/gemini.go      |  96 ++++++++++++--
 internal/executor/gemini_test.go | 268 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 353 insertions(+), 11 deletions(-)

(limited to 'internal/executor/gemini.go')

diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go
index 04382ae..3abec05 100644
--- a/internal/executor/gemini.go
+++ b/internal/executor/gemini.go
@@ -40,11 +40,21 @@ func (r *GeminiRunner) binaryPath() string {
 	return "gemini"
 }
 
-// Run executes a gemini <instructions> invocation, streaming output to log files.
+// Run executes the gemini CLI inside a sandboxed clone of project_dir.
+// When project_dir is set, claudomator first clones it into a temp sandbox
+// (preferring a `local` bare remote, then `origin`, then the working tree)
+// and runs the agent there. On success the sandbox is autocommitted and
+// pushed back to origin/master, then removed. On failure the sandbox is
+// preserved and its path is included in the returned error so the user can
+// inspect partial work. If the agent writes a question file before exiting,
+// Run returns *BlockedError with SandboxDir populated so a resume execution
+// can pick up in the same directory.
 func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
-	if t.Agent.ProjectDir != "" {
-		if _, err := os.Stat(t.Agent.ProjectDir); err != nil {
-			return fmt.Errorf("project_dir %q: %w", t.Agent.ProjectDir, err)
+	projectDir := t.Agent.ProjectDir
+
+	if projectDir != "" {
+		if _, err := os.Stat(projectDir); err != nil {
+			return fmt.Errorf("project_dir %q: %w", projectDir, err)
 		}
 	}
 
@@ -63,24 +73,88 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
 	}
 
 	if e.SessionID == "" {
-		e.SessionID = e.ID
+		if e.ResumeSessionID != "" {
+			e.SessionID = e.ResumeSessionID
+		} else {
+			e.SessionID = e.ID
+		}
+	}
+
+	// Sandbox setup: for new executions with a project_dir, clone into a sandbox.
+	// Resume executions reuse the preserved sandbox so any partial work survives.
+	// If the preserved sandbox is missing (e.g. /tmp was purged), clone fresh.
+	var sandboxDir string
+	var startHEAD string
+	effectiveWorkingDir := projectDir
+	if e.ResumeSessionID != "" {
+		if e.SandboxDir != "" {
+			if _, statErr := os.Stat(e.SandboxDir); statErr == nil {
+				effectiveWorkingDir = e.SandboxDir
+			} else {
+				r.Logger.Warn("preserved sandbox missing, cloning fresh", "sandbox", e.SandboxDir, "project_dir", projectDir)
+				e.SandboxDir = ""
+				if projectDir != "" {
+					var err error
+					sandboxDir, err = setupSandbox(projectDir, r.Logger)
+					if err != nil {
+						return fmt.Errorf("setting up sandbox: %w", err)
+					}
+					effectiveWorkingDir = sandboxDir
+					r.Logger.Info("fresh sandbox created for resume", "sandbox", sandboxDir, "project_dir", projectDir)
+				}
+			}
+		}
+	} else if projectDir != "" {
+		var err error
+		sandboxDir, err = setupSandbox(projectDir, r.Logger)
+		if err != nil {
+			return fmt.Errorf("setting up sandbox: %w", err)
+		}
+		effectiveWorkingDir = sandboxDir
+		r.Logger.Info("sandbox created", "sandbox", sandboxDir, "project_dir", projectDir)
+	}
+
+	if effectiveWorkingDir != "" {
+		headOut, _ := exec.Command("git", gitSafe("-C", effectiveWorkingDir, "rev-parse", "HEAD")...).Output()
+		startHEAD = strings.TrimSpace(string(headOut))
 	}
 
 	questionFile := filepath.Join(logDir, "question.json")
 	args := r.buildArgs(t, e, questionFile)
 
-	// Gemini CLI doesn't necessarily have the same rate limiting behavior as Claude,
-	// but we'll use a similar execution pattern.
-	err := r.execOnce(ctx, args, t.Agent.ProjectDir, t.Agent.ProjectDir, e)
-	if err != nil {
+	if err := r.execOnce(ctx, args, effectiveWorkingDir, projectDir, e); err != nil {
+		if sandboxDir != "" {
+			return fmt.Errorf("%w (sandbox preserved at %s)", err, sandboxDir)
+		}
 		return err
 	}
 
 	// Check whether the agent left a question before exiting.
 	data, readErr := os.ReadFile(questionFile)
 	if readErr == nil {
-		os.Remove(questionFile) // consumed
-		return &BlockedError{QuestionJSON: strings.TrimSpace(string(data)), SessionID: e.SessionID}
+		os.Remove(questionFile)
+		questionJSON := strings.TrimSpace(string(data))
+		if isCompletionReport(questionJSON) {
+			r.Logger.Info("treating question file as completion report", "taskID", e.TaskID)
+			e.Summary = extractQuestionText(questionJSON)
+		} else {
+			// Preserve sandbox on BLOCKED so a resume can pick up in the same dir.
+			return &BlockedError{QuestionJSON: questionJSON, SessionID: e.SessionID, SandboxDir: sandboxDir}
+		}
+	}
+
+	// Read agent summary if written.
+	summaryFile := filepath.Join(logDir, "summary.txt")
+	if summaryData, readErr := os.ReadFile(summaryFile); readErr == nil {
+		os.Remove(summaryFile)
+		e.Summary = strings.TrimSpace(string(summaryData))
+	}
+
+	// Merge sandbox back to project_dir and clean up.
+	if sandboxDir != "" {
+		if mergeErr := teardownSandbox(projectDir, sandboxDir, startHEAD, r.Logger, e); mergeErr != nil {
+			return fmt.Errorf("sandbox teardown: %w (sandbox preserved at %s)", mergeErr, sandboxDir)
+		}
 	}
 	return nil
 }
diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go
index 4b0339e..cd11ebc 100644
--- a/internal/executor/gemini_test.go
+++ b/internal/executor/gemini_test.go
@@ -3,8 +3,11 @@ package executor
 import (
 	"bytes"
 	"context"
+	"errors"
 	"io"
 	"log/slog"
+	"os"
+	"path/filepath"
 	"strings"
 	"testing"
 
@@ -177,3 +180,268 @@ func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) {
 		t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", expectedWriterContent, writer.String())
 	}
 }
+
+// TestGeminiRunner_Run_ProjectDir_RunsInSandbox verifies that when project_dir
+// is set, the gemini subprocess runs inside a sandbox clone — not in
+// project_dir itself.
+func TestGeminiRunner_Run_ProjectDir_RunsInSandbox(t *testing.T) {
+	projectDir := t.TempDir()
+	initGitRepo(t, projectDir)
+
+	logDir := t.TempDir()
+	cwdFile := filepath.Join(logDir, "gemini-cwd.txt")
+
+	// Fake gemini binary that records its $PWD then exits 0.
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n"
+	if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do work",
+			ProjectDir:   projectDir,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "sandbox-exec", TaskID: "task-1"}
+
+	if err := r.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+
+	got, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	cwd := string(got)
+	if cwd == projectDir {
+		t.Errorf("ran directly in project_dir; expected sandbox clone (cwd=%q)", cwd)
+	}
+	// Sandbox should be removed after successful teardown (no edits → nothing to push).
+	// We can't assert the exact dir, but it should not be projectDir.
+}
+
+// TestGeminiRunner_Run_BlockedError_IncludesSandboxDir verifies that when the
+// agent writes a question file before exiting, the BlockedError carries the
+// sandbox path so resume runs in the same dir.
+func TestGeminiRunner_Run_BlockedError_IncludesSandboxDir(t *testing.T) {
+	src := t.TempDir()
+	initGitRepo(t, src)
+	logDir := t.TempDir()
+
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	if err := os.WriteFile(scriptPath, []byte(`#!/bin/sh
+if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then
+  printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE"
+fi
+`), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do something",
+			ProjectDir:   src,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "blocked-gemini-exec", TaskID: "task-1"}
+
+	err := r.Run(context.Background(), tk, e)
+
+	var blocked *BlockedError
+	if !errors.As(err, &blocked) {
+		t.Fatalf("expected BlockedError, got: %v", err)
+	}
+	if blocked.SandboxDir == "" {
+		t.Error("BlockedError.SandboxDir should be set when gemini task runs in a sandbox")
+	}
+	if _, statErr := os.Stat(blocked.SandboxDir); os.IsNotExist(statErr) {
+		t.Error("sandbox directory should be preserved when blocked")
+	} else {
+		os.RemoveAll(blocked.SandboxDir)
+	}
+}
+
+// TestGeminiRunner_Run_ExecError_PreservesSandbox verifies that when gemini
+// exits non-zero, the sandbox path is included in the wrapped error so the
+// user can inspect partial work.
+func TestGeminiRunner_Run_ExecError_PreservesSandbox(t *testing.T) {
+	src := t.TempDir()
+	initGitRepo(t, src)
+	logDir := t.TempDir()
+
+	// "false" exits 1, no output.
+	r := &GeminiRunner{
+		BinaryPath: "false",
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do something",
+			ProjectDir:   src,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "err-gemini-exec", TaskID: "task-1"}
+
+	err := r.Run(context.Background(), tk, e)
+	if err == nil {
+		t.Fatal("expected error from failing gemini exit")
+	}
+	if !strings.Contains(err.Error(), "sandbox preserved at ") {
+		t.Errorf("expected error to include sandbox path; got: %v", err)
+	}
+	// Extract path and verify it exists.
+	idx := strings.Index(err.Error(), "sandbox preserved at ")
+	rest := err.Error()[idx+len("sandbox preserved at "):]
+	rest = strings.TrimSuffix(rest, ")")
+	rest = strings.TrimSpace(rest)
+	if _, statErr := os.Stat(rest); os.IsNotExist(statErr) {
+		t.Errorf("sandbox path from error should exist on disk: %q", rest)
+	} else {
+		os.RemoveAll(rest)
+	}
+}
+
+// TestGeminiRunner_Run_ResumeUsesStoredSandboxDir verifies that a resume
+// execution runs in the preserved SandboxDir rather than cloning fresh.
+func TestGeminiRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) {
+	logDir := t.TempDir()
+	sandboxDir := t.TempDir()
+	initGitRepo(t, sandboxDir)
+	cwdFile := filepath.Join(logDir, "cwd.txt")
+
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n"
+	if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{
+		ID:              "resume-gemini-1",
+		TaskID:          "task-resume",
+		ResumeSessionID: "session-abc",
+		SandboxDir:      sandboxDir,
+	}
+
+	if err := r.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run with preserved sandbox: %v", err)
+	}
+
+	got, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	if string(got) != sandboxDir {
+		t.Errorf("resume should run in preserved sandbox; got cwd=%q want %q", got, sandboxDir)
+	}
+}
+
+// TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh verifies that a resume
+// pointing at a missing sandbox falls back to cloning a fresh sandbox from
+// project_dir rather than failing outright.
+func TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) {
+	logDir := t.TempDir()
+	projectDir := t.TempDir()
+	initGitRepo(t, projectDir)
+
+	cwdFile := filepath.Join(logDir, "cwd.txt")
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n"
+	if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			ProjectDir:   projectDir,
+			SkipPlanning: true,
+		},
+	}
+	staleSandbox := filepath.Join(t.TempDir(), "gone")
+	e := &storage.Execution{
+		ID:              "resume-gemini-2",
+		TaskID:          "task-stale",
+		ResumeSessionID: "session-xyz",
+		SandboxDir:      staleSandbox,
+	}
+
+	if err := r.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run with stale sandbox: %v", err)
+	}
+
+	got, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	cwd := string(got)
+	if cwd == staleSandbox {
+		t.Error("ran in stale (nonexistent) sandbox dir")
+	}
+	if cwd == projectDir {
+		t.Error("ran directly in project_dir; expected a fresh sandbox clone")
+	}
+}
+
+// TestGeminiRunner_Run_NoProjectDir_SkipsSandbox verifies that a task with no
+// project_dir doesn't trigger sandbox setup (matches LocalRunner/non-coding
+// task semantics).
+func TestGeminiRunner_Run_NoProjectDir_SkipsSandbox(t *testing.T) {
+	logDir := t.TempDir()
+
+	r := &GeminiRunner{
+		BinaryPath: "true", // exits 0, no output
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "summarize: 2+2",
+			SkipPlanning: true,
+			// No ProjectDir
+		},
+	}
+	e := &storage.Execution{ID: "no-pd-gemini", TaskID: "task-nopd"}
+
+	if err := r.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run without project_dir: %v", err)
+	}
+	if e.SandboxDir != "" {
+		t.Errorf("SandboxDir should be empty for tasks without project_dir, got %q", e.SandboxDir)
+	}
+}
-- 
cgit v1.2.3