feat(executor): bring GeminiRunner to sandbox-flow parity with Claude

All coding tasks now follow the same flow regardless of runner: when project_dir is set, the agent runs in a temp clone, not in the user's working tree. On success, edits are autocommitted and pushed back to origin/master and the sandbox is removed. On failure or BLOCKED, the sandbox is preserved and its path surfaces in the error / BlockedError so the user can inspect partial work or resume in place.

Before this commit, GeminiRunner.Run set cmd.Dir to project_dir directly, so an agent run could leave half-done edits in the user's working tree with no rollback. ClaudeRunner has had the full sandbox flow for a while; this commit closes the gap.

Reused the existing package-level helpers from claude.go verbatim: setupSandbox, teardownSandbox, sandboxCloneSource, gitSafe, plus the resume/stale-sandbox/blocked-error patterns. No new shared abstraction needed — same package.

LocalRunner intentionally not changed. The OpenAI chat path has no tool use, so the agent can't edit files; sandbox would be theater.

Tests (6 new):
- Run_ProjectDir_RunsInSandbox: cwd captured by fake binary is a sandbox path, not project_dir.
- Run_BlockedError_IncludesSandboxDir: when question.json appears, BlockedError.SandboxDir is set and the dir exists.
- Run_ExecError_PreservesSandbox: failing exit wraps error with "(sandbox preserved at <path>)" and the path exists on disk.
- Run_ResumeUsesStoredSandboxDir: ResumeSessionID + SandboxDir → runs in that dir without re-cloning.
- Run_StaleSandboxDir_ClonesAfresh: resume pointing at missing dir falls back to a fresh clone from project_dir.
- Run_NoProjectDir_SkipsSandbox: tasks without project_dir don't trigger sandbox setup.

https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
author: Claude <noreply@anthropic.com> 2026-05-12 21:03:30 +0000
committer: Claude <noreply@anthropic.com> 2026-05-12 21:03:30 +0000
commit: e7171181fff10c66b2b74eabfb1fc94b3cfbb4fb (patch)
tree: 7c62bacc3c02ce5a910ebd176c9d62d10564a5e1 /internal/executor
parent: 22ecff1fde5aa17d3053f43a8ac81f9ca49d8d56 (diff)
2 files changed, 353 insertions, 11 deletions
diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go
index 04382ae..3abec05 100644
--- a/internal/executor/gemini.go
+++ b/internal/executor/gemini.go
@@ -40,11 +40,21 @@ func (r *GeminiRunner) binaryPath() string {
 	return "gemini"
 }
 
-// Run executes a gemini <instructions> invocation, streaming output to log files.
+// Run executes the gemini CLI inside a sandboxed clone of project_dir.
+// When project_dir is set, claudomator first clones it into a temp sandbox
+// (preferring a `local` bare remote, then `origin`, then the working tree)
+// and runs the agent there. On success the sandbox is autocommitted and
+// pushed back to origin/master, then removed. On failure the sandbox is
+// preserved and its path is included in the returned error so the user can
+// inspect partial work. If the agent writes a question file before exiting,
+// Run returns *BlockedError with SandboxDir populated so a resume execution
+// can pick up in the same directory.
 func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
-	if t.Agent.ProjectDir != "" {
-		if _, err := os.Stat(t.Agent.ProjectDir); err != nil {
-			return fmt.Errorf("project_dir %q: %w", t.Agent.ProjectDir, err)
+	projectDir := t.Agent.ProjectDir
+
+	if projectDir != "" {
+		if _, err := os.Stat(projectDir); err != nil {
+			return fmt.Errorf("project_dir %q: %w", projectDir, err)
 		}
 	}
 
@@ -63,24 +73,88 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
 	}
 
 	if e.SessionID == "" {
-		e.SessionID = e.ID
+		if e.ResumeSessionID != "" {
+			e.SessionID = e.ResumeSessionID
+		} else {
+			e.SessionID = e.ID
+		}
+	}
+
+	// Sandbox setup: for new executions with a project_dir, clone into a sandbox.
+	// Resume executions reuse the preserved sandbox so any partial work survives.
+	// If the preserved sandbox is missing (e.g. /tmp was purged), clone fresh.
+	var sandboxDir string
+	var startHEAD string
+	effectiveWorkingDir := projectDir
+	if e.ResumeSessionID != "" {
+		if e.SandboxDir != "" {
+			if _, statErr := os.Stat(e.SandboxDir); statErr == nil {
+				effectiveWorkingDir = e.SandboxDir
+			} else {
+				r.Logger.Warn("preserved sandbox missing, cloning fresh", "sandbox", e.SandboxDir, "project_dir", projectDir)
+				e.SandboxDir = ""
+				if projectDir != "" {
+					var err error
+					sandboxDir, err = setupSandbox(projectDir, r.Logger)
+					if err != nil {
+						return fmt.Errorf("setting up sandbox: %w", err)
+					}
+					effectiveWorkingDir = sandboxDir
+					r.Logger.Info("fresh sandbox created for resume", "sandbox", sandboxDir, "project_dir", projectDir)
+				}
+			}
+		}
+	} else if projectDir != "" {
+		var err error
+		sandboxDir, err = setupSandbox(projectDir, r.Logger)
+		if err != nil {
+			return fmt.Errorf("setting up sandbox: %w", err)
+		}
+		effectiveWorkingDir = sandboxDir
+		r.Logger.Info("sandbox created", "sandbox", sandboxDir, "project_dir", projectDir)
+	}
+
+	if effectiveWorkingDir != "" {
+		headOut, _ := exec.Command("git", gitSafe("-C", effectiveWorkingDir, "rev-parse", "HEAD")...).Output()
+		startHEAD = strings.TrimSpace(string(headOut))
 	}
 
 	questionFile := filepath.Join(logDir, "question.json")
 	args := r.buildArgs(t, e, questionFile)
 
-	// Gemini CLI doesn't necessarily have the same rate limiting behavior as Claude,
-	// but we'll use a similar execution pattern.
-	err := r.execOnce(ctx, args, t.Agent.ProjectDir, t.Agent.ProjectDir, e)
-	if err != nil {
+	if err := r.execOnce(ctx, args, effectiveWorkingDir, projectDir, e); err != nil {
+		if sandboxDir != "" {
+			return fmt.Errorf("%w (sandbox preserved at %s)", err, sandboxDir)
+		}
 		return err
 	}
 
 	// Check whether the agent left a question before exiting.
 	data, readErr := os.ReadFile(questionFile)
 	if readErr == nil {
-		os.Remove(questionFile) // consumed
-		return &BlockedError{QuestionJSON: strings.TrimSpace(string(data)), SessionID: e.SessionID}
+		os.Remove(questionFile)
+		questionJSON := strings.TrimSpace(string(data))
+		if isCompletionReport(questionJSON) {
+			r.Logger.Info("treating question file as completion report", "taskID", e.TaskID)
+			e.Summary = extractQuestionText(questionJSON)
+		} else {
+			// Preserve sandbox on BLOCKED so a resume can pick up in the same dir.
+			return &BlockedError{QuestionJSON: questionJSON, SessionID: e.SessionID, SandboxDir: sandboxDir}
+		}
+	}
+
+	// Read agent summary if written.
+	summaryFile := filepath.Join(logDir, "summary.txt")
+	if summaryData, readErr := os.ReadFile(summaryFile); readErr == nil {
+		os.Remove(summaryFile)
+		e.Summary = strings.TrimSpace(string(summaryData))
+	}
+
+	// Merge sandbox back to project_dir and clean up.
+	if sandboxDir != "" {
+		if mergeErr := teardownSandbox(projectDir, sandboxDir, startHEAD, r.Logger, e); mergeErr != nil {
+			return fmt.Errorf("sandbox teardown: %w (sandbox preserved at %s)", mergeErr, sandboxDir)
+		}
 	}
 	return nil
 }
diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go
index 4b0339e..cd11ebc 100644
--- a/internal/executor/gemini_test.go
+++ b/internal/executor/gemini_test.go
@@ -3,8 +3,11 @@ package executor
 import (
 	"bytes"
 	"context"
+	"errors"
 	"io"
 	"log/slog"
+	"os"
+	"path/filepath"
 	"strings"
 	"testing"
 
@@ -177,3 +180,268 @@ func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) {
 		t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", expectedWriterContent, writer.String())
 	}
 }
+
+// TestGeminiRunner_Run_ProjectDir_RunsInSandbox verifies that when project_dir
+// is set, the gemini subprocess does not run in project_dir itself: the working
+// directory recorded by the fake binary must differ from project_dir.
+func TestGeminiRunner_Run_ProjectDir_RunsInSandbox(t *testing.T) {
+	projectDir := t.TempDir()
+	initGitRepo(t, projectDir)
+
+	logDir := t.TempDir()
+	cwdFile := filepath.Join(logDir, "gemini-cwd.txt")
+
+	// Fake gemini binary that records its $PWD then exits 0. The redirect target
+	// is single-quoted (quotes escaped) so whitespace in the path cannot split it.
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	script := "#!/bin/sh\nprintf '%s' \"$PWD\" > '" + strings.ReplaceAll(cwdFile, "'", `'\''`) + "'\n"
+	if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do work",
+			ProjectDir:   projectDir,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "sandbox-exec", TaskID: "task-1"}
+
+	if err := r.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+
+	got, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	cwd := string(got)
+	if cwd == projectDir {
+		t.Errorf("ran directly in project_dir; expected sandbox clone (cwd=%q)", cwd)
+	}
+	// The sandbox path is not known to the test, so only "not projectDir" is checked.
+}
+
+// TestGeminiRunner_Run_BlockedError_IncludesSandboxDir verifies that when the
+// agent writes a question file before exiting, Run returns a *BlockedError whose
+// SandboxDir is non-empty and still exists on disk.
+func TestGeminiRunner_Run_BlockedError_IncludesSandboxDir(t *testing.T) {
+	src := t.TempDir()
+	initGitRepo(t, src)
+	logDir := t.TempDir()
+	// Fake gemini binary: writes a question if $CLAUDOMATOR_QUESTION_FILE is set.
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	if err := os.WriteFile(scriptPath, []byte(`#!/bin/sh
+if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then
+  printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE"
+fi
+`), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do something",
+			ProjectDir:   src,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "blocked-gemini-exec", TaskID: "task-1"}
+
+	err := r.Run(context.Background(), tk, e)
+	var blocked *BlockedError
+	if !errors.As(err, &blocked) {
+		t.Fatalf("expected BlockedError, got: %v", err)
+	}
+	// Fatal, not Error: stat-ing "" below would only add a misleading second failure.
+	if blocked.SandboxDir == "" {
+		t.Fatal("BlockedError.SandboxDir should be set when gemini task runs in a sandbox")
+	}
+	if _, statErr := os.Stat(blocked.SandboxDir); os.IsNotExist(statErr) {
+		t.Error("sandbox directory should be preserved when blocked")
+	} else {
+		os.RemoveAll(blocked.SandboxDir)
+	}
+}
+
+// TestGeminiRunner_Run_ExecError_PreservesSandbox verifies that when gemini
+// exits non-zero, the returned error contains "sandbox preserved at <path>"
+// and that the path it names exists on disk.
+func TestGeminiRunner_Run_ExecError_PreservesSandbox(t *testing.T) {
+	src := t.TempDir()
+	initGitRepo(t, src)
+	logDir := t.TempDir()
+
+	// "false" exits 1, no output.
+	r := &GeminiRunner{
+		BinaryPath: "false",
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do something",
+			ProjectDir:   src,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "err-gemini-exec", TaskID: "task-1"}
+
+	err := r.Run(context.Background(), tk, e)
+	if err == nil {
+		t.Fatal("expected error from failing gemini exit")
+	}
+	// Stop at once when the marker is absent: there is no path to extract or stat.
+	const marker = "sandbox preserved at "
+	_, rest, found := strings.Cut(err.Error(), marker)
+	if !found {
+		t.Fatalf("expected error to include sandbox path; got: %v", err)
+	}
+	// Drop the ")" that closes the "(sandbox preserved at <path>)" suffix.
+	rest = strings.TrimSpace(strings.TrimSuffix(rest, ")"))
+	if _, statErr := os.Stat(rest); os.IsNotExist(statErr) {
+		t.Errorf("sandbox path from error should exist on disk: %q", rest)
+	} else {
+		os.RemoveAll(rest)
+	}
+}
+
+// TestGeminiRunner_Run_ResumeUsesStoredSandboxDir verifies that a resume
+// execution runs in the preserved SandboxDir rather than cloning fresh.
+func TestGeminiRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) {
+	logDir := t.TempDir()
+	sandboxDir := t.TempDir()
+	initGitRepo(t, sandboxDir)
+	cwdFile := filepath.Join(logDir, "cwd.txt")
+	// Fake gemini binary records $PWD; the redirect target is shell-quoted.
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	script := "#!/bin/sh\nprintf '%s' \"$PWD\" > '" + strings.ReplaceAll(cwdFile, "'", `'\''`) + "'\n"
+	if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{
+		ID:              "resume-gemini-1",
+		TaskID:          "task-resume",
+		ResumeSessionID: "session-abc",
+		SandboxDir:      sandboxDir,
+	}
+
+	if err := r.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run with preserved sandbox: %v", err)
+	}
+
+	got, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	if string(got) != sandboxDir {
+		t.Errorf("resume should run in preserved sandbox; got cwd=%q want %q", got, sandboxDir)
+	}
+}
+
+// TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh verifies that a resume
+// pointing at a missing sandbox still succeeds and runs neither in the stale
+// path nor directly in project_dir.
+func TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) {
+	logDir := t.TempDir()
+	projectDir := t.TempDir()
+	initGitRepo(t, projectDir)
+	// Fake gemini binary records $PWD; the redirect target is shell-quoted.
+	cwdFile := filepath.Join(logDir, "cwd.txt")
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	script := "#!/bin/sh\nprintf '%s' \"$PWD\" > '" + strings.ReplaceAll(cwdFile, "'", `'\''`) + "'\n"
+	if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			ProjectDir:   projectDir,
+			SkipPlanning: true,
+		},
+	}
+	staleSandbox := filepath.Join(t.TempDir(), "gone")
+	e := &storage.Execution{
+		ID:              "resume-gemini-2",
+		TaskID:          "task-stale",
+		ResumeSessionID: "session-xyz",
+		SandboxDir:      staleSandbox,
+	}
+
+	if err := r.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run with stale sandbox: %v", err)
+	}
+
+	got, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	cwd := string(got)
+	if cwd == staleSandbox {
+		t.Error("ran in stale (nonexistent) sandbox dir")
+	}
+	if cwd == projectDir {
+		t.Error("ran directly in project_dir; expected a fresh sandbox clone")
+	}
+}
+
+// TestGeminiRunner_Run_NoProjectDir_SkipsSandbox runs a task whose agent config
+// leaves ProjectDir empty and checks that Run succeeds and that the
+// execution's SandboxDir is still empty afterwards.
+func TestGeminiRunner_Run_NoProjectDir_SkipsSandbox(t *testing.T) {
+	// "true" stands in for the gemini binary: it exits 0 and prints nothing.
+	runner := &GeminiRunner{
+		BinaryPath: "true",
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     t.TempDir(),
+	}
+
+	// ProjectDir is deliberately left unset.
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "summarize: 2+2",
+		SkipPlanning: true,
+	}
+	execution := &storage.Execution{ID: "no-pd-gemini", TaskID: "task-nopd"}
+
+	err := runner.Run(context.Background(), &task.Task{Agent: agent}, execution)
+	if err != nil {
+		t.Fatalf("Run without project_dir: %v", err)
+	}
+
+	if got := execution.SandboxDir; got != "" {
+		t.Errorf("SandboxDir should be empty for tasks without project_dir, got %q", got)
+	}
+}
author	Claude <noreply@anthropic.com>	2026-05-12 21:03:30 +0000
committer	Claude <noreply@anthropic.com>	2026-05-12 21:03:30 +0000
commit	e7171181fff10c66b2b74eabfb1fc94b3cfbb4fb (patch)
tree	7c62bacc3c02ce5a910ebd176c9d62d10564a5e1 /internal/executor
parent	22ecff1fde5aa17d3053f43a8ac81f9ca49d8d56 (diff)