feat(executor): bring GeminiRunner to sandbox-flow parity with Claude

All coding tasks now follow the same flow regardless of runner: when project_dir is set, the agent runs in a temp clone, not in the user's working tree. On success, edits are autocommitted and pushed back to origin/master and the sandbox is removed. On failure or BLOCKED, the sandbox is preserved and its path surfaces in the error / BlockedError so the user can inspect partial work or resume in place. Before this commit, GeminiRunner.Run set cmd.Dir to project_dir directly, so an agent run could leave half-done edits in the user's working tree with no rollback. ClaudeRunner has had the full sandbox flow for a while; this commit closes the gap. Reused the existing package-level helpers from claude.go verbatim: setupSandbox, teardownSandbox, sandboxCloneSource, gitSafe, plus the resume/stale-sandbox/blocked-error patterns. No new shared abstraction needed — same package. LocalRunner intentionally not changed. The OpenAI chat path has no tool use, so the agent can't edit files; sandbox would be theater. Tests (6 new): - Run_ProjectDir_RunsInSandbox: cwd captured by fake binary is a sandbox path, not project_dir. - Run_BlockedError_IncludesSandboxDir: when question.json appears, BlockedError.SandboxDir is set and the dir exists. - Run_ExecError_PreservesSandbox: failing exit wraps error with "(sandbox preserved at <path>)" and the path exists on disk. - Run_ResumeUsesStoredSandboxDir: ResumeSessionID + SandboxDir → runs in that dir without re-cloning. - Run_StaleSandboxDir_ClonesAfresh: resume pointing at missing dir falls back to a fresh clone from project_dir. - Run_NoProjectDir_SkipsSandbox: tasks without project_dir don't trigger sandbox setup. https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
author: Claude <noreply@anthropic.com> 2026-05-12 21:03:30 +0000
committer: Claude <noreply@anthropic.com> 2026-05-12 21:03:30 +0000
commit: e7171181fff10c66b2b74eabfb1fc94b3cfbb4fb (patch)
tree: 7c62bacc3c02ce5a910ebd176c9d62d10564a5e1 /internal/executor/gemini.go
parent: 22ecff1fde5aa17d3053f43a8ac81f9ca49d8d56 (diff)
1 files changed, 85 insertions, 11 deletions
diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go
index 04382ae..3abec05 100644
--- a/internal/executor/gemini.go
+++ b/internal/executor/gemini.go
@@ -40,11 +40,21 @@ func (r *GeminiRunner) binaryPath() string {
 	return "gemini"
 }
 
-// Run executes a gemini <instructions> invocation, streaming output to log files.
+// Run executes the gemini CLI inside a sandboxed clone of project_dir.
+// When project_dir is set, claudomator first clones it into a temp sandbox
+// (preferring a `local` bare remote, then `origin`, then the working tree)
+// and runs the agent there. On success the sandbox is autocommitted and
+// pushed back to origin/master, then removed. On failure the sandbox is
+// preserved and its path is included in the returned error so the user can
+// inspect partial work. If the agent writes a question file before exiting,
+// Run returns *BlockedError with SandboxDir populated so a resume execution
+// can pick up in the same directory.
 func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
-	if t.Agent.ProjectDir != "" {
-		if _, err := os.Stat(t.Agent.ProjectDir); err != nil {
-			return fmt.Errorf("project_dir %q: %w", t.Agent.ProjectDir, err)
+	projectDir := t.Agent.ProjectDir
+
+	if projectDir != "" {
+		if _, err := os.Stat(projectDir); err != nil {
+			return fmt.Errorf("project_dir %q: %w", projectDir, err)
 		}
 	}
 
@@ -63,24 +73,88 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
 	}
 
 	if e.SessionID == "" {
-		e.SessionID = e.ID
+		if e.ResumeSessionID != "" {
+			e.SessionID = e.ResumeSessionID
+		} else {
+			e.SessionID = e.ID
+		}
+	}
+
+	// Sandbox setup: for new executions with a project_dir, clone into a sandbox.
+	// Resume executions reuse the preserved sandbox so any partial work survives.
+	// If the preserved sandbox is missing (e.g. /tmp was purged), clone fresh.
+	var sandboxDir string
+	var startHEAD string
+	effectiveWorkingDir := projectDir
+	if e.ResumeSessionID != "" {
+		if e.SandboxDir != "" {
+			if _, statErr := os.Stat(e.SandboxDir); statErr == nil {
+				effectiveWorkingDir = e.SandboxDir
+			} else {
+				r.Logger.Warn("preserved sandbox missing, cloning fresh", "sandbox", e.SandboxDir, "project_dir", projectDir)
+				e.SandboxDir = ""
+				if projectDir != "" {
+					var err error
+					sandboxDir, err = setupSandbox(projectDir, r.Logger)
+					if err != nil {
+						return fmt.Errorf("setting up sandbox: %w", err)
+					}
+					effectiveWorkingDir = sandboxDir
+					r.Logger.Info("fresh sandbox created for resume", "sandbox", sandboxDir, "project_dir", projectDir)
+				}
+			}
+		}
+	} else if projectDir != "" {
+		var err error
+		sandboxDir, err = setupSandbox(projectDir, r.Logger)
+		if err != nil {
+			return fmt.Errorf("setting up sandbox: %w", err)
+		}
+		effectiveWorkingDir = sandboxDir
+		r.Logger.Info("sandbox created", "sandbox", sandboxDir, "project_dir", projectDir)
+	}
+
+	if effectiveWorkingDir != "" {
+		headOut, _ := exec.Command("git", gitSafe("-C", effectiveWorkingDir, "rev-parse", "HEAD")...).Output()
+		startHEAD = strings.TrimSpace(string(headOut))
 	}
 
 	questionFile := filepath.Join(logDir, "question.json")
 	args := r.buildArgs(t, e, questionFile)
 
-	// Gemini CLI doesn't necessarily have the same rate limiting behavior as Claude,
-	// but we'll use a similar execution pattern.
-	err := r.execOnce(ctx, args, t.Agent.ProjectDir, t.Agent.ProjectDir, e)
-	if err != nil {
+	if err := r.execOnce(ctx, args, effectiveWorkingDir, projectDir, e); err != nil {
+		if sandboxDir != "" {
+			return fmt.Errorf("%w (sandbox preserved at %s)", err, sandboxDir)
+		}
 		return err
 	}
 
 	// Check whether the agent left a question before exiting.
 	data, readErr := os.ReadFile(questionFile)
 	if readErr == nil {
-		os.Remove(questionFile) // consumed
-		return &BlockedError{QuestionJSON: strings.TrimSpace(string(data)), SessionID: e.SessionID}
+		os.Remove(questionFile)
+		questionJSON := strings.TrimSpace(string(data))
+		if isCompletionReport(questionJSON) {
+			r.Logger.Info("treating question file as completion report", "taskID", e.TaskID)
+			e.Summary = extractQuestionText(questionJSON)
+		} else {
+			// Preserve sandbox on BLOCKED so a resume can pick up in the same dir.
+			return &BlockedError{QuestionJSON: questionJSON, SessionID: e.SessionID, SandboxDir: sandboxDir}
+		}
+	}
+
+	// Read agent summary if written.
+	summaryFile := filepath.Join(logDir, "summary.txt")
+	if summaryData, readErr := os.ReadFile(summaryFile); readErr == nil {
+		os.Remove(summaryFile)
+		e.Summary = strings.TrimSpace(string(summaryData))
+	}
+
+	// Merge sandbox back to project_dir and clean up.
+	if sandboxDir != "" {
+		if mergeErr := teardownSandbox(projectDir, sandboxDir, startHEAD, r.Logger, e); mergeErr != nil {
+			return fmt.Errorf("sandbox teardown: %w (sandbox preserved at %s)", mergeErr, sandboxDir)
+		}
 	}
 	return nil
 }
author	Claude <noreply@anthropic.com>	2026-05-12 21:03:30 +0000
committer	Claude <noreply@anthropic.com>	2026-05-12 21:03:30 +0000
commit	e7171181fff10c66b2b74eabfb1fc94b3cfbb4fb (patch)
tree	7c62bacc3c02ce5a910ebd176c9d62d10564a5e1 /internal/executor/gemini.go
parent	22ecff1fde5aa17d3053f43a8ac81f9ca49d8d56 (diff)