diff options
| author | Claude <noreply@anthropic.com> | 2026-05-12 21:03:30 +0000 |
|---|---|---|
| committer | Claude <noreply@anthropic.com> | 2026-05-12 21:03:30 +0000 |
| commit | e7171181fff10c66b2b74eabfb1fc94b3cfbb4fb (patch) | |
| tree | 7c62bacc3c02ce5a910ebd176c9d62d10564a5e1 /internal/executor | |
| parent | 22ecff1fde5aa17d3053f43a8ac81f9ca49d8d56 (diff) | |
feat(executor): bring GeminiRunner to sandbox-flow parity with Claude
All coding tasks now follow the same flow regardless of runner: when
project_dir is set, the agent runs in a temp clone, not in the user's
working tree. On success, edits are autocommitted and pushed back to
origin/master and the sandbox is removed. On failure or BLOCKED, the
sandbox is preserved and its path surfaces in the error / BlockedError
so the user can inspect partial work or resume in place.
Before this commit, GeminiRunner.Run set cmd.Dir to project_dir
directly, so an agent run could leave half-done edits in the user's
working tree with no rollback. ClaudeRunner has had the full sandbox
flow for a while; this commit closes the gap.
Reused the existing package-level helpers from claude.go verbatim:
setupSandbox, teardownSandbox, sandboxCloneSource, gitSafe, plus the
resume/stale-sandbox/blocked-error patterns. No new shared abstraction
needed — same package.
LocalRunner intentionally not changed. The OpenAI chat path has no
tool use, so the agent can't edit files; sandbox would be theater.
Tests (6 new):
- Run_ProjectDir_RunsInSandbox: cwd captured by fake binary is a
sandbox path, not project_dir.
- Run_BlockedError_IncludesSandboxDir: when question.json appears,
BlockedError.SandboxDir is set and the dir exists.
- Run_ExecError_PreservesSandbox: failing exit wraps error with
"(sandbox preserved at <path>)" and the path exists on disk.
- Run_ResumeUsesStoredSandboxDir: ResumeSessionID + SandboxDir →
runs in that dir without re-cloning.
- Run_StaleSandboxDir_ClonesAfresh: resume pointing at missing
dir falls back to a fresh clone from project_dir.
- Run_NoProjectDir_SkipsSandbox: tasks without project_dir don't
trigger sandbox setup.
https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
Diffstat (limited to 'internal/executor')
| -rw-r--r-- | internal/executor/gemini.go | 96 | ||||
| -rw-r--r-- | internal/executor/gemini_test.go | 268 |
2 files changed, 353 insertions, 11 deletions
diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go index 04382ae..3abec05 100644 --- a/internal/executor/gemini.go +++ b/internal/executor/gemini.go @@ -40,11 +40,21 @@ func (r *GeminiRunner) binaryPath() string { return "gemini" } -// Run executes a gemini <instructions> invocation, streaming output to log files. +// Run executes the gemini CLI inside a sandboxed clone of project_dir. +// When project_dir is set, claudomator first clones it into a temp sandbox +// (preferring a `local` bare remote, then `origin`, then the working tree) +// and runs the agent there. On success the sandbox is autocommitted and +// pushed back to origin/master, then removed. On failure the sandbox is +// preserved and its path is included in the returned error so the user can +// inspect partial work. If the agent writes a question file before exiting, +// Run returns *BlockedError with SandboxDir populated so a resume execution +// can pick up in the same directory. func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error { - if t.Agent.ProjectDir != "" { - if _, err := os.Stat(t.Agent.ProjectDir); err != nil { - return fmt.Errorf("project_dir %q: %w", t.Agent.ProjectDir, err) + projectDir := t.Agent.ProjectDir + + if projectDir != "" { + if _, err := os.Stat(projectDir); err != nil { + return fmt.Errorf("project_dir %q: %w", projectDir, err) } } @@ -63,24 +73,88 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi } if e.SessionID == "" { - e.SessionID = e.ID + if e.ResumeSessionID != "" { + e.SessionID = e.ResumeSessionID + } else { + e.SessionID = e.ID + } + } + + // Sandbox setup: for new executions with a project_dir, clone into a sandbox. + // Resume executions reuse the preserved sandbox so any partial work survives. + // If the preserved sandbox is missing (e.g. /tmp was purged), clone fresh. + var sandboxDir string + var startHEAD string + effectiveWorkingDir := projectDir + if e.ResumeSessionID != "" { + if e.SandboxDir != "" { + if _, statErr := os.Stat(e.SandboxDir); statErr == nil { + effectiveWorkingDir = e.SandboxDir + } else { + r.Logger.Warn("preserved sandbox missing, cloning fresh", "sandbox", e.SandboxDir, "project_dir", projectDir) + e.SandboxDir = "" + if projectDir != "" { + var err error + sandboxDir, err = setupSandbox(projectDir, r.Logger) + if err != nil { + return fmt.Errorf("setting up sandbox: %w", err) + } + effectiveWorkingDir = sandboxDir + r.Logger.Info("fresh sandbox created for resume", "sandbox", sandboxDir, "project_dir", projectDir) + } + } + } + } else if projectDir != "" { + var err error + sandboxDir, err = setupSandbox(projectDir, r.Logger) + if err != nil { + return fmt.Errorf("setting up sandbox: %w", err) + } + effectiveWorkingDir = sandboxDir + r.Logger.Info("sandbox created", "sandbox", sandboxDir, "project_dir", projectDir) + } + + if effectiveWorkingDir != "" { + headOut, _ := exec.Command("git", gitSafe("-C", effectiveWorkingDir, "rev-parse", "HEAD")...).Output() + startHEAD = strings.TrimSpace(string(headOut)) } questionFile := filepath.Join(logDir, "question.json") args := r.buildArgs(t, e, questionFile) - // Gemini CLI doesn't necessarily have the same rate limiting behavior as Claude, - // but we'll use a similar execution pattern. - err := r.execOnce(ctx, args, t.Agent.ProjectDir, t.Agent.ProjectDir, e) - if err != nil { + if err := r.execOnce(ctx, args, effectiveWorkingDir, projectDir, e); err != nil { + if sandboxDir != "" { + return fmt.Errorf("%w (sandbox preserved at %s)", err, sandboxDir) + } return err } // Check whether the agent left a question before exiting. data, readErr := os.ReadFile(questionFile) if readErr == nil { - os.Remove(questionFile) // consumed - return &BlockedError{QuestionJSON: strings.TrimSpace(string(data)), SessionID: e.SessionID} + os.Remove(questionFile) + questionJSON := strings.TrimSpace(string(data)) + if isCompletionReport(questionJSON) { + r.Logger.Info("treating question file as completion report", "taskID", e.TaskID) + e.Summary = extractQuestionText(questionJSON) + } else { + // Preserve sandbox on BLOCKED so a resume can pick up in the same dir. + return &BlockedError{QuestionJSON: questionJSON, SessionID: e.SessionID, SandboxDir: sandboxDir} + } + } + + // Read agent summary if written. + summaryFile := filepath.Join(logDir, "summary.txt") + if summaryData, readErr := os.ReadFile(summaryFile); readErr == nil { + os.Remove(summaryFile) + e.Summary = strings.TrimSpace(string(summaryData)) + } + + // Merge sandbox back to project_dir and clean up. + if sandboxDir != "" { + if mergeErr := teardownSandbox(projectDir, sandboxDir, startHEAD, r.Logger, e); mergeErr != nil { + return fmt.Errorf("sandbox teardown: %w (sandbox preserved at %s)", mergeErr, sandboxDir) + } } return nil } diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go index 4b0339e..cd11ebc 100644 --- a/internal/executor/gemini_test.go +++ b/internal/executor/gemini_test.go @@ -3,8 +3,11 @@ package executor import ( "bytes" "context" + "errors" "io" "log/slog" + "os" + "path/filepath" "strings" "testing" @@ -177,3 +180,268 @@ func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) { t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", expectedWriterContent, writer.String()) } } + +// TestGeminiRunner_Run_ProjectDir_RunsInSandbox verifies that when project_dir +// is set, the gemini subprocess runs inside a sandbox clone — not in +// project_dir itself. +func TestGeminiRunner_Run_ProjectDir_RunsInSandbox(t *testing.T) { + projectDir := t.TempDir() + initGitRepo(t, projectDir) + + logDir := t.TempDir() + cwdFile := filepath.Join(logDir, "gemini-cwd.txt") + + // Fake gemini binary that records its $PWD then exits 0. + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do work", + ProjectDir: projectDir, + SkipPlanning: true, + }, + } + e := &storage.Execution{ID: "sandbox-exec", TaskID: "task-1"} + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run: %v", err) + } + + got, err := os.ReadFile(cwdFile) + if err != nil { + t.Fatalf("cwd file not written: %v", err) + } + cwd := string(got) + if cwd == projectDir { + t.Errorf("ran directly in project_dir; expected sandbox clone (cwd=%q)", cwd) + } + // Sandbox should be removed after successful teardown (no edits → nothing to push). + // We can't assert the exact dir, but it should not be projectDir. +} + +// TestGeminiRunner_Run_BlockedError_IncludesSandboxDir verifies that when the +// agent writes a question file before exiting, the BlockedError carries the +// sandbox path so resume runs in the same dir. +func TestGeminiRunner_Run_BlockedError_IncludesSandboxDir(t *testing.T) { + src := t.TempDir() + initGitRepo(t, src) + logDir := t.TempDir() + + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + if err := os.WriteFile(scriptPath, []byte(`#!/bin/sh +if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then + printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE" +fi +`), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do something", + ProjectDir: src, + SkipPlanning: true, + }, + } + e := &storage.Execution{ID: "blocked-gemini-exec", TaskID: "task-1"} + + err := r.Run(context.Background(), tk, e) + + var blocked *BlockedError + if !errors.As(err, &blocked) { + t.Fatalf("expected BlockedError, got: %v", err) + } + if blocked.SandboxDir == "" { + t.Error("BlockedError.SandboxDir should be set when gemini task runs in a sandbox") + } + if _, statErr := os.Stat(blocked.SandboxDir); os.IsNotExist(statErr) { + t.Error("sandbox directory should be preserved when blocked") + } else { + os.RemoveAll(blocked.SandboxDir) + } +} + +// TestGeminiRunner_Run_ExecError_PreservesSandbox verifies that when gemini +// exits non-zero, the sandbox path is included in the wrapped error so the +// user can inspect partial work. +func TestGeminiRunner_Run_ExecError_PreservesSandbox(t *testing.T) { + src := t.TempDir() + initGitRepo(t, src) + logDir := t.TempDir() + + // "false" exits 1, no output. + r := &GeminiRunner{ + BinaryPath: "false", + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do something", + ProjectDir: src, + SkipPlanning: true, + }, + } + e := &storage.Execution{ID: "err-gemini-exec", TaskID: "task-1"} + + err := r.Run(context.Background(), tk, e) + if err == nil { + t.Fatal("expected error from failing gemini exit") + } + if !strings.Contains(err.Error(), "sandbox preserved at ") { + t.Errorf("expected error to include sandbox path; got: %v", err) + } + // Extract path and verify it exists. + idx := strings.Index(err.Error(), "sandbox preserved at ") + rest := err.Error()[idx+len("sandbox preserved at "):] + rest = strings.TrimSuffix(rest, ")") + rest = strings.TrimSpace(rest) + if _, statErr := os.Stat(rest); os.IsNotExist(statErr) { + t.Errorf("sandbox path from error should exist on disk: %q", rest) + } else { + os.RemoveAll(rest) + } +} + +// TestGeminiRunner_Run_ResumeUsesStoredSandboxDir verifies that a resume +// execution runs in the preserved SandboxDir rather than cloning fresh. +func TestGeminiRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) { + logDir := t.TempDir() + sandboxDir := t.TempDir() + initGitRepo(t, sandboxDir) + cwdFile := filepath.Join(logDir, "cwd.txt") + + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + SkipPlanning: true, + }, + } + e := &storage.Execution{ + ID: "resume-gemini-1", + TaskID: "task-resume", + ResumeSessionID: "session-abc", + SandboxDir: sandboxDir, + } + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run with preserved sandbox: %v", err) + } + + got, err := os.ReadFile(cwdFile) + if err != nil { + t.Fatalf("cwd file not written: %v", err) + } + if string(got) != sandboxDir { + t.Errorf("resume should run in preserved sandbox; got cwd=%q want %q", got, sandboxDir) + } +} + +// TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh verifies that a resume +// pointing at a missing sandbox falls back to cloning a fresh sandbox from +// project_dir rather than failing outright. +func TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) { + logDir := t.TempDir() + projectDir := t.TempDir() + initGitRepo(t, projectDir) + + cwdFile := filepath.Join(logDir, "cwd.txt") + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + ProjectDir: projectDir, + SkipPlanning: true, + }, + } + staleSandbox := filepath.Join(t.TempDir(), "gone") + e := &storage.Execution{ + ID: "resume-gemini-2", + TaskID: "task-stale", + ResumeSessionID: "session-xyz", + SandboxDir: staleSandbox, + } + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run with stale sandbox: %v", err) + } + + got, err := os.ReadFile(cwdFile) + if err != nil { + t.Fatalf("cwd file not written: %v", err) + } + cwd := string(got) + if cwd == staleSandbox { + t.Error("ran in stale (nonexistent) sandbox dir") + } + if cwd == projectDir { + t.Error("ran directly in project_dir; expected a fresh sandbox clone") + } +} + +// TestGeminiRunner_Run_NoProjectDir_SkipsSandbox verifies that a task with no +// project_dir doesn't trigger sandbox setup (matches LocalRunner/non-coding +// task semantics). +func TestGeminiRunner_Run_NoProjectDir_SkipsSandbox(t *testing.T) { + logDir := t.TempDir() + + r := &GeminiRunner{ + BinaryPath: "true", // exits 0, no output + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "summarize: 2+2", + SkipPlanning: true, + // No ProjectDir + }, + } + e := &storage.Execution{ID: "no-pd-gemini", TaskID: "task-nopd"} + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run without project_dir: %v", err) + } + if e.SandboxDir != "" { + t.Errorf("SandboxDir should be empty for tasks without project_dir, got %q", e.SandboxDir) + } +} |
