summaryrefslogtreecommitdiff
path: root/internal/executor/gemini_test.go
diff options
context:
space:
mode:
authorClaude <noreply@anthropic.com>2026-05-12 21:03:30 +0000
committerClaude <noreply@anthropic.com>2026-05-12 21:03:30 +0000
commite7171181fff10c66b2b74eabfb1fc94b3cfbb4fb (patch)
tree7c62bacc3c02ce5a910ebd176c9d62d10564a5e1 /internal/executor/gemini_test.go
parent22ecff1fde5aa17d3053f43a8ac81f9ca49d8d56 (diff)
feat(executor): bring GeminiRunner to sandbox-flow parity with Claude
All coding tasks now follow the same flow regardless of runner: when project_dir is set, the agent runs in a temp clone, not in the user's working tree. On success, edits are autocommitted and pushed back to origin/master and the sandbox is removed. On failure or BLOCKED, the sandbox is preserved and its path surfaces in the error / BlockedError so the user can inspect partial work or resume in place. Before this commit, GeminiRunner.Run set cmd.Dir to project_dir directly, so an agent run could leave half-done edits in the user's working tree with no rollback. ClaudeRunner has had the full sandbox flow for a while; this commit closes the gap. Reused the existing package-level helpers from claude.go verbatim: setupSandbox, teardownSandbox, sandboxCloneSource, gitSafe, plus the resume/stale-sandbox/blocked-error patterns. No new shared abstraction needed — same package. LocalRunner intentionally not changed. The OpenAI chat path has no tool use, so the agent can't edit files; sandbox would be theater. Tests (6 new): - Run_ProjectDir_RunsInSandbox: cwd captured by fake binary is a sandbox path, not project_dir. - Run_BlockedError_IncludesSandboxDir: when question.json appears, BlockedError.SandboxDir is set and the dir exists. - Run_ExecError_PreservesSandbox: failing exit wraps error with "(sandbox preserved at <path>)" and the path exists on disk. - Run_ResumeUsesStoredSandboxDir: ResumeSessionID + SandboxDir → runs in that dir without re-cloning. - Run_StaleSandboxDir_ClonesAfresh: resume pointing at missing dir falls back to a fresh clone from project_dir. - Run_NoProjectDir_SkipsSandbox: tasks without project_dir don't trigger sandbox setup. https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
Diffstat (limited to 'internal/executor/gemini_test.go')
-rw-r--r--internal/executor/gemini_test.go268
1 files changed, 268 insertions, 0 deletions
diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go
index 4b0339e..cd11ebc 100644
--- a/internal/executor/gemini_test.go
+++ b/internal/executor/gemini_test.go
@@ -3,8 +3,11 @@ package executor
import (
"bytes"
"context"
+ "errors"
"io"
"log/slog"
+ "os"
+ "path/filepath"
"strings"
"testing"
@@ -177,3 +180,268 @@ func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) {
t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", expectedWriterContent, writer.String())
}
}
+
+// TestGeminiRunner_Run_ProjectDir_RunsInSandbox checks that a task with
+// project_dir set does not launch the gemini subprocess with project_dir as
+// its working directory.
+func TestGeminiRunner_Run_ProjectDir_RunsInSandbox(t *testing.T) {
+	repo := t.TempDir()
+	initGitRepo(t, repo)
+
+	logs := t.TempDir()
+	pwdOut := filepath.Join(logs, "gemini-cwd.txt")
+
+	// Stand-in for the gemini binary: writes its $PWD to pwdOut and exits 0.
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	body := []byte("#!/bin/sh\nprintf '%s' \"$PWD\" > " + pwdOut + "\n")
+	if err := os.WriteFile(fakeBin, body, 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "do work",
+		ProjectDir:   repo,
+		SkipPlanning: true,
+	}
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logs,
+	}
+	record := &storage.Execution{ID: "sandbox-exec", TaskID: "task-1"}
+
+	if err := runner.Run(context.Background(), &task.Task{Agent: agent}, record); err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+
+	raw, err := os.ReadFile(pwdOut)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	// The sandbox location is not known to the test, so only the negative is
+	// asserted: the recorded working directory must differ from project_dir.
+	if ranIn := string(raw); ranIn == repo {
+		t.Errorf("ran directly in project_dir; expected sandbox clone (cwd=%q)", ranIn)
+	}
+}
+
+// TestGeminiRunner_Run_BlockedError_IncludesSandboxDir verifies that when the
+// agent writes a question file before exiting, Run returns a BlockedError whose
+// SandboxDir is non-empty and still present on disk, so a later resume can
+// reuse it.
+func TestGeminiRunner_Run_BlockedError_IncludesSandboxDir(t *testing.T) {
+	src := t.TempDir()
+	initGitRepo(t, src)
+	logDir := t.TempDir()
+
+	// Fake gemini binary: writes a question to $CLAUDOMATOR_QUESTION_FILE when
+	// that variable is set, then exits 0.
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	if err := os.WriteFile(scriptPath, []byte(`#!/bin/sh
+if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then
+ printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE"
+fi
+`), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do something",
+			ProjectDir:   src,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "blocked-gemini-exec", TaskID: "task-1"}
+
+	err := r.Run(context.Background(), tk, e)
+
+	var blocked *BlockedError
+	if !errors.As(err, &blocked) {
+		t.Fatalf("expected BlockedError, got: %v", err)
+	}
+	// Stop here on an empty path: stat'ing "" would only add a second,
+	// misleading "not preserved" failure on top of the real one.
+	if blocked.SandboxDir == "" {
+		t.Fatal("BlockedError.SandboxDir should be set when gemini task runs in a sandbox")
+	}
+	// The preserved sandbox is not under t.TempDir as far as this test can
+	// tell, so remove it explicitly whatever the outcome of the checks below.
+	t.Cleanup(func() {
+		if rmErr := os.RemoveAll(blocked.SandboxDir); rmErr != nil {
+			t.Logf("remove sandbox %q: %v", blocked.SandboxDir, rmErr)
+		}
+	})
+	// Any stat failure (not only "does not exist") means the sandbox is not
+	// usable for a resume.
+	if _, statErr := os.Stat(blocked.SandboxDir); statErr != nil {
+		t.Errorf("sandbox directory should be preserved when blocked: %v", statErr)
+	}
+}
+
+// TestGeminiRunner_Run_ExecError_PreservesSandbox verifies that when gemini
+// exits non-zero, the sandbox path is included in the wrapped error so the
+// user can inspect partial work.
+func TestGeminiRunner_Run_ExecError_PreservesSandbox(t *testing.T) {
+ src := t.TempDir()
+ initGitRepo(t, src)
+ logDir := t.TempDir()
+
+ // "false" exits 1, no output.
+ r := &GeminiRunner{
+ BinaryPath: "false",
+ Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
+ LogDir: logDir,
+ }
+ tk := &task.Task{
+ Agent: task.AgentConfig{
+ Type: "gemini",
+ Instructions: "do something",
+ ProjectDir: src,
+ SkipPlanning: true,
+ },
+ }
+ e := &storage.Execution{ID: "err-gemini-exec", TaskID: "task-1"}
+
+ err := r.Run(context.Background(), tk, e)
+ if err == nil {
+ t.Fatal("expected error from failing gemini exit")
+ }
+ if !strings.Contains(err.Error(), "sandbox preserved at ") {
+ t.Errorf("expected error to include sandbox path; got: %v", err)
+ }
+ // Extract path and verify it exists.
+ idx := strings.Index(err.Error(), "sandbox preserved at ")
+ rest := err.Error()[idx+len("sandbox preserved at "):]
+ rest = strings.TrimSuffix(rest, ")")
+ rest = strings.TrimSpace(rest)
+ if _, statErr := os.Stat(rest); os.IsNotExist(statErr) {
+ t.Errorf("sandbox path from error should exist on disk: %q", rest)
+ } else {
+ os.RemoveAll(rest)
+ }
+}
+
+// TestGeminiRunner_Run_ResumeUsesStoredSandboxDir checks that an execution
+// carrying both ResumeSessionID and an existing SandboxDir launches the gemini
+// subprocess with that directory as its working directory.
+func TestGeminiRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) {
+	logs := t.TempDir()
+	kept := t.TempDir()
+	initGitRepo(t, kept)
+	pwdOut := filepath.Join(logs, "cwd.txt")
+
+	// Stand-in for the gemini binary: writes its $PWD to pwdOut and exits 0.
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	body := []byte("#!/bin/sh\nprintf '%s' \"$PWD\" > " + pwdOut + "\n")
+	if err := os.WriteFile(fakeBin, body, 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logs,
+	}
+	// No ProjectDir is set: the working directory has to come from the
+	// execution record's SandboxDir.
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		SkipPlanning: true,
+	}
+	record := &storage.Execution{
+		ID:              "resume-gemini-1",
+		TaskID:          "task-resume",
+		ResumeSessionID: "session-abc",
+		SandboxDir:      kept,
+	}
+
+	if err := runner.Run(context.Background(), &task.Task{Agent: agent}, record); err != nil {
+		t.Fatalf("Run with preserved sandbox: %v", err)
+	}
+
+	raw, err := os.ReadFile(pwdOut)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	if ranIn := string(raw); ranIn != kept {
+		t.Errorf("resume should run in preserved sandbox; got cwd=%q want %q", ranIn, kept)
+	}
+}
+
+// TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh checks that a resume
+// whose SandboxDir no longer exists still succeeds, and that the gemini
+// subprocess runs in neither the missing directory nor project_dir itself.
+func TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) {
+	logs := t.TempDir()
+	repo := t.TempDir()
+	initGitRepo(t, repo)
+
+	// Stand-in for the gemini binary: writes its $PWD to pwdOut and exits 0.
+	pwdOut := filepath.Join(logs, "cwd.txt")
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	body := []byte("#!/bin/sh\nprintf '%s' \"$PWD\" > " + pwdOut + "\n")
+	if err := os.WriteFile(fakeBin, body, 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logs,
+	}
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		ProjectDir:   repo,
+		SkipPlanning: true,
+	}
+	// A path inside a fresh temp dir that is never created, i.e. a sandbox
+	// that has gone missing since the previous run.
+	missing := filepath.Join(t.TempDir(), "gone")
+	record := &storage.Execution{
+		ID:              "resume-gemini-2",
+		TaskID:          "task-stale",
+		ResumeSessionID: "session-xyz",
+		SandboxDir:      missing,
+	}
+
+	if err := runner.Run(context.Background(), &task.Task{Agent: agent}, record); err != nil {
+		t.Fatalf("Run with stale sandbox: %v", err)
+	}
+
+	raw, err := os.ReadFile(pwdOut)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	switch ranIn := string(raw); ranIn {
+	case missing:
+		t.Error("ran in stale (nonexistent) sandbox dir")
+	case repo:
+		t.Error("ran directly in project_dir; expected a fresh sandbox clone")
+	}
+}
+
+// TestGeminiRunner_Run_NoProjectDir_SkipsSandbox checks that running a task
+// with no project_dir succeeds and leaves the execution's SandboxDir empty.
+func TestGeminiRunner_Run_NoProjectDir_SkipsSandbox(t *testing.T) {
+	// ProjectDir is deliberately left unset.
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "summarize: 2+2",
+		SkipPlanning: true,
+	}
+	runner := &GeminiRunner{
+		BinaryPath: "true", // exits 0, no output
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     t.TempDir(),
+	}
+	record := &storage.Execution{ID: "no-pd-gemini", TaskID: "task-nopd"}
+
+	err := runner.Run(context.Background(), &task.Task{Agent: agent}, record)
+	if err != nil {
+		t.Fatalf("Run without project_dir: %v", err)
+	}
+	if got := record.SandboxDir; got != "" {
+		t.Errorf("SandboxDir should be empty for tasks without project_dir, got %q", got)
+	}
+}