summaryrefslogtreecommitdiff
path: root/internal/executor/gemini_test.go
diff options
context:
space:
mode:
author Peter Stone <thepeterstone@gmail.com> 2026-05-12 11:08:46 -1000
committer GitHub <noreply@github.com> 2026-05-12 11:08:46 -1000
commit d970c0730ff0dc7d714d3261197d8ba52b5d21f4 (patch)
tree 7c62bacc3c02ce5a910ebd176c9d62d10564a5e1 /internal/executor/gemini_test.go
parent 22ecff1fde5aa17d3053f43a8ac81f9ca49d8d56 (diff)
parent e7171181fff10c66b2b74eabfb1fc94b3cfbb4fb (diff)
Merge pull request #4 from thepeterstone/claude/gemini-sandbox
Gemini runner: full sandbox-flow parity with Claude
Diffstat (limited to 'internal/executor/gemini_test.go')
-rw-r--r-- internal/executor/gemini_test.go 268
1 files changed, 268 insertions, 0 deletions
diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go
index 4b0339e..cd11ebc 100644
--- a/internal/executor/gemini_test.go
+++ b/internal/executor/gemini_test.go
@@ -3,8 +3,11 @@ package executor
import (
"bytes"
"context"
+ "errors"
"io"
"log/slog"
+ "os"
+ "path/filepath"
"strings"
"testing"
@@ -177,3 +180,268 @@ func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) {
t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", expectedWriterContent, writer.String())
}
}
+
+// TestGeminiRunner_Run_ProjectDir_RunsInSandbox checks that, for a task with
+// project_dir set, the gemini subprocess is started from a working directory
+// other than project_dir itself (i.e. from a sandbox clone).
+func TestGeminiRunner_Run_ProjectDir_RunsInSandbox(t *testing.T) {
+	repo := t.TempDir()
+	initGitRepo(t, repo)
+
+	logs := t.TempDir()
+	pwdOut := filepath.Join(logs, "gemini-cwd.txt")
+
+	// Stand-in for the gemini binary: dumps $PWD into pwdOut and exits 0.
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	contents := []byte("#!/bin/sh\nprintf '%s' \"$PWD\" > " + pwdOut + "\n")
+	if err := os.WriteFile(fakeBin, contents, 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	quiet := slog.New(slog.NewTextHandler(io.Discard, nil))
+	runner := &GeminiRunner{BinaryPath: fakeBin, Logger: quiet, LogDir: logs}
+
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "do work",
+		ProjectDir:   repo,
+		SkipPlanning: true,
+	}
+	run := &storage.Execution{ID: "sandbox-exec", TaskID: "task-1"}
+
+	if err := runner.Run(context.Background(), &task.Task{Agent: agent}, run); err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+
+	raw, err := os.ReadFile(pwdOut)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	// The sandbox path is not known to the test, so the only assertion made
+	// here is that the subprocess did not run in project_dir.
+	if recorded := string(raw); recorded == repo {
+		t.Errorf("ran directly in project_dir; expected sandbox clone (cwd=%q)", recorded)
+	}
+}
+
+// TestGeminiRunner_Run_BlockedError_IncludesSandboxDir verifies that when the
+// agent writes a question file before exiting, Run returns a *BlockedError
+// whose SandboxDir is non-empty and still present on disk, so a resume can run
+// in the same dir.
+func TestGeminiRunner_Run_BlockedError_IncludesSandboxDir(t *testing.T) {
+	src := t.TempDir()
+	initGitRepo(t, src)
+	logDir := t.TempDir()
+
+	// Fake gemini binary: writes a question to $CLAUDOMATOR_QUESTION_FILE when
+	// that variable is set, then exits 0.
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	if err := os.WriteFile(scriptPath, []byte(`#!/bin/sh
+if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then
+ printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE"
+fi
+`), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do something",
+			ProjectDir:   src,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "blocked-gemini-exec", TaskID: "task-1"}
+
+	err := r.Run(context.Background(), tk, e)
+
+	var blocked *BlockedError
+	if !errors.As(err, &blocked) {
+		t.Fatalf("expected BlockedError, got: %v", err)
+	}
+	// Stop on an empty path: stat-ing or removing "" would only add a
+	// misleading second failure on top of the real one.
+	sandbox := blocked.SandboxDir
+	if sandbox == "" {
+		t.Fatal("BlockedError.SandboxDir should be set when gemini task runs in a sandbox")
+	}
+	// The sandbox is expected to outlive Run, so the test removes it itself,
+	// regardless of how the assertion below turns out.
+	t.Cleanup(func() {
+		if rmErr := os.RemoveAll(sandbox); rmErr != nil {
+			t.Logf("remove sandbox %q: %v", sandbox, rmErr)
+		}
+	})
+	// Any stat failure (not only "does not exist") means the directory is not
+	// usable for a resume.
+	if _, statErr := os.Stat(sandbox); statErr != nil {
+		t.Errorf("sandbox directory should be preserved when blocked: %v", statErr)
+	}
+}
+
+// TestGeminiRunner_Run_ExecError_PreservesSandbox verifies that when gemini
+// exits non-zero, the sandbox path is included in the wrapped error so the
+// user can inspect partial work.
+func TestGeminiRunner_Run_ExecError_PreservesSandbox(t *testing.T) {
+ src := t.TempDir()
+ initGitRepo(t, src)
+ logDir := t.TempDir()
+
+ // "false" exits 1, no output.
+ r := &GeminiRunner{
+ BinaryPath: "false",
+ Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
+ LogDir: logDir,
+ }
+ tk := &task.Task{
+ Agent: task.AgentConfig{
+ Type: "gemini",
+ Instructions: "do something",
+ ProjectDir: src,
+ SkipPlanning: true,
+ },
+ }
+ e := &storage.Execution{ID: "err-gemini-exec", TaskID: "task-1"}
+
+ err := r.Run(context.Background(), tk, e)
+ if err == nil {
+ t.Fatal("expected error from failing gemini exit")
+ }
+ if !strings.Contains(err.Error(), "sandbox preserved at ") {
+ t.Errorf("expected error to include sandbox path; got: %v", err)
+ }
+ // Extract path and verify it exists.
+ idx := strings.Index(err.Error(), "sandbox preserved at ")
+ rest := err.Error()[idx+len("sandbox preserved at "):]
+ rest = strings.TrimSuffix(rest, ")")
+ rest = strings.TrimSpace(rest)
+ if _, statErr := os.Stat(rest); os.IsNotExist(statErr) {
+ t.Errorf("sandbox path from error should exist on disk: %q", rest)
+ } else {
+ os.RemoveAll(rest)
+ }
+}
+
+// TestGeminiRunner_Run_ResumeUsesStoredSandboxDir checks that an execution
+// carrying both a ResumeSessionID and an existing SandboxDir has its gemini
+// subprocess started inside that SandboxDir.
+func TestGeminiRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) {
+	logs := t.TempDir()
+	preserved := t.TempDir()
+	initGitRepo(t, preserved)
+	pwdOut := filepath.Join(logs, "cwd.txt")
+
+	// Stand-in for the gemini binary: dumps $PWD into pwdOut and exits 0.
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	contents := []byte("#!/bin/sh\nprintf '%s' \"$PWD\" > " + pwdOut + "\n")
+	if err := os.WriteFile(fakeBin, contents, 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	quiet := slog.New(slog.NewTextHandler(io.Discard, nil))
+	runner := &GeminiRunner{BinaryPath: fakeBin, Logger: quiet, LogDir: logs}
+
+	// No ProjectDir on the task: the working directory must come from the
+	// execution's stored SandboxDir.
+	agent := task.AgentConfig{Type: "gemini", SkipPlanning: true}
+	run := &storage.Execution{
+		ID:              "resume-gemini-1",
+		TaskID:          "task-resume",
+		ResumeSessionID: "session-abc",
+		SandboxDir:      preserved,
+	}
+
+	if err := runner.Run(context.Background(), &task.Task{Agent: agent}, run); err != nil {
+		t.Fatalf("Run with preserved sandbox: %v", err)
+	}
+
+	raw, err := os.ReadFile(pwdOut)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	if recorded := string(raw); recorded != preserved {
+		t.Errorf("resume should run in preserved sandbox; got cwd=%q want %q", recorded, preserved)
+	}
+}
+
+// TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh checks that a resume whose
+// SandboxDir no longer exists still succeeds, and that the subprocess runs in
+// neither the stale path nor project_dir itself.
+func TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) {
+	logs := t.TempDir()
+	repo := t.TempDir()
+	initGitRepo(t, repo)
+
+	// Stand-in for the gemini binary: dumps $PWD into pwdOut and exits 0.
+	pwdOut := filepath.Join(logs, "cwd.txt")
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	contents := []byte("#!/bin/sh\nprintf '%s' \"$PWD\" > " + pwdOut + "\n")
+	if err := os.WriteFile(fakeBin, contents, 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	quiet := slog.New(slog.NewTextHandler(io.Discard, nil))
+	runner := &GeminiRunner{BinaryPath: fakeBin, Logger: quiet, LogDir: logs}
+
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		ProjectDir:   repo,
+		SkipPlanning: true,
+	}
+	// A path under a fresh temp dir that is never created, i.e. a sandbox
+	// that has gone missing.
+	missing := filepath.Join(t.TempDir(), "gone")
+	run := &storage.Execution{
+		ID:              "resume-gemini-2",
+		TaskID:          "task-stale",
+		ResumeSessionID: "session-xyz",
+		SandboxDir:      missing,
+	}
+
+	if err := runner.Run(context.Background(), &task.Task{Agent: agent}, run); err != nil {
+		t.Fatalf("Run with stale sandbox: %v", err)
+	}
+
+	raw, err := os.ReadFile(pwdOut)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	switch recorded := string(raw); recorded {
+	case missing:
+		t.Error("ran in stale (nonexistent) sandbox dir")
+	case repo:
+		t.Error("ran directly in project_dir; expected a fresh sandbox clone")
+	}
+}
+
+// TestGeminiRunner_Run_NoProjectDir_SkipsSandbox checks that running a task
+// without project_dir succeeds and leaves the execution's SandboxDir empty.
+func TestGeminiRunner_Run_NoProjectDir_SkipsSandbox(t *testing.T) {
+	quiet := slog.New(slog.NewTextHandler(io.Discard, nil))
+	runner := &GeminiRunner{
+		BinaryPath: "true", // exits 0, no output
+		Logger:     quiet,
+		LogDir:     t.TempDir(),
+	}
+
+	// ProjectDir is deliberately left unset.
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "summarize: 2+2",
+		SkipPlanning: true,
+	}
+	run := &storage.Execution{ID: "no-pd-gemini", TaskID: "task-nopd"}
+
+	if err := runner.Run(context.Background(), &task.Task{Agent: agent}, run); err != nil {
+		t.Fatalf("Run without project_dir: %v", err)
+	}
+	if dir := run.SandboxDir; dir != "" {
+		t.Errorf("SandboxDir should be empty for tasks without project_dir, got %q", dir)
+	}
+}