diff options
Diffstat (limited to 'internal/executor/gemini_test.go')
| -rw-r--r-- | internal/executor/gemini_test.go | 447 |
1 files changed, 447 insertions, 0 deletions
diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go new file mode 100644 index 0000000..cd11ebc --- /dev/null +++ b/internal/executor/gemini_test.go @@ -0,0 +1,447 @@ +package executor + +import ( + "bytes" + "context" + "errors" + "io" + "log/slog" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/thepeterstone/claudomator/internal/storage" + "github.com/thepeterstone/claudomator/internal/task" +) + +func TestGeminiRunner_BuildArgs_BasicTask(t *testing.T) { + r := &GeminiRunner{} + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "fix the bug", + Model: "gemini-2.5-flash-lite", + SkipPlanning: true, + }, + } + + args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json") + + // Gemini CLI: instructions passed via -p for non-interactive mode + if len(args) < 2 || args[0] != "-p" || args[1] != "fix the bug" { + t.Errorf("expected -p <instructions> as first args, got: %v", args) + } + + argMap := make(map[string]bool) + for _, a := range args { + argMap[a] = true + } + for _, want := range []string{"--output-format", "stream-json", "--model", "gemini-2.5-flash-lite"} { + if !argMap[want] { + t.Errorf("missing arg %q in %v", want, args) + } + } +} + +func TestGeminiRunner_BuildArgs_PreamblePrepended(t *testing.T) { + r := &GeminiRunner{} + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "fix the bug", + SkipPlanning: false, + }, + } + + args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json") + + if len(args) < 2 || args[0] != "-p" { + t.Fatalf("expected -p <instructions> as first args, got: %v", args) + } + if !strings.HasPrefix(args[1], planningPreamble) { + t.Errorf("instructions should start with planning preamble") + } + if !strings.HasSuffix(args[1], "fix the bug") { + t.Errorf("instructions should end with original instructions") + } +} + +func TestGeminiRunner_BuildArgs_IncludesYolo(t *testing.T) { + r := &GeminiRunner{} + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "write a doc", + SkipPlanning: true, + }, + } + args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json") + argMap := make(map[string]bool) + for _, a := range args { + argMap[a] = true + } + if !argMap["--yolo"] { + t.Errorf("expected --yolo in gemini args (enables all tools); got: %v", args) + } +} + +func TestGeminiRunner_BuildArgs_IncludesPromptFlag(t *testing.T) { + r := &GeminiRunner{} + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do the thing", + SkipPlanning: true, + }, + } + args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json") + // Instructions must be passed via -p/--prompt for non-interactive headless mode, + // not as a bare positional (which starts interactive mode). + found := false + for i, a := range args { + if (a == "-p" || a == "--prompt") && i+1 < len(args) && args[i+1] == "do the thing" { + found = true + break + } + } + if !found { + t.Errorf("expected instructions passed via -p/--prompt flag; got: %v", args) + } +} + +func TestGeminiRunner_Run_InaccessibleProjectDir_ReturnsError(t *testing.T) { + r := &GeminiRunner{ + BinaryPath: "true", // would succeed if it ran + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: t.TempDir(), + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + ProjectDir: "/nonexistent/path/does/not/exist", + SkipPlanning: true, + }, + } + exec := &storage.Execution{ID: "test-exec"} + + err := r.Run(context.Background(), tk, exec) + + if err == nil { + t.Fatal("expected error for inaccessible project_dir, got nil") + } + if !strings.Contains(err.Error(), "project_dir") { + t.Errorf("expected 'project_dir' in error, got: %v", err) + } +} + +func TestGeminiRunner_BinaryPath_Default(t *testing.T) { + r := &GeminiRunner{} + if r.binaryPath() != "gemini" { + t.Errorf("want 'gemini', got %q", r.binaryPath()) + } +} + +func TestGeminiRunner_BinaryPath_Custom(t *testing.T) { + r := &GeminiRunner{BinaryPath: "/usr/local/bin/gemini"} + if r.binaryPath() != "/usr/local/bin/gemini" { + t.Errorf("want custom path, got %q", r.binaryPath()) + } +} + + +func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) { + // Simulate a stream-json input with various message types, including a result with error and cost. + input := streamLine(`{"type":"content_block_start","content_block":{"text":"Hello,"}}`) + + streamLine(`{"type":"content_block_delta","content_block":{"text":" World!"}}`) + + streamLine(`{"type":"content_block_end"}`) + + streamLine(`{"type":"result","subtype":"error_during_execution","is_error":true,"result":"something went wrong","total_cost_usd":0.123}`) + + reader := strings.NewReader(input) + var writer bytes.Buffer // To capture what's written to the output log + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + + cost, err := parseGeminiStream(reader, &writer, logger) + + if err == nil { + t.Errorf("expected an error, got nil") + } + if !strings.Contains(err.Error(), "something went wrong") { + t.Errorf("expected error message to contain 'something went wrong', got: %v", err) + } + + if cost != 0.123 { + t.Errorf("expected cost 0.123, got %f", cost) + } + + // Verify that the writer received the content (even if parseGeminiStream isn't fully parsing it yet) + expectedWriterContent := input + if writer.String() != expectedWriterContent { + t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", expectedWriterContent, writer.String()) + } +} + +// TestGeminiRunner_Run_ProjectDir_RunsInSandbox verifies that when project_dir +// is set, the gemini subprocess runs inside a sandbox clone — not in +// project_dir itself. +func TestGeminiRunner_Run_ProjectDir_RunsInSandbox(t *testing.T) { + projectDir := t.TempDir() + initGitRepo(t, projectDir) + + logDir := t.TempDir() + cwdFile := filepath.Join(logDir, "gemini-cwd.txt") + + // Fake gemini binary that records its $PWD then exits 0. + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do work", + ProjectDir: projectDir, + SkipPlanning: true, + }, + } + e := &storage.Execution{ID: "sandbox-exec", TaskID: "task-1"} + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run: %v", err) + } + + got, err := os.ReadFile(cwdFile) + if err != nil { + t.Fatalf("cwd file not written: %v", err) + } + cwd := string(got) + if cwd == projectDir { + t.Errorf("ran directly in project_dir; expected sandbox clone (cwd=%q)", cwd) + } + // Sandbox should be removed after successful teardown (no edits → nothing to push). + // We can't assert the exact dir, but it should not be projectDir. +} + +// TestGeminiRunner_Run_BlockedError_IncludesSandboxDir verifies that when the +// agent writes a question file before exiting, the BlockedError carries the +// sandbox path so resume runs in the same dir. +func TestGeminiRunner_Run_BlockedError_IncludesSandboxDir(t *testing.T) { + src := t.TempDir() + initGitRepo(t, src) + logDir := t.TempDir() + + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + if err := os.WriteFile(scriptPath, []byte(`#!/bin/sh +if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then + printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE" +fi +`), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do something", + ProjectDir: src, + SkipPlanning: true, + }, + } + e := &storage.Execution{ID: "blocked-gemini-exec", TaskID: "task-1"} + + err := r.Run(context.Background(), tk, e) + + var blocked *BlockedError + if !errors.As(err, &blocked) { + t.Fatalf("expected BlockedError, got: %v", err) + } + if blocked.SandboxDir == "" { + t.Error("BlockedError.SandboxDir should be set when gemini task runs in a sandbox") + } + if _, statErr := os.Stat(blocked.SandboxDir); os.IsNotExist(statErr) { + t.Error("sandbox directory should be preserved when blocked") + } else { + os.RemoveAll(blocked.SandboxDir) + } +} + +// TestGeminiRunner_Run_ExecError_PreservesSandbox verifies that when gemini +// exits non-zero, the sandbox path is included in the wrapped error so the +// user can inspect partial work. +func TestGeminiRunner_Run_ExecError_PreservesSandbox(t *testing.T) { + src := t.TempDir() + initGitRepo(t, src) + logDir := t.TempDir() + + // "false" exits 1, no output. + r := &GeminiRunner{ + BinaryPath: "false", + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do something", + ProjectDir: src, + SkipPlanning: true, + }, + } + e := &storage.Execution{ID: "err-gemini-exec", TaskID: "task-1"} + + err := r.Run(context.Background(), tk, e) + if err == nil { + t.Fatal("expected error from failing gemini exit") + } + if !strings.Contains(err.Error(), "sandbox preserved at ") { + t.Errorf("expected error to include sandbox path; got: %v", err) + } + // Extract path and verify it exists. + idx := strings.Index(err.Error(), "sandbox preserved at ") + rest := err.Error()[idx+len("sandbox preserved at "):] + rest = strings.TrimSuffix(rest, ")") + rest = strings.TrimSpace(rest) + if _, statErr := os.Stat(rest); os.IsNotExist(statErr) { + t.Errorf("sandbox path from error should exist on disk: %q", rest) + } else { + os.RemoveAll(rest) + } +} + +// TestGeminiRunner_Run_ResumeUsesStoredSandboxDir verifies that a resume +// execution runs in the preserved SandboxDir rather than cloning fresh. +func TestGeminiRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) { + logDir := t.TempDir() + sandboxDir := t.TempDir() + initGitRepo(t, sandboxDir) + cwdFile := filepath.Join(logDir, "cwd.txt") + + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + SkipPlanning: true, + }, + } + e := &storage.Execution{ + ID: "resume-gemini-1", + TaskID: "task-resume", + ResumeSessionID: "session-abc", + SandboxDir: sandboxDir, + } + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run with preserved sandbox: %v", err) + } + + got, err := os.ReadFile(cwdFile) + if err != nil { + t.Fatalf("cwd file not written: %v", err) + } + if string(got) != sandboxDir { + t.Errorf("resume should run in preserved sandbox; got cwd=%q want %q", got, sandboxDir) + } +} + +// TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh verifies that a resume +// pointing at a missing sandbox falls back to cloning a fresh sandbox from +// project_dir rather than failing outright. +func TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) { + logDir := t.TempDir() + projectDir := t.TempDir() + initGitRepo(t, projectDir) + + cwdFile := filepath.Join(logDir, "cwd.txt") + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + ProjectDir: projectDir, + SkipPlanning: true, + }, + } + staleSandbox := filepath.Join(t.TempDir(), "gone") + e := &storage.Execution{ + ID: "resume-gemini-2", + TaskID: "task-stale", + ResumeSessionID: "session-xyz", + SandboxDir: staleSandbox, + } + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run with stale sandbox: %v", err) + } + + got, err := os.ReadFile(cwdFile) + if err != nil { + t.Fatalf("cwd file not written: %v", err) + } + cwd := string(got) + if cwd == staleSandbox { + t.Error("ran in stale (nonexistent) sandbox dir") + } + if cwd == projectDir { + t.Error("ran directly in project_dir; expected a fresh sandbox clone") + } +} + +// TestGeminiRunner_Run_NoProjectDir_SkipsSandbox verifies that a task with no +// project_dir doesn't trigger sandbox setup (matches LocalRunner/non-coding +// task semantics). +func TestGeminiRunner_Run_NoProjectDir_SkipsSandbox(t *testing.T) { + logDir := t.TempDir() + + r := &GeminiRunner{ + BinaryPath: "true", // exits 0, no output + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "summarize: 2+2", + SkipPlanning: true, + // No ProjectDir + }, + } + e := &storage.Execution{ID: "no-pd-gemini", TaskID: "task-nopd"} + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run without project_dir: %v", err) + } + if e.SandboxDir != "" { + t.Errorf("SandboxDir should be empty for tasks without project_dir, got %q", e.SandboxDir) + } +} |
