summaryrefslogtreecommitdiff
path: root/internal/executor/gemini_test.go
diff options
context:
space:
mode:
authorPeter Stone <thepeterstone@gmail.com>2026-05-13 04:02:20 +0000
committerPeter Stone <thepeterstone@gmail.com>2026-05-13 04:02:20 +0000
commit68399a598924775a3ec22a39c2336ae497fb07f3 (patch)
tree29ade8224eb51eca47a1d9d03bb4d0d3653a72aa /internal/executor/gemini_test.go
parentf01231cc45f41ce2dc37072e77428e467ef3fc15 (diff)
parentd970c0730ff0dc7d714d3261197d8ba52b5d21f4 (diff)
merge: integrate github/main — LocalRunner, real GeminiRunner, llm client (HEAD, main)
Merges 12 commits from github/main (formerly master) that were developed independently.

Key additions:
- LocalRunner: OpenAI-compatible local LLM execution (Ollama, LM Studio)
- Real GeminiRunner with full sandbox parity to ClaudeRunner
- llm.Client for enriching CI failures and elaboration via local model
- retry.ParseRetryAfter moved to shared package
- tokens_in/tokens_out columns in executions table

Conflict resolutions:
- Kept local main's VAPID/push, stories, projects, agent events schema
- Merged both sets of Config fields (local + LocalModel from github/main)
- Unified activePerAgent accounting (decActiveAgent helper)
- Removed duplicate helpers from claude.go (now in helpers.go)
- Fixed double-decrement bug in handleRunResult vs decActiveAgent

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal/executor/gemini_test.go')
-rw-r--r--internal/executor/gemini_test.go447
1 files changed, 447 insertions, 0 deletions
diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go
new file mode 100644
index 0000000..cd11ebc
--- /dev/null
+++ b/internal/executor/gemini_test.go
@@ -0,0 +1,447 @@
+package executor
+
+import (
+ "bytes"
+ "context"
+ "errors"
+ "io"
+ "log/slog"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/thepeterstone/claudomator/internal/storage"
+ "github.com/thepeterstone/claudomator/internal/task"
+)
+
+func TestGeminiRunner_BuildArgs_BasicTask(t *testing.T) {
+ r := &GeminiRunner{}
+ tk := &task.Task{
+ Agent: task.AgentConfig{
+ Type: "gemini",
+ Instructions: "fix the bug",
+ Model: "gemini-2.5-flash-lite",
+ SkipPlanning: true,
+ },
+ }
+
+ args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+ // Gemini CLI: instructions passed via -p for non-interactive mode
+ if len(args) < 2 || args[0] != "-p" || args[1] != "fix the bug" {
+ t.Errorf("expected -p <instructions> as first args, got: %v", args)
+ }
+
+ argMap := make(map[string]bool)
+ for _, a := range args {
+ argMap[a] = true
+ }
+ for _, want := range []string{"--output-format", "stream-json", "--model", "gemini-2.5-flash-lite"} {
+ if !argMap[want] {
+ t.Errorf("missing arg %q in %v", want, args)
+ }
+ }
+}
+
+func TestGeminiRunner_BuildArgs_PreamblePrepended(t *testing.T) {
+ r := &GeminiRunner{}
+ tk := &task.Task{
+ Agent: task.AgentConfig{
+ Type: "gemini",
+ Instructions: "fix the bug",
+ SkipPlanning: false,
+ },
+ }
+
+ args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+ if len(args) < 2 || args[0] != "-p" {
+ t.Fatalf("expected -p <instructions> as first args, got: %v", args)
+ }
+ if !strings.HasPrefix(args[1], planningPreamble) {
+ t.Errorf("instructions should start with planning preamble")
+ }
+ if !strings.HasSuffix(args[1], "fix the bug") {
+ t.Errorf("instructions should end with original instructions")
+ }
+}
+
+// TestGeminiRunner_BuildArgs_IncludesYolo checks that buildArgs emits the
+// --yolo flag somewhere in the argument list.
+func TestGeminiRunner_BuildArgs_IncludesYolo(t *testing.T) {
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "write a doc",
+			SkipPlanning: true,
+		},
+	}
+	runner := &GeminiRunner{}
+	args := runner.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	hasYolo := false
+	for _, a := range args {
+		if a == "--yolo" {
+			hasYolo = true
+			break
+		}
+	}
+	if !hasYolo {
+		t.Errorf("expected --yolo in gemini args (enables all tools); got: %v", args)
+	}
+}
+
+func TestGeminiRunner_BuildArgs_IncludesPromptFlag(t *testing.T) {
+ r := &GeminiRunner{}
+ tk := &task.Task{
+ Agent: task.AgentConfig{
+ Type: "gemini",
+ Instructions: "do the thing",
+ SkipPlanning: true,
+ },
+ }
+ args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+ // Instructions must be passed via -p/--prompt for non-interactive headless mode,
+ // not as a bare positional (which starts interactive mode).
+ found := false
+ for i, a := range args {
+ if (a == "-p" || a == "--prompt") && i+1 < len(args) && args[i+1] == "do the thing" {
+ found = true
+ break
+ }
+ }
+ if !found {
+ t.Errorf("expected instructions passed via -p/--prompt flag; got: %v", args)
+ }
+}
+
+// TestGeminiRunner_Run_InaccessibleProjectDir_ReturnsError checks that Run
+// returns an error mentioning "project_dir" when the task's ProjectDir does
+// not exist.
+func TestGeminiRunner_Run_InaccessibleProjectDir_ReturnsError(t *testing.T) {
+	quiet := slog.New(slog.NewTextHandler(io.Discard, nil))
+	runner := &GeminiRunner{
+		BinaryPath: "true", // would succeed if it ran
+		Logger:     quiet,
+		LogDir:     t.TempDir(),
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			ProjectDir:   "/nonexistent/path/does/not/exist",
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "test-exec"}
+
+	err := runner.Run(context.Background(), tk, e)
+	if err == nil {
+		t.Fatal("expected error for inaccessible project_dir, got nil")
+	}
+	if msg := err.Error(); !strings.Contains(msg, "project_dir") {
+		t.Errorf("expected 'project_dir' in error, got: %v", err)
+	}
+}
+
+// TestGeminiRunner_BinaryPath_Default checks that binaryPath returns "gemini"
+// when BinaryPath is unset.
+func TestGeminiRunner_BinaryPath_Default(t *testing.T) {
+	runner := new(GeminiRunner)
+	if got := runner.binaryPath(); got != "gemini" {
+		t.Errorf("want 'gemini', got %q", got)
+	}
+}
+
+func TestGeminiRunner_BinaryPath_Custom(t *testing.T) {
+ r := &GeminiRunner{BinaryPath: "/usr/local/bin/gemini"}
+ if r.binaryPath() != "/usr/local/bin/gemini" {
+ t.Errorf("want custom path, got %q", r.binaryPath())
+ }
+}
+
+
+// TestParseGeminiStream_ParsesStructuredOutput feeds parseGeminiStream a
+// stream-json transcript that ends in an error result and checks that the
+// error text and cost are reported and that the raw stream is copied to the
+// writer unchanged.
+func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) {
+	// Simulate a stream-json input with various message types, including a result with error and cost.
+	input := streamLine(`{"type":"content_block_start","content_block":{"text":"Hello,"}}`) +
+		streamLine(`{"type":"content_block_delta","content_block":{"text":" World!"}}`) +
+		streamLine(`{"type":"content_block_end"}`) +
+		streamLine(`{"type":"result","subtype":"error_during_execution","is_error":true,"result":"something went wrong","total_cost_usd":0.123}`)
+
+	reader := strings.NewReader(input)
+	var writer bytes.Buffer // captures what is written to the output log
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+
+	cost, err := parseGeminiStream(reader, &writer, logger)
+
+	// Fatal rather than Error: the next check calls err.Error(), which would
+	// panic on a nil error and hide the real failure.
+	if err == nil {
+		t.Fatal("expected an error, got nil")
+	}
+	if !strings.Contains(err.Error(), "something went wrong") {
+		t.Errorf("expected error message to contain 'something went wrong', got: %v", err)
+	}
+
+	if cost != 0.123 {
+		t.Errorf("expected cost 0.123, got %f", cost)
+	}
+
+	// The writer must receive the stream verbatim, regardless of how much of
+	// it parseGeminiStream interprets.
+	if got := writer.String(); got != input {
+		t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", input, got)
+	}
+}
+
+// TestGeminiRunner_Run_ProjectDir_RunsInSandbox checks that, with project_dir
+// set, the gemini subprocess does not run in project_dir itself. The fake
+// binary records its working directory so the test can compare the two.
+func TestGeminiRunner_Run_ProjectDir_RunsInSandbox(t *testing.T) {
+	projectDir := t.TempDir()
+	initGitRepo(t, projectDir)
+
+	logDir := t.TempDir()
+	cwdFile := filepath.Join(logDir, "gemini-cwd.txt")
+
+	// Stand-in for the gemini binary: writes its $PWD to cwdFile, then exits 0.
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	body := []byte("#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n")
+	if err := os.WriteFile(fakeBin, body, 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do work",
+			ProjectDir:   projectDir,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "sandbox-exec", TaskID: "task-1"}
+
+	if err := runner.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+
+	recorded, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	if cwd := string(recorded); cwd == projectDir {
+		t.Errorf("ran directly in project_dir; expected sandbox clone (cwd=%q)", cwd)
+	}
+	// Sandbox should be removed after successful teardown (no edits → nothing to push).
+	// The exact sandbox path is not known here, so only "not projectDir" is asserted.
+}
+
+// TestGeminiRunner_Run_BlockedError_IncludesSandboxDir checks that when the
+// agent writes a question file before exiting, Run returns a BlockedError
+// whose SandboxDir is set and still exists on disk, so a resume can run in
+// the same directory.
+func TestGeminiRunner_Run_BlockedError_IncludesSandboxDir(t *testing.T) {
+	// Fake gemini: writes a question to $CLAUDOMATOR_QUESTION_FILE when that
+	// variable is set, then exits.
+	const script = `#!/bin/sh
+if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then
+ printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE"
+fi
+`
+
+	src := t.TempDir()
+	initGitRepo(t, src)
+	logDir := t.TempDir()
+
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	if err := os.WriteFile(fakeBin, []byte(script), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do something",
+			ProjectDir:   src,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "blocked-gemini-exec", TaskID: "task-1"}
+
+	runErr := runner.Run(context.Background(), tk, e)
+
+	var blocked *BlockedError
+	if !errors.As(runErr, &blocked) {
+		t.Fatalf("expected BlockedError, got: %v", runErr)
+	}
+	if blocked.SandboxDir == "" {
+		t.Error("BlockedError.SandboxDir should be set when gemini task runs in a sandbox")
+	}
+
+	_, statErr := os.Stat(blocked.SandboxDir)
+	if os.IsNotExist(statErr) {
+		t.Error("sandbox directory should be preserved when blocked")
+		return
+	}
+	// Best-effort cleanup of the preserved sandbox; a failure here does not
+	// affect what the test asserts.
+	_ = os.RemoveAll(blocked.SandboxDir)
+}
+
+// TestGeminiRunner_Run_ExecError_PreservesSandbox verifies that when gemini
+// exits non-zero, the sandbox path is included in the wrapped error so the
+// user can inspect partial work.
+func TestGeminiRunner_Run_ExecError_PreservesSandbox(t *testing.T) {
+ src := t.TempDir()
+ initGitRepo(t, src)
+ logDir := t.TempDir()
+
+ // "false" exits 1, no output.
+ r := &GeminiRunner{
+ BinaryPath: "false",
+ Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
+ LogDir: logDir,
+ }
+ tk := &task.Task{
+ Agent: task.AgentConfig{
+ Type: "gemini",
+ Instructions: "do something",
+ ProjectDir: src,
+ SkipPlanning: true,
+ },
+ }
+ e := &storage.Execution{ID: "err-gemini-exec", TaskID: "task-1"}
+
+ err := r.Run(context.Background(), tk, e)
+ if err == nil {
+ t.Fatal("expected error from failing gemini exit")
+ }
+ if !strings.Contains(err.Error(), "sandbox preserved at ") {
+ t.Errorf("expected error to include sandbox path; got: %v", err)
+ }
+ // Extract path and verify it exists.
+ idx := strings.Index(err.Error(), "sandbox preserved at ")
+ rest := err.Error()[idx+len("sandbox preserved at "):]
+ rest = strings.TrimSuffix(rest, ")")
+ rest = strings.TrimSpace(rest)
+ if _, statErr := os.Stat(rest); os.IsNotExist(statErr) {
+ t.Errorf("sandbox path from error should exist on disk: %q", rest)
+ } else {
+ os.RemoveAll(rest)
+ }
+}
+
+// TestGeminiRunner_Run_ResumeUsesStoredSandboxDir checks that an execution
+// carrying a ResumeSessionID and an existing SandboxDir runs the subprocess
+// in that SandboxDir.
+func TestGeminiRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) {
+	logDir := t.TempDir()
+	sandboxDir := t.TempDir()
+	initGitRepo(t, sandboxDir)
+	cwdFile := filepath.Join(logDir, "cwd.txt")
+
+	// Fake gemini: records its $PWD in cwdFile.
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	body := []byte("#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n")
+	if err := os.WriteFile(fakeBin, body, 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{
+		ID:              "resume-gemini-1",
+		TaskID:          "task-resume",
+		ResumeSessionID: "session-abc",
+		SandboxDir:      sandboxDir,
+	}
+
+	if err := runner.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run with preserved sandbox: %v", err)
+	}
+
+	recorded, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	if cwd := string(recorded); cwd != sandboxDir {
+		t.Errorf("resume should run in preserved sandbox; got cwd=%q want %q", cwd, sandboxDir)
+	}
+}
+
+// TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh checks that a resume whose
+// SandboxDir no longer exists still succeeds, and that the subprocess runs in
+// neither the missing sandbox nor project_dir itself.
+func TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) {
+	logDir := t.TempDir()
+	projectDir := t.TempDir()
+	initGitRepo(t, projectDir)
+
+	// Fake gemini: records its $PWD in cwdFile.
+	cwdFile := filepath.Join(logDir, "cwd.txt")
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	body := []byte("#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n")
+	if err := os.WriteFile(fakeBin, body, 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			ProjectDir:   projectDir,
+			SkipPlanning: true,
+		},
+	}
+	// A path inside a fresh temp dir that is never created.
+	staleSandbox := filepath.Join(t.TempDir(), "gone")
+	e := &storage.Execution{
+		ID:              "resume-gemini-2",
+		TaskID:          "task-stale",
+		ResumeSessionID: "session-xyz",
+		SandboxDir:      staleSandbox,
+	}
+
+	if err := runner.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run with stale sandbox: %v", err)
+	}
+
+	recorded, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	ranIn := string(recorded)
+	if ranIn == staleSandbox {
+		t.Error("ran in stale (nonexistent) sandbox dir")
+	}
+	if ranIn == projectDir {
+		t.Error("ran directly in project_dir; expected a fresh sandbox clone")
+	}
+}
+
+// TestGeminiRunner_Run_NoProjectDir_SkipsSandbox checks that running a task
+// with no project_dir succeeds and leaves the execution's SandboxDir empty.
+func TestGeminiRunner_Run_NoProjectDir_SkipsSandbox(t *testing.T) {
+	runner := &GeminiRunner{
+		BinaryPath: "true", // exits 0, no output
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     t.TempDir(),
+	}
+	// ProjectDir is deliberately left unset.
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "summarize: 2+2",
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "no-pd-gemini", TaskID: "task-nopd"}
+
+	if err := runner.Run(context.Background(), tk, e); err != nil {
+		t.Fatalf("Run without project_dir: %v", err)
+	}
+	if got := e.SandboxDir; got != "" {
+		t.Errorf("SandboxDir should be empty for tasks without project_dir, got %q", got)
+	}
+}