merge: integrate github/main — LocalRunner, real GeminiRunner, llm client (HEAD, main)

Merges 12 commits from github/main (formerly master) that were developed independently.

Key additions:
- LocalRunner: OpenAI-compatible local LLM execution (Ollama, LM Studio)
- Real GeminiRunner with full sandbox parity to ClaudeRunner
- llm.Client for enriching CI failures and elaboration via local model
- retry.ParseRetryAfter moved to shared package
- tokens_in/tokens_out columns in executions table

Conflict resolutions:
- Kept local main's VAPID/push, stories, projects, agent events schema
- Merged both sets of Config fields (local + LocalModel from github/main)
- Unified activePerAgent accounting (decActiveAgent helper)
- Removed duplicate helpers from claude.go (now in helpers.go)
- Fixed double-decrement bug in handleRunResult vs decActiveAgent

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
author: Peter Stone <thepeterstone@gmail.com> 2026-05-13 04:02:20 +0000
committer: Peter Stone <thepeterstone@gmail.com> 2026-05-13 04:02:20 +0000
commit: 68399a598924775a3ec22a39c2336ae497fb07f3 (patch)
tree: 29ade8224eb51eca47a1d9d03bb4d0d3653a72aa /internal/executor/gemini_test.go
parent: f01231cc45f41ce2dc37072e77428e467ef3fc15 (diff)
parent: d970c0730ff0dc7d714d3261197d8ba52b5d21f4 (diff)
1 files changed, 447 insertions, 0 deletions
diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go
new file mode 100644
index 0000000..cd11ebc
--- /dev/null
+++ b/internal/executor/gemini_test.go
@@ -0,0 +1,447 @@
+package executor
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/thepeterstone/claudomator/internal/storage"
+	"github.com/thepeterstone/claudomator/internal/task"
+)
+
+// TestGeminiRunner_BuildArgs_BasicTask checks that buildArgs leads with
+// "-p <instructions>" and that the stream-json output format and the model
+// name each appear somewhere in the argument list.
+func TestGeminiRunner_BuildArgs_BasicTask(t *testing.T) {
+	runner := &GeminiRunner{}
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "fix the bug",
+		Model:        "gemini-2.5-flash-lite",
+		SkipPlanning: true,
+	}
+
+	args := runner.buildArgs(&task.Task{Agent: agent}, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// Gemini CLI: the prompt goes first, passed via -p for non-interactive mode.
+	leadsWithPrompt := len(args) >= 2 && args[0] == "-p" && args[1] == "fix the bug"
+	if !leadsWithPrompt {
+		t.Errorf("expected -p <instructions> as first args, got: %v", args)
+	}
+
+	// Membership only: the position of the remaining flags is not checked.
+	present := make(map[string]struct{}, len(args))
+	for _, a := range args {
+		present[a] = struct{}{}
+	}
+	for _, want := range []string{"--output-format", "stream-json", "--model", "gemini-2.5-flash-lite"} {
+		if _, ok := present[want]; !ok {
+			t.Errorf("missing arg %q in %v", want, args)
+		}
+	}
+}
+
+// TestGeminiRunner_BuildArgs_PreamblePrepended checks that, with SkipPlanning
+// false, the prompt following -p starts with planningPreamble and ends with
+// the task's own instructions.
+func TestGeminiRunner_BuildArgs_PreamblePrepended(t *testing.T) {
+	runner := &GeminiRunner{}
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "fix the bug",
+		SkipPlanning: false,
+	}
+
+	args := runner.buildArgs(&task.Task{Agent: agent}, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// The prompt is read from args[1] below, so stop here unless "-p <prompt>" leads.
+	if len(args) < 2 || args[0] != "-p" {
+		t.Fatalf("expected -p <instructions> as first args, got: %v", args)
+	}
+
+	prompt := args[1]
+	if !strings.HasPrefix(prompt, planningPreamble) {
+		t.Errorf("instructions should start with planning preamble")
+	}
+	if !strings.HasSuffix(prompt, "fix the bug") {
+		t.Errorf("instructions should end with original instructions")
+	}
+}
+
+// TestGeminiRunner_BuildArgs_IncludesYolo checks that buildArgs emits the
+// --yolo flag somewhere in the argument list.
+func TestGeminiRunner_BuildArgs_IncludesYolo(t *testing.T) {
+	runner := &GeminiRunner{}
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "write a doc",
+		SkipPlanning: true,
+	}
+
+	args := runner.buildArgs(&task.Task{Agent: agent}, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	hasYolo := false
+	for _, a := range args {
+		if a == "--yolo" {
+			hasYolo = true
+			break
+		}
+	}
+	if !hasYolo {
+		t.Errorf("expected --yolo in gemini args (enables all tools); got: %v", args)
+	}
+}
+
+// TestGeminiRunner_BuildArgs_IncludesPromptFlag checks that the instructions
+// appear as the value immediately following a -p or --prompt flag.
+func TestGeminiRunner_BuildArgs_IncludesPromptFlag(t *testing.T) {
+	runner := &GeminiRunner{}
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "do the thing",
+		SkipPlanning: true,
+	}
+
+	args := runner.buildArgs(&task.Task{Agent: agent}, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// Headless mode needs the instructions supplied through -p/--prompt; a bare
+	// positional argument would start interactive mode instead.
+	viaFlag := false
+	for i := 0; i+1 < len(args); i++ {
+		isPromptFlag := args[i] == "-p" || args[i] == "--prompt"
+		if isPromptFlag && args[i+1] == "do the thing" {
+			viaFlag = true
+			break
+		}
+	}
+	if !viaFlag {
+		t.Errorf("expected instructions passed via -p/--prompt flag; got: %v", args)
+	}
+}
+
+// TestGeminiRunner_Run_InaccessibleProjectDir_ReturnsError checks that Run
+// fails with an error mentioning "project_dir" when the task's ProjectDir does
+// not exist.
+func TestGeminiRunner_Run_InaccessibleProjectDir_ReturnsError(t *testing.T) {
+	quiet := slog.New(slog.NewTextHandler(io.Discard, nil))
+	runner := &GeminiRunner{
+		BinaryPath: "true", // would succeed if it ran
+		Logger:     quiet,
+		LogDir:     t.TempDir(),
+	}
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		ProjectDir:   "/nonexistent/path/does/not/exist",
+		SkipPlanning: true,
+	}
+	e := &storage.Execution{ID: "test-exec"}
+
+	err := runner.Run(context.Background(), &task.Task{Agent: agent}, e)
+
+	switch {
+	case err == nil:
+		t.Fatal("expected error for inaccessible project_dir, got nil")
+	case !strings.Contains(err.Error(), "project_dir"):
+		t.Errorf("expected 'project_dir' in error, got: %v", err)
+	}
+}
+
+// TestGeminiRunner_BinaryPath_Default checks that binaryPath returns "gemini"
+// when BinaryPath is left unset.
+func TestGeminiRunner_BinaryPath_Default(t *testing.T) {
+	runner := &GeminiRunner{}
+	if got := runner.binaryPath(); got != "gemini" {
+		t.Errorf("want 'gemini', got %q", got)
+	}
+}
+
+func TestGeminiRunner_BinaryPath_Custom(t *testing.T) {
+	r := &GeminiRunner{BinaryPath: "/usr/local/bin/gemini"}
+	if r.binaryPath() != "/usr/local/bin/gemini" {
+		t.Errorf("want custom path, got %q", r.binaryPath())
+	}
+}
+
+
+// TestParseGeminiStream_ParsesStructuredOutput feeds parseGeminiStream a
+// stream-json transcript that ends in an error result and checks that:
+//   - the returned error carries the result text,
+//   - the returned cost matches total_cost_usd from the result line,
+//   - the input is copied verbatim to the supplied writer.
+func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) {
+	// Simulate a stream-json input with various message types, including a result with error and cost.
+	input := streamLine(`{"type":"content_block_start","content_block":{"text":"Hello,"}}`) +
+		streamLine(`{"type":"content_block_delta","content_block":{"text":" World!"}}`) +
+		streamLine(`{"type":"content_block_end"}`) +
+		streamLine(`{"type":"result","subtype":"error_during_execution","is_error":true,"result":"something went wrong","total_cost_usd":0.123}`)
+
+	reader := strings.NewReader(input)
+	var writer bytes.Buffer // To capture what's written to the output log
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+
+	cost, err := parseGeminiStream(reader, &writer, logger)
+
+	// Fatal, not Error: the next check calls err.Error(), which would panic
+	// with a nil dereference if the test kept going after a nil error.
+	if err == nil {
+		t.Fatal("expected an error, got nil")
+	}
+	if !strings.Contains(err.Error(), "something went wrong") {
+		t.Errorf("expected error message to contain 'something went wrong', got: %v", err)
+	}
+
+	if cost != 0.123 {
+		t.Errorf("expected cost 0.123, got %f", cost)
+	}
+
+	// Verify that the writer received the content (even if parseGeminiStream isn't fully parsing it yet)
+	if got := writer.String(); got != input {
+		t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", input, got)
+	}
+}
+
+// TestGeminiRunner_Run_ProjectDir_RunsInSandbox checks that a task with
+// project_dir set does not execute the gemini binary directly inside
+// project_dir: the working directory recorded by a fake binary must differ
+// from it.
+func TestGeminiRunner_Run_ProjectDir_RunsInSandbox(t *testing.T) {
+	repo := t.TempDir()
+	initGitRepo(t, repo)
+
+	logs := t.TempDir()
+	pwdOut := filepath.Join(logs, "gemini-cwd.txt")
+
+	// Stand-in for the gemini binary: writes its $PWD to pwdOut, then exits 0.
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	body := "#!/bin/sh\n" +
+		"printf '%s' \"$PWD\" > " + pwdOut + "\n"
+	if err := os.WriteFile(fakeBin, []byte(body), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logs,
+	}
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "do work",
+		ProjectDir:   repo,
+		SkipPlanning: true,
+	}
+	e := &storage.Execution{ID: "sandbox-exec", TaskID: "task-1"}
+
+	runErr := runner.Run(context.Background(), &task.Task{Agent: agent}, e)
+	if runErr != nil {
+		t.Fatalf("Run: %v", runErr)
+	}
+
+	raw, readErr := os.ReadFile(pwdOut)
+	if readErr != nil {
+		t.Fatalf("cwd file not written: %v", readErr)
+	}
+	if cwd := string(raw); cwd == repo {
+		t.Errorf("ran directly in project_dir; expected sandbox clone (cwd=%q)", cwd)
+	}
+	// The sandbox is expected to be gone after a successful teardown (no edits,
+	// so nothing to push), which means its exact path cannot be asserted here;
+	// the only check is that it is not the project dir.
+}
+
+// TestGeminiRunner_Run_BlockedError_IncludesSandboxDir verifies that when the
+// agent writes a question file before exiting, Run returns a BlockedError
+// whose SandboxDir is set and still exists on disk, so a resume can run in
+// the same dir.
+func TestGeminiRunner_Run_BlockedError_IncludesSandboxDir(t *testing.T) {
+	src := t.TempDir()
+	initGitRepo(t, src)
+	logDir := t.TempDir()
+
+	// Fake gemini binary: writes a question to $CLAUDOMATOR_QUESTION_FILE when
+	// that variable is set, then exits 0.
+	scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	if err := os.WriteFile(scriptPath, []byte(`#!/bin/sh
+if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then
+  printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE"
+fi
+`), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &GeminiRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do something",
+			ProjectDir:   src,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "blocked-gemini-exec", TaskID: "task-1"}
+
+	err := r.Run(context.Background(), tk, e)
+
+	var blocked *BlockedError
+	if !errors.As(err, &blocked) {
+		t.Fatalf("expected BlockedError, got: %v", err)
+	}
+	// Fatal here: with an empty path the existence check below would only add
+	// a second, misleading "should be preserved" failure.
+	if blocked.SandboxDir == "" {
+		t.Fatal("BlockedError.SandboxDir should be set when gemini task runs in a sandbox")
+	}
+	// The preserved sandbox is not under any t.TempDir this test created, so
+	// remove it explicitly. Registering the cleanup before the assertion means
+	// it runs regardless of how the check turns out.
+	t.Cleanup(func() {
+		if rmErr := os.RemoveAll(blocked.SandboxDir); rmErr != nil {
+			t.Logf("cleanup of preserved sandbox %q failed: %v", blocked.SandboxDir, rmErr)
+		}
+	})
+	if _, statErr := os.Stat(blocked.SandboxDir); os.IsNotExist(statErr) {
+		t.Error("sandbox directory should be preserved when blocked")
+	}
+}
+
+// TestGeminiRunner_Run_ExecError_PreservesSandbox verifies that when gemini
+// exits non-zero, the sandbox path is included in the wrapped error so the
+// user can inspect partial work.
+func TestGeminiRunner_Run_ExecError_PreservesSandbox(t *testing.T) {
+	src := t.TempDir()
+	initGitRepo(t, src)
+	logDir := t.TempDir()
+
+	// "false" exits 1, no output.
+	r := &GeminiRunner{
+		BinaryPath: "false",
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "gemini",
+			Instructions: "do something",
+			ProjectDir:   src,
+			SkipPlanning: true,
+		},
+	}
+	e := &storage.Execution{ID: "err-gemini-exec", TaskID: "task-1"}
+
+	err := r.Run(context.Background(), tk, e)
+	if err == nil {
+		t.Fatal("expected error from failing gemini exit")
+	}
+	if !strings.Contains(err.Error(), "sandbox preserved at ") {
+		t.Errorf("expected error to include sandbox path; got: %v", err)
+	}
+	// Extract path and verify it exists.
+	idx := strings.Index(err.Error(), "sandbox preserved at ")
+	rest := err.Error()[idx+len("sandbox preserved at "):]
+	rest = strings.TrimSuffix(rest, ")")
+	rest = strings.TrimSpace(rest)
+	if _, statErr := os.Stat(rest); os.IsNotExist(statErr) {
+		t.Errorf("sandbox path from error should exist on disk: %q", rest)
+	} else {
+		os.RemoveAll(rest)
+	}
+}
+
+// TestGeminiRunner_Run_ResumeUsesStoredSandboxDir checks that an execution
+// carrying both a ResumeSessionID and an existing SandboxDir runs the gemini
+// binary with that SandboxDir as its working directory.
+func TestGeminiRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) {
+	logs := t.TempDir()
+	preserved := t.TempDir()
+	initGitRepo(t, preserved)
+	pwdOut := filepath.Join(logs, "cwd.txt")
+
+	// Stand-in for the gemini binary: writes its $PWD to pwdOut, then exits 0.
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	body := "#!/bin/sh\n" +
+		"printf '%s' \"$PWD\" > " + pwdOut + "\n"
+	if err := os.WriteFile(fakeBin, []byte(body), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logs,
+	}
+	// No ProjectDir on the task: the only directory supplied is the preserved sandbox.
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		SkipPlanning: true,
+	}
+	e := &storage.Execution{
+		ID:              "resume-gemini-1",
+		TaskID:          "task-resume",
+		ResumeSessionID: "session-abc",
+		SandboxDir:      preserved,
+	}
+
+	runErr := runner.Run(context.Background(), &task.Task{Agent: agent}, e)
+	if runErr != nil {
+		t.Fatalf("Run with preserved sandbox: %v", runErr)
+	}
+
+	raw, readErr := os.ReadFile(pwdOut)
+	if readErr != nil {
+		t.Fatalf("cwd file not written: %v", readErr)
+	}
+	if cwd := string(raw); cwd != preserved {
+		t.Errorf("resume should run in preserved sandbox; got cwd=%q want %q", cwd, preserved)
+	}
+}
+
+// TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh checks that a resume whose
+// SandboxDir no longer exists still succeeds, and that the gemini binary runs
+// in neither the missing sandbox nor project_dir itself.
+func TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) {
+	logs := t.TempDir()
+	repo := t.TempDir()
+	initGitRepo(t, repo)
+
+	pwdOut := filepath.Join(logs, "cwd.txt")
+
+	// Stand-in for the gemini binary: writes its $PWD to pwdOut, then exits 0.
+	fakeBin := filepath.Join(t.TempDir(), "fake-gemini.sh")
+	body := "#!/bin/sh\n" +
+		"printf '%s' \"$PWD\" > " + pwdOut + "\n"
+	if err := os.WriteFile(fakeBin, []byte(body), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &GeminiRunner{
+		BinaryPath: fakeBin,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logs,
+	}
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		ProjectDir:   repo,
+		SkipPlanning: true,
+	}
+
+	// A path under a fresh temp dir that is never created, i.e. a sandbox that is gone.
+	missing := filepath.Join(t.TempDir(), "gone")
+	e := &storage.Execution{
+		ID:              "resume-gemini-2",
+		TaskID:          "task-stale",
+		ResumeSessionID: "session-xyz",
+		SandboxDir:      missing,
+	}
+
+	runErr := runner.Run(context.Background(), &task.Task{Agent: agent}, e)
+	if runErr != nil {
+		t.Fatalf("Run with stale sandbox: %v", runErr)
+	}
+
+	raw, readErr := os.ReadFile(pwdOut)
+	if readErr != nil {
+		t.Fatalf("cwd file not written: %v", readErr)
+	}
+	switch string(raw) {
+	case missing:
+		t.Error("ran in stale (nonexistent) sandbox dir")
+	case repo:
+		t.Error("ran directly in project_dir; expected a fresh sandbox clone")
+	}
+}
+
+// TestGeminiRunner_Run_NoProjectDir_SkipsSandbox checks that running a task
+// with no project_dir succeeds and leaves the execution's SandboxDir empty
+// (matches LocalRunner/non-coding task semantics).
+func TestGeminiRunner_Run_NoProjectDir_SkipsSandbox(t *testing.T) {
+	runner := &GeminiRunner{
+		BinaryPath: "true", // exits 0, no output
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     t.TempDir(),
+	}
+	// ProjectDir is deliberately left unset.
+	agent := task.AgentConfig{
+		Type:         "gemini",
+		Instructions: "summarize: 2+2",
+		SkipPlanning: true,
+	}
+	e := &storage.Execution{ID: "no-pd-gemini", TaskID: "task-nopd"}
+
+	runErr := runner.Run(context.Background(), &task.Task{Agent: agent}, e)
+	if runErr != nil {
+		t.Fatalf("Run without project_dir: %v", runErr)
+	}
+	if got := e.SandboxDir; got != "" {
+		t.Errorf("SandboxDir should be empty for tasks without project_dir, got %q", got)
+	}
+}
author	Peter Stone <thepeterstone@gmail.com>	2026-05-13 04:02:20 +0000
committer	Peter Stone <thepeterstone@gmail.com>	2026-05-13 04:02:20 +0000
commit	68399a598924775a3ec22a39c2336ae497fb07f3 (patch)
tree	29ade8224eb51eca47a1d9d03bb4d0d3653a72aa /internal/executor/gemini_test.go
parent	f01231cc45f41ce2dc37072e77428e467ef3fc15 (diff)
parent	d970c0730ff0dc7d714d3261197d8ba52b5d21f4 (diff)