merge: integrate github/main — LocalRunner, real GeminiRunner, llm client (HEAD, main)

Merges 12 commits from github/main (formerly master) that were developed independently.

Key additions:
- LocalRunner: OpenAI-compatible local LLM execution (Ollama, LM Studio)
- Real GeminiRunner with full sandbox parity to ClaudeRunner
- llm.Client for enriching CI failures and elaboration via local model
- retry.ParseRetryAfter moved to shared package
- tokens_in/tokens_out columns in executions table

Conflict resolutions:
- Kept local main's VAPID/push, stories, projects, agent events schema
- Merged both sets of Config fields (local + LocalModel from github/main)
- Unified activePerAgent accounting (decActiveAgent helper)
- Removed duplicate helpers from claude.go (now in helpers.go)
- Fixed double-decrement bug in handleRunResult vs decActiveAgent

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
author: Peter Stone <thepeterstone@gmail.com> 2026-05-13 04:02:20 +0000
committer: Peter Stone <thepeterstone@gmail.com> 2026-05-13 04:02:20 +0000
commit: 68399a598924775a3ec22a39c2336ae497fb07f3 (patch)
tree: 29ade8224eb51eca47a1d9d03bb4d0d3653a72aa /internal/executor/claude_test.go
parent: f01231cc45f41ce2dc37072e77428e467ef3fc15 (diff)
parent: d970c0730ff0dc7d714d3261197d8ba52b5d21f4 (diff)
1 files changed, 810 insertions, 0 deletions
diff --git a/internal/executor/claude_test.go b/internal/executor/claude_test.go
new file mode 100644
index 0000000..c01e160
--- /dev/null
+++ b/internal/executor/claude_test.go
@@ -0,0 +1,810 @@
+package executor
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/thepeterstone/claudomator/internal/storage"
+	"github.com/thepeterstone/claudomator/internal/task"
+)
+
+// TestClaudeRunner_BuildArgs_BasicTask checks that a minimal task produces the
+// prompt, stream-json output, verbose and model arguments.
+func TestClaudeRunner_BuildArgs_BasicTask(t *testing.T) {
+	runner := &ClaudeRunner{}
+	basic := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			Instructions: "fix the bug",
+			Model:        "sonnet",
+			SkipPlanning: true,
+		},
+	}
+
+	args := runner.buildArgs(basic, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// Only presence is asserted, not position, so index the args as a set.
+	present := make(map[string]struct{}, len(args))
+	for _, arg := range args {
+		present[arg] = struct{}{}
+	}
+	expected := []string{"-p", "fix the bug", "--output-format", "stream-json", "--verbose", "--model", "sonnet"}
+	for _, want := range expected {
+		if _, ok := present[want]; ok {
+			continue
+		}
+		t.Errorf("missing arg %q in %v", want, args)
+	}
+}
+
+// TestClaudeRunner_BuildArgs_FullConfig sets the optional AgentConfig fields
+// listed below and checks that every expected flag and value appears somewhere
+// in the generated argument list.
+func TestClaudeRunner_BuildArgs_FullConfig(t *testing.T) {
+	runner := &ClaudeRunner{}
+	full := &task.Task{
+		Agent: task.AgentConfig{
+			Type:               "claude",
+			Instructions:       "implement feature",
+			Model:              "opus",
+			MaxBudgetUSD:       5.0,
+			PermissionMode:     "bypassPermissions",
+			SystemPromptAppend: "Follow TDD",
+			AllowedTools:       []string{"Bash", "Edit"},
+			DisallowedTools:    []string{"Write"},
+			ContextFiles:       []string{"/src"},
+			AdditionalArgs:     []string{"--verbose"},
+			SkipPlanning:       true,
+		},
+	}
+
+	args := runner.buildArgs(full, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// Presence-only check: collect the args into a set first.
+	seen := make(map[string]struct{}, len(args))
+	for i := range args {
+		seen[args[i]] = struct{}{}
+	}
+
+	for _, want := range []string{
+		"-p", "implement feature", "--output-format", "stream-json",
+		"--model", "opus", "--max-budget-usd", "5.00",
+		"--permission-mode", "bypassPermissions",
+		"--append-system-prompt", "Follow TDD",
+		"--allowedTools", "Bash", "Edit",
+		"--disallowedTools", "Write",
+		"--add-dir", "/src",
+		"--verbose",
+	} {
+		if _, ok := seen[want]; !ok {
+			t.Errorf("missing arg %q in %v", want, args)
+		}
+	}
+}
+
+// TestClaudeRunner_BuildArgs_DefaultsToBypassPermissions checks that leaving
+// PermissionMode empty yields "--permission-mode bypassPermissions".
+func TestClaudeRunner_BuildArgs_DefaultsToBypassPermissions(t *testing.T) {
+	runner := &ClaudeRunner{}
+	// PermissionMode is deliberately left at its zero value.
+	noMode := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			Instructions: "do work",
+			SkipPlanning: true,
+		},
+	}
+
+	args := runner.buildArgs(noMode, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// Look for the flag immediately followed by the expected value.
+	bypass := false
+	for i := 0; i+1 < len(args); i++ {
+		if args[i] == "--permission-mode" && args[i+1] == "bypassPermissions" {
+			bypass = true
+			break
+		}
+	}
+	if !bypass {
+		t.Errorf("expected --permission-mode bypassPermissions when PermissionMode is empty, args: %v", args)
+	}
+}
+
+// TestClaudeRunner_BuildArgs_RespectsExplicitPermissionMode checks that an
+// explicitly configured PermissionMode is passed through unchanged.
+func TestClaudeRunner_BuildArgs_RespectsExplicitPermissionMode(t *testing.T) {
+	runner := &ClaudeRunner{}
+	explicit := &task.Task{
+		Agent: task.AgentConfig{
+			Type:           "claude",
+			Instructions:   "do work",
+			PermissionMode: "default",
+			SkipPlanning:   true,
+		},
+	}
+
+	args := runner.buildArgs(explicit, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// Locate the first --permission-mode flag that has a value after it.
+	flagAt := -1
+	for i := 0; i+1 < len(args); i++ {
+		if args[i] == "--permission-mode" {
+			flagAt = i
+			break
+		}
+	}
+	if flagAt < 0 {
+		t.Errorf("--permission-mode flag not found in args: %v", args)
+		return
+	}
+	if got := args[flagAt+1]; got != "default" {
+		t.Errorf("expected --permission-mode default, got %q", got)
+	}
+}
+
+// TestClaudeRunner_BuildArgs_AlwaysIncludesVerbose checks that --verbose is
+// emitted even when the task does not ask for it.
+func TestClaudeRunner_BuildArgs_AlwaysIncludesVerbose(t *testing.T) {
+	runner := &ClaudeRunner{}
+	plain := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			Instructions: "do something",
+			SkipPlanning: true,
+		},
+	}
+
+	args := runner.buildArgs(plain, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	hasVerbose := false
+	for i := range args {
+		if args[i] != "--verbose" {
+			continue
+		}
+		hasVerbose = true
+		break
+	}
+	if !hasVerbose {
+		t.Errorf("--verbose missing from args: %v", args)
+	}
+}
+
+// TestClaudeRunner_BuildArgs_PreamblePrepended checks that, with SkipPlanning
+// off, the -p value starts with the preamble heading, mentions
+// $CLAUDOMATOR_PROJECT_DIR and still ends with the task's own instructions.
+func TestClaudeRunner_BuildArgs_PreamblePrepended(t *testing.T) {
+	runner := &ClaudeRunner{}
+	planned := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			Instructions: "fix the bug",
+			SkipPlanning: false,
+		},
+	}
+
+	args := runner.buildArgs(planned, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// The prompt is expected as the value of a leading -p flag.
+	if len(args) < 2 || args[0] != "-p" {
+		t.Fatalf("expected -p as first arg, got: %v", args)
+	}
+	prompt := args[1]
+	if !strings.HasPrefix(prompt, "## Runtime Environment") {
+		t.Errorf("instructions should start with planning preamble, got prefix: %q", prompt[:min(len(prompt), 20)])
+	}
+	if !strings.Contains(prompt, "$CLAUDOMATOR_PROJECT_DIR") {
+		t.Errorf("preamble should mention $CLAUDOMATOR_PROJECT_DIR")
+	}
+	if !strings.HasSuffix(prompt, "fix the bug") {
+		t.Errorf("instructions should end with original instructions")
+	}
+}
+
+// TestClaudeRunner_BuildArgs_PreambleAddsBash checks that Bash is added as an
+// --allowedTools value when the preamble is active and the task did not list
+// Bash itself.
+func TestClaudeRunner_BuildArgs_PreambleAddsBash(t *testing.T) {
+	runner := &ClaudeRunner{}
+	readOnly := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			Instructions: "do work",
+			AllowedTools: []string{"Read"},
+			SkipPlanning: false,
+		},
+	}
+
+	args := runner.buildArgs(readOnly, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// Search for an "--allowedTools Bash" pair.
+	bashAllowed := false
+	for i := 0; i+1 < len(args); i++ {
+		if args[i] == "--allowedTools" && args[i+1] == "Bash" {
+			bashAllowed = true
+			break
+		}
+	}
+	if !bashAllowed {
+		t.Errorf("Bash should be added to --allowedTools when preamble is active: %v", args)
+	}
+}
+
+func TestClaudeRunner_BuildArgs_PreambleBashNotDuplicated(t *testing.T) {
+	r := &ClaudeRunner{}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			Instructions: "do work",
+			AllowedTools: []string{"Bash", "Read"},
+			SkipPlanning: false,
+		},
+	}
+
+	args := r.buildArgs(tk, &storage.Execution{ID: "test-exec"}, "/tmp/q.json")
+
+	// Count Bash occurrences in --allowedTools values.
+	bashCount := 0
+	for i, a := range args {
+		if a == "--allowedTools" && i+1 < len(args) && args[i+1] == "Bash" {
+			bashCount++
+		}
+	}
+	if bashCount != 1 {
+		t.Errorf("Bash should appear exactly once in --allowedTools, got %d: %v", bashCount, args)
+	}
+}
+
+// TestClaudeRunner_Run_ResumeSetsSessionIDFromResumeSession verifies that when a
+// resume execution is itself blocked again, the stored SessionID is the original
+// resumed session, not the new execution's own UUID. Without this, a second
+// block-and-resume cycle passes the wrong --resume session ID and fails.
+func TestClaudeRunner_Run_ResumeSetsSessionIDFromResumeSession(t *testing.T) {
+	logDir := t.TempDir()
+	r := &ClaudeRunner{
+		BinaryPath: "true", // exits 0, no output
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			Instructions: "continue",
+			SkipPlanning: true,
+		},
+	}
+	// Named e rather than exec so the imported os/exec package is not shadowed.
+	e := &storage.Execution{
+		ID:              "resume-exec-uuid",
+		TaskID:          "task-1",
+		ResumeSessionID: "original-session-uuid",
+		ResumeAnswer:    "yes",
+	}
+
+	// The returned error is deliberately ignored: this test only asserts on the
+	// SessionID that Run leaves on the execution record.
+	_ = r.Run(context.Background(), tk, e)
+
+	// SessionID must be the original session (ResumeSessionID), not the new
+	// exec's own ID. If it were e.ID, a second blocked-then-resumed cycle
+	// would use the wrong --resume argument and fail.
+	if e.SessionID != "original-session-uuid" {
+		t.Errorf("SessionID after resume Run: want %q, got %q", "original-session-uuid", e.SessionID)
+	}
+}
+
+// TestClaudeRunner_Run_InaccessibleWorkingDir_ReturnsError verifies that Run
+// returns an error mentioning "project_dir" when the task's ProjectDir points
+// at a path that does not exist.
+func TestClaudeRunner_Run_InaccessibleWorkingDir_ReturnsError(t *testing.T) {
+	r := &ClaudeRunner{
+		BinaryPath: "true", // would succeed if it ran
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     t.TempDir(),
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			ProjectDir:   "/nonexistent/path/does/not/exist",
+			SkipPlanning: true,
+		},
+	}
+	// Named e rather than exec so the imported os/exec package is not shadowed.
+	e := &storage.Execution{ID: "test-exec"}
+
+	err := r.Run(context.Background(), tk, e)
+
+	if err == nil {
+		// The message names project_dir to match the field set above and the
+		// substring asserted below.
+		t.Fatal("expected error for inaccessible project_dir, got nil")
+	}
+	if !strings.Contains(err.Error(), "project_dir") {
+		t.Errorf("expected 'project_dir' in error, got: %v", err)
+	}
+}
+
+// TestClaudeRunner_BinaryPath_Default checks that a runner without an explicit
+// BinaryPath resolves to "claude".
+func TestClaudeRunner_BinaryPath_Default(t *testing.T) {
+	runner := &ClaudeRunner{}
+	if got := runner.binaryPath(); got != "claude" {
+		t.Errorf("want 'claude', got %q", got)
+	}
+}
+
+func TestClaudeRunner_BinaryPath_Custom(t *testing.T) {
+	r := &ClaudeRunner{BinaryPath: "/usr/local/bin/claude"}
+	if r.binaryPath() != "/usr/local/bin/claude" {
+		t.Errorf("want custom path, got %q", r.binaryPath())
+	}
+}
+
+// TestExecOnce_NoGoroutineLeak_OnNaturalExit compares the goroutine count
+// before and after an execOnce call whose subprocess exits on its own (no
+// context cancellation). Any goroutine execOnce starts is expected to have
+// finished by the time it returns, so the count must not grow.
+func TestExecOnce_NoGoroutineLeak_OnNaturalExit(t *testing.T) {
+	logDir := t.TempDir()
+	runner := &ClaudeRunner{
+		BinaryPath: "true", // exits immediately with status 0, produces no output
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	rec := &storage.Execution{
+		ID:          "goroutine-leak-test",
+		TaskID:      "task-id",
+		StdoutPath:  filepath.Join(logDir, "stdout.log"),
+		StderrPath:  filepath.Join(logDir, "stderr.log"),
+		ArtifactDir: logDir,
+	}
+
+	// Yield once so test-infrastructure goroutines can settle before sampling.
+	runtime.Gosched()
+	before := runtime.NumGoroutine()
+
+	err := runner.execOnce(context.Background(), []string{}, "", "", rec)
+	if err != nil {
+		t.Fatalf("execOnce failed: %v", err)
+	}
+
+	// Small grace period for the scheduler; correct code has already stopped
+	// its goroutines by now.
+	time.Sleep(10 * time.Millisecond)
+	runtime.Gosched()
+
+	if after := runtime.NumGoroutine(); after > before {
+		t.Errorf("goroutine leak: %d goroutines before execOnce, %d after (leaked %d)",
+			before, after, after-before)
+	}
+}
+
+// initGitRepo turns dir into a git repository on branch "main" containing a
+// single commit (init.txt), so that it can be used as a clone source. Any
+// failing step aborts the calling test.
+func initGitRepo(t *testing.T, dir string) {
+	t.Helper()
+
+	// argv prepends the flags shared by every git invocation in this helper.
+	argv := func(sub ...string) []string {
+		common := []string{"git", "-c", "safe.directory=*", "-c", "commit.gpgsign=false", "-C", dir}
+		return append(common, sub...)
+	}
+
+	setup := [][]string{
+		argv("init", "-b", "main"),
+		argv("config", "user.email", "test@test"),
+		argv("config", "user.name", "test"),
+	}
+	for _, args := range setup {
+		out, err := exec.Command(args[0], args[1:]...).CombinedOutput()
+		if err != nil {
+			t.Fatalf("%v: %v\n%s", args, err, out)
+		}
+	}
+
+	// Seed file so the first commit is not empty.
+	seed := filepath.Join(dir, "init.txt")
+	if err := os.WriteFile(seed, []byte("init"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	add := argv("add", ".")
+	if out, err := exec.Command(add[0], add[1:]...).CombinedOutput(); err != nil {
+		t.Fatalf("git add: %v\n%s", err, out)
+	}
+	commit := argv("commit", "-m", "init")
+	if out, err := exec.Command(commit[0], commit[1:]...).CombinedOutput(); err != nil {
+		t.Fatalf("git commit: %v\n%s", err, out)
+	}
+}
+
+// TestSandboxCloneSource_PrefersLocalRemote checks that, when both a "local"
+// and an "origin" remote are configured, sandboxCloneSource returns the URL of
+// the "local" remote.
+func TestSandboxCloneSource_PrefersLocalRemote(t *testing.T) {
+	dir := t.TempDir()
+	initGitRepo(t, dir)
+	// Add a "local" remote pointing to a bare repo, plus a network "origin".
+	bare := t.TempDir()
+	setup := [][]string{
+		{"git", "init", "--bare", bare},
+		{"git", "-C", dir, "remote", "add", "local", bare},
+		{"git", "-C", dir, "remote", "add", "origin", "https://example.com/repo"},
+	}
+	for _, args := range setup {
+		// Fail fast on setup errors so a broken fixture is not misreported as a
+		// sandboxCloneSource bug.
+		if out, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil {
+			t.Fatalf("%v: %v\n%s", args, err, out)
+		}
+	}
+
+	got := sandboxCloneSource(dir)
+	if got != bare {
+		t.Errorf("expected bare repo path %q, got %q", bare, got)
+	}
+}
+
+// TestSandboxCloneSource_FallsBackToOrigin checks that sandboxCloneSource
+// returns the "origin" URL when no "local" remote is configured.
+func TestSandboxCloneSource_FallsBackToOrigin(t *testing.T) {
+	dir := t.TempDir()
+	initGitRepo(t, dir)
+	// sandboxCloneSource intentionally filters to local-FS remotes (so
+	// `git clone <src>` doesn't go over the network). Use a local path
+	// for origin to verify the fallback semantics.
+	originURL := t.TempDir()
+	// Fail fast on setup errors so a broken fixture is not misreported as a
+	// sandboxCloneSource bug.
+	if out, err := exec.Command("git", "-C", dir, "remote", "add", "origin", originURL).CombinedOutput(); err != nil {
+		t.Fatalf("git remote add: %v\n%s", err, out)
+	}
+
+	got := sandboxCloneSource(dir)
+	if got != originURL {
+		t.Errorf("expected origin URL %q, got %q", originURL, got)
+	}
+}
+
+// TestSandboxCloneSource_FallsBackToProjectDir checks that a repository with
+// no remotes at all resolves to the project directory itself.
+func TestSandboxCloneSource_FallsBackToProjectDir(t *testing.T) {
+	projectDir := t.TempDir()
+	initGitRepo(t, projectDir)
+
+	// initGitRepo configures no remotes.
+	if got := sandboxCloneSource(projectDir); got != projectDir {
+		t.Errorf("expected projectDir %q (no remotes), got %q", projectDir, got)
+	}
+}
+
+// TestSetupSandbox_ClonesGitRepo checks that setupSandbox on a git repository
+// yields a directory in which `git log` succeeds and lists at least one commit.
+func TestSetupSandbox_ClonesGitRepo(t *testing.T) {
+	source := t.TempDir()
+	initGitRepo(t, source)
+
+	sandbox, err := setupSandbox(source, slog.Default())
+	if err != nil {
+		t.Fatalf("setupSandbox: %v", err)
+	}
+	t.Cleanup(func() { os.RemoveAll(sandbox) })
+
+	// Best-effort switch to master in case the clone landed on main; the
+	// result is intentionally not checked.
+	checkout := exec.Command("git", gitSafe("-C", sandbox, "checkout", "master")...)
+	checkout.Run()
+
+	// NOTE(review): debug output. This is the file's only use of the fmt
+	// import, so dropping it requires removing that import as well.
+	debugLog, _ := exec.Command("git", "-C", sandbox, "log", "-1").CombinedOutput()
+	fmt.Printf("DEBUG: sandbox log: %s\n", string(debugLog))
+
+	// The sandbox must be a git repo with at least one commit.
+	history, err := exec.Command("git", "-C", sandbox, "log", "--oneline").Output()
+	if err != nil {
+		t.Fatalf("git log in sandbox: %v", err)
+	}
+	if strings.TrimSpace(string(history)) == "" {
+		t.Error("expected at least one commit in sandbox, got empty log")
+	}
+}
+
+// TestSetupSandbox_InitialisesNonGitDir checks that setupSandbox accepts a
+// plain directory (not a git repo) and still produces a sandbox containing a
+// .git entry.
+func TestSetupSandbox_InitialisesNonGitDir(t *testing.T) {
+	plainDir := t.TempDir()
+
+	sandbox, err := setupSandbox(plainDir, slog.Default())
+	if err != nil {
+		t.Fatalf("setupSandbox on plain dir: %v", err)
+	}
+	t.Cleanup(func() { os.RemoveAll(sandbox) })
+
+	gitDir := filepath.Join(sandbox, ".git")
+	_, statErr := os.Stat(gitDir)
+	if statErr != nil {
+		t.Errorf("sandbox should be a git repo: %v", statErr)
+	}
+}
+
+// TestTeardownSandbox_AutocommitsChanges leaves an uncommitted file in a
+// sandbox and checks that teardownSandbox succeeds, removes the sandbox, lands
+// an autocommit in the bare origin and records that commit on the execution.
+func TestTeardownSandbox_AutocommitsChanges(t *testing.T) {
+	// A bare repo serves as origin so the push has somewhere to go.
+	bare := t.TempDir()
+	if out, err := exec.Command("git", "init", "--bare", "-b", "main", bare).CombinedOutput(); err != nil {
+		t.Fatalf("git init bare: %v\n%s", err, out)
+	}
+
+	// Build the sandbox by hand instead of going through setupSandbox.
+	sandbox := t.TempDir()
+	initGitRepo(t, sandbox)
+
+	// git runs a git subcommand inside the sandbox with the shared flags.
+	git := func(sub ...string) *exec.Cmd {
+		args := append([]string{"-c", "safe.directory=*", "-c", "commit.gpgsign=false", "-C", sandbox}, sub...)
+		return exec.Command("git", args...)
+	}
+
+	if out, err := git("remote", "add", "origin", bare).CombinedOutput(); err != nil {
+		t.Fatalf("git remote add: %v\n%s", err, out)
+	}
+	// First push establishes origin/main.
+	if out, err := git("push", "origin", "main").CombinedOutput(); err != nil {
+		t.Fatalf("git push initial: %v\n%s", err, out)
+	}
+
+	// Record the starting HEAD that is handed to teardownSandbox.
+	headOut, err := git("rev-parse", "HEAD").Output()
+	if err != nil {
+		t.Fatalf("rev-parse HEAD: %v", err)
+	}
+	startHEAD := strings.TrimSpace(string(headOut))
+
+	// Dirty the working tree with an untracked file.
+	dirty := filepath.Join(sandbox, "dirty.txt")
+	if err := os.WriteFile(dirty, []byte("autocommit me"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
+	execRecord := &storage.Execution{}
+
+	if err := teardownSandbox("", sandbox, startHEAD, logger, execRecord); err != nil {
+		t.Fatalf("expected autocommit to succeed, got error: %v", err)
+	}
+
+	// A successful teardown is expected to delete the sandbox directory.
+	if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) {
+		t.Error("sandbox should have been removed after successful autocommit and push")
+	}
+
+	// The newest commit in the bare repo must be the autocommit.
+	lastMsg, err := exec.Command("git", "-C", bare, "log", "-1", "--pretty=%B").Output()
+	if err != nil {
+		t.Fatalf("git log in bare repo: %v", err)
+	}
+	if !strings.Contains(string(lastMsg), "chore: autocommit uncommitted changes") {
+		t.Errorf("expected autocommit message in log, got: %q", string(lastMsg))
+	}
+
+	// The same commit must also be recorded on the execution.
+	switch {
+	case len(execRecord.Commits) == 0:
+		t.Error("expected at least one commit in execRecord")
+	case !strings.Contains(execRecord.Commits[0].Message, "chore: autocommit uncommitted changes"):
+		t.Errorf("unexpected commit message: %q", execRecord.Commits[0].Message)
+	}
+}
+
+// TestTeardownSandbox_BuildFailure_BlocksAutocommit gives the sandbox a
+// Makefile whose build target exits 1 and checks that teardownSandbox reports
+// a build failure, leaves the sandbox on disk and pushes no autocommit to the
+// bare origin.
+func TestTeardownSandbox_BuildFailure_BlocksAutocommit(t *testing.T) {
+	bare := t.TempDir()
+	if out, err := exec.Command("git", "init", "--bare", "-b", "main", bare).CombinedOutput(); err != nil {
+		t.Fatalf("git init bare: %v\n%s", err, out)
+	}
+
+	sandbox := t.TempDir()
+	initGitRepo(t, sandbox)
+	if out, err := exec.Command("git", "-c", "safe.directory=*", "-c", "commit.gpgsign=false", "-C", sandbox, "remote", "add", "origin", bare).CombinedOutput(); err != nil {
+		t.Fatalf("git remote add: %v\n%s", err, out)
+	}
+
+	// Capture startHEAD
+	headOut, err := exec.Command("git", "-c", "safe.directory=*", "-c", "commit.gpgsign=false", "-C", sandbox, "rev-parse", "HEAD").Output()
+	if err != nil {
+		t.Fatalf("rev-parse HEAD: %v", err)
+	}
+	startHEAD := strings.TrimSpace(string(headOut))
+
+	// Leave an uncommitted file.
+	if err := os.WriteFile(filepath.Join(sandbox, "dirty.txt"), []byte("dirty"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a failing Makefile.
+	makefile := "build:\n\t@echo 'build failed'\n\texit 1\n"
+	if err := os.WriteFile(filepath.Join(sandbox, "Makefile"), []byte(makefile), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+	execRecord := &storage.Execution{}
+
+	err = teardownSandbox("", sandbox, startHEAD, logger, execRecord)
+	if err == nil {
+		t.Error("expected teardown to fail due to build failure, but it succeeded")
+	} else if !strings.Contains(err.Error(), "build failed before autocommit") {
+		t.Errorf("expected build failure error message, got: %v", err)
+	}
+
+	// Sandbox should NOT be removed if teardown failed.
+	if _, statErr := os.Stat(sandbox); os.IsNotExist(statErr) {
+		t.Error("sandbox should have been preserved after build failure")
+	}
+
+	// Verify no new commit in bare repo. The error is deliberately discarded:
+	// this test never pushes to the bare repo itself, so `git log HEAD` may
+	// exit non-zero there; only the combined output is inspected.
+	out, _ := exec.Command("git", "-C", bare, "log", "HEAD").CombinedOutput()
+	if strings.Contains(string(out), "chore: autocommit uncommitted changes") {
+		t.Error("autocommit should not have been pushed after build failure")
+	}
+}
+
+// TestTeardownSandbox_BuildSuccess_ProceedsToAutocommit gives the sandbox a
+// Makefile whose build target succeeds and checks that teardownSandbox then
+// succeeds, removes the sandbox and lands the autocommit in the bare origin.
+func TestTeardownSandbox_BuildSuccess_ProceedsToAutocommit(t *testing.T) {
+	bare := t.TempDir()
+	if out, err := exec.Command("git", "init", "--bare", "-b", "main", bare).CombinedOutput(); err != nil {
+		t.Fatalf("git init bare: %v\n%s", err, out)
+	}
+
+	sandbox := t.TempDir()
+	initGitRepo(t, sandbox)
+
+	// git runs a git subcommand inside the sandbox with the shared flags.
+	git := func(sub ...string) *exec.Cmd {
+		args := append([]string{"-c", "safe.directory=*", "-c", "commit.gpgsign=false", "-C", sandbox}, sub...)
+		return exec.Command("git", args...)
+	}
+
+	if out, err := git("remote", "add", "origin", bare).CombinedOutput(); err != nil {
+		t.Fatalf("git remote add: %v\n%s", err, out)
+	}
+
+	// Record the starting HEAD that is handed to teardownSandbox.
+	headOut, err := git("rev-parse", "HEAD").Output()
+	if err != nil {
+		t.Fatalf("rev-parse HEAD: %v", err)
+	}
+	startHEAD := strings.TrimSpace(string(headOut))
+
+	// Two untracked files: a dirty marker and a Makefile whose build passes.
+	if err := os.WriteFile(filepath.Join(sandbox, "dirty.txt"), []byte("dirty"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	passingMakefile := "build:\n\t@echo 'build succeeded'\n"
+	if err := os.WriteFile(filepath.Join(sandbox, "Makefile"), []byte(passingMakefile), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+	execRecord := &storage.Execution{}
+
+	if err := teardownSandbox("", sandbox, startHEAD, logger, execRecord); err != nil {
+		t.Fatalf("expected teardown to succeed after build success, got error: %v", err)
+	}
+
+	// A successful teardown is expected to delete the sandbox directory.
+	if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) {
+		t.Error("sandbox should have been removed after successful build and autocommit")
+	}
+
+	// The newest commit in the bare repo must be the autocommit.
+	lastMsg, err := exec.Command("git", "-C", bare, "log", "-1", "--pretty=%B").Output()
+	if err != nil {
+		t.Fatalf("git log in bare repo: %v", err)
+	}
+	if !strings.Contains(string(lastMsg), "chore: autocommit uncommitted changes") {
+		t.Errorf("expected autocommit message in log, got: %q", string(lastMsg))
+	}
+}
+
+
+// TestTeardownSandbox_CapturesExplicitCommits makes a regular commit in the
+// sandbox after startHEAD and checks that teardownSandbox records it, with a
+// non-empty hash, on the execution record.
+func TestTeardownSandbox_CapturesExplicitCommits(t *testing.T) {
+	bare := t.TempDir()
+	if out, err := exec.Command("git", "init", "--bare", "-b", "main", bare).CombinedOutput(); err != nil {
+		t.Fatalf("git init bare: %v\n%s", err, out)
+	}
+
+	sandbox := t.TempDir()
+	initGitRepo(t, sandbox)
+
+	// gitArgs prepends the flags shared by every sandbox git call below.
+	gitArgs := func(sub ...string) []string {
+		return append([]string{"-c", "safe.directory=*", "-c", "commit.gpgsign=false", "-C", sandbox}, sub...)
+	}
+
+	if out, err := exec.Command("git", gitArgs("remote", "add", "origin", bare)...).CombinedOutput(); err != nil {
+		t.Fatalf("git remote add: %v\n%s", err, out)
+	}
+	if out, err := exec.Command("git", gitArgs("push", "origin", "main")...).CombinedOutput(); err != nil {
+		t.Fatalf("git push initial: %v\n%s", err, out)
+	}
+
+	headOut, err := exec.Command("git", gitArgs("rev-parse", "HEAD")...).Output()
+	if err != nil {
+		t.Fatalf("rev-parse HEAD: %v", err)
+	}
+	startHEAD := strings.TrimSpace(string(headOut))
+
+	// Stand in for Claude committing its own work: write a file, stage it and
+	// commit it.
+	if err := os.WriteFile(filepath.Join(sandbox, "work.txt"), []byte("done"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	steps := [][]string{
+		gitArgs("add", "-A"),
+		gitArgs("commit", "-m", "feat: implement the feature"),
+	}
+	for _, args := range steps {
+		out, err := exec.Command("git", args...).CombinedOutput()
+		if err != nil {
+			t.Fatalf("git %v: %v\n%s", args, err, out)
+		}
+	}
+
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+	execRecord := &storage.Execution{}
+
+	if err := teardownSandbox("", sandbox, startHEAD, logger, execRecord); err != nil {
+		t.Fatalf("teardownSandbox: %v", err)
+	}
+
+	if len(execRecord.Commits) == 0 {
+		t.Fatal("expected commits to be captured in execRecord")
+	}
+	first := execRecord.Commits[0]
+	if !strings.Contains(first.Message, "feat: implement the feature") {
+		t.Errorf("unexpected commit message: %q", first.Message)
+	}
+	if first.Hash == "" {
+		t.Error("commit hash should not be empty")
+	}
+}
+
+// TestTeardownSandbox_CleanSandboxWithNoNewCommits_RemovesSandbox checks that
+// tearing down a freshly created sandbox (no edits, no new commits) succeeds
+// and deletes the sandbox directory.
+func TestTeardownSandbox_CleanSandboxWithNoNewCommits_RemovesSandbox(t *testing.T) {
+	src := t.TempDir()
+	initGitRepo(t, src)
+	sandbox, err := setupSandbox(src, slog.Default())
+	if err != nil {
+		t.Fatalf("setupSandbox: %v", err)
+	}
+	// Always clean up, including when an assertion below aborts the test.
+	// Removing an already-deleted path is harmless.
+	t.Cleanup(func() { os.RemoveAll(sandbox) })
+
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+	execRecord := &storage.Execution{}
+
+	// Fail on a rev-parse error rather than silently passing an empty
+	// startHEAD to teardownSandbox.
+	headOut, err := exec.Command("git", "-C", sandbox, "rev-parse", "HEAD").Output()
+	if err != nil {
+		t.Fatalf("rev-parse HEAD: %v", err)
+	}
+	startHEAD := strings.TrimSpace(string(headOut))
+
+	// Sandbox has no new commits beyond origin; teardown should succeed and remove it.
+	if err := teardownSandbox(src, sandbox, startHEAD, logger, execRecord); err != nil {
+		t.Fatalf("teardownSandbox: %v", err)
+	}
+	if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) {
+		t.Error("sandbox should have been removed after clean teardown")
+	}
+}
+
+
+// TestClaudeRunner_Run_ResumeUsesStoredSandboxDir verifies that when a resume
+// execution has SandboxDir set, the runner uses that directory (not project_dir)
+// as the working directory, so Claude finds its session files there.
+func TestClaudeRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) {
+	logDir := t.TempDir()
+	sandboxDir := t.TempDir()
+	cwdFile := filepath.Join(logDir, "cwd.txt")
+
+	// Use a script that writes its working directory to a file in logDir (stable path).
+	scriptPath := filepath.Join(t.TempDir(), "fake-claude.sh")
+	script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n"
+	if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	r := &ClaudeRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	tk := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			ProjectDir:   sandboxDir, // must exist; resume overrides it with SandboxDir anyway
+			SkipPlanning: true,
+		},
+	}
+	// Named e rather than exec so the imported os/exec package is not shadowed.
+	e := &storage.Execution{
+		ID:              "resume-exec-uuid",
+		TaskID:          "task-1",
+		ResumeSessionID: "original-session",
+		ResumeAnswer:    "yes",
+		SandboxDir:      sandboxDir,
+	}
+
+	// The returned error is deliberately ignored: the assertion below is on
+	// the working directory the fake binary recorded.
+	_ = r.Run(context.Background(), tk, e)
+
+	got, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	// The runner should have executed claude in sandboxDir, not in project_dir.
+	if string(got) != sandboxDir {
+		t.Errorf("resume working dir: want %q, got %q", sandboxDir, string(got))
+	}
+}
+
+// TestClaudeRunner_Run_StaleSandboxDir_ClonesAfresh resumes an execution whose
+// SandboxDir no longer exists and checks that the fake binary ran in a new
+// "claudomator-sandbox-" directory, not in the stale path or in the project
+// directory itself.
+func TestClaudeRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) {
+	logDir := t.TempDir()
+	projectDir := t.TempDir()
+	initGitRepo(t, projectDir)
+
+	// Fake claude binary: records its working directory in cwdFile.
+	cwdFile := filepath.Join(logDir, "cwd.txt")
+	scriptPath := filepath.Join(t.TempDir(), "fake-claude.sh")
+	scriptBody := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n"
+	if err := os.WriteFile(scriptPath, []byte(scriptBody), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	runner := &ClaudeRunner{
+		BinaryPath: scriptPath,
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	resumeTask := &task.Task{
+		Agent: task.AgentConfig{
+			Type:         "claude",
+			ProjectDir:   projectDir,
+			SkipPlanning: true,
+		},
+	}
+	// A sandbox path that was never created, standing in for one that has
+	// since been deleted (e.g. /tmp was purged).
+	staleSandbox := filepath.Join(t.TempDir(), "gone")
+	rec := &storage.Execution{
+		ID:              "resume-exec-2",
+		TaskID:          "task-2",
+		ResumeSessionID: "session-abc",
+		ResumeAnswer:    "ok",
+		SandboxDir:      staleSandbox,
+	}
+
+	if err := runner.Run(context.Background(), tk(resumeTask), rec); err != nil {
+		t.Fatalf("Run with stale sandbox: %v", err)
+	}
+
+	recorded, err := os.ReadFile(cwdFile)
+	if err != nil {
+		t.Fatalf("cwd file not written: %v", err)
+	}
+	// The sandbox is gone again after teardown, so the checks are mostly about
+	// where the binary did NOT run.
+	cwd := string(recorded)
+	if cwd == staleSandbox {
+		t.Error("ran in stale sandbox dir that doesn't exist")
+	}
+	if cwd == projectDir {
+		t.Error("ran directly in project_dir; expected a fresh sandbox clone")
+	}
+	// The path should carry the claudomator sandbox prefix.
+	if !strings.Contains(cwd, "claudomator-sandbox-") {
+		t.Errorf("expected sandbox path, got %q", cwd)
+	}
+}
+
+// tk returns its argument unchanged; it only keeps the Run call above compact.
+func tk(t *task.Task) *task.Task { return t }
+
+func TestTailFile_MissingFile_ReturnsEmpty(t *testing.T) {
+	got := tailFile("/nonexistent/path/file.log", 10)
+	if got != "" {
+		t.Errorf("want empty string for missing file, got %q", got)
+	}
+}
+
author	Peter Stone <thepeterstone@gmail.com>	2026-05-13 04:02:20 +0000
committer	Peter Stone <thepeterstone@gmail.com>	2026-05-13 04:02:20 +0000
commit	68399a598924775a3ec22a39c2336ae497fb07f3 (patch)
tree	29ade8224eb51eca47a1d9d03bb4d0d3653a72aa /internal/executor/claude_test.go
parent	f01231cc45f41ce2dc37072e77428e467ef3fc15 (diff)
parent	d970c0730ff0dc7d714d3261197d8ba52b5d21f4 (diff)