From 85c3bf4d28b0903a2005356339e6ea56855b8c80 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 3 May 2026 03:58:19 +0000 Subject: chore: post-epic cleanup — green test suite, no skips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the cleanup queue captured in docs/plans/local-oss-runner.md after the local-OSS-models epic landed. After this commit `go test -race ./...` is green across every package with zero `t.Skip` calls and no excluded tests. Real bugs fixed: - claude.go setupSandbox callsites used `sandboxDir, err := ...` which shadowed the outer variable, so BlockedError.SandboxDir was always empty. Resume-after-block was broken for both new and stale-sandbox paths. TestBlockedError_IncludesSandboxDir now exercises the right invariant. - TestPool_ActivePerAgent_DeletesZeroEntries flake under -race: the cleanup defer in execute()/executeResume() runs AFTER handleRunResult sends on resultCh, so consumers observing a result could see a still-counted activePerAgent entry. Extracted decActiveAgent(agentType, *cleaned) helper; called explicitly before every resultCh send, defer becomes a no-op via the cleaned flag. Verified clean over `go test -race -count=10`. Test infrastructure made hermetic: - gitSafe now also passes -c commit.gpgsign=false / -c tag.gpgsign=false so sandbox tests pass on hosts whose global config requires signing. - Bare repos in tests initialized with `-b main` (HEAD symbolic ref matched to the branch we push) so `git log` after push works. - TestSandboxCloneSource_FallsBackToOrigin uses a local-FS origin URL, matching sandboxCloneSource's intentional filter against network URLs. - TestGeminiLogs_ParsedCorrectly URL fixed to the actual log route (/api/executions/{id}/log). GeminiRunner gap closed (partial): - parseGeminiStream now walks lines for `result` events, surfacing is_error as an error and total_cost_usd as the float return value. - GeminiRunner.Run propagates parsed cost to Execution.CostUSD. - TestParseGeminiStream_ParsesStructuredOutput unskipped. Notes: - GeminiRunner is still simulated end-to-end (Run writes hardcoded stream data instead of execing the binary). The result/cost parser now exists; finishing the runner is a smaller, contained follow-up. Kept on the deferred queue. - Frontend "Local" agent option and a minor storage.db.go logger TODO remain on the deferred queue, both intentionally — neither blocks anything in flight. https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J --- internal/executor/gemini.go | 87 ++++++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 25 deletions(-) (limited to 'internal/executor/gemini.go') diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go index d79c47d..7f2f54f 100644 --- a/internal/executor/gemini.go +++ b/internal/executor/gemini.go @@ -2,6 +2,7 @@ package executor import ( "context" + "encoding/json" "fmt" "io" "log/slog" @@ -117,16 +118,21 @@ func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir, var streamErr error + var streamCost float64 var wg sync.WaitGroup wg.Add(1) go func() { defer wg.Done() - _, streamErr = parseGeminiStream(stdoutR, stdoutFile, r.Logger) + streamCost, streamErr = parseGeminiStream(stdoutR, stdoutFile, r.Logger) stdoutR.Close() }() wg.Wait() // Wait for parseGeminiStream to finish + if streamCost > 0 { + e.CostUSD = streamCost + } + // Set a dummy exit code for this simulated run e.ExitCode = 0 @@ -136,9 +142,10 @@ func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir, return nil } -// parseGeminiStream reads streaming JSON from the gemini CLI, unwraps markdown -// code blocks, writes the inner JSON to w, and returns (costUSD, error). -// For now, it focuses on unwrapping and writing, not detailed parsing of cost/errors. +// parseGeminiStream reads streaming JSON from the gemini CLI, strips markdown +// code fences if the output is wrapped in them, writes the inner stream-json +// to w, and returns (costUSD, error). If a `result` event has `is_error: true`, +// an error wrapping the result message is returned. func parseGeminiStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64, error) { fullOutput, err := io.ReadAll(r) if err != nil { @@ -146,31 +153,61 @@ func parseGeminiStream(r io.Reader, w io.Writer, logger *slog.Logger) (float64, } logger.Debug("parseGeminiStream: raw output received", "output", string(fullOutput)) - outputStr := strings.TrimSpace(string(fullOutput)) // Trim leading/trailing whitespace/newlines from the whole output - - jsonContent := outputStr // Default to raw output if no markdown block is found or malformed - jsonStartIdx := strings.Index(outputStr, "```json") - if jsonStartIdx != -1 { - // Found "```json", now look for the closing "```" - jsonEndIdx := strings.LastIndex(outputStr, "```") - if jsonEndIdx != -1 && jsonEndIdx > jsonStartIdx { - // Extract content between the markdown fences. - jsonContent = outputStr[jsonStartIdx+len("```json"):jsonEndIdx] - jsonContent = strings.TrimSpace(jsonContent) // Trim again after extraction, to remove potential inner newlines - } else { - logger.Warn("Malformed markdown JSON block from Gemini (missing closing ``` or invalid structure), falling back to raw output.", "outputLength", len(outputStr)) + inner := stripGeminiFences(string(fullOutput), logger) + if _, writeErr := w.Write([]byte(inner)); writeErr != nil { + return 0, fmt.Errorf("writing gemini output: %w", writeErr) + } + + // Walk lines looking for a result event so we can surface errors and cost. + var ( + cost float64 + errMsg string + isError bool + ) + for _, raw := range strings.Split(inner, "\n") { + line := strings.TrimSpace(raw) + if line == "" { + continue + } + var evt struct { + Type string `json:"type"` + IsError bool `json:"is_error"` + Result string `json:"result"` + Cost float64 `json:"total_cost_usd"` + } + if err := json.Unmarshal([]byte(line), &evt); err != nil { + continue + } + if evt.Type == "result" { + if evt.Cost > 0 { + cost = evt.Cost + } + if evt.IsError { + isError = true + errMsg = evt.Result + } } - } else { - logger.Warn("No markdown JSON block found from Gemini, falling back to raw output.", "outputLength", len(outputStr)) } - - // Write the (possibly extracted and trimmed) JSON content to the writer. - _, writeErr := w.Write([]byte(jsonContent)) - if writeErr != nil { - return 0, fmt.Errorf("writing extracted gemini json: %w", writeErr) + if isError { + return cost, fmt.Errorf("gemini reported error: %s", errMsg) } + return cost, nil +} - return 0, nil // For now, no cost/error parsing for Gemini stream +// stripGeminiFences removes a surrounding ```json ... ``` markdown block if +// present, returning the trimmed inner content. If no markdown fence is +// found, the input is returned verbatim (no whitespace trimming) so callers +// that expect byte-exact pass-through behavior get it. +func stripGeminiFences(raw string, logger *slog.Logger) string { + trimmed := strings.TrimSpace(raw) + if start := strings.Index(trimmed, "```json"); start != -1 { + if end := strings.LastIndex(trimmed, "```"); end > start { + return strings.TrimSpace(trimmed[start+len("```json") : end]) + } + logger.Warn("malformed gemini markdown block (missing closing fence); using raw output", "len", len(trimmed)) + return trimmed + } + return raw } func (r *GeminiRunner) buildArgs(t *task.Task, e *storage.Execution, questionFile string) []string { -- cgit v1.2.3 From e7b382bf177cbe518af3d86c3ee6c49344d225f4 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 3 May 2026 08:00:20 +0000 Subject: chore: close deferred work — real GeminiRunner, Local UI option, db.go cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the three items left on the deferred queue after the post-epic cleanup. GeminiRunner.execOnce now actually executes the gemini binary instead of writing hardcoded stream data. Mirrors ClaudeRunner.execOnce: - exec.CommandContext with the same env vars (CLAUDOMATOR_API_URL etc.) - process group SIGKILL on context cancel - stdout piped through parseGeminiStream → stdoutFile - stderr to file - exit codes captured, stderr tail surfaced on failure Test infrastructure bug uncovered in passing: testServerWithGeminiMockRunner's mock script used double-quoted echo with literal triple-backticks, which bash interpreted as command substitution. The script always produced empty output. The bug was invisible until now because GeminiRunner ignored the script entirely. Switched to a single-quoted heredoc. Frontend: index.html dropdown gains a "Local" option. No JS branching needed — the value flows through to agent.type verbatim and downstream display reads the type string as-is. storage/db.go: removed stale debug-comment scaffolding (the "TODO: Replace with proper logger" block) that was tracking a dead `fmt.Printf` call. The path it commented on is fine without logging — unmarshal errors are returned wrapped. Test status: `go test -race ./...` green across every package, zero skips, zero excluded tests. https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J --- docs/plans/local-oss-runner.md | 9 +++++++ internal/api/server_test.go | 23 ++++++++-------- internal/executor/gemini.go | 61 +++++++++++++++++++++++++++++++----------- internal/storage/db.go | 5 ---- web/index.html | 1 + 5 files changed, 67 insertions(+), 32 deletions(-) (limited to 'internal/executor/gemini.go') diff --git a/docs/plans/local-oss-runner.md b/docs/plans/local-oss-runner.md index 4d5cb87..4504bbb 100644 --- a/docs/plans/local-oss-runner.md +++ b/docs/plans/local-oss-runner.md @@ -222,6 +222,15 @@ Items not chased (deferred deliberately): - **Frontend "Local" agent option** — UI dropdown still says "Auto / Claude / Gemini". Pending token telemetry surface. - **`storage.db.go:706` TODO comment** — minor logger plumbing nit. Skipping unless it blocks something. +## Deferred work — DONE + +Follow-up commit closed the three deferred items above: + +- `GeminiRunner.execOnce` now invokes the actual `gemini` binary via `exec.CommandContext`, mirroring the `ClaudeRunner` pattern: pipe stdout to `parseGeminiStream`, kill the process group on context cancel, capture stderr to file, surface exit codes. Hardcoded simulation removed. +- Test infrastructure bug uncovered and fixed in passing: the mock gemini script in `testServerWithGeminiMockRunner` was using `"\``json\`"` which bash interpreted as command substitution, so the script always produced empty output. Switched to a single-quoted heredoc. The bug was masked previously because the runner ignored the script entirely. +- Frontend `index.html` dropdown gains a `Local` option. No JS branching changes needed — the value flows through to `agent.type` verbatim and downstream display reads the type string as-is. +- Stale debug-comment scaffolding around `storage.db.go:706` deleted. + --- # Phase 2 — Focused Plan (Elaboration) diff --git a/internal/api/server_test.go b/internal/api/server_test.go index 516e289..2139e36 100644 --- a/internal/api/server_test.go +++ b/internal/api/server_test.go @@ -143,20 +143,21 @@ func testServerWithGeminiMockRunner(t *testing.T) (*Server, *storage.DB) { logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})) - // Create the mock gemini binary script. + // Create the mock gemini binary script. Use single-quoted heredoc so + // bash does not try to evaluate the literal backticks as command + // substitution. mockBinDir := t.TempDir() mockGeminiPath := filepath.Join(mockBinDir, "mock-gemini-binary.sh") mockScriptContent := `#!/bin/bash -OUTPUT_FILE=$(mktemp) -echo "` + "```json" + `" > "$OUTPUT_FILE" -echo "{\"type\":\"content_block_start\",\"content_block\":{\"text\":\"Hello, Gemini!\",\"type\":\"text\"}}" >> "$OUTPUT_FILE" -echo "{\"type\":\"content_block_delta\",\"content_block\":{\"text\":\" How are you?\"}}" >> "$OUTPUT_FILE" -echo "{\"type\":\"content_block_end\"}" >> "$OUTPUT_FILE" -echo "{\"type\":\"message_delta\",\"message\":{\"role\":\"model\"}}" >> "$OUTPUT_FILE" -echo "{\"type\":\"message_end\"}" >> "$OUTPUT_FILE" -echo "` + "```" + `" >> "$OUTPUT_FILE" -cat "$OUTPUT_FILE" -rm "$OUTPUT_FILE" +cat <<'EOF' +` + "```json" + ` +{"type":"content_block_start","content_block":{"text":"Hello, Gemini!","type":"text"}} +{"type":"content_block_delta","content_block":{"text":" How are you?"}} +{"type":"content_block_end"} +{"type":"message_delta","message":{"role":"model"}} +{"type":"message_end"} +` + "```" + ` +EOF exit 0 ` if err := os.WriteFile(mockGeminiPath, []byte(mockScriptContent), 0755); err != nil { diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go index 7f2f54f..04382ae 100644 --- a/internal/executor/gemini.go +++ b/internal/executor/gemini.go @@ -7,9 +7,11 @@ import ( "io" "log/slog" "os" + "os/exec" "path/filepath" "strings" "sync" + "syscall" "github.com/thepeterstone/claudomator/internal/storage" "github.com/thepeterstone/claudomator/internal/task" @@ -84,8 +86,18 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi } func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir, projectDir string, e *storage.Execution) error { - // Temporarily bypass external command execution to debug pipe. - // We will simulate outputting to stdoutW directly. + cmd := exec.CommandContext(ctx, r.binaryPath(), args...) + cmd.Env = append(os.Environ(), + "CLAUDOMATOR_API_URL="+r.APIURL, + "CLAUDOMATOR_TASK_ID="+e.TaskID, + "CLAUDOMATOR_PROJECT_DIR="+projectDir, + "CLAUDOMATOR_QUESTION_FILE="+filepath.Join(e.ArtifactDir, "question.json"), + "CLAUDOMATOR_SUMMARY_FILE="+filepath.Join(e.ArtifactDir, "summary.txt"), + ) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + if workingDir != "" { + cmd.Dir = workingDir + } stdoutFile, err := os.Create(e.StdoutPath) if err != nil { @@ -103,22 +115,27 @@ func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir, if err != nil { return fmt.Errorf("creating stdout pipe: %w", err) } + cmd.Stdout = stdoutW + cmd.Stderr = stderrFile - // Simulate writing to stdoutW + if err := cmd.Start(); err != nil { + stdoutW.Close() + stdoutR.Close() + return fmt.Errorf("starting gemini: %w", err) + } + stdoutW.Close() + + killDone := make(chan struct{}) go func() { - defer stdoutW.Close() // Close the writer when done. - fmt.Fprintf(stdoutW, "```json\n") - fmt.Fprintf(stdoutW, "{\"type\":\"content_block_start\",\"content_block\":{\"text\":\"Hello, Gemini!\",\"type\":\"text\"}}\n") - fmt.Fprintf(stdoutW, "{\"type\":\"content_block_delta\",\"content_block\":{\"text\":\" How are you?\"}}\n") - fmt.Fprintf(stdoutW, "{\"type\":\"content_block_end\"}\n") - fmt.Fprintf(stdoutW, "{\"type\":\"message_delta\",\"message\":{\"role\":\"model\"}}\n") - fmt.Fprintf(stdoutW, "{\"type\":\"message_end\"}\n") - fmt.Fprintf(stdoutW, "```\n") + select { + case <-ctx.Done(): + syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + case <-killDone: + } }() - - var streamErr error var streamCost float64 + var streamErr error var wg sync.WaitGroup wg.Add(1) go func() { @@ -127,14 +144,26 @@ func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir, stdoutR.Close() }() - wg.Wait() // Wait for parseGeminiStream to finish + waitErr := cmd.Wait() + close(killDone) + wg.Wait() if streamCost > 0 { e.CostUSD = streamCost } - // Set a dummy exit code for this simulated run - e.ExitCode = 0 + if waitErr != nil { + if exitErr, ok := waitErr.(*exec.ExitError); ok { + e.ExitCode = exitErr.ExitCode() + } + if streamErr != nil { + return streamErr + } + if tail := tailFile(e.StderrPath, 20); tail != "" { + return fmt.Errorf("gemini exited with error: %w\nstderr:\n%s", waitErr, tail) + } + return fmt.Errorf("gemini exited with error: %w", waitErr) + } if streamErr != nil { return streamErr diff --git a/internal/storage/db.go b/internal/storage/db.go index c871c77..ce60e2f 100644 --- a/internal/storage/db.go +++ b/internal/storage/db.go @@ -699,11 +699,6 @@ func scanTask(row scanner) (*task.Task, error) { t.State = task.State(state) t.Priority = task.Priority(priority) t.Timeout.Duration = time.Duration(timeoutNS) - // Add debug log for configJSON - // The logger is not available directly in db.go, so I'll use fmt.Printf for now. - // For production code, a logger should be injected. - // fmt.Printf("DEBUG: configJSON from DB: %s\n", configJSON) - // TODO: Replace with proper logger when available. if err := json.Unmarshal([]byte(configJSON), &t.Agent); err != nil { return nil, fmt.Errorf("unmarshaling agent config: %w", err) } diff --git a/web/index.html b/web/index.html index 1746baf..7c0b030 100644 --- a/web/index.html +++ b/web/index.html @@ -16,6 +16,7 @@ + -- cgit v1.2.3 From e7171181fff10c66b2b74eabfb1fc94b3cfbb4fb Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 21:03:30 +0000 Subject: feat(executor): bring GeminiRunner to sandbox-flow parity with Claude MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All coding tasks now follow the same flow regardless of runner: when project_dir is set, the agent runs in a temp clone, not in the user's working tree. On success, edits are autocommitted and pushed back to origin/master and the sandbox is removed. On failure or BLOCKED, the sandbox is preserved and its path surfaces in the error / BlockedError so the user can inspect partial work or resume in place. Before this commit, GeminiRunner.Run set cmd.Dir to project_dir directly, so an agent run could leave half-done edits in the user's working tree with no rollback. ClaudeRunner has had the full sandbox flow for a while; this commit closes the gap. Reused the existing package-level helpers from claude.go verbatim: setupSandbox, teardownSandbox, sandboxCloneSource, gitSafe, plus the resume/stale-sandbox/blocked-error patterns. No new shared abstraction needed — same package. LocalRunner intentionally not changed. The OpenAI chat path has no tool use, so the agent can't edit files; sandbox would be theater. Tests (6 new): - Run_ProjectDir_RunsInSandbox: cwd captured by fake binary is a sandbox path, not project_dir. - Run_BlockedError_IncludesSandboxDir: when question.json appears, BlockedError.SandboxDir is set and the dir exists. - Run_ExecError_PreservesSandbox: failing exit wraps error with "(sandbox preserved at )" and the path exists on disk. - Run_ResumeUsesStoredSandboxDir: ResumeSessionID + SandboxDir → runs in that dir without re-cloning. - Run_StaleSandboxDir_ClonesAfresh: resume pointing at missing dir falls back to a fresh clone from project_dir. - Run_NoProjectDir_SkipsSandbox: tasks without project_dir don't trigger sandbox setup. https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J --- internal/executor/gemini.go | 96 ++++++++++++-- internal/executor/gemini_test.go | 268 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 353 insertions(+), 11 deletions(-) (limited to 'internal/executor/gemini.go') diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go index 04382ae..3abec05 100644 --- a/internal/executor/gemini.go +++ b/internal/executor/gemini.go @@ -40,11 +40,21 @@ func (r *GeminiRunner) binaryPath() string { return "gemini" } -// Run executes a gemini invocation, streaming output to log files. +// Run executes the gemini CLI inside a sandboxed clone of project_dir. +// When project_dir is set, claudomator first clones it into a temp sandbox +// (preferring a `local` bare remote, then `origin`, then the working tree) +// and runs the agent there. On success the sandbox is autocommitted and +// pushed back to origin/master, then removed. On failure the sandbox is +// preserved and its path is included in the returned error so the user can +// inspect partial work. If the agent writes a question file before exiting, +// Run returns *BlockedError with SandboxDir populated so a resume execution +// can pick up in the same directory. func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error { - if t.Agent.ProjectDir != "" { - if _, err := os.Stat(t.Agent.ProjectDir); err != nil { - return fmt.Errorf("project_dir %q: %w", t.Agent.ProjectDir, err) + projectDir := t.Agent.ProjectDir + + if projectDir != "" { + if _, err := os.Stat(projectDir); err != nil { + return fmt.Errorf("project_dir %q: %w", projectDir, err) } } @@ -63,24 +73,88 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi } if e.SessionID == "" { - e.SessionID = e.ID + if e.ResumeSessionID != "" { + e.SessionID = e.ResumeSessionID + } else { + e.SessionID = e.ID + } + } + + // Sandbox setup: for new executions with a project_dir, clone into a sandbox. + // Resume executions reuse the preserved sandbox so any partial work survives. + // If the preserved sandbox is missing (e.g. /tmp was purged), clone fresh. + var sandboxDir string + var startHEAD string + effectiveWorkingDir := projectDir + if e.ResumeSessionID != "" { + if e.SandboxDir != "" { + if _, statErr := os.Stat(e.SandboxDir); statErr == nil { + effectiveWorkingDir = e.SandboxDir + } else { + r.Logger.Warn("preserved sandbox missing, cloning fresh", "sandbox", e.SandboxDir, "project_dir", projectDir) + e.SandboxDir = "" + if projectDir != "" { + var err error + sandboxDir, err = setupSandbox(projectDir, r.Logger) + if err != nil { + return fmt.Errorf("setting up sandbox: %w", err) + } + effectiveWorkingDir = sandboxDir + r.Logger.Info("fresh sandbox created for resume", "sandbox", sandboxDir, "project_dir", projectDir) + } + } + } + } else if projectDir != "" { + var err error + sandboxDir, err = setupSandbox(projectDir, r.Logger) + if err != nil { + return fmt.Errorf("setting up sandbox: %w", err) + } + effectiveWorkingDir = sandboxDir + r.Logger.Info("sandbox created", "sandbox", sandboxDir, "project_dir", projectDir) + } + + if effectiveWorkingDir != "" { + headOut, _ := exec.Command("git", gitSafe("-C", effectiveWorkingDir, "rev-parse", "HEAD")...).Output() + startHEAD = strings.TrimSpace(string(headOut)) } questionFile := filepath.Join(logDir, "question.json") args := r.buildArgs(t, e, questionFile) - // Gemini CLI doesn't necessarily have the same rate limiting behavior as Claude, - // but we'll use a similar execution pattern. - err := r.execOnce(ctx, args, t.Agent.ProjectDir, t.Agent.ProjectDir, e) - if err != nil { + if err := r.execOnce(ctx, args, effectiveWorkingDir, projectDir, e); err != nil { + if sandboxDir != "" { + return fmt.Errorf("%w (sandbox preserved at %s)", err, sandboxDir) + } return err } // Check whether the agent left a question before exiting. data, readErr := os.ReadFile(questionFile) if readErr == nil { - os.Remove(questionFile) // consumed - return &BlockedError{QuestionJSON: strings.TrimSpace(string(data)), SessionID: e.SessionID} + os.Remove(questionFile) + questionJSON := strings.TrimSpace(string(data)) + if isCompletionReport(questionJSON) { + r.Logger.Info("treating question file as completion report", "taskID", e.TaskID) + e.Summary = extractQuestionText(questionJSON) + } else { + // Preserve sandbox on BLOCKED so a resume can pick up in the same dir. + return &BlockedError{QuestionJSON: questionJSON, SessionID: e.SessionID, SandboxDir: sandboxDir} + } + } + + // Read agent summary if written. + summaryFile := filepath.Join(logDir, "summary.txt") + if summaryData, readErr := os.ReadFile(summaryFile); readErr == nil { + os.Remove(summaryFile) + e.Summary = strings.TrimSpace(string(summaryData)) + } + + // Merge sandbox back to project_dir and clean up. + if sandboxDir != "" { + if mergeErr := teardownSandbox(projectDir, sandboxDir, startHEAD, r.Logger, e); mergeErr != nil { + return fmt.Errorf("sandbox teardown: %w (sandbox preserved at %s)", mergeErr, sandboxDir) + } } return nil } diff --git a/internal/executor/gemini_test.go b/internal/executor/gemini_test.go index 4b0339e..cd11ebc 100644 --- a/internal/executor/gemini_test.go +++ b/internal/executor/gemini_test.go @@ -3,8 +3,11 @@ package executor import ( "bytes" "context" + "errors" "io" "log/slog" + "os" + "path/filepath" "strings" "testing" @@ -177,3 +180,268 @@ func TestParseGeminiStream_ParsesStructuredOutput(t *testing.T) { t.Errorf("writer content mismatch:\nwant:\n%s\ngot:\n%s", expectedWriterContent, writer.String()) } } + +// TestGeminiRunner_Run_ProjectDir_RunsInSandbox verifies that when project_dir +// is set, the gemini subprocess runs inside a sandbox clone — not in +// project_dir itself. +func TestGeminiRunner_Run_ProjectDir_RunsInSandbox(t *testing.T) { + projectDir := t.TempDir() + initGitRepo(t, projectDir) + + logDir := t.TempDir() + cwdFile := filepath.Join(logDir, "gemini-cwd.txt") + + // Fake gemini binary that records its $PWD then exits 0. + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do work", + ProjectDir: projectDir, + SkipPlanning: true, + }, + } + e := &storage.Execution{ID: "sandbox-exec", TaskID: "task-1"} + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run: %v", err) + } + + got, err := os.ReadFile(cwdFile) + if err != nil { + t.Fatalf("cwd file not written: %v", err) + } + cwd := string(got) + if cwd == projectDir { + t.Errorf("ran directly in project_dir; expected sandbox clone (cwd=%q)", cwd) + } + // Sandbox should be removed after successful teardown (no edits → nothing to push). + // We can't assert the exact dir, but it should not be projectDir. +} + +// TestGeminiRunner_Run_BlockedError_IncludesSandboxDir verifies that when the +// agent writes a question file before exiting, the BlockedError carries the +// sandbox path so resume runs in the same dir. +func TestGeminiRunner_Run_BlockedError_IncludesSandboxDir(t *testing.T) { + src := t.TempDir() + initGitRepo(t, src) + logDir := t.TempDir() + + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + if err := os.WriteFile(scriptPath, []byte(`#!/bin/sh +if [ -n "$CLAUDOMATOR_QUESTION_FILE" ]; then + printf '{"text":"Should I continue?"}' > "$CLAUDOMATOR_QUESTION_FILE" +fi +`), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do something", + ProjectDir: src, + SkipPlanning: true, + }, + } + e := &storage.Execution{ID: "blocked-gemini-exec", TaskID: "task-1"} + + err := r.Run(context.Background(), tk, e) + + var blocked *BlockedError + if !errors.As(err, &blocked) { + t.Fatalf("expected BlockedError, got: %v", err) + } + if blocked.SandboxDir == "" { + t.Error("BlockedError.SandboxDir should be set when gemini task runs in a sandbox") + } + if _, statErr := os.Stat(blocked.SandboxDir); os.IsNotExist(statErr) { + t.Error("sandbox directory should be preserved when blocked") + } else { + os.RemoveAll(blocked.SandboxDir) + } +} + +// TestGeminiRunner_Run_ExecError_PreservesSandbox verifies that when gemini +// exits non-zero, the sandbox path is included in the wrapped error so the +// user can inspect partial work. +func TestGeminiRunner_Run_ExecError_PreservesSandbox(t *testing.T) { + src := t.TempDir() + initGitRepo(t, src) + logDir := t.TempDir() + + // "false" exits 1, no output. + r := &GeminiRunner{ + BinaryPath: "false", + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "do something", + ProjectDir: src, + SkipPlanning: true, + }, + } + e := &storage.Execution{ID: "err-gemini-exec", TaskID: "task-1"} + + err := r.Run(context.Background(), tk, e) + if err == nil { + t.Fatal("expected error from failing gemini exit") + } + if !strings.Contains(err.Error(), "sandbox preserved at ") { + t.Errorf("expected error to include sandbox path; got: %v", err) + } + // Extract path and verify it exists. + idx := strings.Index(err.Error(), "sandbox preserved at ") + rest := err.Error()[idx+len("sandbox preserved at "):] + rest = strings.TrimSuffix(rest, ")") + rest = strings.TrimSpace(rest) + if _, statErr := os.Stat(rest); os.IsNotExist(statErr) { + t.Errorf("sandbox path from error should exist on disk: %q", rest) + } else { + os.RemoveAll(rest) + } +} + +// TestGeminiRunner_Run_ResumeUsesStoredSandboxDir verifies that a resume +// execution runs in the preserved SandboxDir rather than cloning fresh. +func TestGeminiRunner_Run_ResumeUsesStoredSandboxDir(t *testing.T) { + logDir := t.TempDir() + sandboxDir := t.TempDir() + initGitRepo(t, sandboxDir) + cwdFile := filepath.Join(logDir, "cwd.txt") + + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + SkipPlanning: true, + }, + } + e := &storage.Execution{ + ID: "resume-gemini-1", + TaskID: "task-resume", + ResumeSessionID: "session-abc", + SandboxDir: sandboxDir, + } + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run with preserved sandbox: %v", err) + } + + got, err := os.ReadFile(cwdFile) + if err != nil { + t.Fatalf("cwd file not written: %v", err) + } + if string(got) != sandboxDir { + t.Errorf("resume should run in preserved sandbox; got cwd=%q want %q", got, sandboxDir) + } +} + +// TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh verifies that a resume +// pointing at a missing sandbox falls back to cloning a fresh sandbox from +// project_dir rather than failing outright. +func TestGeminiRunner_Run_StaleSandboxDir_ClonesAfresh(t *testing.T) { + logDir := t.TempDir() + projectDir := t.TempDir() + initGitRepo(t, projectDir) + + cwdFile := filepath.Join(logDir, "cwd.txt") + scriptPath := filepath.Join(t.TempDir(), "fake-gemini.sh") + script := "#!/bin/sh\nprintf '%s' \"$PWD\" > " + cwdFile + "\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("write script: %v", err) + } + + r := &GeminiRunner{ + BinaryPath: scriptPath, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + ProjectDir: projectDir, + SkipPlanning: true, + }, + } + staleSandbox := filepath.Join(t.TempDir(), "gone") + e := &storage.Execution{ + ID: "resume-gemini-2", + TaskID: "task-stale", + ResumeSessionID: "session-xyz", + SandboxDir: staleSandbox, + } + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run with stale sandbox: %v", err) + } + + got, err := os.ReadFile(cwdFile) + if err != nil { + t.Fatalf("cwd file not written: %v", err) + } + cwd := string(got) + if cwd == staleSandbox { + t.Error("ran in stale (nonexistent) sandbox dir") + } + if cwd == projectDir { + t.Error("ran directly in project_dir; expected a fresh sandbox clone") + } +} + +// TestGeminiRunner_Run_NoProjectDir_SkipsSandbox verifies that a task with no +// project_dir doesn't trigger sandbox setup (matches LocalRunner/non-coding +// task semantics). +func TestGeminiRunner_Run_NoProjectDir_SkipsSandbox(t *testing.T) { + logDir := t.TempDir() + + r := &GeminiRunner{ + BinaryPath: "true", // exits 0, no output + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + tk := &task.Task{ + Agent: task.AgentConfig{ + Type: "gemini", + Instructions: "summarize: 2+2", + SkipPlanning: true, + // No ProjectDir + }, + } + e := &storage.Execution{ID: "no-pd-gemini", TaskID: "task-nopd"} + + if err := r.Run(context.Background(), tk, e); err != nil { + t.Fatalf("Run without project_dir: %v", err) + } + if e.SandboxDir != "" { + t.Errorf("SandboxDir should be empty for tasks without project_dir, got %q", e.SandboxDir) + } +} -- cgit v1.2.3