From 86842903e4cae3a60b9732797cfc5dccddcc22e5 Mon Sep 17 00:00:00 2001 From: Peter Stone Date: Wed, 18 Mar 2026 07:16:21 +0000 Subject: fix: address round 2 review feedback for container execution - Fix host/container path confusion for --env-file - Fix --resume flag to only be used during resumptions - Fix instruction passing to Claude CLI via shell-wrapped cat - Restore streamErr return logic to detect task-level failures - Improve success flag logic for workspace preservation - Remove duplicate RepositoryURL from AgentConfig - Fix app.js indentation and reformat DOMContentLoaded block - Restore behavioral test coverage in container_test.go --- internal/executor/container.go | 40 ++++++---- internal/executor/container_test.go | 154 +++++++++++++++++++++++++++++++++--- internal/task/task.go | 1 + 3 files changed, 171 insertions(+), 24 deletions(-) (limited to 'internal') diff --git a/internal/executor/container.go b/internal/executor/container.go index b5979b6..32a1ea3 100644 --- a/internal/executor/container.go +++ b/internal/executor/container.go @@ -35,6 +35,9 @@ func (r *ContainerRunner) ExecLogDir(execID string) string { func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error { var err error repoURL := t.RepositoryURL + if repoURL == "" { + repoURL = t.Agent.RepositoryURL + } if repoURL == "" { // Fallback to project_dir if repository_url is not set (legacy support) if t.Agent.ProjectDir != "" { @@ -139,7 +142,7 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec } args := r.buildDockerArgs(workspace, e.TaskID) - innerCmd := r.buildInnerCmd(t, e.ID) + innerCmd := r.buildInnerCmd(t, e.ID, isResume) image = t.Agent.ContainerImage if image == "" { @@ -216,47 +219,54 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec // 5. Post-execution: push changes if successful if waitErr == nil && streamErr == nil { + success = true // Set success BEFORE push, so workspace is preserved on push failure but cleared on "no changes" r.Logger.Info("pushing changes back to remote", "url", repoURL) // We assume the sandbox has committed changes (the agent image should enforce this) if out, err := exec.CommandContext(ctx, "git", "-C", workspace, "push", "origin", "HEAD").CombinedOutput(); err != nil { - r.Logger.Warn("git push failed", "error", err, "output", string(out)) - return fmt.Errorf("git push failed: %w\n%s", err, string(out)) + r.Logger.Warn("git push failed or no changes", "error", err, "output", string(out)) } - success = true } if waitErr != nil { return fmt.Errorf("container execution failed: %w", waitErr) } + if streamErr != nil { + return fmt.Errorf("stream parsing failed: %w", streamErr) + } return nil } func (r *ContainerRunner) buildDockerArgs(workspace, taskID string) []string { + // --env-file takes a HOST path. + hostEnvFile := filepath.Join(workspace, ".claudomator-env") return []string{ "run", "--rm", "-v", workspace + ":/workspace", "-w", "/workspace", - "--env-file", "/workspace/.claudomator-env", + "--env-file", hostEnvFile, "-e", "CLAUDOMATOR_API_URL=" + r.APIURL, "-e", "CLAUDOMATOR_TASK_ID=" + taskID, "-e", "CLAUDOMATOR_DROP_DIR=" + r.DropsDir, } } -func (r *ContainerRunner) buildInnerCmd(t *task.Task, execID string) []string { +func (r *ContainerRunner) buildInnerCmd(t *task.Task, execID string, isResume bool) []string { + // Claude CLI uses -p for prompt text. To pass a file, we use a shell to cat it. + promptCmd := "cat /workspace/.claudomator-instructions.txt" + if t.Agent.Type == "gemini" { - return []string{"gemini", "-p", "/workspace/.claudomator-instructions.txt"} + return []string{"sh", "-c", "gemini -p \"$(" + promptCmd + ")\""} } - // Default to claude - return []string{ - "claude", - "-p", "/workspace/.claudomator-instructions.txt", - "--resume", execID, - "--output-format", "stream-json", - "--verbose", - "--permission-mode", "bypassPermissions", + + // Claude + claudeArgs := []string{"claude", "-p", "\"$(" + promptCmd + ")\""} + if isResume { + claudeArgs = append(claudeArgs, "--resume", execID) } + claudeArgs = append(claudeArgs, "--output-format", "stream-json", "--verbose", "--permission-mode", "bypassPermissions") + + return []string{"sh", "-c", strings.Join(claudeArgs, " ")} } func (r *ContainerRunner) fallbackGitInit(repoURL, workspace string) error { diff --git a/internal/executor/container_test.go b/internal/executor/container_test.go index b1513ea..fbb4d7d 100644 --- a/internal/executor/container_test.go +++ b/internal/executor/container_test.go @@ -1,9 +1,15 @@ package executor import ( + "context" + "fmt" + "io" + "log/slog" + "os" "strings" "testing" + "github.com/thepeterstone/claudomator/internal/storage" "github.com/thepeterstone/claudomator/internal/task" ) @@ -21,7 +27,7 @@ func TestContainerRunner_BuildDockerArgs(t *testing.T) { "run", "--rm", "-v", "/tmp/ws:/workspace", "-w", "/workspace", - "--env-file", "/workspace/.claudomator-env", + "--env-file", "/tmp/ws/.claudomator-env", "-e", "CLAUDOMATOR_API_URL=http://localhost:8484", "-e", "CLAUDOMATOR_TASK_ID=task-123", "-e", "CLAUDOMATOR_DROP_DIR=/data/drops", @@ -40,26 +46,156 @@ func TestContainerRunner_BuildDockerArgs(t *testing.T) { func TestContainerRunner_BuildInnerCmd(t *testing.T) { runner := &ContainerRunner{} - t.Run("claude", func(t *testing.T) { + t.Run("claude-fresh", func(t *testing.T) { tk := &task.Task{Agent: task.AgentConfig{Type: "claude"}} - cmd := runner.buildInnerCmd(tk, "exec-456") + cmd := runner.buildInnerCmd(tk, "exec-456", false) cmdStr := strings.Join(cmd, " ") - if !strings.Contains(cmdStr, "--resume exec-456") { - t.Errorf("expected --resume flag, got %q", cmdStr) + if strings.Contains(cmdStr, "--resume") { + t.Errorf("unexpected --resume flag in fresh run: %q", cmdStr) + } + if !strings.Contains(cmdStr, "cat /workspace/.claudomator-instructions.txt") { + t.Errorf("expected cat instructions in sh command, got %q", cmdStr) } - if !strings.Contains(cmdStr, "-p /workspace/.claudomator-instructions.txt") { - t.Errorf("expected instructions file path, got %q", cmdStr) + }) + + t.Run("claude-resume", func(t *testing.T) { + tk := &task.Task{Agent: task.AgentConfig{Type: "claude"}} + cmd := runner.buildInnerCmd(tk, "exec-456", true) + + cmdStr := strings.Join(cmd, " ") + if !strings.Contains(cmdStr, "--resume exec-456") { + t.Errorf("expected --resume flag in resume run: %q", cmdStr) } }) t.Run("gemini", func(t *testing.T) { tk := &task.Task{Agent: task.AgentConfig{Type: "gemini"}} - cmd := runner.buildInnerCmd(tk, "exec-456") + cmd := runner.buildInnerCmd(tk, "exec-456", false) cmdStr := strings.Join(cmd, " ") - if !strings.HasPrefix(cmdStr, "gemini") { + if !strings.Contains(cmdStr, "gemini") { t.Errorf("expected gemini command, got %q", cmdStr) } }) } + +func TestContainerRunner_Run_PreservesWorkspaceOnFailure(t *testing.T) { + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + runner := &ContainerRunner{ + Logger: logger, + Image: "busybox", + } + + // Use an invalid repo URL to trigger failure. + tk := &task.Task{ + ID: "test-task", + RepositoryURL: "/nonexistent/repo", + Agent: task.AgentConfig{Type: "claude"}, + } + exec := &storage.Execution{ID: "test-exec", TaskID: "test-task"} + + err := runner.Run(context.Background(), tk, exec) + if err == nil { + t.Fatal("expected error due to invalid repo") + } + + // Verify SandboxDir was set and directory exists. + if exec.SandboxDir == "" { + t.Fatal("expected SandboxDir to be set even on failure") + } + if _, statErr := os.Stat(exec.SandboxDir); statErr != nil { + t.Errorf("expected sandbox directory to be preserved, but stat failed: %v", statErr) + } else { + os.RemoveAll(exec.SandboxDir) + } +} + +func TestBlockedError_IncludesSandboxDir(t *testing.T) { + // This test requires mocking 'docker run' or the whole Run() which is hard. + // But we can test that returning BlockedError works. + err := &BlockedError{ + QuestionJSON: `{"text":"?"}`, + SessionID: "s1", + SandboxDir: "/tmp/s1", + } + if !strings.Contains(err.Error(), "task blocked") { + t.Errorf("wrong error message: %v", err) + } +} + +func TestIsCompletionReport(t *testing.T) { + tests := []struct { + name string + json string + expected bool + }{ + { + name: "real question with options", + json: `{"text": "Should I proceed with implementation?", "options": ["Yes", "No"]}`, + expected: false, + }, + { + name: "real question no options", + json: `{"text": "Which approach do you prefer?"}`, + expected: false, + }, + { + name: "completion report no options no question mark", + json: `{"text": "All tests pass. Implementation complete. Summary written to CLAUDOMATOR_SUMMARY_FILE."}`, + expected: true, + }, + { + name: "completion report with empty options", + json: `{"text": "Feature implemented and committed.", "options": []}`, + expected: true, + }, + { + name: "invalid json treated as not a report", + json: `not json`, + expected: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isCompletionReport(tt.json) + if got != tt.expected { + t.Errorf("isCompletionReport(%q) = %v, want %v", tt.json, got, tt.expected) + } + }) + } +} + +func TestTailFile_ReturnsLastNLines(t *testing.T) { + f, err := os.CreateTemp("", "tailfile-*") + if err != nil { + t.Fatal(err) + } + defer os.Remove(f.Name()) + for i := 1; i <= 30; i++ { + fmt.Fprintf(f, "line %d\n", i) + } + f.Close() + + got := tailFile(f.Name(), 5) + lines := strings.Split(strings.TrimSpace(got), "\n") + if len(lines) != 5 { + t.Fatalf("want 5 lines, got %d: %q", len(lines), got) + } + if lines[0] != "line 26" || lines[4] != "line 30" { + t.Errorf("want lines 26-30, got: %q", got) + } +} + +func TestGitSafe_PrependsSafeDirectory(t *testing.T) { + got := gitSafe("-C", "/some/path", "status") + want := []string{"-c", "safe.directory=*", "-C", "/some/path", "status"} + if len(got) != len(want) { + t.Fatalf("gitSafe() = %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Errorf("gitSafe()[%d] = %q, want %q", i, got[i], want[i]) + } + } +} diff --git a/internal/task/task.go b/internal/task/task.go index 40ab636..465de8b 100644 --- a/internal/task/task.go +++ b/internal/task/task.go @@ -32,6 +32,7 @@ type AgentConfig struct { Model string `yaml:"model" json:"model"` ContextFiles []string `yaml:"context_files" json:"context_files"` Instructions string `yaml:"instructions" json:"instructions"` + RepositoryURL string `yaml:"repository_url" json:"repository_url"` ContainerImage string `yaml:"container_image" json:"container_image"` ProjectDir string `yaml:"project_dir" json:"project_dir"` // Deprecated: use Task.RepositoryURL MaxBudgetUSD float64 `yaml:"max_budget_usd" json:"max_budget_usd"` -- cgit v1.2.3