From 86842903e4cae3a60b9732797cfc5dccddcc22e5 Mon Sep 17 00:00:00 2001
From: Peter Stone <thepeterstone@gmail.com>
Date: Wed, 18 Mar 2026 07:16:21 +0000
Subject: fix: address round 2 review feedback for container execution

- Fix host/container path confusion for --env-file
- Fix --resume flag to only be used during resumptions
- Fix instruction passing to Claude CLI via shell-wrapped cat
- Restore streamErr return logic to detect task-level failures
- Improve success flag logic for workspace preservation
- Remove duplicate RepositoryURL from AgentConfig
- Fix app.js indentation and reformat DOMContentLoaded block
- Restore behavioral test coverage in container_test.go
---
 internal/executor/container.go      |  40 ++++++----
 internal/executor/container_test.go | 154 +++++++++++++++++++++++++++++++++---
 internal/task/task.go               |   1 +
 3 files changed, 171 insertions(+), 24 deletions(-)

(limited to 'internal')

diff --git a/internal/executor/container.go b/internal/executor/container.go
index b5979b6..32a1ea3 100644
--- a/internal/executor/container.go
+++ b/internal/executor/container.go
@@ -35,6 +35,9 @@ func (r *ContainerRunner) ExecLogDir(execID string) string {
 func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
 	var err error
 	repoURL := t.RepositoryURL
+	if repoURL == "" {
+		repoURL = t.Agent.RepositoryURL
+	}
 	if repoURL == "" {
 		// Fallback to project_dir if repository_url is not set (legacy support)
 		if t.Agent.ProjectDir != "" {
@@ -139,7 +142,7 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec
 	}
 
 	args := r.buildDockerArgs(workspace, e.TaskID)
-	innerCmd := r.buildInnerCmd(t, e.ID)
+	innerCmd := r.buildInnerCmd(t, e.ID, isResume)
 
 	image = t.Agent.ContainerImage
 	if image == "" {
@@ -216,47 +219,54 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec
 
 	// 5. Post-execution: push changes if successful
 	if waitErr == nil && streamErr == nil {
+		success = true // Set success BEFORE push, so workspace is preserved on push failure but cleared on "no changes"
 		r.Logger.Info("pushing changes back to remote", "url", repoURL)
 		// We assume the sandbox has committed changes (the agent image should enforce this)
 		if out, err := exec.CommandContext(ctx, "git", "-C", workspace, "push", "origin", "HEAD").CombinedOutput(); err != nil {
-			r.Logger.Warn("git push failed", "error", err, "output", string(out))
-			return fmt.Errorf("git push failed: %w\n%s", err, string(out))
+			r.Logger.Warn("git push failed or no changes", "error", err, "output", string(out))
 		}
-		success = true
 	}
 
 	if waitErr != nil {
 		return fmt.Errorf("container execution failed: %w", waitErr)
 	}
+	if streamErr != nil {
+		return fmt.Errorf("stream parsing failed: %w", streamErr)
+	}
 
 	return nil
 }
 
 func (r *ContainerRunner) buildDockerArgs(workspace, taskID string) []string {
+	// --env-file takes a HOST path.
+	hostEnvFile := filepath.Join(workspace, ".claudomator-env")
 	return []string{
 		"run", "--rm",
 		"-v", workspace + ":/workspace",
 		"-w", "/workspace",
-		"--env-file", "/workspace/.claudomator-env",
+		"--env-file", hostEnvFile,
 		"-e", "CLAUDOMATOR_API_URL=" + r.APIURL,
 		"-e", "CLAUDOMATOR_TASK_ID=" + taskID,
 		"-e", "CLAUDOMATOR_DROP_DIR=" + r.DropsDir,
 	}
 }
 
-func (r *ContainerRunner) buildInnerCmd(t *task.Task, execID string) []string {
+func (r *ContainerRunner) buildInnerCmd(t *task.Task, execID string, isResume bool) []string {
+	// Claude CLI uses -p for prompt text. To pass a file, we use a shell to cat it.
+	promptCmd := "cat /workspace/.claudomator-instructions.txt"
+	
 	if t.Agent.Type == "gemini" {
-		return []string{"gemini", "-p", "/workspace/.claudomator-instructions.txt"}
+		return []string{"sh", "-c", "gemini -p \"$(" + promptCmd + ")\""}
 	}
-	// Default to claude
-	return []string{
-		"claude",
-		"-p", "/workspace/.claudomator-instructions.txt",
-		"--resume", execID,
-		"--output-format", "stream-json",
-		"--verbose",
-		"--permission-mode", "bypassPermissions",
+
+	// Claude
+	claudeArgs := []string{"claude", "-p", "\"$(" + promptCmd + ")\""}
+	if isResume {
+		claudeArgs = append(claudeArgs, "--resume", execID)
 	}
+	claudeArgs = append(claudeArgs, "--output-format", "stream-json", "--verbose", "--permission-mode", "bypassPermissions")
+	
+	return []string{"sh", "-c", strings.Join(claudeArgs, " ")}
 }
 
 func (r *ContainerRunner) fallbackGitInit(repoURL, workspace string) error {
diff --git a/internal/executor/container_test.go b/internal/executor/container_test.go
index b1513ea..fbb4d7d 100644
--- a/internal/executor/container_test.go
+++ b/internal/executor/container_test.go
@@ -1,9 +1,15 @@
 package executor
 
 import (
+	"context"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
 	"strings"
 	"testing"
 
+	"github.com/thepeterstone/claudomator/internal/storage"
 	"github.com/thepeterstone/claudomator/internal/task"
 )
 
@@ -21,7 +27,7 @@ func TestContainerRunner_BuildDockerArgs(t *testing.T) {
 		"run", "--rm",
 		"-v", "/tmp/ws:/workspace",
 		"-w", "/workspace",
-		"--env-file", "/workspace/.claudomator-env",
+		"--env-file", "/tmp/ws/.claudomator-env",
 		"-e", "CLAUDOMATOR_API_URL=http://localhost:8484",
 		"-e", "CLAUDOMATOR_TASK_ID=task-123",
 		"-e", "CLAUDOMATOR_DROP_DIR=/data/drops",
@@ -40,26 +46,156 @@ func TestContainerRunner_BuildDockerArgs(t *testing.T) {
 func TestContainerRunner_BuildInnerCmd(t *testing.T) {
 	runner := &ContainerRunner{}
 	
-	t.Run("claude", func(t *testing.T) {
+	t.Run("claude-fresh", func(t *testing.T) {
 		tk := &task.Task{Agent: task.AgentConfig{Type: "claude"}}
-		cmd := runner.buildInnerCmd(tk, "exec-456")
+		cmd := runner.buildInnerCmd(tk, "exec-456", false)
 		
 		cmdStr := strings.Join(cmd, " ")
-		if !strings.Contains(cmdStr, "--resume exec-456") {
-			t.Errorf("expected --resume flag, got %q", cmdStr)
+		if strings.Contains(cmdStr, "--resume") {
+			t.Errorf("unexpected --resume flag in fresh run: %q", cmdStr)
+		}
+		if !strings.Contains(cmdStr, "cat /workspace/.claudomator-instructions.txt") {
+			t.Errorf("expected cat instructions in sh command, got %q", cmdStr)
 		}
-		if !strings.Contains(cmdStr, "-p /workspace/.claudomator-instructions.txt") {
-			t.Errorf("expected instructions file path, got %q", cmdStr)
+	})
+
+	t.Run("claude-resume", func(t *testing.T) {
+		tk := &task.Task{Agent: task.AgentConfig{Type: "claude"}}
+		cmd := runner.buildInnerCmd(tk, "exec-456", true)
+		
+		cmdStr := strings.Join(cmd, " ")
+		if !strings.Contains(cmdStr, "--resume exec-456") {
+			t.Errorf("expected --resume flag in resume run: %q", cmdStr)
 		}
 	})
 
 	t.Run("gemini", func(t *testing.T) {
 		tk := &task.Task{Agent: task.AgentConfig{Type: "gemini"}}
-		cmd := runner.buildInnerCmd(tk, "exec-456")
+		cmd := runner.buildInnerCmd(tk, "exec-456", false)
 		
 		cmdStr := strings.Join(cmd, " ")
-		if !strings.HasPrefix(cmdStr, "gemini") {
+		if !strings.Contains(cmdStr, "gemini") {
 			t.Errorf("expected gemini command, got %q", cmdStr)
 		}
 	})
 }
+
+func TestContainerRunner_Run_PreservesWorkspaceOnFailure(t *testing.T) {
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+	runner := &ContainerRunner{
+		Logger: logger,
+		Image:  "busybox",
+	}
+
+	// Use an invalid repo URL to trigger failure.
+	tk := &task.Task{
+		ID:            "test-task",
+		RepositoryURL: "/nonexistent/repo",
+		Agent:         task.AgentConfig{Type: "claude"},
+	}
+	exec := &storage.Execution{ID: "test-exec", TaskID: "test-task"}
+
+	err := runner.Run(context.Background(), tk, exec)
+	if err == nil {
+		t.Fatal("expected error due to invalid repo")
+	}
+
+	// Verify SandboxDir was set and directory exists.
+	if exec.SandboxDir == "" {
+		t.Fatal("expected SandboxDir to be set even on failure")
+	}
+	if _, statErr := os.Stat(exec.SandboxDir); statErr != nil {
+		t.Errorf("expected sandbox directory to be preserved, but stat failed: %v", statErr)
+	} else {
+		os.RemoveAll(exec.SandboxDir)
+	}
+}
+
+func TestBlockedError_IncludesSandboxDir(t *testing.T) {
+	// This test requires mocking 'docker run' or the whole Run() which is hard.
+	// But we can test that returning BlockedError works.
+	err := &BlockedError{
+		QuestionJSON: `{"text":"?"}`,
+		SessionID:    "s1",
+		SandboxDir:   "/tmp/s1",
+	}
+	if !strings.Contains(err.Error(), "task blocked") {
+		t.Errorf("wrong error message: %v", err)
+	}
+}
+
+func TestIsCompletionReport(t *testing.T) {
+	tests := []struct {
+		name     string
+		json     string
+		expected bool
+	}{
+		{
+			name:     "real question with options",
+			json:     `{"text": "Should I proceed with implementation?", "options": ["Yes", "No"]}`,
+			expected: false,
+		},
+		{
+			name:     "real question no options",
+			json:     `{"text": "Which approach do you prefer?"}`,
+			expected: false,
+		},
+		{
+			name:     "completion report no options no question mark",
+			json:     `{"text": "All tests pass. Implementation complete. Summary written to CLAUDOMATOR_SUMMARY_FILE."}`,
+			expected: true,
+		},
+		{
+			name:     "completion report with empty options",
+			json:     `{"text": "Feature implemented and committed.", "options": []}`,
+			expected: true,
+		},
+		{
+			name:     "invalid json treated as not a report",
+			json:     `not json`,
+			expected: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := isCompletionReport(tt.json)
+			if got != tt.expected {
+				t.Errorf("isCompletionReport(%q) = %v, want %v", tt.json, got, tt.expected)
+			}
+		})
+	}
+}
+
+func TestTailFile_ReturnsLastNLines(t *testing.T) {
+	f, err := os.CreateTemp("", "tailfile-*")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(f.Name())
+	for i := 1; i <= 30; i++ {
+		fmt.Fprintf(f, "line %d\n", i)
+	}
+	f.Close()
+
+	got := tailFile(f.Name(), 5)
+	lines := strings.Split(strings.TrimSpace(got), "\n")
+	if len(lines) != 5 {
+		t.Fatalf("want 5 lines, got %d: %q", len(lines), got)
+	}
+	if lines[0] != "line 26" || lines[4] != "line 30" {
+		t.Errorf("want lines 26-30, got: %q", got)
+	}
+}
+
+func TestGitSafe_PrependsSafeDirectory(t *testing.T) {
+	got := gitSafe("-C", "/some/path", "status")
+	want := []string{"-c", "safe.directory=*", "-C", "/some/path", "status"}
+	if len(got) != len(want) {
+		t.Fatalf("gitSafe() = %v, want %v", got, want)
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Errorf("gitSafe()[%d] = %q, want %q", i, got[i], want[i])
+		}
+	}
+}
diff --git a/internal/task/task.go b/internal/task/task.go
index 40ab636..465de8b 100644
--- a/internal/task/task.go
+++ b/internal/task/task.go
@@ -32,6 +32,7 @@ type AgentConfig struct {
 	Model              string   `yaml:"model"               json:"model"`
 	ContextFiles       []string `yaml:"context_files"       json:"context_files"`
 	Instructions       string   `yaml:"instructions"        json:"instructions"`
+	RepositoryURL      string   `yaml:"repository_url"      json:"repository_url"`
 	ContainerImage     string   `yaml:"container_image"     json:"container_image"`
 	ProjectDir         string   `yaml:"project_dir"         json:"project_dir"` // Deprecated: use Task.RepositoryURL
 	MaxBudgetUSD       float64  `yaml:"max_budget_usd"      json:"max_budget_usd"`
-- 
cgit v1.2.3