2 files changed, 55 insertions, 0 deletions
diff --git a/internal/executor/claude.go b/internal/executor/claude.go
index 9184333..db4d0fa 100644
--- a/internal/executor/claude.go
+++ b/internal/executor/claude.go
@@ -285,6 +285,18 @@ func (r *ClaudeRunner) execOnce(ctx context.Context, args []string, workingDir s
 	stdoutW.Close()
 
 	// killDone is closed when cmd.Wait() returns, stopping the pgid-kill goroutine.
+	//
+	// Safety: this goroutine cannot block indefinitely. The select has two arms:
+	//   • ctx.Done() — fires if the caller cancels (e.g. timeout, user cancel).
+	//     The goroutine sends SIGKILL and exits immediately.
+	//   • killDone — closed by close(killDone) below, immediately after cmd.Wait()
+	//     returns. This fires when the process exits for any reason (natural exit,
+	//     SIGKILL from the ctx arm, or any other signal). The goroutine exits without
+	//     doing anything.
+	//
+	// Therefore: for a task that completes normally with a long-lived (non-cancelled)
+	// context, the killDone arm fires and the goroutine exits. There is no path where
+	// this goroutine outlives execOnce().
 	killDone := make(chan struct{})
 	go func() {
 		select {
diff --git a/internal/executor/claude_test.go b/internal/executor/claude_test.go
index b5380f4..1f95b4a 100644
--- a/internal/executor/claude_test.go
+++ b/internal/executor/claude_test.go
@@ -4,8 +4,11 @@ import (
 	"context"
 	"io"
 	"log/slog"
+	"path/filepath"
+	"runtime"
 	"strings"
 	"testing"
+	"time"
 
 	"github.com/thepeterstone/claudomator/internal/storage"
 	"github.com/thepeterstone/claudomator/internal/task"
@@ -262,3 +265,43 @@ func TestClaudeRunner_BinaryPath_Custom(t *testing.T) {
 		t.Errorf("want custom path, got %q", r.binaryPath())
 	}
 }
+
+// TestExecOnce_NoGoroutineLeak_OnNaturalExit verifies that execOnce does not
+// leave behind any goroutines when the subprocess exits normally (no context
+// cancellation). Both the pgid-kill goroutine and the parseStream goroutine
+// must have exited before execOnce returns.
+func TestExecOnce_NoGoroutineLeak_OnNaturalExit(t *testing.T) {
+	logDir := t.TempDir()
+	r := &ClaudeRunner{
+		BinaryPath: "true", // exits immediately with status 0, produces no output
+		Logger:     slog.New(slog.NewTextHandler(io.Discard, nil)),
+		LogDir:     logDir,
+	}
+	e := &storage.Execution{
+		ID:          "goroutine-leak-test",
+		TaskID:      "task-id",
+		StdoutPath:  filepath.Join(logDir, "stdout.log"),
+		StderrPath:  filepath.Join(logDir, "stderr.log"),
+		ArtifactDir: logDir,
+	}
+
+	// Let any goroutines from test infrastructure settle before sampling.
+	runtime.Gosched()
+	baseline := runtime.NumGoroutine()
+
+	if err := r.execOnce(context.Background(), []string{}, "", e); err != nil {
+		t.Fatalf("execOnce failed: %v", err)
+	}
+
+	// Give the scheduler a moment to let any leaked goroutines actually exit.
+	// In correct code the goroutines exit before execOnce returns, so this is
+	// just a safety buffer for the scheduler.
+	time.Sleep(10 * time.Millisecond)
+	runtime.Gosched()
+
+	after := runtime.NumGoroutine()
+	if after > baseline {
+		t.Errorf("goroutine leak: %d goroutines before execOnce, %d after (leaked %d)",
+			baseline, after, after-baseline)
+	}
+}