diff options
| -rw-r--r-- | internal/executor/claude.go | 12 | ||||
| -rw-r--r-- | internal/executor/claude_test.go | 43 |
2 files changed, 55 insertions, 0 deletions
diff --git a/internal/executor/claude.go b/internal/executor/claude.go index 9184333..db4d0fa 100644 --- a/internal/executor/claude.go +++ b/internal/executor/claude.go @@ -285,6 +285,18 @@ func (r *ClaudeRunner) execOnce(ctx context.Context, args []string, workingDir s stdoutW.Close() // killDone is closed when cmd.Wait() returns, stopping the pgid-kill goroutine. + // + // Safety: this goroutine cannot block indefinitely. The select has two arms: + // • ctx.Done() — fires if the caller cancels (e.g. timeout, user cancel). + // The goroutine sends SIGKILL and exits immediately. + // • killDone — closed by close(killDone) below, immediately after cmd.Wait() + // returns. This fires when the process exits for any reason (natural exit, + // SIGKILL from the ctx arm, or any other signal). The goroutine exits without + // doing anything. + // + // Therefore: for a task that completes normally with a long-lived (non-cancelled) + // context, the killDone arm fires and the goroutine exits. There is no path where + // this goroutine outlives execOnce(). killDone := make(chan struct{}) go func() { select { diff --git a/internal/executor/claude_test.go b/internal/executor/claude_test.go index b5380f4..1f95b4a 100644 --- a/internal/executor/claude_test.go +++ b/internal/executor/claude_test.go @@ -4,8 +4,11 @@ import ( "context" "io" "log/slog" + "path/filepath" + "runtime" "strings" "testing" + "time" "github.com/thepeterstone/claudomator/internal/storage" "github.com/thepeterstone/claudomator/internal/task" @@ -262,3 +265,43 @@ func TestClaudeRunner_BinaryPath_Custom(t *testing.T) { t.Errorf("want custom path, got %q", r.binaryPath()) } } + +// TestExecOnce_NoGoroutineLeak_OnNaturalExit verifies that execOnce does not +// leave behind any goroutines when the subprocess exits normally (no context +// cancellation). Both the pgid-kill goroutine and the parseStream goroutine +// must have exited before execOnce returns. +func TestExecOnce_NoGoroutineLeak_OnNaturalExit(t *testing.T) { + logDir := t.TempDir() + r := &ClaudeRunner{ + BinaryPath: "true", // exits immediately with status 0, produces no output + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + LogDir: logDir, + } + e := &storage.Execution{ + ID: "goroutine-leak-test", + TaskID: "task-id", + StdoutPath: filepath.Join(logDir, "stdout.log"), + StderrPath: filepath.Join(logDir, "stderr.log"), + ArtifactDir: logDir, + } + + // Let any goroutines from test infrastructure settle before sampling. + runtime.Gosched() + baseline := runtime.NumGoroutine() + + if err := r.execOnce(context.Background(), []string{}, "", e); err != nil { + t.Fatalf("execOnce failed: %v", err) + } + + // Give the scheduler a moment to let any leaked goroutines actually exit. + // In correct code the goroutines exit before execOnce returns, so this is + // just a safety buffer for the scheduler. + time.Sleep(10 * time.Millisecond) + runtime.Gosched() + + after := runtime.NumGoroutine() + if after > baseline { + t.Errorf("goroutine leak: %d goroutines before execOnce, %d after (leaked %d)", + baseline, after, after-baseline) + } +} |
