diff options
| -rw-r--r-- | internal/executor/claude.go | 77 | ||||
| -rw-r--r-- | internal/executor/claude_test.go | 103 | ||||
| -rw-r--r-- | internal/executor/executor.go | 1 | ||||
| -rw-r--r-- | internal/executor/gemini.go | 5 | ||||
| -rw-r--r-- | internal/executor/preamble.go | 2 | ||||
| -rw-r--r-- | internal/storage/db.go | 55 | ||||
| -rw-r--r-- | internal/task/task.go | 6 | ||||
| -rwxr-xr-x | scripts/next-task | 7 | ||||
| -rw-r--r-- | test/next-task.test.sh | 43 | ||||
| -rw-r--r-- | web/app.js | 94 | ||||
| -rw-r--r-- | web/index.html | 6 | ||||
| -rw-r--r-- | web/style.css | 59 | ||||
| -rw-r--r-- | web/test/changestats.test.mjs | 125 | ||||
| -rw-r--r-- | web/test/tab-badges.test.mjs | 110 |
14 files changed, 643 insertions, 50 deletions
diff --git a/internal/executor/claude.go b/internal/executor/claude.go index 5a5b35e..4d92cd0 100644 --- a/internal/executor/claude.go +++ b/internal/executor/claude.go @@ -103,6 +103,7 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi // finds its session files under the same project slug. If no sandbox was // preserved (e.g. task had no project_dir), fall back to project_dir. var sandboxDir string + var startHEAD string effectiveWorkingDir := projectDir if e.ResumeSessionID != "" { if e.SandboxDir != "" { @@ -134,6 +135,12 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi r.Logger.Info("sandbox created", "sandbox", sandboxDir, "project_dir", projectDir) } + if effectiveWorkingDir != "" { + // Capture the initial HEAD so we can identify new commits later. + headOut, _ := exec.Command("git", gitSafe("-C", effectiveWorkingDir, "rev-parse", "HEAD")...).Output() + startHEAD = strings.TrimSpace(string(headOut)) + } + questionFile := filepath.Join(logDir, "question.json") args := r.buildArgs(t, e, questionFile) @@ -147,7 +154,7 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi ) } attempt++ - return r.execOnce(ctx, args, effectiveWorkingDir, e) + return r.execOnce(ctx, args, effectiveWorkingDir, projectDir, e) }) if err != nil { if sandboxDir != "" { @@ -183,7 +190,7 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi // Merge sandbox back to project_dir and clean up. if sandboxDir != "" { - if mergeErr := teardownSandbox(projectDir, sandboxDir, r.Logger); mergeErr != nil { + if mergeErr := teardownSandbox(projectDir, sandboxDir, startHEAD, r.Logger, e); mergeErr != nil { return fmt.Errorf("sandbox teardown: %w (sandbox preserved at %s)", mergeErr, sandboxDir) } } @@ -277,20 +284,57 @@ func setupSandbox(projectDir string) (string, error) { // The working copy (projectDir) is NOT updated automatically — it is the // developer's workspace and is pulled manually. This avoids permission errors // from mixed-owner .git/objects directories. -func teardownSandbox(projectDir, sandboxDir string, logger *slog.Logger) error { - // Fail if agent left uncommitted changes. +func teardownSandbox(projectDir, sandboxDir, startHEAD string, logger *slog.Logger, execRecord *storage.Execution) error { + // Automatically commit uncommitted changes. out, err := exec.Command("git", "-C", sandboxDir, "status", "--porcelain").Output() if err != nil { return fmt.Errorf("git status: %w", err) } if len(strings.TrimSpace(string(out))) > 0 { - return fmt.Errorf("uncommitted changes in sandbox (agent must commit all work):\n%s", out) + logger.Info("autocommitting uncommitted changes", "sandbox", sandboxDir) + cmds := [][]string{ + gitSafe("-C", sandboxDir, "add", "-A"), + gitSafe("-C", sandboxDir, "commit", "-m", "chore: autocommit uncommitted changes"), + } + for _, args := range cmds { + if out, err := exec.Command("git", args...).CombinedOutput(); err != nil { + return fmt.Errorf("autocommit failed (%v): %w\n%s", args, err, out) + } + } + } + + // Capture commits before pushing/deleting. + // Use startHEAD..HEAD to find all commits made during this execution. + logRange := "origin/HEAD..HEAD" + if startHEAD != "" && startHEAD != "HEAD" { + logRange = startHEAD + "..HEAD" + } + + logCmd := exec.Command("git", gitSafe("-C", sandboxDir, "log", logRange, "--pretty=format:%H|%s")...) + logOut, logErr := logCmd.CombinedOutput() + if logErr == nil { + lines := strings.Split(strings.TrimSpace(string(logOut)), "\n") + logger.Debug("captured commits", "count", len(lines), "range", logRange) + for _, line := range lines { + if line == "" { + continue + } + parts := strings.SplitN(line, "|", 2) + if len(parts) == 2 { + execRecord.Commits = append(execRecord.Commits, task.GitCommit{ + Hash: parts[0], + Message: parts[1], + }) + } + } + } else { + logger.Warn("failed to capture commits", "err", logErr, "range", logRange, "output", string(logOut)) } // Check whether there are any new commits to push. - ahead, err := exec.Command("git", "-C", sandboxDir, "rev-list", "--count", "origin/HEAD..HEAD").Output() + ahead, err := exec.Command("git", gitSafe("-C", sandboxDir, "rev-list", "--count", logRange)...).Output() if err != nil { - logger.Warn("could not determine commits ahead of origin; proceeding", "err", err) + logger.Warn("could not determine commits ahead of origin; proceeding", "err", err, "range", logRange) } if strings.TrimSpace(string(ahead)) == "0" { os.RemoveAll(sandboxDir) @@ -305,6 +349,22 @@ func teardownSandbox(projectDir, sandboxDir string, logger *slog.Logger) error { if out2, err2 := exec.Command("git", "-C", sandboxDir, "pull", "--rebase", "origin", "master").CombinedOutput(); err2 != nil { return fmt.Errorf("git rebase before retry push: %w\n%s", err2, out2) } + // Re-capture commits after rebase (hashes might have changed) + execRecord.Commits = nil + logOut, logErr = exec.Command("git", "-C", sandboxDir, "log", logRange, "--pretty=format:%H|%s").Output() + if logErr == nil { + lines := strings.Split(strings.TrimSpace(string(logOut)), "\n") + for _, line := range lines { + parts := strings.SplitN(line, "|", 2) + if len(parts) == 2 { + execRecord.Commits = append(execRecord.Commits, task.GitCommit{ + Hash: parts[0], + Message: parts[1], + }) + } + } + } + if out3, err3 := exec.Command("git", "-C", sandboxDir, "push", "origin", "HEAD").CombinedOutput(); err3 != nil { return fmt.Errorf("git push to origin (after rebase): %w\n%s", err3, out3) } @@ -319,11 +379,12 @@ func teardownSandbox(projectDir, sandboxDir string, logger *slog.Logger) error { } // execOnce runs the claude subprocess once, streaming output to e's log paths. -func (r *ClaudeRunner) execOnce(ctx context.Context, args []string, workingDir string, e *storage.Execution) error { +func (r *ClaudeRunner) execOnce(ctx context.Context, args []string, workingDir, projectDir string, e *storage.Execution) error { cmd := exec.CommandContext(ctx, r.binaryPath(), args...) cmd.Env = append(os.Environ(), "CLAUDOMATOR_API_URL="+r.APIURL, "CLAUDOMATOR_TASK_ID="+e.TaskID, + "CLAUDOMATOR_PROJECT_DIR="+projectDir, "CLAUDOMATOR_QUESTION_FILE="+filepath.Join(e.ArtifactDir, "question.json"), "CLAUDOMATOR_SUMMARY_FILE="+filepath.Join(e.ArtifactDir, "summary.txt"), ) diff --git a/internal/executor/claude_test.go b/internal/executor/claude_test.go index 9bb873f..02d1b2e 100644 --- a/internal/executor/claude_test.go +++ b/internal/executor/claude_test.go @@ -173,8 +173,11 @@ func TestClaudeRunner_BuildArgs_PreamblePrepended(t *testing.T) { if len(args) < 2 || args[0] != "-p" { t.Fatalf("expected -p as first arg, got: %v", args) } - if !strings.HasPrefix(args[1], planningPreamble) { - t.Errorf("instructions should start with planning preamble") + if !strings.HasPrefix(args[1], "## Runtime Environment") { + t.Errorf("instructions should start with planning preamble, got prefix: %q", args[1][:min(len(args[1]), 20)]) + } + if !strings.Contains(args[1], "$CLAUDOMATOR_PROJECT_DIR") { + t.Errorf("preamble should mention $CLAUDOMATOR_PROJECT_DIR") } if !strings.HasSuffix(args[1], "fix the bug") { t.Errorf("instructions should end with original instructions") @@ -329,7 +332,7 @@ func TestExecOnce_NoGoroutineLeak_OnNaturalExit(t *testing.T) { runtime.Gosched() baseline := runtime.NumGoroutine() - if err := r.execOnce(context.Background(), []string{}, "", e); err != nil { + if err := r.execOnce(context.Background(), []string{}, "", "", e); err != nil { t.Fatalf("execOnce failed: %v", err) } @@ -350,16 +353,24 @@ func TestExecOnce_NoGoroutineLeak_OnNaturalExit(t *testing.T) { func initGitRepo(t *testing.T, dir string) { t.Helper() cmds := [][]string{ - {"git", "-C", dir, "init"}, - {"git", "-C", dir, "config", "user.email", "test@test"}, - {"git", "-C", dir, "config", "user.name", "test"}, - {"git", "-C", dir, "commit", "--allow-empty", "-m", "init"}, + {"git", "-c", "safe.directory=*", "-C", dir, "init", "-b", "main"}, + {"git", "-c", "safe.directory=*", "-C", dir, "config", "user.email", "test@test"}, + {"git", "-c", "safe.directory=*", "-C", dir, "config", "user.name", "test"}, } for _, args := range cmds { if out, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil { t.Fatalf("%v: %v\n%s", args, err, out) } } + if err := os.WriteFile(filepath.Join(dir, "init.txt"), []byte("init"), 0644); err != nil { + t.Fatal(err) + } + if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", dir, "add", ".").CombinedOutput(); err != nil { + t.Fatalf("git add: %v\n%s", err, out) + } + if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", dir, "commit", "-m", "init").CombinedOutput(); err != nil { + t.Fatalf("git commit: %v\n%s", err, out) + } } func TestSandboxCloneSource_PrefersLocalRemote(t *testing.T) { @@ -409,6 +420,13 @@ func TestSetupSandbox_ClonesGitRepo(t *testing.T) { } t.Cleanup(func() { os.RemoveAll(sandbox) }) + // Force sandbox to master if it cloned as main + exec.Command("git", gitSafe("-C", sandbox, "checkout", "master")...).Run() + + // Debug sandbox + logOut, _ := exec.Command("git", "-C", sandbox, "log", "-1").CombinedOutput() + fmt.Printf("DEBUG: sandbox log: %s\n", string(logOut)) + // Verify sandbox is a git repo with at least one commit. out, err := exec.Command("git", "-C", sandbox, "log", "--oneline").Output() if err != nil { @@ -434,31 +452,63 @@ func TestSetupSandbox_InitialisesNonGitDir(t *testing.T) { } } -func TestTeardownSandbox_UncommittedChanges_ReturnsError(t *testing.T) { - src := t.TempDir() - initGitRepo(t, src) - sandbox, err := setupSandbox(src) +func TestTeardownSandbox_AutocommitsChanges(t *testing.T) { + // Create a bare repo as origin so push succeeds. + bare := t.TempDir() + if out, err := exec.Command("git", "init", "--bare", bare).CombinedOutput(); err != nil { + t.Fatalf("git init bare: %v\n%s", err, out) + } + + // Create a sandbox directly. + sandbox := t.TempDir() + initGitRepo(t, sandbox) + if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "remote", "add", "origin", bare).CombinedOutput(); err != nil { + t.Fatalf("git remote add: %v\n%s", err, out) + } + // Initial push to establish origin/main + if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "push", "origin", "main").CombinedOutput(); err != nil { + t.Fatalf("git push initial: %v\n%s", err, out) + } + + // Capture startHEAD + headOut, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "rev-parse", "HEAD").Output() if err != nil { - t.Fatalf("setupSandbox: %v", err) + t.Fatalf("rev-parse HEAD: %v", err) } - t.Cleanup(func() { os.RemoveAll(sandbox) }) + startHEAD := strings.TrimSpace(string(headOut)) // Leave an uncommitted file in the sandbox. - if err := os.WriteFile(filepath.Join(sandbox, "dirty.txt"), []byte("oops"), 0644); err != nil { + if err := os.WriteFile(filepath.Join(sandbox, "dirty.txt"), []byte("autocommit me"), 0644); err != nil { t.Fatal(err) } - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - err = teardownSandbox(src, sandbox, logger) - if err == nil { - t.Fatal("expected error for uncommitted changes, got nil") + logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})) + execRecord := &storage.Execution{} + + err = teardownSandbox("", sandbox, startHEAD, logger, execRecord) + if err != nil { + t.Fatalf("expected autocommit to succeed, got error: %v", err) + } + + // Sandbox should be removed after successful autocommit and push. + if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) { + t.Error("sandbox should have been removed after successful autocommit and push") } - if !strings.Contains(err.Error(), "uncommitted changes") { - t.Errorf("expected 'uncommitted changes' in error, got: %v", err) + + // Verify the commit exists in the bare repo. + out, err := exec.Command("git", "-C", bare, "log", "-1", "--pretty=%B").Output() + if err != nil { + t.Fatalf("git log in bare repo: %v", err) } - // Sandbox should be preserved (not removed) on error. - if _, statErr := os.Stat(sandbox); os.IsNotExist(statErr) { - t.Error("sandbox was removed despite error; should be preserved for debugging") + if !strings.Contains(string(out), "chore: autocommit uncommitted changes") { + t.Errorf("expected autocommit message in log, got: %q", string(out)) + } + + // Verify the commit was captured in execRecord. + if len(execRecord.Commits) == 0 { + t.Error("expected at least one commit in execRecord") + } else if !strings.Contains(execRecord.Commits[0].Message, "chore: autocommit uncommitted changes") { + t.Errorf("unexpected commit message: %q", execRecord.Commits[0].Message) } } @@ -471,8 +521,13 @@ func TestTeardownSandbox_CleanSandboxWithNoNewCommits_RemovesSandbox(t *testing. } logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + execRecord := &storage.Execution{} + + headOut, _ := exec.Command("git", "-C", sandbox, "rev-parse", "HEAD").Output() + startHEAD := strings.TrimSpace(string(headOut)) + // Sandbox has no new commits beyond origin; teardown should succeed and remove it. - if err := teardownSandbox(src, sandbox, logger); err != nil { + if err := teardownSandbox(src, sandbox, startHEAD, logger, execRecord); err != nil { t.Fatalf("teardownSandbox: %v", err) } if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) { diff --git a/internal/executor/executor.go b/internal/executor/executor.go index fd37c33..f85f1ff 100644 --- a/internal/executor/executor.go +++ b/internal/executor/executor.go @@ -355,6 +355,7 @@ func (p *Pool) handleRunResult(ctx context.Context, t *task.Task, exec *storage. } if exec.StdoutPath != "" { if cs := task.ParseChangestatFromFile(exec.StdoutPath); cs != nil { + exec.Changestats = cs if csErr := p.store.UpdateExecutionChangestats(exec.ID, cs); csErr != nil { p.logger.Error("failed to store changestats", "execID", exec.ID, "error", csErr) } diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go index 2db3218..67ea7dd 100644 --- a/internal/executor/gemini.go +++ b/internal/executor/gemini.go @@ -68,7 +68,7 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi // Gemini CLI doesn't necessarily have the same rate limiting behavior as Claude, // but we'll use a similar execution pattern. - err := r.execOnce(ctx, args, t.Agent.ProjectDir, e) + err := r.execOnce(ctx, args, t.Agent.ProjectDir, t.Agent.ProjectDir, e) if err != nil { return err } @@ -82,11 +82,12 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi return nil } -func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir string, e *storage.Execution) error { +func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir, projectDir string, e *storage.Execution) error { cmd := exec.CommandContext(ctx, r.binaryPath(), args...) cmd.Env = append(os.Environ(), "CLAUDOMATOR_API_URL="+r.APIURL, "CLAUDOMATOR_TASK_ID="+e.TaskID, + "CLAUDOMATOR_PROJECT_DIR="+projectDir, "CLAUDOMATOR_QUESTION_FILE="+filepath.Join(e.ArtifactDir, "question.json"), ) cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} diff --git a/internal/executor/preamble.go b/internal/executor/preamble.go index 8ae79ad..f5dba2b 100644 --- a/internal/executor/preamble.go +++ b/internal/executor/preamble.go @@ -27,7 +27,7 @@ Before doing any implementation work: 2. If YES — break it down: - Create 3–7 discrete subtasks by POSTing to $CLAUDOMATOR_API_URL/api/tasks - - Each subtask POST body should be JSON with: name, agent.instructions, agent.working_dir (copy from current task), agent.model, agent.allowed_tools, and agent.skip_planning set to true + - Each subtask POST body should be JSON with: name, agent.instructions, agent.project_dir (copy from $CLAUDOMATOR_PROJECT_DIR), agent.model, agent.allowed_tools, and agent.skip_planning set to true - Set parent_task_id to $CLAUDOMATOR_TASK_ID in each POST body - After creating all subtasks, output a brief summary and STOP. Do not implement anything. - You can also specify agent.type (either "claude" or "gemini") to choose the agent for subtasks. diff --git a/internal/storage/db.go b/internal/storage/db.go index 2b7e33f..69bcf68 100644 --- a/internal/storage/db.go +++ b/internal/storage/db.go @@ -84,6 +84,7 @@ func (s *DB) migrate() error { `ALTER TABLE tasks ADD COLUMN summary TEXT`, `ALTER TABLE tasks ADD COLUMN interactions_json TEXT NOT NULL DEFAULT '[]'`, `ALTER TABLE executions ADD COLUMN changestats_json TEXT`, + `ALTER TABLE executions ADD COLUMN commits_json TEXT NOT NULL DEFAULT '[]'`, } for _, m := range migrations { if _, err := s.db.Exec(m); err != nil { @@ -368,6 +369,7 @@ type Execution struct { SandboxDir string // preserved sandbox path when task is BLOCKED; resume must run here Changestats *task.Changestats // stored as JSON; nil if not yet recorded + Commits []task.GitCommit // stored as JSON; empty if no commits // In-memory only: set when creating a resume execution, not stored in DB. ResumeSessionID string @@ -387,24 +389,32 @@ func (s *DB) CreateExecution(e *Execution) error { s := string(b) changestatsJSON = &s } + commitsJSON := "[]" + if len(e.Commits) > 0 { + b, err := json.Marshal(e.Commits) + if err != nil { + return fmt.Errorf("marshaling commits: %w", err) + } + commitsJSON = string(b) + } _, err := s.db.Exec(` - INSERT INTO executions (id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + INSERT INTO executions (id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json, commits_json) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, e.ID, e.TaskID, e.StartTime.UTC(), e.EndTime.UTC(), e.ExitCode, e.Status, - e.StdoutPath, e.StderrPath, e.ArtifactDir, e.CostUSD, e.ErrorMsg, e.SessionID, e.SandboxDir, changestatsJSON, + e.StdoutPath, e.StderrPath, e.ArtifactDir, e.CostUSD, e.ErrorMsg, e.SessionID, e.SandboxDir, changestatsJSON, commitsJSON, ) return err } // GetExecution retrieves an execution by ID. func (s *DB) GetExecution(id string) (*Execution, error) { - row := s.db.QueryRow(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json FROM executions WHERE id = ?`, id) + row := s.db.QueryRow(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json, commits_json FROM executions WHERE id = ?`, id) return scanExecution(row) } // ListExecutions returns executions for a task. func (s *DB) ListExecutions(taskID string) ([]*Execution, error) { - rows, err := s.db.Query(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json FROM executions WHERE task_id = ? ORDER BY start_time DESC`, taskID) + rows, err := s.db.Query(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json, commits_json FROM executions WHERE task_id = ? ORDER BY start_time DESC`, taskID) if err != nil { return nil, err } @@ -423,7 +433,7 @@ func (s *DB) ListExecutions(taskID string) ([]*Execution, error) { // GetLatestExecution returns the most recent execution for a task. func (s *DB) GetLatestExecution(taskID string) (*Execution, error) { - row := s.db.QueryRow(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json FROM executions WHERE task_id = ? ORDER BY start_time DESC LIMIT 1`, taskID) + row := s.db.QueryRow(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json, commits_json FROM executions WHERE task_id = ? ORDER BY start_time DESC LIMIT 1`, taskID) return scanExecution(row) } @@ -588,12 +598,31 @@ func (s *DB) AppendTaskInteraction(taskID string, interaction task.Interaction) // UpdateExecution updates a completed execution. func (s *DB) UpdateExecution(e *Execution) error { + var changestatsJSON *string + if e.Changestats != nil { + b, err := json.Marshal(e.Changestats) + if err != nil { + return fmt.Errorf("marshaling changestats: %w", err) + } + s := string(b) + changestatsJSON = &s + } + commitsJSON := "[]" + if len(e.Commits) > 0 { + b, err := json.Marshal(e.Commits) + if err != nil { + return fmt.Errorf("marshaling commits: %w", err) + } + commitsJSON = string(b) + } _, err := s.db.Exec(` UPDATE executions SET end_time = ?, exit_code = ?, status = ?, cost_usd = ?, error_msg = ?, - stdout_path = ?, stderr_path = ?, artifact_dir = ?, session_id = ?, sandbox_dir = ? + stdout_path = ?, stderr_path = ?, artifact_dir = ?, session_id = ?, sandbox_dir = ?, + changestats_json = ?, commits_json = ? WHERE id = ?`, e.EndTime.UTC(), e.ExitCode, e.Status, e.CostUSD, e.ErrorMsg, - e.StdoutPath, e.StderrPath, e.ArtifactDir, e.SessionID, e.SandboxDir, e.ID, + e.StdoutPath, e.StderrPath, e.ArtifactDir, e.SessionID, e.SandboxDir, + changestatsJSON, commitsJSON, e.ID, ) return err } @@ -660,8 +689,9 @@ func scanExecution(row scanner) (*Execution, error) { var sessionID sql.NullString var sandboxDir sql.NullString var changestatsJSON sql.NullString + var commitsJSON sql.NullString err := row.Scan(&e.ID, &e.TaskID, &e.StartTime, &e.EndTime, &e.ExitCode, &e.Status, - &e.StdoutPath, &e.StderrPath, &e.ArtifactDir, &e.CostUSD, &e.ErrorMsg, &sessionID, &sandboxDir, &changestatsJSON) + &e.StdoutPath, &e.StderrPath, &e.ArtifactDir, &e.CostUSD, &e.ErrorMsg, &sessionID, &sandboxDir, &changestatsJSON, &commitsJSON) if err != nil { return nil, err } @@ -674,6 +704,13 @@ func scanExecution(row scanner) (*Execution, error) { } e.Changestats = &cs } + if commitsJSON.Valid && commitsJSON.String != "" { + if err := json.Unmarshal([]byte(commitsJSON.String), &e.Commits); err != nil { + return nil, fmt.Errorf("unmarshaling commits: %w", err) + } + } else { + e.Commits = []task.GitCommit{} + } return &e, nil } diff --git a/internal/task/task.go b/internal/task/task.go index b85b07c..6a9d1db 100644 --- a/internal/task/task.go +++ b/internal/task/task.go @@ -48,6 +48,12 @@ type RetryConfig struct { Backoff string `yaml:"backoff" json:"backoff"` // "linear", "exponential" } +// GitCommit represents a single git commit created during a task execution. +type GitCommit struct { + Hash string `json:"hash"` + Message string `json:"message"` +} + // Changestats records file/line change metrics from an agent execution. type Changestats struct { FilesChanged int `json:"files_changed"` diff --git a/scripts/next-task b/scripts/next-task index e74ca26..9df09f0 100755 --- a/scripts/next-task +++ b/scripts/next-task @@ -11,7 +11,7 @@ # Usage: next_id=$(scripts/next-task) # Example: scripts/start-next-task -DB_PATH="/site/doot.terst.org/data/claudomator.db" +DB_PATH="${DB_PATH:-/site/doot.terst.org/data/claudomator.db}" # 1. Fetch the most recently updated COMPLETED or READY task target=$(sqlite3 "$DB_PATH" "SELECT id, state, parent_task_id FROM tasks WHERE state IN ('COMPLETED', 'READY') ORDER BY updated_at DESC LIMIT 1;") @@ -32,7 +32,7 @@ fi if [ -z "$next_task" ]; then # 4. No child/sibling found: fall back to highest-priority oldest PENDING task - next_task=$(sqlite3 "$DB_PATH" "SELECT id FROM tasks WHERE state = 'PENDING' AND id != '$id' + FALLBACK_SQL="SELECT id FROM tasks WHERE state IN ('PENDING', 'QUEUED') AND id != '$id' ORDER BY CASE priority WHEN 'critical' THEN 4 @@ -42,7 +42,8 @@ if [ -z "$next_task" ]; then ELSE 0 END DESC, created_at ASC - LIMIT 1;") + LIMIT 1;" + next_task=$(sqlite3 "$DB_PATH" "$FALLBACK_SQL") fi echo "$next_task" diff --git a/test/next-task.test.sh b/test/next-task.test.sh new file mode 100644 index 0000000..3304efa --- /dev/null +++ b/test/next-task.test.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# test/next-task.test.sh + +set -euo pipefail + +# Create a temporary database +TEST_DB=$(mktemp) +sqlite3 "$TEST_DB" <<EOF +CREATE TABLE tasks ( + id TEXT PRIMARY KEY, + state TEXT NOT NULL, + parent_task_id TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, + priority TEXT DEFAULT 'normal' +); +EOF + +# Insert a COMPLETED task that will not have a PENDING child or sibling, +# to ensure the fallback logic is triggered. +sqlite3 "$TEST_DB" "INSERT INTO tasks (id, state, created_at) VALUES ('completed-no-children', 'COMPLETED', '2023-01-01 12:00:00');" + +# Inject a QUEUED task (should be picked by fallback) +sqlite3 "$TEST_DB" "INSERT INTO tasks (id, state, priority, created_at) VALUES ('queued-task-id', 'QUEUED', 'high', '2023-01-01 10:00:00');" + +# Inject a PENDING task (lower priority, should not be picked first by fallback) +sqlite3 "$TEST_DB" "INSERT INTO tasks (id, state, priority, created_at) VALUES ('pending-task-id', 'PENDING', 'normal', '2023-01-01 11:00:00');" + +# Run the next-task script with the temporary database path +export DB_PATH="$TEST_DB" # Override DB_PATH for the test +SCRIPT_DIR="$(dirname "$(dirname "$0")")/scripts" +NEXT_TASK_ID=$("$SCRIPT_DIR/next-task") + +# Assert that the QUEUED task is returned +if [[ "$NEXT_TASK_ID" == "queued-task-id" ]]; then + echo "Test passed: QUEUED task was selected by fallback." +else + echo "Test failed: Expected 'queued-task-id', got '$NEXT_TASK_ID'" + exit 1 +fi + +# Clean up +rm "$TEST_DB" @@ -74,6 +74,24 @@ function formatDate(iso) { }); } +// Returns formatted string for changestats, e.g. "5 files, +127 -43". +// Returns empty string for null/undefined input. +export function formatChangestats(stats) { + if (stats == null) return ''; + return `${stats.files_changed} files, +${stats.lines_added} -${stats.lines_removed}`; +} + +// Returns a <span class="changestats-badge"> element for the given stats, +// or null if stats is null/undefined. +// Accepts an optional doc parameter for testability (defaults to document). +export function renderChangestatsBadge(stats, doc = (typeof document !== 'undefined' ? document : null)) { + if (stats == null || doc == null) return null; + const span = doc.createElement('span'); + span.className = 'changestats-badge'; + span.textContent = formatChangestats(stats); + return span; +} + function createTaskCard(task) { const card = document.createElement('div'); card.className = 'task-card'; @@ -118,6 +136,13 @@ function createTaskCard(task) { card.appendChild(desc); } + // Changestats badge for COMPLETED/READY tasks + const CHANGESTATS_STATES = new Set(['COMPLETED', 'READY']); + if (CHANGESTATS_STATES.has(task.state) && task.changestats != null) { + const csBadge = renderChangestatsBadge(task.changestats); + if (csBadge) card.appendChild(csBadge); + } + // Footer: action buttons based on state // Interrupted states (CANCELLED, FAILED, BUDGET_EXCEEDED) show both Resume and Restart. // TIMED_OUT shows Resume only. Others show a single action. @@ -339,6 +364,46 @@ export function setTaskFilterTab(tab) { localStorage.setItem('taskFilterTab', tab); } +// ── Tab badge counts ─────────────────────────────────────────────────────────── + +/** + * Computes badge counts for the 'interrupted', 'ready', and 'running' tabs. + * Returns { interrupted: N, ready: N, running: N }. + */ +export function computeTabBadgeCounts(tasks) { + let interrupted = 0; + let ready = 0; + let running = 0; + for (const t of tasks) { + if (INTERRUPTED_STATES.has(t.state)) interrupted++; + if (t.state === 'READY') ready++; + if (t.state === 'RUNNING') running++; + } + return { interrupted, ready, running }; +} + +/** + * Updates the badge count spans inside the tab buttons for + * 'interrupted', 'ready', and 'running'. + * Badge is hidden (display:none) when count is zero. + */ +export function updateTabBadges(tasks, doc = (typeof document !== 'undefined' ? document : null)) { + if (!doc) return; + const counts = computeTabBadgeCounts(tasks); + for (const [tab, count] of Object.entries(counts)) { + const btn = doc.querySelector(`.tab[data-tab="${tab}"]`); + if (!btn) continue; + let badge = btn.querySelector('.tab-count-badge'); + if (!badge) { + badge = doc.createElement('span'); + badge.className = 'tab-count-badge'; + btn.appendChild(badge); + } + badge.textContent = String(count); + badge.hidden = count === 0; + } +} + // ── Stats computations ───────────────────────────────────────────────────────── /** @@ -961,6 +1026,8 @@ async function poll() { const tasks = await fetchTasks(); if (isUserEditing()) return; + updateTabBadges(tasks); + const activeTab = getActiveTab(); switch (activeTab) { case 'queue': @@ -1648,6 +1715,33 @@ function renderTaskPanel(task, executions) { exitEl.textContent = `exit: ${exec.ExitCode ?? '—'}`; row.appendChild(exitEl); + if (exec.Changestats != null) { + const csBadge = renderChangestatsBadge(exec.Changestats); + if (csBadge) row.appendChild(csBadge); + } + + if (exec.Commits && exec.Commits.length > 0) { + const commitList = document.createElement('div'); + commitList.className = 'execution-commits'; + for (const commit of exec.Commits) { + const item = document.createElement('div'); + item.className = 'commit-item'; + + const hash = document.createElement('span'); + hash.className = 'commit-hash'; + hash.textContent = commit.hash.slice(0, 7); + item.appendChild(hash); + + const msg = document.createElement('span'); + msg.className = 'commit-msg'; + msg.textContent = commit.message; + item.appendChild(msg); + + commitList.appendChild(item); + } + row.appendChild(commitList); + } + const logsBtn = document.createElement('button'); logsBtn.className = 'btn-view-logs'; logsBtn.textContent = 'View Logs'; diff --git a/web/index.html b/web/index.html index 19cba2c..59bc56e 100644 --- a/web/index.html +++ b/web/index.html @@ -23,9 +23,9 @@ </header> <nav class="tab-bar"> <button class="tab active" data-tab="queue" title="Queue">⏳</button> - <button class="tab" data-tab="interrupted" title="Interrupted">⚠️</button> - <button class="tab" data-tab="ready" title="Ready">✅</button> - <button class="tab" data-tab="running" title="Running">▶️</button> + <button class="tab" data-tab="interrupted" title="Interrupted">⚠️<span class="tab-count-badge" hidden></span></button> + <button class="tab" data-tab="ready" title="Ready">✅<span class="tab-count-badge" hidden></span></button> + <button class="tab" data-tab="running" title="Running">▶️<span class="tab-count-badge" hidden></span></button> <button class="tab" data-tab="all" title="All">☰</button> <button class="tab" data-tab="stats" title="Stats">📊</button> <button class="tab" data-tab="settings" title="Settings">⚙️</button> diff --git a/web/style.css b/web/style.css index ee1b69c..e7d1de4 100644 --- a/web/style.css +++ b/web/style.css @@ -111,6 +111,25 @@ header h1 { border-bottom-color: var(--accent); } +.tab-count-badge { + display: inline-block; + margin-left: 0.3em; + padding: 0 0.4em; + border-radius: 999px; + font-size: 0.7em; + font-weight: 700; + line-height: 1.5; + background: var(--accent); + color: #fff; + vertical-align: middle; + min-width: 1.4em; + text-align: center; +} + +.tab-count-badge[hidden] { + display: none; +} + /* Main layout */ main { max-width: 640px; @@ -774,6 +793,39 @@ dialog label select:focus { flex-wrap: wrap; } +.execution-commits { + width: 100%; + margin-top: 0.25rem; + display: flex; + flex-direction: column; + gap: 0.25rem; + font-size: 0.8rem; + color: var(--text-muted); + border-top: 1px solid var(--border-light); + padding-top: 0.5rem; +} + +.commit-item { + display: flex; + gap: 0.5rem; + align-items: baseline; +} + +.commit-hash { + font-family: var(--font-mono); + color: var(--text); + background: var(--bg-hover); + padding: 0.125rem 0.25rem; + border-radius: 0.25rem; + font-size: 0.75rem; +} + +.commit-msg { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + .execution-id { font-family: monospace; font-size: 0.72rem; @@ -804,6 +856,13 @@ dialog label select:focus { white-space: nowrap; } +.changestats-badge { + font-family: monospace; + font-size: 0.72rem; + color: var(--text-muted); + white-space: nowrap; +} + .btn-view-logs { font-size: 0.72rem; font-weight: 600; diff --git a/web/test/changestats.test.mjs b/web/test/changestats.test.mjs new file mode 100644 index 0000000..5363812 --- /dev/null +++ b/web/test/changestats.test.mjs @@ -0,0 +1,125 @@ +// changestats.test.mjs — Unit tests for changestats display functions. +// +// Run with: node --test web/test/changestats.test.mjs + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { formatChangestats, renderChangestatsBadge } from '../app.js'; + +// ── Mock DOM ─────────────────────────────────────────────────────────────────── + +function makeDoc() { + return { + createElement(tag) { + const el = { + tag, + className: '', + textContent: '', + children: [], + appendChild(child) { this.children.push(child); return child; }, + }; + return el; + }, + }; +} + +// ── formatChangestats ────────────────────────────────────────────────────────── + +describe('formatChangestats', () => { + it('formats valid stats as "N files, +A -R"', () => { + const result = formatChangestats({ files_changed: 5, lines_added: 127, lines_removed: 43 }); + assert.equal(result, '5 files, +127 -43'); + }); + + it('returns empty string for null', () => { + const result = formatChangestats(null); + assert.equal(result, ''); + }); + + it('returns empty string for undefined', () => { + const result = formatChangestats(undefined); + assert.equal(result, ''); + }); + + it('formats zero values correctly', () => { + const result = formatChangestats({ files_changed: 0, lines_added: 0, lines_removed: 0 }); + assert.equal(result, '0 files, +0 -0'); + }); + + it('formats single file correctly', () => { + const result = formatChangestats({ files_changed: 1, lines_added: 10, lines_removed: 2 }); + assert.equal(result, '1 files, +10 -2'); + }); +}); + +// ── renderChangestatsBadge ───────────────────────────────────────────────────── + +describe('renderChangestatsBadge', () => { + it('returns element with class changestats-badge for valid stats', () => { + const doc = makeDoc(); + const el = renderChangestatsBadge({ files_changed: 5, lines_added: 127, lines_removed: 43 }, doc); + assert.ok(el, 'element should not be null'); + assert.equal(el.className, 'changestats-badge'); + }); + + it('returns element with correct text content', () => { + const doc = makeDoc(); + const el = renderChangestatsBadge({ files_changed: 5, lines_added: 127, lines_removed: 43 }, doc); + assert.equal(el.textContent, '5 files, +127 -43'); + }); + + it('returns null for null stats', () => { + const doc = makeDoc(); + const el = renderChangestatsBadge(null, doc); + assert.equal(el, null); + }); + + it('returns null for undefined stats', () => { + const doc = makeDoc(); + const el = renderChangestatsBadge(undefined, doc); + assert.equal(el, null); + }); +}); + +// ── State-based visibility ──────────────────────────────────────────────────── +// +// Changestats badge should appear on COMPLETED (and READY) tasks that have +// changestats data, and must not appear on QUEUED tasks. + +const CHANGESTATS_STATES = new Set(['COMPLETED', 'READY']); + +function shouldShowChangestats(task) { + return CHANGESTATS_STATES.has(task.state) && task.changestats != null; +} + +describe('changestats badge visibility by task state', () => { + it('COMPLETED task with changestats shows badge', () => { + const task = { state: 'COMPLETED', changestats: { files_changed: 3, lines_added: 50, lines_removed: 10 } }; + assert.equal(shouldShowChangestats(task), true); + }); + + it('READY task with changestats shows badge', () => { + const task = { state: 'READY', changestats: { files_changed: 1, lines_added: 5, lines_removed: 2 } }; + assert.equal(shouldShowChangestats(task), true); + }); + + it('QUEUED task hides changestats', () => { + const task = { state: 'QUEUED', changestats: { files_changed: 3, lines_added: 50, lines_removed: 10 } }; + assert.equal(shouldShowChangestats(task), false); + }); + + it('COMPLETED task without changestats hides badge', () => { + const task = { state: 'COMPLETED', changestats: null }; + assert.equal(shouldShowChangestats(task), false); + }); + + it('RUNNING task hides changestats', () => { + const task = { state: 'RUNNING', changestats: null }; + assert.equal(shouldShowChangestats(task), false); + }); + + it('PENDING task hides changestats', () => { + const task = { state: 'PENDING', changestats: null }; + assert.equal(shouldShowChangestats(task), false); + }); +}); diff --git a/web/test/tab-badges.test.mjs b/web/test/tab-badges.test.mjs new file mode 100644 index 0000000..c07338f --- /dev/null +++ b/web/test/tab-badges.test.mjs @@ -0,0 +1,110 @@ +// tab-badges.test.mjs — TDD tests for computeTabBadgeCounts +// +// Tests the pure function that computes badge counts for the +// 'interrupted', 'ready', and 'running' tabs. +// +// Run with: node --test web/test/tab-badges.test.mjs + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; + +// ── Inline implementation (will be replaced by import once exported) ─────────── + +const INTERRUPTED_STATES = new Set(['CANCELLED', 'FAILED', 'BUDGET_EXCEEDED', 'BLOCKED']); + +function computeTabBadgeCounts(tasks) { + let interrupted = 0; + let ready = 0; + let running = 0; + for (const t of tasks) { + if (INTERRUPTED_STATES.has(t.state)) interrupted++; + if (t.state === 'READY') ready++; + if (t.state === 'RUNNING') running++; + } + return { interrupted, ready, running }; +} + +// ── Helpers ──────────────────────────────────────────────────────────────────── + +function makeTask(state) { + return { id: state, name: `task-${state}`, state }; +} + +// ── Tests ────────────────────────────────────────────────────────────────────── + +describe('computeTabBadgeCounts', () => { + it('returns all zeros for empty task list', () => { + assert.deepEqual(computeTabBadgeCounts([]), { interrupted: 0, ready: 0, running: 0 }); + }); + + it('counts RUNNING tasks', () => { + const tasks = [makeTask('RUNNING'), makeTask('RUNNING'), makeTask('QUEUED')]; + const counts = computeTabBadgeCounts(tasks); + assert.equal(counts.running, 2); + assert.equal(counts.ready, 0); + assert.equal(counts.interrupted, 0); + }); + + it('counts READY tasks', () => { + const tasks = [makeTask('READY'), makeTask('READY'), makeTask('QUEUED')]; + const counts = computeTabBadgeCounts(tasks); + assert.equal(counts.ready, 2); + assert.equal(counts.running, 0); + assert.equal(counts.interrupted, 0); + }); + + it('counts CANCELLED as interrupted', () => { + const counts = computeTabBadgeCounts([makeTask('CANCELLED')]); + assert.equal(counts.interrupted, 1); + }); + + it('counts FAILED as interrupted', () => { + const counts = computeTabBadgeCounts([makeTask('FAILED')]); + assert.equal(counts.interrupted, 1); + }); + + it('counts BUDGET_EXCEEDED as interrupted', () => { + const counts = computeTabBadgeCounts([makeTask('BUDGET_EXCEEDED')]); + assert.equal(counts.interrupted, 1); + }); + + it('counts BLOCKED as interrupted', () => { + const counts = computeTabBadgeCounts([makeTask('BLOCKED')]); + assert.equal(counts.interrupted, 1); + }); + + it('does not count COMPLETED as interrupted', () => { + const counts = computeTabBadgeCounts([makeTask('COMPLETED')]); + assert.equal(counts.interrupted, 0); + }); + + it('does not count TIMED_OUT as interrupted', () => { + const counts = computeTabBadgeCounts([makeTask('TIMED_OUT')]); + assert.equal(counts.interrupted, 0); + }); + + it('counts across multiple states simultaneously', () => { + const tasks = [ + makeTask('RUNNING'), + makeTask('RUNNING'), + makeTask('READY'), + makeTask('CANCELLED'), + makeTask('FAILED'), + makeTask('BLOCKED'), + makeTask('QUEUED'), + makeTask('COMPLETED'), + ]; + const counts = computeTabBadgeCounts(tasks); + assert.equal(counts.running, 2); + assert.equal(counts.ready, 1); + assert.equal(counts.interrupted, 3); + }); + + it('returns zero for a tab when no tasks match that state', () => { + const tasks = [makeTask('QUEUED'), makeTask('PENDING'), makeTask('COMPLETED')]; + const counts = computeTabBadgeCounts(tasks); + assert.equal(counts.running, 0); + assert.equal(counts.ready, 0); + assert.equal(counts.interrupted, 0); + }); +}); |
