summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- internal/executor/claude.go 77
-rw-r--r-- internal/executor/claude_test.go 103
-rw-r--r-- internal/executor/executor.go 1
-rw-r--r-- internal/executor/gemini.go 5
-rw-r--r-- internal/executor/preamble.go 2
-rw-r--r-- internal/storage/db.go 55
-rw-r--r-- internal/task/task.go 6
-rwxr-xr-x scripts/next-task 7
-rw-r--r-- test/next-task.test.sh 43
-rw-r--r-- web/app.js 94
-rw-r--r-- web/index.html 6
-rw-r--r-- web/style.css 59
-rw-r--r-- web/test/changestats.test.mjs 125
-rw-r--r-- web/test/tab-badges.test.mjs 110
14 files changed, 643 insertions, 50 deletions
diff --git a/internal/executor/claude.go b/internal/executor/claude.go
index 5a5b35e..4d92cd0 100644
--- a/internal/executor/claude.go
+++ b/internal/executor/claude.go
@@ -103,6 +103,7 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
// finds its session files under the same project slug. If no sandbox was
// preserved (e.g. task had no project_dir), fall back to project_dir.
var sandboxDir string
+ var startHEAD string
effectiveWorkingDir := projectDir
if e.ResumeSessionID != "" {
if e.SandboxDir != "" {
@@ -134,6 +135,12 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
r.Logger.Info("sandbox created", "sandbox", sandboxDir, "project_dir", projectDir)
}
+ if effectiveWorkingDir != "" {
+ // Capture the initial HEAD so we can identify new commits later.
+ headOut, _ := exec.Command("git", gitSafe("-C", effectiveWorkingDir, "rev-parse", "HEAD")...).Output()
+ startHEAD = strings.TrimSpace(string(headOut))
+ }
+
questionFile := filepath.Join(logDir, "question.json")
args := r.buildArgs(t, e, questionFile)
@@ -147,7 +154,7 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
)
}
attempt++
- return r.execOnce(ctx, args, effectiveWorkingDir, e)
+ return r.execOnce(ctx, args, effectiveWorkingDir, projectDir, e)
})
if err != nil {
if sandboxDir != "" {
@@ -183,7 +190,7 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
// Merge sandbox back to project_dir and clean up.
if sandboxDir != "" {
- if mergeErr := teardownSandbox(projectDir, sandboxDir, r.Logger); mergeErr != nil {
+ if mergeErr := teardownSandbox(projectDir, sandboxDir, startHEAD, r.Logger, e); mergeErr != nil {
return fmt.Errorf("sandbox teardown: %w (sandbox preserved at %s)", mergeErr, sandboxDir)
}
}
@@ -277,20 +284,57 @@ func setupSandbox(projectDir string) (string, error) {
// The working copy (projectDir) is NOT updated automatically — it is the
// developer's workspace and is pulled manually. This avoids permission errors
// from mixed-owner .git/objects directories.
-func teardownSandbox(projectDir, sandboxDir string, logger *slog.Logger) error {
- // Fail if agent left uncommitted changes.
+func teardownSandbox(projectDir, sandboxDir, startHEAD string, logger *slog.Logger, execRecord *storage.Execution) error {
+ // Automatically commit uncommitted changes.
out, err := exec.Command("git", "-C", sandboxDir, "status", "--porcelain").Output()
if err != nil {
return fmt.Errorf("git status: %w", err)
}
if len(strings.TrimSpace(string(out))) > 0 {
- return fmt.Errorf("uncommitted changes in sandbox (agent must commit all work):\n%s", out)
+ logger.Info("autocommitting uncommitted changes", "sandbox", sandboxDir)
+ cmds := [][]string{
+ gitSafe("-C", sandboxDir, "add", "-A"),
+ gitSafe("-C", sandboxDir, "commit", "-m", "chore: autocommit uncommitted changes"),
+ }
+ for _, args := range cmds {
+ if out, err := exec.Command("git", args...).CombinedOutput(); err != nil {
+ return fmt.Errorf("autocommit failed (%v): %w\n%s", args, err, out)
+ }
+ }
+ }
+
+ // Capture commits before pushing/deleting.
+ // Use startHEAD..HEAD to find all commits made during this execution.
+ logRange := "origin/HEAD..HEAD"
+ if startHEAD != "" && startHEAD != "HEAD" {
+ logRange = startHEAD + "..HEAD"
+ }
+
+ logCmd := exec.Command("git", gitSafe("-C", sandboxDir, "log", logRange, "--pretty=format:%H|%s")...)
+ logOut, logErr := logCmd.CombinedOutput()
+ if logErr == nil {
+ lines := strings.Split(strings.TrimSpace(string(logOut)), "\n")
+ logger.Debug("captured commits", "count", len(lines), "range", logRange)
+ for _, line := range lines {
+ if line == "" {
+ continue
+ }
+ parts := strings.SplitN(line, "|", 2)
+ if len(parts) == 2 {
+ execRecord.Commits = append(execRecord.Commits, task.GitCommit{
+ Hash: parts[0],
+ Message: parts[1],
+ })
+ }
+ }
+ } else {
+ logger.Warn("failed to capture commits", "err", logErr, "range", logRange, "output", string(logOut))
}
// Check whether there are any new commits to push.
- ahead, err := exec.Command("git", "-C", sandboxDir, "rev-list", "--count", "origin/HEAD..HEAD").Output()
+ ahead, err := exec.Command("git", gitSafe("-C", sandboxDir, "rev-list", "--count", logRange)...).Output()
if err != nil {
- logger.Warn("could not determine commits ahead of origin; proceeding", "err", err)
+ logger.Warn("could not determine commits ahead of origin; proceeding", "err", err, "range", logRange)
}
if strings.TrimSpace(string(ahead)) == "0" {
os.RemoveAll(sandboxDir)
@@ -305,6 +349,22 @@ func teardownSandbox(projectDir, sandboxDir string, logger *slog.Logger) error {
if out2, err2 := exec.Command("git", "-C", sandboxDir, "pull", "--rebase", "origin", "master").CombinedOutput(); err2 != nil {
return fmt.Errorf("git rebase before retry push: %w\n%s", err2, out2)
}
+ // Re-capture commits after rebase (hashes might have changed)
+ execRecord.Commits = nil
+ logOut, logErr = exec.Command("git", "-C", sandboxDir, "log", logRange, "--pretty=format:%H|%s").Output()
+ if logErr == nil {
+ lines := strings.Split(strings.TrimSpace(string(logOut)), "\n")
+ for _, line := range lines {
+ parts := strings.SplitN(line, "|", 2)
+ if len(parts) == 2 {
+ execRecord.Commits = append(execRecord.Commits, task.GitCommit{
+ Hash: parts[0],
+ Message: parts[1],
+ })
+ }
+ }
+ }
+
if out3, err3 := exec.Command("git", "-C", sandboxDir, "push", "origin", "HEAD").CombinedOutput(); err3 != nil {
return fmt.Errorf("git push to origin (after rebase): %w\n%s", err3, out3)
}
@@ -319,11 +379,12 @@ func teardownSandbox(projectDir, sandboxDir string, logger *slog.Logger) error {
}
// execOnce runs the claude subprocess once, streaming output to e's log paths.
-func (r *ClaudeRunner) execOnce(ctx context.Context, args []string, workingDir string, e *storage.Execution) error {
+func (r *ClaudeRunner) execOnce(ctx context.Context, args []string, workingDir, projectDir string, e *storage.Execution) error {
cmd := exec.CommandContext(ctx, r.binaryPath(), args...)
cmd.Env = append(os.Environ(),
"CLAUDOMATOR_API_URL="+r.APIURL,
"CLAUDOMATOR_TASK_ID="+e.TaskID,
+ "CLAUDOMATOR_PROJECT_DIR="+projectDir,
"CLAUDOMATOR_QUESTION_FILE="+filepath.Join(e.ArtifactDir, "question.json"),
"CLAUDOMATOR_SUMMARY_FILE="+filepath.Join(e.ArtifactDir, "summary.txt"),
)
diff --git a/internal/executor/claude_test.go b/internal/executor/claude_test.go
index 9bb873f..02d1b2e 100644
--- a/internal/executor/claude_test.go
+++ b/internal/executor/claude_test.go
@@ -173,8 +173,11 @@ func TestClaudeRunner_BuildArgs_PreamblePrepended(t *testing.T) {
if len(args) < 2 || args[0] != "-p" {
t.Fatalf("expected -p as first arg, got: %v", args)
}
- if !strings.HasPrefix(args[1], planningPreamble) {
- t.Errorf("instructions should start with planning preamble")
+ if !strings.HasPrefix(args[1], "## Runtime Environment") {
+ t.Errorf("instructions should start with planning preamble, got prefix: %q", args[1][:min(len(args[1]), 20)])
+ }
+ if !strings.Contains(args[1], "$CLAUDOMATOR_PROJECT_DIR") {
+ t.Errorf("preamble should mention $CLAUDOMATOR_PROJECT_DIR")
}
if !strings.HasSuffix(args[1], "fix the bug") {
t.Errorf("instructions should end with original instructions")
@@ -329,7 +332,7 @@ func TestExecOnce_NoGoroutineLeak_OnNaturalExit(t *testing.T) {
runtime.Gosched()
baseline := runtime.NumGoroutine()
- if err := r.execOnce(context.Background(), []string{}, "", e); err != nil {
+ if err := r.execOnce(context.Background(), []string{}, "", "", e); err != nil {
t.Fatalf("execOnce failed: %v", err)
}
@@ -350,16 +353,24 @@ func TestExecOnce_NoGoroutineLeak_OnNaturalExit(t *testing.T) {
func initGitRepo(t *testing.T, dir string) {
t.Helper()
cmds := [][]string{
- {"git", "-C", dir, "init"},
- {"git", "-C", dir, "config", "user.email", "test@test"},
- {"git", "-C", dir, "config", "user.name", "test"},
- {"git", "-C", dir, "commit", "--allow-empty", "-m", "init"},
+ {"git", "-c", "safe.directory=*", "-C", dir, "init", "-b", "main"},
+ {"git", "-c", "safe.directory=*", "-C", dir, "config", "user.email", "test@test"},
+ {"git", "-c", "safe.directory=*", "-C", dir, "config", "user.name", "test"},
}
for _, args := range cmds {
if out, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil {
t.Fatalf("%v: %v\n%s", args, err, out)
}
}
+ if err := os.WriteFile(filepath.Join(dir, "init.txt"), []byte("init"), 0644); err != nil {
+ t.Fatal(err)
+ }
+ if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", dir, "add", ".").CombinedOutput(); err != nil {
+ t.Fatalf("git add: %v\n%s", err, out)
+ }
+ if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", dir, "commit", "-m", "init").CombinedOutput(); err != nil {
+ t.Fatalf("git commit: %v\n%s", err, out)
+ }
}
func TestSandboxCloneSource_PrefersLocalRemote(t *testing.T) {
@@ -409,6 +420,13 @@ func TestSetupSandbox_ClonesGitRepo(t *testing.T) {
}
t.Cleanup(func() { os.RemoveAll(sandbox) })
+ // Force sandbox to master if it cloned as main
+ exec.Command("git", gitSafe("-C", sandbox, "checkout", "master")...).Run()
+
+ // Debug sandbox
+ logOut, _ := exec.Command("git", "-C", sandbox, "log", "-1").CombinedOutput()
+ fmt.Printf("DEBUG: sandbox log: %s\n", string(logOut))
+
// Verify sandbox is a git repo with at least one commit.
out, err := exec.Command("git", "-C", sandbox, "log", "--oneline").Output()
if err != nil {
@@ -434,31 +452,63 @@ func TestSetupSandbox_InitialisesNonGitDir(t *testing.T) {
}
}
-func TestTeardownSandbox_UncommittedChanges_ReturnsError(t *testing.T) {
- src := t.TempDir()
- initGitRepo(t, src)
- sandbox, err := setupSandbox(src)
+func TestTeardownSandbox_AutocommitsChanges(t *testing.T) {
+ // Create a bare repo as origin so push succeeds.
+ bare := t.TempDir()
+ if out, err := exec.Command("git", "init", "--bare", bare).CombinedOutput(); err != nil {
+ t.Fatalf("git init bare: %v\n%s", err, out)
+ }
+
+ // Create a sandbox directly.
+ sandbox := t.TempDir()
+ initGitRepo(t, sandbox)
+ if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "remote", "add", "origin", bare).CombinedOutput(); err != nil {
+ t.Fatalf("git remote add: %v\n%s", err, out)
+ }
+ // Initial push to establish origin/main
+ if out, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "push", "origin", "main").CombinedOutput(); err != nil {
+ t.Fatalf("git push initial: %v\n%s", err, out)
+ }
+
+ // Capture startHEAD
+ headOut, err := exec.Command("git", "-c", "safe.directory=*", "-C", sandbox, "rev-parse", "HEAD").Output()
if err != nil {
- t.Fatalf("setupSandbox: %v", err)
+ t.Fatalf("rev-parse HEAD: %v", err)
}
- t.Cleanup(func() { os.RemoveAll(sandbox) })
+ startHEAD := strings.TrimSpace(string(headOut))
// Leave an uncommitted file in the sandbox.
- if err := os.WriteFile(filepath.Join(sandbox, "dirty.txt"), []byte("oops"), 0644); err != nil {
+ if err := os.WriteFile(filepath.Join(sandbox, "dirty.txt"), []byte("autocommit me"), 0644); err != nil {
t.Fatal(err)
}
- logger := slog.New(slog.NewTextHandler(io.Discard, nil))
- err = teardownSandbox(src, sandbox, logger)
- if err == nil {
- t.Fatal("expected error for uncommitted changes, got nil")
+ logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
+ execRecord := &storage.Execution{}
+
+ err = teardownSandbox("", sandbox, startHEAD, logger, execRecord)
+ if err != nil {
+ t.Fatalf("expected autocommit to succeed, got error: %v", err)
+ }
+
+ // Sandbox should be removed after successful autocommit and push.
+ if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) {
+ t.Error("sandbox should have been removed after successful autocommit and push")
}
- if !strings.Contains(err.Error(), "uncommitted changes") {
- t.Errorf("expected 'uncommitted changes' in error, got: %v", err)
+
+ // Verify the commit exists in the bare repo.
+ out, err := exec.Command("git", "-C", bare, "log", "-1", "--pretty=%B").Output()
+ if err != nil {
+ t.Fatalf("git log in bare repo: %v", err)
}
- // Sandbox should be preserved (not removed) on error.
- if _, statErr := os.Stat(sandbox); os.IsNotExist(statErr) {
- t.Error("sandbox was removed despite error; should be preserved for debugging")
+ if !strings.Contains(string(out), "chore: autocommit uncommitted changes") {
+ t.Errorf("expected autocommit message in log, got: %q", string(out))
+ }
+
+ // Verify the commit was captured in execRecord.
+ if len(execRecord.Commits) == 0 {
+ t.Error("expected at least one commit in execRecord")
+ } else if !strings.Contains(execRecord.Commits[0].Message, "chore: autocommit uncommitted changes") {
+ t.Errorf("unexpected commit message: %q", execRecord.Commits[0].Message)
}
}
@@ -471,8 +521,13 @@ func TestTeardownSandbox_CleanSandboxWithNoNewCommits_RemovesSandbox(t *testing.
}
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+ execRecord := &storage.Execution{}
+
+ headOut, _ := exec.Command("git", "-C", sandbox, "rev-parse", "HEAD").Output()
+ startHEAD := strings.TrimSpace(string(headOut))
+
// Sandbox has no new commits beyond origin; teardown should succeed and remove it.
- if err := teardownSandbox(src, sandbox, logger); err != nil {
+ if err := teardownSandbox(src, sandbox, startHEAD, logger, execRecord); err != nil {
t.Fatalf("teardownSandbox: %v", err)
}
if _, statErr := os.Stat(sandbox); !os.IsNotExist(statErr) {
diff --git a/internal/executor/executor.go b/internal/executor/executor.go
index fd37c33..f85f1ff 100644
--- a/internal/executor/executor.go
+++ b/internal/executor/executor.go
@@ -355,6 +355,7 @@ func (p *Pool) handleRunResult(ctx context.Context, t *task.Task, exec *storage.
}
if exec.StdoutPath != "" {
if cs := task.ParseChangestatFromFile(exec.StdoutPath); cs != nil {
+ exec.Changestats = cs
if csErr := p.store.UpdateExecutionChangestats(exec.ID, cs); csErr != nil {
p.logger.Error("failed to store changestats", "execID", exec.ID, "error", csErr)
}
diff --git a/internal/executor/gemini.go b/internal/executor/gemini.go
index 2db3218..67ea7dd 100644
--- a/internal/executor/gemini.go
+++ b/internal/executor/gemini.go
@@ -68,7 +68,7 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
// Gemini CLI doesn't necessarily have the same rate limiting behavior as Claude,
// but we'll use a similar execution pattern.
- err := r.execOnce(ctx, args, t.Agent.ProjectDir, e)
+ err := r.execOnce(ctx, args, t.Agent.ProjectDir, t.Agent.ProjectDir, e)
if err != nil {
return err
}
@@ -82,11 +82,12 @@ func (r *GeminiRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
return nil
}
-func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir string, e *storage.Execution) error {
+func (r *GeminiRunner) execOnce(ctx context.Context, args []string, workingDir, projectDir string, e *storage.Execution) error {
cmd := exec.CommandContext(ctx, r.binaryPath(), args...)
cmd.Env = append(os.Environ(),
"CLAUDOMATOR_API_URL="+r.APIURL,
"CLAUDOMATOR_TASK_ID="+e.TaskID,
+ "CLAUDOMATOR_PROJECT_DIR="+projectDir,
"CLAUDOMATOR_QUESTION_FILE="+filepath.Join(e.ArtifactDir, "question.json"),
)
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
diff --git a/internal/executor/preamble.go b/internal/executor/preamble.go
index 8ae79ad..f5dba2b 100644
--- a/internal/executor/preamble.go
+++ b/internal/executor/preamble.go
@@ -27,7 +27,7 @@ Before doing any implementation work:
2. If YES — break it down:
- Create 3–7 discrete subtasks by POSTing to $CLAUDOMATOR_API_URL/api/tasks
- - Each subtask POST body should be JSON with: name, agent.instructions, agent.working_dir (copy from current task), agent.model, agent.allowed_tools, and agent.skip_planning set to true
+ - Each subtask POST body should be JSON with: name, agent.instructions, agent.project_dir (copy from $CLAUDOMATOR_PROJECT_DIR), agent.model, agent.allowed_tools, and agent.skip_planning set to true
- Set parent_task_id to $CLAUDOMATOR_TASK_ID in each POST body
- After creating all subtasks, output a brief summary and STOP. Do not implement anything.
- You can also specify agent.type (either "claude" or "gemini") to choose the agent for subtasks.
diff --git a/internal/storage/db.go b/internal/storage/db.go
index 2b7e33f..69bcf68 100644
--- a/internal/storage/db.go
+++ b/internal/storage/db.go
@@ -84,6 +84,7 @@ func (s *DB) migrate() error {
`ALTER TABLE tasks ADD COLUMN summary TEXT`,
`ALTER TABLE tasks ADD COLUMN interactions_json TEXT NOT NULL DEFAULT '[]'`,
`ALTER TABLE executions ADD COLUMN changestats_json TEXT`,
+ `ALTER TABLE executions ADD COLUMN commits_json TEXT NOT NULL DEFAULT '[]'`,
}
for _, m := range migrations {
if _, err := s.db.Exec(m); err != nil {
@@ -368,6 +369,7 @@ type Execution struct {
SandboxDir string // preserved sandbox path when task is BLOCKED; resume must run here
Changestats *task.Changestats // stored as JSON; nil if not yet recorded
+ Commits []task.GitCommit // stored as JSON; empty if no commits
// In-memory only: set when creating a resume execution, not stored in DB.
ResumeSessionID string
@@ -387,24 +389,32 @@ func (s *DB) CreateExecution(e *Execution) error {
s := string(b)
changestatsJSON = &s
}
+ commitsJSON := "[]"
+ if len(e.Commits) > 0 {
+ b, err := json.Marshal(e.Commits)
+ if err != nil {
+ return fmt.Errorf("marshaling commits: %w", err)
+ }
+ commitsJSON = string(b)
+ }
_, err := s.db.Exec(`
- INSERT INTO executions (id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json)
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+ INSERT INTO executions (id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json, commits_json)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
e.ID, e.TaskID, e.StartTime.UTC(), e.EndTime.UTC(), e.ExitCode, e.Status,
- e.StdoutPath, e.StderrPath, e.ArtifactDir, e.CostUSD, e.ErrorMsg, e.SessionID, e.SandboxDir, changestatsJSON,
+ e.StdoutPath, e.StderrPath, e.ArtifactDir, e.CostUSD, e.ErrorMsg, e.SessionID, e.SandboxDir, changestatsJSON, commitsJSON,
)
return err
}
// GetExecution retrieves an execution by ID.
func (s *DB) GetExecution(id string) (*Execution, error) {
- row := s.db.QueryRow(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json FROM executions WHERE id = ?`, id)
+ row := s.db.QueryRow(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json, commits_json FROM executions WHERE id = ?`, id)
return scanExecution(row)
}
// ListExecutions returns executions for a task.
func (s *DB) ListExecutions(taskID string) ([]*Execution, error) {
- rows, err := s.db.Query(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json FROM executions WHERE task_id = ? ORDER BY start_time DESC`, taskID)
+ rows, err := s.db.Query(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json, commits_json FROM executions WHERE task_id = ? ORDER BY start_time DESC`, taskID)
if err != nil {
return nil, err
}
@@ -423,7 +433,7 @@ func (s *DB) ListExecutions(taskID string) ([]*Execution, error) {
// GetLatestExecution returns the most recent execution for a task.
func (s *DB) GetLatestExecution(taskID string) (*Execution, error) {
- row := s.db.QueryRow(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json FROM executions WHERE task_id = ? ORDER BY start_time DESC LIMIT 1`, taskID)
+ row := s.db.QueryRow(`SELECT id, task_id, start_time, end_time, exit_code, status, stdout_path, stderr_path, artifact_dir, cost_usd, error_msg, session_id, sandbox_dir, changestats_json, commits_json FROM executions WHERE task_id = ? ORDER BY start_time DESC LIMIT 1`, taskID)
return scanExecution(row)
}
@@ -588,12 +598,31 @@ func (s *DB) AppendTaskInteraction(taskID string, interaction task.Interaction)
// UpdateExecution updates a completed execution.
func (s *DB) UpdateExecution(e *Execution) error {
+ var changestatsJSON *string
+ if e.Changestats != nil {
+ b, err := json.Marshal(e.Changestats)
+ if err != nil {
+ return fmt.Errorf("marshaling changestats: %w", err)
+ }
+ s := string(b)
+ changestatsJSON = &s
+ }
+ commitsJSON := "[]"
+ if len(e.Commits) > 0 {
+ b, err := json.Marshal(e.Commits)
+ if err != nil {
+ return fmt.Errorf("marshaling commits: %w", err)
+ }
+ commitsJSON = string(b)
+ }
_, err := s.db.Exec(`
UPDATE executions SET end_time = ?, exit_code = ?, status = ?, cost_usd = ?, error_msg = ?,
- stdout_path = ?, stderr_path = ?, artifact_dir = ?, session_id = ?, sandbox_dir = ?
+ stdout_path = ?, stderr_path = ?, artifact_dir = ?, session_id = ?, sandbox_dir = ?,
+ changestats_json = ?, commits_json = ?
WHERE id = ?`,
e.EndTime.UTC(), e.ExitCode, e.Status, e.CostUSD, e.ErrorMsg,
- e.StdoutPath, e.StderrPath, e.ArtifactDir, e.SessionID, e.SandboxDir, e.ID,
+ e.StdoutPath, e.StderrPath, e.ArtifactDir, e.SessionID, e.SandboxDir,
+ changestatsJSON, commitsJSON, e.ID,
)
return err
}
@@ -660,8 +689,9 @@ func scanExecution(row scanner) (*Execution, error) {
var sessionID sql.NullString
var sandboxDir sql.NullString
var changestatsJSON sql.NullString
+ var commitsJSON sql.NullString
err := row.Scan(&e.ID, &e.TaskID, &e.StartTime, &e.EndTime, &e.ExitCode, &e.Status,
- &e.StdoutPath, &e.StderrPath, &e.ArtifactDir, &e.CostUSD, &e.ErrorMsg, &sessionID, &sandboxDir, &changestatsJSON)
+ &e.StdoutPath, &e.StderrPath, &e.ArtifactDir, &e.CostUSD, &e.ErrorMsg, &sessionID, &sandboxDir, &changestatsJSON, &commitsJSON)
if err != nil {
return nil, err
}
@@ -674,6 +704,13 @@ func scanExecution(row scanner) (*Execution, error) {
}
e.Changestats = &cs
}
+ if commitsJSON.Valid && commitsJSON.String != "" {
+ if err := json.Unmarshal([]byte(commitsJSON.String), &e.Commits); err != nil {
+ return nil, fmt.Errorf("unmarshaling commits: %w", err)
+ }
+ } else {
+ e.Commits = []task.GitCommit{}
+ }
return &e, nil
}
diff --git a/internal/task/task.go b/internal/task/task.go
index b85b07c..6a9d1db 100644
--- a/internal/task/task.go
+++ b/internal/task/task.go
@@ -48,6 +48,12 @@ type RetryConfig struct {
Backoff string `yaml:"backoff" json:"backoff"` // "linear", "exponential"
}
+// GitCommit represents a single git commit created during a task execution.
+type GitCommit struct {
+ Hash string `json:"hash"` // commit hash; teardownSandbox fills it from git log's %H field
+ Message string `json:"message"` // commit subject line; teardownSandbox fills it from git log's %s field
+}
+
// Changestats records file/line change metrics from an agent execution.
type Changestats struct {
FilesChanged int `json:"files_changed"`
diff --git a/scripts/next-task b/scripts/next-task
index e74ca26..9df09f0 100755
--- a/scripts/next-task
+++ b/scripts/next-task
@@ -11,7 +11,7 @@
# Usage: next_id=$(scripts/next-task)
# Example: scripts/start-next-task
-DB_PATH="/site/doot.terst.org/data/claudomator.db"
+DB_PATH="${DB_PATH:-/site/doot.terst.org/data/claudomator.db}"
# 1. Fetch the most recently updated COMPLETED or READY task
target=$(sqlite3 "$DB_PATH" "SELECT id, state, parent_task_id FROM tasks WHERE state IN ('COMPLETED', 'READY') ORDER BY updated_at DESC LIMIT 1;")
@@ -32,7 +32,7 @@ fi
if [ -z "$next_task" ]; then
# 4. No child/sibling found: fall back to highest-priority oldest PENDING task
- next_task=$(sqlite3 "$DB_PATH" "SELECT id FROM tasks WHERE state = 'PENDING' AND id != '$id'
+ FALLBACK_SQL="SELECT id FROM tasks WHERE state IN ('PENDING', 'QUEUED') AND id != '$id'
ORDER BY
CASE priority
WHEN 'critical' THEN 4
@@ -42,7 +42,8 @@ if [ -z "$next_task" ]; then
ELSE 0
END DESC,
created_at ASC
- LIMIT 1;")
+ LIMIT 1;"
+ next_task=$(sqlite3 "$DB_PATH" "$FALLBACK_SQL")
fi
echo "$next_task"
diff --git a/test/next-task.test.sh b/test/next-task.test.sh
new file mode 100644
index 0000000..3304efa
--- /dev/null
+++ b/test/next-task.test.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# test/next-task.test.sh
+
+set -euo pipefail
+
+# Create a temporary database
+TEST_DB=$(mktemp)
+# Remove it on every exit path; a failed assertion (exit 1) or a set -e abort would otherwise leak the file.
+trap 'rm -f "$TEST_DB"' EXIT
+
+sqlite3 "$TEST_DB" <<EOF
+CREATE TABLE tasks (
+ id TEXT PRIMARY KEY,
+ state TEXT NOT NULL,
+ parent_task_id TEXT,
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+ updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+ priority TEXT DEFAULT 'normal'
+);
+EOF
+
+# Insert a COMPLETED task that will not have a PENDING child or sibling,
+# to ensure the fallback logic is triggered.
+sqlite3 "$TEST_DB" "INSERT INTO tasks (id, state, created_at) VALUES ('completed-no-children', 'COMPLETED', '2023-01-01 12:00:00');"
+
+# Inject a QUEUED task (should be picked by fallback)
+sqlite3 "$TEST_DB" "INSERT INTO tasks (id, state, priority, created_at) VALUES ('queued-task-id', 'QUEUED', 'high', '2023-01-01 10:00:00');"
+
+# Inject a PENDING task (lower priority, should not be picked first by fallback)
+sqlite3 "$TEST_DB" "INSERT INTO tasks (id, state, priority, created_at) VALUES ('pending-task-id', 'PENDING', 'normal', '2023-01-01 11:00:00');"
+
+# Run the next-task script with the temporary database path
+export DB_PATH="$TEST_DB" # Override DB_PATH for the test
+SCRIPT_DIR="$(dirname "$(dirname "$0")")/scripts"
+NEXT_TASK_ID=$("$SCRIPT_DIR/next-task")
+
+# Assert that the QUEUED task is returned
+if [[ "$NEXT_TASK_ID" == "queued-task-id" ]]; then
+ echo "Test passed: QUEUED task was selected by fallback."
+else
+ echo "Test failed: Expected 'queued-task-id', got '$NEXT_TASK_ID'"
+ exit 1
+fi
diff --git a/web/app.js b/web/app.js
index bca41fa..77a2d9d 100644
--- a/web/app.js
+++ b/web/app.js
@@ -74,6 +74,24 @@ function formatDate(iso) {
});
}
+// Renders changestats as "<files> files, +<added> -<removed>", e.g. "5 files, +127 -43".
+// A null or undefined argument produces the empty string.
+export function formatChangestats(stats) {
+  if (stats === null || stats === undefined) return '';
+  return [`${stats.files_changed} files,`, `+${stats.lines_added}`, `-${stats.lines_removed}`].join(' ');
+}
+
+// Builds a <span class="changestats-badge"> whose text is formatChangestats(stats).
+// Returns null when stats is null/undefined, or when no document is available.
+// The optional doc parameter exists for testability: it defaults to the global
+// document when one is defined and to null otherwise.
+export function renderChangestatsBadge(stats, doc = (typeof document !== 'undefined' ? document : null)) {
+  if (stats == null || doc == null) return null;
+  const badge = Object.assign(doc.createElement('span'), { className: 'changestats-badge' });
+  badge.textContent = formatChangestats(stats);
+  return badge;
+}
+
function createTaskCard(task) {
const card = document.createElement('div');
card.className = 'task-card';
@@ -118,6 +136,13 @@ function createTaskCard(task) {
card.appendChild(desc);
}
+ // Changestats badge for COMPLETED/READY tasks
+ const CHANGESTATS_STATES = new Set(['COMPLETED', 'READY']);
+ if (CHANGESTATS_STATES.has(task.state) && task.changestats != null) {
+ const csBadge = renderChangestatsBadge(task.changestats);
+ if (csBadge) card.appendChild(csBadge);
+ }
+
// Footer: action buttons based on state
// Interrupted states (CANCELLED, FAILED, BUDGET_EXCEEDED) show both Resume and Restart.
// TIMED_OUT shows Resume only. Others show a single action.
@@ -339,6 +364,46 @@ export function setTaskFilterTab(tab) {
localStorage.setItem('taskFilterTab', tab);
}
+// ── Tab badge counts ───────────────────────────────────────────────────────────
+
+/**
+ * Tallies tasks for the three tabs that carry a count badge.
+ * A task counts as interrupted when its state is in INTERRUPTED_STATES,
+ * as ready when its state is 'READY', and as running when it is 'RUNNING'.
+ * Returns { interrupted: N, ready: N, running: N }.
+ */
+export function computeTabBadgeCounts(tasks) {
+  const counts = { interrupted: 0, ready: 0, running: 0 };
+  for (const { state } of tasks) {
+    if (INTERRUPTED_STATES.has(state)) counts.interrupted += 1;
+    if (state === 'READY') counts.ready += 1;
+    if (state === 'RUNNING') counts.running += 1;
+  }
+  return counts;
+}
+
+/**
+ * Syncs the count badges on the 'interrupted', 'ready' and 'running' tab
+ * buttons with computeTabBadgeCounts(tasks). Tabs whose button is not found
+ * are skipped; a missing .tab-count-badge span is created and appended.
+ * A zero count sets badge.hidden so the badge is not shown.
+ */
+export function updateTabBadges(tasks, doc = (typeof document !== 'undefined' ? document : null)) {
+  if (!doc) return;
+  Object.entries(computeTabBadgeCounts(tasks)).forEach(([tab, count]) => {
+    const button = doc.querySelector(`.tab[data-tab="${tab}"]`);
+    if (!button) return;
+    let badge = button.querySelector('.tab-count-badge');
+    if (!badge) {
+      badge = doc.createElement('span');
+      badge.className = 'tab-count-badge';
+      button.appendChild(badge);
+    }
+    badge.textContent = String(count);
+    badge.hidden = count === 0;
+  });
+}
+
// ── Stats computations ─────────────────────────────────────────────────────────
/**
@@ -961,6 +1026,8 @@ async function poll() {
const tasks = await fetchTasks();
if (isUserEditing()) return;
+ updateTabBadges(tasks);
+
const activeTab = getActiveTab();
switch (activeTab) {
case 'queue':
@@ -1648,6 +1715,33 @@ function renderTaskPanel(task, executions) {
exitEl.textContent = `exit: ${exec.ExitCode ?? '—'}`;
row.appendChild(exitEl);
+ if (exec.Changestats != null) {
+ const csBadge = renderChangestatsBadge(exec.Changestats);
+ if (csBadge) row.appendChild(csBadge);
+ }
+
+ if (exec.Commits && exec.Commits.length > 0) {
+ const commitList = document.createElement('div');
+ commitList.className = 'execution-commits';
+ for (const commit of exec.Commits) {
+ const item = document.createElement('div');
+ item.className = 'commit-item';
+
+ const hash = document.createElement('span');
+ hash.className = 'commit-hash';
+ hash.textContent = commit.hash.slice(0, 7);
+ item.appendChild(hash);
+
+ const msg = document.createElement('span');
+ msg.className = 'commit-msg';
+ msg.textContent = commit.message;
+ item.appendChild(msg);
+
+ commitList.appendChild(item);
+ }
+ row.appendChild(commitList);
+ }
+
const logsBtn = document.createElement('button');
logsBtn.className = 'btn-view-logs';
logsBtn.textContent = 'View Logs';
diff --git a/web/index.html b/web/index.html
index 19cba2c..59bc56e 100644
--- a/web/index.html
+++ b/web/index.html
@@ -23,9 +23,9 @@
</header>
<nav class="tab-bar">
<button class="tab active" data-tab="queue" title="Queue">⏳</button>
- <button class="tab" data-tab="interrupted" title="Interrupted">⚠️</button>
- <button class="tab" data-tab="ready" title="Ready">✅</button>
- <button class="tab" data-tab="running" title="Running">▶️</button>
+ <button class="tab" data-tab="interrupted" title="Interrupted">⚠️<span class="tab-count-badge" hidden></span></button>
+ <button class="tab" data-tab="ready" title="Ready">✅<span class="tab-count-badge" hidden></span></button>
+ <button class="tab" data-tab="running" title="Running">▶️<span class="tab-count-badge" hidden></span></button>
<button class="tab" data-tab="all" title="All">☰</button>
<button class="tab" data-tab="stats" title="Stats">📊</button>
<button class="tab" data-tab="settings" title="Settings">⚙️</button>
diff --git a/web/style.css b/web/style.css
index ee1b69c..e7d1de4 100644
--- a/web/style.css
+++ b/web/style.css
@@ -111,6 +111,25 @@ header h1 {
border-bottom-color: var(--accent);
}
+/* Pill-shaped counter shown next to a tab's icon. */
+.tab-count-badge {
+  /* Box */
+  display: inline-block;
+  min-width: 1.4em;
+  margin-left: 0.3em;
+  padding: 0 0.4em;
+  border-radius: 999px;
+  vertical-align: middle;
+  /* Text */
+  font-size: 0.7em;
+  font-weight: 700;
+  line-height: 1.5;
+  text-align: center;
+  /* Colours */
+  color: #fff;
+  background: var(--accent);
+}
+
+/* Keep the `hidden` attribute effective: the base rule's
+   `display: inline-block` would otherwise take precedence over the
+   browser's default styling for hidden elements. */
+.tab-count-badge[hidden] { display: none; }
+
/* Main layout */
main {
max-width: 640px;
@@ -774,6 +793,39 @@ dialog label select:focus {
flex-wrap: wrap;
}
+/* Full-width, vertically stacked list of commits, set off from the
+   content above it by a top border. */
+.execution-commits {
+  /* Layout */
+  display: flex;
+  flex-direction: column;
+  gap: 0.25rem;
+  width: 100%;
+  /* Separation from the preceding content */
+  margin-top: 0.25rem;
+  padding-top: 0.5rem;
+  border-top: 1px solid var(--border-light);
+  /* Text */
+  font-size: 0.8rem;
+  color: var(--text-muted);
+}
+
+/* One commit row: its children sit side by side on a shared text baseline. */
+.commit-item {
+  display: flex;
+  align-items: baseline;
+  gap: 0.5rem;
+}
+
+/* Commit hash rendered as a small monospace chip. */
+.commit-hash {
+  /* Fall back to the generic monospace family (the value .execution-id and
+     .changestats-badge use directly) in case --font-mono is not defined;
+     without a fallback an undefined variable would drop this declaration. */
+  font-family: var(--font-mono, monospace);
+  font-size: 0.75rem;
+  color: var(--text);
+  background: var(--bg-hover);
+  padding: 0.125rem 0.25rem;
+  border-radius: 0.25rem;
+}
+
+/* Keep the commit message on a single line and clip any overflow with an
+   ellipsis. */
+.commit-msg {
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
.execution-id {
font-family: monospace;
font-size: 0.72rem;
@@ -804,6 +856,13 @@ dialog label select:focus {
white-space: nowrap;
}
+/* Compact, muted, single-line monospace label for change statistics. */
+.changestats-badge {
+  white-space: nowrap;
+  color: var(--text-muted);
+  font-size: 0.72rem;
+  font-family: monospace;
+}
+
.btn-view-logs {
font-size: 0.72rem;
font-weight: 600;
diff --git a/web/test/changestats.test.mjs b/web/test/changestats.test.mjs
new file mode 100644
index 0000000..5363812
--- /dev/null
+++ b/web/test/changestats.test.mjs
@@ -0,0 +1,125 @@
+// changestats.test.mjs — Unit tests for changestats display functions.
+//
+// Run with: node --test web/test/changestats.test.mjs
+
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import { formatChangestats, renderChangestatsBadge } from '../app.js';
+
+// ── Mock DOM ───────────────────────────────────────────────────────────────────
+
+// Builds a minimal stand-in for `document` exposing only createElement().
+// Every element it creates is a plain object that records its tag, class
+// name, text content and appended children.
+function makeDoc() {
+  const createElement = (tag) => ({
+    tag,
+    className: '',
+    textContent: '',
+    children: [],
+    // Mirrors Node.appendChild: stores the child and hands it back.
+    appendChild(node) {
+      this.children.push(node);
+      return node;
+    },
+  });
+  return { createElement };
+}
+
+// ── formatChangestats ──────────────────────────────────────────────────────────
+
+describe('formatChangestats', () => {
+  // Each row: [test title, stats argument, expected formatted string].
+  const cases = [
+    [
+      'formats valid stats as "N files, +A -R"',
+      { files_changed: 5, lines_added: 127, lines_removed: 43 },
+      '5 files, +127 -43',
+    ],
+    ['returns empty string for null', null, ''],
+    ['returns empty string for undefined', undefined, ''],
+    [
+      'formats zero values correctly',
+      { files_changed: 0, lines_added: 0, lines_removed: 0 },
+      '0 files, +0 -0',
+    ],
+    // The expected text keeps the plural "files" even for a single file.
+    [
+      'formats single file correctly',
+      { files_changed: 1, lines_added: 10, lines_removed: 2 },
+      '1 files, +10 -2',
+    ],
+  ];
+
+  for (const [title, stats, expected] of cases) {
+    it(title, () => {
+      assert.equal(formatChangestats(stats), expected);
+    });
+  }
+});
+
+// ── renderChangestatsBadge ─────────────────────────────────────────────────────
+
+describe('renderChangestatsBadge', () => {
+  // Fresh stats object per call so no test can observe another's input.
+  const sampleStats = () => ({ files_changed: 5, lines_added: 127, lines_removed: 43 });
+  // Renders against a brand-new mock document each time.
+  const render = (stats) => renderChangestatsBadge(stats, makeDoc());
+
+  it('returns element with class changestats-badge for valid stats', () => {
+    const badge = render(sampleStats());
+    assert.ok(badge, 'element should not be null');
+    assert.equal(badge.className, 'changestats-badge');
+  });
+
+  it('returns element with correct text content', () => {
+    const badge = render(sampleStats());
+    assert.equal(badge.textContent, '5 files, +127 -43');
+  });
+
+  it('returns null for null stats', () => {
+    assert.equal(render(null), null);
+  });
+
+  it('returns null for undefined stats', () => {
+    assert.equal(render(undefined), null);
+  });
+});
+
+// ── State-based visibility ────────────────────────────────────────────────────
+//
+// Changestats badge should appear on COMPLETED (and READY) tasks that have
+// changestats data, and must not appear on QUEUED tasks.
+
+// Task states in which the changestats badge is expected to be shown.
+const CHANGESTATS_STATES = new Set(['COMPLETED', 'READY']);
+
+// Test-local predicate: true only when the task is in one of the states above
+// and its `changestats` is neither null nor undefined.
+function shouldShowChangestats(task) {
+  if (!CHANGESTATS_STATES.has(task.state)) {
+    return false;
+  }
+  return task.changestats != null;
+}
+
+describe('changestats badge visibility by task state', () => {
+  // Each row: [test title, task fixture, expected visibility].
+  const cases = [
+    [
+      'COMPLETED task with changestats shows badge',
+      { state: 'COMPLETED', changestats: { files_changed: 3, lines_added: 50, lines_removed: 10 } },
+      true,
+    ],
+    [
+      'READY task with changestats shows badge',
+      { state: 'READY', changestats: { files_changed: 1, lines_added: 5, lines_removed: 2 } },
+      true,
+    ],
+    [
+      'QUEUED task hides changestats',
+      { state: 'QUEUED', changestats: { files_changed: 3, lines_added: 50, lines_removed: 10 } },
+      false,
+    ],
+    ['COMPLETED task without changestats hides badge', { state: 'COMPLETED', changestats: null }, false],
+    ['RUNNING task hides changestats', { state: 'RUNNING', changestats: null }, false],
+    ['PENDING task hides changestats', { state: 'PENDING', changestats: null }, false],
+  ];
+
+  for (const [title, task, visible] of cases) {
+    it(title, () => {
+      assert.equal(shouldShowChangestats(task), visible);
+    });
+  }
+});
diff --git a/web/test/tab-badges.test.mjs b/web/test/tab-badges.test.mjs
new file mode 100644
index 0000000..c07338f
--- /dev/null
+++ b/web/test/tab-badges.test.mjs
@@ -0,0 +1,110 @@
+// tab-badges.test.mjs — TDD tests for computeTabBadgeCounts
+//
+// Tests the pure function that computes badge counts for the
+// 'interrupted', 'ready', and 'running' tabs.
+//
+// Run with: node --test web/test/tab-badges.test.mjs
+
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+
+// ── Inline implementation (will be replaced by import once exported) ───────────
+
+// Task states that count toward the 'interrupted' tab badge.
+const INTERRUPTED_STATES = new Set(['CANCELLED', 'FAILED', 'BUDGET_EXCEEDED', 'BLOCKED']);
+
+// Tallies, in a single pass over `tasks`, how many tasks belong to each
+// badge-bearing tab. Returns { interrupted, ready, running }; tasks in any
+// other state are not counted.
+function computeTabBadgeCounts(tasks) {
+  const counts = { interrupted: 0, ready: 0, running: 0 };
+  for (const task of tasks) {
+    const state = task.state;
+    if (INTERRUPTED_STATES.has(state)) {
+      counts.interrupted += 1;
+    }
+    switch (state) {
+      case 'READY':
+        counts.ready += 1;
+        break;
+      case 'RUNNING':
+        counts.running += 1;
+        break;
+      default:
+        break;
+    }
+  }
+  return counts;
+}
+
+// ── Helpers ────────────────────────────────────────────────────────────────────
+
+// Creates a minimal task fixture; its id and name are both derived from `state`.
+function makeTask(state) {
+  const name = `task-${state}`;
+  return { id: state, name, state };
+}
+
+// ── Tests ──────────────────────────────────────────────────────────────────────
+
+describe('computeTabBadgeCounts', () => {
+  // Builds one fixture task per given state and returns the computed counts.
+  const countsFor = (...states) => computeTabBadgeCounts(states.map((state) => makeTask(state)));
+
+  it('returns all zeros for empty task list', () => {
+    assert.deepEqual(countsFor(), { interrupted: 0, ready: 0, running: 0 });
+  });
+
+  it('counts RUNNING tasks', () => {
+    const { running, ready, interrupted } = countsFor('RUNNING', 'RUNNING', 'QUEUED');
+    assert.equal(running, 2);
+    assert.equal(ready, 0);
+    assert.equal(interrupted, 0);
+  });
+
+  it('counts READY tasks', () => {
+    const { ready, running, interrupted } = countsFor('READY', 'READY', 'QUEUED');
+    assert.equal(ready, 2);
+    assert.equal(running, 0);
+    assert.equal(interrupted, 0);
+  });
+
+  // One test per state that must feed the 'interrupted' badge.
+  for (const state of ['CANCELLED', 'FAILED', 'BUDGET_EXCEEDED', 'BLOCKED']) {
+    it(`counts ${state} as interrupted`, () => {
+      assert.equal(countsFor(state).interrupted, 1);
+    });
+  }
+
+  // States that must not feed the 'interrupted' badge.
+  for (const state of ['COMPLETED', 'TIMED_OUT']) {
+    it(`does not count ${state} as interrupted`, () => {
+      assert.equal(countsFor(state).interrupted, 0);
+    });
+  }
+
+  it('counts across multiple states simultaneously', () => {
+    const { running, ready, interrupted } = countsFor(
+      'RUNNING',
+      'RUNNING',
+      'READY',
+      'CANCELLED',
+      'FAILED',
+      'BLOCKED',
+      'QUEUED',
+      'COMPLETED',
+    );
+    assert.equal(running, 2);
+    assert.equal(ready, 1);
+    assert.equal(interrupted, 3);
+  });
+
+  it('returns zero for a tab when no tasks match that state', () => {
+    const { running, ready, interrupted } = countsFor('QUEUED', 'PENDING', 'COMPLETED');
+    assert.equal(running, 0);
+    assert.equal(ready, 0);
+    assert.equal(interrupted, 0);
+  });
+});