diff options
Diffstat (limited to 'internal/api')
| -rw-r--r-- | internal/api/docs/RAW_NARRATIVE.md | 117 | ||||
| -rw-r--r-- | internal/api/elaborate.go | 70 | ||||
| -rw-r--r-- | internal/api/elaborate_test.go | 178 | ||||
| -rw-r--r-- | internal/api/server.go | 36 | ||||
| -rw-r--r-- | internal/api/server_test.go | 123 |
5 files changed, 515 insertions, 9 deletions
diff --git a/internal/api/docs/RAW_NARRATIVE.md b/internal/api/docs/RAW_NARRATIVE.md index 8fe69b6..3c7768a 100644 --- a/internal/api/docs/RAW_NARRATIVE.md +++ b/internal/api/docs/RAW_NARRATIVE.md @@ -1,4 +1,121 @@ +--- 2026-03-10T09:33:34Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-10T09:33:34Z --- +do something + +--- 2026-03-10T09:33:34Z --- +do something + +--- 2026-03-10T16:46:39Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-10T16:46:39Z --- +do something + +--- 2026-03-10T16:46:39Z --- +do something + +--- 2026-03-10T17:16:31Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-10T17:16:31Z --- +do something + +--- 2026-03-10T17:16:31Z --- +do something + +--- 2026-03-10T17:25:16Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-10T17:25:16Z --- +do something + +--- 2026-03-10T17:25:16Z --- +do something + +--- 2026-03-10T23:54:53Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-10T23:54:53Z --- +do something + +--- 2026-03-10T23:54:53Z --- +do something + +--- 2026-03-10T23:55:54Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-10T23:55:54Z --- +do something + +--- 2026-03-10T23:55:54Z --- +do something + +--- 2026-03-10T23:56:06Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-10T23:56:06Z --- +do something + +--- 2026-03-10T23:56:06Z --- +do something + +--- 2026-03-10T23:57:26Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-10T23:57:26Z --- +do something + +--- 2026-03-10T23:57:26Z --- +do something + +--- 2026-03-11T07:40:17Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-11T07:40:17Z --- +do something + +--- 2026-03-11T07:40:17Z --- +do something + +--- 2026-03-11T08:25:03Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-11T08:25:04Z --- +do something + +--- 2026-03-11T08:25:04Z --- +do something + +--- 2026-03-12T21:00:28Z --- +generate a report + +--- 2026-03-12T21:00:33Z --- +generate a report + +--- 2026-03-12T21:00:34Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-12T21:00:34Z --- +do something + +--- 2026-03-12T21:00:34Z --- +do something + +--- 2026-03-13T02:27:38Z --- +generate a report + +--- 2026-03-13T02:27:38Z --- +run the Go test suite with race detector and fail if coverage < 80% + +--- 2026-03-13T02:27:38Z --- +do something + +--- 2026-03-13T02:27:38Z --- +do something + --- 2026-03-11T19:04:51Z --- run the Go test suite with race detector and fail if coverage < 80% diff --git a/internal/api/elaborate.go b/internal/api/elaborate.go index eb686bf..c6d08f4 100644 --- a/internal/api/elaborate.go +++ b/internal/api/elaborate.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "path/filepath" + "sort" "strings" "time" ) @@ -32,10 +33,10 @@ Output ONLY a valid JSON object matching this schema (no markdown fences, no pro "agent": { "type": "claude" | "gemini", "model": string — "sonnet" for claude, "gemini-2.5-flash-lite" for gemini, - "instructions": string — detailed, step-by-step instructions for the agent, + "instructions": string — detailed, step-by-step instructions for the agent. Must end with a "## Acceptance Criteria" section listing measurable conditions that define success. For coding tasks, include TDD requirements (write failing tests first, then implement), ` + workDirLine + ` "max_budget_usd": number — conservative estimate (0.25–5.00), - "allowed_tools": array — only tools the task genuinely needs + "allowed_tools": array — every tool the task genuinely needs. Include "Write" if creating files, "Edit" if modifying files, "Read" if reading files, "Bash" for shell/git/test commands, "Grep"/"Glob" for searching. }, "timeout": string — e.g. "15m", "priority": string — "normal" | "high" | "low", @@ -62,6 +63,69 @@ type elaboratedAgent struct { AllowedTools []string `json:"allowed_tools"` } +// sanitizeElaboratedTask enforces tool completeness and dev practice compliance. +// It modifies t in place, inferring missing tools from instruction keywords and +// appending required sections when they are absent. +func sanitizeElaboratedTask(t *elaboratedTask) { + lower := strings.ToLower(t.Agent.Instructions) + + // Build current tool set. + toolSet := make(map[string]bool, len(t.Agent.AllowedTools)) + for _, tool := range t.Agent.AllowedTools { + toolSet[tool] = true + } + + // Infer missing tools from instruction keywords. + type rule struct { + tool string + keywords []string + } + rules := []rule{ + {"Write", []string{"create file", "write file", "new file", "write to", "save to", "output to", "generate file", "creates a file", "create a new file"}}, + {"Edit", []string{"edit", "modify", "refactor", "replace", "patch"}}, + {"Read", []string{"read", "inspect", "examine", "look at the file"}}, + {"Bash", []string{"run", "execute", "bash", "shell", "command", "build", "compile", "git", "install", "make"}}, + {"Grep", []string{"search for", "grep", "find in", "locate in"}}, + {"Glob", []string{"find file", "list file", "search file"}}, + } + for _, r := range rules { + if toolSet[r.tool] { + continue + } + for _, kw := range r.keywords { + if strings.Contains(lower, kw) { + toolSet[r.tool] = true + break + } + } + } + // Edit without Read is almost always wrong. + if toolSet["Edit"] && !toolSet["Read"] { + toolSet["Read"] = true + } + // Rebuild the list only when tools were added. + if len(toolSet) > len(t.Agent.AllowedTools) { + tools := make([]string, 0, len(toolSet)) + for tool := range toolSet { + tools = append(tools, tool) + } + sort.Strings(tools) + t.Agent.AllowedTools = tools + } + + // Append an acceptance criteria section when none is present. + if !strings.Contains(lower, "acceptance") && + !strings.Contains(lower, "done when") && + !strings.Contains(lower, "success criteria") { + t.Agent.Instructions += "\n\n## Acceptance Criteria\nBefore finishing, verify all stated goals are met, tests pass (if applicable), and no unintended side effects were introduced." + } + + // Append a TDD reminder for coding tasks that do not already mention tests. + if (toolSet["Edit"] || toolSet["Write"]) && !strings.Contains(lower, "test") { + t.Agent.Instructions += "\n\n## Dev Practices\nFollow TDD: write a failing test first, then implement the minimum code to make it pass. Commit all changes before finishing." + } +} + // claudeJSONResult is the top-level object returned by `claude --output-format json`. type claudeJSONResult struct { Result string `json:"result"` @@ -214,5 +278,7 @@ func (s *Server) handleElaborateTask(w http.ResponseWriter, r *http.Request) { result.Agent.Type = "claude" } + sanitizeElaboratedTask(&result) + writeJSON(w, http.StatusOK, result) } diff --git a/internal/api/elaborate_test.go b/internal/api/elaborate_test.go index 330c111..9ae2e98 100644 --- a/internal/api/elaborate_test.go +++ b/internal/api/elaborate_test.go @@ -30,6 +30,184 @@ func createFakeClaude(t *testing.T, output string, exitCode int) string { return script } +// hasTool is a test helper that reports whether name is in the tools slice. +func hasTool(tools []string, name string) bool { + for _, t := range tools { + if t == name { + return true + } + } + return false +} + +// --- sanitizeElaboratedTask unit tests --- + +func TestSanitize_AddsWriteWhenInstructionsMentionFileCreation(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "Create a new file called output.txt with the results.", + AllowedTools: []string{"Bash"}, + }, + } + sanitizeElaboratedTask(task) + if !hasTool(task.Agent.AllowedTools, "Write") { + t.Errorf("expected Write in allowed_tools, got %v", task.Agent.AllowedTools) + } +} + +func TestSanitize_AddsReadWhenEditIsPresent(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "Modify the configuration file.", + AllowedTools: []string{"Edit"}, + }, + } + sanitizeElaboratedTask(task) + if !hasTool(task.Agent.AllowedTools, "Read") { + t.Errorf("expected Read added alongside Edit, got %v", task.Agent.AllowedTools) + } +} + +func TestSanitize_NoDuplicateTools(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "Run go test ./...", + AllowedTools: []string{"Bash"}, + }, + } + sanitizeElaboratedTask(task) + count := 0 + for _, tool := range task.Agent.AllowedTools { + if tool == "Bash" { + count++ + } + } + if count != 1 { + t.Errorf("Bash duplicated in allowed_tools: %v", task.Agent.AllowedTools) + } +} + +func TestSanitize_AddsAcceptanceCriteriaWhenMissing(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "Do something useful with the codebase.", + AllowedTools: []string{"Bash"}, + }, + } + sanitizeElaboratedTask(task) + lower := strings.ToLower(task.Agent.Instructions) + if !strings.Contains(lower, "acceptance") && !strings.Contains(lower, "done when") { + t.Error("expected acceptance criteria section appended to instructions") + } +} + +func TestSanitize_NoopWhenAcceptanceCriteriaAlreadyPresent(t *testing.T) { + original := "Do something.\n\n## Acceptance Criteria\n- All tests pass." + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: original, + AllowedTools: []string{"Bash"}, + }, + } + sanitizeElaboratedTask(task) + if task.Agent.Instructions != original { + t.Errorf("instructions were modified when acceptance criteria were already present") + } +} + +func TestSanitize_AddsTDDReminderForCodingTaskWithoutTestMention(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "## Acceptance Criteria\nFix the bug.\n\nModify the handler to return 404 instead of 500.", + AllowedTools: []string{"Edit", "Read"}, + }, + } + sanitizeElaboratedTask(task) + lower := strings.ToLower(task.Agent.Instructions) + if !strings.Contains(lower, "tdd") && !strings.Contains(lower, "test") { + t.Error("expected TDD reminder for coding task without test mention") + } +} + +func TestSanitize_NoTDDReminderWhenTestsAlreadyMentioned(t *testing.T) { + original := "## Acceptance Criteria\nAll tests pass.\n\nEdit the file and run go test ./... to verify." + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: original, + AllowedTools: []string{"Edit", "Read", "Bash"}, + }, + } + before := task.Agent.Instructions + sanitizeElaboratedTask(task) + // Should NOT add a second TDD block since tests are already mentioned. + // Count occurrences of "tdd" / "test" — just verify no double-append. + if strings.Count(strings.ToLower(task.Agent.Instructions), "tdd") > 1 { + t.Errorf("TDD block added twice; instructions:\n%s", task.Agent.Instructions) + } + _ = before +} + +func TestElaboratePrompt_RequiresAcceptanceCriteria(t *testing.T) { + prompt := buildElaboratePrompt("") + lower := strings.ToLower(prompt) + if !strings.Contains(lower, "acceptance criteria") { + t.Error("elaborate prompt should instruct the model to include acceptance criteria") + } +} + +func TestElaboratePrompt_RequiresAllRelevantTools(t *testing.T) { + prompt := buildElaboratePrompt("") + // Prompt must remind the model to include file-creating tools when needed. + if !strings.Contains(prompt, "Write") { + t.Error("elaborate prompt should mention the Write tool so models know to include it") + } +} + +func TestElaborateTask_SanitizationAppliedToResponse(t *testing.T) { + srv, _ := testServer(t) + + // Elaborator returns a task that needs Write (instructions say "create file") + // but does NOT include it in allowed_tools. + task := elaboratedTask{ + Name: "Generate report", + Description: "Creates a report file.", + Agent: elaboratedAgent{ + Type: "claude", + Model: "sonnet", + Instructions: "Create a new file called report.md with the analysis results.\n\n## Acceptance Criteria\n- report.md exists.", + MaxBudgetUSD: 0.5, + AllowedTools: []string{"Bash"}, // Write intentionally missing + }, + Timeout: "15m", + Priority: "normal", + Tags: []string{"report"}, + } + taskJSON, _ := json.Marshal(task) + wrapper := map[string]string{"result": string(taskJSON)} + wrapperJSON, _ := json.Marshal(wrapper) + + srv.elaborateCmdPath = createFakeClaude(t, string(wrapperJSON), 0) + + body := `{"prompt":"generate a report"}` + req := httptest.NewRequest("POST", "/api/tasks/elaborate", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("status: want 200, got %d; body: %s", w.Code, w.Body.String()) + } + + var result elaboratedTask + if err := json.NewDecoder(w.Body).Decode(&result); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if !hasTool(result.Agent.AllowedTools, "Write") { + t.Errorf("expected Write in sanitized allowed_tools, got %v", result.Agent.AllowedTools) + } +} + func TestElaboratePrompt_ContainsWorkDir(t *testing.T) { prompt := buildElaboratePrompt("/some/custom/path") if !strings.Contains(prompt, "/some/custom/path") { diff --git a/internal/api/server.go b/internal/api/server.go index c545253..df35536 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -24,6 +24,7 @@ type questionStore interface { GetLatestExecution(taskID string) (*storage.Execution, error) UpdateTaskQuestion(taskID, questionJSON string) error UpdateTaskState(id string, newState task.State) error + AppendTaskInteraction(taskID string, interaction task.Interaction) error } // Server provides the REST API and WebSocket endpoint for Claudomator. @@ -250,6 +251,25 @@ func (s *Server) handleAnswerQuestion(w http.ResponseWriter, r *http.Request) { return } + // Record the Q&A interaction before clearing the question. + if tk.QuestionJSON != "" { + var qData struct { + Text string `json:"text"` + Options []string `json:"options"` + } + if jsonErr := json.Unmarshal([]byte(tk.QuestionJSON), &qData); jsonErr == nil { + interaction := task.Interaction{ + QuestionText: qData.Text, + Options: qData.Options, + Answer: input.Answer, + AskedAt: tk.UpdatedAt, + } + if appendErr := s.questionStore.AppendTaskInteraction(taskID, interaction); appendErr != nil { + s.logger.Error("failed to append interaction", "taskID", taskID, "error", appendErr) + } + } + } + // Clear the question and transition to QUEUED. if err := s.questionStore.UpdateTaskQuestion(taskID, ""); err != nil { writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to clear question"}) @@ -277,6 +297,14 @@ func (s *Server) handleAnswerQuestion(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, map[string]string{"message": "task queued for resume", "task_id": taskID}) } +// resumableStates are the task states from which a session-based resume is valid. +var resumableStates = map[task.State]string{ + task.StateTimedOut: "Your previous execution timed out. Please continue where you left off and complete the task.", + task.StateCancelled: "Your previous execution was cancelled. Please continue where you left off and complete the task.", + task.StateFailed: "Your previous execution failed. Please review what happened and continue from where you left off.", + task.StateBudgetExceeded: "Your previous execution exceeded its budget. Please continue where you left off and complete the task.", +} + func (s *Server) handleResumeTimedOutTask(w http.ResponseWriter, r *http.Request) { taskID := r.PathValue("id") @@ -285,8 +313,10 @@ func (s *Server) handleResumeTimedOutTask(w http.ResponseWriter, r *http.Request writeJSON(w, http.StatusNotFound, map[string]string{"error": "task not found"}) return } - if tk.State != task.StateTimedOut { - writeJSON(w, http.StatusConflict, map[string]string{"error": "task is not timed out"}) + + resumeMsg, resumable := resumableStates[tk.State] + if !resumable { + writeJSON(w, http.StatusConflict, map[string]string{"error": "task is not in a resumable state"}) return } @@ -302,7 +332,7 @@ func (s *Server) handleResumeTimedOutTask(w http.ResponseWriter, r *http.Request ID: uuid.New().String(), TaskID: taskID, ResumeSessionID: latest.SessionID, - ResumeAnswer: "Your previous execution timed out. Please continue where you left off and complete the task.", + ResumeAnswer: resumeMsg, } if err := s.pool.SubmitResume(context.Background(), tk, resumeExec); err != nil { writeJSON(w, http.StatusServiceUnavailable, map[string]string{"error": err.Error()}) diff --git a/internal/api/server_test.go b/internal/api/server_test.go index afdc9d2..c90e3b3 100644 --- a/internal/api/server_test.go +++ b/internal/api/server_test.go @@ -894,10 +894,11 @@ func TestServer_CancelTask_Completed_Returns409(t *testing.T) { // mockQuestionStore implements questionStore for testing handleAnswerQuestion. type mockQuestionStore struct { - getTaskFn func(id string) (*task.Task, error) - getLatestExecutionFn func(taskID string) (*storage.Execution, error) - updateTaskQuestionFn func(taskID, questionJSON string) error - updateTaskStateFn func(id string, newState task.State) error + getTaskFn func(id string) (*task.Task, error) + getLatestExecutionFn func(taskID string) (*storage.Execution, error) + updateTaskQuestionFn func(taskID, questionJSON string) error + updateTaskStateFn func(id string, newState task.State) error + appendInteractionFn func(taskID string, interaction task.Interaction) error } func (m *mockQuestionStore) GetTask(id string) (*task.Task, error) { @@ -912,6 +913,12 @@ func (m *mockQuestionStore) UpdateTaskQuestion(taskID, questionJSON string) erro func (m *mockQuestionStore) UpdateTaskState(id string, newState task.State) error { return m.updateTaskStateFn(id, newState) } +func (m *mockQuestionStore) AppendTaskInteraction(taskID string, interaction task.Interaction) error { + if m.appendInteractionFn != nil { + return m.appendInteractionFn(taskID, interaction) + } + return nil +} func TestServer_AnswerQuestion_UpdateQuestionFails_Returns500(t *testing.T) { srv, _ := testServer(t) @@ -1178,6 +1185,114 @@ func TestResumeTimedOut_ResponseShape(t *testing.T) { } } +func TestResumeInterrupted_Cancelled_Success_Returns202(t *testing.T) { + srv, store := testServer(t) + createTaskWithState(t, store, "resume-cancelled-1", task.StateCancelled) + + exec := &storage.Execution{ + ID: "exec-cancelled-1", + TaskID: "resume-cancelled-1", + SessionID: "550e8400-e29b-41d4-a716-446655440030", + Status: "CANCELLED", + } + if err := store.CreateExecution(exec); err != nil { + t.Fatalf("create execution: %v", err) + } + + req := httptest.NewRequest("POST", "/api/tasks/resume-cancelled-1/resume", nil) + w := httptest.NewRecorder() + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusAccepted { + t.Errorf("status: want 202, got %d; body: %s", w.Code, w.Body.String()) + } + got, _ := store.GetTask("resume-cancelled-1") + if got.State != task.StateQueued && got.State != task.StateRunning && got.State != task.StateReady { + t.Errorf("task state: want QUEUED/RUNNING/READY after resume, got %v", got.State) + } +} + +func TestResumeInterrupted_Failed_Success_Returns202(t *testing.T) { + srv, store := testServer(t) + createTaskWithState(t, store, "resume-failed-1", task.StateFailed) + + exec := &storage.Execution{ + ID: "exec-failed-1", + TaskID: "resume-failed-1", + SessionID: "550e8400-e29b-41d4-a716-446655440031", + Status: "FAILED", + } + if err := store.CreateExecution(exec); err != nil { + t.Fatalf("create execution: %v", err) + } + + req := httptest.NewRequest("POST", "/api/tasks/resume-failed-1/resume", nil) + w := httptest.NewRecorder() + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusAccepted { + t.Errorf("status: want 202, got %d; body: %s", w.Code, w.Body.String()) + } + got, _ := store.GetTask("resume-failed-1") + if got.State != task.StateQueued && got.State != task.StateRunning && got.State != task.StateReady { + t.Errorf("task state: want QUEUED/RUNNING/READY after resume, got %v", got.State) + } +} + +func TestResumeInterrupted_BudgetExceeded_Success_Returns202(t *testing.T) { + srv, store := testServer(t) + createTaskWithState(t, store, "resume-budget-1", task.StateBudgetExceeded) + + exec := &storage.Execution{ + ID: "exec-budget-1", + TaskID: "resume-budget-1", + SessionID: "550e8400-e29b-41d4-a716-446655440032", + Status: "BUDGET_EXCEEDED", + } + if err := store.CreateExecution(exec); err != nil { + t.Fatalf("create execution: %v", err) + } + + req := httptest.NewRequest("POST", "/api/tasks/resume-budget-1/resume", nil) + w := httptest.NewRecorder() + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusAccepted { + t.Errorf("status: want 202, got %d; body: %s", w.Code, w.Body.String()) + } + got, _ := store.GetTask("resume-budget-1") + if got.State != task.StateQueued && got.State != task.StateRunning && got.State != task.StateReady { + t.Errorf("task state: want QUEUED/RUNNING/READY after resume, got %v", got.State) + } +} + +func TestResumeInterrupted_NoSession_Returns500(t *testing.T) { + srv, store := testServer(t) + createTaskWithState(t, store, "resume-nosess-1", task.StateCancelled) + + // No execution — no session ID available. + req := httptest.NewRequest("POST", "/api/tasks/resume-nosess-1/resume", nil) + w := httptest.NewRecorder() + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusInternalServerError { + t.Errorf("status: want 500, got %d; body: %s", w.Code, w.Body.String()) + } +} + +func TestResumeInterrupted_WrongState_Returns409(t *testing.T) { + srv, store := testServer(t) + createTaskWithState(t, store, "resume-wrong-1", task.StatePending) + + req := httptest.NewRequest("POST", "/api/tasks/resume-wrong-1/resume", nil) + w := httptest.NewRecorder() + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusConflict { + t.Errorf("status: want 409, got %d; body: %s", w.Code, w.Body.String()) + } +} + func TestRateLimit_ValidateRejectsExcess(t *testing.T) { srv, _ := testServer(t) srv.elaborateLimiter = newIPRateLimiter(0, 1) |
