From b9aba3d242482fa9cd42f2a49b2767a73d4d2213 Mon Sep 17 00:00:00 2001 From: Peter Stone Date: Fri, 13 Mar 2026 03:14:53 +0000 Subject: feat: post-elaboration sanity check for tools, acceptance criteria, and dev practices Add sanitizeElaboratedTask() called after every elaboration response: - Infers missing allowed_tools from instruction keywords (Write/Edit/Read/Bash/Grep/Glob) - Auto-adds Read when Edit is present - Appends Acceptance Criteria section if none present - Appends TDD reminder for coding tasks without test mention Also tighten buildElaboratePrompt to require acceptance criteria and list concrete tool examples, reducing how often the model omits tools. Fixes class of failures where agents couldn't create files because the elaborator omitted Write from allowed_tools. Co-Authored-By: Claude Sonnet 4.6 --- internal/api/elaborate_test.go | 178 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) (limited to 'internal/api/elaborate_test.go') diff --git a/internal/api/elaborate_test.go b/internal/api/elaborate_test.go index 330c111..9ae2e98 100644 --- a/internal/api/elaborate_test.go +++ b/internal/api/elaborate_test.go @@ -30,6 +30,184 @@ func createFakeClaude(t *testing.T, output string, exitCode int) string { return script } +// hasTool is a test helper that reports whether name is in the tools slice. +func hasTool(tools []string, name string) bool { + for _, t := range tools { + if t == name { + return true + } + } + return false +} + +// --- sanitizeElaboratedTask unit tests --- + +func TestSanitize_AddsWriteWhenInstructionsMentionFileCreation(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "Create a new file called output.txt with the results.", + AllowedTools: []string{"Bash"}, + }, + } + sanitizeElaboratedTask(task) + if !hasTool(task.Agent.AllowedTools, "Write") { + t.Errorf("expected Write in allowed_tools, got %v", task.Agent.AllowedTools) + } +} + +func TestSanitize_AddsReadWhenEditIsPresent(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "Modify the configuration file.", + AllowedTools: []string{"Edit"}, + }, + } + sanitizeElaboratedTask(task) + if !hasTool(task.Agent.AllowedTools, "Read") { + t.Errorf("expected Read added alongside Edit, got %v", task.Agent.AllowedTools) + } +} + +func TestSanitize_NoDuplicateTools(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "Run go test ./...", + AllowedTools: []string{"Bash"}, + }, + } + sanitizeElaboratedTask(task) + count := 0 + for _, tool := range task.Agent.AllowedTools { + if tool == "Bash" { + count++ + } + } + if count != 1 { + t.Errorf("Bash duplicated in allowed_tools: %v", task.Agent.AllowedTools) + } +} + +func TestSanitize_AddsAcceptanceCriteriaWhenMissing(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "Do something useful with the codebase.", + AllowedTools: []string{"Bash"}, + }, + } + sanitizeElaboratedTask(task) + lower := strings.ToLower(task.Agent.Instructions) + if !strings.Contains(lower, "acceptance") && !strings.Contains(lower, "done when") { + t.Error("expected acceptance criteria section appended to instructions") + } +} + +func TestSanitize_NoopWhenAcceptanceCriteriaAlreadyPresent(t *testing.T) { + original := "Do something.\n\n## Acceptance Criteria\n- All tests pass." + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: original, + AllowedTools: []string{"Bash"}, + }, + } + sanitizeElaboratedTask(task) + if task.Agent.Instructions != original { + t.Errorf("instructions were modified when acceptance criteria were already present") + } +} + +func TestSanitize_AddsTDDReminderForCodingTaskWithoutTestMention(t *testing.T) { + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: "## Acceptance Criteria\nFix the bug.\n\nModify the handler to return 404 instead of 500.", + AllowedTools: []string{"Edit", "Read"}, + }, + } + sanitizeElaboratedTask(task) + lower := strings.ToLower(task.Agent.Instructions) + if !strings.Contains(lower, "tdd") && !strings.Contains(lower, "test") { + t.Error("expected TDD reminder for coding task without test mention") + } +} + +func TestSanitize_NoTDDReminderWhenTestsAlreadyMentioned(t *testing.T) { + original := "## Acceptance Criteria\nAll tests pass.\n\nEdit the file and run go test ./... to verify." + task := &elaboratedTask{ + Agent: elaboratedAgent{ + Instructions: original, + AllowedTools: []string{"Edit", "Read", "Bash"}, + }, + } + before := task.Agent.Instructions + sanitizeElaboratedTask(task) + // Should NOT add a second TDD block since tests are already mentioned. + // Count occurrences of "tdd" / "test" — just verify no double-append. + if strings.Count(strings.ToLower(task.Agent.Instructions), "tdd") > 1 { + t.Errorf("TDD block added twice; instructions:\n%s", task.Agent.Instructions) + } + _ = before +} + +func TestElaboratePrompt_RequiresAcceptanceCriteria(t *testing.T) { + prompt := buildElaboratePrompt("") + lower := strings.ToLower(prompt) + if !strings.Contains(lower, "acceptance criteria") { + t.Error("elaborate prompt should instruct the model to include acceptance criteria") + } +} + +func TestElaboratePrompt_RequiresAllRelevantTools(t *testing.T) { + prompt := buildElaboratePrompt("") + // Prompt must remind the model to include file-creating tools when needed. + if !strings.Contains(prompt, "Write") { + t.Error("elaborate prompt should mention the Write tool so models know to include it") + } +} + +func TestElaborateTask_SanitizationAppliedToResponse(t *testing.T) { + srv, _ := testServer(t) + + // Elaborator returns a task that needs Write (instructions say "create file") + // but does NOT include it in allowed_tools. + task := elaboratedTask{ + Name: "Generate report", + Description: "Creates a report file.", + Agent: elaboratedAgent{ + Type: "claude", + Model: "sonnet", + Instructions: "Create a new file called report.md with the analysis results.\n\n## Acceptance Criteria\n- report.md exists.", + MaxBudgetUSD: 0.5, + AllowedTools: []string{"Bash"}, // Write intentionally missing + }, + Timeout: "15m", + Priority: "normal", + Tags: []string{"report"}, + } + taskJSON, _ := json.Marshal(task) + wrapper := map[string]string{"result": string(taskJSON)} + wrapperJSON, _ := json.Marshal(wrapper) + + srv.elaborateCmdPath = createFakeClaude(t, string(wrapperJSON), 0) + + body := `{"prompt":"generate a report"}` + req := httptest.NewRequest("POST", "/api/tasks/elaborate", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("status: want 200, got %d; body: %s", w.Code, w.Body.String()) + } + + var result elaboratedTask + if err := json.NewDecoder(w.Body).Decode(&result); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if !hasTool(result.Agent.AllowedTools, "Write") { + t.Errorf("expected Write in sanitized allowed_tools, got %v", result.Agent.AllowedTools) + } +} + func TestElaboratePrompt_ContainsWorkDir(t *testing.T) { prompt := buildElaboratePrompt("/some/custom/path") if !strings.Contains(prompt, "/some/custom/path") { -- cgit v1.2.3