summary | refs | log | tree | commit | diff
path: root/internal
diff options
context:
space:
mode:
Diffstat (limited to 'internal')
-rw-r--r-- internal/api/elaborate.go 70
-rw-r--r-- internal/api/elaborate_test.go 178
2 files changed, 246 insertions, 2 deletions
diff --git a/internal/api/elaborate.go b/internal/api/elaborate.go
index eb686bf..c6d08f4 100644
--- a/internal/api/elaborate.go
+++ b/internal/api/elaborate.go
@@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
+ "sort"
"strings"
"time"
)
@@ -32,10 +33,10 @@ Output ONLY a valid JSON object matching this schema (no markdown fences, no pro
"agent": {
"type": "claude" | "gemini",
"model": string — "sonnet" for claude, "gemini-2.5-flash-lite" for gemini,
- "instructions": string — detailed, step-by-step instructions for the agent,
+ "instructions": string — detailed, step-by-step instructions for the agent. Must end with a "## Acceptance Criteria" section listing measurable conditions that define success. For coding tasks, include TDD requirements (write failing tests first, then implement),
` + workDirLine + `
"max_budget_usd": number — conservative estimate (0.25–5.00),
- "allowed_tools": array — only tools the task genuinely needs
+ "allowed_tools": array — every tool the task genuinely needs. Include "Write" if creating files, "Edit" if modifying files, "Read" if reading files, "Bash" for shell/git/test commands, "Grep"/"Glob" for searching.
},
"timeout": string — e.g. "15m",
"priority": string — "normal" | "high" | "low",
@@ -62,6 +63,69 @@ type elaboratedAgent struct {
AllowedTools []string `json:"allowed_tools"`
}
+// sanitizeElaboratedTask enforces tool completeness and dev practice compliance.
+// It modifies t in place: tools missing from t.Agent.AllowedTools are inferred
+// from keywords in the instructions, and required sections are appended to
+// t.Agent.Instructions when they are absent.
+//
+// Keyword matching is a case-insensitive substring test, so it errs on the side
+// of granting a tool (for example, "read" also matches "thread").
+func sanitizeElaboratedTask(t *elaboratedTask) {
+	// Every keyword check below runs against this snapshot of the original
+	// instructions, so text appended later never influences a later check.
+	lower := strings.ToLower(t.Agent.Instructions)
+
+	// Build current tool set.
+	toolSet := make(map[string]bool, len(t.Agent.AllowedTools))
+	for _, tool := range t.Agent.AllowedTools {
+		toolSet[tool] = true
+	}
+	// Number of distinct tools supplied by the caller. Comparing against this,
+	// rather than len(t.Agent.AllowedTools), keeps duplicate entries in the
+	// input from masking a newly inferred tool.
+	supplied := len(toolSet)
+
+	// Infer missing tools from instruction keywords.
+	type rule struct {
+		tool     string
+		keywords []string
+	}
+	rules := []rule{
+		{"Write", []string{"create file", "write file", "new file", "write to", "save to", "output to", "generate file", "creates a file", "create a new file"}},
+		{"Edit", []string{"edit", "modify", "refactor", "replace", "patch"}},
+		{"Read", []string{"read", "inspect", "examine", "look at the file"}},
+		{"Bash", []string{"run", "execute", "bash", "shell", "command", "build", "compile", "git", "install", "make"}},
+		{"Grep", []string{"search for", "grep", "find in", "locate in"}},
+		{"Glob", []string{"find file", "list file", "search file"}},
+	}
+	for _, r := range rules {
+		if toolSet[r.tool] {
+			continue
+		}
+		for _, kw := range r.keywords {
+			if strings.Contains(lower, kw) {
+				toolSet[r.tool] = true
+				break
+			}
+		}
+	}
+	// Edit without Read is almost always wrong.
+	if toolSet["Edit"] && !toolSet["Read"] {
+		toolSet["Read"] = true
+	}
+	// Rebuild the list only when tools were added. The rebuilt list is sorted
+	// and free of duplicates; otherwise the caller's list is left untouched.
+	if len(toolSet) > supplied {
+		tools := make([]string, 0, len(toolSet))
+		for tool := range toolSet {
+			tools = append(tools, tool)
+		}
+		sort.Strings(tools)
+		t.Agent.AllowedTools = tools
+	}
+
+	// Append an acceptance criteria section when none is present.
+	if !strings.Contains(lower, "acceptance") &&
+		!strings.Contains(lower, "done when") &&
+		!strings.Contains(lower, "success criteria") {
+		t.Agent.Instructions += "\n\n## Acceptance Criteria\nBefore finishing, verify all stated goals are met, tests pass (if applicable), and no unintended side effects were introduced."
+	}
+
+	// Append a TDD reminder for coding tasks (Edit or Write granted) whose
+	// original instructions do not already mention tests.
+	if (toolSet["Edit"] || toolSet["Write"]) && !strings.Contains(lower, "test") {
+		t.Agent.Instructions += "\n\n## Dev Practices\nFollow TDD: write a failing test first, then implement the minimum code to make it pass. Commit all changes before finishing."
+	}
+}
+
// claudeJSONResult is the top-level object returned by `claude --output-format json`.
type claudeJSONResult struct {
Result string `json:"result"`
@@ -214,5 +278,7 @@ func (s *Server) handleElaborateTask(w http.ResponseWriter, r *http.Request) {
result.Agent.Type = "claude"
}
+ sanitizeElaboratedTask(&result)
+
writeJSON(w, http.StatusOK, result)
}
diff --git a/internal/api/elaborate_test.go b/internal/api/elaborate_test.go
index 330c111..9ae2e98 100644
--- a/internal/api/elaborate_test.go
+++ b/internal/api/elaborate_test.go
@@ -30,6 +30,184 @@ func createFakeClaude(t *testing.T, output string, exitCode int) string {
return script
}
+// hasTool is a test helper: it returns true when some element of tools is
+// equal to name, and false otherwise (including for an empty or nil slice).
+func hasTool(tools []string, name string) bool {
+	for i := range tools {
+		if tools[i] == name {
+			return true
+		}
+	}
+	return false
+}
+
+// --- sanitizeElaboratedTask unit tests ---
+
+// TestSanitize_AddsWriteWhenInstructionsMentionFileCreation checks that Write
+// is added when the instructions talk about creating a file but only Bash was
+// granted.
+func TestSanitize_AddsWriteWhenInstructionsMentionFileCreation(t *testing.T) {
+	agent := elaboratedAgent{
+		Instructions: "Create a new file called output.txt with the results.",
+		AllowedTools: []string{"Bash"},
+	}
+	task := elaboratedTask{Agent: agent}
+
+	sanitizeElaboratedTask(&task)
+
+	if got := task.Agent.AllowedTools; !hasTool(got, "Write") {
+		t.Errorf("expected Write in allowed_tools, got %v", got)
+	}
+}
+
+// TestSanitize_AddsReadWhenEditIsPresent checks that a task granted Edit but
+// not Read comes back with Read in its tool list.
+func TestSanitize_AddsReadWhenEditIsPresent(t *testing.T) {
+	agent := elaboratedAgent{
+		Instructions: "Modify the configuration file.",
+		AllowedTools: []string{"Edit"},
+	}
+	task := elaboratedTask{Agent: agent}
+
+	sanitizeElaboratedTask(&task)
+
+	if got := task.Agent.AllowedTools; !hasTool(got, "Read") {
+		t.Errorf("expected Read added alongside Edit, got %v", got)
+	}
+}
+
+func TestSanitize_NoDuplicateTools(t *testing.T) {
+ task := &elaboratedTask{
+ Agent: elaboratedAgent{
+ Instructions: "Run go test ./...",
+ AllowedTools: []string{"Bash"},
+ },
+ }
+ sanitizeElaboratedTask(task)
+ count := 0
+ for _, tool := range task.Agent.AllowedTools {
+ if tool == "Bash" {
+ count++
+ }
+ }
+ if count != 1 {
+ t.Errorf("Bash duplicated in allowed_tools: %v", task.Agent.AllowedTools)
+ }
+}
+
+func TestSanitize_AddsAcceptanceCriteriaWhenMissing(t *testing.T) {
+ task := &elaboratedTask{
+ Agent: elaboratedAgent{
+ Instructions: "Do something useful with the codebase.",
+ AllowedTools: []string{"Bash"},
+ },
+ }
+ sanitizeElaboratedTask(task)
+ lower := strings.ToLower(task.Agent.Instructions)
+ if !strings.Contains(lower, "acceptance") && !strings.Contains(lower, "done when") {
+ t.Error("expected acceptance criteria section appended to instructions")
+ }
+}
+
+func TestSanitize_NoopWhenAcceptanceCriteriaAlreadyPresent(t *testing.T) {
+ original := "Do something.\n\n## Acceptance Criteria\n- All tests pass."
+ task := &elaboratedTask{
+ Agent: elaboratedAgent{
+ Instructions: original,
+ AllowedTools: []string{"Bash"},
+ },
+ }
+ sanitizeElaboratedTask(task)
+ if task.Agent.Instructions != original {
+ t.Errorf("instructions were modified when acceptance criteria were already present")
+ }
+}
+
+func TestSanitize_AddsTDDReminderForCodingTaskWithoutTestMention(t *testing.T) {
+ task := &elaboratedTask{
+ Agent: elaboratedAgent{
+ Instructions: "## Acceptance Criteria\nFix the bug.\n\nModify the handler to return 404 instead of 500.",
+ AllowedTools: []string{"Edit", "Read"},
+ },
+ }
+ sanitizeElaboratedTask(task)
+ lower := strings.ToLower(task.Agent.Instructions)
+ if !strings.Contains(lower, "tdd") && !strings.Contains(lower, "test") {
+ t.Error("expected TDD reminder for coding task without test mention")
+ }
+}
+
+// TestSanitize_NoTDDReminderWhenTestsAlreadyMentioned checks that no TDD
+// reminder is appended when the instructions already mention tests.
+//
+// The input already has an acceptance criteria section and already lists
+// every tool its wording implies, so sanitization must leave the instructions
+// exactly as they were; any difference means a section was appended.
+func TestSanitize_NoTDDReminderWhenTestsAlreadyMentioned(t *testing.T) {
+	original := "## Acceptance Criteria\nAll tests pass.\n\nEdit the file and run go test ./... to verify."
+	task := &elaboratedTask{
+		Agent: elaboratedAgent{
+			Instructions: original,
+			AllowedTools: []string{"Edit", "Read", "Bash"},
+		},
+	}
+	sanitizeElaboratedTask(task)
+	if task.Agent.Instructions != original {
+		t.Errorf("instructions modified although tests were already mentioned; instructions:\n%s", task.Agent.Instructions)
+	}
+}
+
+// TestElaboratePrompt_RequiresAcceptanceCriteria checks that the elaborate
+// prompt mentions acceptance criteria (case-insensitively).
+func TestElaboratePrompt_RequiresAcceptanceCriteria(t *testing.T) {
+	got := strings.ToLower(buildElaboratePrompt(""))
+	if strings.Contains(got, "acceptance criteria") {
+		return
+	}
+	t.Error("elaborate prompt should instruct the model to include acceptance criteria")
+}
+
+// TestElaboratePrompt_RequiresAllRelevantTools checks that the elaborate
+// prompt names the Write tool, so the model is reminded to include
+// file-creating tools when needed.
+func TestElaboratePrompt_RequiresAllRelevantTools(t *testing.T) {
+	if got := buildElaboratePrompt(""); strings.Contains(got, "Write") {
+		return
+	}
+	t.Error("elaborate prompt should mention the Write tool so models know to include it")
+}
+
+// TestElaborateTask_SanitizationAppliedToResponse drives the elaborate HTTP
+// endpoint end to end with a fake elaborator command and checks that the
+// response has been passed through sanitizeElaboratedTask.
+func TestElaborateTask_SanitizationAppliedToResponse(t *testing.T) {
+	srv, _ := testServer(t)
+
+	// Elaborator returns a task that needs Write (instructions say "create file")
+	// but does NOT include it in allowed_tools.
+	task := elaboratedTask{
+		Name:        "Generate report",
+		Description: "Creates a report file.",
+		Agent: elaboratedAgent{
+			Type:         "claude",
+			Model:        "sonnet",
+			Instructions: "Create a new file called report.md with the analysis results.\n\n## Acceptance Criteria\n- report.md exists.",
+			MaxBudgetUSD: 0.5,
+			AllowedTools: []string{"Bash"}, // Write intentionally missing
+		},
+		Timeout:  "15m",
+		Priority: "normal",
+		Tags:     []string{"report"},
+	}
+	taskJSON, err := json.Marshal(task)
+	if err != nil {
+		t.Fatalf("marshal task: %v", err)
+	}
+	// The fake command prints the task JSON as a string under the "result" key.
+	wrapperJSON, err := json.Marshal(map[string]string{"result": string(taskJSON)})
+	if err != nil {
+		t.Fatalf("marshal wrapper: %v", err)
+	}
+
+	srv.elaborateCmdPath = createFakeClaude(t, string(wrapperJSON), 0)
+
+	body := `{"prompt":"generate a report"}`
+	req := httptest.NewRequest(http.MethodPost, "/api/tasks/elaborate", bytes.NewBufferString(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	srv.Handler().ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status: want 200, got %d; body: %s", w.Code, w.Body.String())
+	}
+
+	var result elaboratedTask
+	if err := json.NewDecoder(w.Body).Decode(&result); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+	if !hasTool(result.Agent.AllowedTools, "Write") {
+		t.Errorf("expected Write in sanitized allowed_tools, got %v", result.Agent.AllowedTools)
+	}
+}
+
func TestElaboratePrompt_ContainsWorkDir(t *testing.T) {
prompt := buildElaboratePrompt("/some/custom/path")
if !strings.Contains(prompt, "/some/custom/path") {