diff options
| author | Claude <noreply@anthropic.com> | 2026-05-02 07:54:51 +0000 |
|---|---|---|
| committer | Claude <noreply@anthropic.com> | 2026-05-02 07:54:51 +0000 |
| commit | 6c5762848f4f3114a6ece9ce0bc70a84fca040ce (patch) | |
| tree | c118fe596c66b23dbf23d7aee5d6d6f823d0903a /internal/api | |
| parent | ae833b2765c7c8086bf8e1ea8e8ec8ee9b73e656 (diff) | |
feat(api): enrich CI failure task instructions via local LLM
Phase 3 of "local OSS models as agents" plan. When the webhook handler
creates a task for a failed CI run AND a local LLM is configured on
the server, the hardcoded 4-step investigation template is replaced
with a project-aware investigation plan generated by the LLM.
Scope adjustment from the original sketch: the original plan said
"summarize fetched workflow logs", but fetching logs requires GitHub
API auth that isn't wired up. Narrowed to project-context triage —
recent git log + CLAUDE.md content + webhook metadata, fed to the
LLM with a user prompt asking for 6-12 lines of concrete next
steps followed by an explicit 'Acceptance Criteria' section. GitHub
log fetching is deferred to post-epic cleanup.
Implementation:
- New internal/api/webhook_llm.go holds enrichCIInstructions and its
helpers (readRecentCommits via `git log`, readProjectDoc).
- enrichCIInstructions is truly additive: any failure mode (no client,
HTTP error, empty body, 10s timeout) returns the original fallback
template unchanged, so the no-LLM output stays byte-for-byte identical
and existing webhook tests pass without modification.
- Always preserves a metadata header (repo/branch/SHA/check/URL)
ahead of the LLM body so investigators don't lose context if the
LLM is terse.
- Reuses s.llm (set via Server.SetLLM in Phase 2) — no new config
knob, no per-feature gating. Asymmetric opt-out (yes-elaborate,
no-CI-triage) deferred until there's actual demand.
Tests:
- enrichCIInstructions: nil client, LLM 500, empty body all return
fallback unchanged.
- enrichCIInstructions: success path produces enriched body with
metadata header preserved; user prompt contains repo/branch/SHA.
- enrichCIInstructions: real git repo (init + 2 commits) → recent
commits appear in user prompt.
- Webhook handler regression guard: no-LLM path produces the exact
legacy template substrings.
- Webhook handler with LLM stubbed: task instructions contain LLM
body + metadata header.
Plan: docs/plans/local-oss-runner.md.
https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
Diffstat (limited to 'internal/api')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | internal/api/webhook.go | 15 |
| -rw-r--r-- | internal/api/webhook_llm.go | 127 |
| -rw-r--r-- | internal/api/webhook_llm_test.go | 228 |
3 files changed, 369 insertions, 1 deletion
diff --git a/internal/api/webhook.go b/internal/api/webhook.go index 8bf1676..9437f7d 100644 --- a/internal/api/webhook.go +++ b/internal/api/webhook.go @@ -1,6 +1,7 @@ package api import ( + "context" "crypto/hmac" "crypto/sha256" "encoding/hex" @@ -154,7 +155,7 @@ func (s *Server) handleWorkflowRunEvent(w http.ResponseWriter, body []byte) { func (s *Server) createCIFailureTask(w http.ResponseWriter, repoName, fullName, branch, sha, checkName, htmlURL string) { project := matchProject(s.projects, repoName) - instructions := fmt.Sprintf( + fallback := fmt.Sprintf( "A CI failure has been detected and requires investigation.\n\n"+ "Repository: %s\n"+ "Branch: %s\n"+ @@ -169,6 +170,18 @@ func (s *Server) createCIFailureTask(w http.ResponseWriter, repoName, fullName, fullName, branch, sha, checkName, htmlURL, ) + tctx := ciTriageContext{ + Repo: fullName, + Branch: branch, + SHA: sha, + CheckName: checkName, + URL: htmlURL, + } + if project != nil { + tctx.ProjectDir = project.Dir + } + instructions := enrichCIInstructions(context.Background(), s.llm, tctx, fallback) + now := time.Now().UTC() t := &task.Task{ ID: uuid.New().String(), diff --git a/internal/api/webhook_llm.go b/internal/api/webhook_llm.go new file mode 100644 index 0000000..1cbca17 --- /dev/null +++ b/internal/api/webhook_llm.go @@ -0,0 +1,127 @@ +package api + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/thepeterstone/claudomator/internal/llm" +) + +// ciTriagePromptTimeout caps the LLM enrichment call so a slow local model +// can't stall webhook handling. On timeout the original template is used. +const ciTriagePromptTimeout = 10 * time.Second + +// ciTriageContext holds everything we know at webhook time, plus best-effort +// project-side signals (recent git log, CLAUDE.md content) when project_dir +// is available. 
+type ciTriageContext struct { + Repo string + Branch string + SHA string + CheckName string + URL string + ProjectDir string + RecentCommits string // multi-line, may be "" + ProjectDoc string // first ~4 KB of CLAUDE.md, may be "" +} + +// enrichCIInstructions asks the local LLM to produce a tighter, project-aware +// investigation plan than the hardcoded template. On any error (no client, +// timeout, parse failure) it returns fallback unchanged so the webhook flow +// is never worse off for trying. +func enrichCIInstructions(parent context.Context, c *llm.Client, ctx ciTriageContext, fallback string) string { + if c == nil { + return fallback + } + + // Pull project-side signals best-effort. Errors are silently swallowed — + // the LLM still gets the metadata it does have. + if ctx.ProjectDir != "" { + ctx.RecentCommits = readRecentCommits(ctx.ProjectDir, 5) + ctx.ProjectDoc = readProjectDoc(ctx.ProjectDir) + } + + cctx, cancel := context.WithTimeout(parent, ciTriagePromptTimeout) + defer cancel() + + prompt := buildCITriagePrompt(ctx) + resp, err := c.Chat(cctx, llm.ChatRequest{ + Messages: []llm.Message{ + {Role: "system", Content: "You produce concise, actionable CI failure investigation plans. Respond with plain text only — no markdown fences, no JSON, no preamble."}, + {Role: "user", Content: prompt}, + }, + }) + if err != nil { + return fallback + } + body := strings.TrimSpace(resp.Content) + if body == "" { + return fallback + } + // Always preserve the metadata header from the fallback so investigators + // can see repo/branch/SHA/URL even if the LLM body is terse. 
+ return ciInstructionsHeader(ctx) + "\n\n" + body +} + +func buildCITriagePrompt(ctx ciTriageContext) string { + var sb strings.Builder + fmt.Fprintf(&sb, "CI just failed.\n\nRepository: %s\nBranch: %s\nCommit SHA: %s\nCheck/Workflow: %s\nRun URL: %s\n", + ctx.Repo, ctx.Branch, ctx.SHA, ctx.CheckName, ctx.URL) + if ctx.RecentCommits != "" { + fmt.Fprintf(&sb, "\nRecent commits on this branch (newest first):\n%s\n", ctx.RecentCommits) + } + if ctx.ProjectDoc != "" { + fmt.Fprintf(&sb, "\nProject context (CLAUDE.md, truncated):\n%s\n", ctx.ProjectDoc) + } + sb.WriteString("\nProduce 6–12 lines of investigation steps. Name suspect commits or files when you can; otherwise give concrete starting actions (which logs to read, which tests to re-run locally). End with an explicit 'Acceptance Criteria' section listing what 'fixed' looks like.") + return sb.String() +} + +func ciInstructionsHeader(ctx ciTriageContext) string { + return fmt.Sprintf( + "A CI failure has been detected and requires investigation.\n\n"+ + "Repository: %s\n"+ + "Branch: %s\n"+ + "Commit SHA: %s\n"+ + "Check/Workflow: %s\n"+ + "Run URL: %s", + ctx.Repo, ctx.Branch, ctx.SHA, ctx.CheckName, ctx.URL, + ) +} + +// readRecentCommits returns the last n commits as a `git log --oneline`-style +// string, or "" on any error. +func readRecentCommits(projectDir string, n int) string { + if projectDir == "" { + return "" + } + cctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + cmd := exec.CommandContext(cctx, "git", "-C", projectDir, "log", "--oneline", fmt.Sprintf("-n%d", n)) + out, err := cmd.Output() + if err != nil { + return "" + } + return strings.TrimSpace(string(out)) +} + +// readProjectDoc returns CLAUDE.md content (capped at 4KB) or "". 
+func readProjectDoc(projectDir string) string { + if projectDir == "" { + return "" + } + data, err := os.ReadFile(filepath.Join(projectDir, "CLAUDE.md")) + if err != nil { + return "" + } + const cap = 4096 + if len(data) > cap { + data = data[:cap] + } + return strings.TrimSpace(string(data)) +} diff --git a/internal/api/webhook_llm_test.go b/internal/api/webhook_llm_test.go new file mode 100644 index 0000000..f2381a1 --- /dev/null +++ b/internal/api/webhook_llm_test.go @@ -0,0 +1,228 @@ +package api + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/thepeterstone/claudomator/internal/config" + "github.com/thepeterstone/claudomator/internal/llm" +) + +// initGitRepo creates a fresh git repo with two commits and returns its path. +// Used to verify enrichCIInstructions picks up recent commits. +func initGitRepo(t *testing.T) string { + t.Helper() + dir := t.TempDir() + run := func(args ...string) { + cmd := exec.Command("git", append([]string{"-C", dir}, args...)...) + cmd.Env = append(os.Environ(), + "GIT_AUTHOR_NAME=test", "GIT_AUTHOR_EMAIL=test@example.com", + "GIT_COMMITTER_NAME=test", "GIT_COMMITTER_EMAIL=test@example.com", + // Disable signing in case the host has a global pre-commit signer. 
+ "GIT_CONFIG_GLOBAL=/dev/null", + ) + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } + } + run("init", "-q") + run("config", "commit.gpgsign", "false") + run("config", "tag.gpgsign", "false") + if err := os.WriteFile(filepath.Join(dir, "README"), []byte("v1\n"), 0644); err != nil { + t.Fatal(err) + } + run("add", "README") + run("commit", "-q", "-m", "first commit", "--no-gpg-sign") + if err := os.WriteFile(filepath.Join(dir, "README"), []byte("v2\n"), 0644); err != nil { + t.Fatal(err) + } + run("add", "README") + run("commit", "-q", "-m", "fix: bump readme", "--no-gpg-sign") + return dir +} + +func TestEnrichCIInstructions_NilClient_ReturnsFallback(t *testing.T) { + got := enrichCIInstructions(context.Background(), nil, ciTriageContext{}, "FALLBACK") + if got != "FALLBACK" { + t.Errorf("nil client: want FALLBACK, got %q", got) + } +} + +func TestEnrichCIInstructions_LLMFailure_ReturnsFallback(t *testing.T) { + // Server that always 500s. 
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "boom", http.StatusInternalServerError) + })) + defer srv.Close() + + c := &llm.Client{Endpoint: srv.URL + "/v1", Model: "fake"} + got := enrichCIInstructions(context.Background(), c, + ciTriageContext{Repo: "x", Branch: "main"}, "FALLBACK") + if got != "FALLBACK" { + t.Errorf("llm failure: want FALLBACK, got %q", got) + } +} + +func TestEnrichCIInstructions_EmptyLLMBody_ReturnsFallback(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + fmt.Fprintln(w, `{"model":"x","choices":[{"message":{"content":""},"finish_reason":"stop"}],"usage":{}}`) + })) + defer srv.Close() + c := &llm.Client{Endpoint: srv.URL + "/v1", Model: "fake"} + got := enrichCIInstructions(context.Background(), c, ciTriageContext{}, "FALLBACK-2") + if got != "FALLBACK-2" { + t.Errorf("empty body: want fallback, got %q", got) + } +} + +func TestEnrichCIInstructions_LLMSuccess_ReturnsEnriched(t *testing.T) { + expected := "1. Look at commit abc123\n2. Re-run build locally\n3. Check unit tests" + + var capturedPrompt string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var body struct { + Messages []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"messages"` + } + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + t.Fatal(err) + } + // Capture the user message so we can assert metadata is in the prompt. 
+ for _, m := range body.Messages { + if m.Role == "user" { + capturedPrompt = m.Content + } + } + + w.Header().Set("Content-Type", "application/json") + fmt.Fprintf(w, `{"model":"x","choices":[{"message":{"content":%q},"finish_reason":"stop"}],"usage":{}}`, expected) + })) + defer srv.Close() + + c := &llm.Client{Endpoint: srv.URL + "/v1", Model: "fake"} + tctx := ciTriageContext{ + Repo: "owner/myrepo", + Branch: "main", + SHA: "abc123", + CheckName: "CI Build", + URL: "https://github.com/owner/myrepo/runs/1", + } + got := enrichCIInstructions(context.Background(), c, tctx, "FALLBACK") + + if !strings.Contains(got, expected) { + t.Errorf("enriched body missing LLM content; got: %s", got) + } + if !strings.Contains(got, "Repository: owner/myrepo") { + t.Errorf("enriched body missing metadata header; got: %s", got) + } + for _, want := range []string{"owner/myrepo", "main", "abc123", "CI Build"} { + if !strings.Contains(capturedPrompt, want) { + t.Errorf("prompt missing %q; got: %s", want, capturedPrompt) + } + } +} + +func TestEnrichCIInstructions_IncludesRecentCommits(t *testing.T) { + repo := initGitRepo(t) + + var capturedPrompt string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var body struct { + Messages []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"messages"` + } + json.NewDecoder(r.Body).Decode(&body) + for _, m := range body.Messages { + if m.Role == "user" { + capturedPrompt = m.Content + } + } + w.Header().Set("Content-Type", "application/json") + fmt.Fprintln(w, `{"model":"x","choices":[{"message":{"content":"plan"},"finish_reason":"stop"}],"usage":{}}`) + })) + defer srv.Close() + + c := &llm.Client{Endpoint: srv.URL + "/v1", Model: "fake"} + enrichCIInstructions(context.Background(), c, + ciTriageContext{Repo: "x", Branch: "y", ProjectDir: repo}, "FALLBACK") + + if !strings.Contains(capturedPrompt, "Recent commits") { + t.Errorf("expected prompt to include recent 
commits section; got:\n%s", capturedPrompt) + } + if !strings.Contains(capturedPrompt, "fix: bump readme") { + t.Errorf("expected most recent commit message in prompt; got:\n%s", capturedPrompt) + } +} + +// TestWebhook_NoLLM_InstructionsPreserved is the regression guard: when no +// LLM is configured, webhook task instructions match the historical template +// exactly. +func TestWebhook_NoLLM_InstructionsPreserved(t *testing.T) { + srv, store := testServer(t) + srv.projects = []config.Project{{Name: "myrepo", Dir: "/workspace/myrepo"}} + + w := webhookPost(t, srv, "check_run", checkRunFailurePayload, "") + if w.Code != http.StatusOK { + t.Fatalf("status: %d", w.Code) + } + var resp map[string]string + json.NewDecoder(w.Body).Decode(&resp) + tk, err := store.GetTask(resp["task_id"]) + if err != nil { + t.Fatal(err) + } + for _, want := range []string{ + "A CI failure has been detected", + "Please investigate the failure by:", + "1. Reviewing recent commits on the branch", + "4. Fixing the root cause and ensuring the build passes", + } { + if !strings.Contains(tk.Agent.Instructions, want) { + t.Errorf("instructions missing %q (regression: LLM path leaked into no-LLM case)", want) + } + } +} + +// TestWebhook_WithLLM_InstructionsEnriched verifies the LLM body appears in +// the created task's instructions when SetLLM is configured. 
+func TestWebhook_WithLLM_InstructionsEnriched(t *testing.T) { + srv, store := testServer(t) + srv.projects = []config.Project{{Name: "myrepo", Dir: "/workspace/myrepo"}} + + llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + fmt.Fprintln(w, `{"model":"x","choices":[{"message":{"content":"LLM-GENERATED-PLAN"},"finish_reason":"stop"}],"usage":{}}`) + })) + defer llmSrv.Close() + srv.SetLLM(&llm.Client{Endpoint: llmSrv.URL + "/v1", Model: "fake"}) + + w := webhookPost(t, srv, "check_run", checkRunFailurePayload, "") + if w.Code != http.StatusOK { + t.Fatalf("status: %d body: %s", w.Code, w.Body.String()) + } + var resp map[string]string + json.NewDecoder(w.Body).Decode(&resp) + tk, err := store.GetTask(resp["task_id"]) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(tk.Agent.Instructions, "LLM-GENERATED-PLAN") { + t.Errorf("instructions missing LLM body; got:\n%s", tk.Agent.Instructions) + } + if !strings.Contains(tk.Agent.Instructions, "Repository: owner/myrepo") { + t.Errorf("instructions missing metadata header; got:\n%s", tk.Agent.Instructions) + } +} |
