From ae833b2765c7c8086bf8e1ea8e8ec8ee9b73e656 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 28 Apr 2026 17:10:27 +0000 Subject: feat(api): route elaboration through local LLM when configured MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of "local OSS models as agents" plan. Adds a third elaboration path that calls the local OpenAI-compatible LLM via the internal/llm client, and reorders dispatch so the cheap path is tried first: local → claude → gemini, with each next attempt only on hard failure of the prior. Wiring is opt-out, not opt-in: when [local_model].endpoint is set, elaboration prefers local by default. Users with a slow or low-quality local model can disable just elaboration via: [local_model] endpoint = "..." prefer_for_elaborate = false without giving up the runner or the classifier path. Implementation: - Server gains an optional *llm.Client field via SetLLM (matches the existing SetNotifier/SetWorkspaceRoot setter pattern, no NewServer signature break). - elaborateWithLocal() reuses buildElaboratePrompt verbatim and asks for response_format=json_object so we skip markdown-fence cleanup. - handleElaborateTask reorders try chain; existing Claude-first behavior is preserved exactly when SetLLM is not called. - LocalModel.UseForElaborate() encapsulates the default-true gating with a *bool so explicit-false survives TOML parse. Tests: - elaborateWithLocal: parses valid response, errors on nil client, errors on bad JSON. - handler: local preferred when wired; falls back to claude when local fails; unchanged behavior when no LLM is configured. - config: UseForElaborate gating across empty/default/explicit-true/ explicit-false cases. Pre-existing test failures noted in docs/plans/local-oss-runner.md (post-epic cleanup): TestGeminiLogs_ParsedCorrectly returns 404 for gemini execution log fetch — predates this change. Plan: docs/plans/local-oss-runner.md. https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J --- internal/api/elaborate.go | 60 ++++++++-- internal/api/elaborate_local_test.go | 214 +++++++++++++++++++++++++++++++++++ internal/api/server.go | 9 ++ 3 files changed, 272 insertions(+), 11 deletions(-) create mode 100644 internal/api/elaborate_local_test.go (limited to 'internal/api') diff --git a/internal/api/elaborate.go b/internal/api/elaborate.go index 0c681ae..30095c8 100644 --- a/internal/api/elaborate.go +++ b/internal/api/elaborate.go @@ -12,6 +12,8 @@ import ( "sort" "strings" "time" + + "github.com/thepeterstone/claudomator/internal/llm" ) const elaborateTimeout = 30 * time.Second @@ -245,6 +247,33 @@ func (s *Server) elaborateWithClaude(ctx context.Context, workDir, fullPrompt st return &result, nil } +// elaborateWithLocal runs elaboration through an OpenAI-compatible local LLM. +// It uses the same prompt template as the Claude/Gemini paths and requests +// json_object response format so we can decode directly without the +// markdown-fence cleanup needed for the CLI paths. +func elaborateWithLocal(ctx context.Context, c *llm.Client, workDir, fullPrompt string) (*elaboratedTask, error) { + if c == nil { + return nil, fmt.Errorf("local llm: no client configured") + } + systemPrompt := buildElaboratePrompt(workDir) + resp, err := c.Chat(ctx, llm.ChatRequest{ + Messages: []llm.Message{ + {Role: "system", Content: systemPrompt}, + {Role: "user", Content: fullPrompt}, + }, + ResponseJSON: true, + }) + if err != nil { + return nil, fmt.Errorf("local llm: %w", err) + } + body := strings.TrimSpace(resp.Content) + var result elaboratedTask + if jerr := json.Unmarshal([]byte(extractJSON(body)), &result); jerr != nil { + return nil, fmt.Errorf("local llm: parse JSON: %w (response: %s)", jerr, body) + } + return &result, nil +} + func (s *Server) elaborateWithGemini(ctx context.Context, workDir, fullPrompt string) (*elaboratedTask, error) { combinedPrompt := fmt.Sprintf("%s\n\n%s", buildElaboratePrompt(workDir), fullPrompt) cmd := exec.CommandContext(ctx, s.geminiBinaryPath(), @@ -314,18 +343,27 @@ func (s *Server) handleElaborateTask(w http.ResponseWriter, r *http.Request) { var result *elaboratedTask var err error - // Try Claude first. - result, err = s.elaborateWithClaude(ctx, workDir, fullPrompt) - if err != nil { - s.logger.Warn("elaborate: claude failed, falling back to gemini", "error", err) - // Fallback to Gemini. - result, err = s.elaborateWithGemini(ctx, workDir, fullPrompt) + // Try local LLM first when configured. Falls back to Claude → Gemini on + // hard failure of each prior attempt. + if s.llm != nil { + result, err = elaborateWithLocal(ctx, s.llm, workDir, fullPrompt) + if err != nil { + s.logger.Warn("elaborate: local llm failed, falling back to claude", "error", err) + result = nil + } + } + if result == nil { + result, err = s.elaborateWithClaude(ctx, workDir, fullPrompt) if err != nil { - s.logger.Error("elaborate: fallback gemini also failed", "error", err) - writeJSON(w, http.StatusBadGateway, map[string]string{ - "error": fmt.Sprintf("elaboration failed: %v", err), - }) - return + s.logger.Warn("elaborate: claude failed, falling back to gemini", "error", err) + result, err = s.elaborateWithGemini(ctx, workDir, fullPrompt) + if err != nil { + s.logger.Error("elaborate: gemini also failed", "error", err) + writeJSON(w, http.StatusBadGateway, map[string]string{ + "error": fmt.Sprintf("elaboration failed: %v", err), + }) + return + } } } diff --git a/internal/api/elaborate_local_test.go b/internal/api/elaborate_local_test.go new file mode 100644 index 0000000..09a8f9e --- /dev/null +++ b/internal/api/elaborate_local_test.go @@ -0,0 +1,214 @@ +package api + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + + "github.com/thepeterstone/claudomator/internal/llm" +) + +// fakeChatCompletionsServer returns an httptest server that responds to a +// /chat/completions POST with the given assistant content (which should be a +// JSON-encoded elaboratedTask). Returns the server and a counter of calls +// received so tests can assert dispatch ordering. +func fakeChatCompletionsServer(t *testing.T, assistantContent string) (*httptest.Server, *int32) { + t.Helper() + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&calls, 1) + w.Header().Set("Content-Type", "application/json") + // The assistant content has to be JSON-encoded inside the wire format. + escaped, _ := json.Marshal(assistantContent) + fmt.Fprintf(w, `{ + "model":"local", + "choices":[{"message":{"role":"assistant","content":%s},"finish_reason":"stop"}], + "usage":{"prompt_tokens":10,"completion_tokens":50} + }`, string(escaped)) + })) + t.Cleanup(srv.Close) + return srv, &calls +} + +func TestElaborateWithLocal_ParsesValidResponse(t *testing.T) { + taskBody, _ := json.Marshal(elaboratedTask{ + Name: "Test elaborated task", + Description: "From local llm", + Agent: elaboratedAgent{ + Type: "claude", + Model: "sonnet", + Instructions: "Run go build.", + MaxBudgetUSD: 0.25, + AllowedTools: []string{"Bash"}, + }, + Timeout: "10m", + Priority: "normal", + Tags: []string{"build"}, + }) + srv, calls := fakeChatCompletionsServer(t, string(taskBody)) + + c := &llm.Client{Endpoint: srv.URL + "/v1", Model: "fake"} + result, err := elaborateWithLocal(context.Background(), c, "/some/dir", "build the project") + if err != nil { + t.Fatalf("elaborateWithLocal: %v", err) + } + if result.Name != "Test elaborated task" { + t.Errorf("Name: %q", result.Name) + } + if result.Agent.Instructions != "Run go build." { + t.Errorf("Instructions: %q", result.Agent.Instructions) + } + if got := atomic.LoadInt32(calls); got != 1 { + t.Errorf("expected 1 call, got %d", got) + } +} + +func TestElaborateWithLocal_NilClient(t *testing.T) { + _, err := elaborateWithLocal(context.Background(), nil, "", "p") + if err == nil || !strings.Contains(err.Error(), "no client") { + t.Errorf("expected nil-client error, got %v", err) + } +} + +func TestElaborateWithLocal_BadJSON(t *testing.T) { + srv, _ := fakeChatCompletionsServer(t, "this is not JSON at all") + c := &llm.Client{Endpoint: srv.URL + "/v1", Model: "fake"} + _, err := elaborateWithLocal(context.Background(), c, "", "p") + if err == nil || !strings.Contains(err.Error(), "parse JSON") { + t.Errorf("expected parse error, got %v", err) + } +} + +// TestElaborateTask_LocalLLMPreferred verifies the dispatcher uses local LLM +// when SetLLM is configured, and does not invoke claude. +func TestElaborateTask_LocalLLMPreferred(t *testing.T) { + srv, _ := testServer(t) + + taskBody, _ := json.Marshal(elaboratedTask{ + Name: "Local-elaborated", + Description: "From local", + Agent: elaboratedAgent{ + Type: "claude", + Model: "sonnet", + Instructions: "Do work. Tests pass when complete.", + MaxBudgetUSD: 0.25, + AllowedTools: []string{"Bash"}, + }, + Timeout: "10m", + Priority: "normal", + }) + llmSrv, _ := fakeChatCompletionsServer(t, string(taskBody)) + srv.SetLLM(&llm.Client{Endpoint: llmSrv.URL + "/v1", Model: "fake"}) + // Point Claude binary at a path that would fail if called. + srv.elaborateCmdPath = "/nonexistent/claude-should-not-run" + + body := `{"prompt":"do work"}` + req := httptest.NewRequest("POST", "/api/tasks/elaborate", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("status: want 200, got %d; body: %s", w.Code, w.Body.String()) + } + var got elaboratedTask + if err := json.NewDecoder(w.Body).Decode(&got); err != nil { + t.Fatalf("decode response: %v", err) + } + if got.Name != "Local-elaborated" { + t.Errorf("Name: want Local-elaborated got %q", got.Name) + } +} + +// TestElaborateTask_LocalFails_FallsBackToClaude verifies the dispatcher +// falls back to the Claude path when the local LLM returns an error. +func TestElaborateTask_LocalFails_FallsBackToClaude(t *testing.T) { + srv, _ := testServer(t) + + // Local LLM server that always 500s. + failSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "boom", http.StatusInternalServerError) + })) + t.Cleanup(failSrv.Close) + srv.SetLLM(&llm.Client{Endpoint: failSrv.URL + "/v1", Model: "fake"}) + + // Configure a working fake Claude binary. + taskBody, _ := json.Marshal(elaboratedTask{ + Name: "Claude-fallback", + Description: "From claude after local failed", + Agent: elaboratedAgent{ + Type: "claude", + Model: "sonnet", + Instructions: "Run tests.", + MaxBudgetUSD: 0.25, + AllowedTools: []string{"Bash"}, + }, + Timeout: "10m", + Priority: "normal", + }) + wrapper, _ := json.Marshal(map[string]string{"result": string(taskBody)}) + srv.elaborateCmdPath = createFakeClaude(t, string(wrapper), 0) + + body := `{"prompt":"run tests"}` + req := httptest.NewRequest("POST", "/api/tasks/elaborate", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("status: want 200, got %d; body: %s", w.Code, w.Body.String()) + } + var got elaboratedTask + if err := json.NewDecoder(w.Body).Decode(&got); err != nil { + t.Fatalf("decode response: %v", err) + } + if got.Name != "Claude-fallback" { + t.Errorf("Name: want Claude-fallback (fallback path) got %q", got.Name) + } +} + +// TestElaborateTask_NoLocalLLM_UsesClaude verifies that when SetLLM is not +// called, behavior is unchanged (Claude path still primary). +func TestElaborateTask_NoLocalLLM_UsesClaude(t *testing.T) { + srv, _ := testServer(t) + + taskBody, _ := json.Marshal(elaboratedTask{ + Name: "Claude-only", + Description: "no local llm configured", + Agent: elaboratedAgent{ + Type: "claude", + Model: "sonnet", + Instructions: "Do work.", + MaxBudgetUSD: 0.25, + AllowedTools: []string{"Bash"}, + }, + Timeout: "10m", + Priority: "normal", + }) + wrapper, _ := json.Marshal(map[string]string{"result": string(taskBody)}) + srv.elaborateCmdPath = createFakeClaude(t, string(wrapper), 0) + + body := `{"prompt":"do work"}` + req := httptest.NewRequest("POST", "/api/tasks/elaborate", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + srv.Handler().ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("status: want 200, got %d; body: %s", w.Code, w.Body.String()) + } + var got elaboratedTask + if err := json.NewDecoder(w.Body).Decode(&got); err != nil { + t.Fatalf("decode response: %v", err) + } + if got.Name != "Claude-only" { + t.Errorf("Name: %q", got.Name) + } +} + diff --git a/internal/api/server.go b/internal/api/server.go index 8a20349..33048e4 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -12,6 +12,7 @@ import ( "github.com/thepeterstone/claudomator/internal/config" "github.com/thepeterstone/claudomator/internal/executor" + "github.com/thepeterstone/claudomator/internal/llm" "github.com/thepeterstone/claudomator/internal/notify" "github.com/thepeterstone/claudomator/internal/storage" "github.com/thepeterstone/claudomator/internal/task" @@ -50,6 +51,7 @@ type Server struct { elaborateLimiter *ipRateLimiter // per-IP rate limiter for elaborate/validate endpoints webhookSecret string // HMAC-SHA256 secret for GitHub webhook validation projects []config.Project // configured projects for webhook routing + llm *llm.Client // optional local LLM client; when set, elaboration prefers it } // SetAPIToken configures a bearer token that must be supplied to access the API. @@ -73,6 +75,13 @@ func (s *Server) SetWorkspaceRoot(path string) { s.workspaceRoot = path } +// SetLLM wires a local OpenAI-compatible LLM client for use by elaboration +// (and future internal helpers). When non-nil, elaboration will prefer it +// over the Claude CLI; on failure it falls back to claude → gemini. +func (s *Server) SetLLM(c *llm.Client) { + s.llm = c +} + func NewServer(store *storage.DB, pool *executor.Pool, logger *slog.Logger, claudeBinPath, geminiBinPath string) *Server { wd, _ := os.Getwd() s := &Server{ -- cgit v1.2.3