summaryrefslogtreecommitdiff
path: root/internal/api/elaborate.go
diff options
context:
space:
mode:
authorClaude <noreply@anthropic.com>2026-04-28 17:10:27 +0000
committerClaude <noreply@anthropic.com>2026-04-28 17:10:27 +0000
commitae833b2765c7c8086bf8e1ea8e8ec8ee9b73e656 (patch)
treeb2cda4dc982d6c04eb22033e19645091af42224b /internal/api/elaborate.go
parent0865afc43be562dbe14528e4299b9e213b54cc93 (diff)
feat(api): route elaboration through local LLM when configured
Phase 2 of "local OSS models as agents" plan. Adds a third elaboration path that calls the local OpenAI-compatible LLM via the internal/llm client, and reorders dispatch so the cheap path is tried first: local → claude → gemini, with each next attempt only on hard failure of the prior.

Wiring is opt-out, not opt-in: when [local_model].endpoint is set, elaboration prefers local by default. Users with a slow or low-quality local model can disable just elaboration via:

    [local_model]
    endpoint = "..."
    prefer_for_elaborate = false

without giving up the runner or the classifier path.

Implementation:
- Server gains an optional *llm.Client field via SetLLM (matches the existing SetNotifier/SetWorkspaceRoot setter pattern, no NewServer signature break).
- elaborateWithLocal() reuses buildElaboratePrompt verbatim and asks for response_format=json_object so we skip markdown-fence cleanup.
- handleElaborateTask reorders try chain; existing Claude-first behavior is preserved exactly when SetLLM is not called.
- LocalModel.UseForElaborate() encapsulates the default-true gating with a *bool so explicit-false survives TOML parse.

Tests:
- elaborateWithLocal: parses valid response, errors on nil client, errors on bad JSON.
- handler: local preferred when wired; falls back to claude when local fails; unchanged behavior when no LLM is configured.
- config: UseForElaborate gating across empty/default/explicit-true/explicit-false cases.

Pre-existing test failures noted in docs/plans/local-oss-runner.md (post-epic cleanup): TestGeminiLogs_ParsedCorrectly returns 404 for gemini execution log fetch — predates this change.

Plan: docs/plans/local-oss-runner.md.

https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
Diffstat (limited to 'internal/api/elaborate.go')
-rw-r--r--internal/api/elaborate.go60
1 files changed, 49 insertions, 11 deletions
diff --git a/internal/api/elaborate.go b/internal/api/elaborate.go
index 0c681ae..30095c8 100644
--- a/internal/api/elaborate.go
+++ b/internal/api/elaborate.go
@@ -12,6 +12,8 @@ import (
"sort"
"strings"
"time"
+
+ "github.com/thepeterstone/claudomator/internal/llm"
)
const elaborateTimeout = 30 * time.Second
@@ -245,6 +247,33 @@ func (s *Server) elaborateWithClaude(ctx context.Context, workDir, fullPrompt st
return &result, nil
}
+// elaborateWithLocal runs elaboration through an OpenAI-compatible local LLM.
+// It uses the same prompt template as the Claude/Gemini paths and requests
+// json_object response format so we can decode directly without the
+// markdown-fence cleanup needed for the CLI paths.
+func elaborateWithLocal(ctx context.Context, c *llm.Client, workDir, fullPrompt string) (*elaboratedTask, error) {
+ if c == nil {
+ return nil, fmt.Errorf("local llm: no client configured")
+ }
+ systemPrompt := buildElaboratePrompt(workDir)
+ resp, err := c.Chat(ctx, llm.ChatRequest{
+ Messages: []llm.Message{
+ {Role: "system", Content: systemPrompt},
+ {Role: "user", Content: fullPrompt},
+ },
+ ResponseJSON: true,
+ })
+ if err != nil {
+ return nil, fmt.Errorf("local llm: %w", err)
+ }
+ body := strings.TrimSpace(resp.Content)
+ var result elaboratedTask
+ if jerr := json.Unmarshal([]byte(extractJSON(body)), &result); jerr != nil {
+ return nil, fmt.Errorf("local llm: parse JSON: %w (response: %s)", jerr, body)
+ }
+ return &result, nil
+}
+
func (s *Server) elaborateWithGemini(ctx context.Context, workDir, fullPrompt string) (*elaboratedTask, error) {
combinedPrompt := fmt.Sprintf("%s\n\n%s", buildElaboratePrompt(workDir), fullPrompt)
cmd := exec.CommandContext(ctx, s.geminiBinaryPath(),
@@ -314,18 +343,27 @@ func (s *Server) handleElaborateTask(w http.ResponseWriter, r *http.Request) {
var result *elaboratedTask
var err error
- // Try Claude first.
- result, err = s.elaborateWithClaude(ctx, workDir, fullPrompt)
- if err != nil {
- s.logger.Warn("elaborate: claude failed, falling back to gemini", "error", err)
- // Fallback to Gemini.
- result, err = s.elaborateWithGemini(ctx, workDir, fullPrompt)
+ // Try local LLM first when configured. Falls back to Claude → Gemini on
+ // hard failure of each prior attempt.
+ if s.llm != nil {
+ result, err = elaborateWithLocal(ctx, s.llm, workDir, fullPrompt)
+ if err != nil {
+ s.logger.Warn("elaborate: local llm failed, falling back to claude", "error", err)
+ result = nil
+ }
+ }
+ if result == nil {
+ result, err = s.elaborateWithClaude(ctx, workDir, fullPrompt)
if err != nil {
- s.logger.Error("elaborate: fallback gemini also failed", "error", err)
- writeJSON(w, http.StatusBadGateway, map[string]string{
- "error": fmt.Sprintf("elaboration failed: %v", err),
- })
- return
+ s.logger.Warn("elaborate: claude failed, falling back to gemini", "error", err)
+ result, err = s.elaborateWithGemini(ctx, workDir, fullPrompt)
+ if err != nil {
+ s.logger.Error("elaborate: gemini also failed", "error", err)
+ writeJSON(w, http.StatusBadGateway, map[string]string{
+ "error": fmt.Sprintf("elaboration failed: %v", err),
+ })
+ return
+ }
}
}