From ae833b2765c7c8086bf8e1ea8e8ec8ee9b73e656 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 28 Apr 2026 17:10:27 +0000 Subject: feat(api): route elaboration through local LLM when configured MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of "local OSS models as agents" plan. Adds a third elaboration path that calls the local OpenAI-compatible LLM via the internal/llm client, and reorders dispatch so the cheap path is tried first: local → claude → gemini, with each next attempt only on hard failure of the prior. Wiring is opt-out, not opt-in: when [local_model].endpoint is set, elaboration prefers local by default. Users with a slow or low-quality local model can disable just elaboration via: [local_model] endpoint = "..." prefer_for_elaborate = false without giving up the runner or the classifier path. Implementation: - Server gains an optional *llm.Client field via SetLLM (matches the existing SetNotifier/SetWorkspaceRoot setter pattern, no NewServer signature break). - elaborateWithLocal() reuses buildElaboratePrompt verbatim and asks for response_format=json_object so we skip markdown-fence cleanup. - handleElaborateTask reorders try chain; existing Claude-first behavior is preserved exactly when SetLLM is not called. - LocalModel.UseForElaborate() encapsulates the default-true gating with a *bool so explicit-false survives TOML parse. Tests: - elaborateWithLocal: parses valid response, errors on nil client, errors on bad JSON. - handler: local preferred when wired; falls back to claude when local fails; unchanged behavior when no LLM is configured. - config: UseForElaborate gating across empty/default/explicit-true/ explicit-false cases. Pre-existing test failures noted in docs/plans/local-oss-runner.md (post-epic cleanup): TestGeminiLogs_ParsedCorrectly returns 404 for gemini execution log fetch — predates this change. Plan: docs/plans/local-oss-runner.md. https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J --- internal/api/elaborate.go | 60 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 11 deletions(-) (limited to 'internal/api/elaborate.go') diff --git a/internal/api/elaborate.go b/internal/api/elaborate.go index 0c681ae..30095c8 100644 --- a/internal/api/elaborate.go +++ b/internal/api/elaborate.go @@ -12,6 +12,8 @@ import ( "sort" "strings" "time" + + "github.com/thepeterstone/claudomator/internal/llm" ) const elaborateTimeout = 30 * time.Second @@ -245,6 +247,33 @@ func (s *Server) elaborateWithClaude(ctx context.Context, workDir, fullPrompt st return &result, nil } +// elaborateWithLocal runs elaboration through an OpenAI-compatible local LLM. +// It uses the same prompt template as the Claude/Gemini paths and requests +// json_object response format so we can decode directly without the +// markdown-fence cleanup needed for the CLI paths. +func elaborateWithLocal(ctx context.Context, c *llm.Client, workDir, fullPrompt string) (*elaboratedTask, error) { + if c == nil { + return nil, fmt.Errorf("local llm: no client configured") + } + systemPrompt := buildElaboratePrompt(workDir) + resp, err := c.Chat(ctx, llm.ChatRequest{ + Messages: []llm.Message{ + {Role: "system", Content: systemPrompt}, + {Role: "user", Content: fullPrompt}, + }, + ResponseJSON: true, + }) + if err != nil { + return nil, fmt.Errorf("local llm: %w", err) + } + body := strings.TrimSpace(resp.Content) + var result elaboratedTask + if jerr := json.Unmarshal([]byte(extractJSON(body)), &result); jerr != nil { + return nil, fmt.Errorf("local llm: parse JSON: %w (response: %s)", jerr, body) + } + return &result, nil +} + func (s *Server) elaborateWithGemini(ctx context.Context, workDir, fullPrompt string) (*elaboratedTask, error) { combinedPrompt := fmt.Sprintf("%s\n\n%s", buildElaboratePrompt(workDir), fullPrompt) cmd := exec.CommandContext(ctx, s.geminiBinaryPath(), @@ -314,18 +343,27 @@ func (s *Server) handleElaborateTask(w http.ResponseWriter, r *http.Request) { var result *elaboratedTask var err error - // Try Claude first. - result, err = s.elaborateWithClaude(ctx, workDir, fullPrompt) - if err != nil { - s.logger.Warn("elaborate: claude failed, falling back to gemini", "error", err) - // Fallback to Gemini. - result, err = s.elaborateWithGemini(ctx, workDir, fullPrompt) + // Try local LLM first when configured. Falls back to Claude → Gemini on + // hard failure of each prior attempt. + if s.llm != nil { + result, err = elaborateWithLocal(ctx, s.llm, workDir, fullPrompt) + if err != nil { + s.logger.Warn("elaborate: local llm failed, falling back to claude", "error", err) + result = nil + } + } + if result == nil { + result, err = s.elaborateWithClaude(ctx, workDir, fullPrompt) if err != nil { - s.logger.Error("elaborate: fallback gemini also failed", "error", err) - writeJSON(w, http.StatusBadGateway, map[string]string{ - "error": fmt.Sprintf("elaboration failed: %v", err), - }) - return + s.logger.Warn("elaborate: claude failed, falling back to gemini", "error", err) + result, err = s.elaborateWithGemini(ctx, workDir, fullPrompt) + if err != nil { + s.logger.Error("elaborate: gemini also failed", "error", err) + writeJSON(w, http.StatusBadGateway, map[string]string{ + "error": fmt.Sprintf("elaboration failed: %v", err), + }) + return + } } } -- cgit v1.2.3