feat(executor): add LocalRunner and OpenAI-compat LLM client

Phase 1 of the "local OSS models as agents" plan. Adds a third Runner backed by any OpenAI-compatible HTTP server (Ollama, vLLM, LM Studio, llama.cpp), and migrates the Gemini-CLI classifier to route through the same client when configured.

Two-layer split: internal/llm.Client is the workhorse (HTTP, no Pool, no DB) used directly by the classifier and any future internal helper that needs cheap reasoning. internal/executor.LocalRunner is a thin adapter implementing Runner for user-facing tasks. This avoids Pool reentrancy/deadlock when sub-second internal calls fire from inside Pool.execute().

Highlights:
- internal/retry: relocated runWithBackoff/IsRateLimitError/ParseRetryAfter into a shared package reused by executor and llm.
- internal/llm: Chat (non-streaming) and ChatStream (SSE) over /chat/completions with optional bearer auth, json_object response format, retry on 429/503, Retry-After parsing.
- internal/executor/LocalRunner: streams deltas into stdout.log in the same stream-json envelope ClaudeRunner emits, then writes one consolidated assistant block plus a result terminator so existing parsers (extractSummary, ParseChangestatFromOutput) work unchanged.
- internal/executor/Classifier: gains optional LLM field; uses json_object response format (no markdown-fence cleanup needed). Falls back to Gemini-CLI subprocess when LLM is nil.
- Pool.skipClassification: now skips only when the requested agent type is registered, so unknown types still reach the load balancer.
- Storage: additive tokens_in/tokens_out ALTERs on executions; CLI runners record cost_usd as before, LocalRunner records 0 + tokens.
- Config: [local_model] section (endpoint, model, timeout_seconds, default_temperature, api_key). Empty endpoint = no LocalRunner registered, classifier falls back to Gemini.

Pre-existing test issues fixed in passing:
- claude_test.go setupSandbox callsites updated to current signature.
- gemini_test.go TestParseGeminiStream skipped (asserts unimplemented GeminiRunner stream-error parsing; tracked separately).

Plan: docs/plans/local-oss-runner.md.

https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
author: Claude <noreply@anthropic.com> 2026-04-28 09:24:43 +0000
committer: Claude <noreply@anthropic.com> 2026-04-28 09:24:43 +0000
commit: 0865afc43be562dbe14528e4299b9e213b54cc93 (patch)
tree: 3ffb11207fb6b9866b5a2477bba7abe38964f83a /internal/cli
parent: c2aa026f6ce1c9e216b99d74f294fc133d5fcddd (diff)
3 files changed, 60 insertions, 5 deletions
diff --git a/internal/cli/llm.go b/internal/cli/llm.go
new file mode 100644
index 0000000..04fe902
--- /dev/null
+++ b/internal/cli/llm.go
@@ -0,0 +1,31 @@
+package cli
+
+import (
+	"log/slog"
+	"net/http"
+	"time"
+
+	"github.com/thepeterstone/claudomator/internal/config"
+	"github.com/thepeterstone/claudomator/internal/llm"
+)
+
+// buildLocalLLMClient returns an *llm.Client for the configured local model
+// endpoint, or nil when cfg.Endpoint is empty so callers can gate on
+// `if c != nil` to skip registering LocalRunner / the LLM classifier path.
+// The HTTP timeout is cfg.TimeoutSeconds when positive, otherwise 60 seconds.
+func buildLocalLLMClient(cfg config.LocalModel, logger *slog.Logger) *llm.Client {
+	if cfg.Endpoint == "" {
+		return nil
+	}
+	timeout := 60 * time.Second // default when TimeoutSeconds is zero or negative
+	if cfg.TimeoutSeconds > 0 {
+		timeout = time.Duration(cfg.TimeoutSeconds) * time.Second
+	}
+	return &llm.Client{
+		Endpoint:   cfg.Endpoint,
+		Model:      cfg.Model,
+		APIKey:     cfg.APIKey,
+		HTTPClient: &http.Client{Timeout: timeout}, // NOTE(review): Client.Timeout also bounds reading the response body; confirm it suits streamed responses
+		Logger:     logger,
+	}
+}
diff --git a/internal/cli/run.go b/internal/cli/run.go
index 49aa28e..2da7b79 100644
--- a/internal/cli/run.go
+++ b/internal/cli/run.go
@@ -84,9 +84,21 @@ func runTasks(file string, parallel int, dryRun bool) error {
 			LogDir:     cfg.LogDir,
 		},
 	}
+
+	localClient := buildLocalLLMClient(cfg.LocalModel, logger)
+	if localClient != nil {
+		runners["local"] = &executor.LocalRunner{
+			Client:             localClient,
+			Logger:             logger,
+			LogDir:             cfg.LogDir,
+			DefaultTemperature: cfg.LocalModel.DefaultTemperature,
+		}
+	}
+
 	pool := executor.NewPool(parallel, runners, store, logger)
-	if cfg.GeminiBinaryPath != "" {
-		pool.Classifier = &executor.Classifier{GeminiBinaryPath: cfg.GeminiBinaryPath}
+	pool.Classifier = &executor.Classifier{
+		LLM:              localClient,
+		GeminiBinaryPath: cfg.GeminiBinaryPath,
 	}
 
 	// Handle graceful shutdown.
diff --git a/internal/cli/serve.go b/internal/cli/serve.go
index 94f0c5d..e183bfc 100644
--- a/internal/cli/serve.go
+++ b/internal/cli/serve.go
@@ -71,10 +71,22 @@ func serve(addr string) error {
 			APIURL:     apiURL,
 		},
 	}
-	
+
+	localClient := buildLocalLLMClient(cfg.LocalModel, logger)
+	if localClient != nil {
+		runners["local"] = &executor.LocalRunner{
+			Client:             localClient,
+			Logger:             logger,
+			LogDir:             cfg.LogDir,
+			DefaultTemperature: cfg.LocalModel.DefaultTemperature,
+		}
+		logger.Info("local runner registered", "endpoint", cfg.LocalModel.Endpoint, "model", cfg.LocalModel.Model)
+	}
+
 	pool := executor.NewPool(cfg.MaxConcurrent, runners, store, logger)
-	if cfg.GeminiBinaryPath != "" {
-		pool.Classifier = &executor.Classifier{GeminiBinaryPath: cfg.GeminiBinaryPath}
+	pool.Classifier = &executor.Classifier{
+		LLM:              localClient,
+		GeminiBinaryPath: cfg.GeminiBinaryPath,
 	}
 	pool.RecoverStaleRunning(context.Background())
 	pool.RecoverStaleQueued(context.Background())
author	Claude <noreply@anthropic.com>	2026-04-28 09:24:43 +0000
committer	Claude <noreply@anthropic.com>	2026-04-28 09:24:43 +0000
commit	0865afc43be562dbe14528e4299b9e213b54cc93 (patch)
tree	3ffb11207fb6b9866b5a2477bba7abe38964f83a /internal/cli
parent	c2aa026f6ce1c9e216b99d74f294fc133d5fcddd (diff)