From 8dca9bbb0baee59ffe0d3127180ef0958dda8b91 Mon Sep 17 00:00:00 2001 From: Claudomator Agent Date: Sat, 21 Mar 2026 23:18:50 +0000 Subject: feat: executor reliability — per-agent limit, drain gate, pre-flight creds, auth recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - maxPerAgent=1: only 1 in-flight execution per agent type at a time; excess tasks are requeued after 30s - Drain gate: after 2 consecutive failures the agent is drained and a question is set on the task; reset on first success; POST /api/pool/agents/{agent}/undrain to acknowledge - Pre-flight credential check: verify .credentials.json and .claude.json exist in agentHome before spinning up a container - Auth error auto-recovery: detect auth errors (Not logged in, OAuth token has expired, etc.) and retry once after running sync-credentials and re-copying fresh credentials - Extracted runContainer() helper from ContainerRunner.Run() to support the retry flow - Wire CredentialSyncCmd in serve.go for all three ContainerRunner instances - Tests: TestPool_MaxPerAgent_*, TestPool_ConsecutiveFailures_*, TestPool_Undrain_*, TestContainerRunner_Missing{Credentials,Settings}_FailsFast, TestIsAuthError_*, TestContainerRunner_AuthError_SyncsAndRetries Co-Authored-By: Claude Sonnet 4.6 --- internal/cli/serve.go | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'internal/cli/serve.go') diff --git a/internal/cli/serve.go b/internal/cli/serve.go index 1d0de21..e7b6b71 100644 --- a/internal/cli/serve.go +++ b/internal/cli/serve.go @@ -78,35 +78,39 @@ func serve(addr string) error { // Use configured credentials dir; sync-credentials keeps this populated. claudeConfigDir := cfg.ClaudeConfigDir + repoDir, _ := os.Getwd() runners := map[string]executor.Runner{ // ContainerRunner: binaries are resolved via PATH inside the container image, // so ClaudeBinary/GeminiBinary are left empty (host paths would not exist inside). "claude": &executor.ContainerRunner{ - Image: cfg.ClaudeImage, - Logger: logger, - LogDir: cfg.LogDir, - APIURL: apiURL, - DropsDir: cfg.DropsDir, - SSHAuthSock: cfg.SSHAuthSock, - ClaudeConfigDir: claudeConfigDir, + Image: cfg.ClaudeImage, + Logger: logger, + LogDir: cfg.LogDir, + APIURL: apiURL, + DropsDir: cfg.DropsDir, + SSHAuthSock: cfg.SSHAuthSock, + ClaudeConfigDir: claudeConfigDir, + CredentialSyncCmd: filepath.Join(repoDir, "scripts", "sync-credentials"), }, "gemini": &executor.ContainerRunner{ - Image: cfg.GeminiImage, - Logger: logger, - LogDir: cfg.LogDir, - APIURL: apiURL, - DropsDir: cfg.DropsDir, - SSHAuthSock: cfg.SSHAuthSock, - ClaudeConfigDir: claudeConfigDir, + Image: cfg.GeminiImage, + Logger: logger, + LogDir: cfg.LogDir, + APIURL: apiURL, + DropsDir: cfg.DropsDir, + SSHAuthSock: cfg.SSHAuthSock, + ClaudeConfigDir: claudeConfigDir, + CredentialSyncCmd: filepath.Join(repoDir, "scripts", "sync-credentials"), }, "container": &executor.ContainerRunner{ - Image: "claudomator-agent:latest", - Logger: logger, - LogDir: cfg.LogDir, - APIURL: apiURL, - DropsDir: cfg.DropsDir, - SSHAuthSock: cfg.SSHAuthSock, - ClaudeConfigDir: claudeConfigDir, + Image: "claudomator-agent:latest", + Logger: logger, + LogDir: cfg.LogDir, + APIURL: apiURL, + DropsDir: cfg.DropsDir, + SSHAuthSock: cfg.SSHAuthSock, + ClaudeConfigDir: claudeConfigDir, + CredentialSyncCmd: filepath.Join(repoDir, "scripts", "sync-credentials"), }, } -- cgit v1.2.3