summary | refs | log | tree | commit | diff
path: root/internal/api
diff options
context:
space:
mode:
author: Claudomator Agent <agent@claudomator.local> 2026-03-21 23:18:50 +0000
committer: Claudomator Agent <agent@claudomator.local> 2026-03-21 23:18:50 +0000
commit: 8dca9bbb0baee59ffe0d3127180ef0958dda8b91 (patch)
tree: e887036f4cce0f10694c5b9a29f4b4dc251769ba /internal/api
parent: 9e35f7e4087cfa6017cb65ec6a7036f394f5eb22 (diff)
feat: executor reliability — per-agent limit, drain gate, pre-flight creds, auth recovery
- maxPerAgent=1: only 1 in-flight execution per agent type at a time; excess tasks are requeued after 30s
- Drain gate: after 2 consecutive failures the agent is drained and a question is set on the task; reset on first success; POST /api/pool/agents/{agent}/undrain to acknowledge
- Pre-flight credential check: verify .credentials.json and .claude.json exist in agentHome before spinning up a container
- Auth error auto-recovery: detect auth errors (Not logged in, OAuth token has expired, etc.) and retry once after running sync-credentials and re-copying fresh credentials
- Extracted runContainer() helper from ContainerRunner.Run() to support the retry flow
- Wire CredentialSyncCmd in serve.go for all three ContainerRunner instances
- Tests: TestPool_MaxPerAgent_*, TestPool_ConsecutiveFailures_*, TestPool_Undrain_*, TestContainerRunner_Missing{Credentials,Settings}_FailsFast, TestIsAuthError_*, TestContainerRunner_AuthError_SyncsAndRetries

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal/api')
-rw-r--r-- internal/api/executions.go | 8
-rw-r--r-- internal/api/server.go | 1
2 files changed, 9 insertions, 0 deletions
diff --git a/internal/api/executions.go b/internal/api/executions.go
index 4d8ba9c..d39de9f 100644
--- a/internal/api/executions.go
+++ b/internal/api/executions.go
@@ -128,6 +128,14 @@ func (s *Server) handleGetAgentStatus(w http.ResponseWriter, r *http.Request) {
})
}
+// handleUndrainAgent asks the pool to undrain the agent type named by the {agent} path value, then replies 200 OK.
+// POST /api/pool/agents/{agent}/undrain
+func (s *Server) handleUndrainAgent(w http.ResponseWriter, r *http.Request) {
+ agent := r.PathValue("agent") // value of the {agent} wildcard in the route pattern; not validated here
+ s.pool.UndrainingAgent(agent) // presumably resets the agent's drain state and failure counter — confirm in the pool implementation
+ w.WriteHeader(http.StatusOK) // always 200 with an empty body; no result from the pool call is inspected
+}
+
// tailLogFile reads the last n lines from the file at path.
func tailLogFile(path string, n int) (string, error) {
data, err := os.ReadFile(path)
diff --git a/internal/api/server.go b/internal/api/server.go
index 65823b4..ff6fdb6 100644
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -123,6 +123,7 @@ func (s *Server) routes() {
s.mux.HandleFunc("GET /api/executions", s.handleListRecentExecutions)
s.mux.HandleFunc("GET /api/stats", s.handleGetDashboardStats)
s.mux.HandleFunc("GET /api/agents/status", s.handleGetAgentStatus)
+ s.mux.HandleFunc("POST /api/pool/agents/{agent}/undrain", s.handleUndrainAgent)
s.mux.HandleFunc("GET /api/executions/{id}", s.handleGetExecution)
s.mux.HandleFunc("GET /api/executions/{id}/log", s.handleGetExecutionLog)
s.mux.HandleFunc("GET /api/tasks/{id}/logs/stream", s.handleStreamTaskLogs)