diff options
| author | Claudomator Agent <agent@claudomator.local> | 2026-03-21 23:18:50 +0000 |
|---|---|---|
| committer | Claudomator Agent <agent@claudomator.local> | 2026-03-21 23:18:50 +0000 |
| commit | 8dca9bbb0baee59ffe0d3127180ef0958dda8b91 (patch) | |
| tree | e887036f4cce0f10694c5b9a29f4b4dc251769ba /internal/executor/container_test.go | |
| parent | 9e35f7e4087cfa6017cb65ec6a7036f394f5eb22 (diff) | |
feat: executor reliability — per-agent limit, drain gate, pre-flight creds, auth recovery
- maxPerAgent=1: only 1 in-flight execution per agent type at a time; excess tasks are requeued after 30s
- Drain gate: after 2 consecutive failures the agent is drained and a question is set on the task; reset on first success; POST /api/pool/agents/{agent}/undrain to acknowledge
- Pre-flight credential check: verify .credentials.json and .claude.json exist in agentHome before spinning up a container
- Auth error auto-recovery: detect auth errors (Not logged in, OAuth token has expired, etc.) and retry once after running sync-credentials and re-copying fresh credentials
- Extracted runContainer() helper from ContainerRunner.Run() to support the retry flow
- Wire CredentialSyncCmd in serve.go for all three ContainerRunner instances
- Tests: TestPool_MaxPerAgent_*, TestPool_ConsecutiveFailures_*, TestPool_Undrain_*, TestContainerRunner_Missing{Credentials,Settings}_FailsFast, TestIsAuthError_*, TestContainerRunner_AuthError_SyncsAndRetries
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal/executor/container_test.go')
| -rw-r--r-- | internal/executor/container_test.go | 171 |
1 files changed, 171 insertions, 0 deletions
```diff
diff --git a/internal/executor/container_test.go b/internal/executor/container_test.go
index be80b51..b6946ef 100644
--- a/internal/executor/container_test.go
+++ b/internal/executor/container_test.go
@@ -7,6 +7,7 @@ import (
 	"log/slog"
 	"os"
 	"os/exec"
+	"path/filepath"
 	"strings"
 	"testing"
 
@@ -343,3 +344,173 @@ func TestGitSafe_PrependsSafeDirectory(t *testing.T) {
 		}
 	}
 }
+
+func TestContainerRunner_MissingCredentials_FailsFast(t *testing.T) {
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+
+	claudeConfigDir := t.TempDir()
+
+	// Set up ClaudeConfigDir with MISSING credentials (so pre-flight fails)
+	// Don't create .credentials.json
+	// But DO create .claude.json so the test isolates the credentials check
+	if err := os.WriteFile(filepath.Join(claudeConfigDir, ".claude.json"), []byte("{}"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	runner := &ContainerRunner{
+		Logger:          logger,
+		Image:           "busybox",
+		ClaudeConfigDir: claudeConfigDir,
+		Command: func(ctx context.Context, name string, arg ...string) *exec.Cmd {
+			if name == "git" && len(arg) > 0 && arg[0] == "clone" {
+				dir := arg[len(arg)-1]
+				os.MkdirAll(dir, 0755)
+				return exec.Command("true")
+			}
+			return exec.Command("true")
+		},
+	}
+
+	tk := &task.Task{
+		ID:            "test-missing-creds",
+		RepositoryURL: "https://github.com/example/repo.git",
+		Agent:         task.AgentConfig{Type: "claude"},
+	}
+	e := &storage.Execution{ID: "test-exec", TaskID: "test-missing-creds"}
+
+	err := runner.Run(context.Background(), tk, e)
+	if err == nil {
+		t.Fatal("expected error due to missing credentials, got nil")
+	}
+	if !strings.Contains(err.Error(), "credentials not found") {
+		t.Errorf("expected 'credentials not found' error, got: %v", err)
+	}
+}
+
+func TestContainerRunner_MissingSettings_FailsFast(t *testing.T) {
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+
+	claudeConfigDir := t.TempDir()
+
+	// Only create credentials but NOT .claude.json
+	if err := os.WriteFile(filepath.Join(claudeConfigDir, ".credentials.json"), []byte("{}"), 0600); err != nil {
+		t.Fatal(err)
+	}
+
+	runner := &ContainerRunner{
+		Logger:          logger,
+		Image:           "busybox",
+		ClaudeConfigDir: claudeConfigDir,
+		Command: func(ctx context.Context, name string, arg ...string) *exec.Cmd {
+			if name == "git" && len(arg) > 0 && arg[0] == "clone" {
+				dir := arg[len(arg)-1]
+				os.MkdirAll(dir, 0755)
+				return exec.Command("true")
+			}
+			return exec.Command("true")
+		},
+	}
+
+	tk := &task.Task{
+		ID:            "test-missing-settings",
+		RepositoryURL: "https://github.com/example/repo.git",
+		Agent:         task.AgentConfig{Type: "claude"},
+	}
+	e := &storage.Execution{ID: "test-exec-2", TaskID: "test-missing-settings"}
+
+	err := runner.Run(context.Background(), tk, e)
+	if err == nil {
+		t.Fatal("expected error due to missing settings, got nil")
+	}
+	if !strings.Contains(err.Error(), "claude settings") {
+		t.Errorf("expected 'claude settings' error, got: %v", err)
+	}
+}
+
+func TestIsAuthError_DetectsAllVariants(t *testing.T) {
+	tests := []struct {
+		msg  string
+		want bool
+	}{
+		{"Not logged in", true},
+		{"OAuth token has expired", true},
+		{"authentication_error: invalid token", true},
+		{"Please run /login to authenticate", true},
+		{"container execution failed: exit status 1", false},
+		{"git clone failed", false},
+		{"", false},
+	}
+	for _, tt := range tests {
+		var err error
+		if tt.msg != "" {
+			err = fmt.Errorf("%s", tt.msg)
+		}
+		got := isAuthError(err)
+		if got != tt.want {
+			t.Errorf("isAuthError(%q) = %v, want %v", tt.msg, got, tt.want)
+		}
+	}
+}
+
+func TestContainerRunner_AuthError_SyncsAndRetries(t *testing.T) {
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+
+	// Create a sync script that creates a marker file
+	syncDir := t.TempDir()
+	syncMarker := filepath.Join(syncDir, "sync-called")
+	syncScript := filepath.Join(syncDir, "sync-creds")
+	os.WriteFile(syncScript, []byte("#!/bin/sh\ntouch "+syncMarker+"\n"), 0755)
+
+	claudeConfigDir := t.TempDir()
+	// Create both credential files in ClaudeConfigDir
+	os.WriteFile(filepath.Join(claudeConfigDir, ".credentials.json"), []byte(`{"token":"fresh"}`), 0600)
+	os.WriteFile(filepath.Join(claudeConfigDir, ".claude.json"), []byte("{}"), 0644)
+
+	callCount := 0
+	runner := &ContainerRunner{
+		Logger:            logger,
+		Image:             "busybox",
+		ClaudeConfigDir:   claudeConfigDir,
+		CredentialSyncCmd: syncScript,
+		Command: func(ctx context.Context, name string, arg ...string) *exec.Cmd {
+			if name == "git" {
+				if len(arg) > 0 && arg[0] == "clone" {
+					dir := arg[len(arg)-1]
+					os.MkdirAll(dir, 0755)
+				}
+				return exec.Command("true")
+			}
+			if name == "docker" {
+				callCount++
+				if callCount == 1 {
+					// First docker call fails with auth error
+					return exec.Command("sh", "-c", "echo 'Not logged in' >&2; exit 1")
+				}
+				// Second docker call "succeeds"
+				return exec.Command("sh", "-c", "exit 0")
+			}
+			if name == syncScript {
+				return exec.Command("sh", "-c", "touch "+syncMarker)
+			}
+			return exec.Command("true")
+		},
+	}
+
+	tk := &task.Task{
+		ID:            "auth-retry-test",
+		RepositoryURL: "https://github.com/example/repo.git",
+		Agent:         task.AgentConfig{Type: "claude", Instructions: "test"},
+	}
+	e := &storage.Execution{ID: "auth-retry-exec", TaskID: "auth-retry-test"}
+
+	// Run — first attempt will fail with auth error, triggering sync+retry
+	runner.Run(context.Background(), tk, e)
+	// We don't check error strictly since second run may also fail (git push etc.)
+	// What we care about is that docker was called twice and sync was called
+	if callCount < 2 {
+		t.Errorf("expected docker to be called at least twice (original + retry), got %d", callCount)
+	}
+	if _, err := os.Stat(syncMarker); os.IsNotExist(err) {
+		t.Error("expected sync-credentials to be called, but marker file not found")
+	}
+}
```
