diff options
Diffstat (limited to 'internal/executor/container.go')
| -rw-r--r-- | internal/executor/container.go | 128 |
1 files changed, 92 insertions, 36 deletions
diff --git a/internal/executor/container.go b/internal/executor/container.go index 5421108..d9ed8ef 100644 --- a/internal/executor/container.go +++ b/internal/executor/container.go @@ -2,6 +2,7 @@ package executor import ( "context" + "errors" "fmt" "log/slog" "os" @@ -22,14 +23,26 @@ type ContainerRunner struct { LogDir string APIURL string DropsDir string - SSHAuthSock string // optional path to host SSH agent - ClaudeBinary string // optional path to claude binary in container - GeminiBinary string // optional path to gemini binary in container - ClaudeConfigDir string // host path to ~/.claude; mounted into container for auth credentials + SSHAuthSock string // optional path to host SSH agent + ClaudeBinary string // optional path to claude binary in container + GeminiBinary string // optional path to gemini binary in container + ClaudeConfigDir string // host path to ~/.claude; mounted into container for auth credentials + CredentialSyncCmd string // optional path to sync-credentials script for auth-error auto-recovery // Command allows mocking exec.CommandContext for tests. Command func(ctx context.Context, name string, arg ...string) *exec.Cmd } +func isAuthError(err error) bool { + if err == nil { + return false + } + s := err.Error() + return strings.Contains(s, "Not logged in") || + strings.Contains(s, "OAuth token has expired") || + strings.Contains(s, "authentication_error") || + strings.Contains(s, "Please run /login") +} + func (r *ContainerRunner) command(ctx context.Context, name string, arg ...string) *exec.Cmd { if r.Command != nil { return r.Command(ctx, name, arg...) @@ -51,14 +64,6 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec return fmt.Errorf("task %s has no repository_url", t.ID) } - image := t.Agent.ContainerImage - if image == "" { - image = r.Image - } - if image == "" { - image = "claudomator-agent:latest" - } - // 1. Setup workspace on host var workspace string isResume := false @@ -106,6 +111,81 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec } e.SandboxDir = workspace + // Set up a writable $HOME staging dir so any agent tool (claude, gemini, etc.) + // can freely create subdirs (session-env, .gemini, .cache, …) without hitting + // a non-existent or read-only home. We copy only the claude credentials into it. + agentHome := filepath.Join(workspace, ".agent-home") + if err := os.MkdirAll(filepath.Join(agentHome, ".claude"), 0755); err != nil { + return fmt.Errorf("creating agent home staging dir: %w", err) + } + if err := os.MkdirAll(filepath.Join(agentHome, ".gemini"), 0755); err != nil { + return fmt.Errorf("creating .gemini dir: %w", err) + } + if r.ClaudeConfigDir != "" { + // credentials + if srcData, readErr := os.ReadFile(filepath.Join(r.ClaudeConfigDir, ".credentials.json")); readErr == nil { + _ = os.WriteFile(filepath.Join(agentHome, ".claude", ".credentials.json"), srcData, 0600) + } + // settings (used by claude CLI; copy so it can write updates without hitting the host) + if srcData, readErr := os.ReadFile(filepath.Join(r.ClaudeConfigDir, ".claude.json")); readErr == nil { + _ = os.WriteFile(filepath.Join(agentHome, ".claude.json"), srcData, 0644) + } + } + + // Pre-flight: verify credentials were actually copied before spinning up a container. + if r.ClaudeConfigDir != "" { + credsPath := filepath.Join(agentHome, ".claude", ".credentials.json") + settingsPath := filepath.Join(agentHome, ".claude.json") + if _, err := os.Stat(credsPath); os.IsNotExist(err) { + return fmt.Errorf("credentials not found at %s — run sync-credentials", r.ClaudeConfigDir) + } + if _, err := os.Stat(settingsPath); os.IsNotExist(err) { + return fmt.Errorf("claude settings (.claude.json) not found at %s — run sync-credentials", r.ClaudeConfigDir) + } + } + + // Run container (with auth retry on failure). + runErr := r.runContainer(ctx, t, e, workspace, agentHome, isResume) + if runErr != nil && isAuthError(runErr) && r.CredentialSyncCmd != "" { + r.Logger.Warn("auth failure detected, syncing credentials and retrying once", "taskID", t.ID) + syncOut, syncErr := r.command(ctx, r.CredentialSyncCmd).CombinedOutput() + if syncErr != nil { + r.Logger.Warn("sync-credentials failed", "error", syncErr, "output", string(syncOut)) + } + // Re-copy credentials into agentHome with fresh files. + if srcData, readErr := os.ReadFile(filepath.Join(r.ClaudeConfigDir, ".credentials.json")); readErr == nil { + _ = os.WriteFile(filepath.Join(agentHome, ".claude", ".credentials.json"), srcData, 0600) + } + if srcData, readErr := os.ReadFile(filepath.Join(r.ClaudeConfigDir, ".claude.json")); readErr == nil { + _ = os.WriteFile(filepath.Join(agentHome, ".claude.json"), srcData, 0644) + } + runErr = r.runContainer(ctx, t, e, workspace, agentHome, isResume) + } + + if runErr == nil { + success = true + } + var blockedErr *BlockedError + if errors.As(runErr, &blockedErr) { + isBlocked = true + success = true // preserve workspace for resumption + } + return runErr +} + +// runContainer runs the docker container for the given task and handles log setup, +// environment files, instructions, and post-execution git operations. +func (r *ContainerRunner) runContainer(ctx context.Context, t *task.Task, e *storage.Execution, workspace, agentHome string, isResume bool) error { + repoURL := t.RepositoryURL + + image := t.Agent.ContainerImage + if image == "" { + image = r.Image + } + if image == "" { + image = "claudomator-agent:latest" + } + // 3. Prepare logs logDir := r.ExecLogDir(e.ID) if logDir == "" { @@ -145,27 +225,6 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec return fmt.Errorf("writing instructions: %w", err) } - // Set up a writable $HOME staging dir so any agent tool (claude, gemini, etc.) - // can freely create subdirs (session-env, .gemini, .cache, …) without hitting - // a non-existent or read-only home. We copy only the claude credentials into it. - agentHome := filepath.Join(workspace, ".agent-home") - if err := os.MkdirAll(filepath.Join(agentHome, ".claude"), 0755); err != nil { - return fmt.Errorf("creating agent home staging dir: %w", err) - } - if err := os.MkdirAll(filepath.Join(agentHome, ".gemini"), 0755); err != nil { - return fmt.Errorf("creating .gemini dir: %w", err) - } - if r.ClaudeConfigDir != "" { - // credentials - if srcData, readErr := os.ReadFile(filepath.Join(r.ClaudeConfigDir, ".credentials.json")); readErr == nil { - _ = os.WriteFile(filepath.Join(agentHome, ".claude", ".credentials.json"), srcData, 0600) - } - // settings (used by claude CLI; copy so it can write updates without hitting the host) - if srcData, readErr := os.ReadFile(filepath.Join(r.ClaudeConfigDir, ".claude.json")); readErr == nil { - _ = os.WriteFile(filepath.Join(agentHome, ".claude.json"), srcData, 0644) - } - } - args := r.buildDockerArgs(workspace, agentHome, e.TaskID) innerCmd := r.buildInnerCmd(t, e, isResume) @@ -233,8 +292,6 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec r.Logger.Info("treating question file as completion report", "taskID", e.TaskID) e.Summary = extractQuestionText(questionJSON) } else { - isBlocked = true - success = true // We consider BLOCKED as a "success" for workspace preservation if e.SessionID == "" { r.Logger.Warn("missing session ID; resume will start fresh", "taskID", e.TaskID) } @@ -278,7 +335,6 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec } r.Logger.Info("no new commits to push", "taskID", t.ID) } - success = true } if waitErr != nil { |
