diff options
| author | Peter Stone <thepeterstone@gmail.com> | 2026-03-18 23:56:20 +0000 |
|---|---|---|
| committer | Peter Stone <thepeterstone@gmail.com> | 2026-03-18 23:56:20 +0000 |
| commit | 7df4f06ae0e3ae80bd967bf53cbec36e58b4a3bd (patch) | |
| tree | 740c141c52764604fc8d4c036733e5f47368b26a /internal | |
| parent | a4795d68fc5381f1ff48d043fe7554355e5899fb (diff) | |
feat: containerized execution with agent tooling and deployment fixes
- ContainerRunner replaces ClaudeRunner/GeminiRunner; all agent types run
in Docker containers via claudomator-agent:latest
- Writable agentHome staging dir (/home/agent) satisfies home-dir
requirements for both claude and gemini CLIs without exposing host creds
- Copy .credentials.json and .claude.json into staging dir at run time;
GEMINI_API_KEY passed via env file
- Fix git clone: remove MkdirTemp-created dir before cloning (git rejects
pre-existing dirs even when empty)
- Replace localhost with host.docker.internal in APIURL so container can
reach host API; add --add-host=host.docker.internal:host-gateway
- Run container as --user=$(uid):$(gid) so host-owned workspace files are
readable; chmod workspace 0755 and instructions file 0644 after clone
- Pre-create .gemini/ in staging dir to avoid atomic-rename ENOENT on first
gemini-cli run
- Add ct CLI tool to container image: pre-built Bash wrapper for
Claudomator API (ct task submit/create/run/wait/status/list)
- Document ct tool in CLAUDE.md agent instructions section
- Add drain-failed-tasks script: retries failed tasks on a 5-minute interval
- Update Dockerfile: Node 22 via NodeSource, Go 1.24, gemini-cli,
git safe.directory=*, default ~/.claude.json
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal')
| -rw-r--r-- | internal/cli/serve.go | 50 | ||||
| -rw-r--r-- | internal/executor/container.go | 79 | ||||
| -rw-r--r-- | internal/executor/container_test.go | 9 |
3 files changed, 91 insertions, 47 deletions
diff --git a/internal/cli/serve.go b/internal/cli/serve.go index 2ee020d..98e7524 100644 --- a/internal/cli/serve.go +++ b/internal/cli/serve.go @@ -75,36 +75,38 @@ func serve(addr string) error { apiURL = "http://" + addr } + // Resolve the claude config dir from HOME so the container can mount credentials. + claudeConfigDir := filepath.Join(os.Getenv("HOME"), ".claude") + runners := map[string]executor.Runner{ + // ContainerRunner: binaries are resolved via PATH inside the container image, + // so ClaudeBinary/GeminiBinary are left empty (host paths would not exist inside). "claude": &executor.ContainerRunner{ - Image: cfg.ClaudeImage, - Logger: logger, - LogDir: cfg.LogDir, - APIURL: apiURL, - DropsDir: cfg.DropsDir, - SSHAuthSock: cfg.SSHAuthSock, - ClaudeBinary: cfg.ClaudeBinaryPath, - GeminiBinary: cfg.GeminiBinaryPath, + Image: cfg.ClaudeImage, + Logger: logger, + LogDir: cfg.LogDir, + APIURL: apiURL, + DropsDir: cfg.DropsDir, + SSHAuthSock: cfg.SSHAuthSock, + ClaudeConfigDir: claudeConfigDir, }, "gemini": &executor.ContainerRunner{ - Image: cfg.GeminiImage, - Logger: logger, - LogDir: cfg.LogDir, - APIURL: apiURL, - DropsDir: cfg.DropsDir, - SSHAuthSock: cfg.SSHAuthSock, - ClaudeBinary: cfg.ClaudeBinaryPath, - GeminiBinary: cfg.GeminiBinaryPath, + Image: cfg.GeminiImage, + Logger: logger, + LogDir: cfg.LogDir, + APIURL: apiURL, + DropsDir: cfg.DropsDir, + SSHAuthSock: cfg.SSHAuthSock, + ClaudeConfigDir: claudeConfigDir, }, "container": &executor.ContainerRunner{ - Image: "claudomator-agent:latest", - Logger: logger, - LogDir: cfg.LogDir, - APIURL: apiURL, - DropsDir: cfg.DropsDir, - SSHAuthSock: cfg.SSHAuthSock, - ClaudeBinary: cfg.ClaudeBinaryPath, - GeminiBinary: cfg.GeminiBinaryPath, + Image: "claudomator-agent:latest", + Logger: logger, + LogDir: cfg.LogDir, + APIURL: apiURL, + DropsDir: cfg.DropsDir, + SSHAuthSock: cfg.SSHAuthSock, + ClaudeConfigDir: claudeConfigDir, }, } diff --git a/internal/executor/container.go b/internal/executor/container.go index 45758d2..c43e201 100644 --- a/internal/executor/container.go +++ b/internal/executor/container.go @@ -22,9 +22,10 @@ type ContainerRunner struct { LogDir string APIURL string DropsDir string - SSHAuthSock string // optional path to host SSH agent - ClaudeBinary string // optional path to claude binary in container - GeminiBinary string // optional path to gemini binary in container + SSHAuthSock string // optional path to host SSH agent + ClaudeBinary string // optional path to claude binary in container + GeminiBinary string // optional path to gemini binary in container + ClaudeConfigDir string // host path to ~/.claude; mounted into container for auth credentials // Command allows mocking exec.CommandContext for tests. Command func(ctx context.Context, name string, arg ...string) *exec.Cmd } @@ -50,9 +51,14 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec repoURL = t.Agent.RepositoryURL } if repoURL == "" { - // Fallback to project_dir if repository_url is not set (legacy support) + // Fallback to project_dir if repository_url is not set (legacy support). + // Prefer the 'local' bare remote so that git push succeeds after execution + // (pushing to a non-bare working copy on a checked-out branch is rejected by git). if t.Agent.ProjectDir != "" { repoURL = t.Agent.ProjectDir + if out, err2 := exec.Command("git", "-C", t.Agent.ProjectDir, "remote", "get-url", "local").Output(); err2 == nil { + repoURL = strings.TrimSpace(string(out)) + } } else { return fmt.Errorf("task %s has no repository_url or project_dir", t.ID) } @@ -82,6 +88,7 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec if err != nil { return fmt.Errorf("creating workspace: %w", err) } + // chmod applied after clone; see step 2. } // Note: workspace is only removed on success. On failure, it's preserved for debugging. @@ -96,18 +103,18 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec } }() - // 2. Clone repo into workspace if not resuming + // 2. Clone repo into workspace if not resuming. + // git clone requires the target directory to not exist; remove the MkdirTemp-created dir first. if !isResume { + if err := os.Remove(workspace); err != nil { + return fmt.Errorf("removing workspace before clone: %w", err) + } r.Logger.Info("cloning repository", "url", repoURL, "workspace", workspace) if out, err := r.command(ctx, "git", "clone", repoURL, workspace).CombinedOutput(); err != nil { - // If it looks like a remote URL, fail fast. - if strings.HasPrefix(repoURL, "http") || strings.HasPrefix(repoURL, "git@") || strings.HasPrefix(repoURL, "ssh://") { - return fmt.Errorf("git clone failed for remote repository: %w\n%s", err, string(out)) - } - r.Logger.Warn("git clone failed, attempting fallback init", "url", repoURL, "error", err) - if initErr := r.fallbackGitInit(repoURL, workspace); initErr != nil { - return fmt.Errorf("git clone and fallback init failed: %w\n%s", err, string(out)) - } + return fmt.Errorf("git clone failed: %w\n%s", err, string(out)) + } + if err = os.Chmod(workspace, 0755); err != nil { + return fmt.Errorf("chmod cloned workspace: %w", err) } } e.SandboxDir = workspace @@ -140,18 +147,39 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec // Write API keys to a temporary env file to avoid exposure in 'ps' or 'docker inspect' envFile := filepath.Join(workspace, ".claudomator-env") - envContent := fmt.Sprintf("ANTHROPIC_API_KEY=%s\nGOOGLE_API_KEY=%s\n", os.Getenv("ANTHROPIC_API_KEY"), os.Getenv("GOOGLE_API_KEY")) + envContent := fmt.Sprintf("ANTHROPIC_API_KEY=%s\nGOOGLE_API_KEY=%s\nGEMINI_API_KEY=%s\n", os.Getenv("ANTHROPIC_API_KEY"), os.Getenv("GOOGLE_API_KEY"), os.Getenv("GEMINI_API_KEY")) if err := os.WriteFile(envFile, []byte(envContent), 0600); err != nil { return fmt.Errorf("writing env file: %w", err) } // Inject custom instructions via file to avoid CLI length limits instructionsFile := filepath.Join(workspace, ".claudomator-instructions.txt") - if err := os.WriteFile(instructionsFile, []byte(t.Agent.Instructions), 0600); err != nil { + if err := os.WriteFile(instructionsFile, []byte(t.Agent.Instructions), 0644); err != nil { return fmt.Errorf("writing instructions: %w", err) } - args := r.buildDockerArgs(workspace, e.TaskID) + // Set up a writable $HOME staging dir so any agent tool (claude, gemini, etc.) + // can freely create subdirs (session-env, .gemini, .cache, …) without hitting + // a non-existent or read-only home. We copy only the claude credentials into it. + agentHome := filepath.Join(workspace, ".agent-home") + if err := os.MkdirAll(filepath.Join(agentHome, ".claude"), 0755); err != nil { + return fmt.Errorf("creating agent home staging dir: %w", err) + } + if err := os.MkdirAll(filepath.Join(agentHome, ".gemini"), 0755); err != nil { + return fmt.Errorf("creating .gemini dir: %w", err) + } + if r.ClaudeConfigDir != "" { + // credentials + if srcData, readErr := os.ReadFile(filepath.Join(r.ClaudeConfigDir, ".credentials.json")); readErr == nil { + _ = os.WriteFile(filepath.Join(agentHome, ".claude", ".credentials.json"), srcData, 0600) + } + // settings (used by claude CLI; copy so it can write updates without hitting the host) + if srcData, readErr := os.ReadFile(filepath.Join(filepath.Dir(r.ClaudeConfigDir), ".claude.json")); readErr == nil { + _ = os.WriteFile(filepath.Join(agentHome, ".claude.json"), srcData, 0644) + } + } + + args := r.buildDockerArgs(workspace, agentHome, e.TaskID) innerCmd := r.buildInnerCmd(t, e, isResume) fullArgs := append(args, image) @@ -240,9 +268,8 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec // 5. Post-execution: push changes if successful if waitErr == nil && streamErr == nil { - // Check if there are any commits to push (Issue 10) - // We use rev-list to see if HEAD is ahead of origin/HEAD. - // If origin/HEAD doesn't exist (e.g. fresh init), we just attempt to push. + // Check if there are any commits to push (HEAD ahead of origin/HEAD). + // If origin/HEAD doesn't exist (e.g. fresh clone with no commits), we attempt push anyway. hasCommits := true if out, err := r.command(ctx, "git", "-C", workspace, "rev-list", "origin/HEAD..HEAD").CombinedOutput(); err == nil { if len(strings.TrimSpace(string(out))) == 0 { @@ -272,15 +299,25 @@ func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Exec return nil } -func (r *ContainerRunner) buildDockerArgs(workspace, taskID string) []string { +func (r *ContainerRunner) buildDockerArgs(workspace, claudeHome, taskID string) []string { // --env-file takes a HOST path. hostEnvFile := filepath.Join(workspace, ".claudomator-env") + + // Replace localhost with host.docker.internal so the container can reach the host API. + apiURL := strings.ReplaceAll(r.APIURL, "localhost", "host.docker.internal") + args := []string{ "run", "--rm", + // Allow container to reach the host via host.docker.internal. + "--add-host=host.docker.internal:host-gateway", + // Run as the current process UID:GID so the container can read host-owned files. + fmt.Sprintf("--user=%d:%d", os.Getuid(), os.Getgid()), "-v", workspace + ":/workspace", + "-v", claudeHome + ":/home/agent", "-w", "/workspace", "--env-file", hostEnvFile, - "-e", "CLAUDOMATOR_API_URL=" + r.APIURL, + "-e", "HOME=/home/agent", + "-e", "CLAUDOMATOR_API_URL=" + apiURL, "-e", "CLAUDOMATOR_TASK_ID=" + taskID, "-e", "CLAUDOMATOR_DROP_DIR=" + r.DropsDir, } diff --git a/internal/executor/container_test.go b/internal/executor/container_test.go index d4d591e..f97f2b5 100644 --- a/internal/executor/container_test.go +++ b/internal/executor/container_test.go @@ -23,14 +23,19 @@ func TestContainerRunner_BuildDockerArgs(t *testing.T) { workspace := "/tmp/ws" taskID := "task-123" - args := runner.buildDockerArgs(workspace, taskID) + agentHome := "/tmp/ws/.agent-home" + args := runner.buildDockerArgs(workspace, agentHome, taskID) expected := []string{ "run", "--rm", + "--add-host=host.docker.internal:host-gateway", + fmt.Sprintf("--user=%d:%d", os.Getuid(), os.Getgid()), "-v", "/tmp/ws:/workspace", + "-v", "/tmp/ws/.agent-home:/home/agent", "-w", "/workspace", "--env-file", "/tmp/ws/.claudomator-env", - "-e", "CLAUDOMATOR_API_URL=http://localhost:8484", + "-e", "HOME=/home/agent", + "-e", "CLAUDOMATOR_API_URL=http://host.docker.internal:8484", "-e", "CLAUDOMATOR_TASK_ID=task-123", "-e", "CLAUDOMATOR_DROP_DIR=/data/drops", "-v", "/tmp/ssh.sock:/tmp/ssh-auth.sock", |
