package executor

import (
	"context"
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"syscall"

	"github.com/thepeterstone/claudomator/internal/storage"
	"github.com/thepeterstone/claudomator/internal/task"
)

// ContainerRunner executes an agent inside a container.
type ContainerRunner struct {
	Image       string // default image if not specified in task
	Logger      *slog.Logger
	LogDir      string // base directory for per-execution log directories
	APIURL      string // passed to the container as CLAUDOMATOR_API_URL
	DropsDir    string // passed to the container as CLAUDOMATOR_DROP_DIR
	SSHAuthSock string // optional path to host SSH agent
}

// ExecLogDir returns the log directory for the given execution ID, or the
// empty string when no LogDir is configured.
func (r *ContainerRunner) ExecLogDir(execID string) string {
	if r.LogDir == "" {
		return ""
	}
	return filepath.Join(r.LogDir, execID)
}

// Run clones the task's repository into a host workspace, runs the agent in a
// docker container with that workspace mounted at /workspace, streams the
// container's stdout into the execution log, and pushes HEAD to origin when
// the container and the stream parser both succeed.
//
// Run fills in e.SandboxDir, e.StdoutPath, e.StderrPath, e.ArtifactDir,
// e.CostUSD and, when available, e.Summary.
//
// It returns a *BlockedError when the agent left a question file that is not
// a completion report. The workspace is removed only after a fully successful
// run; on failure, on a blocked task, or on a failed push it is preserved.
func (r *ContainerRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
	var err error

	repoURL := t.RepositoryURL
	if repoURL == "" {
		repoURL = t.Agent.RepositoryURL
	}
	if repoURL == "" {
		// Fallback to project_dir if repository_url is not set (legacy support).
		if t.Agent.ProjectDir == "" {
			return fmt.Errorf("task %s has no repository_url or project_dir", t.ID)
		}
		repoURL = t.Agent.ProjectDir
	}

	// Resolve the image once: task override, then runner default, then the
	// built-in fallback.
	image := t.Agent.ContainerImage
	if image == "" {
		image = r.Image
	}
	if image == "" {
		image = "claudomator-agent:latest"
	}

	// 1. Setup workspace on host.
	var workspace string
	isResume := false
	if e.SandboxDir != "" {
		if _, err = os.Stat(e.SandboxDir); err == nil {
			workspace = e.SandboxDir
			isResume = true
			r.Logger.Info("resuming in preserved workspace", "path", workspace)
		}
	}
	if workspace == "" {
		workspace, err = os.MkdirTemp("", "claudomator-workspace-*")
		if err != nil {
			return fmt.Errorf("creating workspace: %w", err)
		}
	}

	// Note: workspace is only removed on success. On failure, it's preserved
	// for debugging. If the task becomes BLOCKED, it's also preserved for
	// resumption.
	success := false
	isBlocked := false
	defer func() {
		if success && !isBlocked {
			os.RemoveAll(workspace)
		} else {
			r.Logger.Warn("preserving workspace", "path", workspace, "success", success, "blocked", isBlocked)
		}
	}()

	// 2. Clone repo into workspace if not resuming.
	if !isResume {
		r.Logger.Info("cloning repository", "url", repoURL, "workspace", workspace)
		// git clone requires the target to be empty or non-existent. The
		// workspace was just created as a temp dir, so it is empty.
		if out, cloneErr := exec.CommandContext(ctx, "git", "clone", repoURL, workspace).CombinedOutput(); cloneErr != nil {
			// If it's a local path and not a repo, we might need to init it
			// (legacy support from ADR-005).
			r.Logger.Warn("git clone failed, attempting fallback init", "url", repoURL, "error", cloneErr)
			if initErr := r.fallbackGitInit(repoURL, workspace); initErr != nil {
				// Report both failures; the clone error stays the wrapped one.
				return fmt.Errorf("git clone and fallback init failed: %w (fallback init: %v)\n%s", cloneErr, initErr, string(out))
			}
		}
	}
	e.SandboxDir = workspace

	// 3. Prepare logs.
	logDir := r.ExecLogDir(e.ID)
	if logDir == "" {
		logDir = filepath.Join(workspace, ".claudomator-logs")
	}
	if err := os.MkdirAll(logDir, 0700); err != nil {
		return fmt.Errorf("creating log dir: %w", err)
	}
	e.StdoutPath = filepath.Join(logDir, "stdout.log")
	e.StderrPath = filepath.Join(logDir, "stderr.log")
	e.ArtifactDir = logDir

	stdoutFile, err := os.Create(e.StdoutPath)
	if err != nil {
		return fmt.Errorf("creating stdout log: %w", err)
	}
	defer stdoutFile.Close()

	stderrFile, err := os.Create(e.StderrPath)
	if err != nil {
		return fmt.Errorf("creating stderr log: %w", err)
	}
	defer stderrFile.Close()

	// 4. Run container.
	// TODO: Support Resume/BLOCKED by re-attaching to preserved workspace.

	// Write API keys to a temporary env file to avoid exposure in 'ps' or
	// 'docker inspect'.
	envFile := filepath.Join(workspace, ".claudomator-env")
	envContent := fmt.Sprintf("ANTHROPIC_API_KEY=%s\nGOOGLE_API_KEY=%s\n",
		os.Getenv("ANTHROPIC_API_KEY"), os.Getenv("GOOGLE_API_KEY"))
	if err := os.WriteFile(envFile, []byte(envContent), 0600); err != nil {
		return fmt.Errorf("writing env file: %w", err)
	}
	// The env file is rewritten on every run, so it is never needed once this
	// call returns. Remove it so the keys do not linger in a preserved
	// workspace. Best-effort: a removal failure must not mask the run result.
	defer os.Remove(envFile)

	// Inject custom instructions via file to avoid CLI length limits.
	instructionsFile := filepath.Join(workspace, ".claudomator-instructions.txt")
	if err := os.WriteFile(instructionsFile, []byte(t.Agent.Instructions), 0600); err != nil {
		return fmt.Errorf("writing instructions: %w", err)
	}

	args := r.buildDockerArgs(workspace, e.TaskID)
	innerCmd := r.buildInnerCmd(t, e.ID, isResume)

	fullArgs := append(args, image)
	fullArgs = append(fullArgs, innerCmd...)

	r.Logger.Info("starting container", "image", image, "taskID", t.ID)
	cmd := exec.CommandContext(ctx, "docker", fullArgs...)
	cmd.Stderr = stderrFile
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	// Use os.Pipe for stdout so we can parse it in real-time.
	var stdoutR, stdoutW *os.File
	stdoutR, stdoutW, err = os.Pipe()
	if err != nil {
		return fmt.Errorf("creating stdout pipe: %w", err)
	}
	cmd.Stdout = stdoutW

	if err := cmd.Start(); err != nil {
		stdoutW.Close()
		stdoutR.Close()
		return fmt.Errorf("starting container: %w", err)
	}
	// The child holds its own copy of the write end; close ours so the reader
	// sees EOF once the child exits.
	stdoutW.Close()

	// Stream stdout to the log file and parse cost/errors.
	var costUSD float64
	var streamErr error
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		costUSD, streamErr = parseStream(stdoutR, stdoutFile, r.Logger)
		stdoutR.Close()
	}()

	waitErr := cmd.Wait()
	wg.Wait()

	e.CostUSD = costUSD

	// Check whether the agent left a question before exiting.
	questionFile := filepath.Join(logDir, "question.json")
	if data, readErr := os.ReadFile(questionFile); readErr == nil {
		os.Remove(questionFile) // consumed
		questionJSON := strings.TrimSpace(string(data))
		if isCompletionReport(questionJSON) {
			r.Logger.Info("treating question file as completion report", "taskID", e.TaskID)
			e.Summary = extractQuestionText(questionJSON)
		} else {
			isBlocked = true
			success = true // We consider BLOCKED as a "success" for workspace preservation
			return &BlockedError{
				QuestionJSON: questionJSON,
				SessionID:    e.ID, // For container runner, we use exec ID as session ID
				SandboxDir:   workspace,
			}
		}
	}

	// Read agent summary if written.
	summaryFile := filepath.Join(logDir, "summary.txt")
	if summaryData, readErr := os.ReadFile(summaryFile); readErr == nil {
		os.Remove(summaryFile) // consumed
		e.Summary = strings.TrimSpace(string(summaryData))
	}

	// 5. Post-execution: push changes if successful.
	if waitErr == nil && streamErr == nil {
		success = true
		r.Logger.Info("pushing changes back to remote", "url", repoURL)
		// We assume the sandbox has committed changes (the agent image should
		// enforce this).
		if out, pushErr := exec.CommandContext(ctx, "git", "-C", workspace, "push", "origin", "HEAD").CombinedOutput(); pushErr != nil {
			// A failed push means the agent's commits exist only in this
			// workspace, so keep it instead of deleting unpushed work.
			success = false
			r.Logger.Warn("git push failed or no changes", "error", pushErr, "output", string(out))
		}
	}

	if waitErr != nil {
		return fmt.Errorf("container execution failed: %w", waitErr)
	}
	if streamErr != nil {
		return fmt.Errorf("stream parsing failed: %w", streamErr)
	}

	return nil
}

// buildDockerArgs returns the `docker run` arguments that precede the image
// name: the workspace bind mount and working directory, the env file, and the
// CLAUDOMATOR_* environment variables.
func (r *ContainerRunner) buildDockerArgs(workspace, taskID string) []string {
	// --env-file takes a HOST path.
	hostEnvFile := filepath.Join(workspace, ".claudomator-env")
	return []string{
		"run", "--rm",
		"-v", workspace + ":/workspace",
		"-w", "/workspace",
		"--env-file", hostEnvFile,
		"-e", "CLAUDOMATOR_API_URL=" + r.APIURL,
		"-e", "CLAUDOMATOR_TASK_ID=" + taskID,
		"-e", "CLAUDOMATOR_DROP_DIR=" + r.DropsDir,
	}
}

// buildInnerCmd returns the command executed inside the container. The prompt
// is read from the instructions file in the mounted workspace via a shell
// command substitution. For the claude agent, --resume execID is added when
// isResume is true; the gemini command takes no resume flag.
func (r *ContainerRunner) buildInnerCmd(t *task.Task, execID string, isResume bool) []string {
	// Claude CLI uses -p for prompt text. To pass a file, we use a shell to cat it.
	promptCmd := "cat /workspace/.claudomator-instructions.txt"

	if t.Agent.Type == "gemini" {
		return []string{"sh", "-c", "gemini -p \"$(" + promptCmd + ")\""}
	}

	// Claude
	claudeArgs := []string{"claude", "-p", "\"$(" + promptCmd + ")\""}
	if isResume {
		claudeArgs = append(claudeArgs, "--resume", execID)
	}
	claudeArgs = append(claudeArgs, "--output-format", "stream-json", "--verbose", "--permission-mode", "bypassPermissions")

	return []string{"sh", "-c", strings.Join(claudeArgs, " ")}
}

// fallbackGitInit turns workspace into a git repository with an initial
// commit. Run calls it when `git clone` of repoURL fails. repoURL is accepted
// but not used here.
func (r *ContainerRunner) fallbackGitInit(repoURL, workspace string) error {
	// Ensure directory exists.
	if err := os.MkdirAll(workspace, 0755); err != nil {
		return err
	}

	// If it's a local directory but not a repo, init it.
	cmds := [][]string{
		gitSafe("-C", workspace, "init"),
		gitSafe("-C", workspace, "add", "-A"),
		gitSafe("-C", workspace, "commit", "--allow-empty", "-m", "chore: initial commit"),
	}

	// If it was a local path, maybe we should have copied it?
	// git clone handles local paths fine if they are repos.
	// This fallback is only for when it's NOT a repo.
	for _, args := range cmds {
		if out, err := exec.Command("git", args...).CombinedOutput(); err != nil {
			return fmt.Errorf("git init failed: %w\n%s", err, out)
		}
	}
	return nil
}