8 files changed, 158 insertions, 24 deletions
diff --git a/internal/api/elaborate.go b/internal/api/elaborate.go
index 00f3297..e480e00 100644
--- a/internal/api/elaborate.go
+++ b/internal/api/elaborate.go
@@ -14,9 +14,9 @@ import (
 const elaborateTimeout = 30 * time.Second
 
 func buildElaboratePrompt(workDir string) string {
-	workDirLine := `    "working_dir":     string  — leave empty unless you have a specific reason to set it,`
+	workDirLine := `    "project_dir":     string  — leave empty unless you have a specific reason to set it,`
 	if workDir != "" {
-		workDirLine = fmt.Sprintf(`    "working_dir":     string  — use %q for tasks that operate on this codebase, empty string otherwise,`, workDir)
+		workDirLine = fmt.Sprintf(`    "project_dir":     string  — use %q for tasks that operate on this codebase, empty string otherwise,`, workDir)
 	}
 	return `You are a task configuration assistant for Claudomator, an AI task runner that executes tasks by running Claude as a subprocess.
 
@@ -53,7 +53,7 @@ type elaboratedTask struct {
 type elaboratedClaude struct {
 	Model        string   `json:"model"`
 	Instructions string   `json:"instructions"`
-	WorkingDir   string   `json:"working_dir"`
+	ProjectDir   string   `json:"project_dir"`
 	MaxBudgetUSD float64  `json:"max_budget_usd"`
 	AllowedTools []string `json:"allowed_tools"`
 }
@@ -87,7 +87,7 @@ func (s *Server) claudeBinaryPath() string {
 func (s *Server) handleElaborateTask(w http.ResponseWriter, r *http.Request) {
 	var input struct {
 		Prompt     string `json:"prompt"`
-		WorkingDir string `json:"working_dir"`
+		ProjectDir string `json:"project_dir"`
 	}
 	if err := json.NewDecoder(r.Body).Decode(&input); err != nil {
 		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid JSON: " + err.Error()})
@@ -99,8 +99,8 @@ func (s *Server) handleElaborateTask(w http.ResponseWriter, r *http.Request) {
 	}
 
 	workDir := s.workDir
-	if input.WorkingDir != "" {
-		workDir = input.WorkingDir
+	if input.ProjectDir != "" {
+		workDir = input.ProjectDir
 	}
 
 	ctx, cancel := context.WithTimeout(r.Context(), elaborateTimeout)
diff --git a/internal/api/elaborate_test.go b/internal/api/elaborate_test.go
index 52f7fdf..09f7fbe 100644
--- a/internal/api/elaborate_test.go
+++ b/internal/api/elaborate_test.go
@@ -56,7 +56,7 @@ func TestElaborateTask_Success(t *testing.T) {
 		Claude: elaboratedClaude{
 			Model:        "sonnet",
 			Instructions: "Run go test -race ./... and report results.",
-			WorkingDir:   "",
+			ProjectDir:   "",
 			MaxBudgetUSD: 0.5,
 			AllowedTools: []string{"Bash"},
 		},
diff --git a/internal/api/validate.go b/internal/api/validate.go
index d8ebde9..4b691a9 100644
--- a/internal/api/validate.go
+++ b/internal/api/validate.go
@@ -56,7 +56,7 @@ func (s *Server) handleValidateTask(w http.ResponseWriter, r *http.Request) {
 		Name  string `json:"name"`
 		Claude struct {
 			Instructions string   `json:"instructions"`
-			WorkingDir   string   `json:"working_dir"`
+			ProjectDir   string   `json:"project_dir"`
 			AllowedTools []string `json:"allowed_tools"`
 		} `json:"claude"`
 	}
@@ -74,8 +74,8 @@ func (s *Server) handleValidateTask(w http.ResponseWriter, r *http.Request) {
 	}
 
 	userMsg := fmt.Sprintf("Task name: %s\n\nInstructions:\n%s", input.Name, input.Claude.Instructions)
-	if input.Claude.WorkingDir != "" {
-		userMsg += fmt.Sprintf("\n\nWorking directory: %s", input.Claude.WorkingDir)
+	if input.Claude.ProjectDir != "" {
+		userMsg += fmt.Sprintf("\n\nWorking directory: %s", input.Claude.ProjectDir)
 	}
 	if len(input.Claude.AllowedTools) > 0 {
 		userMsg += fmt.Sprintf("\n\nAllowed tools: %v", input.Claude.AllowedTools)
diff --git a/internal/cli/create.go b/internal/cli/create.go
index fdad932..addd034 100644
--- a/internal/cli/create.go
+++ b/internal/cli/create.go
@@ -4,7 +4,7 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
-	"net/http"
+	"io"
 
 	"github.com/spf13/cobra"
 )
@@ -52,7 +52,7 @@ func createTask(serverURL, name, instructions, workingDir, model, parentID strin
 		"priority": priority,
 		"claude": map[string]interface{}{
 			"instructions":   instructions,
-			"working_dir":    workingDir,
+			"project_dir":    workingDir,
 			"model":          model,
 			"max_budget_usd": budget,
 		},
@@ -62,20 +62,26 @@ func createTask(serverURL, name, instructions, workingDir, model, parentID strin
 	}
 
 	data, _ := json.Marshal(body)
-	resp, err := http.Post(serverURL+"/api/tasks", "application/json", bytes.NewReader(data)) //nolint:noctx
+	resp, err := httpClient.Post(serverURL+"/api/tasks", "application/json", bytes.NewReader(data)) //nolint:noctx
 	if err != nil {
 		return fmt.Errorf("POST /api/tasks: %w", err)
 	}
 	defer resp.Body.Close()
 
+	raw, _ := io.ReadAll(resp.Body)
 	var result map[string]interface{}
-	_ = json.NewDecoder(resp.Body).Decode(&result)
+	if err := json.Unmarshal(raw, &result); err != nil {
+		return fmt.Errorf("server returned invalid JSON (status %d): %s", resp.StatusCode, string(raw))
+	}
 
 	if resp.StatusCode >= 300 {
 		return fmt.Errorf("server returned %d: %v", resp.StatusCode, result["error"])
 	}
 
 	id, _ := result["id"].(string)
+	if id == "" {
+		return fmt.Errorf("server returned task without id field")
+	}
 	fmt.Printf("Created task %s\n", id)
 
 	if autoStart {
diff --git a/internal/executor/claude.go b/internal/executor/claude.go
index c04a747..aa715da 100644
--- a/internal/executor/claude.go
+++ b/internal/executor/claude.go
@@ -55,10 +55,18 @@ func (r *ClaudeRunner) binaryPath() string {
 // Run executes a claude -p invocation, streaming output to log files.
 // It retries up to 3 times on rate-limit errors using exponential backoff.
 // If the agent writes a question file and exits, Run returns *BlockedError.
+//
+// When project_dir is set and this is not a resume execution, Run clones the
+// project into a temp sandbox, runs the agent there, then merges committed
+// changes back to project_dir. On failure the sandbox is preserved and its
+// path is included in the error.
 func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Execution) error {
-	if t.Claude.WorkingDir != "" {
-		if _, err := os.Stat(t.Claude.WorkingDir); err != nil {
-			return fmt.Errorf("working_dir %q: %w", t.Claude.WorkingDir, err)
+	projectDir := t.Claude.ProjectDir
+
+	// Validate project_dir exists when set.
+	if projectDir != "" {
+		if _, err := os.Stat(projectDir); err != nil {
+			return fmt.Errorf("project_dir %q: %w", projectDir, err)
 		}
 	}
 
@@ -82,6 +90,20 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
 		e.SessionID = e.ID // reuse execution UUID as session UUID (both are UUIDs)
 	}
 
+	// For new (non-resume) executions with a project_dir, clone into a sandbox.
+	// Resume executions run directly in project_dir to pick up the previous session.
+	var sandboxDir string
+	effectiveWorkingDir := projectDir
+	if projectDir != "" && e.ResumeSessionID == "" {
+		var err error
+		sandboxDir, err = setupSandbox(projectDir)
+		if err != nil {
+			return fmt.Errorf("setting up sandbox: %w", err)
+		}
+		effectiveWorkingDir = sandboxDir
+		r.Logger.Info("sandbox created", "sandbox", sandboxDir, "project_dir", projectDir)
+	}
+
 	questionFile := filepath.Join(logDir, "question.json")
 	args := r.buildArgs(t, e, questionFile)
 
@@ -95,9 +117,12 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
 			)
 		}
 		attempt++
-		return r.execOnce(ctx, args, t.Claude.WorkingDir, e)
+		return r.execOnce(ctx, args, effectiveWorkingDir, e)
 	})
 	if err != nil {
+		if sandboxDir != "" {
+			return fmt.Errorf("%w (sandbox preserved at %s)", err, sandboxDir)
+		}
 		return err
 	}
 
@@ -105,8 +130,89 @@ func (r *ClaudeRunner) Run(ctx context.Context, t *task.Task, e *storage.Executi
 	data, readErr := os.ReadFile(questionFile)
 	if readErr == nil {
 		os.Remove(questionFile) // consumed
+		// Preserve sandbox on BLOCKED — agent may have partial work.
 		return &BlockedError{QuestionJSON: strings.TrimSpace(string(data)), SessionID: e.SessionID}
 	}
+
+	// Merge sandbox back to project_dir and clean up.
+	if sandboxDir != "" {
+		if mergeErr := teardownSandbox(projectDir, sandboxDir, r.Logger); mergeErr != nil {
+			return fmt.Errorf("sandbox teardown: %w (sandbox preserved at %s)", mergeErr, sandboxDir)
+		}
+	}
+	return nil
+}
+
+// setupSandbox clones projectDir into a fresh temporary directory and returns
+// the clone's path. If projectDir is not yet a git repository it is first
+// initialised in place and its contents committed (this modifies projectDir).
+// On error the returned path is empty; a failed clone is removed best-effort.
+func setupSandbox(projectDir string) (string, error) {
+	// rev-parse fails outside a repository; any failure is treated as "not a repo".
+	if err := exec.Command("git", "-C", projectDir, "rev-parse", "--git-dir").Run(); err != nil {
+		bootstrap := [][]string{
+			{"init"},
+			{"add", "-A"},
+			{"commit", "--allow-empty", "-m", "chore: initial commit"},
+		}
+		for _, sub := range bootstrap {
+			args := append([]string{"-C", projectDir}, sub...)
+			if out, err := exec.Command("git", args...).CombinedOutput(); err != nil { //nolint:gosec
+				// Name the step that actually failed (init, add or commit).
+				return "", fmt.Errorf("git %s in %s: %w\n%s", sub[0], projectDir, err, out)
+			}
+		}
+	}
+
+	tempDir, err := os.MkdirTemp("", "claudomator-sandbox-*")
+	if err != nil {
+		return "", fmt.Errorf("creating sandbox dir: %w", err)
+	}
+
+	// git clone accepts an existing *empty* target directory, so clone straight
+	// into tempDir; deleting it first would let another process claim the path.
+	out, err := exec.Command("git", "clone", "--local", projectDir, tempDir).CombinedOutput()
+	if err != nil {
+		_ = os.RemoveAll(tempDir) // best-effort: do not leak a failed/partial clone
+		return "", fmt.Errorf("git clone: %w\n%s", err, out)
+	}
+	return tempDir, nil
+}
+
+// teardownSandbox merges the agent's commits from sandboxDir into projectDir
+// (fetch, then fast-forward-only merge) and deletes the sandbox. It returns an
+// error and leaves the sandbox in place when the sandbox has uncommitted changes
+// or the fetch/merge fails. A failure to delete the sandbox is only logged.
+func teardownSandbox(projectDir, sandboxDir string, logger *slog.Logger) error {
+	// Refuse to merge while git status reports anything uncommitted.
+	status, err := exec.Command("git", "-C", sandboxDir, "status", "--porcelain").Output()
+	if err != nil {
+		return fmt.Errorf("git status: %w", err)
+	}
+	if len(strings.TrimSpace(string(status))) > 0 {
+		return fmt.Errorf("uncommitted changes in sandbox (agent must commit all work):\n%s", status)
+	}
+
+	// Commits in HEAD but not origin/HEAD; if uncountable, try the merge anyway.
+	ahead, err := exec.Command("git", "-C", sandboxDir, "rev-list", "--count", "origin/HEAD..HEAD").Output()
+	if err != nil {
+		logger.Warn("could not determine commits ahead of origin; proceeding with merge", "err", err)
+	}
+	if strings.TrimSpace(string(ahead)) != "0" {
+		// Pull the sandbox's HEAD into project_dir and fast-forward onto it.
+		if out, err := exec.Command("git", "-C", projectDir, "fetch", sandboxDir, "HEAD").CombinedOutput(); err != nil {
+			return fmt.Errorf("git fetch from sandbox: %w\n%s", err, out)
+		}
+		if out, err := exec.Command("git", "-C", projectDir, "merge", "--ff-only", "FETCH_HEAD").CombinedOutput(); err != nil {
+			return fmt.Errorf("git merge --ff-only FETCH_HEAD: %w\n%s", err, out)
+		}
+		logger.Info("sandbox merged and cleaned up", "sandbox", sandboxDir, "project_dir", projectDir)
+	}
+
+	// Cleanup is best-effort: a leftover temp directory is worth a warning, not an error.
+	if err := os.RemoveAll(sandboxDir); err != nil {
+		logger.Warn("could not remove sandbox", "sandbox", sandboxDir, "err", err)
+	}
+	return nil
+}
 
diff --git a/internal/executor/claude_test.go b/internal/executor/claude_test.go
index 056c7e8..31dcf52 100644
--- a/internal/executor/claude_test.go
+++ b/internal/executor/claude_test.go
@@ -224,7 +224,7 @@ func TestClaudeRunner_Run_InaccessibleWorkingDir_ReturnsError(t *testing.T) {
 	}
 	tk := &task.Task{
 		Claude: task.ClaudeConfig{
-			WorkingDir:   "/nonexistent/path/does/not/exist",
+			ProjectDir:   "/nonexistent/path/does/not/exist",
 			SkipPlanning: true,
 		},
 	}
@@ -235,8 +235,8 @@ func TestClaudeRunner_Run_InaccessibleWorkingDir_ReturnsError(t *testing.T) {
 	if err == nil {
 		t.Fatal("expected error for inaccessible working_dir, got nil")
 	}
-	if !strings.Contains(err.Error(), "working_dir") {
-		t.Errorf("expected 'working_dir' in error, got: %v", err)
+	if !strings.Contains(err.Error(), "project_dir") {
+		t.Errorf("expected 'project_dir' in error, got: %v", err)
 	}
 }
 
diff --git a/internal/task/task.go b/internal/task/task.go
index f6635cc..498c364 100644
--- a/internal/task/task.go
+++ b/internal/task/task.go
@@ -1,6 +1,9 @@
 package task
 
-import "time"
+import (
+	"encoding/json"
+	"time"
+)
 
 type State string
 
@@ -29,7 +32,7 @@ type ClaudeConfig struct {
 	Model              string   `yaml:"model"               json:"model"`
 	ContextFiles       []string `yaml:"context_files"       json:"context_files"`
 	Instructions       string   `yaml:"instructions"        json:"instructions"`
-	WorkingDir         string   `yaml:"working_dir"         json:"working_dir"`
+	ProjectDir         string   `yaml:"project_dir"         json:"project_dir"`
 	MaxBudgetUSD       float64  `yaml:"max_budget_usd"      json:"max_budget_usd"`
 	PermissionMode     string   `yaml:"permission_mode"     json:"permission_mode"`
 	AllowedTools       []string `yaml:"allowed_tools"       json:"allowed_tools"`
@@ -39,6 +42,25 @@ type ClaudeConfig struct {
 	SkipPlanning       bool     `yaml:"skip_planning"       json:"skip_planning"`
 }
 
+// UnmarshalJSON decodes c, preferring "project_dir" over legacy "working_dir".
+func (c *ClaudeConfig) UnmarshalJSON(data []byte) error {
+	type Fields ClaudeConfig // same fields, no methods: decoding cannot re-enter this method
+	var shadow struct {
+		ProjectDir string `json:"project_dir"`
+		LegacyDir  string `json:"working_dir"` // key used before the rename
+		*Fields
+	}
+	shadow.Fields = (*Fields)(c)
+	if err := json.Unmarshal(data, &shadow); err != nil {
+		return err
+	}
+	c.ProjectDir = shadow.LegacyDir // fallback; replaced below when project_dir is non-empty
+	if shadow.ProjectDir != "" {
+		c.ProjectDir = shadow.ProjectDir
+	}
+	return nil
+}
+
 type RetryConfig struct {
 	MaxAttempts int    `yaml:"max_attempts" json:"max_attempts"`
 	Backoff     string `yaml:"backoff"      json:"backoff"` // "linear", "exponential"
diff --git a/internal/task/validator_test.go b/internal/task/validator_test.go
index 967eed3..02bde45 100644
--- a/internal/task/validator_test.go
+++ b/internal/task/validator_test.go
@@ -11,7 +11,7 @@ func validTask() *Task {
 		Name: "Valid Task",
 		Claude: ClaudeConfig{
 			Instructions: "do something",
-			WorkingDir:   "/tmp",
+			ProjectDir:   "/tmp",
 		},
 		Priority: PriorityNormal,
 		Retry:    RetryConfig{MaxAttempts: 1, Backoff: "exponential"},