internal/executor/classifier.go


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158

package executor

import (
	"context"
	"encoding/json"
	"fmt"
	"os/exec"
	"strings"

	"github.com/thepeterstone/claudomator/internal/llm"
)

// Classification is the decoded form of the JSON object the model selector
// is asked to return.
//
// AgentType carries the "agent_type" key, Model the "model" key (the name of
// the selected model), and Reason the "reason" key (the selector's brief
// justification).
type Classification struct {
	AgentType string `json:"agent_type"`
	Model     string `json:"model"`
	Reason    string `json:"reason"`
}

// SystemStatus is a snapshot of load information handed to Classify, which
// currently ignores it.
//
// ActiveTasks maps a string key to a task count and RateLimited maps a string
// key to a rate-limit flag. NOTE(review): the keys are presumably agent type
// names; confirm against the code that populates this struct.
type SystemStatus struct {
	ActiveTasks map[string]int
	RateLimited map[string]bool
}

// Classifier picks a model for an incoming task. When LLM is non-nil the
// classifier routes through the local OpenAI-compatible client (cheap,
// private, fast). Otherwise it falls back to invoking the Gemini CLI
// at GeminiBinaryPath.
//
// LLM is optional; leaving it nil selects the Gemini CLI fallback.
// GeminiBinaryPath is the executable to run for that fallback; when it is
// empty, Classify runs "gemini".
type Classifier struct {
	LLM              *llm.Client
	GeminiBinaryPath string
}

// classificationPrompt is the fmt template for the model-selection prompt.
// It takes four %s arguments, in this order: the required agent type, the
// task name, the task instructions, and the agent type again (echoed into the
// example JSON response the model is told to produce).
const classificationPrompt = `
You are a model selector for Claudomator.
The agent has already been chosen by the load balancer. Your ONLY job is to select the best model for that agent.

REQUIRED agent: %s

Available Models:
Claude:
- claude-sonnet-4-6 (default, balanced, best for most coding tasks)
- claude-opus-4-6 (most powerful, expensive, use for hardest tasks only)
- claude-haiku-4-5-20251001 (fast, cheap, use for simple tasks)

Gemini:
- gemini-2.5-flash-lite (fastest, most efficient, best for simple/trivial tasks)
- gemini-2.5-flash (fast, balanced)
- gemini-2.5-pro (most powerful, use for hardest tasks only)

Selection Criteria:
- Use powerful models (opus, pro) only for the hardest reasoning/coding tasks.
- Use lite/haiku for simple, short, or low-stakes tasks.
- Default to the balanced model (sonnet, flash) for everything else.

Task:
Name: %s
Instructions: %s

Respond with ONLY a JSON object:
{
  "agent_type": "%s",
  "model": "model-name",
  "reason": "brief reason"
}
`

// Classify selects a model for the task described by taskName and
// instructions, on behalf of the already-chosen agentType.
//
// The SystemStatus argument is currently ignored. When c.LLM is non-nil the
// request is delegated to classifyViaLLM. Otherwise the Gemini CLI
// (c.GeminiBinaryPath, or "gemini" when that is empty) is run with the
// rendered prompt and its stdout is decoded by parseGeminiClassification.
//
// An error is returned when the CLI cannot be run or exits unsuccessfully
// (captured stderr is included in the message and the underlying error is
// wrapped), or when its output cannot be decoded as classification JSON.
// If the decoded classification has an empty agent_type it is set to
// agentType, matching the behavior of classifyViaLLM.
func (c *Classifier) Classify(ctx context.Context, taskName, instructions string, _ SystemStatus, agentType string) (*Classification, error) {
	prompt := fmt.Sprintf(classificationPrompt,
		agentType, taskName, instructions, agentType,
	)

	if c.LLM != nil {
		return c.classifyViaLLM(ctx, prompt, agentType)
	}

	binary := c.GeminiBinaryPath
	if binary == "" {
		binary = "gemini"
	}

	// Use a minimal model for classification to be fast and cheap.
	args := []string{
		"--prompt", prompt,
		"--model", "gemini-2.5-flash-lite",
		"--output-format", "json",
	}

	out, err := exec.CommandContext(ctx, binary, args...).Output()
	if err != nil {
		// Output captures stderr into ExitError.Stderr when the command's
		// Stderr is unset, so surface it alongside the wrapped exit error.
		if exitErr, ok := err.(*exec.ExitError); ok {
			return nil, fmt.Errorf("classifier failed (%w): %s", err, string(exitErr.Stderr))
		}
		return nil, fmt.Errorf("classifier failed: %w", err)
	}

	cls, err := parseGeminiClassification(out)
	if err != nil {
		return nil, err
	}
	// The agent is fixed by the caller; fill it in if the model omitted it.
	if cls.AgentType == "" {
		cls.AgentType = agentType
	}
	return cls, nil
}

// parseGeminiClassification decodes the Gemini CLI's stdout into a
// Classification. It unwraps the CLI's {"response": "..."} envelope, drops
// the "Loaded cached credentials." notice, and strips a surrounding markdown
// code fence before decoding the remaining JSON.
func parseGeminiClassification(out []byte) (*Classification, error) {
	payload := string(out)
	if resp, _, ok := decodeGeminiEnvelope(payload); ok {
		// Clean envelope: the model's answer lives in the "response" field.
		payload = resp
	} else {
		// Stdout was not pure JSON, typically because the CLI printed an
		// informational notice first. Remove the notice and, if what remains
		// is an envelope that actually carries a "response" key, unwrap it.
		// Without this second attempt the envelope itself would be decoded
		// as a Classification and silently yield empty fields.
		payload = dropGeminiNotice(strings.TrimSpace(payload))
		if resp, present, _ := decodeGeminiEnvelope(payload); present {
			payload = resp
		}
	}
	payload = dropGeminiNotice(strings.TrimSpace(payload))

	// Gemini might wrap the JSON in markdown code blocks.
	payload = strings.TrimPrefix(payload, "```json")
	payload = strings.TrimPrefix(payload, "```") // fence without a language tag
	payload = strings.TrimSuffix(payload, "```")
	payload = strings.TrimSpace(payload)

	var cls Classification
	if err := json.Unmarshal([]byte(payload), &cls); err != nil {
		return nil, fmt.Errorf("failed to parse classification JSON: %w\nOriginal Output: %s\nCleaned Output: %s", err, string(out), payload)
	}
	return &cls, nil
}

// decodeGeminiEnvelope tries to decode text as the Gemini CLI JSON envelope.
// ok reports whether text decoded at all; present reports whether it held a
// non-null "response" key, whose value is returned as response.
func decodeGeminiEnvelope(text string) (response string, present, ok bool) {
	var envelope struct {
		Response *string `json:"response"`
	}
	if err := json.Unmarshal([]byte(text), &envelope); err != nil {
		return "", false, false
	}
	if envelope.Response == nil {
		return "", false, true
	}
	return *envelope.Response, true, true
}

// dropGeminiNotice removes every line containing the CLI's
// "Loaded cached credentials." notice and trims the result. Text without the
// notice is returned unchanged.
func dropGeminiNotice(text string) string {
	const notice = "Loaded cached credentials."
	if !strings.Contains(text, notice) {
		return text
	}
	var kept []string
	for _, line := range strings.Split(text, "\n") {
		if !strings.Contains(line, notice) {
			kept = append(kept, line)
		}
	}
	return strings.TrimSpace(strings.Join(kept, "\n"))
}

// classifyViaLLM sends prompt as a single user message to the configured
// LLM client, asking for a JSON response, and decodes the reply content
// directly into a Classification; no fence or noise clean-up is applied.
//
// Chat errors and JSON decode errors are wrapped and returned (the decode
// error also carries the trimmed reply body). An empty agent_type in the
// reply is replaced with agentType.
func (c *Classifier) classifyViaLLM(ctx context.Context, prompt, agentType string) (*Classification, error) {
	request := llm.ChatRequest{
		Messages:     []llm.Message{{Role: "user", Content: prompt}},
		ResponseJSON: true,
	}

	reply, chatErr := c.LLM.Chat(ctx, request)
	if chatErr != nil {
		return nil, fmt.Errorf("classifier (local llm): %w", chatErr)
	}

	payload := strings.TrimSpace(reply.Content)
	result := new(Classification)
	if decodeErr := json.Unmarshal([]byte(payload), result); decodeErr != nil {
		return nil, fmt.Errorf("classifier (local llm): parse JSON: %w\nbody: %s", decodeErr, payload)
	}

	// The agent is dictated by the caller, so fill it in when the model left it out.
	if result.AgentType == "" {
		result.AgentType = agentType
	}
	return result, nil
}