summary refs log tree commit diff
path: root/internal/executor/summary_synth_test.go
diff options
context:
space:
mode:
author Peter Stone <thepeterstone@gmail.com> 2026-05-01 22:14:37 -1000
committer GitHub <noreply@github.com> 2026-05-01 22:14:37 -1000
commit 99115d8158137083239c45e5a860b718ff4cefa1 (patch)
tree 1bf3bd0505eea79375c67af83c7c5fe8c0f274ff /internal/executor/summary_synth_test.go
parent c2aa026f6ce1c9e216b99d74f294fc133d5fcddd (diff)
parent 50f8fe8c1ff8b82e0bd399e5776e58bda3e57d1c (diff)
Merge pull request #1 from thepeterstone/claude/local-oss-model-agents-MEBqj
Local OSS models as a third runner (epic)
Diffstat (limited to 'internal/executor/summary_synth_test.go')
-rw-r--r-- internal/executor/summary_synth_test.go 241
1 files changed, 241 insertions, 0 deletions
diff --git a/internal/executor/summary_synth_test.go b/internal/executor/summary_synth_test.go
new file mode 100644
index 0000000..7ad396d
--- /dev/null
+++ b/internal/executor/summary_synth_test.go
@@ -0,0 +1,241 @@
+package executor
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "net/http/httptest"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync/atomic"
+ "testing"
+
+ "github.com/thepeterstone/claudomator/internal/llm"
+ "github.com/thepeterstone/claudomator/internal/storage"
+)
+
+// writeStreamLog writes lines, each terminated by a newline, to a file named
+// stdout.log inside a fresh per-test temporary directory and returns the
+// file's path. A write error fails the test immediately.
+func writeStreamLog(t *testing.T, lines []string) string {
+	t.Helper()
+
+	logPath := filepath.Join(t.TempDir(), "stdout.log")
+
+	var content strings.Builder
+	for i := range lines {
+		content.WriteString(lines[i] + "\n")
+	}
+
+	if writeErr := os.WriteFile(logPath, []byte(content.String()), 0600); writeErr != nil {
+		t.Fatal(writeErr)
+	}
+	return logPath
+}
+
+// TestSynthesizeSummary_NilClient checks that synthesizeSummary yields an
+// empty string when it is handed a nil LLM client.
+func TestSynthesizeSummary_NilClient(t *testing.T) {
+	if summary := synthesizeSummary(context.Background(), nil, "/some/path"); summary != "" {
+		t.Errorf("nil client: want empty, got %q", summary)
+	}
+}
+
+// TestSynthesizeSummary_EmptyPath checks that synthesizeSummary yields an
+// empty string when the log path argument is empty.
+func TestSynthesizeSummary_EmptyPath(t *testing.T) {
+	client := &llm.Client{Endpoint: "http://unused", Model: "x"}
+
+	if summary := synthesizeSummary(context.Background(), client, ""); summary != "" {
+		t.Errorf("empty path: want empty, got %q", summary)
+	}
+}
+
+// TestSynthesizeSummary_MissingFile checks that synthesizeSummary yields an
+// empty string when the log path points at a file that does not exist.
+func TestSynthesizeSummary_MissingFile(t *testing.T) {
+	c := &llm.Client{Endpoint: "http://unused", Model: "x"}
+
+	// A path inside a fresh temp dir is guaranteed to be absent, so the test
+	// does not depend on what happens to exist on the host filesystem.
+	missing := filepath.Join(t.TempDir(), "file.log")
+
+	got := synthesizeSummary(context.Background(), c, missing)
+	if got != "" {
+		t.Errorf("missing file: want empty, got %q", got)
+	}
+}
+
+// TestSynthesizeSummary_EmptyAssistantContent feeds synthesizeSummary a log
+// holding only system/result events (no assistant text). It expects an empty
+// summary and zero requests reaching the stub LLM server.
+func TestSynthesizeSummary_EmptyAssistantContent(t *testing.T) {
+	// No assistant events here, so there is nothing to summarize.
+	logPath := writeStreamLog(t, []string{
+		`{"type":"system","subtype":"init"}`,
+		`{"type":"result","subtype":"success","total_cost_usd":0}`,
+	})
+
+	// Count every request so an unexpected LLM call is detectable.
+	var hits int32
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&hits, 1)
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprintln(w, `{"choices":[{"message":{"content":"should not be returned"},"finish_reason":"stop"}],"usage":{}}`)
+	}
+	stub := httptest.NewServer(http.HandlerFunc(handler))
+	defer stub.Close()
+
+	client := &llm.Client{Endpoint: stub.URL + "/v1", Model: "x"}
+	if summary := synthesizeSummary(context.Background(), client, logPath); summary != "" {
+		t.Errorf("empty content: want empty, got %q", summary)
+	}
+	if n := atomic.LoadInt32(&hits); n != 0 {
+		t.Errorf("LLM should not be called for empty assistant content")
+	}
+}
+
+// TestSynthesizeSummary_LLMSuccess feeds synthesizeSummary a log with two
+// assistant text events and a stub LLM that replies with a whitespace-padded
+// summary. It expects the returned summary to be the reply without the
+// surrounding whitespace, and the "user" message sent to the LLM to contain
+// both assistant texts.
+func TestSynthesizeSummary_LLMSuccess(t *testing.T) {
+	path := writeStreamLog(t, []string{
+		`{"type":"assistant","message":{"content":[{"type":"text","text":"Ran the tests."}]}}`,
+		`{"type":"assistant","message":{"content":[{"type":"text","text":"Fixed the import."}]}}`,
+		`{"type":"result","subtype":"success"}`,
+	})
+
+	var capturedUser string
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		var body struct {
+			Messages []struct {
+				Role, Content string
+			} `json:"messages"`
+		}
+		// Surface a malformed request directly instead of letting it show up
+		// later as a confusing "prompt missing text" failure. t.Errorf (unlike
+		// t.Fatal) may be called from the handler goroutine.
+		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+			t.Errorf("decoding LLM request body: %v", err)
+			http.Error(w, "bad request", http.StatusBadRequest)
+			return
+		}
+		for _, m := range body.Messages {
+			if m.Role == "user" {
+				capturedUser = m.Content
+			}
+		}
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprintln(w, `{"choices":[{"message":{"content":"  Agent ran tests and fixed an import.  "},"finish_reason":"stop"}],"usage":{}}`)
+	}))
+	defer srv.Close()
+	c := &llm.Client{Endpoint: srv.URL + "/v1", Model: "x"}
+
+	const want = "Agent ran tests and fixed an import."
+	got := synthesizeSummary(context.Background(), c, path)
+	if got != want {
+		t.Errorf("summary: got %q, want %q", got, want)
+	}
+	if !strings.Contains(capturedUser, "Ran the tests.") {
+		t.Errorf("user prompt missing first assistant text; got: %s", capturedUser)
+	}
+	if !strings.Contains(capturedUser, "Fixed the import.") {
+		t.Errorf("user prompt missing second assistant text; got: %s", capturedUser)
+	}
+}
+
+// TestPool_HandleRunResult_LLMSummaryFallback runs handleRunResult on an
+// execution whose stdout carries assistant text but no "## Summary" heading.
+// It expects the summary recorded in the store to be the reply served by the
+// stub LLM endpoint.
+func TestPool_HandleRunResult_LLMSummaryFallback(t *testing.T) {
+	// Assistant output that lacks a "## Summary" section.
+	logPath := writeStreamLog(t, []string{
+		`{"type":"assistant","message":{"content":[{"type":"text","text":"Did the work without writing a summary section."}]}}`,
+	})
+
+	reply := func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprintln(w, `{"choices":[{"message":{"content":"Synthesized summary."},"finish_reason":"stop"}],"usage":{}}`)
+	}
+	stub := httptest.NewServer(http.HandlerFunc(reply))
+	defer stub.Close()
+
+	store := newMinimalMockStore()
+	pool := newPoolWithMockStore(store)
+	pool.LLM = &llm.Client{Endpoint: stub.URL + "/v1", Model: "x"}
+
+	task := makeTask("synth-summary")
+	store.tasks[task.ID] = task
+	execution := &storage.Execution{
+		ID:         "e-synth",
+		TaskID:     task.ID,
+		Status:     "RUNNING",
+		StdoutPath: logPath,
+	}
+
+	pool.handleRunResult(context.Background(), task, execution, nil, "claude")
+
+	taskID, recorded, called := store.lastSummaryUpdate()
+	if !called {
+		t.Fatalf("expected UpdateTaskSummary to be called")
+	}
+	if taskID != task.ID {
+		t.Errorf("summary recorded for wrong task: %q", taskID)
+	}
+	if recorded != "Synthesized summary." {
+		t.Errorf("summary: got %q", recorded)
+	}
+
+	// Receive the pending result so the test exits cleanly.
+	<-pool.resultCh
+}
+
+// TestPool_HandleRunResult_ExtractSummaryWins runs handleRunResult on an
+// execution whose stdout already contains a "## Summary" section. It expects
+// the stub LLM endpoint to receive no requests and the recorded summary to be
+// the text the agent wrote under that heading.
+func TestPool_HandleRunResult_ExtractSummaryWins(t *testing.T) {
+	logPath := writeStreamLog(t, []string{
+		`{"type":"assistant","message":{"content":[{"type":"text","text":"## Summary\nAgent wrote its own summary."}]}}`,
+	})
+
+	// Count requests so any call to the LLM is detectable.
+	var hits int32
+	countingReply := func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&hits, 1)
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprintln(w, `{"choices":[{"message":{"content":"should not be used"},"finish_reason":"stop"}],"usage":{}}`)
+	}
+	stub := httptest.NewServer(http.HandlerFunc(countingReply))
+	defer stub.Close()
+
+	store := newMinimalMockStore()
+	pool := newPoolWithMockStore(store)
+	pool.LLM = &llm.Client{Endpoint: stub.URL + "/v1", Model: "x"}
+
+	task := makeTask("agent-summary")
+	store.tasks[task.ID] = task
+	execution := &storage.Execution{
+		ID:         "e-agent",
+		TaskID:     task.ID,
+		Status:     "RUNNING",
+		StdoutPath: logPath,
+	}
+
+	pool.handleRunResult(context.Background(), task, execution, nil, "claude")
+
+	if n := atomic.LoadInt32(&hits); n != 0 {
+		t.Errorf("LLM should not be called when ## Summary is present; got %d calls", n)
+	}
+
+	_, recorded, called := store.lastSummaryUpdate()
+	if !called {
+		t.Fatalf("expected UpdateTaskSummary")
+	}
+	if recorded != "Agent wrote its own summary." {
+		t.Errorf("summary: got %q (want extractSummary output)", recorded)
+	}
+
+	// Receive the pending result so the test exits cleanly.
+	<-pool.resultCh
+}
+
+// TestSynthesizeSummary_LLMFailure_ReturnsEmpty points synthesizeSummary at a
+// stub LLM server that always answers 500 and expects an empty summary.
+func TestSynthesizeSummary_LLMFailure_ReturnsEmpty(t *testing.T) {
+	logPath := writeStreamLog(t, []string{
+		`{"type":"assistant","message":{"content":[{"type":"text","text":"Did something."}]}}`,
+	})
+
+	alwaysFail := func(w http.ResponseWriter, r *http.Request) {
+		http.Error(w, "boom", http.StatusInternalServerError)
+	}
+	stub := httptest.NewServer(http.HandlerFunc(alwaysFail))
+	defer stub.Close()
+
+	client := &llm.Client{Endpoint: stub.URL + "/v1", Model: "x"}
+	if summary := synthesizeSummary(context.Background(), client, logPath); summary != "" {
+		t.Errorf("LLM failure: want empty, got %q", summary)
+	}
+}
+
+// TestReadAssistantTextTail_TailingLargeFile writes 500 filler assistant
+// lines followed by one marker line, then calls readAssistantTextTail with a
+// 4*1024 limit. It expects the result to contain the marker line and not to
+// contain the very first filler line.
+func TestReadAssistantTextTail_TailingLargeFile(t *testing.T) {
+	const fillerCount = 500
+
+	// Build the fixture in memory and write it via writeStreamLog, which
+	// checks the write error, rather than streaming to an *os.File whose
+	// Fprintf/Close errors would go unnoticed.
+	lines := make([]string, 0, fillerCount+1)
+	for i := 0; i < fillerCount; i++ {
+		lines = append(lines, fmt.Sprintf(`{"type":"assistant","message":{"content":[{"type":"text","text":"filler line that should be in the early part of a large file %04d"}]}}`, i))
+	}
+	lines = append(lines, `{"type":"assistant","message":{"content":[{"type":"text","text":"FINAL_MARKER_LINE"}]}}`)
+	path := writeStreamLog(t, lines)
+
+	got := readAssistantTextTail(path, 4*1024) // 4 KB cap
+	if !strings.Contains(got, "FINAL_MARKER_LINE") {
+		t.Errorf("tail should contain final line; got: %s", got)
+	}
+	if strings.Contains(got, "filler line that should be in the early part of a large file 0000") {
+		// Print only a bounded prefix so a failure stays readable.
+		end := 200
+		if len(got) < end {
+			end = len(got)
+		}
+		t.Errorf("tail should NOT contain very-early line; got first 200 chars: %s", got[:end])
+	}
+}