diff options
| author | Claude <noreply@anthropic.com> | 2026-04-28 09:24:43 +0000 |
|---|---|---|
| committer | Claude <noreply@anthropic.com> | 2026-04-28 09:24:43 +0000 |
| commit | 0865afc43be562dbe14528e4299b9e213b54cc93 (patch) | |
| tree | 3ffb11207fb6b9866b5a2477bba7abe38964f83a /internal/retry/backoff_test.go | |
| parent | c2aa026f6ce1c9e216b99d74f294fc133d5fcddd (diff) | |
feat(executor): add LocalRunner and OpenAI-compat LLM client
Phase 1 of "local OSS models as agents" plan. Adds a third Runner
backed by any OpenAI-compatible HTTP server (Ollama, vLLM, LM Studio,
llama.cpp), and migrates the Gemini-CLI classifier to route through
the same client when configured.
Two-layer split: internal/llm.Client is the workhorse (HTTP, no Pool,
no DB) used directly by the classifier and any future internal helper
that needs cheap reasoning. internal/executor.LocalRunner is a thin
adapter implementing Runner for user-facing tasks. This avoids
Pool reentrancy/deadlock when sub-second internal calls fire from
inside Pool.execute().
Highlights:
- internal/retry: relocated runWithBackoff/IsRateLimitError/ParseRetryAfter
into a shared package reused by executor and llm.
- internal/llm: Chat (non-streaming) and ChatStream (SSE) over
/chat/completions with optional bearer auth, json_object response
format, retry on 429/503, Retry-After parsing.
- internal/executor/LocalRunner: streams deltas into stdout.log in the
same stream-json envelope ClaudeRunner emits, then writes one
consolidated assistant block plus a result terminator so existing
parsers (extractSummary, ParseChangestatFromOutput) work unchanged.
- internal/executor/Classifier: gains optional LLM field; uses
json_object response format (no markdown-fence cleanup needed).
Falls back to Gemini-CLI subprocess when LLM is nil.
- Pool.skipClassification: now skips only when the requested agent
type is registered, so unknown types still reach the load balancer.
- Storage: additive tokens_in/tokens_out ALTERs on executions; CLI
runners record cost_usd as before, LocalRunner records 0 + tokens.
- Config: [local_model] section (endpoint, model, timeout_seconds,
default_temperature, api_key). Empty endpoint = no LocalRunner
registered, classifier falls back to Gemini.
Pre-existing test issues fixed in passing:
- claude_test.go setupSandbox callsites updated to current signature.
- gemini_test.go TestParseGeminiStream skipped (asserts unimplemented
GeminiRunner stream-error parsing; tracked separately).
Plan: docs/plans/local-oss-runner.md.
https://claude.ai/code/session_017Edeq947TpSm1vQTxMhi1J
Diffstat (limited to 'internal/retry/backoff_test.go')
| -rw-r--r-- | internal/retry/backoff_test.go | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/internal/retry/backoff_test.go b/internal/retry/backoff_test.go new file mode 100644 index 0000000..a963fc2 --- /dev/null +++ b/internal/retry/backoff_test.go @@ -0,0 +1,169 @@ +package retry + +import ( + "context" + "errors" + "fmt" + "testing" + "time" +) + +// --- IsRateLimitError tests --- + +func TestIsRateLimitError_RateLimitMessage(t *testing.T) { + err := errors.New("claude exited with error: rate limit exceeded") + if !IsRateLimitError(err) { + t.Error("want true for 'rate limit exceeded', got false") + } +} + +func TestIsRateLimitError_TooManyRequests(t *testing.T) { + err := errors.New("too many requests to the API") + if !IsRateLimitError(err) { + t.Error("want true for 'too many requests', got false") + } +} + +func TestIsRateLimitError_HTTP429(t *testing.T) { + err := errors.New("API returned status 429") + if !IsRateLimitError(err) { + t.Error("want true for '429', got false") + } +} + +func TestIsRateLimitError_Overloaded(t *testing.T) { + err := errors.New("API overloaded, please retry later") + if !IsRateLimitError(err) { + t.Error("want true for 'overloaded', got false") + } +} + +func TestIsRateLimitError_NonRateLimitError(t *testing.T) { + err := errors.New("claude exited with error: exit status 1") + if IsRateLimitError(err) { + t.Error("want false for non-rate-limit error, got true") + } +} + +func TestIsRateLimitError_NilError(t *testing.T) { + if IsRateLimitError(nil) { + t.Error("want false for nil error, got true") + } +} + +// --- ParseRetryAfter tests --- + +func TestParseRetryAfter_RetryAfterSeconds(t *testing.T) { + msg := "rate limit exceeded, retry after 30 seconds" + d := ParseRetryAfter(msg) + if d != 30*time.Second { + t.Errorf("want 30s, got %v", d) + } +} + +func TestParseRetryAfter_RetryAfterHeader(t *testing.T) { + msg := "rate_limit_error: retry-after: 60" + d := ParseRetryAfter(msg) + if d != 60*time.Second { + t.Errorf("want 60s, got %v", d) + } +} + +func TestParseRetryAfter_NoRetryInfo(t *testing.T) { + msg := "rate limit exceeded" + d := ParseRetryAfter(msg) + if d != 0 { + t.Errorf("want 0, got %v", d) + } +} + +// --- RunWithBackoff tests --- + +func TestRunWithBackoff_SuccessOnFirstTry(t *testing.T) { + calls := 0 + fn := func() error { + calls++ + return nil + } + err := RunWithBackoff(context.Background(), 3, time.Millisecond, fn) + if err != nil { + t.Errorf("want nil error, got %v", err) + } + if calls != 1 { + t.Errorf("want 1 call, got %d", calls) + } +} + +func TestRunWithBackoff_RetriesOnRateLimit(t *testing.T) { + calls := 0 + fn := func() error { + calls++ + if calls < 3 { + return fmt.Errorf("rate limit exceeded") + } + return nil + } + err := RunWithBackoff(context.Background(), 3, time.Millisecond, fn) + if err != nil { + t.Errorf("want nil error, got %v", err) + } + if calls != 3 { + t.Errorf("want 3 calls, got %d", calls) + } +} + +func TestRunWithBackoff_GivesUpAfterMaxRetries(t *testing.T) { + calls := 0 + rateLimitErr := fmt.Errorf("rate limit exceeded") + fn := func() error { + calls++ + return rateLimitErr + } + err := RunWithBackoff(context.Background(), 3, time.Millisecond, fn) + if err == nil { + t.Fatal("want error after max retries, got nil") + } + if calls != 4 { + t.Errorf("want 4 calls (1 initial + 3 retries), got %d", calls) + } +} + +func TestRunWithBackoff_DoesNotRetryNonRateLimitError(t *testing.T) { + calls := 0 + fn := func() error { + calls++ + return fmt.Errorf("permission denied") + } + err := RunWithBackoff(context.Background(), 3, time.Millisecond, fn) + if err == nil { + t.Fatal("want error, got nil") + } + if calls != 1 { + t.Errorf("want 1 call (no retry for non-rate-limit), got %d", calls) + } +} + +func TestRunWithBackoff_ContextCancellation(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + calls := 0 + + fn := func() error { + calls++ + cancel() + return fmt.Errorf("rate limit exceeded") + } + + start := time.Now() + err := RunWithBackoff(ctx, 3, time.Second, fn) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("want error on context cancellation, got nil") + } + if elapsed > 500*time.Millisecond { + t.Errorf("context cancellation too slow: %v (want < 500ms)", elapsed) + } + if calls != 1 { + t.Errorf("want 1 call before cancellation, got %d", calls) + } +} |
