summaryrefslogtreecommitdiff
path: root/internal/executor/ratelimit.go
blob: 1f38a6db7b115c2b497c721413415483afa68473 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
package executor

import (
	"context"
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"time"
)

// retryAfterRe matches a "retry-after" marker, case-insensitively, where the
// two words are joined by '-', '_' or a space. The marker must be followed by
// one or more colons and/or whitespace characters; the run of decimal digits
// after that is captured as group 1.
var retryAfterRe = regexp.MustCompile(`(?i)retry[-_ ]after[:\s]+(\d+)`)

// maxBackoffDelay is the upper bound applied to the exponentially growing
// delay that runWithBackoff computes between attempts.
const maxBackoffDelay = 5 * time.Minute

// isRateLimitError reports whether err looks like a transient Claude API
// rate limit that is worth retrying (e.g. per-minute/per-request throttle).
//
// The decision is a case-insensitive text match on err.Error(): the message
// must contain "rate limit", "too many requests" or "overloaded", or a
// standalone "429" status code. "429" only counts when it is not embedded in
// a longer run of digits, so unrelated numbers such as "84291" or "14290" do
// not mark an error as retryable. A nil err yields false.
func isRateLimitError(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	return strings.Contains(msg, "rate limit") ||
		strings.Contains(msg, "too many requests") ||
		strings.Contains(msg, "overloaded") ||
		containsStandalone429(msg)
}

// containsStandalone429 reports whether msg contains "429" with no ASCII
// digit immediately before or after it.
func containsStandalone429(msg string) bool {
	const code = "429"
	isDigit := func(b byte) bool { return b >= '0' && b <= '9' }

	for from := 0; from < len(msg); {
		rel := strings.Index(msg[from:], code)
		if rel < 0 {
			return false
		}
		start := from + rel
		end := start + len(code)
		digitBefore := start > 0 && isDigit(msg[start-1])
		digitAfter := end < len(msg) && isDigit(msg[end])
		if !digitBefore && !digitAfter {
			return true
		}
		// This occurrence is part of a larger number; keep scanning after it.
		from = start + 1
	}
	return false
}

// isQuotaExhausted reports whether err signals that the 5-hour usage quota
// has been used up completely. Such errors, unlike transient rate limits,
// should not be retried.
//
// The error text is lower-cased and searched for a fixed set of marker
// phrases; a nil err yields false.
func isQuotaExhausted(err error) bool {
	if err == nil {
		return false
	}

	markers := [...]string{
		"hit your limit",
		"you've hit your limit",
		"you have hit your limit",
		"rate limit reached (rejected)",
		"status: rejected",
	}

	lowered := strings.ToLower(err.Error())
	for _, marker := range markers {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}

// parseRetryAfter extracts a Retry-After duration, expressed in whole
// seconds, from an error message using retryAfterRe.
//
// It returns 0 when no retry-after value is found, when the number cannot be
// parsed or is not positive, or when it is too large to be represented as a
// time.Duration (the multiplication would otherwise wrap around silently).
func parseRetryAfter(msg string) time.Duration {
	// Largest whole number of seconds that fits in a time.Duration
	// (an int64 count of nanoseconds).
	const maxSeconds = int64((1<<63 - 1) / time.Second)

	m := retryAfterRe.FindStringSubmatch(msg)
	if m == nil {
		return 0
	}
	secs, err := strconv.Atoi(m[1])
	if err != nil || secs <= 0 || int64(secs) > maxSeconds {
		return 0
	}
	return time.Duration(secs) * time.Second
}

// runWithBackoff calls fn repeatedly on rate-limit errors, using exponential backoff.
// maxRetries is the max number of retry attempts (not counting the initial call).
// baseDelay is the initial backoff duration (doubled each retry).
func runWithBackoff(ctx context.Context, maxRetries int, baseDelay time.Duration, fn func() error) error {
	var lastErr error
	for attempt := 0; attempt <= maxRetries; attempt++ {
		lastErr = fn()
		if lastErr == nil {
			return nil
		}
		if !isRateLimitError(lastErr) {
			return lastErr
		}
		if attempt == maxRetries {
			break
		}

		// Compute exponential backoff delay.
		delay := baseDelay * (1 << attempt)
		if delay > maxBackoffDelay {
			delay = maxBackoffDelay
		}
		// Use Retry-After header value if present.
		if ra := parseRetryAfter(lastErr.Error()); ra > 0 {
			delay = ra
		}

		select {
		case <-ctx.Done():
			return fmt.Errorf("context cancelled during rate-limit backoff: %w", ctx.Err())
		case <-time.After(delay):
		}
	}
	return lastErr
}