package migration

import (
	"context"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/ultisuite/ulti-backend/internal/observability"
)

// Built-in retry settings; they seed rateLimitConfig and stay in effect until
// ConfigureRateLimit overrides them.
const (
	defaultRateLimitMaxRetries = 6
	defaultRateLimitBaseDelay  = 2 * time.Second
	defaultRateLimitMaxDelay   = 2 * time.Minute
)

// RateLimitConfig controls exponential backoff for migration provider API calls.
type RateLimitConfig struct {
	// MaxRetries is the number of retries allowed after the initial attempt.
	MaxRetries int
	// BaseDelay is the wait before the first retry; it doubles on each later retry.
	BaseDelay time.Duration
	// MaxDelay caps every computed wait, including a server-provided Retry-After.
	MaxDelay time.Duration
}

// RateLimitError is returned when a provider keeps responding with HTTP 429
// after all configured retries are exhausted.
type RateLimitError struct {
	// Cause is the underlying error describing the last 429 response.
	Cause error
	// RetryAfter is the delay parsed from the last Retry-After header, or zero.
	RetryAfter time.Duration
}

// Error returns the message of Cause, or a generic rate-limit message when the
// receiver or its Cause is nil.
func (e *RateLimitError) Error() string {
	if e != nil && e.Cause != nil {
		return e.Cause.Error()
	}
	return "migration api rate limited"
}

// Unwrap exposes Cause to errors.Is and errors.As. It is safe to call on a nil
// receiver.
func (e *RateLimitError) Unwrap() error {
	if e != nil {
		return e.Cause
	}
	return nil
}

// IsRateLimitError reports whether err is a terminal migration rate-limit error.
func IsRateLimitError(err error) bool {
	target := (*RateLimitError)(nil)
	return errors.As(err, &target)
}

var (
	// rateLimitMu guards rateLimitConfig.
	rateLimitMu sync.RWMutex

	// rateLimitConfig holds the package-wide retry settings, initialised to the
	// built-in defaults.
	rateLimitConfig = RateLimitConfig{
		MaxRetries: defaultRateLimitMaxRetries,
		BaseDelay:  defaultRateLimitBaseDelay,
		MaxDelay:   defaultRateLimitMaxDelay,
	}
)

// ConfigureRateLimit sets package-wide retry settings for migration HTTP calls.
func ConfigureRateLimit(cfg RateLimitConfig) { rateLimitMu.Lock() defer rateLimitMu.Unlock() if cfg.MaxRetries > 0 { rateLimitConfig.MaxRetries = cfg.MaxRetries } if cfg.BaseDelay > 0 { rateLimitConfig.BaseDelay = cfg.BaseDelay } if cfg.MaxDelay > 0 { rateLimitConfig.MaxDelay = cfg.MaxDelay } } func currentRateLimitConfig() RateLimitConfig { rateLimitMu.RLock() defer rateLimitMu.RUnlock() return rateLimitConfig } func parseRetryAfter(v string) time.Duration { v = strings.TrimSpace(v) if v == "" { return 0 } if secs, err := strconv.Atoi(v); err == nil && secs >= 0 { return time.Duration(secs) * time.Second } if t, err := http.ParseTime(v); err == nil { d := time.Until(t) if d < 0 { return 0 } return d } return 0 } func rateLimitDelay(attempt int, retryAfter time.Duration) time.Duration { cfg := currentRateLimitConfig() delay := cfg.BaseDelay for i := 1; i < attempt; i++ { if delay >= cfg.MaxDelay { delay = cfg.MaxDelay break } delay *= 2 } if retryAfter > delay { delay = retryAfter } if delay > cfg.MaxDelay { delay = cfg.MaxDelay } return delay } func migrationDo(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) { if client == nil { client = migrationHTTPClient() } cfg := currentRateLimitConfig() var lastErr error var lastRetryAfter time.Duration for attempt := 0; attempt <= cfg.MaxRetries; attempt++ { if attempt > 0 { observability.IncMigrationRateLimitRetry() delay := rateLimitDelay(attempt, lastRetryAfter) slog.Default().Warn( "migration api rate limited, backing off", "component", "migration-http", "attempt", attempt, "max_retries", cfg.MaxRetries, "delay", delay.String(), "method", req.Method, "host", req.URL.Host, "path", req.URL.Path, ) timer := time.NewTimer(delay) select { case <-ctx.Done(): timer.Stop() return nil, ctx.Err() case <-timer.C: } } cloned := req.Clone(ctx) resp, err := client.Do(cloned) if err != nil { return nil, err } if resp.StatusCode == http.StatusTooManyRequests { lastRetryAfter = 
parseRetryAfter(resp.Header.Get("Retry-After")) raw, _ := io.ReadAll(resp.Body) resp.Body.Close() lastErr = fmt.Errorf("api rate limited (429): %s", strings.TrimSpace(string(raw))) if attempt >= cfg.MaxRetries { return nil, &RateLimitError{Cause: lastErr, RetryAfter: lastRetryAfter} } continue } if resp.StatusCode >= 400 { raw, _ := io.ReadAll(resp.Body) resp.Body.Close() return nil, fmt.Errorf("api %s: %s", resp.Status, strings.TrimSpace(string(raw))) } return resp, nil } if lastErr == nil { lastErr = errors.New("migration api rate limited") } return nil, &RateLimitError{Cause: lastErr, RetryAfter: lastRetryAfter} }