From b48e5b8bd3fd870b85ee33123f160914129cdb18 Mon Sep 17 00:00:00 2001
From: Vincent Derks
Date: Thu, 30 Apr 2026 16:41:37 +0200
Subject: [PATCH] vercel-flags-core: jitter ingest retries and batch-wait
 window
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the deterministic 100/200ms retry schedule with full-jitter
exponential backoff, jitter MAX_BATCH_WAIT_MS by ±20% to desynchronize
concurrent flushes, and log when a flush exhausts all retries. Helpers
extracted to utils/backoff.ts so they can be reused.

Co-authored-by: Cursor
---
 .changeset/jitter-ingest-retries.md        | 11 ++++
 .../src/utils/backoff.test.ts              | 64 +++++++++++++++++++
 .../vercel-flags-core/src/utils/backoff.ts | 55 ++++++++++++++++
 .../src/utils/usage-tracker.test.ts        | 50 +++++++++++++++
 .../src/utils/usage-tracker.ts             | 21 +++-
 5 files changed, 199 insertions(+), 2 deletions(-)
 create mode 100644 .changeset/jitter-ingest-retries.md
 create mode 100644 packages/vercel-flags-core/src/utils/backoff.test.ts
 create mode 100644 packages/vercel-flags-core/src/utils/backoff.ts

diff --git a/.changeset/jitter-ingest-retries.md b/.changeset/jitter-ingest-retries.md
new file mode 100644
index 00000000..ef5ae3b0
--- /dev/null
+++ b/.changeset/jitter-ingest-retries.md
@@ -0,0 +1,11 @@
+---
+'@vercel/flags-core': minor
+---
+
+Add jitter to ingest retries and the batch-flush window.
+
+The usage tracker now uses AWS-style "Full Jitter" exponential backoff between
+retry attempts (replacing the previous deterministic 100/200ms schedule) and
+randomizes the 5s batch-flush window by ±20% to desynchronize concurrent
+processes. When all retry attempts are exhausted the SDK now logs a structured
+warning so consumers can alert on dropped batches.
diff --git a/packages/vercel-flags-core/src/utils/backoff.test.ts b/packages/vercel-flags-core/src/utils/backoff.test.ts
new file mode 100644
index 00000000..a8aa8f4e
--- /dev/null
+++ b/packages/vercel-flags-core/src/utils/backoff.test.ts
@@ -0,0 +1,64 @@
+import { afterEach, describe, expect, it, vi } from 'vitest';
+import { getJitteredWaitMs, getRetryDelayMs } from './backoff';
+
+afterEach(() => {
+  vi.restoreAllMocks();
+});
+
+describe('getRetryDelayMs', () => {
+  it('uses Full Jitter exponential backoff', () => {
+    vi.spyOn(Math, 'random').mockReturnValue(0.5);
+
+    // attempt n → floor(0.5 * baseMs * 2^(n-1)) given default baseMs=250
+    expect(getRetryDelayMs(1)).toBe(125); // floor(0.5 * 250)
+    expect(getRetryDelayMs(2)).toBe(250); // floor(0.5 * 500)
+    expect(getRetryDelayMs(3)).toBe(500); // floor(0.5 * 1000)
+    expect(getRetryDelayMs(4)).toBe(1000); // floor(0.5 * 2000)
+  });
+
+  it('caps the exponential ceiling at capMs', () => {
+    vi.spyOn(Math, 'random').mockReturnValue(0.999);
+
+    // attempt 10 with default cap=5000 → ceiling clamped to 5000
+    expect(getRetryDelayMs(10)).toBeGreaterThanOrEqual(4990);
+    expect(getRetryDelayMs(10)).toBeLessThan(5000);
+  });
+
+  it('respects custom baseMs and capMs', () => {
+    vi.spyOn(Math, 'random').mockReturnValue(0.5);
+
+    expect(getRetryDelayMs(1, { baseMs: 100 })).toBe(50);
+    expect(getRetryDelayMs(5, { baseMs: 100, capMs: 200 })).toBe(100);
+  });
+
+  it('treats attempt < 1 as the first attempt', () => {
+    vi.spyOn(Math, 'random').mockReturnValue(0.5);
+
+    expect(getRetryDelayMs(0)).toBe(125);
+    expect(getRetryDelayMs(-1)).toBe(125);
+  });
+});
+
+describe('getJitteredWaitMs', () => {
+  it('returns the lower bound when Math.random=0', () => {
+    vi.spyOn(Math, 'random').mockReturnValue(0);
+    expect(getJitteredWaitMs(5000, 0.2)).toBe(4000);
+  });
+
+  it('returns the mean when Math.random=0.5', () => {
+    vi.spyOn(Math, 'random').mockReturnValue(0.5);
+    expect(getJitteredWaitMs(5000, 0.2)).toBe(5000);
+  });
+
+  it('approaches but never reaches the upper bound', () => {
+    vi.spyOn(Math, 'random').mockReturnValue(0.9999);
+    const value = getJitteredWaitMs(5000, 0.2);
+    expect(value).toBeGreaterThanOrEqual(5999);
+    expect(value).toBeLessThan(6000);
+  });
+
+  it('returns baseMs when ratio is 0', () => {
+    vi.spyOn(Math, 'random').mockReturnValue(0.5);
+    expect(getJitteredWaitMs(5000, 0)).toBe(5000);
+  });
+});
diff --git a/packages/vercel-flags-core/src/utils/backoff.ts b/packages/vercel-flags-core/src/utils/backoff.ts
new file mode 100644
index 00000000..3ab4adfc
--- /dev/null
+++ b/packages/vercel-flags-core/src/utils/backoff.ts
@@ -0,0 +1,55 @@
+/**
+ * Backoff and jitter utilities for retry loops.
+ *
+ * The functions here are intentionally generic so they can be used wherever
+ * the SDK retries an operation. They use `Math.random` directly; mock it from
+ * tests if you need deterministic behaviour.
+ */
+
+const DEFAULT_BASE_MS = 250;
+const DEFAULT_CAP_MS = 5000;
+
+export interface RetryDelayOptions {
+  /**
+   * The base delay in milliseconds. With Full Jitter, the first failed attempt
+   * sleeps in `[0, baseMs)`, the second in `[0, baseMs * 2)`, etc.
+   * Defaults to 250ms.
+   */
+  baseMs?: number;
+  /**
+   * Hard ceiling on the computed delay. Defaults to 5000ms.
+   */
+  capMs?: number;
+}
+
+/**
+ * Returns the sleep duration before the next retry attempt using AWS-style
+ * "Full Jitter" exponential backoff.
+ *
+ * @param attempt The 1-indexed attempt number that just failed.
+ * @see https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter
+ */
+export function getRetryDelayMs(
+  attempt: number,
+  options: RetryDelayOptions = {},
+): number {
+  const baseMs = options.baseMs ?? DEFAULT_BASE_MS;
+  const capMs = options.capMs ?? DEFAULT_CAP_MS;
+
+  const ceiling = Math.min(capMs, baseMs * 2 ** Math.max(0, attempt - 1));
+  return Math.floor(Math.random() * ceiling);
+}
+
+/**
+ * Returns `baseMs` perturbed by `±ratio` of itself, drawn uniformly from
+ * `[baseMs * (1 - ratio), baseMs * (1 + ratio))`. Useful for desynchronizing
+ * fixed-interval timers across independent processes.
+ *
+ * @param baseMs The target mean wait, in milliseconds.
+ * @param ratio The fractional jitter to apply on each side, in `[0, 1)`.
+ */
+export function getJitteredWaitMs(baseMs: number, ratio: number): number {
+  const min = baseMs * (1 - ratio);
+  const span = baseMs * 2 * ratio;
+  return Math.floor(min + Math.random() * span);
+}
diff --git a/packages/vercel-flags-core/src/utils/usage-tracker.test.ts b/packages/vercel-flags-core/src/utils/usage-tracker.test.ts
index cb7bd6ab..c2e5ace0 100644
--- a/packages/vercel-flags-core/src/utils/usage-tracker.test.ts
+++ b/packages/vercel-flags-core/src/utils/usage-tracker.test.ts
@@ -437,6 +437,56 @@ describe('UsageTracker', () => {
       // 2 failed + 1 success = 3 total
       expect(requestCount).toBe(3);
     });
+
+    it('should log a structured warning when all retries are exhausted', async () => {
+      vi.spyOn(Math, 'random').mockReturnValue(0);
+      const consoleSpy = vi
+        .spyOn(console, 'error')
+        .mockImplementation(() => {});
+
+      fetchMock.mockResolvedValue(new Response('err', { status: 500 }));
+
+      const tracker = createTracker();
+      tracker.trackRead();
+      await tracker.flush();
+
+      // All 3 attempts fail; SDK logs an extra "Dropped" line
+      expect(fetchMock).toHaveBeenCalledTimes(3);
+      const droppedLogs = consoleSpy.mock.calls.filter(
+        ([msg]) =>
+          typeof msg === 'string' && msg.includes('Dropped 1 events after 3'),
+      );
+      expect(droppedLogs).toHaveLength(1);
+
+      consoleSpy.mockRestore();
+    });
+
+    it('should not log the exhaustion warning when a retry eventually succeeds', async () => {
+      vi.spyOn(Math, 'random').mockReturnValue(0);
+      const consoleSpy = vi
+        .spyOn(console, 'error')
+        .mockImplementation(() => {});
+
+      let requestCount = 0;
+      fetchMock.mockImplementation(async () => {
+        requestCount++;
+        if (requestCount < 3) {
+          return new Response('err', { status: 500 });
+        }
+        return jsonResponse({ ok: true });
+      });
+
+      const tracker = createTracker();
+      tracker.trackRead();
+      await tracker.flush();
+
+      const droppedLogs = consoleSpy.mock.calls.filter(
+        ([msg]) => typeof msg === 'string' && msg.includes('Dropped'),
+      );
+      expect(droppedLogs).toHaveLength(0);
+
+      consoleSpy.mockRestore();
+    });
   });
 
   describe('batch size limit', () => {
diff --git a/packages/vercel-flags-core/src/utils/usage-tracker.ts b/packages/vercel-flags-core/src/utils/usage-tracker.ts
index 2be7411f..033b06b2 100644
--- a/packages/vercel-flags-core/src/utils/usage-tracker.ts
+++ b/packages/vercel-flags-core/src/utils/usage-tracker.ts
@@ -1,5 +1,6 @@
 import { waitUntil } from '@vercel/functions';
 import { version } from '../../package.json';
+import { getJitteredWaitMs, getRetryDelayMs } from './backoff';
 
 const RESOLVED_VOID: Promise<void> = Promise.resolve();
 
@@ -43,6 +44,12 @@
 const MAX_RETRIES = 3;
 const MAX_BATCH_SIZE = 50;
 const MAX_BATCH_WAIT_MS = 5000;
+/**
+ * Symmetric jitter applied to MAX_BATCH_WAIT_MS so that independent processes
+ * that started at the same wall-clock time do not flush in lockstep.
+ */
+const BATCH_WAIT_JITTER_RATIO = 0.2;
+
 interface RequestContext {
   ctx: object | undefined;
   headers: Record<string, string> | undefined;
@@ -212,7 +219,10 @@ export class UsageTracker {
     const pending = (async () => {
       await new Promise((res) => {
         this.batcher.resolveWait = res;
-        timeout = setTimeout(res, MAX_BATCH_WAIT_MS);
+        timeout = setTimeout(
+          res,
+          getJitteredWaitMs(MAX_BATCH_WAIT_MS, BATCH_WAIT_JITTER_RATIO),
+        );
       });
 
       this.batcher.pending = null;
@@ -286,7 +296,14 @@ export class UsageTracker {
           error,
         );
         if (attempt < MAX_RETRIES) {
-          await new Promise((res) => setTimeout(res, attempt * 100));
+          const delayMs = getRetryDelayMs(attempt);
+          await new Promise((res) => setTimeout(res, delayMs));
+        } else {
+          // All retries exhausted — surface a structured warning so consumers
+          // can alert on dropped batches. The events are not persisted anywhere.
+          console.error(
+            `@vercel/flags-core: Dropped ${eventsToSend.length} events after ${MAX_RETRIES} attempts (flushId=${flushId})`,
+          );
+        }
       }
     }
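
For anyone who wants to poke at the new helpers in isolation, here is a minimal sketch (not part of the patch) of how they behave with the defaults introduced above (baseMs = 250, capMs = 5000). It assumes a scratch file placed next to backoff.ts in packages/vercel-flags-core/src/utils/, since the helpers are internal utilities and, as far as this patch shows, are not exported from the package entry point.

// Sanity-check sketch for the helpers added in backoff.ts.
import { getJitteredWaitMs, getRetryDelayMs } from './backoff';

// Full Jitter: a failed attempt n sleeps somewhere in
// [0, min(capMs, baseMs * 2^(n-1))), i.e. [0, 250), [0, 500), [0, 1000) for the defaults.
for (let attempt = 1; attempt <= 3; attempt++) {
  const delayMs = getRetryDelayMs(attempt);
  console.log(`attempt ${attempt} failed, sleeping ${delayMs}ms before retrying`);
}

// Batch-wait jitter: 5000ms ±20% yields a flush window in [4000, 6000),
// which is what keeps concurrent processes from flushing in lockstep.
const waitMs = getJitteredWaitMs(5000, 0.2);
console.log(`next flush scheduled in ${waitMs}ms`);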