Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .changeset/jitter-ingest-retries.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
'@vercel/flags-core': minor
---

Add jitter to ingest retries and the batch-flush window.

The usage tracker now uses AWS-style "Full Jitter" exponential backoff between
retry attempts (replacing the previous deterministic 100/200ms schedule) and
randomizes the 5s batch-flush window by ±20% to desynchronize concurrent
processes. When all retry attempts are exhausted the SDK now logs a structured
warning so consumers can alert on dropped batches.
64 changes: 64 additions & 0 deletions packages/vercel-flags-core/src/utils/backoff.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { afterEach, describe, expect, it, vi } from 'vitest';
import { getJitteredWaitMs, getRetryDelayMs } from './backoff';

// Undo the Math.random spies installed by individual tests so each case
// starts from the real RNG.
afterEach(() => {
vi.restoreAllMocks();
});

describe('getRetryDelayMs', () => {
  it('uses Full Jitter exponential backoff', () => {
    vi.spyOn(Math, 'random').mockReturnValue(0.5);

    // With Math.random pinned at 0.5 and the default baseMs of 250, attempt n
    // produces floor(0.5 * 250 * 2^(n-1)).
    const expectedByAttempt = [125, 250, 500, 1000];
    expectedByAttempt.forEach((expected, index) => {
      expect(getRetryDelayMs(index + 1)).toBe(expected);
    });
  });

  it('caps the exponential ceiling at capMs', () => {
    vi.spyOn(Math, 'random').mockReturnValue(0.999);

    // At attempt 10 the raw ceiling (250 * 2^9 = 128000) is far above the
    // default 5000ms cap, so the draw lands just under the cap.
    const delay = getRetryDelayMs(10);
    expect(delay).toBeGreaterThanOrEqual(4990);
    expect(delay).toBeLessThan(5000);
  });

  it('respects custom baseMs and capMs', () => {
    vi.spyOn(Math, 'random').mockReturnValue(0.5);

    // floor(0.5 * 100) for attempt 1; attempt 5's ceiling (1600) clamps to 200.
    expect(getRetryDelayMs(1, { baseMs: 100 })).toBe(50);
    expect(getRetryDelayMs(5, { baseMs: 100, capMs: 200 })).toBe(100);
  });

  it('treats attempt < 1 as the first attempt', () => {
    vi.spyOn(Math, 'random').mockReturnValue(0.5);

    // Both clamp to the attempt-1 window: floor(0.5 * 250).
    expect(getRetryDelayMs(0)).toBe(125);
    expect(getRetryDelayMs(-1)).toBe(125);
  });
});

describe('getJitteredWaitMs', () => {
  it('returns the lower bound when Math.random=0', () => {
    vi.spyOn(Math, 'random').mockReturnValue(0);
    // 5000 * (1 - 0.2)
    const sampled = getJitteredWaitMs(5000, 0.2);
    expect(sampled).toBe(4000);
  });

  it('returns the mean when Math.random=0.5', () => {
    vi.spyOn(Math, 'random').mockReturnValue(0.5);
    // The midpoint of [4000, 6000) is the base itself.
    const sampled = getJitteredWaitMs(5000, 0.2);
    expect(sampled).toBe(5000);
  });

  it('approaches but never reaches the upper bound', () => {
    vi.spyOn(Math, 'random').mockReturnValue(0.9999);
    // 4000 + 0.9999 * 2000 = 5999.8, floored to 5999 — strictly below 6000.
    const sampled = getJitteredWaitMs(5000, 0.2);
    expect(sampled).toBeGreaterThanOrEqual(5999);
    expect(sampled).toBeLessThan(6000);
  });

  it('returns baseMs when ratio is 0', () => {
    vi.spyOn(Math, 'random').mockReturnValue(0.5);
    // Zero ratio collapses the window to a single point.
    const sampled = getJitteredWaitMs(5000, 0);
    expect(sampled).toBe(5000);
  });
});
55 changes: 55 additions & 0 deletions packages/vercel-flags-core/src/utils/backoff.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/**
* Backoff and jitter utilities for retry loops.
*
* The functions here are intentionally generic so they can be used wherever
* the SDK retries an operation. They use `Math.random` directly; mock it from
* tests if you need deterministic behaviour.
*/

/** Default first-attempt ceiling for Full Jitter backoff, in milliseconds. */
const DEFAULT_BASE_MS = 250;
/** Default hard upper bound on any computed retry delay, in milliseconds. */
const DEFAULT_CAP_MS = 5000;

export interface RetryDelayOptions {
  /**
   * The base delay in milliseconds. With Full Jitter, the first failed attempt
   * sleeps in `[0, baseMs)`, the second in `[0, baseMs * 2)`, etc.
   * Defaults to 250ms.
   */
  baseMs?: number;
  /**
   * Hard ceiling on the computed delay. Defaults to 5000ms.
   */
  capMs?: number;
}

/**
 * Computes how long to sleep before the next retry using AWS-style
 * "Full Jitter" exponential backoff: a uniform draw from
 * `[0, min(capMs, baseMs * 2^(attempt - 1)))`.
 *
 * @param attempt The 1-indexed attempt number that just failed. Values below
 *   1 are treated as the first attempt.
 * @param options Optional overrides for the base delay and the delay ceiling.
 * @returns An integer delay in milliseconds.
 * @see https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter
 */
export function getRetryDelayMs(
  attempt: number,
  options: RetryDelayOptions = {},
): number {
  const { baseMs = DEFAULT_BASE_MS, capMs = DEFAULT_CAP_MS } = options;

  // Clamp so attempt values below 1 never produce a negative exponent.
  const exponent = attempt > 1 ? attempt - 1 : 0;
  const ceiling = Math.min(capMs, baseMs * 2 ** exponent);

  // Full Jitter: sample uniformly across the entire [0, ceiling) window.
  return Math.floor(Math.random() * ceiling);
}

/**
 * Perturbs `baseMs` by up to `±ratio` of itself, drawing uniformly from
 * `[baseMs * (1 - ratio), baseMs * (1 + ratio))`. Useful for desynchronizing
 * fixed-interval timers across independent processes.
 *
 * @param baseMs The target mean wait, in milliseconds.
 * @param ratio The fractional jitter to apply on each side, in `[0, 1)`.
 * @returns An integer wait in milliseconds.
 */
export function getJitteredWaitMs(baseMs: number, ratio: number): number {
  const lowerBound = baseMs * (1 - ratio);
  const jitterRange = baseMs * 2 * ratio;
  const sampled = lowerBound + Math.random() * jitterRange;
  return Math.floor(sampled);
}
50 changes: 50 additions & 0 deletions packages/vercel-flags-core/src/utils/usage-tracker.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,56 @@ describe('UsageTracker', () => {
// 2 failed + 1 success = 3 total
expect(requestCount).toBe(3);
});

// Every attempt returns a 500, so the batch is dropped and exactly one
// structured "Dropped" line must be emitted via console.error.
it('should log a structured warning when all retries are exhausted', async () => {
// Pin Math.random to 0 so the jittered retry delays collapse to 0ms and the
// test does not actually sleep between attempts.
vi.spyOn(Math, 'random').mockReturnValue(0);
const consoleSpy = vi
.spyOn(console, 'error')
.mockImplementation(() => {});

fetchMock.mockResolvedValue(new Response('err', { status: 500 }));

const tracker = createTracker();
tracker.trackRead();
await tracker.flush();

// All 3 attempts fail; SDK logs an extra "Dropped" line
expect(fetchMock).toHaveBeenCalledTimes(3);
// Filter rather than match a full string so unrelated per-attempt error logs
// (also routed through console.error) don't fail the count.
const droppedLogs = consoleSpy.mock.calls.filter(
([msg]) =>
typeof msg === 'string' && msg.includes('Dropped 1 events after 3'),
);
expect(droppedLogs).toHaveLength(1);

consoleSpy.mockRestore();
});

// Counterpart to the exhaustion test: if any attempt within the retry budget
// succeeds, no "Dropped" warning may be logged.
it('should not log the exhaustion warning when a retry eventually succeeds', async () => {
// Pin Math.random to 0 so the jittered retry delays collapse to 0ms.
vi.spyOn(Math, 'random').mockReturnValue(0);
const consoleSpy = vi
.spyOn(console, 'error')
.mockImplementation(() => {});

// Fail the first two attempts, succeed on the third (the last allowed one).
let requestCount = 0;
fetchMock.mockImplementation(async () => {
requestCount++;
if (requestCount < 3) {
return new Response('err', { status: 500 });
}
return jsonResponse({ ok: true });
});

const tracker = createTracker();
tracker.trackRead();
await tracker.flush();

const droppedLogs = consoleSpy.mock.calls.filter(
([msg]) => typeof msg === 'string' && msg.includes('Dropped'),
);
expect(droppedLogs).toHaveLength(0);

consoleSpy.mockRestore();
});
});

describe('batch size limit', () => {
Expand Down
21 changes: 19 additions & 2 deletions packages/vercel-flags-core/src/utils/usage-tracker.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { waitUntil } from '@vercel/functions';
import { version } from '../../package.json';
import { getJitteredWaitMs, getRetryDelayMs } from './backoff';

const RESOLVED_VOID: Promise<void> = Promise.resolve();

Expand Down Expand Up @@ -43,6 +44,12 @@ const MAX_RETRIES = 3;
const MAX_BATCH_SIZE = 50;
const MAX_BATCH_WAIT_MS = 5000;

/**
* Symmetric jitter applied to MAX_BATCH_WAIT_MS so that independent processes
* that started at the same wall-clock time do not flush in lockstep.
*/
const BATCH_WAIT_JITTER_RATIO = 0.2;

interface RequestContext {
ctx: object | undefined;
headers: Record<string, string> | undefined;
Expand Down Expand Up @@ -212,7 +219,10 @@ export class UsageTracker {
const pending = (async () => {
await new Promise<void>((res) => {
this.batcher.resolveWait = res;
timeout = setTimeout(res, MAX_BATCH_WAIT_MS);
timeout = setTimeout(
res,
getJitteredWaitMs(MAX_BATCH_WAIT_MS, BATCH_WAIT_JITTER_RATIO),
);
});

this.batcher.pending = null;
Expand Down Expand Up @@ -286,7 +296,14 @@ export class UsageTracker {
error,
);
if (attempt < MAX_RETRIES) {
await new Promise((res) => setTimeout(res, attempt * 100));
const delayMs = getRetryDelayMs(attempt);
await new Promise((res) => setTimeout(res, delayMs));
} else {
// All retries exhausted — surface a structured warning so consumers
// can alert on dropped batches. The events are not persisted anywhere.
console.error(
`@vercel/flags-core: Dropped ${eventsToSend.length} events after ${MAX_RETRIES} attempts (flushId=${flushId})`,
);
}
}
}
Expand Down
Loading