Skip to content

Commit 9ab9fd5

Browse files
committed
fix: improve cartographer-to-lighthouse Redis connectivity
1 parent 1520b34 commit 9ab9fd5

File tree

17 files changed

+417
-30
lines changed

17 files changed

+417
-30
lines changed

ops/mainnet/staging/core/main.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,7 @@ module "lighthouse_queue_cache" {
542542
public_redis = true
543543
transit_encryption_enabled = true
544544
auth_token = var.lighthouse_queue_redis_auth_token
545+
maxmemory_policy = "noeviction"
545546
}
546547

547548
# Expose the lighthouse queue Redis over PrivateLink so the cartographer-handler

ops/modules/redis/main.tf

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,32 @@ locals {
33
use_replication_group = var.transit_encryption_enabled
44
}
55

6+
# BullMQ needs maxmemory-policy = noeviction, whereas the default ElastiCache
# parameter group uses volatile-lru and can evict keys BullMQ depends on.
# The dedicated parameter group is only created when var.maxmemory_policy is set.
resource "aws_elasticache_parameter_group" "redis" {
  count = var.maxmemory_policy == null ? 0 : 1

  name   = "redis-params-${var.environment}-${var.stage}-${var.family}"
  family = var.parameter_group_family

  # The single override this group exists for.
  parameter {
    name  = "maxmemory-policy"
    value = var.maxmemory_policy
  }

  tags = {
    Environment = var.environment
    Stage       = var.stage
  }
}
23+
624
resource "aws_elasticache_cluster" "redis" {
725
count = local.use_replication_group ? 0 : 1
826
cluster_id = "redis-cluster-${var.environment}-${var.stage}-${var.family}"
927
engine = "redis"
1028
node_type = var.node_type
1129
num_cache_nodes = 1
12-
# parameter_group_name = "default.redis6.x"
13-
# engine_version = "6.x"
30+
parameter_group_name = var.maxmemory_policy != null ? aws_elasticache_parameter_group.redis[0].name : null
31+
engine_version = var.engine_version
1432
port = 6379
1533
subnet_group_name = aws_elasticache_subnet_group.default.name
1634
security_group_ids = [aws_security_group.redis.id]
@@ -28,6 +46,8 @@ resource "aws_elasticache_replication_group" "redis" {
2846
description = "Redis replication group for ${var.family} (${var.environment}-${var.stage})"
2947
node_type = var.node_type
3048
num_cache_clusters = 1
49+
parameter_group_name = var.maxmemory_policy != null ? aws_elasticache_parameter_group.redis[0].name : null
50+
engine_version = var.engine_version
3151
port = 6379
3252
subnet_group_name = aws_elasticache_subnet_group.default.name
3353
security_group_ids = [aws_security_group.redis.id]

ops/modules/redis/variables.tf

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,21 @@ variable "transit_encryption_enabled" {
4949
default = false
5050
type = bool
5151
}
52+
53+
# Eviction policy override; leaving it null keeps the default parameter group.
variable "maxmemory_policy" {
  type        = string
  description = "Redis maxmemory-policy. Set to 'noeviction' for BullMQ workloads. When null, the default parameter group is used."
  default     = null
}

# Family of the custom parameter group; has to agree with engine_version.
variable "parameter_group_family" {
  type        = string
  description = "ElastiCache parameter group family (e.g. redis7, redis6.x). Must match the engine version."
  default     = "redis7"
}

# Engine version applied to both the cluster and the replication group.
variable "engine_version" {
  type        = string
  description = "Redis OSS engine_version for ElastiCache (e.g. 7.1). Must align with parameter_group_family (use redis7 for 7.x)."
  default     = "7.1"
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"extends": "@istanbuljs/nyc-config-typescript",
3+
"all": true,
4+
"check-coverage": true,
5+
"branches": 60,
6+
"lines": 50,
7+
"functions": 60,
8+
"statements": 50,
9+
"exclude": ["dist/**", "test/**"]
10+
}

packages/adapters/mqclient/package.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,21 @@
1313
"lint:fix": "yarn lint:src --fix",
1414
"clean": "rimraf ./dist ./tsconfig.tsBuildInfo",
1515
"build": "tsc --build ./tsconfig.json",
16+
"test": "nyc ts-mocha --require 'test/globalTestHook.ts' --check-leaks --exit --timeout 60000 'test/**/*.spec.ts'",
17+
"test:unit": "nyc ts-mocha --require 'test/globalTestHook.ts' --check-leaks --exit --timeout 60000 'test/**/*.spec.ts'",
1618
"purge": "yarn clean && rimraf ./node_modules"
1719
},
1820
"dependencies": {
21+
"@chimera-monorepo/utils": "workspace:*",
1922
"bullmq": "5.34.8"
2023
},
2124
"devDependencies": {
25+
"@istanbuljs/nyc-config-typescript": "1.0.2",
2226
"eslint": "8.34.0",
27+
"nyc": "15.1.0",
2328
"rimraf": "5.0.1",
29+
"sinon": "15.0.1",
30+
"ts-mocha": "10.0.0",
2431
"typescript": "5.9.2"
2532
}
2633
}

packages/adapters/mqclient/src/index.ts

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { Queue, Worker, Job, ConnectionOptions, QueueOptions, WorkerOptions } from 'bullmq';
2+
import { jsonifyError, Logger } from '@chimera-monorepo/utils';
23

34
export { Queue, Worker, Job } from 'bullmq';
45

@@ -24,15 +25,38 @@ export const parseRedisUrl = (redisUrl: string): ConnectionOptions => {
2425
...(url.username ? { username: url.username } : {}),
2526
...(url.protocol === 'rediss:' ? { tls: tlsServername ? { servername: tlsServername } : {} } : {}),
2627
connectTimeout: 17_000,
27-
maxRetriesPerRequest: 4,
28-
retryStrategy: (times: number) => Math.min(times * 30, 1000),
28+
maxRetriesPerRequest: null,
29+
retryStrategy: (times: number) => Math.min(times * 50, 5_000),
2930
keepAlive: 30_000,
3031
};
3132
};
3233

33-
export const createProducer = (redisUrl: string, queueName: string, opts?: Partial<QueueOptions>): Queue => {
34+
/**
 * Checks whether the Redis connection behind a BullMQ queue or worker is usable.
 *
 * Two steps are each bounded by `timeoutMs`: obtaining the underlying client,
 * then issuing a `PING`. The check never throws; any failure is reported as `false`.
 *
 * @param queueOrWorker - Queue or worker whose `client` promise is probed.
 * @param timeoutMs - Upper bound, in milliseconds, applied to each step separately.
 * @returns `true` only when the client status is `'ready'` and the ping reply is `'PONG'`;
 *          `false` on a non-ready status, any other reply, a timeout, or any error.
 */
export const pingRedis = async (queueOrWorker: Queue | Worker, timeoutMs = 3_000): Promise<boolean> => {
  // Races `pending` against a deadline and always disarms the deadline timer afterwards,
  // so a finished health check leaves no live timer behind to hold the event loop open.
  const withDeadline = async <T>(pending: Promise<T>): Promise<T> => {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const deadline = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('timeout')), timeoutMs);
    });
    try {
      return await Promise.race([pending, deadline]);
    } finally {
      clearTimeout(timer);
    }
  };

  try {
    const client = await withDeadline(queueOrWorker.client);
    // A client that is still connecting or reconnecting counts as unhealthy without pinging.
    if (client.status !== 'ready') return false;

    const reply = await withDeadline(client.ping());
    return reply === 'PONG';
  } catch {
    // Deliberate best-effort: timeouts and connection errors all mean "not reachable".
    return false;
  }
};
51+
52+
export const createProducer = (
53+
redisUrl: string,
54+
queueName: string,
55+
logger: Logger,
56+
opts?: Partial<QueueOptions>,
57+
): Queue => {
3458
const connection = parseRedisUrl(redisUrl);
35-
return new Queue(queueName, {
59+
const queue = new Queue(queueName, {
3660
connection,
3761
defaultJobOptions: {
3862
attempts: 3,
@@ -42,19 +66,40 @@ export const createProducer = (redisUrl: string, queueName: string, opts?: Parti
4266
},
4367
...opts,
4468
});
69+
queue.on('error', (err) =>
70+
logger.error(`Queue "${queueName}" Redis connection error`, undefined, undefined, jsonifyError(err), {
71+
queueName,
72+
role: 'producer',
73+
}),
74+
);
75+
return queue;
4576
};
4677

4778
/**
 * Builds a BullMQ worker for `queueName` and reports its connection events to `logger`.
 *
 * Connection options are derived from `parseRedisUrl(redisUrl)`. The defaults are a
 * concurrency of 1 and a 5-minute lock duration; `opts` is spread last, so any key it
 * carries overrides these defaults.
 *
 * @param redisUrl - Redis URL passed to `parseRedisUrl`.
 * @param queueName - Queue to consume; also embedded in log messages and log context.
 * @param processor - Async handler run for each job.
 * @param logger - Gets an info entry on the worker's `ready` event and an error entry on `error`.
 * @param opts - Optional worker option overrides.
 * @returns The worker, with both listeners already attached.
 */
export const createWorker = (
  redisUrl: string,
  queueName: string,
  processor: (job: Job) => Promise<void>,
  logger: Logger,
  opts?: Partial<WorkerOptions>,
): Worker => {
  const workerOptions: WorkerOptions = {
    connection: parseRedisUrl(redisUrl),
    concurrency: 1,
    lockDuration: 300_000, // 5 minutes — prevents stale-lock re-processing for long tasks
    ...opts,
  };
  const worker = new Worker(queueName, processor, workerOptions);

  // A fresh context object per log call, exactly as each handler built its own literal.
  const logContext = () => ({ queueName, role: 'worker' });

  worker.on('ready', () => {
    logger.info(`Worker "${queueName}" Redis connected`, undefined, undefined, logContext());
  });

  worker.on('error', (err) => {
    const serialized = jsonifyError(err);
    logger.error(`Worker "${queueName}" Redis connection error`, undefined, undefined, serialized, logContext());
  });

  return worker;
};
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import { restore, reset } from 'sinon';
2+
3+
/**
 * Mocha root hooks: after every test, call sinon's `restore()` and then `reset()`
 * so fakes and their recorded state do not carry over into the next test.
 */
export const mochaHooks = {
  afterEach: function afterEach(): void {
    restore();
    reset();
  },
};

0 commit comments

Comments
 (0)