Skip to content

Commit 99693a3

Browse files
committed
Implement Feature #7: Health Check API
New Module: health-check.ts - /health: Returns system health status (uptime, memory, groups) - /ready: Returns readiness for traffic - /metrics: Prometheus-compatible metrics format Config (config.ts): - HEALTH_CHECK.ENABLED: Toggle via HEALTH_CHECK_ENABLED env - HEALTH_CHECK.PORT: Default 8080 Integration (index.ts): - Started in main() after database init - Stopped gracefully on SIGINT/SIGTERM - Added SIGTERM handler for container orchestration This enables monitoring via external systems like Kubernetes, Docker healthchecks, or Prometheus.
1 parent bf44ad3 commit 99693a3

3 files changed

Lines changed: 167 additions & 0 deletions

File tree

src/config.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,16 @@ export const CONTAINER_MAX_OUTPUT_SIZE = parseInt(
4747
); // 10MB default
4848
// NOTE(review): the name suggests an IPC polling interval, presumably in milliseconds — confirm at the call site.
export const IPC_POLL_INTERVAL = 1000;
4949

50+
/**
 * Parse a TCP port number from a raw environment value.
 *
 * Falls back to `fallback` when the value is missing, empty, not numeric, or
 * outside 0-65535, so a malformed HEALTH_CHECK_PORT cannot hand an invalid
 * port (e.g. NaN) to the HTTP server at startup.
 *
 * @param raw - Raw environment variable value, possibly undefined.
 * @param fallback - Port used when `raw` is unusable.
 * @returns A valid port number.
 */
function parseHealthCheckPort(
  raw: string | undefined,
  fallback = 8080,
): number {
  // `||` (not `??`) so that an empty string also selects the fallback.
  const port = Number.parseInt(raw || String(fallback), 10);
  const valid = Number.isInteger(port) && port >= 0 && port <= 65535;
  return valid ? port : fallback;
}

/**
 * Health check HTTP server configuration
 */
export const HEALTH_CHECK = {
  /** Enable health check HTTP server (on unless HEALTH_CHECK_ENABLED is exactly 'false') */
  ENABLED: process.env.HEALTH_CHECK_ENABLED !== 'false',
  /** Port for health check server (HEALTH_CHECK_PORT, default 8080) */
  PORT: parseHealthCheckPort(process.env.HEALTH_CHECK_PORT),
} as const;
59+
5060
/**
 * Escape every regular-expression metacharacter in `str` by prefixing it
 * with a backslash, so the result can be embedded in a RegExp as a literal.
 *
 * @param str - Text to escape.
 * @returns The escaped text.
 */
function escapeRegex(str: string): string {
  const escaped = str.replace(
    /[.*+?^${}()|[\]\\]/g,
    (metaChar) => '\\' + metaChar,
  );
  return escaped;
}

src/health-check.ts

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/**
2+
* Health Check HTTP Server
3+
*
4+
* Provides /health, /ready and /metrics endpoints for container orchestration
5+
* and external monitoring systems.
6+
*/
7+
8+
import { createServer, IncomingMessage, ServerResponse } from 'http';
9+
import { HEALTH_CHECK } from './config.js';
10+
import { logger } from './logger.js';
11+
12+
// ============================================================================
13+
// Types
14+
// ============================================================================
15+
16+
/** Overall health classification carried in a health report. */
type HealthState = 'healthy' | 'degraded' | 'unhealthy';

/** Process memory figures carried in a health report. */
interface HealthMemoryUsage {
  /** Heap currently in use. */
  heapUsed: number;
  /** Total heap allocated. */
  heapTotal: number;
  /** Resident set size. */
  rss: number;
}

/** Payload describing the current state of the process. */
interface HealthStatus {
  /** Overall health classification. */
  status: HealthState;
  /** Process uptime. */
  uptime: number;
  /** Memory usage snapshot. */
  memory: HealthMemoryUsage;
  /** Number of registered groups. */
  groups: number;
  /** Application version string. */
  version: string;
  /** Time the report was produced, as a string. */
  timestamp: string;
}
28+
29+
// ============================================================================
30+
// State (injected at startup)
31+
// ============================================================================
32+
33+
/** Supplies the registered-group count; reports 0 until a provider is injected. */
let getGroupCount: () => number = () => 0;
/** Supplies the active-container count; reports 0 until a provider is injected. */
let getActiveContainers: () => number = () => 0;

/**
 * Inject the callbacks the health endpoints use to read application state.
 *
 * @param deps - `getGroupCount` always replaces the current provider;
 *   `getActiveContainers` replaces the current provider only when supplied.
 */
export function setHealthCheckDependencies(deps: {
  getGroupCount: () => number;
  getActiveContainers?: () => number;
}): void {
  const { getGroupCount: groupCounter, getActiveContainers: containerCounter } =
    deps;

  getGroupCount = groupCounter;

  // Keep the previously installed container provider when none is given.
  if (!containerCounter) {
    return;
  }
  getActiveContainers = containerCounter;
}
45+
46+
// ============================================================================
47+
// Health Status
48+
// ============================================================================
49+
50+
/**
 * Build a snapshot of the current process state for the health endpoints.
 *
 * Memory figures are converted from bytes to whole mebibytes. The status is
 * always reported as 'healthy'; no degraded/unhealthy detection happens here.
 *
 * @returns The health report for this instant.
 */
function getHealthStatus(): HealthStatus {
  const toMiB = (bytes: number): number => Math.round(bytes / 1024 / 1024);
  const { heapUsed, heapTotal, rss } = process.memoryUsage();

  const report: HealthStatus = {
    status: 'healthy',
    uptime: process.uptime(),
    memory: {
      heapUsed: toMiB(heapUsed),
      heapTotal: toMiB(heapTotal),
      rss: toMiB(rss),
    },
    groups: getGroupCount(),
    // Falls back to '1.0.0' when npm_package_version is unset or empty.
    version: process.env.npm_package_version || '1.0.0',
    timestamp: new Date().toISOString(),
  };
  return report;
}
66+
67+
// ============================================================================
68+
// HTTP Server
69+
// ============================================================================
70+
71+
/**
 * Route an incoming HTTP request to the health, readiness or metrics handler.
 *
 * Routes (matched on the path only; any query string is ignored):
 * - `/health`, `/healthz`: JSON health report, 200 when healthy, otherwise 503.
 * - `/ready`, `/readyz`: JSON readiness report, 200 once at least one group
 *   is registered, otherwise 503.
 * - `/metrics`: plain-text metrics in Prometheus exposition style.
 * - anything else: 404 with a JSON error body.
 *
 * @param req - Incoming request; only `url` is inspected.
 * @param res - Response to write to; always ended before returning.
 */
function handleRequest(req: IncomingMessage, res: ServerResponse): void {
  // Strip the query string so probes such as `/health?verbose=1` still match
  // instead of falling through to 404.
  const rawUrl = req.url || '/';
  const queryStart = rawUrl.indexOf('?');
  const path = queryStart === -1 ? rawUrl : rawUrl.slice(0, queryStart);

  // CORS headers for browser access
  res.setHeader('Access-Control-Allow-Origin', '*');
  res.setHeader('Content-Type', 'application/json');

  if (path === '/health' || path === '/healthz') {
    const health = getHealthStatus();
    res.statusCode = health.status === 'healthy' ? 200 : 503;
    res.end(JSON.stringify(health, null, 2));
    return;
  }

  if (path === '/ready' || path === '/readyz') {
    // Readiness check - are we ready to accept traffic?
    // Read the count once so `ready` and `groups` always agree.
    const groups = getGroupCount();
    const ready = groups > 0;
    res.statusCode = ready ? 200 : 503;
    res.end(
      JSON.stringify({
        ready,
        groups,
        activeContainers: getActiveContainers(),
      }),
    );
    return;
  }

  if (path === '/metrics') {
    // Prometheus-style metrics
    const health = getHealthStatus();
    res.setHeader('Content-Type', 'text/plain');
    const lines = [
      '# HELP nanogemclaw_uptime_seconds Application uptime',
      '# TYPE nanogemclaw_uptime_seconds gauge',
      `nanogemclaw_uptime_seconds ${health.uptime.toFixed(0)}`,
      '',
      '# HELP nanogemclaw_memory_heap_bytes Heap memory used',
      '# TYPE nanogemclaw_memory_heap_bytes gauge',
      // heapUsed is held in whole MiB; convert back to bytes for the metric.
      `nanogemclaw_memory_heap_bytes ${health.memory.heapUsed * 1024 * 1024}`,
      '',
      '# HELP nanogemclaw_groups_total Number of registered groups',
      '# TYPE nanogemclaw_groups_total gauge',
      `nanogemclaw_groups_total ${health.groups}`,
      '', // trailing newline terminates the final sample line
    ];
    res.end(lines.join('\n'));
    return;
  }

  res.statusCode = 404;
  res.end(JSON.stringify({ error: 'Not found' }));
}
112+
113+
/** The running health check server, or null when not started or already stopped. */
let server: ReturnType<typeof createServer> | null = null;

/**
 * Start the health check HTTP server on HEALTH_CHECK.PORT.
 *
 * Does nothing when the server is disabled via configuration, or when a
 * server created by a previous call is still held (so a repeated call
 * cannot leak the first listener and then fail on the occupied port).
 */
export function startHealthCheckServer(): void {
  if (!HEALTH_CHECK.ENABLED) {
    logger.info('Health check server disabled');
    return;
  }

  if (server) {
    logger.info(
      { port: HEALTH_CHECK.PORT },
      'Health check server already started',
    );
    return;
  }

  const instance = createServer(handleRequest);

  // Attach the error listener before listen() so bind failures such as
  // EADDRINUSE are logged rather than surfacing as an unhandled 'error' event.
  instance.on('error', (err) => {
    logger.error({ err }, 'Health check server error');
  });

  instance.listen(HEALTH_CHECK.PORT, () => {
    logger.info(
      { port: HEALTH_CHECK.PORT },
      'Health check server started',
    );
  });

  // Publish only after listen() returned without throwing.
  server = instance;
}

/**
 * Stop the health check server if one is held.
 *
 * The module-level reference is cleared immediately, so repeated calls
 * (e.g. SIGINT followed by SIGTERM) are no-ops and the server can be started
 * again later.
 *
 * @returns A promise that resolves once the server has closed; it never
 *   rejects, and a close error is only logged.
 */
export function stopHealthCheckServer(): Promise<void> {
  const current = server;
  if (!current) {
    return Promise.resolve();
  }
  server = null;

  return new Promise((resolve) => {
    current.close((err) => {
      if (err) {
        // E.g. the server never reached the listening state.
        logger.error({ err }, 'Health check server error');
      } else {
        logger.info('Health check server stopped');
      }
      resolve();
    });
  });
}

src/index.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,13 @@ async function main(): Promise<void> {
10281028
initDatabase();
10291029
loadState();
10301030

1031+
// Start health check server
1032+
const { setHealthCheckDependencies, startHealthCheckServer } = await import('./health-check.js');
1033+
setHealthCheckDependencies({
1034+
getGroupCount: () => Object.keys(registeredGroups).length,
1035+
});
1036+
startHealthCheckServer();
1037+
10311038
// Connect to Telegram
10321039
await connectTelegram();
10331040
}
@@ -1041,6 +1048,8 @@ main().catch((err) => {
10411048
process.on('SIGINT', async () => {
10421049
console.log('\nShutting down gracefully...');
10431050
try {
1051+
const { stopHealthCheckServer } = await import('./health-check.js');
1052+
await stopHealthCheckServer();
10441053
await bot?.stopPolling();
10451054
saveState();
10461055
closeDatabase();
@@ -1054,6 +1063,8 @@ process.on('SIGINT', async () => {
10541063
process.on('SIGTERM', async () => {
10551064
console.log('Received SIGTERM, shutting down...');
10561065
try {
1066+
const { stopHealthCheckServer } = await import('./health-check.js');
1067+
await stopHealthCheckServer();
10571068
await bot?.stopPolling();
10581069
saveState();
10591070
closeDatabase();

0 commit comments

Comments
 (0)