node: redesign readiness check system & improve event loop lag check
This commit is contained in:
@@ -1,26 +1,40 @@
|
||||
import { IntervalHistogram, monitorEventLoopDelay } from 'node:perf_hooks';
|
||||
import { ReadinessDetail, ReadinessFunction, ReadinessStatus } from './readiness';
|
||||
|
||||
let hist: IntervalHistogram | null = null;
|
||||
import { monitorEventLoopDelay } from 'node:perf_hooks';
|
||||
import { ReadinessCheck, ReadinessFunctionReturn, ReadinessStatus } from './readiness';
|
||||
|
||||
/**
 * Builds a readiness check that reports event loop lag.
 *
 * Each call starts its own event loop delay histogram (10 ms resolution) and an
 * unref'ed interval that resets the histogram every `histResetMs`, discarding
 * older samples.
 *
 * @param options - Configuration options for the event loop lag check. May be omitted to use all defaults.
 * @param options.degradedMs - Lag in milliseconds at or above which the status is 'degraded' (default: 200).
 * @param options.failMs - Lag in milliseconds at or above which the status is 'error' (default: 1000).
 * @param options.histResetMs - Interval in milliseconds between histogram resets (default: 60000).
 * @param options.percentile - Percentile of the recorded delays to report (default: 50).
 * @returns A ReadinessCheck named 'event-loop-lag' with a 500 ms timeout.
 */
export const buildEventLoopLagCheck = (
  options: {
    degradedMs?: number;
    failMs?: number;
    histResetMs?: number;
    percentile?: number;
  } = {},
): ReadinessCheck => {
  // Every field has a default, so the options object itself defaults to {} and
  // callers can write buildEventLoopLagCheck() instead of buildEventLoopLagCheck({}).
  const { degradedMs = 200, failMs = 1000, histResetMs = 60000, percentile = 50 } = options;

  const hist = monitorEventLoopDelay({ resolution: 10 });
  hist.enable();

  // unref() so the periodic reset never keeps the process alive on its own.
  setInterval(() => {
    hist.reset();
  }, histResetMs).unref();

  return {
    name: 'event-loop-lag',
    fn: async (): Promise<ReadinessFunctionReturn> => {
      const lag = hist.percentile(percentile) / 1e6; // Convert from nanoseconds to milliseconds
      const status: ReadinessStatus = lag < degradedMs ? 'ok' : lag < failMs ? 'degraded' : 'error';
      return {
        status,
        message: `Event loop lag is ${lag.toFixed(2)} ms`,
      };
    },
    timeout: 500,
  };
};
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
/** Outcome reported by a readiness check function; `readiness` combines it with the check's name and measured duration to build a ReadinessDetail */
|
||||
export type ReadinessFunctionReturn = {
|
||||
/** Status the check reports for the dependency or resource it inspected */
|
||||
status: ReadinessStatus;
|
||||
/** Optional human-readable message providing additional information about the readiness check */
|
||||
message?: string;
|
||||
};
|
||||
|
||||
/** Function that performs a readiness check */
|
||||
export type ReadinessFunction = () => Promise<ReadinessDetail> | ReadinessDetail;
|
||||
export type ReadinessFunction = (check: ReadinessCheck) => Promise<ReadinessFunctionReturn>;
|
||||
|
||||
/** Status of a readiness check */
|
||||
export type ReadinessStatus = 'ok' | 'error' | 'degraded';
|
||||
@@ -10,6 +18,16 @@ const aggregateStatus = (statuses: ReadinessStatus[]): ReadinessStatus => {
|
||||
return 'ok';
|
||||
};
|
||||
|
||||
/** A named readiness check that `readiness` runs under a per-check timeout */
|
||||
export type ReadinessCheck = {
|
||||
/** Name of the readiness check; reported as the `name` of its ReadinessDetail, including when the check fails */
|
||||
name: string;
|
||||
/** Function that performs the readiness check; `readiness` calls it with this check object as its argument */
|
||||
fn: ReadinessFunction;
|
||||
/** Timeout in milliseconds for the readiness check (default: 5000); a check that exceeds it is reported with status 'error' */
|
||||
timeout?: number;
|
||||
};
|
||||
|
||||
/** Result of a system readiness check */
|
||||
export type ReadinessResult = {
|
||||
/**
|
||||
@@ -32,7 +50,9 @@ export type ReadinessDetail = {
|
||||
name: string;
|
||||
/** Status of the readiness check */
|
||||
status: ReadinessStatus;
|
||||
/** Optional message providing additional information about the readiness check */
|
||||
/** Duration of the readiness check in milliseconds */
|
||||
duration: number;
|
||||
/** Message providing additional information about the readiness check */
|
||||
message?: string;
|
||||
};
|
||||
|
||||
@@ -41,20 +61,31 @@ export type ReadinessDetail = {
|
||||
* @param checks - An array of readiness checks to execute.
|
||||
* @returns A Promise that resolves to a ReadinessResult object.
|
||||
*/
|
||||
export const readiness = async (checks: ReadinessFunction[]): Promise<ReadinessResult> => {
|
||||
export const readiness = async (checks: ReadinessCheck[]): Promise<ReadinessResult> => {
|
||||
const start = Date.now();
|
||||
const t0 = performance.now();
|
||||
const details: ReadinessDetail[] = [];
|
||||
|
||||
for (const check of checks) {
|
||||
const checkt0 = performance.now();
|
||||
try {
|
||||
const result = await Promise.resolve(check());
|
||||
details.push(result);
|
||||
const result = await withTimeout(
|
||||
check.fn(check),
|
||||
check.timeout ?? 5000,
|
||||
`Readiness check '${check.name}' timed out after ${check.timeout ?? 5000} ms`,
|
||||
);
|
||||
details.push({
|
||||
name: check.name,
|
||||
status: result.status,
|
||||
message: result.message,
|
||||
duration: performance.now() - checkt0,
|
||||
});
|
||||
} catch (err) {
|
||||
details.push({
|
||||
name: 'unknown',
|
||||
name: check.name,
|
||||
status: 'error',
|
||||
message: err instanceof Error ? err.message : String(err),
|
||||
duration: performance.now() - checkt0,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -75,7 +106,7 @@ export const readiness = async (checks: ReadinessFunction[]): Promise<ReadinessR
|
||||
* @param checks - An array of readiness checks to execute.
|
||||
* @returns A function that returns a Response object with ReadinessResult in JSON format.
|
||||
*/
|
||||
export const createReadinessHandler = (checks: ReadinessFunction[]): (() => Promise<Response>) => {
|
||||
export const createReadinessHandler = (checks: ReadinessCheck[]): (() => Promise<Response>) => {
|
||||
return async () => {
|
||||
const result = await readiness(checks);
|
||||
return respondWithResult(result);
|
||||
@@ -86,7 +117,7 @@ export const createReadinessHandler = (checks: ReadinessFunction[]): (() => Prom
|
||||
* Class that schedules periodic readiness checks.
|
||||
*/
|
||||
export class ScheduledReadiness {
|
||||
private checks: ReadinessFunction[];
|
||||
private checks: ReadinessCheck[];
|
||||
private interval: number;
|
||||
private started: boolean = false;
|
||||
private timer: NodeJS.Timeout | null = null;
|
||||
@@ -98,7 +129,7 @@ export class ScheduledReadiness {
|
||||
* @param checks - An array of readiness checks to execute.
|
||||
* @param interval - Interval in milliseconds between readiness checks.
|
||||
*/
|
||||
constructor(checks: ReadinessFunction[], interval: number) {
|
||||
constructor(checks: ReadinessCheck[], interval: number) {
|
||||
this.checks = checks;
|
||||
this.interval = interval;
|
||||
}
|
||||
@@ -201,3 +232,20 @@ const httpStatusFromReadiness = (status: ReadinessStatus | 'unknown'): number =>
|
||||
if (status === 'error') return 503;
|
||||
return 200; // unknown, treat as ok to avoid false alarms
|
||||
};
|
||||
|
||||
/**
 * Races `promise` against a timer and rejects with `timeoutMessage` if the timer fires first.
 *
 * The timer is always cleared once the race settles. The underlying `promise` is
 * not cancelled on timeout; it keeps running and its eventual outcome is ignored.
 *
 * @param promise - The operation to wait for.
 * @param ms - Maximum time to wait in milliseconds. `Infinity` disables the timeout;
 *   finite values above the largest delay `setTimeout` supports are clamped to it.
 * @param timeoutMessage - Message of the Error used to reject on timeout.
 * @returns A promise that settles like `promise`, or rejects with `Error(timeoutMessage)` on timeout.
 */
const withTimeout = async <T>(
  promise: Promise<T>,
  ms: number,
  timeoutMessage: string,
): Promise<T> => {
  // Node coerces delays above 2^31 - 1 ms (including Infinity) to 1 ms, which would
  // turn "wait (almost) forever" into "time out almost immediately".
  const maxDelayMs = 2147483647;
  if (ms === Infinity) {
    return promise;
  }

  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    timeoutHandle = setTimeout(() => {
      reject(new Error(timeoutMessage));
    }, Math.min(ms, maxDelayMs));
  });

  try {
    return await Promise.race([promise, timeoutPromise]);
  } finally {
    // Clear the timer on every path so it cannot fire after the race has settled.
    clearTimeout(timeoutHandle);
  }
};
|
||||
|
||||
Reference in New Issue
Block a user