node: redesign readiness check system & improve event loop lag check
This commit is contained in:
@@ -1,26 +1,40 @@
|
|||||||
import { IntervalHistogram, monitorEventLoopDelay } from 'node:perf_hooks';
|
import { monitorEventLoopDelay } from 'node:perf_hooks';
|
||||||
import { ReadinessDetail, ReadinessFunction, ReadinessStatus } from './readiness';
|
import { ReadinessCheck, ReadinessFunctionReturn, ReadinessStatus } from './readiness';
|
||||||
|
|
||||||
let hist: IntervalHistogram | null = null;
|
|
||||||
|
|
||||||
/**
 * Builds a readiness check that monitors event loop lag.
 *
 * Every call starts its own `monitorEventLoopDelay` histogram (10 ms resolution) and,
 * unless disabled, an unref'd interval that periodically resets it so that old samples
 * do not dominate the reported percentile.
 *
 * @param options - Configuration options for the event loop lag check (all optional).
 * @param options.degradedMs - Threshold in milliseconds for degraded status (default: 200).
 * @param options.failMs - Threshold in milliseconds for error status (default: 1000).
 * @param options.histResetMs - Interval in milliseconds to reset the histogram (default: 60000).
 *   A non-finite or non-positive value disables the periodic reset.
 * @param options.percentile - Percentile to monitor, in the range (0, 100] (default: 50).
 * @returns A ReadinessCheck named 'event-loop-lag' with a 500 ms timeout.
 * @throws RangeError if `options.percentile` is outside (0, 100].
 */
export const buildEventLoopLagCheck = (
  options: {
    degradedMs?: number;
    failMs?: number;
    histResetMs?: number;
    percentile?: number;
  } = {},
): ReadinessCheck => {
  const { degradedMs = 200, failMs = 1000, histResetMs = 60000, percentile = 50 } = options;

  // Fail fast: histogram.percentile() only accepts values in (0, 100], and would otherwise
  // throw on every single check run instead of once at configuration time.
  if (!(percentile > 0 && percentile <= 100)) {
    throw new RangeError(`percentile must be in the range (0, 100], received ${percentile}`);
  }

  const hist = monitorEventLoopDelay({ resolution: 10 });
  hist.enable();

  // Node coerces timer delays that are NaN, < 1 or > 2^31-1 to 1 ms, which would wipe the
  // histogram continuously. Treat such values as "never reset" / clamp to the maximum.
  if (Number.isFinite(histResetMs) && histResetMs > 0) {
    const maxTimerDelayMs = 2 ** 31 - 1;
    setInterval(() => {
      hist.reset();
    }, Math.min(histResetMs, maxTimerDelayMs)).unref(); // unref: must not keep the process alive
  }

  return {
    name: 'event-loop-lag',
    fn: async (): Promise<ReadinessFunctionReturn> => {
      const lag = hist.percentile(percentile) / 1e6; // Convert from nanoseconds to milliseconds

      let status: ReadinessStatus = 'error';
      if (lag < degradedMs) {
        status = 'ok';
      } else if (lag < failMs) {
        status = 'degraded';
      }

      return {
        status,
        message: `Event loop lag is ${lag.toFixed(2)} ms`,
      };
    },
    timeout: 500,
  };
};
|
||||||
|
|||||||
@@ -1,5 +1,13 @@
|
|||||||
|
/**
 * Outcome reported by a single readiness check function.
 */
export type ReadinessFunctionReturn = {
  /** Resulting status of the check */
  status: ReadinessStatus;
  /** Optional human-readable detail accompanying the status */
  message?: string;
};
|
||||||
|
|
||||||
/**
 * Function that performs a readiness check.
 * It is invoked with the check definition it belongs to and resolves to the check's outcome.
 */
export type ReadinessFunction = (
  check: ReadinessCheck,
) => Promise<ReadinessFunctionReturn>;
|
||||||
|
|
||||||
/** Possible outcomes of a readiness check */
export type ReadinessStatus =
  | 'ok'
  | 'error'
  | 'degraded';
|
||||||
@@ -10,6 +18,16 @@ const aggregateStatus = (statuses: ReadinessStatus[]): ReadinessStatus => {
|
|||||||
return 'ok';
|
return 'ok';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * A named readiness check together with the function that runs it
 * and an optional per-check timeout.
 */
export type ReadinessCheck = {
  /** Name under which the check is reported */
  name: string;
  /** Function that executes the check */
  fn: ReadinessFunction;
  /** Maximum time in milliseconds the check may take (default: 5000) */
  timeout?: number;
};
|
||||||
|
|
||||||
/** Result of a system readiness check */
|
/** Result of a system readiness check */
|
||||||
export type ReadinessResult = {
|
export type ReadinessResult = {
|
||||||
/**
|
/**
|
||||||
@@ -32,7 +50,9 @@ export type ReadinessDetail = {
|
|||||||
name: string;
|
name: string;
|
||||||
/** Status of the readiness check */
|
/** Status of the readiness check */
|
||||||
status: ReadinessStatus;
|
status: ReadinessStatus;
|
||||||
/** Optional message providing additional information about the readiness check */
|
/** Duration of the readiness check in milliseconds */
|
||||||
|
duration: number;
|
||||||
|
/** Message providing additional information about the readiness check */
|
||||||
message?: string;
|
message?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -41,20 +61,31 @@ export type ReadinessDetail = {
|
|||||||
* @param checks - An array of readiness functions to execute.
|
* @param checks - An array of readiness checks to execute.
|
||||||
* @returns A Promise that resolves to a ReadinessResult object.
|
* @returns A Promise that resolves to a ReadinessResult object.
|
||||||
*/
|
*/
|
||||||
export const readiness = async (checks: ReadinessFunction[]): Promise<ReadinessResult> => {
|
export const readiness = async (checks: ReadinessCheck[]): Promise<ReadinessResult> => {
|
||||||
const start = Date.now();
|
const start = Date.now();
|
||||||
const t0 = performance.now();
|
const t0 = performance.now();
|
||||||
const details: ReadinessDetail[] = [];
|
const details: ReadinessDetail[] = [];
|
||||||
|
|
||||||
for (const check of checks) {
|
for (const check of checks) {
|
||||||
|
const checkt0 = performance.now();
|
||||||
try {
|
try {
|
||||||
const result = await Promise.resolve(check());
|
const result = await withTimeout(
|
||||||
details.push(result);
|
check.fn(check),
|
||||||
|
check.timeout ?? 5000,
|
||||||
|
`Readiness check '${check.name}' timed out after ${check.timeout ?? 5000} ms`,
|
||||||
|
);
|
||||||
|
details.push({
|
||||||
|
name: check.name,
|
||||||
|
status: result.status,
|
||||||
|
message: result.message,
|
||||||
|
duration: performance.now() - checkt0,
|
||||||
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
details.push({
|
details.push({
|
||||||
name: 'unknown',
|
name: check.name,
|
||||||
status: 'error',
|
status: 'error',
|
||||||
message: err instanceof Error ? err.message : String(err),
|
message: err instanceof Error ? err.message : String(err),
|
||||||
|
duration: performance.now() - checkt0,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -75,7 +106,7 @@ export const readiness = async (checks: ReadinessFunction[]): Promise<ReadinessR
|
|||||||
* @param checks - An array of readiness functions to execute.
|
* @param checks - An array of readiness checks to execute.
|
||||||
* @returns A function that returns a Response object with ReadinessResult in JSON format.
|
* @returns A function that returns a Response object with ReadinessResult in JSON format.
|
||||||
*/
|
*/
|
||||||
export const createReadinessHandler = (checks: ReadinessFunction[]): (() => Promise<Response>) => {
|
export const createReadinessHandler = (checks: ReadinessCheck[]): (() => Promise<Response>) => {
|
||||||
return async () => {
|
return async () => {
|
||||||
const result = await readiness(checks);
|
const result = await readiness(checks);
|
||||||
return respondWithResult(result);
|
return respondWithResult(result);
|
||||||
@@ -86,7 +117,7 @@ export const createReadinessHandler = (checks: ReadinessFunction[]): (() => Prom
|
|||||||
* Class that schedules periodic readiness checks.
|
* Class that schedules periodic readiness checks.
|
||||||
*/
|
*/
|
||||||
export class ScheduledReadiness {
|
export class ScheduledReadiness {
|
||||||
private checks: ReadinessFunction[];
|
private checks: ReadinessCheck[];
|
||||||
private interval: number;
|
private interval: number;
|
||||||
private started: boolean = false;
|
private started: boolean = false;
|
||||||
private timer: NodeJS.Timeout | null = null;
|
private timer: NodeJS.Timeout | null = null;
|
||||||
@@ -98,7 +129,7 @@ export class ScheduledReadiness {
|
|||||||
* @param checks - An array of readiness functions to execute.
|
* @param checks - An array of readiness checks to execute.
|
||||||
* @param interval - Interval in milliseconds between readiness checks.
|
* @param interval - Interval in milliseconds between readiness checks.
|
||||||
*/
|
*/
|
||||||
constructor(checks: ReadinessFunction[], interval: number) {
|
constructor(checks: ReadinessCheck[], interval: number) {
|
||||||
this.checks = checks;
|
this.checks = checks;
|
||||||
this.interval = interval;
|
this.interval = interval;
|
||||||
}
|
}
|
||||||
@@ -201,3 +232,20 @@ const httpStatusFromReadiness = (status: ReadinessStatus | 'unknown'): number =>
|
|||||||
if (status === 'error') return 503;
|
if (status === 'error') return 503;
|
||||||
return 200; // unknown, treat as ok to avoid false alarms
|
return 200; // unknown, treat as ok to avoid false alarms
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Races a promise against a deadline.
 *
 * @param promise - The promise to wait for.
 * @param ms - Deadline in milliseconds. `Infinity` means "no deadline"; finite values larger
 *   than the maximum timer delay (2^31 - 1 ms) are clamped to that maximum.
 * @param timeoutMessage - Message of the Error used to reject when the deadline passes first.
 * @returns A promise that settles like `promise`, or rejects with `Error(timeoutMessage)`
 *   if the deadline elapses first. The timer is always cleared once the race settles.
 */
const withTimeout = async <T>(
  promise: Promise<T>,
  ms: number,
  timeoutMessage: string,
): Promise<T> => {
  // Node coerces delays above 2^31-1 (including Infinity) to 1 ms, which would turn
  // "wait forever" into an almost immediate timeout. Handle that case explicitly.
  if (ms === Infinity) {
    return promise;
  }
  const maxTimerDelayMs = 2 ** 31 - 1;

  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    timeoutHandle = setTimeout(() => {
      reject(new Error(timeoutMessage));
    }, Math.min(ms, maxTimerDelayMs));
  });

  try {
    return await Promise.race([promise, timeoutPromise]);
  } finally {
    // Always release the timer so a fast result does not leave a pending timeout behind.
    clearTimeout(timeoutHandle);
  }
};
|
||||||
|
|||||||
Reference in New Issue
Block a user