From 73729a68c2edb9a7fab071408ce168a99d1b6fca Mon Sep 17 00:00:00 2001 From: decobot Date: Thu, 19 Mar 2026 23:12:59 -0300 Subject: [PATCH 1/6] feat(o11y): improve telemetry quality, reduce cost, add visibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Fixes - fix(otel): use HTTP exporter for traces — proto was rejected by collectors - fix(otel): flush traces and metrics on process shutdown (data loss on restart) - fix(otel): guard against Deno 2 native OTel double instrumentation - fix(otel): break circular dep between config.ts and metrics.ts - fix(otel): reduce log noise — detectResources:false suppresses K_REVISION warns - fix(metrics): always record resolver_latency, remove OTEL_ENABLE_EXTRA_METRICS gate - fix(metrics): use loader file path as fallback when resolverId is 'obj' - fix(metrics): wire meterProvider into registerInstrumentations (deno metrics were no-op) - fix(trace): attach cache-match span to active request context (was orphaned root span) - fix(cache): log Redis errors instead of silently swallowing them - fix(cache): LRU async dispose bug — lru-cache does not await async callbacks ## New metrics - feat(metrics): LRU cache observability — lru.evictions_total, lru.size_bytes, lru.item_count, lru.fill_ratio, lru.hits_total, lru.misses_total - feat(metrics): Deno runtime memory — deno.memory_usage (rss/heap/external), deno.open_resources by type ## Trace quality & cost reduction - feat(trace): FilteringSpanProcessor — drops framework noise (cache-match, router, fresh, Page.tsx, htmx) and sub-5ms child spans; keeps errors and slow spans; token-bucket rate limiter (maxExportPerSecond) prevents export explosion under bot traffic - feat(sampler): bot detection in head sampler — cf-verified-bot header and UA regex; bots get NOT_RECORD so zero spans are created, zero cost - feat(sampler): built-in exclusions for /_frsh/, _liveness, favicons, static assets — no spans created for infrastructure noise Co-Authored-By: Claude Sonnet 4.6 (1M context) --- blocks/loader.ts | 16 ++- deps.ts | 5 +- observability/otel/config.ts | 80 +++++-------- .../otel/instrumentation/deno-runtime.ts | 103 +++++++--------- observability/otel/metrics.ts | 9 +- observability/otel/processors/filtering.ts | 112 ++++++++++++++++++ observability/otel/resource.ts | 41 +++++++ observability/otel/samplers/urlBased.ts | 43 ++++++- runtime/caches/common.ts | 4 +- runtime/caches/lrucache.ts | 78 +++++++++++- runtime/caches/redis.ts | 15 ++- 11 files changed, 373 insertions(+), 133 deletions(-) create mode 100644 observability/otel/processors/filtering.ts create mode 100644 observability/otel/resource.ts diff --git a/blocks/loader.ts b/blocks/loader.ts index b44e36bf2..bdf5a78e6 100644 --- a/blocks/loader.ts +++ b/blocks/loader.ts @@ -10,10 +10,7 @@ import { HttpError } from "../engine/errors.ts"; import type { ResolverMiddlewareContext } from "../engine/middleware.ts"; import type { State } from "../mod.ts"; import { logger } from "../observability/otel/config.ts"; -import { - meter, - OTEL_ENABLE_EXTRA_METRICS, -} from "../observability/otel/metrics.ts"; +import { meter } from "../observability/otel/metrics.ts"; import { caches, ENABLE_LOADER_CACHE } from "../runtime/caches/mod.ts"; import { inFuture } from "../runtime/caches/utils.ts"; import type { DebugProperties } from "../utils/vary.ts"; @@ -184,6 +181,7 @@ const wrapLoader = ( }: LoaderModule, resolveChain: FieldResolver[], release: DecofileProvider, + loaderKey?: string, ) => { const [cacheMaxAge, mode] = typeof cache === "string" ? [MAX_AGE_S, cache] @@ -204,7 +202,9 @@ const wrapLoader = ( req: Request, ctx: FnContext, ): Promise> => { - const loader = ctx.resolverId || "unknown"; + const loader = (ctx.resolverId && ctx.resolverId !== "obj") + ? ctx.resolverId + : (loaderKey ?? ctx.resolverId ?? "unknown"); const start = performance.now(); let status: "bypass" | "miss" | "stale" | "hit" | undefined; @@ -339,9 +339,7 @@ const wrapLoader = ( return await flights.do(request.url, staleWhileRevalidate); } finally { const dimension = { loader, status }; - if (OTEL_ENABLE_EXTRA_METRICS) { - stats.latency.record(performance.now() - start, dimension); - } + stats.latency.record(performance.now() - start, dimension); ctx.monitoring?.currentSpan?.setDesc(status); } }, @@ -356,7 +354,7 @@ const loaderBlock: Block = { wrapCaughtErrors, (props: TProps, ctx: HttpContext<{ global: any } & RequestState>) => applyProps( - wrapLoader(mod, ctx.resolveChain, ctx.context.state.release), + wrapLoader(mod, ctx.resolveChain, ctx.context.state.release, key), )( props, ctx, diff --git a/deps.ts b/deps.ts index 4ec50a83e..861359f70 100644 --- a/deps.ts +++ b/deps.ts @@ -59,7 +59,7 @@ export type { export * as weakcache from "npm:weak-lru-cache@1.0.0"; export type Handler = Deno.ServeHandler; -export { OTLPTraceExporter } from "npm:@opentelemetry/exporter-trace-otlp-proto@0.52.1"; +export { OTLPTraceExporter } from "npm:@opentelemetry/exporter-trace-otlp-http@0.52.1"; export { Resource } from "npm:@opentelemetry/resources@1.25.1"; export { BatchSpanProcessor, @@ -69,8 +69,11 @@ export { } from "npm:@opentelemetry/sdk-trace-base@1.25.1"; export type { + ReadableSpan, Sampler, SamplingResult, + SpanExporter, + SpanProcessor, } from "npm:@opentelemetry/sdk-trace-base@1.25.1"; export { NodeTracerProvider } from "npm:@opentelemetry/sdk-trace-node@1.25.1"; export { diff --git a/observability/otel/config.ts b/observability/otel/config.ts index 161921563..9f58945c3 100644 --- a/observability/otel/config.ts +++ b/observability/otel/config.ts @@ -1,68 +1,36 @@ import * as log from "@std/log"; import { Logger } from "@std/log/logger"; -import { Context, context } from "../../deco.ts"; -import denoJSON from "../../deno.json" with { type: "json" }; import { BatchSpanProcessor, + diag, + DiagConsoleLogger, + DiagLogLevel, FetchInstrumentation, NodeTracerProvider, opentelemetry, OTLPTraceExporter, ParentBasedSampler, registerInstrumentations, - Resource, - SemanticResourceAttributes, } from "../../deps.ts"; -import { DenoRuntimeInstrumentation } from "./instrumentation/deno-runtime.ts"; + +if (Deno.env.has("OTEL_DIAG")) { + diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG); +} +import "./instrumentation/deno-runtime.ts"; import { DebugSampler } from "./samplers/debug.ts"; import { type SamplingOptions, URLBasedSampler } from "./samplers/urlBased.ts"; - -import { ENV_SITE_NAME } from "../../engine/decofile/constants.ts"; -import { safeImportResolve } from "../../engine/importmap/builder.ts"; +import { FilteringSpanProcessor } from "./processors/filtering.ts"; +import { OTEL_IS_ENABLED, resource } from "./resource.ts"; import { OpenTelemetryHandler } from "./logger.ts"; -const tryGetVersionOf = (pkg: string) => { - try { - const [_, ver] = safeImportResolve(pkg).split("@"); - return ver.substring(0, ver.length - 1); - } catch { - return undefined; - } -}; -const apps_ver = tryGetVersionOf("apps/") ?? - tryGetVersionOf("deco-sites/std/") ?? "_"; - -export const resource = Resource.default().merge( - new Resource({ - [SemanticResourceAttributes.SERVICE_NAME]: Deno.env.get(ENV_SITE_NAME) ?? - "deco", - [SemanticResourceAttributes.SERVICE_VERSION]: - Context.active().deploymentId ?? - Deno.hostname(), - [SemanticResourceAttributes.SERVICE_INSTANCE_ID]: crypto.randomUUID(), - [SemanticResourceAttributes.CLOUD_PROVIDER]: context.platform, - "deco.runtime.version": denoJSON.version, - "deco.apps.version": apps_ver, - [SemanticResourceAttributes.CLOUD_REGION]: Deno.env.get("DENO_REGION") ?? - "unknown", - [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: Deno.env.get( - "DECO_ENV_NAME", - ) - ? `env-${Deno.env.get("DECO_ENV_NAME")}` - : "production", - }), -); - const loggerName = "deco-logger"; -export const OTEL_IS_ENABLED: boolean = Deno.env.has( - "OTEL_EXPORTER_OTLP_ENDPOINT", -); export const logger: Logger = new Logger(loggerName, "INFO", { handlers: [ ...OTEL_IS_ENABLED ? [ new OpenTelemetryHandler("INFO", { resourceAttributes: resource.attributes, + detectResources: false, }), ] : [new log.ConsoleHandler("INFO")], @@ -82,6 +50,7 @@ registerInstrumentations({ // @ts-ignore: no idea why this is failing, but it should work new FetchInstrumentation( { + ignoreUrls: [/127\.0\.0\.1/, /localhost/], applyCustomAttributesOnSpan: ( span, _req, @@ -101,7 +70,6 @@ registerInstrumentations({ }, }, ), - new DenoRuntimeInstrumentation(), ], }); @@ -125,8 +93,9 @@ const parseSamplingOptions = (): SamplingOptions | undefined => { } }; +const samplingOptions = parseSamplingOptions(); const debugSampler = new DebugSampler( - new URLBasedSampler(parseSamplingOptions()), + new URLBasedSampler(samplingOptions), ); const provider = new NodeTracerProvider({ resource: resource, @@ -137,12 +106,27 @@ const provider = new NodeTracerProvider({ ), }); -if (OTEL_IS_ENABLED) { +// Deno 2.2+ has built-in OTel support via OTEL_DENO=true. +// If enabled, it sets its own global TracerProvider and instruments fetch/console, +// which would conflict with our manual setup (double spans, double logs). +// Skip provider.register() when Deno's native OTel is active. +const DENO_OTEL_ACTIVE = Deno.env.get("OTEL_DENO") === "true"; + +if (OTEL_IS_ENABLED && !DENO_OTEL_ACTIVE) { const traceExporter = new OTLPTraceExporter(); - // @ts-ignore: no idea why this is failing, but it should work - provider.addSpanProcessor(new BatchSpanProcessor(traceExporter)); + provider.addSpanProcessor( + new FilteringSpanProcessor( + // @ts-ignore: no idea why this is failing, but it should work + new BatchSpanProcessor(traceExporter), + samplingOptions, + ), + ); provider.register(); + + addEventListener("unload", () => { + provider.shutdown().catch(() => {}); + }); } export const tracer = opentelemetry.trace.getTracer( diff --git a/observability/otel/instrumentation/deno-runtime.ts b/observability/otel/instrumentation/deno-runtime.ts index 3d81e423d..3d720566b 100644 --- a/observability/otel/instrumentation/deno-runtime.ts +++ b/observability/otel/instrumentation/deno-runtime.ts @@ -1,69 +1,54 @@ -/** - * Heavily inspired from unlicensed code: https://github.com/cloudydeno/deno-observability/blob/main/instrumentation/deno-runtime.ts - */ import { type Attributes, - InstrumentationBase, - type InstrumentationConfig, - type ObservableCounter, type ObservableGauge, type ObservableResult, type ObservableUpDownCounter, ValueType, } from "../../../deps.ts"; - -export class DenoRuntimeInstrumentation extends InstrumentationBase { - readonly component: string = "deno-runtime"; - moduleName = this.component; - - constructor(_config?: InstrumentationConfig) { - super("deno-runtime", "0.1.0", { enabled: false }); +import { meter } from "../metrics.ts"; + +const memoryUsage: ObservableGauge = meter + .createObservableGauge("deno.memory_usage", { + unit: "By", + valueType: ValueType.DOUBLE, + description: "Deno process memory usage in bytes.", + }); + +const openResources: ObservableUpDownCounter = meter + .createObservableUpDownCounter("deno.open_resources", { + valueType: ValueType.DOUBLE, + description: "Number of open resources of a particular type.", + }); + +const gatherMemoryUsage = (x: ObservableResult) => { + const usage = Deno.memoryUsage(); + x.observe(usage.rss, { "deno.memory.type": "rss" }); + x.observe(usage.heapTotal, { "deno.memory.type": "heap_total" }); + x.observe(usage.heapUsed, { "deno.memory.type": "heap_used" }); + x.observe(usage.external, { "deno.memory.type": "external" }); +}; + +const gatherOpenResources = (x: ObservableResult) => { + try { + // deno-lint-ignore no-explicit-any + const resources = (Deno as any).resources?.() as + | Record + | undefined; + if (!resources) return; + const counts: Record = {}; + for (const type of Object.values(resources)) { + counts[type] = (counts[type] ?? 0) + 1; + } + for (const [type, count] of Object.entries(counts)) { + x.observe(count, { "deno.resource.type": type }); + } + } catch { + // Deno.resources() may not be available in all environments } +}; - metrics!: { - openResources: ObservableUpDownCounter; - memoryUsage: ObservableGauge; - dispatchedCtr: ObservableCounter; - inflightCtr: ObservableUpDownCounter; - }; - - protected init() {} - - private gatherMemoryUsage = (x: ObservableResult) => { - const usage = Deno.memoryUsage(); - x.observe(usage.rss, { "deno.memory.type": "rss" }); - x.observe(usage.heapTotal, { "deno.memory.type": "heap_total" }); - x.observe(usage.heapUsed, { "deno.memory.type": "heap_used" }); - x.observe(usage.external, { "deno.memory.type": "external" }); - }; +memoryUsage.addCallback(gatherMemoryUsage); +openResources.addCallback(gatherOpenResources); - override enable() { - this.metrics ??= { - openResources: this.meter - .createObservableUpDownCounter("deno.open_resources", { - valueType: ValueType.DOUBLE, - description: "Number of open resources of a particular type.", - }), - memoryUsage: this.meter - .createObservableGauge("deno.memory_usage", { - valueType: ValueType.DOUBLE, - }), - dispatchedCtr: this.meter - .createObservableCounter("deno.ops_dispatched", { - valueType: ValueType.DOUBLE, - description: "Total number of Deno op invocations.", - }), - inflightCtr: this.meter - .createObservableUpDownCounter("deno.ops_inflight", { - valueType: ValueType.DOUBLE, - description: "Number of currently-inflight Deno ops.", - }), - }; - - this.metrics.memoryUsage.addCallback(this.gatherMemoryUsage); - } - - override disable() { - this.metrics.memoryUsage.removeCallback(this.gatherMemoryUsage); - } -} +// Kept for backward compatibility — no longer needed but exported to avoid import errors +export class DenoRuntimeInstrumentation {} diff --git a/observability/otel/metrics.ts b/observability/otel/metrics.ts index b5ea77d7d..2c043d718 100644 --- a/observability/otel/metrics.ts +++ b/observability/otel/metrics.ts @@ -5,13 +5,12 @@ import { PeriodicExportingMetricReader, View, } from "../../deps.ts"; -import { OTEL_IS_ENABLED, resource } from "./config.ts"; +import { OTEL_IS_ENABLED, resource } from "./resource.ts"; export const OTEL_ENABLE_EXTRA_METRICS: boolean = Deno.env.has( "OTEL_ENABLE_EXTRA_METRICS", ); -// 2 minutes. We don't need frequent updates here. export const OTEL_EXPORT_INTERVAL: number = parseInt( Deno.env.get("OTEL_EXPORT_INTERVAL") ?? "60000", 10, @@ -33,7 +32,7 @@ const msBoundaries = [10, 100, 500, 1000, 5000, 10000, 15000]; const sBoundaries = [1, 5, 10, 50]; type IMeter = ReturnType; -const meterProvider: MeterProvider = new MeterProvider({ +export const meterProvider: MeterProvider = new MeterProvider({ resource, views: [ new View({ @@ -60,6 +59,10 @@ if (OTEL_IS_ENABLED) { exportIntervalMillis: OTEL_EXPORT_INTERVAL, }), ); + + addEventListener("unload", () => { + meterProvider.shutdown().catch(() => {}); + }); } export const meter: IMeter = meterProvider.getMeter("deco"); diff --git a/observability/otel/processors/filtering.ts b/observability/otel/processors/filtering.ts new file mode 100644 index 000000000..214b2cd9c --- /dev/null +++ b/observability/otel/processors/filtering.ts @@ -0,0 +1,112 @@ +import { SpanStatusCode } from "../../../deps.ts"; +import type { + ReadableSpan, + SpanProcessor, +} from "../../../deps.ts"; +import type { Context } from "../../../deps.ts"; +import type { SamplingOptions } from "../samplers/urlBased.ts"; + +// Span names that are framework internals with no actionable signal. +const ALWAYS_DROP_NAMES = new Set([ + "cache-match", + "website/functions/requestToParam.ts", + "website/handlers/router.ts", + "website/handlers/fresh.ts", + "website/pages/Page.tsx", + "htmx/sections/htmx.tsx", +]); + +const isRoot = (span: ReadableSpan): boolean => !span.parentSpanId; +const hasError = (span: ReadableSpan): boolean => + span.status.code === SpanStatusCode.ERROR; +const durationMs = (span: ReadableSpan): number => + span.duration[0] * 1000 + span.duration[1] / 1_000_000; +const isSlow = (span: ReadableSpan, thresholdMs: number): boolean => + durationMs(span) >= thresholdMs; + +/** + * Per-span filtering processor. + * + * Rules (in order, applied to every span individually): + * 1. Error spans → always keep + * 2. Root span + rate limit token available → keep + * 3. Root span + no token → drop (prevents export explosion under traffic spikes) + * 4. Known framework noise names → drop + * 5. Slow spans (> slowThresholdMs) → keep + * 6. Fast spans (< 5ms) → drop + * 7. Everything else → keep + * + * Zero memory overhead — no buffering, O(1) per span. + */ +export class FilteringSpanProcessor implements SpanProcessor { + private readonly slowThresholdMs: number; + private readonly maxExportPerSecond: number; + private tokens: number; + private lastRefill: number = Date.now(); + + constructor( + private readonly inner: SpanProcessor, + options: Pick< + SamplingOptions, + "slowThresholdMs" | "maxExportPerSecond" + > = {}, + ) { + this.slowThresholdMs = options.slowThresholdMs ?? 2000; + this.maxExportPerSecond = options.maxExportPerSecond ?? Infinity; + this.tokens = this.maxExportPerSecond; + } + + // @ts-ignore: onStart signature varies across SDK versions + onStart(span: ReadableSpan, ctx: Context): void { + // @ts-ignore + this.inner.onStart(span, ctx); + } + + onEnd(span: ReadableSpan): void { + if (this.shouldKeep(span)) { + this.inner.onEnd(span); + } + } + + private shouldKeep(span: ReadableSpan): boolean { + // Always keep error spans — critical signal regardless of anything else + if (hasError(span)) return true; + + // Root spans: apply rate limiting for normal traffic + if (isRoot(span)) { + if (isFinite(this.maxExportPerSecond) && !this.consumeToken()) { + return false; + } + return true; + } + + // Child spans: filter noise + if (ALWAYS_DROP_NAMES.has(span.name)) return false; + if (isSlow(span, this.slowThresholdMs)) return true; + if (durationMs(span) < 5) return false; + return true; + } + + private consumeToken(): boolean { + const now = Date.now(); + const elapsed = (now - this.lastRefill) / 1000; + this.tokens = Math.min( + this.maxExportPerSecond, + this.tokens + elapsed * this.maxExportPerSecond, + ); + this.lastRefill = now; + if (this.tokens >= 1) { + this.tokens--; + return true; + } + return false; + } + + shutdown(): Promise { + return this.inner.shutdown(); + } + + forceFlush(): Promise { + return this.inner.forceFlush(); + } +} diff --git a/observability/otel/resource.ts b/observability/otel/resource.ts new file mode 100644 index 000000000..e9f9a69d1 --- /dev/null +++ b/observability/otel/resource.ts @@ -0,0 +1,41 @@ +import { Context, context } from "../../deco.ts"; +import denoJSON from "../../deno.json" with { type: "json" }; +import { Resource, SemanticResourceAttributes } from "../../deps.ts"; +import { ENV_SITE_NAME } from "../../engine/decofile/constants.ts"; +import { safeImportResolve } from "../../engine/importmap/builder.ts"; + +const tryGetVersionOf = (pkg: string) => { + try { + const [_, ver] = safeImportResolve(pkg).split("@"); + return ver.substring(0, ver.length - 1); + } catch { + return undefined; + } +}; +const apps_ver = tryGetVersionOf("apps/") ?? + tryGetVersionOf("deco-sites/std/") ?? "_"; + +export const OTEL_IS_ENABLED: boolean = Deno.env.has( + "OTEL_EXPORTER_OTLP_ENDPOINT", +); + +export const resource = Resource.default().merge( + new Resource({ + [SemanticResourceAttributes.SERVICE_NAME]: Deno.env.get(ENV_SITE_NAME) ?? + "deco", + [SemanticResourceAttributes.SERVICE_VERSION]: + Context.active().deploymentId ?? + Deno.hostname(), + [SemanticResourceAttributes.SERVICE_INSTANCE_ID]: crypto.randomUUID(), + [SemanticResourceAttributes.CLOUD_PROVIDER]: context.platform, + "deco.runtime.version": denoJSON.version, + "deco.apps.version": apps_ver, + [SemanticResourceAttributes.CLOUD_REGION]: Deno.env.get("DENO_REGION") ?? + "unknown", + [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: Deno.env.get( + "DECO_ENV_NAME", + ) + ? `env-${Deno.env.get("DECO_ENV_NAME")}` + : "production", + }), +); diff --git a/observability/otel/samplers/urlBased.ts b/observability/otel/samplers/urlBased.ts index 4dc2e2162..1320f7c41 100644 --- a/observability/otel/samplers/urlBased.ts +++ b/observability/otel/samplers/urlBased.ts @@ -14,6 +14,11 @@ export interface URLBasedSampling { export interface SamplingOptions { defaultRatio?: number; byURLPattern?: URLBasedSampling[]; + // Tail sampling options — applied after the trace completes + alwaysExportErrors?: boolean; // default: true + slowThresholdMs?: number; // always export traces slower than this (default: 2000) + exportRatio?: number; // fraction of non-error/non-slow traces to export (default: 1) + maxExportPerSecond?: number; // hard cap on normal traces/s — errors/slow bypass this } interface CompiledURLBasedSampling { @@ -21,6 +26,30 @@ interface CompiledURLBasedSampling { matches(urlSegments: string[]): boolean; } +// Paths that are always excluded regardless of sampling config. +// These are noise: assets, health checks, and framework internals. +const ALWAYS_EXCLUDE = [ + /^\/_frsh\//, + /^\/deco\/_liveness$/, + /^\/favicon\./, + /\.(js|css|map|woff2?|ttf|eot|ico|png|jpg|jpeg|svg|webp|gif)$/, +]; + +const isExcluded = (pathname: string): boolean => + ALWAYS_EXCLUDE.some((re) => re.test(pathname)); + +// Bot detection — zero spans for bot traffic. +// Bots pollute cache with unique keys and inflate trace volume without signal. +const isBot = (req: Request): boolean => { + // Cloudflare verified bot header — most reliable + if (req.headers.get("cf-verified-bot") === "true") return true; + const ua = req.headers.get("user-agent") ?? ""; + if (!ua) return false; + // Known monitoring robots and crawlers + return /bot|crawl|spider|slurp|monitoring|uptimerobot|pingdom|datadog|newrelic|googlebot|bingbot|yandex/i + .test(ua); +}; + export class URLBasedSampler implements Sampler { protected compiledOptions: CompiledURLBasedSampling[]; constructor(protected options?: SamplingOptions) { @@ -44,9 +73,17 @@ export class URLBasedSampler implements Sampler { context: Context, ): SamplingResult { const req = context.getValue(REQUEST_CONTEXT_KEY) as Request; - const pathnameSegments = req?.url - ? new URL(req.url).pathname.split("/") - : undefined; + const url = req?.url ? new URL(req.url) : undefined; + + if (url && isExcluded(url.pathname)) { + return { decision: SamplingDecision.NOT_RECORD }; + } + + if (req && isBot(req)) { + return { decision: SamplingDecision.NOT_RECORD }; + } + + const pathnameSegments = url?.pathname.split("/"); const defaultRatio = this.options?.defaultRatio ?? 0; const ratio = pathnameSegments ? this.compiledOptions.find((opt) => opt.matches(pathnameSegments)) diff --git a/runtime/caches/common.ts b/runtime/caches/common.ts index 915037f8d..d4e46985c 100644 --- a/runtime/caches/common.ts +++ b/runtime/caches/common.ts @@ -1,4 +1,4 @@ -import { type Exception, ValueType } from "../../deps.ts"; +import { context, type Exception, ValueType } from "../../deps.ts"; import { tracer } from "../../observability/otel/config.ts"; import { meter } from "../../observability/otel/metrics.ts"; import { inFuture } from "./utils.ts"; @@ -39,7 +39,7 @@ export const withInstrumentation = ( match: async (req, opts) => { const span = tracer.startSpan("cache-match", { attributes: { engine }, - }); + }, context.active()); try { const isMatch = await cacheImpl.match(req, opts); //there is an edge case where there is no expires header, but technically our loader always sets it diff --git a/runtime/caches/lrucache.ts b/runtime/caches/lrucache.ts index f564cb84c..982aea8b1 100644 --- a/runtime/caches/lrucache.ts +++ b/runtime/caches/lrucache.ts @@ -1,4 +1,7 @@ import { LRUCache } from "npm:lru-cache@10.2.0"; +import { type ObservableResult, ValueType } from "../../deps.ts"; +import { logger } from "../../observability/otel/config.ts"; +import { meter } from "../../observability/otel/metrics.ts"; import { assertCanBeCached, assertNoOptions, @@ -24,14 +27,76 @@ const STALE_TTL_PERIOD = parseInt( Deno.env.get("STALE_TTL_PERIOD") ?? "30000", ); -const cacheOptions = (cache: Cache) => ( +const lruEvictions = meter.createCounter("lru.evictions_total", { + unit: "1", + valueType: ValueType.DOUBLE, + description: "Number of LRU cache evictions.", +}); + +const lruSizeGauge = meter.createObservableGauge("lru.size_bytes", { + unit: "By", + valueType: ValueType.DOUBLE, + description: "Current LRU cache size in bytes.", +}); + +const lruItemsGauge = meter.createObservableGauge("lru.item_count", { + unit: "1", + valueType: ValueType.DOUBLE, + description: "Current number of items in the LRU cache.", +}); + +const lruFillRatioGauge = meter.createObservableGauge("lru.fill_ratio", { + valueType: ValueType.DOUBLE, + description: "LRU cache fill ratio (0-1). Above 0.9 eviction pressure is high.", +}); + +const lruHits = meter.createCounter("lru.hits_total", { + unit: "1", + valueType: ValueType.DOUBLE, + description: "Number of LRU cache hits.", +}); + +const lruMisses = meter.createCounter("lru.misses_total", { + unit: "1", + valueType: ValueType.DOUBLE, + description: "Number of LRU cache misses.", +}); + +// Track active LRU instances per cache name for observable gauges +const lruInstances = new Map>(); + +lruSizeGauge.addCallback((result: ObservableResult) => { + for (const [cacheName, cache] of lruInstances) { + result.observe(cache.calculatedSize ?? 0, { cache: cacheName }); + } +}); + +lruItemsGauge.addCallback((result: ObservableResult) => { + for (const [cacheName, cache] of lruInstances) { + result.observe(cache.size, { cache: cacheName }); + } +}); + +lruFillRatioGauge.addCallback((result: ObservableResult) => { + for (const [cacheName, cache] of lruInstances) { + const ratio = CACHE_MAX_SIZE > 0 + ? (cache.calculatedSize ?? 0) / CACHE_MAX_SIZE + : 0; + result.observe(ratio, { cache: cacheName }); + } +}); + +const cacheOptions = (cache: Cache, cacheName: string) => ( { max: CACHE_MAX_ITEMS, maxSize: CACHE_MAX_SIZE, ttlAutopurge: CACHE_TTL_AUTOPURGE, ttlResolution: CACHE_TTL_RESOLUTION, - dispose: async (_value: boolean, key: string) => { - await cache.delete(key); + dispose: (_value: boolean, key: string) => { + lruEvictions.add(1, { cache: cacheName }); + cache.delete(key).catch((err) => { + logger.warn(`lru dispose failed to delete key from backing cache: ${err}`, { cache: cacheName }); + }); }, } ); @@ -39,8 +104,9 @@ const cacheOptions = (cache: Cache) => ( function createLruCacheStorage(cacheStorageInner: CacheStorage): CacheStorage { const caches = createBaseCacheStorage( cacheStorageInner, - (_cacheName, cacheInner, requestURLSHA1) => { - const fileCache = new LRUCache(cacheOptions(cacheInner)); + (cacheName, cacheInner, requestURLSHA1) => { + const fileCache = new LRUCache(cacheOptions(cacheInner, cacheName)); + lruInstances.set(cacheName, fileCache); return Promise.resolve({ ...baseCache, delete: async ( @@ -58,9 +124,11 @@ function createLruCacheStorage(cacheStorageInner: CacheStorage): CacheStorage { assertNoOptions(options); const cacheKey = await requestURLSHA1(request); if (fileCache.has(cacheKey)) { + lruHits.add(1, { cache: cacheName }); const result = cacheInner.match(cacheKey); return result; } + lruMisses.add(1, { cache: cacheName }); return undefined; }, put: async ( diff --git a/runtime/caches/redis.ts b/runtime/caches/redis.ts index ffb816e0c..f8d2e75cb 100644 --- a/runtime/caches/redis.ts +++ b/runtime/caches/redis.ts @@ -1,3 +1,4 @@ +import { logger } from "../../observability/otel/config.ts"; import { assertCanBeCached, assertNoOptions, @@ -105,7 +106,10 @@ export function create(redis: RedisConnection | null, namespace: string) { COMMAND_TIMEOUT, ) ) - .catch(() => 0); + .catch((err) => { + logger.warn(`redis cache delete error: ${err}`); + return 0; + }); return result > 0; }, @@ -129,7 +133,10 @@ export function create(redis: RedisConnection | null, namespace: string) { return deserialize(result); }) - .catch(() => undefined); + .catch((err) => { + logger.warn(`redis cache match error: ${err}`); + return undefined; + }); return result; }, @@ -153,7 +160,9 @@ export function create(redis: RedisConnection | null, namespace: string) { COMMAND_TIMEOUT, ) ) - .catch(() => {}); + .catch((err) => { + logger.warn(`redis cache put error: ${err}`); + }); }, }; } From ae8a99709e0906ec04de4d20920bd74d48759a65 Mon Sep 17 00:00:00 2001 From: decobot Date: Thu, 19 Mar 2026 23:30:37 -0300 Subject: [PATCH 2/6] feat(o11y): Redis health metrics, startup duration, remove K_REVISION noise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - redis.errors_total: counter on connection errors - redis.reconnections_total: counter on reconnect attempts - redis.connected: gauge 1/0 tracking live connection state - instance_startup_duration_ms: histogram measuring time from instance start to readyAt — key signal for slow cold start diagnosis - Remove logger.warn for missing K_REVISION — fires on every cacheable request in non-Cloud Run envs, was pure noise with no action to take Co-Authored-By: Claude Sonnet 4.6 (1M context) --- blocks/loader.ts | 1 - engine/manifest/manifest.ts | 11 ++++++++++- runtime/caches/redis.ts | 37 ++++++++++++++++++++++++++++++++++++- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/blocks/loader.ts b/blocks/loader.ts index bdf5a78e6..6cf0370b1 100644 --- a/blocks/loader.ts +++ b/blocks/loader.ts @@ -267,7 +267,6 @@ const wrapLoader = ( (await release?.revision() ?? undefined); if (!revisionID) { - logger.warn(`Could not get K_REVISION`); timing?.end(); return await handler(props, req, ctx); } diff --git a/engine/manifest/manifest.ts b/engine/manifest/manifest.ts index 93c722c65..3a0bbe197 100644 --- a/engine/manifest/manifest.ts +++ b/engine/manifest/manifest.ts @@ -1,5 +1,7 @@ // deno-lint-ignore-file no-explicit-any import { parse } from "@std/flags"; +import { ValueType } from "../../deps.ts"; +import { meter } from "../../observability/otel/metrics.ts"; import { gray, green, red } from "@std/fmt/colors"; import { type AppManifest, @@ -284,8 +286,15 @@ export const fulfillContext = async < ctx.site = currentSite!; const provider = release ?? await getProvider(); const runtimePromise = deferred>(); + const startedAt = ctx.instance.startedAt; ctx.runtime = runtimePromise.finally(() => { - ctx.instance.readyAt = new Date(); + const readyAt = new Date(); + ctx.instance.readyAt = readyAt; + meter.createHistogram("instance_startup_duration_ms", { + description: "Time from instance start to first request readiness.", + unit: "ms", + valueType: ValueType.DOUBLE, + }).record(readyAt.getTime() - startedAt.getTime()); }); ctx.release = provider; diff --git a/runtime/caches/redis.ts b/runtime/caches/redis.ts index f8d2e75cb..411f7ee52 100644 --- a/runtime/caches/redis.ts +++ b/runtime/caches/redis.ts @@ -1,4 +1,6 @@ import { logger } from "../../observability/otel/config.ts"; +import { ValueType } from "../../deps.ts"; +import { meter } from "../../observability/otel/metrics.ts"; import { assertCanBeCached, assertNoOptions, @@ -8,6 +10,29 @@ import { } from "./utils.ts"; import { Redis } from "npm:ioredis@^5.10.1"; +const redisErrors = meter.createCounter("redis.errors_total", { + unit: "1", + valueType: ValueType.DOUBLE, + description: "Number of Redis connection or command errors.", +}); + +const redisReconnections = meter.createCounter("redis.reconnections_total", { + unit: "1", + valueType: ValueType.DOUBLE, + description: "Number of Redis reconnection attempts.", +}); + +const redisConnected = meter.createObservableGauge("redis.connected", { + valueType: ValueType.DOUBLE, + description: "1 when Redis is connected, 0 otherwise.", +}); + +let redisConnectionState = 0; +// deno-lint-ignore no-explicit-any +redisConnected.addCallback((result: any) => { + result.observe(redisConnectionState); +}); + const CONNECTION_TIMEOUT = 500; const COMMAND_TIMEOUT = 500; const TTL = parseInt(Deno.env.get("LOADER_CACHE_REDIS_TTL_SECONDS") || "180"); // 3 minutes @@ -173,7 +198,17 @@ export const caches: CacheStorage = { if (isAvailable) { redis = createRedisClient(); - redis.on("error", () => {}); + redis.on("error", () => { + redisConnectionState = 0; + redisErrors.add(1); + }); + redis.on("connect", () => { + redisConnectionState = 1; + }); + redis.on("reconnecting", () => { + redisConnectionState = 0; + redisReconnections.add(1); + }); await wait(CONNECTION_TIMEOUT); } From 61ed616bb65a597e3aa75942b45afdf1d86a912d Mon Sep 17 00:00:00 2001 From: decobot Date: Fri, 20 Mar 2026 00:07:44 -0300 Subject: [PATCH 3/6] =?UTF-8?q?fix(trace):=20FilteringSpanProcessor=20now?= =?UTF-8?q?=20works=20=E2=80=94=20use=20provider.getTracer=20directly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: opentelemetry.trace.getTracer returns a ProxyTracer that delegates to the *current* global TracerProvider. Another provider was overriding the global after our provider.register(), causing all spans to bypass FilteringSpanProcessor entirely. Fix: use provider.getTracer("deco-tracer") directly when OTel is enabled, bypassing the global API. FilteringSpanProcessor.onStart/onEnd are now called for every span. Also filter loopback fetch spans (POST/GET http://127.0.0.1 or localhost) which FetchInstrumentation creates for internal /deco/render calls — these produce high-cardinality span names and have no signal value. Verified: cache-match, router.ts, fresh.ts, htmx.tsx, Page.tsx no longer appear in exported traces. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- observability/otel/config.ts | 10 +++++++--- observability/otel/processors/filtering.ts | 8 +++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/observability/otel/config.ts b/observability/otel/config.ts index 9f58945c3..0629f1912 100644 --- a/observability/otel/config.ts +++ b/observability/otel/config.ts @@ -112,6 +112,7 @@ const provider = new NodeTracerProvider({ // Skip provider.register() when Deno's native OTel is active. const DENO_OTEL_ACTIVE = Deno.env.get("OTEL_DENO") === "true"; + if (OTEL_IS_ENABLED && !DENO_OTEL_ACTIVE) { const traceExporter = new OTLPTraceExporter(); provider.addSpanProcessor( @@ -129,9 +130,12 @@ if (OTEL_IS_ENABLED && !DENO_OTEL_ACTIVE) { }); } -export const tracer = opentelemetry.trace.getTracer( - "deco-tracer", -); +// Use provider.getTracer directly (not via global API) to ensure spans +// always go through our FilteringSpanProcessor, even if another TracerProvider +// overrides the global after our provider.register() call. +export const tracer = OTEL_IS_ENABLED && !DENO_OTEL_ACTIVE + ? provider.getTracer("deco-tracer") + : opentelemetry.trace.getTracer("deco-tracer"); export const tracerIsRecording = () => opentelemetry.trace.getActiveSpan()?.isRecording() ?? false; diff --git a/observability/otel/processors/filtering.ts b/observability/otel/processors/filtering.ts index 214b2cd9c..10f9a3778 100644 --- a/observability/otel/processors/filtering.ts +++ b/observability/otel/processors/filtering.ts @@ -16,6 +16,11 @@ const ALWAYS_DROP_NAMES = new Set([ "htmx/sections/htmx.tsx", ]); +// Drop internal loopback fetch spans (FetchInstrumentation for /deco/render calls). +// These are server-to-self requests that create high-cardinality span names. +const isLoopbackFetch = (name: string): boolean => + /^(GET|POST|PUT|DELETE|PATCH)\s+https?:\/\/(127\.0\.0\.1|localhost)/.test(name); + const isRoot = (span: ReadableSpan): boolean => !span.parentSpanId; const hasError = (span: ReadableSpan): boolean => span.status.code === SpanStatusCode.ERROR; @@ -72,8 +77,9 @@ export class FilteringSpanProcessor implements SpanProcessor { // Always keep error spans — critical signal regardless of anything else if (hasError(span)) return true; - // Root spans: apply rate limiting for normal traffic + // Root spans: drop known noise even if appearing as root (context propagation issues) if (isRoot(span)) { + if (ALWAYS_DROP_NAMES.has(span.name) || isLoopbackFetch(span.name)) return false; if (isFinite(this.maxExportPerSecond) && !this.consumeToken()) { return false; } From a5060addecaa568ba641aec3bd4504ca1baa5481 Mon Sep 17 00:00:00 2001 From: decobot Date: Fri, 20 Mar 2026 07:27:41 -0300 Subject: [PATCH 4/6] fix(sampler): DebugSampler was forcing 100% sampling on all requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit state.correlationId is always set (UUID generated per request), so the ??fallback made DebugSampler return RECORD_AND_SAMPLED for every request, completely nullifying the defaultRatio in URLBasedSampler. In production with defaultRatio:0.01, all requests were being sampled — this was masked before because the sampler itself was bypassed (the global TracerProvider override bug, fixed in the previous commit). Now only force RECORD_AND_SAMPLED when: - x-trace-debug-id header is explicitly set (programmatic debug) - state.debugEnabled is true (?__d= param or debug cookie) The ?__d and x-trace-debug-id debug workflows are unchanged. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- observability/otel/samplers/debug.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/observability/otel/samplers/debug.ts b/observability/otel/samplers/debug.ts index 52446a69b..68cf46de4 100644 --- a/observability/otel/samplers/debug.ts +++ b/observability/otel/samplers/debug.ts @@ -23,8 +23,14 @@ export class DebugSampler implements Sampler { const req = context.getValue(REQUEST_CONTEXT_KEY) as Request; const state = context.getValue(STATE_CONTEXT_KEY) as DecoState; + // Only force sampling when explicitly requested: + // 1. via x-trace-debug-id header (programmatic debug) + // 2. via ?__d= query param or debug cookie (debugEnabled = true) + // Normal requests always have a correlationId (UUID) — do NOT use it here, + // otherwise every request would be forced to RECORD_AND_SAMPLED, + // nullifying the defaultRatio in URLBasedSampler. const correlationId = req?.headers?.get?.("x-trace-debug-id") ?? - state?.correlationId; + (state?.debugEnabled ? state?.correlationId : undefined); if (correlationId) { return { decision: SamplingDecision.RECORD_AND_SAMPLED, From 3b6c792878c0c1beb558f8f5530443eee4f8bd56 Mon Sep 17 00:00:00 2001 From: decobot Date: Fri, 20 Mar 2026 07:39:57 -0300 Subject: [PATCH 5/6] fix lrucache counter evictions --- runtime/caches/lrucache.ts | 6 ++++-- runtime/caches/redis.ts | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/runtime/caches/lrucache.ts b/runtime/caches/lrucache.ts index 982aea8b1..d7bbad933 100644 --- a/runtime/caches/lrucache.ts +++ b/runtime/caches/lrucache.ts @@ -92,8 +92,10 @@ const cacheOptions = (cache: Cache, cacheName: string) => ( maxSize: CACHE_MAX_SIZE, ttlAutopurge: CACHE_TTL_AUTOPURGE, ttlResolution: CACHE_TTL_RESOLUTION, - dispose: (_value: boolean, key: string) => { - lruEvictions.add(1, { cache: cacheName }); + dispose: (_value: boolean, key: string, reason: string) => { + if (reason === "evict") { + lruEvictions.add(1, { cache: cacheName }); + } cache.delete(key).catch((err) => { logger.warn(`lru dispose failed to delete key from backing cache: ${err}`, { cache: cacheName }); }); diff --git a/runtime/caches/redis.ts b/runtime/caches/redis.ts index 411f7ee52..4c1f978d2 100644 --- a/runtime/caches/redis.ts +++ b/runtime/caches/redis.ts @@ -133,6 +133,7 @@ export function create(redis: RedisConnection | null, namespace: string) { ) .catch((err) => { logger.warn(`redis cache delete error: ${err}`); + redisErrors.add(1); return 0; }); @@ -160,6 +161,7 @@ export function create(redis: RedisConnection | null, namespace: string) { }) .catch((err) => { logger.warn(`redis cache match error: ${err}`); + redisErrors.add(1); return undefined; }); @@ -187,6 +189,7 @@ export function create(redis: RedisConnection | null, namespace: string) { ) .catch((err) => { logger.warn(`redis cache put error: ${err}`); + redisErrors.add(1); }); }, }; From 36f17b82e89453f1cc96061b833b3fef6cb3c6ec Mon Sep 17 00:00:00 2001 From: decobot Date: Fri, 20 Mar 2026 07:58:58 -0300 Subject: [PATCH 6/6] =?UTF-8?q?fix(o11y):=20address=20code=20review=20?= =?UTF-8?q?=E2=80=94=20metrics=20accuracy=20and=20deno=20fmt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - loader.ts: set status="bypass" on !revisionID early return so finally block never emits resolver_latency with status:undefined - redis.ts: increment redis.errors_total on command errors (delete/match/put); track connected clients via Set so redis.connected gauge is correct when multiple namespaces open separate clients - lrucache.ts: only count evict reason in lru.evictions_total (not TTL/delete) - filtering.ts, lrucache.ts: deno fmt Co-Authored-By: Claude Sonnet 4.6 (1M context) --- blocks/loader.ts | 2 ++ observability/otel/processors/filtering.ts | 13 +++++++------ runtime/caches/lrucache.ts | 8 ++++++-- runtime/caches/redis.ts | 12 ++++++------ 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/blocks/loader.ts b/blocks/loader.ts index 6cf0370b1..84a088f0f 100644 --- a/blocks/loader.ts +++ b/blocks/loader.ts @@ -268,6 +268,8 @@ const wrapLoader = ( if (!revisionID) { timing?.end(); + status = "bypass"; + stats.cache.add(1, { status, loader }); return await handler(props, req, ctx); } diff --git a/observability/otel/processors/filtering.ts b/observability/otel/processors/filtering.ts index 10f9a3778..c498a7e51 100644 --- a/observability/otel/processors/filtering.ts +++ b/observability/otel/processors/filtering.ts @@ -1,8 +1,5 @@ import { SpanStatusCode } from "../../../deps.ts"; -import type { - ReadableSpan, - SpanProcessor, -} from "../../../deps.ts"; +import type { ReadableSpan, SpanProcessor } from "../../../deps.ts"; import type { Context } from "../../../deps.ts"; import type { SamplingOptions } from "../samplers/urlBased.ts"; @@ -19,7 +16,9 @@ const ALWAYS_DROP_NAMES = new Set([ // Drop internal loopback fetch spans (FetchInstrumentation for /deco/render calls). // These are server-to-self requests that create high-cardinality span names. const isLoopbackFetch = (name: string): boolean => - /^(GET|POST|PUT|DELETE|PATCH)\s+https?:\/\/(127\.0\.0\.1|localhost)/.test(name); + /^(GET|POST|PUT|DELETE|PATCH)\s+https?:\/\/(127\.0\.0\.1|localhost)/.test( + name, + ); const isRoot = (span: ReadableSpan): boolean => !span.parentSpanId; const hasError = (span: ReadableSpan): boolean => @@ -79,7 +78,9 @@ export class FilteringSpanProcessor implements SpanProcessor { // Root spans: drop known noise even if appearing as root (context propagation issues) if (isRoot(span)) { - if (ALWAYS_DROP_NAMES.has(span.name) || isLoopbackFetch(span.name)) return false; + if (ALWAYS_DROP_NAMES.has(span.name) || isLoopbackFetch(span.name)) { + return false; + } if (isFinite(this.maxExportPerSecond) && !this.consumeToken()) { return false; } diff --git a/runtime/caches/lrucache.ts b/runtime/caches/lrucache.ts index d7bbad933..d7d305cbd 100644 --- a/runtime/caches/lrucache.ts +++ b/runtime/caches/lrucache.ts @@ -47,7 +47,8 @@ const lruItemsGauge = meter.createObservableGauge("lru.item_count", { const lruFillRatioGauge = meter.createObservableGauge("lru.fill_ratio", { valueType: ValueType.DOUBLE, - description: "LRU cache fill ratio (0-1). Above 0.9 eviction pressure is high.", + description: + "LRU cache fill ratio (0-1). Above 0.9 eviction pressure is high.", }); const lruHits = meter.createCounter("lru.hits_total", { @@ -97,7 +98,10 @@ const cacheOptions = (cache: Cache, cacheName: string) => ( lruEvictions.add(1, { cache: cacheName }); } cache.delete(key).catch((err) => { - logger.warn(`lru dispose failed to delete key from backing cache: ${err}`, { cache: cacheName }); + logger.warn( + `lru dispose failed to delete key from backing cache: ${err}`, + { cache: cacheName }, + ); }); }, } diff --git a/runtime/caches/redis.ts b/runtime/caches/redis.ts index 4c1f978d2..9fefaab93 100644 --- a/runtime/caches/redis.ts +++ b/runtime/caches/redis.ts @@ -24,13 +24,13 @@ const redisReconnections = meter.createCounter("redis.reconnections_total", { const redisConnected = meter.createObservableGauge("redis.connected", { valueType: ValueType.DOUBLE, - description: "1 when Redis is connected, 0 otherwise.", + description: "Number of Redis clients currently connected.", }); -let redisConnectionState = 0; +const connectedClientSet = new Set(); // deno-lint-ignore no-explicit-any redisConnected.addCallback((result: any) => { - result.observe(redisConnectionState); + result.observe(connectedClientSet.size); }); const CONNECTION_TIMEOUT = 500; @@ -202,14 +202,14 @@ export const caches: CacheStorage = { if (isAvailable) { redis = createRedisClient(); redis.on("error", () => { - redisConnectionState = 0; + connectedClientSet.delete(redis!); redisErrors.add(1); }); redis.on("connect", () => { - redisConnectionState = 1; + connectedClientSet.add(redis!); }); redis.on("reconnecting", () => { - redisConnectionState = 0; + connectedClientSet.delete(redis!); redisReconnections.add(1); }); await wait(CONNECTION_TIMEOUT);