-
Notifications
You must be signed in to change notification settings - Fork 403
[INC-669] Add tail sampling implemented in OTEL SDK #2164
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 10 commits
66b90b1
c3c0867
461ecba
d1e3e68
c61247d
0c2097e
e7771d8
b1bf52e
519ebe4
81d6231
9779ee0
afbf49a
007b6e0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4,15 +4,30 @@ import { | |||||
| ATTR_SERVICE_INSTANCE_ID, | ||||||
| } from "@opentelemetry/semantic-conventions/incubating"; | ||||||
| import { createBatchSpanProcessor } from "@saleor/apps-otel/src/batch-span-processor-factory"; | ||||||
| import { DeferredSampler } from "@saleor/apps-otel/src/deferred-sampler"; | ||||||
| import { createHttpInstrumentation } from "@saleor/apps-otel/src/http-instrumentation-factory"; | ||||||
| import { ObservabilityAttributes } from "@saleor/apps-otel/src/observability-attributes"; | ||||||
| import { createServiceInstanceId } from "@saleor/apps-otel/src/service-instance-id-factory"; | ||||||
| import { TailSamplingProcessor } from "@saleor/apps-otel/src/tail-sampling-processor"; | ||||||
| import { registerOTel } from "@vercel/otel"; | ||||||
|
|
||||||
| import pkg from "../../package.json"; | ||||||
|
|
||||||
| const batchProcessor = createBatchSpanProcessor({ | ||||||
| accessToken: process.env.OTEL_ACCESS_TOKEN, | ||||||
| }); | ||||||
|
|
||||||
| const tailSamplingProcessor = new TailSamplingProcessor({ | ||||||
| processor: batchProcessor, | ||||||
| slowThresholdMs: 5000, | ||||||
| exportErrors: true, | ||||||
| exportSlowSpans: true, | ||||||
| }); | ||||||
|
|
||||||
| registerOTel({ | ||||||
| serviceName: process.env.OTEL_SERVICE_NAME, | ||||||
| // Note: DeferredSampler + TailSamplingProcessor must be used together | ||||||
| traceSampler: new DeferredSampler(), | ||||||
| attributes: { | ||||||
| [ATTR_SERVICE_VERSION]: pkg.version, | ||||||
| [ATTR_DEPLOYMENT_ENVIRONMENT_NAME]: process.env.ENV, | ||||||
|
|
@@ -23,10 +38,6 @@ registerOTel({ | |||||
| env: undefined, | ||||||
| [ObservabilityAttributes.VERCEL_ENV]: process.env.VERCEL_ENV, | ||||||
| }, | ||||||
| spanProcessors: [ | ||||||
| createBatchSpanProcessor({ | ||||||
| accessToken: process.env.OTEL_ACCESS_TOKEN, | ||||||
| }), | ||||||
| ], | ||||||
| instrumentations: [createHttpInstrumentation()], | ||||||
| spanProcessors: [tailSamplingProcessor], | ||||||
| instrumentations: [createHttpInstrumentation({ usingDeferredSpanProcessor: true })], | ||||||
|
||||||
| instrumentations: [createHttpInstrumentation({ usingDeferredSpanProcessor: true })], | |
| instrumentations: [createHttpInstrumentation({ usingDeferredSampler: true })], |
Copilot
AI
Dec 12, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The TailSamplingFetchInstrumentation is not being added to the instrumentations array. According to the documentation in fetch-instrumentation.ts, this instrumentation was created specifically to support tail sampling by recording span data for non-sampled spans. Without it, fetch calls won't be properly instrumented for tail sampling. The instrumentations array should include an instance of TailSamplingFetchInstrumentation.
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -5,7 +5,9 @@ | |||||||||||||||||
| "scripts": { | ||||||||||||||||||
| "check-types": "tsc", | ||||||||||||||||||
| "lint": "eslint .", | ||||||||||||||||||
| "lint:fix": "eslint --fix ." | ||||||||||||||||||
| "lint:fix": "eslint --fix .", | ||||||||||||||||||
| "test": "vitest", | ||||||||||||||||||
| "test:ci": "vitest run --coverage" | ||||||||||||||||||
|
Comment on lines
+9
to
+10
|
||||||||||||||||||
| }, | ||||||||||||||||||
| "dependencies": { | ||||||||||||||||||
| "@opentelemetry/exporter-metrics-otlp-http": "catalog:", | ||||||||||||||||||
|
|
@@ -18,24 +20,31 @@ | |||||||||||||||||
| }, | ||||||||||||||||||
| "devDependencies": { | ||||||||||||||||||
| "@opentelemetry/api": "catalog:", | ||||||||||||||||||
| "@opentelemetry/instrumentation": "catalog:", | ||||||||||||||||||
| "@opentelemetry/sdk-metrics": "catalog:", | ||||||||||||||||||
| "@opentelemetry/sdk-trace-node": "catalog:", | ||||||||||||||||||
| "@opentelemetry/semantic-conventions": "catalog:", | ||||||||||||||||||
| "@saleor/app-sdk": "link:../../node_modules/@saleor/app-sdk", | ||||||||||||||||||
| "@saleor/eslint-config-apps": "workspace:*", | ||||||||||||||||||
| "@saleor/typescript-config-apps": "workspace:*", | ||||||||||||||||||
| "@types/node": "catalog:", | ||||||||||||||||||
| "@vercel/otel": "catalog:", | ||||||||||||||||||
| "@vitest/coverage-v8": "catalog:", | ||||||||||||||||||
| "eslint": "catalog:", | ||||||||||||||||||
| "next": "catalog:", | ||||||||||||||||||
| "typescript": "catalog:", | ||||||||||||||||||
| "urql": "catalog:" | ||||||||||||||||||
| "urql": "catalog:", | ||||||||||||||||||
| "vite": "catalog:", | ||||||||||||||||||
| "vitest": "catalog:" | ||||||||||||||||||
| }, | ||||||||||||||||||
| "peerDependencies": { | ||||||||||||||||||
| "@opentelemetry/api": "catalog:", | ||||||||||||||||||
| "@opentelemetry/instrumentation": "catalog:", | ||||||||||||||||||
| "@opentelemetry/sdk-metrics": "catalog:", | ||||||||||||||||||
| "@opentelemetry/sdk-trace-node": "catalog:", | ||||||||||||||||||
| "@opentelemetry/semantic-conventions": "catalog:", | ||||||||||||||||||
| "@saleor/app-sdk": "catalog:", | ||||||||||||||||||
| "@vercel/otel": "catalog:", | ||||||||||||||||||
| "next": "catalog:", | ||||||||||||||||||
| "urql": "catalog:" | ||||||||||||||||||
|
Comment on lines
+47
to
49
|
||||||||||||||||||
| "@vercel/otel": "catalog:", | |
| "next": "catalog:", | |
| "urql": "catalog:" | |
| "next": "catalog:", | |
| "urql": "catalog:" | |
| }, | |
| "optionalDependencies": { | |
| "@vercel/otel": "catalog:" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| import { ROOT_CONTEXT, SpanKind, trace, TraceFlags } from "@opentelemetry/api"; | ||
| import { SamplingDecision } from "@opentelemetry/sdk-trace-node"; | ||
| import { describe, expect, it } from "vitest"; | ||
|
|
||
| import { DeferredSampler, SALEOR_SAMPLING_DECISION_ATTR } from "./deferred-sampler"; | ||
|
|
||
| describe("DeferredSampler", () => { | ||
| const traceId = "0af7651916cd43dd8448eb211c80319c"; | ||
| const spanName = "test-span"; | ||
| const spanKind = SpanKind.SERVER; | ||
| const attributes = {}; | ||
| const links: never[] = []; | ||
|
|
||
| describe("when parent is sampled", () => { | ||
| it("should return RECORD_AND_SAMPLED decision", () => { | ||
| const sampler = new DeferredSampler(); | ||
| const parentContext = trace.setSpanContext(ROOT_CONTEXT, { | ||
| traceId, | ||
| spanId: "b7ad6b7169203331", | ||
| traceFlags: TraceFlags.SAMPLED, | ||
| isRemote: true, | ||
| }); | ||
|
|
||
| const result = sampler.shouldSample( | ||
| parentContext, | ||
| traceId, | ||
| spanName, | ||
| spanKind, | ||
| attributes, | ||
| links, | ||
| ); | ||
|
|
||
| expect(result.decision).toBe(SamplingDecision.RECORD_AND_SAMPLED); | ||
| expect(result.attributes?.[SALEOR_SAMPLING_DECISION_ATTR]).toBe("sampled"); | ||
| }); | ||
| }); | ||
|
|
||
| describe("when parent is not sampled", () => { | ||
| it("should return RECORD decision (defer to TailSamplingProcessor)", () => { | ||
| const sampler = new DeferredSampler(); | ||
| const parentContext = trace.setSpanContext(ROOT_CONTEXT, { | ||
| traceId, | ||
| spanId: "b7ad6b7169203331", | ||
| traceFlags: TraceFlags.NONE, | ||
| isRemote: true, | ||
| }); | ||
|
|
||
| const result = sampler.shouldSample( | ||
| parentContext, | ||
| traceId, | ||
| spanName, | ||
| spanKind, | ||
| attributes, | ||
| links, | ||
| ); | ||
|
|
||
| expect(result.decision).toBe(SamplingDecision.RECORD); | ||
| expect(result.attributes?.[SALEOR_SAMPLING_DECISION_ATTR]).toBe("not_sampled"); | ||
| }); | ||
| }); | ||
|
|
||
| describe("when there is no parent (root span)", () => { | ||
| it("should return RECORD decision (defer to TailSamplingProcessor)", () => { | ||
| const sampler = new DeferredSampler(); | ||
|
|
||
| const result = sampler.shouldSample( | ||
| ROOT_CONTEXT, | ||
| traceId, | ||
| spanName, | ||
| spanKind, | ||
| attributes, | ||
| links, | ||
| ); | ||
|
|
||
| expect(result.decision).toBe(SamplingDecision.RECORD); | ||
| expect(result.attributes?.[SALEOR_SAMPLING_DECISION_ATTR]).toBe("none"); | ||
| }); | ||
| }); | ||
| }); |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,66 @@ | ||||||||||||||||||||||||||||||||||||||
| import { Attributes, Context, Link, SpanKind, trace, TraceFlags } from "@opentelemetry/api"; | ||||||||||||||||||||||||||||||||||||||
| import { Sampler, SamplingDecision, SamplingResult } from "@opentelemetry/sdk-trace-node"; | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||
| * Attribute key to store Saleor's original sampling decision. | ||||||||||||||||||||||||||||||||||||||
| * Used by TailSamplingProcessor to know if Saleor wanted this trace. | ||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||
| export const SALEOR_SAMPLING_DECISION_ATTR = "saleor.sampling.decision"; | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||
| * Check if SAMPLED flag is set in traceFlags bitmask. | ||||||||||||||||||||||||||||||||||||||
| * Uses a bitwise AND operation, as required by the OTEL spec. | ||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||
| function isSampled(traceFlags: number): boolean { | ||||||||||||||||||||||||||||||||||||||
| return (traceFlags & TraceFlags.SAMPLED) !== 0; | ||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||
| * A sampler that defers the final sampling decision to span end. | ||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||
| * - When parent is SAMPLED → return RECORD_AND_SAMPLED (respects parent) | ||||||||||||||||||||||||||||||||||||||
| * - When parent is NOT SAMPLED or there is NO parent → return RECORD (defer decision to TailSamplingProcessor) | ||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||
| * This allows the TailSamplingProcessor to make the final decision | ||||||||||||||||||||||||||||||||||||||
| * based on error status or latency at span end. | ||||||||||||||||||||||||||||||||||||||
| * Without returning `RECORD`, no span data would be stored at runtime. | ||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||
| export class DeferredSampler implements Sampler { | ||||||||||||||||||||||||||||||||||||||
| // eslint-disable-next-line @typescript-eslint/max-params -- Required by OpenTelemetry Sampler interface | ||||||||||||||||||||||||||||||||||||||
| shouldSample( | ||||||||||||||||||||||||||||||||||||||
| context: Context, | ||||||||||||||||||||||||||||||||||||||
| _traceId: string, | ||||||||||||||||||||||||||||||||||||||
| _spanName: string, | ||||||||||||||||||||||||||||||||||||||
| _spanKind: SpanKind, | ||||||||||||||||||||||||||||||||||||||
| _attributes: Attributes, | ||||||||||||||||||||||||||||||||||||||
| _links: Link[], | ||||||||||||||||||||||||||||||||||||||
| ): SamplingResult { | ||||||||||||||||||||||||||||||||||||||
| const parentSpanContext = trace.getSpanContext(context); | ||||||||||||||||||||||||||||||||||||||
| const parentSampled = parentSpanContext && isSampled(parentSpanContext.traceFlags); | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| if (parentSampled) { | ||||||||||||||||||||||||||||||||||||||
| // Parent decided to sample - we MUST sample too | ||||||||||||||||||||||||||||||||||||||
| return { | ||||||||||||||||||||||||||||||||||||||
| decision: SamplingDecision.RECORD_AND_SAMPLED, | ||||||||||||||||||||||||||||||||||||||
| attributes: { | ||||||||||||||||||||||||||||||||||||||
| [SALEOR_SAMPLING_DECISION_ATTR]: "sampled", | ||||||||||||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||||||||||||
| }; | ||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+41
to
+49
|
||||||||||||||||||||||||||||||||||||||
| if (parentSampled) { | |
| // Parent decided to sample - we MUST sample too | |
| return { | |
| decision: SamplingDecision.RECORD_AND_SAMPLED, | |
| attributes: { | |
| [SALEOR_SAMPLING_DECISION_ATTR]: "sampled", | |
| }, | |
| }; | |
| } | |
| // Always return RECORD to allow TailSamplingProcessor to make the final export decision, | |
| // even if the parent is sampled. This ensures all spans can be processed for tail sampling | |
| // and receive consistent attributes (e.g., TAIL_SAMPLING_PROMOTED_ATTR). | |
| return { | |
| decision: SamplingDecision.RECORD, | |
| attributes: { | |
| [SALEOR_SAMPLING_DECISION_ATTR]: parentSampled ? "sampled" : (parentSpanContext ? "not_sampled" : "none"), | |
| }, | |
| }; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The tail sampling configuration values (slowThresholdMs: 5000, exportErrors: true, exportSlowSpans: true) are hardcoded. Consider extracting these to environment variables or a configuration file to allow tuning in different environments (development, staging, production) without code changes.