From a22bbd3877d443a2a18049f1c65d9d1cd77c4dbe Mon Sep 17 00:00:00 2001 From: Yutong Zhang Date: Tue, 28 Apr 2026 15:14:55 +0800 Subject: [PATCH 1/3] Add --enable-otlp configure flag and 'otlp' debian build profile Wire the build system to optionally pull in opentelemetry-cpp so a follow-up PR can add an OTLP sink to ComponentStats. The default build is unchanged: * configure.ac gains a new --enable-otlp option (default: disabled). When enabled, the build probes for opentelemetry-cpp via pkg-config and falls back to a header check + a hard-coded -l list for SDKs that are not packaged with .pc files. HAVE_OTLP is defined and OTLP is exposed as an automake conditional, plus OPENTELEMETRY_CFLAGS / OPENTELEMETRY_LIBS substitutions for use by Makefile.am in later PRs. * debian/rules gains a new 'otlp' build profile. When the profile is active, --enable-otlp is passed to configure; otherwise --disable-otlp is passed, which is the current behaviour. This is the build-system half of Phase 2 in the Component Statistics HLD (sonic-net/SONiC#2312). It does not add any source files, does not change the public API, and does not affect any default build path. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: Yutong Zhang --- configure.ac | 30 ++++++++++++++++++++++++++++++ debian/rules | 10 ++++++++++ 2 files changed, 40 insertions(+) diff --git a/configure.ac b/configure.ac index 287ff2722..13c973178 100644 --- a/configure.ac +++ b/configure.ac @@ -44,9 +44,39 @@ AC_ARG_ENABLE(yangmodules, no) yangmodules=false ;; *) AC_MSG_ERROR(bad value ${enableval} for --enable-yangmodules) ;; esac],[yangmodules=true]) +AC_ARG_ENABLE(otlp, +[ --enable-otlp Build the OTLP sink (requires opentelemetry-cpp)], +[case "${enableval}" in + yes) otlp=true ;; + no) otlp=false ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-otlp) ;; +esac],[otlp=false]) AM_CONDITIONAL(DEBUG, test x$debug = xtrue) AM_CONDITIONAL(PYTHON2, test x$python2 = xtrue) AM_CONDITIONAL(YANGMODS, test x$yangmodules = xtrue) +AM_CONDITIONAL(OTLP, test x$otlp = xtrue) + +if test x$otlp = xtrue; then + PKG_CHECK_MODULES([OPENTELEMETRY], + [opentelemetry_api opentelemetry_sdk opentelemetry_exporter_otlp_grpc], + [have_otlp_pkgconfig=yes], + [have_otlp_pkgconfig=no]) + if test x$have_otlp_pkgconfig = xno; then + # opentelemetry-cpp does not ship pkg-config files in all distributions. + # Fall back to header + library probes so the build still succeeds when + # the SDK is installed under a standard prefix. + AC_LANG_PUSH([C++]) + AC_CHECK_HEADER([opentelemetry/version.h], [], + [AC_MSG_ERROR([--enable-otlp requested but opentelemetry-cpp headers were not found. Install opentelemetry-cpp or pass CPPFLAGS=-I/include.])]) + AC_LANG_POP([C++]) + OPENTELEMETRY_CFLAGS="" + OPENTELEMETRY_LIBS="-lopentelemetry_exporter_otlp_grpc -lopentelemetry_otlp_recordable -lopentelemetry_proto -lopentelemetry_resources -lopentelemetry_trace -lopentelemetry_metrics -lopentelemetry_common -lgrpc++ -lgrpc -lprotobuf" + fi + AC_DEFINE([HAVE_OTLP], [1], [Define to 1 if the OTLP sink is built]) +fi +AC_SUBST([OPENTELEMETRY_CFLAGS]) +AC_SUBST([OPENTELEMETRY_LIBS]) + if test x$CONFIGURED_ARCH = xarmhf && test x$CROSS_BUILD_ENVIRON = xy; then AM_CONDITIONAL(ARCH64, false) else diff --git a/debian/rules b/debian/rules index 928253b00..f2cf172a3 100755 --- a/debian/rules +++ b/debian/rules @@ -35,6 +35,16 @@ else CONFIGURE_ARGS += --enable-yangmodules endif +# Build the OTLP sink only when the 'otlp' build profile is active. The +# OTLP sink depends on opentelemetry-cpp, which is not yet packaged in +# every distribution; the default build keeps OTLP off so behaviour is +# unchanged. +ifneq (,$(filter otlp,$(DEB_BUILD_PROFILES))) +CONFIGURE_ARGS += --enable-otlp +else +CONFIGURE_ARGS += --disable-otlp +endif + # main packaging script %: dh $@ From 2e731e2538a893fbdaac9ef256722c58bca00c3e Mon Sep 17 00:00:00 2001 From: Yutong Zhang Date: Tue, 28 Apr 2026 15:52:34 +0800 Subject: [PATCH 2/3] Add OtlpSink wrapper for ComponentStats OTLP/gRPC export Introduce a thin C++ wrapper, swss::OtlpSink, that converts a snapshot of ComponentStats counters into an OTLP/gRPC batch destined for a local OpenTelemetry Collector. The class is the OTLP half of the dual-sink design described in the Component Statistics HLD (sonic-net/SONiC#2312). What the wrapper provides: * A small public header (common/component_stats_otlp.h) that exposes a Config struct, a DataPoint struct, exportBatch(), and shutdown(), and intentionally hides every OpenTelemetry C++ SDK type behind PIMPL so callers (notably ComponentStats) do not transitively include any OTel headers. * An implementation (common/component_stats_otlp.cpp) that: - constructs an OtlpGrpcMetricExporter lazily and shares one gRPC channel for the lifetime of the sink; - groups data points by metric name so that 'entity' is exported as a label rather than as part of the metric name (HLD section 7.7); - maps isMonotonic=true to a CUMULATIVE Sum, isMonotonic=false to a Gauge with last-value semantics; - never throws: all SDK exceptions and Export() error results are caught, logged, and converted to a 'false' return so a dead Collector cannot stall the ComponentStats writer thread or affect the DB sink (HLD requirement R9); - is move-only and idempotent on shutdown. * Five smoke tests (tests/component_stats_otlp_ut.cpp) covering the contract: construct/destruct, empty batch is a no-op, exporting to an unreachable Collector does not throw, shutdown is idempotent, and a moved-from instance is harmless. A real in-process gRPC mock server test is deferred to a follow-up. Build wiring: * common/Makefile.am: when --enable-otlp is active, append component_stats_otlp.cpp to libswsscommon and link OPENTELEMETRY_LIBS. * tests/Makefile.am: when --enable-otlp is active, append the unit test and link OPENTELEMETRY_LIBS. Default builds are unaffected because --enable-otlp is opt-in and ships disabled by default (added in #1183). Phase 2 follow-ups: * PR C: connect OtlpSink to ComponentStats writer-thread fan-out. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: Yutong Zhang --- common/Makefile.am | 9 ++ common/component_stats_otlp.cpp | 215 ++++++++++++++++++++++++++++++ common/component_stats_otlp.h | 95 +++++++++++++ tests/Makefile.am | 6 + tests/component_stats_otlp_ut.cpp | 75 +++++++++++ 5 files changed, 400 insertions(+) create mode 100644 common/component_stats_otlp.cpp create mode 100644 common/component_stats_otlp.h create mode 100644 tests/component_stats_otlp_ut.cpp diff --git a/common/Makefile.am b/common/Makefile.am index 53ab345f3..e4f275de7 100644 --- a/common/Makefile.am +++ b/common/Makefile.am @@ -127,6 +127,15 @@ common_libswsscommon_la_SOURCES += \ common_libswsscommon_la_LIBADD += -lyang endif +if OTLP +common_libswsscommon_la_SOURCES += \ + common/component_stats_otlp.cpp + +common_libswsscommon_la_CPPFLAGS += $(OPENTELEMETRY_CFLAGS) +common_libswsscommon_la_CXXFLAGS += $(OPENTELEMETRY_CFLAGS) +common_libswsscommon_la_LIBADD += $(OPENTELEMETRY_LIBS) +endif + common_swssloglevel_SOURCES = \ common/loglevel.cpp \ common/loglevel_util.cpp diff --git a/common/component_stats_otlp.cpp b/common/component_stats_otlp.cpp new file mode 100644 index 000000000..72684f3dc --- /dev/null +++ b/common/component_stats_otlp.cpp @@ -0,0 +1,215 @@ +#include "common/component_stats_otlp.h" + +#include "common/logger.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace otlp_exporter = opentelemetry::exporter::otlp; +namespace sdk_common = opentelemetry::sdk::common; +namespace sdk_metrics = opentelemetry::sdk::metrics; +namespace sdk_resource = opentelemetry::sdk::resource; +namespace sdk_scope = opentelemetry::sdk::instrumentationscope; + +namespace swss { + +namespace { + +// One InstrumentationScope per process is plenty; the SDK does not require +// it to be unique per sink. +std::unique_ptr makeScope() +{ + return sdk_scope::InstrumentationScope::Create( + "swss.component_stats", "1.0", "https://opentelemetry.io/schemas/1.30.0"); +} + +opentelemetry::common::SystemTimestamp toSystemTimestamp(uint64_t unixNano) +{ + return opentelemetry::common::SystemTimestamp{ + std::chrono::system_clock::time_point{std::chrono::nanoseconds{unixNano}}}; +} + +} // namespace + +struct OtlpSink::Impl +{ + Config config; + sdk_resource::Resource resource; + std::unique_ptr scope; + std::unique_ptr exporter; + bool stopped = false; + + explicit Impl(Config c) + : config(std::move(c)), + resource(sdk_resource::Resource::Create({ + {"service.name", config.componentName}, + {"service.instance.id", config.serviceInstanceId}, + {"sonic.component", config.componentName}, + })), + scope(makeScope()) + { + otlp_exporter::OtlpGrpcMetricExporterOptions opts; + opts.endpoint = config.endpoint; + opts.use_ssl_credentials = false; + opts.timeout = std::chrono::duration_cast( + config.exportTimeout); + exporter = otlp_exporter::OtlpGrpcMetricExporterFactory::Create(opts); + } + + bool exportBatch(const std::vector& points) + { + if (stopped) + { + return false; + } + if (points.empty()) + { + return true; + } + + const auto startTs = toSystemTimestamp(config.startTimeUnixNano); + const auto endTs = opentelemetry::common::SystemTimestamp{std::chrono::system_clock::now()}; + + // Group data points by full metric name. Each metric carries one + // PointDataAttributes per entity, which keeps "entity" as a label + // rather than as part of the metric name. + std::unordered_map byMetric; + + for (const auto& dp : points) + { + const std::string fullName = "sonic." + config.componentName + "." + dp.metric; + + auto it = byMetric.find(fullName); + if (it == byMetric.end()) + { + sdk_metrics::MetricData md; + md.instrument_descriptor.name_ = fullName; + md.instrument_descriptor.description_ = ""; + md.instrument_descriptor.unit_ = "1"; + md.instrument_descriptor.type_ = dp.isMonotonic + ? sdk_metrics::InstrumentType::kCounter + : sdk_metrics::InstrumentType::kGauge; + md.instrument_descriptor.value_type_ = sdk_metrics::InstrumentValueType::kLong; + md.aggregation_temporality = sdk_metrics::AggregationTemporality::kCumulative; + md.start_ts = startTs; + md.end_ts = endTs; + it = byMetric.emplace(fullName, std::move(md)).first; + } + + sdk_metrics::PointDataAttributes pda; + pda.attributes.SetAttribute("entity", dp.entity); + + if (dp.isMonotonic) + { + sdk_metrics::SumPointData spd; + spd.value_ = static_cast(dp.value); + spd.is_monotonic_ = true; + pda.point_data = std::move(spd); + } + else + { + sdk_metrics::LastValuePointData lvd; + lvd.value_ = static_cast(dp.value); + lvd.is_lastvalue_valid_ = true; + lvd.sample_ts_ = endTs; + pda.point_data = std::move(lvd); + } + + it->second.point_data_attr_.push_back(std::move(pda)); + } + + sdk_metrics::ScopeMetrics scopeMetrics; + scopeMetrics.scope_ = scope.get(); + scopeMetrics.metric_data_.reserve(byMetric.size()); + for (auto& kv : byMetric) + { + scopeMetrics.metric_data_.push_back(std::move(kv.second)); + } + + sdk_metrics::ResourceMetrics rm; + rm.resource_ = &resource; + rm.scope_metric_data_.push_back(std::move(scopeMetrics)); + + try + { + const auto result = exporter->Export(rm); + if (result != sdk_common::ExportResult::kSuccess) + { + SWSS_LOG_WARN("OtlpSink: Export to %s returned %d", + config.endpoint.c_str(), static_cast(result)); + return false; + } + } + catch (const std::exception& e) + { + SWSS_LOG_WARN("OtlpSink: Export to %s threw: %s", + config.endpoint.c_str(), e.what()); + return false; + } + return true; + } + + void shutdown() + { + if (stopped) + { + return; + } + stopped = true; + + if (!exporter) + { + return; + } + + try + { + exporter->ForceFlush(config.exportTimeout); + exporter->Shutdown(config.exportTimeout); + } + catch (...) + { + // shutdown() is contractually noexcept from the caller's view. + } + } +}; + +OtlpSink::OtlpSink(Config config) + : m_impl(std::make_unique(std::move(config))) +{ +} + +OtlpSink::~OtlpSink() +{ + if (m_impl) + { + m_impl->shutdown(); + } +} + +OtlpSink::OtlpSink(OtlpSink&&) noexcept = default; +OtlpSink& OtlpSink::operator=(OtlpSink&&) noexcept = default; + +bool OtlpSink::exportBatch(const std::vector& points) +{ + return m_impl->exportBatch(points); +} + +void OtlpSink::shutdown() +{ + m_impl->shutdown(); +} + +} // namespace swss diff --git a/common/component_stats_otlp.h b/common/component_stats_otlp.h new file mode 100644 index 000000000..873439087 --- /dev/null +++ b/common/component_stats_otlp.h @@ -0,0 +1,95 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace swss { + +// OtlpSink converts ComponentStats counter snapshots into OTLP/gRPC metric +// exports destined for a local OpenTelemetry Collector. +// +// This is the OTLP half of the dual-sink design described in +// doc/component-stats/component-stats-hld.md (sonic-net/SONiC#2312). Phase 1 +// (sonic-net/sonic-swss-common#1180 and sonic-net/sonic-swss#4516) provides +// the in-memory counters and the COUNTERS_DB sink. Phase 2 plugs this class +// into the ComponentStats writer thread so the same snapshot is also +// exported via OTLP. +// +// Design notes: +// * Construction does not connect; the underlying gRPC channel is created +// lazily on the first exportBatch() call. +// * Failures are non-throwing: exportBatch() returns false and writes one +// log line, so a dead Collector cannot stall the ComponentStats writer +// thread or affect the DB sink (HLD requirement R9). +// * The class uses the PIMPL idiom so that callers (notably +// ComponentStats) do not transitively include any OpenTelemetry C++ SDK +// headers. +// * The class is move-only by design; copying a sink would imply two +// gRPC channels exporting the same counters. +class OtlpSink +{ +public: + struct Config + { + // gRPC endpoint, e.g. "localhost:4317". Plaintext on loopback by + // design — TLS and authentication live in the local OTel Collector, + // not in the producer. + std::string endpoint = "localhost:4317"; + + // OTel resource attributes applied to every batch. + // componentName → service.name, sonic.component + // serviceInstanceId → service.instance.id + std::string componentName; + std::string serviceInstanceId; + + // Per-Export() deadline. Short by design: the writer thread runs + // every intervalSec (default 1 s) and must not block the next tick. + std::chrono::milliseconds exportTimeout{500}; + + // Cumulative-sum start time. Captured once in the ComponentStats + // constructor; advances on every container restart, which is the + // OTel-defined signal for counter reset. + uint64_t startTimeUnixNano = 0; + }; + + // One counter sample contributed by the ComponentStats writer thread. + // + // Final OTel metric name is "sonic..". + // The entity is exported as a data-point attribute, never as part of + // the metric name, so dashboards can pivot freely. + struct DataPoint + { + std::string entity; + std::string metric; + uint64_t value = 0; + bool isMonotonic = true; // false ⇒ exported as Gauge + }; + + explicit OtlpSink(Config config); + ~OtlpSink(); + + OtlpSink(const OtlpSink&) = delete; + OtlpSink& operator=(const OtlpSink&) = delete; + OtlpSink(OtlpSink&&) noexcept; + OtlpSink& operator=(OtlpSink&&) noexcept; + + // Convert one snapshot to OTLP and ship it. Returns true on a + // successful Export() RPC. Never throws. Safe to call from the + // ComponentStats writer thread. + // + // An empty batch is a no-op and returns true. + bool exportBatch(const std::vector& points); + + // Flush in-flight batches and tear down the gRPC channel. Idempotent; + // exportBatch() after shutdown() is a no-op that returns false. + void shutdown(); + +private: + struct Impl; + std::unique_ptr m_impl; +}; + +} // namespace swss diff --git a/tests/Makefile.am b/tests/Makefile.am index 2724e2e3c..5b44d1f77 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -55,3 +55,9 @@ tests_tests_LDADD = $(LDADD_GTEST) -lpthread common/libswsscommon.la $(LIBNL_LIB if YANGMODS tests_tests_SOURCES += tests/defaultvalueprovider_ut.cpp endif + +if OTLP +tests_tests_SOURCES += tests/component_stats_otlp_ut.cpp +tests_tests_CPPFLAGS += $(OPENTELEMETRY_CFLAGS) +tests_tests_LDADD += $(OPENTELEMETRY_LIBS) +endif diff --git a/tests/component_stats_otlp_ut.cpp b/tests/component_stats_otlp_ut.cpp new file mode 100644 index 000000000..f58380df5 --- /dev/null +++ b/tests/component_stats_otlp_ut.cpp @@ -0,0 +1,75 @@ +#include "common/component_stats_otlp.h" + +#include +#include +#include + +namespace swss { +namespace test { + +namespace { + +OtlpSink::Config makeConfig() +{ + OtlpSink::Config c; + // Pick a port that no Collector is listening on. The point of these + // tests is to verify the wrapper's contract — never throw, never crash — + // not to validate the on-the-wire OTLP shape, which requires an + // in-process gRPC mock server (deferred to a follow-up). + c.endpoint = "127.0.0.1:14317"; + c.componentName = "swss-ut"; + c.serviceInstanceId = "ut-host"; + c.startTimeUnixNano = 1700000000000000000ULL; + c.exportTimeout = std::chrono::milliseconds(100); + return c; +} + +} // namespace + +TEST(OtlpSink, ConstructAndDestructDoesNotCrash) +{ + OtlpSink sink(makeConfig()); + SUCCEED(); +} + +TEST(OtlpSink, EmptyBatchIsNoOp) +{ + OtlpSink sink(makeConfig()); + EXPECT_TRUE(sink.exportBatch({})); +} + +TEST(OtlpSink, ExportToUnreachableCollectorDoesNotThrow) +{ + OtlpSink sink(makeConfig()); + const std::vector points = { + {"PORT_TABLE", "SET", 42, /*isMonotonic*/ true}, + {"PORT_TABLE", "DEL", 7, /*isMonotonic*/ true}, + {"PORT_TABLE", "BACKLOG", 3, /*isMonotonic*/ false}, // gauge + }; + + bool ok = true; + EXPECT_NO_THROW(ok = sink.exportBatch(points)); + // The exact return value depends on local network behaviour: in CI the + // gRPC client may report failure quickly, on a developer box it may + // succeed against a stray listener. The wrapper's contract only forbids + // throwing; the boolean is informational. + (void)ok; +} + +TEST(OtlpSink, ShutdownIsIdempotent) +{ + OtlpSink sink(makeConfig()); + sink.shutdown(); + sink.shutdown(); + EXPECT_FALSE(sink.exportBatch({{"E", "M", 1, true}})); +} + +TEST(OtlpSink, MovedFromInstanceIsHarmless) +{ + OtlpSink first(makeConfig()); + OtlpSink second(std::move(first)); + EXPECT_TRUE(second.exportBatch({})); +} + +} // namespace test +} // namespace swss From 639b093662515b8499d003d13d02ec33a6f36b08 Mon Sep 17 00:00:00 2001 From: Yutong Zhang Date: Tue, 28 Apr 2026 16:28:10 +0800 Subject: [PATCH 3/3] OtlpSink: emit DELTA temporality for Geneva mdm compatibility Geneva mdm rejects OTLP metrics whose Sum points carry CUMULATIVE aggregation temporality (mdm log: 'Raw metrics data were dropped because OTLP metrics with cumulative aggregation temporality is not supported. Data Dropped Count: 1'). Switch the sink to DELTA so the very first production deployment is not silently a no-op. Behaviour change inside Impl::exportBatch(): * Per-series cache (lastValue, lastEndTs) keyed by '\\x1f'. * For each Sum point: delta = current - lastValue, with a counter-reset guard (current < lastValue is treated as 'delta = current', no uint64_t underflow). cache then advances unconditionally so a transient Export() failure costs at most one batch. * Per-metric MetricData.start_ts is the previous end_ts (or creationTs on the first export of that series), end_ts is now - matching the OTLP delta contract. * Gauge points are unchanged (LastValuePointData has no temporality); their MetricData.aggregation_temporality is set to Unspecified. API stays cumulative: callers (ComponentStats in PR C) still pass the cumulative in-memory counter, so the Phase 1 COUNTERS_DB sink and this sink share the exact same input. Cumulative-to-delta conversion is the sink's responsibility, not the caller's. Tests: two new GTest cases cover (1) three consecutive snapshots of the same series across exportBatch() calls, and (2) a counter-reset (current < last) without underflow. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: Yutong Zhang --- common/component_stats_otlp.cpp | 46 +++++++++++++++++++++++++------ common/component_stats_otlp.h | 13 +++++++-- tests/component_stats_otlp_ut.cpp | 29 +++++++++++++++++++ 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/common/component_stats_otlp.cpp b/common/component_stats_otlp.cpp index 72684f3dc..446bbf9f8 100644 --- a/common/component_stats_otlp.cpp +++ b/common/component_stats_otlp.cpp @@ -45,10 +45,21 @@ opentelemetry::common::SystemTimestamp toSystemTimestamp(uint64_t unixNano) struct OtlpSink::Impl { - Config config; - sdk_resource::Resource resource; + // Per-series state needed to convert cumulative counters into the delta + // points that Geneva mdm requires. Keyed by "\x1f". + struct SeriesState + { + uint64_t lastValue = 0; + opentelemetry::common::SystemTimestamp lastEndTs; + bool hasLastEndTs = false; + }; + + Config config; + sdk_resource::Resource resource; std::unique_ptr scope; std::unique_ptr exporter; + std::unordered_map series; + opentelemetry::common::SystemTimestamp creationTs; bool stopped = false; explicit Impl(Config c) @@ -58,7 +69,8 @@ struct OtlpSink::Impl {"service.instance.id", config.serviceInstanceId}, {"sonic.component", config.componentName}, })), - scope(makeScope()) + scope(makeScope()), + creationTs(toSystemTimestamp(config.startTimeUnixNano)) { otlp_exporter::OtlpGrpcMetricExporterOptions opts; opts.endpoint = config.endpoint; @@ -79,17 +91,24 @@ struct OtlpSink::Impl return true; } - const auto startTs = toSystemTimestamp(config.startTimeUnixNano); - const auto endTs = opentelemetry::common::SystemTimestamp{std::chrono::system_clock::now()}; + const auto endTs = opentelemetry::common::SystemTimestamp{std::chrono::system_clock::now()}; // Group data points by full metric name. Each metric carries one // PointDataAttributes per entity, which keeps "entity" as a label // rather than as part of the metric name. + // + // For Sum points we emit DELTA temporality (Geneva mdm rejects + // CUMULATIVE), so we maintain a per-series cache of the last + // cumulative value and the last end_ts. delta = current - last; on + // counter reset (current < last) we treat current as the delta and + // restart the window at creationTs. std::unordered_map byMetric; for (const auto& dp : points) { const std::string fullName = "sonic." + config.componentName + "." + dp.metric; + const std::string seriesKey = dp.entity + "\x1f" + dp.metric; + auto& state = series[seriesKey]; auto it = byMetric.find(fullName); if (it == byMetric.end()) @@ -102,8 +121,11 @@ struct OtlpSink::Impl ? sdk_metrics::InstrumentType::kCounter : sdk_metrics::InstrumentType::kGauge; md.instrument_descriptor.value_type_ = sdk_metrics::InstrumentValueType::kLong; - md.aggregation_temporality = sdk_metrics::AggregationTemporality::kCumulative; - md.start_ts = startTs; + // Gauge ignores temporality; Sum requires DELTA for mdm. + md.aggregation_temporality = dp.isMonotonic + ? sdk_metrics::AggregationTemporality::kDelta + : sdk_metrics::AggregationTemporality::kUnspecified; + md.start_ts = state.hasLastEndTs ? state.lastEndTs : creationTs; md.end_ts = endTs; it = byMetric.emplace(fullName, std::move(md)).first; } @@ -113,8 +135,12 @@ struct OtlpSink::Impl if (dp.isMonotonic) { + const uint64_t delta = (dp.value >= state.lastValue) + ? (dp.value - state.lastValue) + : dp.value; // counter reset: treat current as delta + sdk_metrics::SumPointData spd; - spd.value_ = static_cast(dp.value); + spd.value_ = static_cast(delta); spd.is_monotonic_ = true; pda.point_data = std::move(spd); } @@ -128,6 +154,10 @@ struct OtlpSink::Impl } it->second.point_data_attr_.push_back(std::move(pda)); + + state.lastValue = dp.value; + state.lastEndTs = endTs; + state.hasLastEndTs = true; } sdk_metrics::ScopeMetrics scopeMetrics; diff --git a/common/component_stats_otlp.h b/common/component_stats_otlp.h index 873439087..56da30677 100644 --- a/common/component_stats_otlp.h +++ b/common/component_stats_otlp.h @@ -49,9 +49,11 @@ class OtlpSink // every intervalSec (default 1 s) and must not block the next tick. std::chrono::milliseconds exportTimeout{500}; - // Cumulative-sum start time. Captured once in the ComponentStats - // constructor; advances on every container restart, which is the - // OTel-defined signal for counter reset. + // Wall-clock time at which this sink was constructed, used as the + // start_time of the very first delta export per (entity, metric). + // Subsequent exports use the previous export's end_time as their + // start_time, which is the OTLP-defined contract for delta + // temporality. uint64_t startTimeUnixNano = 0; }; @@ -60,6 +62,11 @@ class OtlpSink // Final OTel metric name is "sonic..". // The entity is exported as a data-point attribute, never as part of // the metric name, so dashboards can pivot freely. + // + // value is always the *cumulative* in-memory counter from + // ComponentStats. The sink converts cumulative → delta internally + // (Geneva mdm only accepts AGGREGATION_TEMPORALITY_DELTA), so callers + // never have to track per-sample state. struct DataPoint { std::string entity; diff --git a/tests/component_stats_otlp_ut.cpp b/tests/component_stats_otlp_ut.cpp index f58380df5..7f1a24456 100644 --- a/tests/component_stats_otlp_ut.cpp +++ b/tests/component_stats_otlp_ut.cpp @@ -71,5 +71,34 @@ TEST(OtlpSink, MovedFromInstanceIsHarmless) EXPECT_TRUE(second.exportBatch({})); } +TEST(OtlpSink, DeltaConversionDoesNotThrowAcrossMultipleExports) +{ + OtlpSink sink(makeConfig()); + // Three consecutive cumulative snapshots of the same series. The sink + // is expected to convert these to deltas (10, 5, 8) internally without + // crashing or throwing, regardless of whether the export RPC itself + // succeeds. + const std::vector first = {{"PORT", "SET", 10, true}}; + const std::vector second = {{"PORT", "SET", 15, true}}; + const std::vector third = {{"PORT", "SET", 23, true}}; + + EXPECT_NO_THROW({ + (void)sink.exportBatch(first); + (void)sink.exportBatch(second); + (void)sink.exportBatch(third); + }); +} + +TEST(OtlpSink, CounterResetIsTreatedAsDelta) +{ + OtlpSink sink(makeConfig()); + // After a counter reset (current < last), the sink must not underflow + // a uint64_t. The contract is to emit the current value as the delta. + EXPECT_NO_THROW({ + (void)sink.exportBatch({{"PORT", "SET", 100, true}}); + (void)sink.exportBatch({{"PORT", "SET", 7, true}}); + }); +} + } // namespace test } // namespace swss