diff --git a/.vale/styles/config/vocabularies/technical/accept.txt b/.vale/styles/config/vocabularies/technical/accept.txt index a4a23fc4af1..888eca03483 100644 --- a/.vale/styles/config/vocabularies/technical/accept.txt +++ b/.vale/styles/config/vocabularies/technical/accept.txt @@ -148,6 +148,7 @@ cooldown crypto deserializable deserializer +dictTagsets downcasted upcasted env @@ -158,6 +159,8 @@ formatters globals implementor(s?) jittered +serializer +varint losslessly lossy lookups diff --git a/Cargo.lock b/Cargo.lock index 113c6f50417..985790822e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -128,6 +128,12 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "anymap3" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170433209e817da6aae2c51aa0dd443009a613425dd041ebfb2492d1c4c11a25" + [[package]] name = "arc-swap" version = "1.9.1" @@ -4151,6 +4157,7 @@ dependencies = [ "tracing-appender", "tracing-rolling-file", "url", + "uuid", "zstd", ] @@ -4194,6 +4201,7 @@ dependencies = [ name = "saluki-core" version = "0.1.0" dependencies = [ + "anymap3", "async-trait", "bitmask-enum", "ddsketch", @@ -4791,6 +4799,7 @@ dependencies = [ "serde", "serde_json", "serde_with", + "simdutf8", "stringtheory", ] diff --git a/Cargo.toml b/Cargo.toml index b2ab748ced0..fbb05b8e67a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -158,6 +158,7 @@ tokio-rustls = { version = "0.26", default-features = false, features = [ "aws_lc_rs", ] } anyhow = { version = "1", default-features = false } +anymap3 = { version = "1", default-features = false, features = ["std"] } chrono = { version = "0.4", default-features = false, features = ["clock"] } bytesize = { version = "2", default-features = false, features = ["serde"] } criterion = { version = "0.8", features = ["html_reports"] } diff --git a/LICENSE-3rdparty.csv 
b/LICENSE-3rdparty.csv index d108c3c2b5a..1ef041c7f1a 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -8,6 +8,7 @@ android_system_properties,https://github.com/nical/android_system_properties,MIT anes,https://github.com/zrzka/anes-rs,MIT OR Apache-2.0,Robert Vojta anstyle,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The anstyle Authors anyhow,https://github.com/dtolnay/anyhow,MIT OR Apache-2.0,David Tolnay +anymap3,https://github.com/reivilibre/anymap3,BlueOak-1.0.0 OR MIT OR Apache-2.0,"Olivier 'reivilibre' (fork maintainer) , Chris Morgan (original author) " arc-swap,https://github.com/vorner/arc-swap,MIT OR Apache-2.0,Michal 'vorner' Vaner argh,https://github.com/google/argh,BSD-3-Clause,"Taylor Cramer , Benjamin Brittain , Erick Tryzelaar " arrayvec,https://github.com/bluss/arrayvec,MIT OR Apache-2.0,bluss diff --git a/Makefile b/Makefile index 1e48f3be9cf..cba7ac5be37 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ export LADING_VERSION ?= sha-d608ffbce8f8c77b147d6750b3bb6d6948af239a # Version of source repositories (Git tag) for vendored Protocol Buffers definitions. 
export PROTOBUF_SRC_REPO_DD_AGENT ?= 7.73.x -export PROTOBUF_SRC_REPO_AGENT_PAYLOAD ?= v5.0.164 +export PROTOBUF_SRC_REPO_AGENT_PAYLOAD ?= v5.0.180 export PROTOBUF_SRC_REPO_CONTAINERD ?= v2.2.0 export PROTOBUF_SRC_REPO_SKETCHES_GO ?= v1.4.7 diff --git a/bin/correctness/datadog-intake/src/app/metrics/handlers.rs b/bin/correctness/datadog-intake/src/app/metrics/handlers.rs index eff1578ecc5..1adf331de14 100644 --- a/bin/correctness/datadog-intake/src/app/metrics/handlers.rs +++ b/bin/correctness/datadog-intake/src/app/metrics/handlers.rs @@ -1,4 +1,10 @@ -use axum::{body::Bytes, extract::State, http::StatusCode, Json}; +use axum::{ + body::Bytes, + extract::State, + http::{HeaderMap, StatusCode}, + Json, +}; +use datadog_protos::metrics::v3::Payload as V3Payload; use datadog_protos::metrics::{MetricPayload, SketchPayload}; use protobuf::Message as _; use stele::Metric; @@ -6,11 +12,43 @@ use tracing::{error, info}; use super::MetricsState; +/// Extracts the validation batch headers from a request. +/// +/// Returns `(batch_id, batch_seq, batch_len)` if all three headers are present, otherwise `None`. 
+fn extract_batch_info(headers: &HeaderMap) -> Option<(String, usize, usize)> { + let id = headers.get("x-metrics-request-id")?.to_str().ok()?.to_string(); + let seq = headers.get("x-metrics-request-seq")?.to_str().ok()?.parse().ok()?; + let len = headers.get("x-metrics-request-len")?.to_str().ok()?.parse().ok()?; + Some((id, seq, len)) +} + pub async fn handle_metrics_dump(State(state): State) -> Json> { info!("Got request to dump metrics."); Json(state.dump_metrics()) } +pub async fn handle_metrics_validation_status(State(state): State) -> (StatusCode, String) { + info!("Got request to dump metrics validation status."); + + let status = state.validation_status(); + if status.is_ok() { + (StatusCode::OK, "ok\n".to_string()) + } else { + let mut body = format!( + "metrics validation failed: failures={}, pending_series_batches={}, pending_sketches_batches={}\n", + status.failures.len(), + status.pending_series_batches, + status.pending_sketches_batches + ); + for failure in status.failures { + body.push_str("- "); + body.push_str(&failure); + body.push('\n'); + } + (StatusCode::INTERNAL_SERVER_ERROR, body) + } +} + pub async fn handle_series_v1(State(state): State, body: Bytes) -> StatusCode { // Fast path check to see if this is a diagnostic request. // @@ -35,7 +73,7 @@ pub async fn handle_series_v1(State(state): State, body: Bytes) -> } } -pub async fn handle_series_v2(State(state): State, body: Bytes) -> StatusCode { +pub async fn handle_series_v2(State(state): State, headers: HeaderMap, body: Bytes) -> StatusCode { // Fast path check to see if this is a diagnostic request. 
// // The Datadog Agent will send dummy payloads to certain endpoints when checking for connectivity, so if we see `{}` @@ -46,7 +84,6 @@ pub async fn handle_series_v2(State(state): State, body: Bytes) -> } info!("Received series v2 payload."); - let payload = match MetricPayload::parse_from_bytes(&body[..]) { Ok(payload) => payload, Err(e) => { @@ -55,21 +92,30 @@ pub async fn handle_series_v2(State(state): State, body: Bytes) -> } }; - match state.merge_series_v2_payload(payload) { - Ok(()) => { - info!("Processed series v2 payload."); - StatusCode::ACCEPTED + if let Some((batch_id, batch_seq, batch_len)) = extract_batch_info(&headers) { + info!(batch_id, batch_seq, batch_len, "Received V2 series validation pair."); + match state.accumulate_v2_series(payload, batch_id, batch_seq, batch_len) { + Ok(()) => StatusCode::ACCEPTED, + Err(e) => { + error!(error = %e, "Failed to accumulate V2 series validation pair."); + StatusCode::BAD_REQUEST + } } - Err(e) => { - error!(error = %e, "Failed to merge series v2 payload."); - StatusCode::BAD_REQUEST + } else { + match state.merge_series_v2_payload(payload) { + Ok(()) => { + info!("Processed series v2 payload."); + StatusCode::ACCEPTED + } + Err(e) => { + error!(error = %e, "Failed to merge series v2 payload."); + StatusCode::BAD_REQUEST + } } } } -pub async fn handle_sketch_beta(State(state): State, body: Bytes) -> StatusCode { - info!("Received sketch payload."); - +pub async fn handle_sketch_beta(State(state): State, headers: HeaderMap, body: Bytes) -> StatusCode { let payload = match SketchPayload::parse_from_bytes(&body[..]) { Ok(payload) => payload, Err(e) => { @@ -78,14 +124,92 @@ pub async fn handle_sketch_beta(State(state): State, body: Bytes) } }; - match state.merge_sketch_payload(payload) { - Ok(()) => { - info!("Processed sketch payload."); - StatusCode::ACCEPTED + if let Some((batch_id, batch_seq, batch_len)) = extract_batch_info(&headers) { + info!(batch_id, batch_seq, batch_len, "Received V2 sketches 
validation pair."); + match state.accumulate_v2_sketches(payload, batch_id, batch_seq, batch_len) { + Ok(()) => StatusCode::ACCEPTED, + Err(e) => { + error!(error = %e, "Failed to accumulate V2 sketches validation pair."); + StatusCode::BAD_REQUEST + } } + } else { + info!("Received sketch payload."); + match state.merge_sketch_payload(payload) { + Ok(()) => { + info!("Processed sketch payload."); + StatusCode::ACCEPTED + } + Err(e) => { + error!(error = %e, "Failed to merge sketch payload."); + StatusCode::BAD_REQUEST + } + } + } +} + +pub async fn handle_series_v3(State(state): State, headers: HeaderMap, body: Bytes) -> StatusCode { + let payload = match V3Payload::parse_from_bytes(&body[..]) { + Ok(payload) => payload, Err(e) => { - error!(error = %e, "Failed to merge sketch payload."); - StatusCode::BAD_REQUEST + error!(error = %e, "Failed to parse v3 series payload."); + return StatusCode::BAD_REQUEST; + } + }; + + if let Some((batch_id, batch_seq, batch_len)) = extract_batch_info(&headers) { + info!(batch_id, batch_seq, batch_len, "Received V3 series validation pair."); + match state.accumulate_v3_series_and_merge(payload, batch_id, batch_seq, batch_len) { + Ok(()) => StatusCode::ACCEPTED, + Err(e) => { + error!(error = %e, "Failed to accumulate V3 series validation pair."); + StatusCode::BAD_REQUEST + } + } + } else { + info!("Received v3 series payload."); + match state.merge_v3_payload(payload) { + Ok(()) => { + info!("Processed v3 series payload."); + StatusCode::ACCEPTED + } + Err(e) => { + error!(error = %e, "Failed to merge v3 series payload."); + StatusCode::BAD_REQUEST + } + } + } +} + +pub async fn handle_sketch_v3(State(state): State, headers: HeaderMap, body: Bytes) -> StatusCode { + let payload = match V3Payload::parse_from_bytes(&body[..]) { + Ok(payload) => payload, + Err(e) => { + error!(error = %e, "Failed to parse v3 sketch payload."); + return StatusCode::BAD_REQUEST; + } + }; + + if let Some((batch_id, batch_seq, batch_len)) = 
extract_batch_info(&headers) { + info!(batch_id, batch_seq, batch_len, "Received V3 sketches validation pair."); + match state.accumulate_v3_sketches_and_merge(payload, batch_id, batch_seq, batch_len) { + Ok(()) => StatusCode::ACCEPTED, + Err(e) => { + error!(error = %e, "Failed to accumulate V3 sketches validation pair."); + StatusCode::BAD_REQUEST + } + } + } else { + info!("Received v3 sketch payload."); + match state.merge_v3_payload(payload) { + Ok(()) => { + info!("Processed v3 sketch payload."); + StatusCode::ACCEPTED + } + Err(e) => { + error!(error = %e, "Failed to merge v3 sketch payload."); + StatusCode::BAD_REQUEST + } } } } diff --git a/bin/correctness/datadog-intake/src/app/metrics/mod.rs b/bin/correctness/datadog-intake/src/app/metrics/mod.rs index d2414990ee6..4899637470d 100644 --- a/bin/correctness/datadog-intake/src/app/metrics/mod.rs +++ b/bin/correctness/datadog-intake/src/app/metrics/mod.rs @@ -12,8 +12,11 @@ use self::state::MetricsState; pub fn build_metrics_router() -> Router { Router::new() .route("/metrics/dump", get(handle_metrics_dump)) + .route("/metrics/validation_status", get(handle_metrics_validation_status)) .route("/api/v1/series", post(handle_series_v1)) .route("/api/v2/series", post(handle_series_v2)) .route("/api/beta/sketches", post(handle_sketch_beta)) + .route("/api/intake/metrics/v3/series", post(handle_series_v3)) + .route("/api/intake/metrics/v3/sketches", post(handle_sketch_v3)) .with_state(MetricsState::new()) } diff --git a/bin/correctness/datadog-intake/src/app/metrics/state.rs b/bin/correctness/datadog-intake/src/app/metrics/state.rs index ef40aad3857..e2eb2a6f55f 100644 --- a/bin/correctness/datadog-intake/src/app/metrics/state.rs +++ b/bin/correctness/datadog-intake/src/app/metrics/state.rs @@ -1,26 +1,95 @@ -use std::sync::{Arc, Mutex}; +use std::{ + collections::{HashMap, HashSet}, + sync::{Arc, Mutex}, +}; +use datadog_protos::metrics::v3::Payload as V3Payload; use datadog_protos::metrics::{MetricPayload, 
SketchPayload}; use saluki_error::GenericError; -use stele::Metric; +use stele::{Metric, MetricValue}; +use tracing::{debug, warn}; + +struct ValidationBatch { + v2_segments: HashMap>, + v2_expected: Option, + v3_segments: HashMap>, + v3_expected: Option, +} + +impl ValidationBatch { + fn new() -> Self { + Self { + v2_segments: HashMap::new(), + v2_expected: None, + v3_segments: HashMap::new(), + v3_expected: None, + } + } + + fn is_complete(&self) -> bool { + segments_complete(&self.v2_segments, self.v2_expected) && segments_complete(&self.v3_segments, self.v3_expected) + } +} + +#[derive(Clone)] +struct ValidationPairs { + pending: Arc>>, + completed: Arc>>, +} + +impl ValidationPairs { + fn new() -> Self { + Self { + pending: Arc::new(Mutex::new(HashMap::new())), + completed: Arc::new(Mutex::new(HashSet::new())), + } + } +} + +pub struct ValidationStatus { + pub failures: Vec, + pub pending_series_batches: usize, + pub pending_sketches_batches: usize, +} + +impl ValidationStatus { + pub fn is_ok(&self) -> bool { + self.failures.is_empty() && self.pending_series_batches == 0 && self.pending_sketches_batches == 0 + } +} #[derive(Clone)] pub struct MetricsState { metrics: Arc>>, + series_pairs: ValidationPairs, + sketches_pairs: ValidationPairs, + validation_failures: Arc>>, } impl MetricsState { - /// Creates a new `MetricsState`. pub fn new() -> Self { Self { metrics: Arc::new(Mutex::new(Vec::new())), + series_pairs: ValidationPairs::new(), + sketches_pairs: ValidationPairs::new(), + validation_failures: Arc::new(Mutex::new(Vec::new())), } } - /// Dumps the current metrics state. 
pub fn dump_metrics(&self) -> Vec { - let data = self.metrics.lock().unwrap(); - data.clone() + self.metrics.lock().unwrap().clone() + } + + pub fn validation_status(&self) -> ValidationStatus { + let pending_series_batches = self.series_pairs.pending.lock().unwrap().len(); + let pending_sketches_batches = self.sketches_pairs.pending.lock().unwrap().len(); + let failures = self.validation_failures.lock().unwrap().clone(); + + ValidationStatus { + failures, + pending_series_batches, + pending_sketches_batches, + } } /// Merges the given series v2 payload into the current metrics state. @@ -33,13 +102,15 @@ impl MetricsState { Ok(()) } - /// Merges the given sketch payload into the current metrics state. pub fn merge_sketch_payload(&self, payload: SketchPayload) -> Result<(), GenericError> { let metrics = Metric::try_from_sketch(payload)?; + self.metrics.lock().unwrap().extend(metrics); + Ok(()) + } - let mut data = self.metrics.lock().unwrap(); - data.extend(metrics); - + pub fn merge_v3_payload(&self, payload: V3Payload) -> Result<(), GenericError> { + let metrics = Metric::try_from_v3(payload)?; + self.metrics.lock().unwrap().extend(metrics); Ok(()) } @@ -52,4 +123,418 @@ impl MetricsState { Ok(()) } + + pub fn accumulate_v2_series( + &self, payload: MetricPayload, batch_id: String, batch_seq: usize, batch_len: usize, + ) -> Result<(), GenericError> { + let metrics = Metric::try_from_series_v2(payload)?; + self.accumulate_v2(&self.series_pairs, "series", metrics, batch_id, batch_seq, batch_len) + } + + pub fn accumulate_v2_sketches( + &self, payload: SketchPayload, batch_id: String, batch_seq: usize, batch_len: usize, + ) -> Result<(), GenericError> { + let metrics = Metric::try_from_sketch(payload)?; + self.accumulate_v2( + &self.sketches_pairs, + "sketches", + metrics, + batch_id, + batch_seq, + batch_len, + ) + } + + pub fn accumulate_v3_series_and_merge( + &self, payload: V3Payload, batch_id: String, batch_seq: usize, batch_len: usize, + ) -> Result<(), 
GenericError> { + let metrics = Metric::try_from_v3(payload)?; + if !self.accumulate_v3( + &self.series_pairs, + "series", + metrics.clone(), + batch_id, + batch_seq, + batch_len, + )? { + self.metrics.lock().unwrap().extend(metrics); + } + Ok(()) + } + + pub fn accumulate_v3_sketches_and_merge( + &self, payload: V3Payload, batch_id: String, batch_seq: usize, batch_len: usize, + ) -> Result<(), GenericError> { + let metrics = Metric::try_from_v3(payload)?; + if !self.accumulate_v3( + &self.sketches_pairs, + "sketches", + metrics.clone(), + batch_id, + batch_seq, + batch_len, + )? { + self.metrics.lock().unwrap().extend(metrics); + } + Ok(()) + } + + fn accumulate_v2( + &self, pairs: &ValidationPairs, kind: &str, metrics: Vec, batch_id: String, batch_seq: usize, + batch_len: usize, + ) -> Result<(), GenericError> { + if pairs.completed.lock().unwrap().contains(&batch_id) { + debug!( + kind, + batch_id, batch_seq, "Ignoring V2 validation segment for completed batch." + ); + return Ok(()); + } + + if !self.validate_segment_metadata(kind, "v2", &batch_id, batch_seq, batch_len) { + return Ok(()); + } + + let (completed_batch, validation_failures) = { + let mut map = pairs.pending.lock().unwrap(); + let batch = map.entry(batch_id.clone()).or_insert_with(ValidationBatch::new); + let insert_result = insert_segment( + kind, + "v2", + &batch_id, + &mut batch.v2_segments, + &mut batch.v2_expected, + batch_seq, + batch_len, + metrics, + ); + ( + batch.is_complete().then(|| map.remove(&batch_id).unwrap()), + insert_result.failures, + ) + }; + self.record_validation_failures(validation_failures); + if let Some(batch) = completed_batch { + self.complete_validation_batch(pairs, kind, batch_id, batch); + } + + Ok(()) + } + + fn accumulate_v3( + &self, pairs: &ValidationPairs, kind: &str, metrics: Vec, batch_id: String, batch_seq: usize, + batch_len: usize, + ) -> Result { + if pairs.completed.lock().unwrap().contains(&batch_id) { + debug!( + kind, + batch_id, batch_seq, "Ignoring 
V3 validation segment for completed batch." + ); + return Ok(true); + } + + if !self.validate_segment_metadata(kind, "v3", &batch_id, batch_seq, batch_len) { + return Ok(true); + } + + let (was_duplicate, completed_batch, validation_failures) = { + let mut map = pairs.pending.lock().unwrap(); + let batch = map.entry(batch_id.clone()).or_insert_with(ValidationBatch::new); + let insert_result = insert_segment( + kind, + "v3", + &batch_id, + &mut batch.v3_segments, + &mut batch.v3_expected, + batch_seq, + batch_len, + metrics, + ); + ( + insert_result.was_duplicate, + batch.is_complete().then(|| map.remove(&batch_id).unwrap()), + insert_result.failures, + ) + }; + self.record_validation_failures(validation_failures); + if let Some(batch) = completed_batch { + self.complete_validation_batch(pairs, kind, batch_id, batch); + } + + Ok(was_duplicate) + } + + fn complete_validation_batch(&self, pairs: &ValidationPairs, kind: &str, batch_id: String, batch: ValidationBatch) { + let v2_metrics = flatten_segments(batch.v2_segments); + let v3_metrics = flatten_segments(batch.v3_segments); + let mismatches = compare_validation_pair(kind, &batch_id, &v2_metrics, &v3_metrics); + if mismatches > 0 { + self.record_validation_failure(format!( + "{kind} validation batch {batch_id} had {mismatches} mismatch(es)" + )); + } + pairs.completed.lock().unwrap().insert(batch_id); + } + + fn validate_segment_metadata(&self, kind: &str, protocol: &str, batch_id: &str, seq: usize, len: usize) -> bool { + if len == 0 { + self.record_validation_failure(format!( + "{kind} validation batch {batch_id} has invalid {protocol} length 0" + )); + return false; + } + if seq >= len { + self.record_validation_failure(format!( + "{kind} validation batch {batch_id} has invalid {protocol} sequence {seq} for length {len}" + )); + return false; + } + true + } + + fn record_validation_failure(&self, failure: String) { + warn!(failure = %failure, "Metrics validation failure."); + 
self.validation_failures.lock().unwrap().push(failure); + } + + fn record_validation_failures(&self, failures: Vec) { + for failure in failures { + self.record_validation_failure(failure); + } + } +} + +type MetricKey = (String, Vec); +type MetricPoints = Vec<(u64, MetricValue)>; + +struct InsertSegmentResult { + was_duplicate: bool, + failures: Vec, +} + +#[allow(clippy::too_many_arguments)] +fn insert_segment( + kind: &str, protocol: &str, batch_id: &str, segments: &mut HashMap>, + expected: &mut Option, batch_seq: usize, batch_len: usize, metrics: Vec, +) -> InsertSegmentResult { + let mut failures = Vec::new(); + + if let Some(previous_len) = *expected { + if previous_len != batch_len { + failures.push(format!( + "{kind} validation batch {batch_id} changed {protocol} length from {previous_len} to {batch_len}" + )); + } + } else { + *expected = Some(batch_len); + } + + let was_duplicate = match segments.get(&batch_seq) { + Some(existing) if existing == &metrics => { + debug!( + kind, + protocol, batch_id, batch_seq, "Ignoring duplicate validation segment." 
+ ); + true + } + Some(_) => { + failures.push(format!( + "{kind} validation batch {batch_id} received conflicting {protocol} segment {batch_seq}" + )); + true + } + None => { + segments.insert(batch_seq, metrics); + false + } + }; + + InsertSegmentResult { + was_duplicate, + failures, + } +} + +fn segments_complete(segments: &HashMap>, expected: Option) -> bool { + expected.is_some_and(|expected| segments.len() == expected && (0..expected).all(|seq| segments.contains_key(&seq))) +} + +fn flatten_segments(segments: HashMap>) -> Vec { + let mut ordered_segments: Vec<_> = segments.into_iter().collect(); + ordered_segments.sort_by_key(|(seq, _)| *seq); + ordered_segments.into_iter().flat_map(|(_, metrics)| metrics).collect() +} + +fn compare_validation_pair(kind: &str, batch_id: &str, v2: &[Metric], v3: &[Metric]) -> usize { + // Normalize metric context by sorting tags so tag ordering differences don't produce false mismatches. + let normalize_key = |m: &Metric| -> MetricKey { + let (name, mut tags) = m.context().clone().into_parts(); + tags.sort(); + (name, tags) + }; + let format_key = |(name, tags): &MetricKey| -> String { + if tags.is_empty() { + name.clone() + } else { + format!("{} {{{}}}", name, tags.join(", ")) + } + }; + + let mut v2_map: HashMap = HashMap::new(); + for m in v2 { + v2_map + .entry(normalize_key(m)) + .or_default() + .extend(m.values().iter().cloned()); + } + + let mut v3_map: HashMap = HashMap::new(); + for m in v3 { + v3_map + .entry(normalize_key(m)) + .or_default() + .extend(m.values().iter().cloned()); + } + + let mut mismatches = 0usize; + + for (ctx, v2_vals) in &v2_map { + match v3_map.get(ctx) { + None => { + let context = format_key(ctx); + warn!(kind, batch_id, context = %context, "Validation pair: context present in V2 but missing from V3"); + mismatches += 1; + } + Some(v3_vals) => { + if !point_multisets_match(v2_vals, v3_vals) { + let context = format_key(ctx); + warn!( + kind, + batch_id, + context = %context, + v2_points = 
v2_vals.len(), + v3_points = v3_vals.len(), + "Validation pair: V2/V3 value mismatch" + ); + mismatches += 1; + } + } + } + } + + for ctx in v3_map.keys() { + if !v2_map.contains_key(ctx) { + let context = format_key(ctx); + warn!(kind, batch_id, context = %context, "Validation pair: context present in V3 but missing from V2"); + mismatches += 1; + } + } + + if mismatches == 0 { + debug!(kind, batch_id, v2_contexts = v2_map.len(), "Validation pair matches."); + } else { + warn!(kind, batch_id, mismatches, "Validation pair has mismatches."); + } + + mismatches +} + +fn point_multisets_match(v2_vals: &MetricPoints, v3_vals: &MetricPoints) -> bool { + if v2_vals.len() != v3_vals.len() { + return false; + } + + let mut unmatched = v3_vals.clone(); + for v2_val in v2_vals { + let Some(pos) = unmatched + .iter() + .position(|v3_val| v2_val.0 == v3_val.0 && v2_val.1 == v3_val.1) + else { + return false; + }; + unmatched.swap_remove(pos); + } + + unmatched.is_empty() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn validation_batch_requires_all_expected_sequences() { + let mut batch = ValidationBatch::new(); + batch.v2_expected = Some(2); + batch.v2_segments.insert(1, Vec::new()); + batch.v3_expected = Some(1); + batch.v3_segments.insert(0, Vec::new()); + + assert!(!batch.is_complete()); + + batch.v2_segments.insert(0, Vec::new()); + + assert!(batch.is_complete()); + } + + #[test] + fn duplicate_segments_do_not_count_twice() { + let state = MetricsState::new(); + let mut segments = HashMap::new(); + let mut expected = None; + + let first = insert_segment( + "series", + "v2", + "test-batch", + &mut segments, + &mut expected, + 0, + 1, + Vec::new(), + ); + let duplicate = insert_segment( + "series", + "v2", + "test-batch", + &mut segments, + &mut expected, + 0, + 1, + Vec::new(), + ); + + assert!(!first.was_duplicate); + assert!(duplicate.was_duplicate); + assert_eq!(1, segments.len()); + assert!(state.validation_status().is_ok()); + } + + #[test] + fn 
points_match_with_same_timestamp_in_different_order() { + let v2 = vec![ + (10, MetricValue::Gauge { value: 1.0 }), + (10, MetricValue::Gauge { value: 2.0 }), + ]; + let v3 = vec![ + (10, MetricValue::Gauge { value: 2.0 }), + (10, MetricValue::Gauge { value: 1.0 }), + ]; + + assert!(point_multisets_match(&v2, &v3)); + } + + #[test] + fn points_detect_same_timestamp_value_mismatch() { + let v2 = vec![ + (10, MetricValue::Gauge { value: 1.0 }), + (10, MetricValue::Gauge { value: 2.0 }), + ]; + let v3 = vec![ + (10, MetricValue::Gauge { value: 1.0 }), + (10, MetricValue::Gauge { value: 3.0 }), + ]; + + assert!(!point_multisets_match(&v2, &v3)); + } } diff --git a/bin/correctness/panoramic/src/correctness/analysis/collected.rs b/bin/correctness/panoramic/src/correctness/analysis/collected.rs index b083936ccb2..df415ad066a 100644 --- a/bin/correctness/panoramic/src/correctness/analysis/collected.rs +++ b/bin/correctness/panoramic/src/correctness/analysis/collected.rs @@ -23,6 +23,7 @@ impl CollectedData { pub async fn for_port(datadog_intake_port: u16) -> Result { let events = get_captured_events(datadog_intake_port).await?; let metrics = get_captured_metrics(datadog_intake_port).await?; + check_metrics_validation_status(datadog_intake_port).await?; let service_checks = get_captured_service_checks(datadog_intake_port).await?; let spans = get_captured_spans(datadog_intake_port).await?; let trace_stats = get_captured_trace_stats(datadog_intake_port).await?; @@ -101,6 +102,35 @@ async fn get_captured_metrics(datadog_intake_port: u16) -> Result, G Ok(metrics) } +async fn check_metrics_validation_status(datadog_intake_port: u16) -> Result<(), GenericError> { + let client = reqwest::Client::new(); + let response = client + .get(format!( + "http://localhost:{}/metrics/validation_status", + datadog_intake_port + )) + .send() + .await + .error_context("Failed to call metrics validation status endpoint on datadog-intake server.")?; + + let status = response.status(); + let body = 
response + .text() + .await + .error_context("Failed to read metrics validation status response body.")?; + + if status.is_success() { + debug!("Metrics validation status checked successfully."); + Ok(()) + } else { + Err(generic_error!( + "Metrics validation failed with status {}: {}", + status, + body.trim() + )) + } +} + async fn get_captured_service_checks(datadog_intake_port: u16) -> Result, GenericError> { let client = reqwest::Client::new(); let checks = client diff --git a/bin/correctness/stele/Cargo.toml b/bin/correctness/stele/Cargo.toml index 8b3dfff3eca..07d52a89efe 100644 --- a/bin/correctness/stele/Cargo.toml +++ b/bin/correctness/stele/Cargo.toml @@ -20,4 +20,5 @@ saluki-error = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } serde_with = { workspace = true } +simdutf8 = { workspace = true } stringtheory = { workspace = true } diff --git a/bin/correctness/stele/src/metrics.rs b/bin/correctness/stele/src/metrics.rs index b4b6847281e..9cf7c036b18 100644 --- a/bin/correctness/stele/src/metrics.rs +++ b/bin/correctness/stele/src/metrics.rs @@ -1,6 +1,7 @@ use std::fmt; -use datadog_protos::metrics::{MetricPayload, MetricType, SketchPayload}; +use datadog_protos::metrics::v3::Payload as V3Payload; +use datadog_protos::metrics::{Dogsketch, MetricPayload, MetricType, SketchPayload}; use ddsketch::DDSketch; use float_cmp::ApproxEqRatio as _; use saluki_error::{generic_error, GenericError}; @@ -350,6 +351,355 @@ impl Metric { } } +// V3 metric type constants (from intake_v3.proto metricType enum). +const V3_METRIC_TYPE_COUNT: u64 = 1; +const V3_METRIC_TYPE_RATE: u64 = 2; +const V3_METRIC_TYPE_GAUGE: u64 = 3; +const V3_METRIC_TYPE_SKETCH: u64 = 4; + +// V3 value type constants (from intake_v3.proto valueType enum). 
+const V3_VALUE_TYPE_ZERO: u64 = 0x00; +const V3_VALUE_TYPE_SINT64: u64 = 0x10; +const V3_VALUE_TYPE_FLOAT32: u64 = 0x20; +const V3_VALUE_TYPE_FLOAT64: u64 = 0x30; + +/// Tracks cursors into the various value arrays of a v3 payload during decoding. +struct V3ValueCursors { + timestamp: usize, + sint64: usize, + float32: usize, + float64: usize, + sketch_point: usize, + sketch_bin_key: usize, + sketch_bin_cnt: usize, +} + +impl V3ValueCursors { + fn new() -> Self { + Self { + timestamp: 0, + sint64: 0, + float32: 0, + float64: 0, + sketch_point: 0, + sketch_bin_key: 0, + sketch_bin_cnt: 0, + } + } +} + +impl Metric { + /// Attempts to parse metrics from a v3 payload. + /// + /// The v3 format uses columnar encoding with dictionary deduplication and delta encoding. + /// + /// # Errors + /// + /// If the payload contains invalid data, an error will be returned. + pub fn try_from_v3(mut payload: V3Payload) -> Result, GenericError> { + let data = payload + .metricData + .take() + .ok_or_else(|| generic_error!("V3 payload missing metricData"))?; + + let num_metrics = data.types.len(); + if num_metrics == 0 { + return Ok(Vec::new()); + } + + // Parse dictionaries. + let names_dict = parse_dict_strings(&data.dictNameStr)?; + let tags_dict = parse_dict_strings(&data.dictTagStr)?; + let tagsets_dict = parse_tagsets(&data.dictTagsets, &tags_dict)?; + + // Delta-decode index arrays. + let mut name_refs = data.nameRefs; + let mut tagset_refs = data.tagsetRefs; + let mut timestamps = data.timestamps; + delta_decode(&mut name_refs); + delta_decode(&mut tagset_refs); + delta_decode(&mut timestamps); + + // Delta-decode sketch bin keys (per-sketch sequences are individually delta-encoded, + // but we handle that during iteration). 
+ let mut sketch_bin_keys = data.sketchBinKeys; + + let mut cursors = V3ValueCursors::new(); + let mut metrics = Vec::with_capacity(num_metrics); + + for i in 0..num_metrics { + let type_field = data.types[i]; + let metric_type = type_field & 0x0F; + let value_type = type_field & 0xF0; + let num_points = data.numPoints[i] as usize; + + // Resolve name (1-based index). + let name_ref = name_refs[i] as usize; + let name = if name_ref == 0 { + String::new() + } else { + names_dict + .get(name_ref - 1) + .ok_or_else(|| generic_error!("Invalid name ref {} (dict size {})", name_ref, names_dict.len()))? + .clone() + }; + + // Resolve tags (1-based index). + let tagset_ref = tagset_refs[i] as usize; + let tags = if tagset_ref == 0 { + Vec::new() + } else { + tagsets_dict + .get(tagset_ref - 1) + .ok_or_else(|| { + generic_error!("Invalid tagset ref {} (dict size {})", tagset_ref, tagsets_dict.len()) + })? + .clone() + }; + + let mut values = Vec::with_capacity(num_points); + + if metric_type == V3_METRIC_TYPE_SKETCH { + for _ in 0..num_points { + // Read timestamp. + let ts = *timestamps + .get(cursors.timestamp) + .ok_or_else(|| generic_error!("Ran out of timestamps"))?; + let timestamp = u64::try_from(ts).map_err(|_| generic_error!("Invalid timestamp: {}", ts))?; + cursors.timestamp += 1; + + // Sketch count is always in valsSint64. + let cnt = *data + .valsSint64 + .get(cursors.sint64) + .ok_or_else(|| generic_error!("Ran out of sint64 values for sketch count"))?; + cursors.sint64 += 1; + + // Sum, min, max are stored as 3 consecutive values based on value_type. 
+ let sum = read_value( + value_type, + &mut cursors, + &data.valsSint64, + &data.valsFloat32, + &data.valsFloat64, + )?; + let min = read_value( + value_type, + &mut cursors, + &data.valsSint64, + &data.valsFloat32, + &data.valsFloat64, + )?; + let max = read_value( + value_type, + &mut cursors, + &data.valsSint64, + &data.valsFloat32, + &data.valsFloat64, + )?; + let avg = if cnt != 0 { sum / cnt as f64 } else { 0.0 }; + + // Read bin data. + let num_bins = *data + .sketchNumBins + .get(cursors.sketch_point) + .ok_or_else(|| generic_error!("Ran out of sketchNumBins"))? + as usize; + cursors.sketch_point += 1; + + let bin_key_start = cursors.sketch_bin_key; + let bin_key_end = bin_key_start + num_bins; + if bin_key_end > sketch_bin_keys.len() { + return Err(generic_error!("Ran out of sketch bin keys")); + } + + // Delta-decode this sketch's bin keys. + delta_decode_i32(&mut sketch_bin_keys[bin_key_start..bin_key_end]); + + let k: Vec = sketch_bin_keys[bin_key_start..bin_key_end].to_vec(); + cursors.sketch_bin_key = bin_key_end; + + let bin_cnt_start = cursors.sketch_bin_cnt; + let bin_cnt_end = bin_cnt_start + num_bins; + if bin_cnt_end > data.sketchBinCnts.len() { + return Err(generic_error!("Ran out of sketch bin counts")); + } + let n: Vec = data.sketchBinCnts[bin_cnt_start..bin_cnt_end].to_vec(); + cursors.sketch_bin_cnt = bin_cnt_end; + + // Build a Dogsketch proto and use the existing TryFrom conversion. + let mut dogsketch = Dogsketch::new(); + dogsketch.ts = ts; + dogsketch.cnt = cnt; + dogsketch.min = min; + dogsketch.max = max; + dogsketch.avg = avg; + dogsketch.sum = sum; + dogsketch.set_k(k); + dogsketch.set_n(n); + + let sketch = DDSketch::try_from(dogsketch) + .map_err(|e| generic_error!("Failed to convert v3 sketch to DDSketch: {}", e))?; + values.push((timestamp, MetricValue::Sketch { sketch })); + } + } else { + for _ in 0..num_points { + // Read timestamp. 
+ let ts = *timestamps + .get(cursors.timestamp) + .ok_or_else(|| generic_error!("Ran out of timestamps"))?; + let timestamp = u64::try_from(ts).map_err(|_| generic_error!("Invalid timestamp: {}", ts))?; + cursors.timestamp += 1; + + // Read point value. + let value = read_value( + value_type, + &mut cursors, + &data.valsSint64, + &data.valsFloat32, + &data.valsFloat64, + )?; + + let metric_value = match metric_type { + V3_METRIC_TYPE_COUNT => MetricValue::Count { value }, + V3_METRIC_TYPE_RATE => MetricValue::Rate { + interval: data.intervals[i], + value, + }, + V3_METRIC_TYPE_GAUGE => MetricValue::Gauge { value }, + other => return Err(generic_error!("Unknown v3 metric type: {}", other)), + }; + + values.push((timestamp, metric_value)); + } + } + + metrics.push(Metric { + context: MetricContext { name, tags }, + values, + }); + } + + Ok(metrics) + } +} + +/// Delta-decode in place: convert deltas to absolute values (prefix sum). +fn delta_decode(s: &mut [i64]) { + for i in 1..s.len() { + s[i] += s[i - 1]; + } +} + +/// Delta-decode i32 values in place. +fn delta_decode_i32(s: &mut [i32]) { + for i in 1..s.len() { + s[i] += s[i - 1]; + } +} + +/// Read a varint from a byte slice, returning `(value, bytes_consumed)`. +fn read_varint(data: &[u8]) -> Result<(u64, usize), GenericError> { + let mut value: u64 = 0; + let mut shift = 0; + for (i, &byte) in data.iter().enumerate() { + value |= ((byte & 0x7F) as u64) << shift; + if byte & 0x80 == 0 { + return Ok((value, i + 1)); + } + shift += 7; + if shift >= 64 { + return Err(generic_error!("Varint too large")); + } + } + Err(generic_error!("Unexpected end of data reading varint")) +} + +/// Parse varint-length-prefixed strings from a byte buffer. 
+fn parse_dict_strings(data: &[u8]) -> Result, GenericError> { + let mut strings = Vec::new(); + let mut offset = 0; + while offset < data.len() { + let (len, varint_size) = read_varint(&data[offset..])?; + offset += varint_size; + let len = len as usize; + if offset + len > data.len() { + return Err(generic_error!("Dictionary string extends past end of buffer")); + } + let s = simdutf8::basic::from_utf8(&data[offset..offset + len]) + .map_err(|e| generic_error!("Invalid UTF-8 in dictionary string: {}", e))?; + strings.push(s.to_string()); + offset += len; + } + Ok(strings) +} + +/// Parse tagsets from the dictTagsets array using the tag dictionary. +/// +/// Each tagset in `dict_tagsets` is encoded as: length, then that many delta-encoded tag indices. +fn parse_tagsets(dict_tagsets: &[i64], tags_dict: &[String]) -> Result>, GenericError> { + let mut tagsets = Vec::new(); + let mut offset = 0; + while offset < dict_tagsets.len() { + let count = dict_tagsets[offset] as usize; + offset += 1; + if offset + count > dict_tagsets.len() { + return Err(generic_error!("Tagset extends past end of dictTagsets array")); + } + + // Delta-decode the tag indices within this tagset. + let mut tag_indices: Vec = dict_tagsets[offset..offset + count].to_vec(); + delta_decode(&mut tag_indices); + + // Resolve tag indices (1-based) to tag strings. + let mut tags = Vec::with_capacity(count); + for &idx in &tag_indices { + let idx = idx as usize; + if idx == 0 { + continue; + } + let tag = tags_dict + .get(idx - 1) + .ok_or_else(|| generic_error!("Invalid tag index {} (dict size {})", idx, tags_dict.len()))?; + tags.push(tag.clone()); + } + tagsets.push(tags); + offset += count; + } + Ok(tagsets) +} + +/// Read the next f64 value from the appropriate value array based on `value_type`. 
+fn read_value( + value_type: u64, cursors: &mut V3ValueCursors, vals_sint64: &[i64], vals_float32: &[f32], vals_float64: &[f64], +) -> Result { + match value_type { + V3_VALUE_TYPE_ZERO => Ok(0.0), + V3_VALUE_TYPE_SINT64 => { + let v = *vals_sint64 + .get(cursors.sint64) + .ok_or_else(|| generic_error!("Ran out of sint64 values"))?; + cursors.sint64 += 1; + Ok(v as f64) + } + V3_VALUE_TYPE_FLOAT32 => { + let v = *vals_float32 + .get(cursors.float32) + .ok_or_else(|| generic_error!("Ran out of float32 values"))?; + cursors.float32 += 1; + Ok(v as f64) + } + V3_VALUE_TYPE_FLOAT64 => { + let v = *vals_float64 + .get(cursors.float64) + .ok_or_else(|| generic_error!("Ran out of float64 values"))?; + cursors.float64 += 1; + Ok(v) + } + _ => Err(generic_error!("Unknown v3 value type: {:#x}", value_type)), + } +} + fn approx_eq_ratio_optional(a: Option, b: Option, ratio: f64) -> bool { match (a, b) { (Some(a), Some(b)) => a.approx_eq_ratio(&b, ratio), diff --git a/ci/tooling/update-protos.sh b/ci/tooling/update-protos.sh index 05f687455d9..3315539e05c 100755 --- a/ci/tooling/update-protos.sh +++ b/ci/tooling/update-protos.sh @@ -46,6 +46,7 @@ cp -R ${TMP_DIR}/datadog-agent/pkg/proto/datadog lib/protos/datadog/proto/datado mkdir -p lib/protos/datadog/proto/agent-payload cp ${TMP_DIR}/agent-payload/proto/metrics/agent_payload.proto lib/protos/datadog/proto/agent-payload/ +cp ${TMP_DIR}/agent-payload/proto/metrics/intake_v3.proto lib/protos/datadog/proto/agent-payload/ ## containerd and containerd-related definitions. 
## diff --git a/lib/protos/datadog/build.rs b/lib/protos/datadog/build.rs index 4c576e001b7..d77e28dd751 100644 --- a/lib/protos/datadog/build.rs +++ b/lib/protos/datadog/build.rs @@ -88,7 +88,10 @@ fn main() { protobuf_codegen::Codegen::new() .protoc() .includes(["proto", "proto/datadog-agent"]) - .inputs(["proto/agent-payload/agent_payload.proto"]) + .inputs([ + "proto/agent-payload/agent_payload.proto", + "proto/agent-payload/intake_v3.proto", + ]) .cargo_out_dir("protos") .customize(codegen_customize.clone()) .run_from_script(); diff --git a/lib/protos/datadog/proto/agent-payload/README.md b/lib/protos/datadog/proto/agent-payload/README.md index 5260c8e6d9e..5e0e6acc7a8 100644 --- a/lib/protos/datadog/proto/agent-payload/README.md +++ b/lib/protos/datadog/proto/agent-payload/README.md @@ -6,4 +6,4 @@ Agent/Agent Data Plane send telemetry payloads to. ## Source **Repository:** https://github.com/DataDog/agent-payload.git -**Branch / Tag**: v5.0.164 +**Branch / Tag**: v5.0.180 diff --git a/lib/protos/datadog/proto/agent-payload/intake_v3.proto b/lib/protos/datadog/proto/agent-payload/intake_v3.proto new file mode 100644 index 00000000000..5660bb8612a --- /dev/null +++ b/lib/protos/datadog/proto/agent-payload/intake_v3.proto @@ -0,0 +1,79 @@ +syntax = "proto3"; + +package datadoghq.api.metrics.v3; + +message Payload { + reserved 1; // for compatibility with agentpayload.MetricPayload.series + Metadata metadata = 2; + MetricData metricData = 3; +} + +message Metadata { + repeated string tags = 1; + repeated string resources = 2; // even number of elements, [Type, Name] pairs +} + +message MetricData { + // Dictionaries + // All dictionary indexes are base-1, zero implicitly represents an empty value. 
+ bytes dictNameStr = 1; // varint length + value + bytes dictTagStr = 2; // varint length + value + repeated sint64 dictTagsets = 3; // length, delta encoded set of indexes into dictTagsStr + + bytes dictResourceStr = 4; // varint length + value + repeated int64 dictResourceLen = 5; // number of elements in Type and Name arrays + repeated sint64 dictResourceType = 6; // delta encoded set of indexes into dictResourceStr + repeated sint64 dictResourceName = 7; // delta encoded set of indexes into dictResourceStr + + bytes dictSourceTypeName = 8; // varint length + value + repeated int32 dictOriginInfo = 9; // (product, category, service) tuples + bytes dictUnitStr = 25; // varint length + value + + // One entry per time series + repeated uint64 types = 10; // type = metricType | valueType | metricFlags + repeated sint64 nameRefs = 11; // index into dictNameStr, entire array is delta encoded + repeated sint64 tagsetRefs = 12; // index into dictTagsets, entire array is delta encoded + repeated sint64 resourcesRefs = 13; // index into dictResourceLen, entire array is delta encoded + repeated uint64 intervals = 14; + repeated uint64 numPoints = 15; + repeated sint64 sourceTypeNameRefs = 23; // index into dictSourceTypeName, entire array is delta encoded + repeated sint64 originInfoRefs = 24; // index into dictOriginInfo, entire array is delta encoded + repeated sint64 unitRefs = 26; // index into dictUnitStr, value present if flagHasUnit is set, entire array is delta encoded + + // each metric has numPoints values in this section + repeated sint64 timestamps = 16; // entire array delta encoded + repeated sint64 valsSint64 = 17; // or + repeated float valsFloat32 = 18; // or + repeated double valsFloat64 = 19; // based on valueType + repeated uint64 sketchNumBins = 20; + repeated sint32 sketchBinKeys = 21; // per-metric sequence is delta encoded + repeated uint32 sketchBinCnts = 22; + // sketch summary Sum, Min, Max are encoded as three consecutive elements in one of 
vals using valueType + // sketch summary Cnt is always encoded in valInt64 + // sketch summary Avg is reconstructed as Sum/Cnt in the intake +} + +enum metricType { + UNUSED = 0; + Count = 1; + Rate = 2; + Gauge = 3; + Sketch = 4; +} + +enum valueType { + Zero = 0x00; // value is zero, not stored explicitly + Sint64 = 0x10; // value is stored in valsSint64 + Float32 = 0x20; // value is stored in valsFloat32 + Float64 = 0x30; // value is stored in valsFloat64 +} + +enum metricFlags { + flagNone = 0; + flagNoIndex = 0x100; // metric should not be indexed (equivalent to origin metric type == agent_hidden in v2) + flagHasUnit = 0x200; // timeseries has a unit in the unitRefs column +} + +message Response { + string error = 1; +} diff --git a/lib/protos/datadog/src/lib.rs b/lib/protos/datadog/src/lib.rs index 6a5a17272a7..92a4094201d 100644 --- a/lib/protos/datadog/src/lib.rs +++ b/lib/protos/datadog/src/lib.rs @@ -38,6 +38,11 @@ pub mod metrics { pub use super::include::agent_payload::metric_payload::*; pub use super::include::agent_payload::sketch_payload::{sketch::*, Sketch}; pub use super::include::agent_payload::*; + + /// Metrics V3 API-related definitions. + pub mod v3 { + pub use super::super::include::intake_v3::*; + } } /// Event-related definitions. 
diff --git a/lib/saluki-components/Cargo.toml b/lib/saluki-components/Cargo.toml index 2f384e6e02c..2c6c371bf60 100644 --- a/lib/saluki-components/Cargo.toml +++ b/lib/saluki-components/Cargo.toml @@ -83,6 +83,7 @@ tracing = { workspace = true } tracing-appender = { workspace = true } tracing-rolling-file = { workspace = true } url = { workspace = true } +uuid = { workspace = true, features = ["std", "v7"] } zstd = { workspace = true } [dev-dependencies] diff --git a/lib/saluki-components/etc/ignored_keys.yaml b/lib/saluki-components/etc/ignored_keys.yaml index afa8981c6a9..c2bf9e661fb 100644 --- a/lib/saluki-components/etc/ignored_keys.yaml +++ b/lib/saluki-components/etc/ignored_keys.yaml @@ -3074,10 +3074,6 @@ reason: initial bulk - name: security_agent.log_file reason: initial bulk -- name: serializer_experimental_use_v3_api.series.beta_route - reason: initial bulk -- name: serializer_experimental_use_v3_api.series.use_beta - reason: initial bulk - name: server_timeout reason: initial bulk - name: serverless.enabled diff --git a/lib/saluki-components/src/common/datadog/config.rs b/lib/saluki-components/src/common/datadog/config.rs index ac9a94a2802..5e0c6cadf5c 100644 --- a/lib/saluki-components/src/common/datadog/config.rs +++ b/lib/saluki-components/src/common/datadog/config.rs @@ -9,6 +9,7 @@ use tracing::warn; use super::{ endpoints::{EndpointConfiguration, EndpointRoute, RoutableEndpoint}, + protocol::V3ApiConfig, proxy::ProxyConfiguration, retry::RetryConfiguration, }; @@ -203,6 +204,13 @@ pub struct ForwarderConfiguration { )] connection_reset_interval_secs: u64, + /// V3 API configuration for per-endpoint V3 support. + /// + /// This is read from the encoder configuration and used by the I/O layer to filter payloads + /// based on endpoint URL matching. + #[serde(rename = "serializer_experimental_use_v3_api", default)] + v3_api: V3ApiConfig, + /// Whether to disable TLS certificate validation for Datadog intake forwarding. /// /// Defaults to `false`. 
If set to `true`, HTTPS clients built for the shared Datadog forwarder accept invalid @@ -341,6 +349,11 @@ impl ForwarderConfiguration { Duration::from_secs(self.connection_reset_interval_secs) } + /// Returns a reference to the V3 API configuration. + pub fn v3_api(&self) -> &V3ApiConfig { + &self.v3_api + } + /// Returns whether TLS certificate validation is disabled for Datadog intake forwarding. pub const fn skip_ssl_validation(&self) -> bool { self.skip_ssl_validation @@ -875,7 +888,10 @@ mod config_smoke { // config load in the smoke test has a valid starting point. run_config_smoke_tests( structs::FORWARDER_CONFIGURATION, - &[], + &[ + "serializer_experimental_use_v3_api.sketches.beta_route", + "serializer_experimental_use_v3_api.sketches.use_beta", + ], json!({ "api_key": "smoke-test-api-key" }), |cfg| ForwarderConfiguration::from_configuration(&cfg).expect("ForwarderConfiguration should deserialize"), ) diff --git a/lib/saluki-components/src/common/datadog/endpoints.rs b/lib/saluki-components/src/common/datadog/endpoints.rs index dd6c376ccea..b52ef8718da 100644 --- a/lib/saluki-components/src/common/datadog/endpoints.rs +++ b/lib/saluki-components/src/common/datadog/endpoints.rs @@ -16,6 +16,8 @@ use snafu::{ResultExt, Snafu}; use tracing::debug; use url::Url; +use super::protocol::{MetricsPayloadInfo, MetricsProtocolVersion}; + static DD_URL_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"^app(\.mrf)?(\.[a-z]{2}\d)?\.(datad(oghq|0g)\.(com|eu)|ddog-gov\.com)$").unwrap()); @@ -25,6 +27,104 @@ fn default_site() -> String { DEFAULT_SITE.to_owned() } +/// Per-endpoint V3 protocol settings. +/// +/// These settings control which protocol versions an endpoint will accept for metrics payloads. +/// Settings are derived from a global `V3ApiConfig` by matching the endpoint URL against the +/// configured V3 endpoint lists. +#[derive(Clone, Debug, Default)] +pub struct EndpointV3Settings { + /// Whether this endpoint accepts V3 series payloads. 
+ pub use_v3_series: bool, + + /// Whether this endpoint accepts V3 sketches payloads. + pub use_v3_sketches: bool, + + /// Whether validation mode is enabled for series (send both V2 and V3). + pub series_validation_mode: bool, + + /// Whether validation mode is enabled for sketches (send both V2 and V3). + pub sketches_validation_mode: bool, +} + +impl EndpointV3Settings { + /// Creates V3 settings for a specific endpoint based on URL matching. + /// + /// The `v3_series_endpoints` and `v3_sketches_endpoints` are lists of configured endpoint names. + /// If the endpoint name matches any entry, V3 is enabled for that metric type. + pub fn from_endpoint_url( + configured_endpoint: &str, v3_series_endpoints: &[String], v3_sketches_endpoints: &[String], + series_validate: bool, sketches_validate: bool, + ) -> Self { + let use_v3_series = v3_series_endpoints.iter().any(|e| configured_endpoint == e); + let use_v3_sketches = v3_sketches_endpoints.iter().any(|e| configured_endpoint == e); + + Self { + use_v3_series, + use_v3_sketches, + series_validation_mode: use_v3_series && series_validate, + sketches_validation_mode: use_v3_sketches && sketches_validate, + } + } + + /// Determines if this endpoint should receive a payload with the given payload info. + /// + /// Returns `true` if the endpoint should receive the payload, `false` otherwise. + /// + /// The logic is: + /// - V2 series payload: accept if series V3 is disabled OR series validation mode is enabled + /// - V2 sketches payload: accept if sketches V3 is disabled OR sketches validation mode is enabled + /// - V3 series payload: accept if series V3 is enabled + /// - V3 sketches payload: accept if sketches V3 is enabled + /// - Non-metrics payloads (None): always accept + pub fn should_receive_payload(&self, payload_info: Option) -> bool { + let Some(info) = payload_info else { + // No payload info - this is a non-metrics payload or legacy payload, always accept. 
+ return true; + }; + + let is_sketch = info.is_sketch(); + + match info.version { + MetricsProtocolVersion::V2 => { + if is_sketch { + // V2 sketches: accept if V3 sketches is disabled OR validation mode is enabled + !self.use_v3_sketches || self.sketches_validation_mode + } else { + // V2 series: accept if V3 series is disabled OR validation mode is enabled + !self.use_v3_series || self.series_validation_mode + } + } + + MetricsProtocolVersion::V3 => { + if is_sketch { + // V3 sketches: accept if V3 sketches is enabled + self.use_v3_sketches + } else { + // V3 series: accept if V3 series is enabled + self.use_v3_series + } + } + } + } + + /// Determines if this endpoint should receive metrics validation headers. + /// + /// Validation headers are endpoint-scoped: they should only be sent to endpoints that are + /// receiving both V2 and V3 payloads for the payload's metric family. + pub fn should_receive_validation_headers(&self, payload_info: Option) -> bool { + let Some(info) = payload_info else { + return false; + }; + + if info.is_sketch() { + self.sketches_validation_mode + } else { + self.series_validation_mode + } + } +} + /// Error type for invalid endpoints. 
#[derive(Debug, Snafu)] #[snafu(context(suffix(false)))] @@ -105,6 +205,7 @@ impl AdditionalEndpoints { seen.insert(trimmed_api_key); resolved.push(ResolvedEndpoint { endpoint: endpoint.clone(), + configured_endpoint: raw_endpoint.to_string(), api_key: trimmed_api_key.to_string(), config: configuration.clone(), api_key_index: Some(index), @@ -215,6 +316,7 @@ impl EndpointConfiguration { #[derive(Clone, Debug)] pub struct ResolvedEndpoint { endpoint: Url, + configured_endpoint: String, api_key: String, config: Option, /// Position of this key in the `additional_endpoints` config key list for its URL (raw @@ -286,6 +388,7 @@ impl ResolvedEndpoint { let traces_authority = compute_traces_authority(&endpoint); Ok(Self { endpoint, + configured_endpoint: raw_endpoint.to_string(), api_key: api_key.to_string(), config: None, api_key_index: None, @@ -299,6 +402,7 @@ impl ResolvedEndpoint { pub fn with_configuration(self, config: Option) -> Self { Self { endpoint: self.endpoint, + configured_endpoint: self.configured_endpoint, api_key: self.api_key, config, api_key_index: self.api_key_index, @@ -313,6 +417,13 @@ impl ResolvedEndpoint { &self.endpoint } + /// Returns the endpoint string as it was provided by configuration. + /// + /// Unlike [`ResolvedEndpoint::endpoint`], this is not rewritten with the data plane version prefix. + pub fn configured_endpoint(&self) -> &str { + &self.configured_endpoint + } + /// Returns the API key associated with the endpoint. 
/// /// If a [`GenericConfiguration`] has been configured, the API key will be queried from the configuration and @@ -396,15 +507,19 @@ impl ResolvedEndpoint { } } +fn endpoint_with_default_scheme(raw_endpoint: &str) -> String { + if !raw_endpoint.starts_with("http://") && !raw_endpoint.starts_with("https://") { + format!("https://{}", raw_endpoint) + } else { + raw_endpoint.to_string() + } +} + fn parse_and_normalize_endpoint(raw_endpoint: &str) -> Result { // Start out by parsing the given domain/endpoint, which means ensuring first that it has a scheme. // // If no scheme is present, we assume HTTPS. - let raw_endpoint = if !raw_endpoint.starts_with("http://") && !raw_endpoint.starts_with("https://") { - format!("https://{}", raw_endpoint) - } else { - raw_endpoint.to_string() - }; + let raw_endpoint = endpoint_with_default_scheme(raw_endpoint); let endpoint = Url::parse(&raw_endpoint).context(Parse { endpoint: raw_endpoint })?; @@ -490,7 +605,7 @@ fn calculate_resolved_endpoint( // // We also do a little bit of prefixing to get it in the right shape before creating the resolved endpoint. 
let base_domain = if site.is_empty() { DEFAULT_SITE } else { site }; - format!("app.{}", base_domain) + format!("https://app.{}", base_domain) } }; @@ -806,4 +921,71 @@ mod tests { .expect("error calculating override API endpoint"); assert_eq!(expected_endpoint, resolved.endpoint().to_string()); } + + #[test] + fn validation_headers_are_scoped_to_payload_family() { + let settings = EndpointV3Settings { + use_v3_series: true, + use_v3_sketches: false, + series_validation_mode: true, + sketches_validation_mode: false, + }; + + assert!(settings.should_receive_validation_headers(Some(MetricsPayloadInfo::v2_series()))); + assert!(settings.should_receive_validation_headers(Some(MetricsPayloadInfo::v3_series()))); + assert!(!settings.should_receive_validation_headers(Some(MetricsPayloadInfo::v2_sketches()))); + assert!(!settings.should_receive_validation_headers(Some(MetricsPayloadInfo::v3_sketches()))); + assert!(!settings.should_receive_validation_headers(None)); + } + + #[test] + fn v3_endpoint_matching_uses_configured_endpoint_before_version_prefix() { + let resolved = ResolvedEndpoint::from_raw_endpoint("https://app.datadoghq.com", "fake-api-key") + .expect("endpoint should resolve"); + + assert_eq!("https://app.datadoghq.com", resolved.configured_endpoint()); + assert_ne!("app.datadoghq.com", resolved.endpoint().host_str().unwrap()); + + let v3_series_endpoints = vec!["https://app.datadoghq.com".to_string()]; + let settings = EndpointV3Settings::from_endpoint_url( + resolved.configured_endpoint(), + &v3_series_endpoints, + &[], + false, + false, + ); + + assert!(settings.use_v3_series); + } + + #[test] + fn v3_endpoint_matching_is_endpoint_based() { + let v3_series_endpoints = vec!["https://app.us".to_string()]; + let settings = EndpointV3Settings::from_endpoint_url( + "https://app.us5.datadoghq.com", + &v3_series_endpoints, + &[], + false, + false, + ); + + assert!(!settings.use_v3_series); + } + + #[test] + fn 
v3_endpoint_matching_requires_exact_configured_endpoint() { + let v3_series_endpoints = vec!["app.datadoghq.com/".to_string()]; + let settings = + EndpointV3Settings::from_endpoint_url("https://app.datadoghq.com", &v3_series_endpoints, &[], false, false); + + assert!(!settings.use_v3_series); + } + + #[test] + fn calculated_site_endpoint_uses_agent_configured_endpoint_shape() { + let resolved = + calculate_resolved_endpoint(None, "datadoghq.com", "").expect("error calculating default API endpoint"); + + assert_eq!("https://app.datadoghq.com", resolved.configured_endpoint()); + } } diff --git a/lib/saluki-components/src/common/datadog/io.rs b/lib/saluki-components/src/common/datadog/io.rs index 231d0861e18..7139529c032 100644 --- a/lib/saluki-components/src/common/datadog/io.rs +++ b/lib/saluki-components/src/common/datadog/io.rs @@ -36,7 +36,7 @@ use tracing::{debug, error, warn}; use super::{ config::ForwarderConfiguration, - endpoints::{EndpointRoute, ResolvedEndpoint, RoutableEndpoint}, + endpoints::{EndpointRoute, EndpointV3Settings, ResolvedEndpoint, RoutableEndpoint}, middleware::{for_resolved_endpoint, with_allow_arbitrary_tags, with_version_info}, telemetry::{ComponentTelemetry, SharedTransactionQueueTelemetry, TransactionQueueTelemetry}, transaction::{Metadata, Transaction, TransactionBody}, @@ -288,7 +288,8 @@ async fn run_io_loop( // Listen for transactions to forward, and send a copy of each one to the matching endpoint I/O tasks. 
while let Some(transaction) = transactions_rx.recv().await { - let is_metrics_request = is_metrics_request_uri(transaction.request_uri()); + let is_metrics_request = + is_metrics_request_uri(transaction.request_uri(), config.v3_api().series.beta_route.as_str()); for endpoint_sender in &endpoint_txs { if !should_route_to_endpoint(is_metrics_request, has_metrics_primary, endpoint_sender.route) { continue; @@ -332,8 +333,8 @@ where tx: mpsc::Sender>, } -fn is_metrics_request_uri(uri: &Uri) -> bool { - METRIC_INTAKE_PATHS.contains(&uri.path()) +fn is_metrics_request_uri(uri: &Uri, v3_beta_series_route: &str) -> bool { + METRIC_INTAKE_PATHS.contains(&uri.path()) || uri.path() == v3_beta_series_route } fn should_route_to_endpoint(is_metrics_request: bool, has_metrics_primary: bool, route: EndpointRoute) -> bool { @@ -360,10 +361,23 @@ async fn run_endpoint_io_loop( { let queue_id = generate_retry_queue_id(context, &endpoint); let endpoint_url = endpoint.endpoint().to_string(); + let configured_endpoint = endpoint.configured_endpoint().to_string(); let endpoint_domain = endpoint.endpoint().origin().ascii_serialization(); + + // Match against the endpoint string from configuration, not the version-prefixed URL used for requests. + let v3_api = config.v3_api(); + let endpoint_v3_settings = EndpointV3Settings::from_endpoint_url( + &configured_endpoint, + &v3_api.series.endpoints, + &v3_api.sketches.endpoints, + v3_api.series.validate, + v3_api.sketches.validate, + ); debug!( endpoint_url, + configured_endpoint, num_workers = config.endpoint_concurrency(), + ?endpoint_v3_settings, "Starting endpoint I/O task." ); @@ -417,9 +431,27 @@ async fn run_endpoint_io_loop( select! { // Try and drain the next transaction from our channel, and push it into the pending transactions queue. 
maybe_txn = txns_rx.recv(), if !done => match maybe_txn { - Some(txn) => match pending_txns.push_high_priority(txn).await { - Ok(push_result) => track_queue_drops(&telemetry, &endpoint_domain, push_result), - Err(e) => error!(endpoint_url, error = %e, "Failed to enqueue transaction. Events may be permanently lost."), + Some(txn) => { + // Filter transactions based on endpoint's V3 settings and the transaction's payload info. + let payload_info = txn.metadata().payload_info; + if !endpoint_v3_settings.should_receive_payload(payload_info) { + debug!( + endpoint_url, + ?payload_info, + "Filtering out transaction based on endpoint V3 settings." + ); + continue; + } + let txn = if endpoint_v3_settings.should_receive_validation_headers(payload_info) { + txn + } else { + strip_metrics_validation_headers(txn) + }; + + match pending_txns.push_high_priority(txn).await { + Ok(push_result) => track_queue_drops(&telemetry, &endpoint_domain, push_result), + Err(e) => error!(endpoint_url, error = %e, "Failed to enqueue transaction. Events may be permanently lost."), + } }, None => { // Our transactions channel has been closed, so mark ourselves as done which will stop any further @@ -512,6 +544,18 @@ async fn run_endpoint_io_loop( task_barrier.wait().await; } +fn strip_metrics_validation_headers(txn: Transaction) -> Transaction +where + B: Buf + Clone, +{ + let (metadata, mut request) = txn.into_parts(); + let headers = request.headers_mut(); + headers.remove("X-Metrics-Request-ID"); + headers.remove("X-Metrics-Request-Seq"); + headers.remove("X-Metrics-Request-Len"); + Transaction::reassemble(metadata, request) +} + fn generate_retry_queue_id(context: ComponentContext, endpoint: &ResolvedEndpoint) -> String { // TODO: This logic does not take into account cases where the API key is updated dynamically. While a running // process would just keep using the existing retry queue, based on the queue ID we generate here... 
the next time @@ -845,23 +889,45 @@ mod tests { use super::*; use crate::common::datadog::transaction::{Metadata as TxnMetadata, Transaction}; - use crate::common::datadog::{METRICS_SERIES_V1_PATH, METRICS_SERIES_V2_PATH, METRICS_SKETCHES_PATH}; + use crate::common::datadog::{ + METRICS_SERIES_V1_PATH, METRICS_SERIES_V2_PATH, METRICS_SERIES_V3_BETA_PATH, METRICS_SERIES_V3_PATH, + METRICS_SKETCHES_PATH, METRICS_SKETCHES_V3_PATH, + }; fn uri(path: &'static str) -> Uri { Uri::from_static(path) } + fn is_metrics_request_path(path: &'static str) -> bool { + is_metrics_request_uri(&uri(path), METRICS_SERIES_V3_BETA_PATH) + } + fn forwarder_config_from_value(value: serde_json::Value) -> ForwarderConfiguration { serde_json::from_value(value).expect("ForwarderConfiguration should deserialize") } #[test] fn identifies_metrics_request_paths() { - assert!(is_metrics_request_uri(&uri(METRICS_SERIES_V1_PATH))); - assert!(is_metrics_request_uri(&uri(METRICS_SERIES_V2_PATH))); - assert!(is_metrics_request_uri(&uri(METRICS_SKETCHES_PATH))); - assert!(!is_metrics_request_uri(&uri("/api/v2/logs"))); - assert!(!is_metrics_request_uri(&uri("/api/v0.2/traces"))); + assert!(is_metrics_request_path(METRICS_SERIES_V1_PATH)); + assert!(is_metrics_request_path(METRICS_SERIES_V2_PATH)); + assert!(is_metrics_request_path(METRICS_SERIES_V3_PATH)); + assert!(is_metrics_request_path(METRICS_SERIES_V3_BETA_PATH)); + assert!(is_metrics_request_path(METRICS_SKETCHES_PATH)); + assert!(is_metrics_request_path(METRICS_SKETCHES_V3_PATH)); + assert!(!is_metrics_request_path("/api/v2/logs")); + assert!(!is_metrics_request_path("/api/v0.2/traces")); + } + + #[test] + fn identifies_configured_v3_beta_series_route_as_metrics_path() { + assert!(is_metrics_request_uri( + &uri("/custom/v3beta/series"), + "/custom/v3beta/series" + )); + assert!(!is_metrics_request_uri( + &uri("/custom/v3beta/series"), + METRICS_SERIES_V3_BETA_PATH + )); } #[test] diff --git a/lib/saluki-components/src/common/datadog/mod.rs 
b/lib/saluki-components/src/common/datadog/mod.rs index 69f9adf5ef0..ced0dc8049a 100644 --- a/lib/saluki-components/src/common/datadog/mod.rs +++ b/lib/saluki-components/src/common/datadog/mod.rs @@ -4,6 +4,7 @@ pub mod endpoints; pub mod io; pub mod middleware; pub mod obfuscation; +pub mod protocol; mod proxy; pub mod request_builder; mod retry; @@ -45,14 +46,29 @@ pub(crate) const METRICS_SERIES_V1_PATH: &str = "/api/v1/series"; /// V2 metric series intake path. pub(crate) const METRICS_SERIES_V2_PATH: &str = "/api/v2/series"; +/// V3 metric series intake path. +pub(crate) const METRICS_SERIES_V3_PATH: &str = "/api/intake/metrics/v3/series"; + +/// V3 beta metric series intake path. +pub(crate) const METRICS_SERIES_V3_BETA_PATH: &str = "/api/intake/metrics/v3beta/series"; + /// Metric sketches intake path. pub(crate) const METRICS_SKETCHES_PATH: &str = "/api/beta/sketches"; +/// V3 metric sketches intake path. +pub(crate) const METRICS_SKETCHES_V3_PATH: &str = "/api/intake/metrics/v3/sketches"; + /// Metric intake paths emitted by the encoder and matched by OPW routing. /// /// Keep these paths in one place so metric encoding and OPW routing don't drift. -pub(crate) const METRIC_INTAKE_PATHS: [&str; 3] = - [METRICS_SERIES_V1_PATH, METRICS_SERIES_V2_PATH, METRICS_SKETCHES_PATH]; +pub(crate) const METRIC_INTAKE_PATHS: [&str; 6] = [ + METRICS_SERIES_V1_PATH, + METRICS_SERIES_V2_PATH, + METRICS_SERIES_V3_PATH, + METRICS_SERIES_V3_BETA_PATH, + METRICS_SKETCHES_PATH, + METRICS_SKETCHES_V3_PATH, +]; /// Metadata tag used to store the sampling decision maker (`_dd.p.dm`). pub const TAG_DECISION_MAKER: &str = "_dd.p.dm"; diff --git a/lib/saluki-components/src/common/datadog/protocol.rs b/lib/saluki-components/src/common/datadog/protocol.rs new file mode 100644 index 00000000000..394f78ceaa7 --- /dev/null +++ b/lib/saluki-components/src/common/datadog/protocol.rs @@ -0,0 +1,170 @@ +//! Protocol version types for Datadog payloads. 
+
+use facet::Facet;
+use serde::{Deserialize, Serialize};
+
+use super::METRICS_SERIES_V3_BETA_PATH;
+
+fn default_v3_beta_series_route() -> String {
+    METRICS_SERIES_V3_BETA_PATH.to_owned()
+}
+
+/// The type of metrics payload.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
+pub enum MetricsPayloadType {
+    /// Series metrics (counters, gauges, rates, sets).
+    Series,
+
+    /// Sketch metrics (histograms, distributions).
+    Sketches,
+}
+
+/// Protocol version for metrics payloads.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
+pub enum MetricsProtocolVersion {
+    /// V2 protocol (legacy format).
+    V2,
+
+    /// V3 protocol (columnar format).
+    V3,
+}
+
+/// Combined payload info for metrics, encoding both protocol version and metric type.
+///
+/// This is stored in `PayloadMetadata` and used by the I/O layer to filter payloads
+/// based on endpoint V3 settings.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
+pub struct MetricsPayloadInfo {
+    /// The protocol version (V2 or V3).
+    pub version: MetricsProtocolVersion,
+
+    /// The type of metrics (series or sketches).
+    pub payload_type: MetricsPayloadType,
+}
+
+impl MetricsPayloadInfo {
+    /// Creates a new V2 series payload info.
+    pub const fn v2_series() -> Self {
+        Self {
+            version: MetricsProtocolVersion::V2,
+            payload_type: MetricsPayloadType::Series,
+        }
+    }
+
+    /// Creates a new V2 sketches payload info.
+    pub const fn v2_sketches() -> Self {
+        Self {
+            version: MetricsProtocolVersion::V2,
+            payload_type: MetricsPayloadType::Sketches,
+        }
+    }
+
+    /// Creates a new V3 series payload info.
+    pub const fn v3_series() -> Self {
+        Self {
+            version: MetricsProtocolVersion::V3,
+            payload_type: MetricsPayloadType::Series,
+        }
+    }
+
+    /// Creates a new V3 sketches payload info.
+    pub const fn v3_sketches() -> Self {
+        Self {
+            version: MetricsProtocolVersion::V3,
+            payload_type: MetricsPayloadType::Sketches,
+        }
+    }
+
+    /// Returns true if this is a sketch payload.
+    pub const fn is_sketch(&self) -> bool {
+        matches!(self.payload_type, MetricsPayloadType::Sketches)
+    }
+}
+
+/// V3 API settings for a specific metric type (series or sketches).
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Facet)]
+pub struct V3ApiSettings {
+    /// Endpoints that should receive V3 payloads for this metric type.
+    ///
+    /// Each entry should be a configured endpoint name, such as `https://app.datadoghq.com`.
+    /// If empty, no V3 payloads are generated for this metric type.
+    #[serde(default)]
+    pub endpoints: Vec<String>,
+
+    /// Whether to also send V2 payloads to V3-enabled endpoints (validation mode).
+    ///
+    /// When true, endpoints in the `endpoints` list receive both V2 and V3 payloads.
+    /// When false, endpoints in the `endpoints` list receive only V3 payloads.
+    #[serde(default)]
+    pub validate: bool,
+
+    /// Whether to use the beta V3 route for this metric type.
+    ///
+    /// This only applies to series metrics. Sketches always use the standard V3 sketches route.
+    #[serde(default)]
+    pub use_beta: bool,
+
+    /// Beta V3 route to use when `use_beta` is enabled for series metrics.
+    ///
+    /// Defaults to `/api/intake/metrics/v3beta/series`.
+    #[serde(default = "default_v3_beta_series_route")]
+    pub beta_route: String,
+}
+
+impl Default for V3ApiSettings {
+    fn default() -> Self {
+        Self {
+            endpoints: Vec::new(),
+            validate: false,
+            use_beta: false,
+            beta_route: default_v3_beta_series_route(), // same helper as the serde default on `beta_route`
+        }
+    }
+}
+
+impl V3ApiSettings {
+    /// Returns true if V3 is enabled, i.e. at least one endpoint is listed in `endpoints`.
+    pub fn is_enabled(&self) -> bool {
+        !self.endpoints.is_empty()
+    }
+}
+
+/// V3 API configuration for per-endpoint V3 support.
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, Facet)] +pub struct V3ApiConfig { + /// V3 settings for series metrics (counters, gauges, rates, sets). + #[serde(default)] + pub series: V3ApiSettings, + + /// V3 settings for sketch metrics (histograms, distributions). + #[serde(default)] + pub sketches: V3ApiSettings, + + /// Override compression level for V3 payloads. + /// + /// Defaults to `0`; the metrics encoder only applies this override when it is greater than `0`, otherwise it uses the normal serializer compression level. + #[serde(default)] + pub compression_level: i32, +} + +impl V3ApiConfig { + /// Returns true if V3 is enabled for series metrics. + pub fn use_v3_series(&self) -> bool { + self.series.is_enabled() + } + + /// Returns true if V3 is enabled for sketch metrics. + pub fn use_v3_sketches(&self) -> bool { + self.sketches.is_enabled() + } + + /// Returns true if validation mode is enabled for series metrics. + pub fn use_v3_series_validate(&self) -> bool { + self.series.is_enabled() && self.series.validate + } + + /// Returns true if validation mode is enabled for sketch metrics. + pub fn use_v3_sketches_validate(&self) -> bool { + self.sketches.is_enabled() && self.sketches.validate + } +} diff --git a/lib/saluki-components/src/common/datadog/transaction.rs b/lib/saluki-components/src/common/datadog/transaction.rs index 5c946b0b9a5..e2258aeccb6 100644 --- a/lib/saluki-components/src/common/datadog/transaction.rs +++ b/lib/saluki-components/src/common/datadog/transaction.rs @@ -10,6 +10,8 @@ use pin_project::pin_project; use saluki_io::net::util::retry::{EventContainer, Retryable}; use serde::{ser::SerializeSeq as _, Deserialize, Serialize, Serializer}; +use super::protocol::MetricsPayloadInfo; + /// Data type for the body of `TransactionBody`. pub enum TransactionBodyData where @@ -178,6 +180,12 @@ pub struct Metadata { /// Number of metric data points represented by this transaction.
#[serde(default)] pub data_point_count: usize, + + /// Payload info containing protocol version and metric type, if applicable. + /// + /// This is `Some` for metrics payloads and `None` for non-metrics payloads. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub payload_info: Option<MetricsPayloadInfo>, } impl Metadata { @@ -186,6 +194,7 @@ impl Metadata { Self { event_count, data_point_count, + payload_info: None, } } } diff --git a/lib/saluki-components/src/config_registry/datadog/encoders.rs b/lib/saluki-components/src/config_registry/datadog/encoders.rs index db59dd4259c..3c05967bd4a 100644 --- a/lib/saluki-components/src/config_registry/datadog/encoders.rs +++ b/lib/saluki-components/src/config_registry/datadog/encoders.rs @@ -111,6 +111,105 @@ crate::declare_annotations! { test_json: None, }; + /// `serializer_experimental_use_v3_api.compression_level`—V3 API compression level. + /// Schema declares Float; field is i32. + SERIALIZER_EXPERIMENTAL_USE_V3_API_COMPRESSION_LEVEL = SalukiAnnotation { + schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_COMPRESSION_LEVEL, + support_level: SupportLevel::Full, + additional_yaml_paths: &[], + env_var_override: None, + used_by: &[ + structs::DATADOG_METRICS_CONFIGURATION, + structs::FORWARDER_CONFIGURATION, + ], + value_type_override: Some(ValueType::Integer), + test_json: None, + }; + + /// `serializer_experimental_use_v3_api.series.beta_route`—V3 beta intake route path for series. + SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_BETA_ROUTE = SalukiAnnotation { + schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_BETA_ROUTE, + support_level: SupportLevel::Full, + additional_yaml_paths: &[], + env_var_override: None, + used_by: &[ + structs::DATADOG_METRICS_CONFIGURATION, + structs::FORWARDER_CONFIGURATION, + ], + value_type_override: None, + test_json: None, + }; + + /// `serializer_experimental_use_v3_api.series.endpoints`—endpoints enabled for V3 series API.
+ SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_ENDPOINTS = SalukiAnnotation { + schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_ENDPOINTS, + support_level: SupportLevel::Full, + additional_yaml_paths: &[], + env_var_override: None, + used_by: &[ + structs::DATADOG_METRICS_CONFIGURATION, + structs::FORWARDER_CONFIGURATION, + ], + value_type_override: None, + test_json: None, + }; + + /// `serializer_experimental_use_v3_api.series.use_beta`—use the V3 beta route for series. + SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_USE_BETA = SalukiAnnotation { + schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_USE_BETA, + support_level: SupportLevel::Full, + additional_yaml_paths: &[], + env_var_override: None, + used_by: &[ + structs::DATADOG_METRICS_CONFIGURATION, + structs::FORWARDER_CONFIGURATION, + ], + value_type_override: None, + test_json: None, + }; + + /// `serializer_experimental_use_v3_api.series.validate`—dual-send V2 and V3 series for validation. + SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_VALIDATE = SalukiAnnotation { + schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_VALIDATE, + support_level: SupportLevel::Full, + additional_yaml_paths: &[], + env_var_override: None, + used_by: &[ + structs::DATADOG_METRICS_CONFIGURATION, + structs::FORWARDER_CONFIGURATION, + ], + value_type_override: None, + test_json: None, + }; + + /// `serializer_experimental_use_v3_api.sketches.endpoints`—endpoints enabled for V3 sketches API. + SERIALIZER_EXPERIMENTAL_USE_V3_API_SKETCHES_ENDPOINTS = SalukiAnnotation { + schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SKETCHES_ENDPOINTS, + support_level: SupportLevel::Full, + additional_yaml_paths: &[], + env_var_override: None, + used_by: &[ + structs::DATADOG_METRICS_CONFIGURATION, + structs::FORWARDER_CONFIGURATION, + ], + value_type_override: None, + test_json: None, + }; + + /// `serializer_experimental_use_v3_api.sketches.validate`—dual-send V2 and V3 sketches for validation. 
+ SERIALIZER_EXPERIMENTAL_USE_V3_API_SKETCHES_VALIDATE = SalukiAnnotation { + schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SKETCHES_VALIDATE, + support_level: SupportLevel::Full, + additional_yaml_paths: &[], + env_var_override: None, + used_by: &[ + structs::DATADOG_METRICS_CONFIGURATION, + structs::FORWARDER_CONFIGURATION, + ], + value_type_override: None, + test_json: None, + }; + /// `serializer_max_series_payload_size`—max compressed V2 series payload size. SERIALIZER_MAX_SERIES_PAYLOAD_SIZE = SalukiAnnotation { schema: &schema::SERIALIZER_MAX_SERIES_PAYLOAD_SIZE, diff --git a/lib/saluki-components/src/config_registry/datadog/unsupported.rs b/lib/saluki-components/src/config_registry/datadog/unsupported.rs index 0c20724a4e4..0a6d48d97d8 100644 --- a/lib/saluki-components/src/config_registry/datadog/unsupported.rs +++ b/lib/saluki-components/src/config_registry/datadog/unsupported.rs @@ -170,66 +170,6 @@ crate::declare_annotations! { test_json: None, }; - /// `serializer_experimental_use_v3_api.compression_level` - V3 API compression level. - SERIALIZER_EXPERIMENTAL_USE_V3_API_COMPRESSION_LEVEL = SalukiAnnotation { - schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_COMPRESSION_LEVEL, - // V3 metrics API not implemented. #1468 - support_level: SupportLevel::Incompatible(Severity::Low), - additional_yaml_paths: &[], - env_var_override: None, - used_by: &[], - value_type_override: None, - test_json: None, - }; - - /// `serializer_experimental_use_v3_api.series.endpoints` - V3 API series endpoints. - SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_ENDPOINTS = SalukiAnnotation { - schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_ENDPOINTS, - // V3 metrics API not implemented. #1468 - support_level: SupportLevel::Incompatible(Severity::Low), - additional_yaml_paths: &[], - env_var_override: None, - used_by: &[], - value_type_override: None, - test_json: None, - }; - - /// `serializer_experimental_use_v3_api.series.validate` - V3 API series validation. 
- SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_VALIDATE = SalukiAnnotation { - schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SERIES_VALIDATE, - // V3 metrics API not implemented. #1468 - support_level: SupportLevel::Incompatible(Severity::Low), - additional_yaml_paths: &[], - env_var_override: None, - used_by: &[], - value_type_override: None, - test_json: None, - }; - - /// `serializer_experimental_use_v3_api.sketches.endpoints` - V3 API sketches endpoints. - SERIALIZER_EXPERIMENTAL_USE_V3_API_SKETCHES_ENDPOINTS = SalukiAnnotation { - schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SKETCHES_ENDPOINTS, - // V3 metrics API not implemented. #1468 - support_level: SupportLevel::Incompatible(Severity::Low), - additional_yaml_paths: &[], - env_var_override: None, - used_by: &[], - value_type_override: None, - test_json: None, - }; - - /// `serializer_experimental_use_v3_api.sketches.validate` - V3 API sketches validation. - SERIALIZER_EXPERIMENTAL_USE_V3_API_SKETCHES_VALIDATE = SalukiAnnotation { - schema: &schema::SERIALIZER_EXPERIMENTAL_USE_V3_API_SKETCHES_VALIDATE, - // V3 metrics API not implemented. #1468 - support_level: SupportLevel::Incompatible(Severity::Low), - additional_yaml_paths: &[], - env_var_override: None, - used_by: &[], - value_type_override: None, - test_json: None, - }; - /// `serializer_max_series_points_per_payload` - max series points per payload. 
SERIALIZER_MAX_SERIES_POINTS_PER_PAYLOAD = SalukiAnnotation { schema: &schema::SERIALIZER_MAX_SERIES_POINTS_PER_PAYLOAD, diff --git a/lib/saluki-components/src/encoders/datadog/metrics/endpoint.rs b/lib/saluki-components/src/encoders/datadog/metrics/endpoint.rs new file mode 100644 index 00000000000..a567848ec03 --- /dev/null +++ b/lib/saluki-components/src/encoders/datadog/metrics/endpoint.rs @@ -0,0 +1,64 @@ +use saluki_context::tags::SharedTagSet; +use saluki_core::data_model::event::metric::{Metric, MetricValues}; +use saluki_io::compression::CompressionScheme; + +/// Metrics intake endpoint. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum MetricsEndpoint { + /// V1 series metrics, encoded as JSON and sent to `/api/v1/series`. + /// + /// Includes counters, gauges, rates, and sets. Selected when `use_v2_api_series` is `false`. + SeriesV1, + + /// V2 series metrics, encoded as Protocol Buffers and sent to `/api/v2/series`. + /// + /// Includes counters, gauges, rates, and sets. The default series encoding. + SeriesV2, + + /// Sketch metrics, encoded as Protocol Buffers and sent to `/api/beta/sketches`. + /// + /// Includes histograms and distributions. Always uses the V2 endpoint regardless of `use_v2_api_series`. + Sketches, +} + +impl MetricsEndpoint { + /// Creates a new `MetricsEndpoint` from the given metric. + pub fn from_metric(metric: &Metric) -> Self { + match metric.values() { + MetricValues::Counter(..) | MetricValues::Rate(..) | MetricValues::Gauge(..) | MetricValues::Set(..) => { + Self::SeriesV2 + } + MetricValues::Histogram(..) | MetricValues::Distribution(..) 
=> Self::Sketches, + } + } +} + +pub struct EndpointConfiguration { + compression_scheme: CompressionScheme, + max_metrics_per_payload: usize, + additional_tags: SharedTagSet, +} + +impl EndpointConfiguration { + pub fn new( + compression_scheme: CompressionScheme, max_metrics_per_payload: usize, additional_tags: Option<SharedTagSet>, + ) -> Self { + Self { + compression_scheme, + max_metrics_per_payload, + additional_tags: additional_tags.unwrap_or_default(), + } + } + + pub fn compression_scheme(&self) -> CompressionScheme { + self.compression_scheme + } + + pub fn max_metrics_per_payload(&self) -> usize { + self.max_metrics_per_payload + } + + pub fn additional_tags(&self) -> &SharedTagSet { + &self.additional_tags + } +} diff --git a/lib/saluki-components/src/encoders/datadog/metrics/mod.rs b/lib/saluki-components/src/encoders/datadog/metrics/mod.rs index 17cd3021666..da9454da6e2 100644 --- a/lib/saluki-components/src/encoders/datadog/metrics/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/metrics/mod.rs @@ -1,13 +1,15 @@ -use std::{fmt, num::NonZeroU64, time::Duration}; +use std::{collections::VecDeque, ops::Range, time::Duration}; use async_trait::async_trait; -use datadog_protos::metrics as proto; use ddsketch::DDSketch; use facet::Facet; -use http::{uri::PathAndQuery, HeaderValue, Method, Uri}; -use protobuf::{rt::WireType, CodedOutputStream, Enum as _}; +use http::{HeaderValue, Method, Request}; +use protobuf::{rt::WireType, CodedOutputStream}; use resource_accounting::{MemoryBounds, MemoryBoundsBuilder}; -use saluki_common::{iter::ReusableDeduplicator, task::HandleExt as _}; +use saluki_common::{ + buf::{ChunkedBytesBuffer, FrozenChunkedBytesBuffer}, + task::HandleExt as _, +}; use saluki_config::GenericConfiguration; use saluki_context::tags::{SharedTagSet, Tag}; use saluki_core::{ @@ -23,78 +25,40 @@ use saluki_core::{ topology::{EventsBuffer, PayloadsBuffer}, }; use saluki_error::{generic_error, ErrorContext as _, GenericError}; -use
saluki_io::compression::CompressionScheme; +use saluki_io::compression::{CompressionScheme, Compressor}; use saluki_metrics::MetricsBuilder; use serde::Deserialize; -use serde_json::{Map as JsonMap, Number as JsonNumber, Value as JsonValue}; -use tokio::{select, sync::mpsc, time::sleep}; +use tokio::{io::AsyncWriteExt as _, select, sync::mpsc, time::sleep}; use tracing::{debug, error, warn}; - -use crate::common::datadog::{ - clamp_payload_limits, - io::RB_BUFFER_CHUNK_SIZE, - request_builder::{EndpointEncoder, RequestBuilder}, - telemetry::ComponentTelemetry, - DEFAULT_SERIALIZER_COMPRESSED_SIZE_LIMIT, DEFAULT_SERIALIZER_UNCOMPRESSED_SIZE_LIMIT, METRICS_SERIES_V1_PATH, - METRICS_SERIES_V2_PATH, METRICS_SKETCHES_PATH, +use uuid::Uuid; + +use self::v3::{V3EncodedRequest, V3PayloadLimits, V3PayloadRequest}; +use crate::{ + common::datadog::{ + clamp_payload_limits, + io::RB_BUFFER_CHUNK_SIZE, + protocol::{MetricsPayloadInfo, V3ApiConfig}, + request_builder::RequestBuilder, + telemetry::ComponentTelemetry, + DEFAULT_SERIALIZER_COMPRESSED_SIZE_LIMIT, DEFAULT_SERIALIZER_UNCOMPRESSED_SIZE_LIMIT, METRICS_SERIES_V3_PATH, + METRICS_SKETCHES_V3_PATH, + }, + encoders::datadog::metrics::v2::MetricsEndpointEncoder, }; -const SERIES_V2_COMPRESSED_SIZE_LIMIT: usize = 512_000; // 500 KiB -const SERIES_V2_UNCOMPRESSED_SIZE_LIMIT: usize = 5_242_880; // 5 MiB +mod endpoint; +use self::endpoint::{EndpointConfiguration, MetricsEndpoint}; -// V1 series JSON endpoint limits match the Datadog Agent's generic serializer defaults. 
-const SERIES_V1_COMPRESSED_SIZE_LIMIT: usize = DEFAULT_SERIALIZER_COMPRESSED_SIZE_LIMIT; -const SERIES_V1_UNCOMPRESSED_SIZE_LIMIT: usize = DEFAULT_SERIALIZER_UNCOMPRESSED_SIZE_LIMIT; +mod v1; +mod v2; +mod v3; const DEFAULT_SERIALIZER_COMPRESSOR_KIND: &str = "zstd"; +const V3_SERIES_ENDPOINT_URI: &str = METRICS_SERIES_V3_PATH; +const V3_SKETCHES_ENDPOINT_URI: &str = METRICS_SKETCHES_V3_PATH; -// Protocol Buffers field numbers for series and sketch payload messages. -// -// These field numbers come from the Protocol Buffers definitions in `lib/datadog-protos/proto/agent_payload.proto`. -const RESOURCES_TYPE_FIELD_NUMBER: u32 = 1; -const RESOURCES_NAME_FIELD_NUMBER: u32 = 2; - -const METADATA_ORIGIN_FIELD_NUMBER: u32 = 1; - -const ORIGIN_ORIGIN_PRODUCT_FIELD_NUMBER: u32 = 4; -const ORIGIN_ORIGIN_CATEGORY_FIELD_NUMBER: u32 = 5; -const ORIGIN_ORIGIN_SERVICE_FIELD_NUMBER: u32 = 6; - -const METRIC_POINT_VALUE_FIELD_NUMBER: u32 = 1; -const METRIC_POINT_TIMESTAMP_FIELD_NUMBER: u32 = 2; - -const DOGSKETCH_TS_FIELD_NUMBER: u32 = 1; -const DOGSKETCH_CNT_FIELD_NUMBER: u32 = 2; -const DOGSKETCH_MIN_FIELD_NUMBER: u32 = 3; -const DOGSKETCH_MAX_FIELD_NUMBER: u32 = 4; -const DOGSKETCH_AVG_FIELD_NUMBER: u32 = 5; -const DOGSKETCH_SUM_FIELD_NUMBER: u32 = 6; -const DOGSKETCH_K_FIELD_NUMBER: u32 = 7; -const DOGSKETCH_N_FIELD_NUMBER: u32 = 8; - -const SERIES_RESOURCES_FIELD_NUMBER: u32 = 1; -const SERIES_METRIC_FIELD_NUMBER: u32 = 2; -const SERIES_TAGS_FIELD_NUMBER: u32 = 3; -const SERIES_POINTS_FIELD_NUMBER: u32 = 4; -const SERIES_TYPE_FIELD_NUMBER: u32 = 5; -const SERIES_UNIT_FIELD_NUMBER: u32 = 6; -const SERIES_SOURCE_TYPE_NAME_FIELD_NUMBER: u32 = 7; -const SERIES_INTERVAL_FIELD_NUMBER: u32 = 8; -const SERIES_METADATA_FIELD_NUMBER: u32 = 9; - -const SKETCH_METRIC_FIELD_NUMBER: u32 = 1; -const SKETCH_HOST_FIELD_NUMBER: u32 = 2; -const SKETCH_TAGS_FIELD_NUMBER: u32 = 4; -const SKETCH_DOGSKETCHES_FIELD_NUMBER: u32 = 7; -const SKETCH_METADATA_FIELD_NUMBER: u32 = 8; - -static 
CONTENT_TYPE_PROTOBUF: HeaderValue = HeaderValue::from_static("application/x-protobuf"); -static CONTENT_TYPE_JSON: HeaderValue = HeaderValue::from_static("application/json"); - -// JSON framing for the V1 series payload, which wraps the array of `Serie` objects in a top-level object. -const SERIES_V1_PAYLOAD_PREFIX: &[u8] = b"{\"series\":["; -const SERIES_V1_PAYLOAD_SUFFIX: &[u8] = b"]}"; -const SERIES_V1_INPUT_SEPARATOR: &[u8] = b","; +// V3 keeps the Datadog Agent's point-count limit as an internal bound, not user-facing ADP configuration. +const SERIES_V3_POINTS_PER_PAYLOAD_LIMIT: usize = 10_000; const fn default_max_metrics_per_payload() -> usize { 10_000 @@ -109,11 +73,11 @@ const fn default_max_uncompressed_payload_size() -> usize { } const fn default_max_series_payload_size() -> usize { - SERIES_V2_COMPRESSED_SIZE_LIMIT + v2::SERIES_V2_COMPRESSED_SIZE_LIMIT } const fn default_max_series_uncompressed_payload_size() -> usize { - SERIES_V2_UNCOMPRESSED_SIZE_LIMIT + v2::SERIES_V2_UNCOMPRESSED_SIZE_LIMIT } const fn default_flush_timeout_secs() -> u64 { @@ -132,12 +96,45 @@ const fn default_use_v2_api_series() -> bool { true } +/// Encoding mode for a metrics endpoint. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum MetricsEncoderMode { + /// Send V2 payloads only. + V2Only, + /// V3 is enabled for at least one endpoint; generate tagged V2 and V3 payloads so each endpoint + /// receives the protocol version configured for it. + V3Enabled, + /// Send both V2 and V3 payloads simultaneously with a shared batch ID for backend validation. 
+ Validation, +} + +impl MetricsEncoderMode { + fn from_config(use_v3: bool, validate: bool) -> Self { + match (use_v3, validate) { + (false, _) => Self::V2Only, + (true, false) => Self::V3Enabled, + (true, true) => Self::Validation, + } + } + + fn needs_v3(self) -> bool { + matches!(self, Self::V3Enabled | Self::Validation) + } + + fn needs_batch_id(self) -> bool { + matches!(self, Self::Validation) + } + + fn needs_tagging(self) -> bool { + matches!(self, Self::V3Enabled | Self::Validation) + } +} + /// Datadog Metrics encoder. /// /// Generates Datadog metrics payloads for the Datadog platform. #[derive(Clone, Deserialize, Facet)] #[cfg_attr(test, derive(Debug, PartialEq, serde::Serialize))] -#[allow(dead_code)] pub struct DatadogMetricsConfiguration { /// Maximum number of input metrics to encode into a single request payload. /// @@ -209,7 +206,7 @@ pub struct DatadogMetricsConfiguration { /// Flush timeout for pending requests, in seconds. /// - /// When the destination has written metrics to the in-flight request payload, but it hasn't yet reached the + /// When the destination has written metrics to the in-flight request payload, but it has not yet reached the /// payload size limits that would force the payload to be flushed, the destination will wait for a period of time /// before flushing the in-flight request payload. This allows for the possibility of other events to be processed /// and written into the request payload, thereby maximizing the payload size and reducing the number of requests @@ -251,6 +248,12 @@ pub struct DatadogMetricsConfiguration { #[serde(default, skip)] #[facet(opaque)] additional_tags: Option, + + /// V3 API configuration for per-endpoint V3 support. + /// + /// Configures which endpoints receive V3 payloads and whether validation mode is enabled. 
+ #[serde(rename = "serializer_experimental_use_v3_api", default)] + v3_api: V3ApiConfig, } impl DatadogMetricsConfiguration { @@ -261,7 +264,6 @@ impl DatadogMetricsConfiguration { /// Sets additional tags to be applied uniformly to all metrics forwarded by this destination. pub fn with_additional_tags(mut self, additional_tags: SharedTagSet) -> Self { - // Add the additional tags to the forwarder configuration. self.additional_tags = Some(additional_tags); self } @@ -280,25 +282,47 @@ impl EncoderBuilder for DatadogMetricsConfiguration { async fn build(&self, context: ComponentContext) -> Result, GenericError> { let metrics_builder = MetricsBuilder::from_component_context(&context); let telemetry = ComponentTelemetry::from_builder(&metrics_builder); - let compression_scheme = CompressionScheme::new(&self.compressor_kind, self.zstd_compressor_level); - // Create our request builders. - let series_endpoint = if self.use_v2_api_series { - MetricsEndpoint::SeriesV2 + let v2_compression_scheme = CompressionScheme::new(&self.compressor_kind, self.zstd_compressor_level); + let v3_compression_scheme = if self.v3_api.compression_level > 0 { + CompressionScheme::new(&self.compressor_kind, self.v3_api.compression_level) } else { - MetricsEndpoint::SeriesV1 + v2_compression_scheme + }; + let v3_series_endpoint_uri = if self.v3_api.series.use_beta { + self.v3_api.series.beta_route.clone() + } else { + V3_SERIES_ENDPOINT_URI.to_string() }; - let mut series_encoder = MetricsEndpointEncoder::from_endpoint(series_endpoint); - let mut sketches_encoder = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::Sketches); + let v3_payload_limits = V3PayloadLimits::new( + self.max_series_payload_size, + self.max_series_uncompressed_payload_size, + self.max_metrics_per_payload, + SERIES_V3_POINTS_PER_PAYLOAD_LIMIT, + ); - if let Some(additional_tags) = self.additional_tags.as_ref() { - series_encoder = series_encoder.with_additional_tags(additional_tags.clone()); - sketches_encoder = 
sketches_encoder.with_additional_tags(additional_tags.clone()); - } + let v2_endpoint_config = EndpointConfiguration::new( + v2_compression_scheme, + self.max_metrics_per_payload, + self.additional_tags.clone(), + ); + let v3_endpoint_config = EndpointConfiguration::new( + v3_compression_scheme, + self.max_metrics_per_payload, + self.additional_tags.clone(), + ); - let mut series_rb = RequestBuilder::new(series_encoder, compression_scheme, RB_BUFFER_CHUNK_SIZE).await?; - series_rb.with_max_inputs_per_payload(self.max_metrics_per_payload); + // Derive the encoding mode for each metric type from the configuration. + let series_mode = + MetricsEncoderMode::from_config(self.v3_api.use_v3_series(), self.v3_api.use_v3_series_validate()); + let sketches_mode = + MetricsEncoderMode::from_config(self.v3_api.use_v3_sketches(), self.v3_api.use_v3_sketches_validate()); + let series_endpoint = if self.use_v2_api_series { + MetricsEndpoint::SeriesV2 + } else { + MetricsEndpoint::SeriesV1 + }; let generic_payload_limits = clamp_payload_limits( self.max_uncompressed_payload_size, self.max_payload_size, @@ -309,18 +333,24 @@ impl EncoderBuilder for DatadogMetricsConfiguration { clamp_payload_limits( self.max_series_uncompressed_payload_size, self.max_series_payload_size, - SERIES_V2_UNCOMPRESSED_SIZE_LIMIT, - SERIES_V2_COMPRESSED_SIZE_LIMIT, + v2::SERIES_V2_UNCOMPRESSED_SIZE_LIMIT, + v2::SERIES_V2_COMPRESSED_SIZE_LIMIT, ) } else { generic_payload_limits }; - series_rb.with_len_limits(series_uncompressed_limit, series_compressed_limit)?; + let mut v2_series_builder = v2::create_v2_request_builder(series_endpoint, &v2_endpoint_config) + .await + .error_context("Failed to create V2 series request builder.")?; + v2_series_builder.with_len_limits(series_uncompressed_limit, series_compressed_limit)?; + let v2_series_builder = Some(v2_series_builder); - let mut sketches_rb = RequestBuilder::new(sketches_encoder, compression_scheme, RB_BUFFER_CHUNK_SIZE).await?; - 
sketches_rb.with_max_inputs_per_payload(self.max_metrics_per_payload); let (sketches_uncompressed_limit, sketches_compressed_limit) = generic_payload_limits; - sketches_rb.with_len_limits(sketches_uncompressed_limit, sketches_compressed_limit)?; + let mut v2_sketch_builder = v2::create_v2_request_builder(MetricsEndpoint::Sketches, &v2_endpoint_config) + .await + .error_context("Failed to create V2 sketches request builder.")?; + v2_sketch_builder.with_len_limits(sketches_uncompressed_limit, sketches_compressed_limit)?; + let v2_sketch_builder = Some(v2_sketch_builder); let flush_timeout = match self.flush_timeout_secs { // We always give ourselves a minimum flush timeout of 10ms to allow for some very minimal amount of @@ -329,9 +359,24 @@ impl EncoderBuilder for DatadogMetricsConfiguration { secs => Duration::from_secs(secs), }; + if series_mode.needs_v3() || sketches_mode.needs_v3() { + debug!( + ?series_mode, + ?sketches_mode, + v3_series_endpoints = ?self.v3_api.series.endpoints, + v3_sketches_endpoints = ?self.v3_api.sketches.endpoints, + "V3 encoding support is enabled." 
+ ); + } + Ok(Box::new(DatadogMetrics { - series_rb, - sketches_rb, + v2_series_builder, + v2_sketch_builder, + series_mode, + sketches_mode, + v3_endpoint_config, + v3_payload_limits, + v3_series_endpoint_uri, telemetry, flush_timeout, })) @@ -363,8 +408,13 @@ impl MemoryBounds for DatadogMetricsConfiguration { } pub struct DatadogMetrics { - series_rb: RequestBuilder, - sketches_rb: RequestBuilder, + v2_series_builder: Option>, + v2_sketch_builder: Option>, + series_mode: MetricsEncoderMode, + sketches_mode: MetricsEncoderMode, + v3_endpoint_config: EndpointConfiguration, + v3_payload_limits: V3PayloadLimits, + v3_series_endpoint_uri: String, telemetry: ComponentTelemetry, flush_timeout: Duration, } @@ -373,8 +423,13 @@ pub struct DatadogMetrics { impl Encoder for DatadogMetrics { async fn run(mut self: Box, mut context: EncoderContext) -> Result<(), GenericError> { let Self { - series_rb, - sketches_rb, + v2_series_builder, + v2_sketch_builder, + series_mode, + sketches_mode, + v3_endpoint_config, + v3_payload_limits, + v3_series_endpoint_uri, telemetry, flush_timeout, } = *self; @@ -384,8 +439,19 @@ impl Encoder for DatadogMetrics { // Spawn our request builder task. 
let (events_tx, events_rx) = mpsc::channel(8); let (payloads_tx, mut payloads_rx) = mpsc::channel(8); - let request_builder_fut = - run_request_builder(series_rb, sketches_rb, telemetry, events_rx, payloads_tx, flush_timeout); + let request_builder_fut = run_request_builder( + v2_series_builder, + v2_sketch_builder, + series_mode, + sketches_mode, + v3_endpoint_config, + v3_payload_limits, + v3_series_endpoint_uri, + telemetry, + events_rx, + payloads_tx, + flush_timeout, + ); let request_builder_handle = context .topology_context() .global_thread_pool() @@ -438,15 +504,33 @@ impl Encoder for DatadogMetrics { } } +#[allow(clippy::too_many_arguments)] async fn run_request_builder( - mut series_request_builder: RequestBuilder, - mut sketches_request_builder: RequestBuilder, telemetry: ComponentTelemetry, - mut events_rx: mpsc::Receiver, payloads_tx: mpsc::Sender, flush_timeout: Duration, + mut v2_series_builder: Option>, + mut v2_sketch_builder: Option>, series_mode: MetricsEncoderMode, + sketches_mode: MetricsEncoderMode, v3_endpoint_config: EndpointConfiguration, v3_payload_limits: V3PayloadLimits, + v3_series_endpoint_uri: String, telemetry: ComponentTelemetry, mut events_rx: mpsc::Receiver, + mut payloads_tx: mpsc::Sender, flush_timeout: Duration, ) -> Result<(), GenericError> { let mut pending_flush = false; let pending_flush_timeout = sleep(flush_timeout); tokio::pin!(pending_flush_timeout); + let mut v3_series_metrics = series_mode.needs_v3().then(Vec::::new); + let mut v3_sketch_metrics = sketches_mode.needs_v3().then(Vec::::new); + + let mut series_batch_id = None; + let mut sketches_batch_id = None; + + let tag_series = series_mode.needs_tagging(); + let tag_sketches = sketches_mode.needs_tagging(); + let v3_flush_context = V3FlushContext { + endpoint_config: &v3_endpoint_config, + payload_limits: v3_payload_limits, + series_endpoint_uri: &v3_series_endpoint_uri, + telemetry: &telemetry, + }; + loop { select! 
{ Some(event_buffer) = events_rx.recv() => { @@ -456,63 +540,103 @@ async fn run_request_builder( None => continue, }; - // Series metrics (counters, gauges, rates, sets) and sketch metrics (histograms, distributions) - // route to their respective request builders. Whether the series builder targets the V1 or V2 - // intake is decided once at builder time based on `use_v2_api_series`. - let request_builder = match metric.values() { - MetricValues::Counter(..) - | MetricValues::Rate(..) - | MetricValues::Gauge(..) - | MetricValues::Set(..) => &mut series_request_builder, - MetricValues::Histogram(..) | MetricValues::Distribution(..) => &mut sketches_request_builder, + // Figure out which endpoint the metric belongs to, and grab the relevant V2 builder/V3 storage. + let endpoint = MetricsEndpoint::from_metric(&metric); + let (endpoint_mode, maybe_v2_builder, maybe_v3_metrics, batch_id) = match endpoint { + MetricsEndpoint::SeriesV1 | MetricsEndpoint::SeriesV2 => ( + series_mode, + &mut v2_series_builder, + &mut v3_series_metrics, + &mut series_batch_id, + ), + MetricsEndpoint::Sketches => ( + sketches_mode, + &mut v2_sketch_builder, + &mut v3_sketch_metrics, + &mut sketches_batch_id, + ), }; + if endpoint_mode.needs_batch_id() && batch_id.is_none() { + *batch_id = Some(Uuid::now_v7()); + } + let active_batch_id = endpoint_mode.needs_batch_id().then_some(batch_id.as_ref()).flatten(); + + // Store a copy of the metric in `maybe_v3_metrics` if it's present. + // + // We have to do this before encoding because `RequestBuilder::encode` consumes the metric. This also means we'll + // need to _remove_ the metric if encoding fails. + if let Some(metrics) = maybe_v3_metrics { + metrics.push(metric.clone()); + } - // Encode the metric. If we get it back, that means the current request is full, and we need to - // flush it before we can try to encode the metric again... so we'll hold on to it in that case - // before flushing and trying to encode it again. 
- let metric_to_retry = match request_builder.encode(metric).await { - Ok(None) => continue, - Ok(Some(metric)) => metric, - Err(e) => { - error!(error = %e, "Failed to encode metric."); - telemetry.events_dropped_encoder().increment(1); - continue; - } + // Attempt encoding the metric for V2 if configured. + // + // If the metric couldn't be encoded (too big, some other issue), the call returns `false` which is + // our signal to remove the metric from `maybe_v3_metrics` (if we added it), since we know now that + // the metric wasn't encoded for V2 and we want our V2/V3 payload batches to be consistent in + // validation mode. + let v2_payload_info = match endpoint { + MetricsEndpoint::SeriesV1 | MetricsEndpoint::SeriesV2 => tag_series.then(MetricsPayloadInfo::v2_series), + MetricsEndpoint::Sketches => tag_sketches.then(MetricsPayloadInfo::v2_sketches), }; + let v2_flushed = if let Some(builder) = maybe_v2_builder { + let result = encode_v2_metrics(builder, metric, &telemetry, &mut payloads_tx, active_batch_id, v2_payload_info).await?; + if !result.encoded() { + if let Some(metrics) = maybe_v3_metrics { + let _ = metrics.pop(); + } + } + result.flushed() + } else { + false + }; - let maybe_requests = request_builder.flush().await; - if maybe_requests.is_empty() { - panic!("builder told us to flush, but gave us nothing"); - } - - for maybe_request in maybe_requests { - match maybe_request { - Ok((events, data_points, request)) => { - let payload_meta = PayloadMetadata::from_event_and_data_point_count(events, data_points); - let http_payload = HttpPayload::new(payload_meta, request); - let payload = Payload::Http(http_payload); - - payloads_tx.send(payload).await - .map_err(|_| generic_error!("Failed to send payload to encoder."))?; - }, - - // TODO: Increment a counter here that metrics were dropped due to a flush failure. - Err(e) => if e.is_recoverable() { - // If the error is recoverable, we'll hold on to the metric to retry it later. 
- continue; - } else { - return Err(GenericError::from(e).context("Failed to flush request.")); + // If we flushed via V2, or we've hit our max metrics per payload limit in pure V3 mode, we need to flush our V3 metrics + // as well. + let v3_payload_info = match endpoint { + MetricsEndpoint::SeriesV1 | MetricsEndpoint::SeriesV2 => tag_series.then(MetricsPayloadInfo::v3_series), + MetricsEndpoint::Sketches => tag_sketches.then(MetricsPayloadInfo::v3_sketches), + }; + let mut carried_metric_into_next_batch = false; + let v3_flushed = if let Some(v3_metrics) = maybe_v3_metrics { + let should_flush_v3 = match endpoint_mode { + MetricsEncoderMode::V2Only => false, + MetricsEncoderMode::V3Enabled => { + v2_flushed || v3_flush_context.payload_limits.should_flush_metric_count_limit(v3_metrics) + } + MetricsEncoderMode::Validation => v2_flushed, + }; + if should_flush_v3 { + // V2 flushes the previous batch without the current metric (the metric + // that triggered the flush is re-encoded into the next V2 batch). Pop it + // from V3 before flushing so both batches cover the same set of metrics. + let split_metric = if v2_flushed { v3_metrics.pop() } else { None }; + encode_and_flush_v3_metrics( + endpoint, + v3_flush_context, + v3_metrics, + &mut payloads_tx, + active_batch_id, + v3_payload_info, + ) + .await?; + if let Some(m) = split_metric { + carried_metric_into_next_batch = true; + v3_metrics.push(m); } + true + } else { + false } - } + } else { + false + }; - // Now try to encode the metric again. If it fails again, we'll just log it because it shouldn't - // be possible to fail at this point, otherwise we would have already caught that the first - // time. - if let Err(e) = request_builder.encode(metric_to_retry).await { - error!(error = %e, "Failed to encode metric."); - telemetry.events_dropped_encoder().increment(1); + // If a V2-triggered split leaves the current metric pending in the next batch, assign that pending + // V2/V3 pair a fresh validation ID. 
Otherwise, the next timeout flush would omit validation headers. + if endpoint_mode.needs_batch_id() && (v2_flushed || v3_flushed) { + *batch_id = carried_metric_into_next_batch.then(Uuid::now_v7); } } @@ -529,51 +653,73 @@ async fn run_request_builder( pending_flush = false; - // Once we've encoded and written all metrics, we flush the request builders to generate a request with - // anything left over. Again, we'll enqueue those requests to be sent immediately. - let maybe_series_requests = series_request_builder.flush().await; - for maybe_request in maybe_series_requests { - match maybe_request { - Ok((events, data_points, request)) => { - let payload_meta = PayloadMetadata::from_event_and_data_point_count(events, data_points); - let http_payload = HttpPayload::new(payload_meta, request); - let payload = Payload::Http(http_payload); - - payloads_tx.send(payload).await - .map_err(|_| generic_error!("Failed to send payload to encoder."))?; - }, - - // TODO: Increment a counter here that metrics were dropped due to a flush failure. - Err(e) => if e.is_recoverable() { - // If the error is recoverable, we'll hold on to the metric to retry it later. - continue; - } else { - return Err(GenericError::from(e).context("Failed to flush request.")); + // Flush any pending series metrics. 
+ let v2_series_payload_info = tag_series.then(MetricsPayloadInfo::v2_series); + let series_active_batch_id = series_mode.needs_batch_id().then_some(series_batch_id.as_ref()).flatten(); + let mut v2_series_flush_succeeded = true; + if let Some(builder) = &mut v2_series_builder { + if let Err(e) = flush_v2_metrics(builder, &mut payloads_tx, series_active_batch_id, v2_series_payload_info).await { + error!(error = %e, "Failed to flush V2 series metrics: {}", e); + v2_series_flush_succeeded = false; + } + } + + let v3_series_payload_info = tag_series.then(MetricsPayloadInfo::v3_series); + if let Some(metrics) = &mut v3_series_metrics { + if v2_series_flush_succeeded { + if let Err(e) = encode_and_flush_v3_series_metrics( + v3_flush_context, + metrics, + &mut payloads_tx, + series_active_batch_id, + v3_series_payload_info, + ) + .await + { + error!(error = %e, "Failed to flush V3 series metrics: {}", e); } + } else { + warn!("Failed to flush V2 series metrics, skipping V3 series flush."); + metrics.clear(); } } + if series_mode.needs_batch_id() { + series_batch_id = None; + } - let maybe_sketches_requests = sketches_request_builder.flush().await; - for maybe_request in maybe_sketches_requests { - match maybe_request { - Ok((events, data_points, request)) => { - let payload_meta = PayloadMetadata::from_event_and_data_point_count(events, data_points); - let http_payload = HttpPayload::new(payload_meta, request); - let payload = Payload::Http(http_payload); - - payloads_tx.send(payload).await - .map_err(|_| generic_error!("Failed to send payload to encoder."))?; - }, - - // TODO: Increment a counter here that metrics were dropped due to a flush failure. - Err(e) => if e.is_recoverable() { - // If the error is recoverable, we'll hold on to the metric to retry it later. - continue; - } else { - return Err(GenericError::from(e).context("Failed to flush request.")); + // Flush any pending sketch metrics. 
+ let v2_sketches_payload_info = tag_sketches.then(MetricsPayloadInfo::v2_sketches); + let sketches_active_batch_id = sketches_mode.needs_batch_id().then_some(sketches_batch_id.as_ref()).flatten(); + let mut v2_sketches_flush_succeeded = true; + if let Some(builder) = &mut v2_sketch_builder { + if let Err(e) = flush_v2_metrics(builder, &mut payloads_tx, sketches_active_batch_id, v2_sketches_payload_info).await { + error!(error = %e, "Failed to flush V2 sketch metrics: {}", e); + v2_sketches_flush_succeeded = false; + } + } + + let v3_sketches_payload_info = tag_sketches.then(MetricsPayloadInfo::v3_sketches); + if let Some(metrics) = &mut v3_sketch_metrics { + if v2_sketches_flush_succeeded { + if let Err(e) = encode_and_flush_v3_sketch_metrics( + v3_flush_context, + metrics, + &mut payloads_tx, + sketches_active_batch_id, + v3_sketches_payload_info, + ) + .await + { + error!(error = %e, "Failed to flush V3 sketch metrics: {}", e); } + } else { + warn!("Failed to flush V2 sketch metrics, skipping V3 sketch flush."); + metrics.clear(); } } + if sketches_mode.needs_batch_id() { + sketches_batch_id = None; + } debug!("All flushed requests sent to I/O task. Waiting for next event buffer..."); }, @@ -586,999 +732,1118 @@ async fn run_request_builder( Ok(()) } -/// Metrics intake endpoint. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum MetricsEndpoint { - /// V1 series metrics, encoded as JSON and sent to `/api/v1/series`. - /// - /// Includes counters, gauges, rates, and sets. Selected when `use_v2_api.series` is `false`. - SeriesV1, - - /// V2 series metrics, encoded as Protocol Buffers and sent to `/api/v2/series`. - /// - /// Includes counters, gauges, rates, and sets. The default series encoding. - SeriesV2, - - /// Sketch metrics, encoded as Protocol Buffers and sent to `/api/beta/sketches`. - /// - /// Includes histograms and distributions. Always uses the V2 endpoint regardless of `use_v2_api.series`. 
- Sketches, +struct EncodeResult { + encoded: bool, + flushed: bool, } -/// Error returned when a metric fails to encode for either the V1 JSON or V2 protobuf intake. -#[derive(Debug)] -pub enum MetricsEncodeError { - /// Protobuf encoding failed. - Protobuf(protobuf::Error), +impl EncodeResult { + pub const fn new(encoded: bool, flushed: bool) -> Self { + Self { encoded, flushed } + } - /// JSON encoding failed. - Json(serde_json::Error), -} + pub const fn encoded(&self) -> bool { + self.encoded + } -impl fmt::Display for MetricsEncodeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Protobuf(e) => write!(f, "protobuf encode error: {}", e), - Self::Json(e) => write!(f, "json encode error: {}", e), - } + pub const fn flushed(&self) -> bool { + self.flushed } } -impl std::error::Error for MetricsEncodeError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - match self { - Self::Protobuf(e) => Some(e), - Self::Json(e) => Some(e), +async fn encode_v2_metrics( + request_builder: &mut RequestBuilder, metric: Metric, telemetry: &ComponentTelemetry, + payloads_tx: &mut mpsc::Sender, batch_id: Option<&Uuid>, payload_info: Option, +) -> Result { + // Encode the metric. If we get it back, that means the current request is full, and we need to + // flush it before we can try to encode the metric again... so we'll hold on to it in that case + // before flushing and trying to encode it again. 
+ let metric_to_retry = match request_builder.encode(metric).await { + Ok(None) => return Ok(EncodeResult::new(true, false)), + Ok(Some(metric)) => metric, + Err(e) => { + error!(error = %e, "Failed to encode metric."); + telemetry.events_dropped_encoder().increment(1); + return Ok(EncodeResult::new(false, false)); } - } -} + }; -impl From for MetricsEncodeError { - fn from(value: protobuf::Error) -> Self { - Self::Protobuf(value) + flush_v2_metrics(request_builder, payloads_tx, batch_id, payload_info).await?; + + // Now try to encode the metric again. If it fails again, we'll just log it because it shouldn't + // be possible to fail at this point, otherwise we would have already caught that the first + // time. + match request_builder.encode(metric_to_retry).await { + Ok(None) => Ok(EncodeResult::new(true, true)), + Ok(Some(_)) => unreachable!( + "failure to encode due to size should never occur after flush for metrics which aren't unencodable" + ), + Err(e) => { + error!(error = %e, "Failed to encode metric."); + telemetry.events_dropped_encoder().increment(1); + Ok(EncodeResult::new(false, true)) + } } } -impl From for MetricsEncodeError { - fn from(value: serde_json::Error) -> Self { - Self::Json(value) +async fn flush_v2_metrics( + request_builder: &mut RequestBuilder, payloads_tx: &mut mpsc::Sender, + batch_id: Option<&Uuid>, payload_info: Option, +) -> Result { + let mut requests_flushed = 0; + + let maybe_requests = request_builder.flush().await; + let batch_len = maybe_requests.len(); + for (batch_seq, maybe_request) in maybe_requests.into_iter().enumerate() { + match maybe_request { + Ok((events, data_points, request)) => { + requests_flushed += 1; + + flush_payload( + request, + events, + data_points, + payloads_tx, + batch_id, + batch_seq, + batch_len, + payload_info, + ) + .await?; + } + + // TODO: Increment a counter here that metrics were dropped due to a flush failure. 
+ Err(e) => { + if !e.is_recoverable() { + return Err(GenericError::from(e).context("Failed to flush request.")); + } + } + } } + + Ok(requests_flushed) } -#[derive(Debug)] -struct MetricsEndpointEncoder { - endpoint: MetricsEndpoint, - primary_scratch_buf: Vec, - secondary_scratch_buf: Vec, - packed_scratch_buf: Vec, - additional_tags: SharedTagSet, - tags_deduplicator: ReusableDeduplicator, +#[derive(Clone, Copy)] +struct V3FlushContext<'a> { + endpoint_config: &'a EndpointConfiguration, + payload_limits: V3PayloadLimits, + series_endpoint_uri: &'a str, + telemetry: &'a ComponentTelemetry, } -impl MetricsEndpointEncoder { - /// Creates a new `MetricsEndpointEncoder` for the given endpoint. - pub fn from_endpoint(endpoint: MetricsEndpoint) -> Self { - Self { - endpoint, - primary_scratch_buf: Vec::new(), - secondary_scratch_buf: Vec::new(), - packed_scratch_buf: Vec::new(), - additional_tags: SharedTagSet::default(), - tags_deduplicator: ReusableDeduplicator::new(), +async fn encode_and_flush_v3_metrics( + endpoint: MetricsEndpoint, context: V3FlushContext<'_>, metrics: &mut Vec, + payloads_tx: &mut mpsc::Sender, batch_id: Option<&Uuid>, payload_info: Option, +) -> Result<(), GenericError> { + match endpoint { + MetricsEndpoint::SeriesV1 | MetricsEndpoint::SeriesV2 => { + encode_and_flush_v3_series_metrics(context, metrics, payloads_tx, batch_id, payload_info).await + } + MetricsEndpoint::Sketches => { + encode_and_flush_v3_sketch_metrics(context, metrics, payloads_tx, batch_id, payload_info).await } } +} - /// Sets the additional tags to be included with every metric encoded by this encoder. - /// - /// These tags are added in a deduplicated fashion, the same as instrumented tags and origin tags. This is an - /// optimized codepath for tag inclusion in high-volume scenarios, where creating new additional contexts - /// through the traditional means (for example, `ContextResolver`) would be too expensive. 
- pub fn with_additional_tags(mut self, additional_tags: SharedTagSet) -> Self { - self.additional_tags = additional_tags; - self +async fn encode_and_flush_v3_series_metrics( + context: V3FlushContext<'_>, metrics: &mut Vec, payloads_tx: &mut mpsc::Sender, + batch_id: Option<&Uuid>, payload_info: Option, +) -> Result<(), GenericError> { + if metrics.is_empty() { + return Ok(()); + } + let metrics_to_flush = std::mem::take(metrics); + + let requests = encode_v3_payload_requests(context.series_endpoint_uri, &metrics_to_flush, context, "series").await; + let batch_len = requests.len(); + for (batch_seq, payload_request) in requests.into_iter().enumerate() { + flush_payload( + payload_request.request, + payload_request.event_count, + payload_request.data_point_count, + payloads_tx, + batch_id, + batch_seq, + batch_len, + payload_info, + ) + .await?; + debug!( + events = payload_request.event_count, + data_points = payload_request.data_point_count, + "Sent V3 series payload." + ); } -} -impl EndpointEncoder for MetricsEndpointEncoder { - type Input = Metric; - type EncodeError = MetricsEncodeError; + Ok(()) +} - fn encoder_name() -> &'static str { - "metrics" +async fn encode_and_flush_v3_sketch_metrics( + context: V3FlushContext<'_>, metrics: &mut Vec, payloads_tx: &mut mpsc::Sender, + batch_id: Option<&Uuid>, payload_info: Option, +) -> Result<(), GenericError> { + if metrics.is_empty() { + return Ok(()); } - - fn compressed_size_limit(&self) -> usize { - match self.endpoint { - MetricsEndpoint::SeriesV1 => SERIES_V1_COMPRESSED_SIZE_LIMIT, - MetricsEndpoint::SeriesV2 => SERIES_V2_COMPRESSED_SIZE_LIMIT, - MetricsEndpoint::Sketches => DEFAULT_SERIALIZER_COMPRESSED_SIZE_LIMIT, - } + let metrics_to_flush = std::mem::take(metrics); + + let requests = encode_v3_payload_requests(V3_SKETCHES_ENDPOINT_URI, &metrics_to_flush, context, "sketches").await; + let batch_len = requests.len(); + for (batch_seq, payload_request) in requests.into_iter().enumerate() { + flush_payload( + 
payload_request.request, + payload_request.event_count, + payload_request.data_point_count, + payloads_tx, + batch_id, + batch_seq, + batch_len, + payload_info, + ) + .await?; + debug!( + events = payload_request.event_count, + data_points = payload_request.data_point_count, + "Sent V3 sketches payload." + ); } - fn uncompressed_size_limit(&self) -> usize { - match self.endpoint { - MetricsEndpoint::SeriesV1 => SERIES_V1_UNCOMPRESSED_SIZE_LIMIT, - MetricsEndpoint::SeriesV2 => SERIES_V2_UNCOMPRESSED_SIZE_LIMIT, - MetricsEndpoint::Sketches => DEFAULT_SERIALIZER_UNCOMPRESSED_SIZE_LIMIT, + Ok(()) +} + +async fn encode_v3_payload_requests( + endpoint_uri: &str, metrics: &[Metric], context: V3FlushContext<'_>, payload_kind: &'static str, +) -> Vec { + let mut requests = Vec::new(); + let mut pending_ranges = split_v3_metric_ranges_by_point_limit(metrics, context, payload_kind); + + while let Some(range) = pending_ranges.pop_front() { + if range.is_empty() { + continue; } - } - fn input_data_point_count(&self, input: &Self::Input) -> usize { - input.values().len() - } + let metrics_in_range = &metrics[range.clone()]; + let event_count = metrics_in_range.len(); + let data_point_count = metrics_in_range.iter().map(|metric| metric.values().len()).sum(); - fn is_valid_input(&self, input: &Self::Input) -> bool { - let is_series_input = matches!( - input.values(), - MetricValues::Counter(..) | MetricValues::Rate(..) | MetricValues::Gauge(..) | MetricValues::Set(..) 
- ); + let encoded = match encode_v3_metrics_batch(metrics_in_range, context.endpoint_config.additional_tags()) { + Ok(encoded) => encoded, + Err(e) => { + error!(error = %e, payload_kind, events = event_count, "Failed to encode V3 metrics payload request."); + context.telemetry.events_dropped_encoder().increment(event_count as u64); + continue; + } + }; - match self.endpoint { - MetricsEndpoint::SeriesV1 | MetricsEndpoint::SeriesV2 => is_series_input, - MetricsEndpoint::Sketches => !is_series_input, + let encoded_request = + match create_v3_request(endpoint_uri, encoded, context.endpoint_config.compression_scheme()).await { + Ok(request) => request, + Err(e) => { + error!(error = %e, payload_kind, events = event_count, "Failed to create V3 metrics request."); + context.telemetry.events_dropped_encoder().increment(event_count as u64); + continue; + } + }; + + if context.payload_limits.request_fits(&encoded_request) { + requests.push(V3PayloadRequest { + request: encoded_request.request, + event_count, + data_point_count, + }); + continue; } - } - fn get_payload_prefix(&self) -> Option<&'static [u8]> { - match self.endpoint { - MetricsEndpoint::SeriesV1 => Some(SERIES_V1_PAYLOAD_PREFIX), - _ => None, + if range.len() == 1 { + // The encoded request is too large and this range cannot be split any further. + warn!( + payload_kind, + compressed_len = encoded_request.compressed_len, + compressed_limit = context.payload_limits.max_compressed_size, + uncompressed_len = encoded_request.uncompressed_len, + uncompressed_limit = context.payload_limits.max_uncompressed_size, + "Dropping oversized V3 metric that cannot be split further." + ); + context.telemetry.events_dropped_encoder().increment(1); + continue; } - } - fn get_payload_suffix(&self) -> Option<&'static [u8]> { - match self.endpoint { - MetricsEndpoint::SeriesV1 => Some(SERIES_V1_PAYLOAD_SUFFIX), - _ => None, - } + // Retry this oversized range as two smaller ranges, preserving the original metric order. 
+ let pivot = range.start + range.len() / 2; + pending_ranges.push_front(pivot..range.end); + pending_ranges.push_front(range.start..pivot); } - fn get_input_separator(&self) -> Option<&'static [u8]> { - match self.endpoint { - MetricsEndpoint::SeriesV1 => Some(SERIES_V1_INPUT_SEPARATOR), - _ => None, - } - } + requests +} - fn encode(&mut self, input: &Self::Input, buffer: &mut Vec) -> Result<(), Self::EncodeError> { - match self.endpoint { - MetricsEndpoint::SeriesV1 => { - encode_series_v1_metric(input, &self.additional_tags, buffer, &mut self.tags_deduplicator)?; - Ok(()) +fn split_v3_metric_ranges_by_point_limit( + metrics: &[Metric], context: V3FlushContext<'_>, payload_kind: &'static str, +) -> VecDeque> { + let mut ranges = VecDeque::new(); + let mut current_start = None; + let mut current_points = 0usize; + + for (idx, metric) in metrics.iter().enumerate() { + let metric_points = metric.values().len(); + if metric_points == 0 { + // The Agent drops zero-point V3 metrics before writing them. + if let Some(start) = current_start.take() { + if start < idx { + ranges.push_back(start..idx); + } } - MetricsEndpoint::SeriesV2 | MetricsEndpoint::Sketches => { - // NOTE: We're passing _four_ buffers to `encode_single_metric`, which is a lot, but with good reason. - // - // The first buffer, `buffer`, is the overall output buffer: the caller expects us to put the full - // encoded metric payload into this buffer. - // - // The second and third buffers, `primary_scratch_buf` and `secondary_scratch_buf`, are used for - // roughly the same thing but deal with _nesting_. When writing a "message" in Protocol Buffers, the - // message data itself is prefixed with the field number and a length delimiter that specifies how - // long the message is. 
We can't write that length delimiter until we know the full size of the - // message, so we write the message to a scratch buffer, calculate its size, and then write the field - // number and length delimiter to the output buffer followed by the message data from the scratch - // buffer. - // - // We have _two_ scratch buffers because you need a dedicated buffer for each level of nested message. - // We have to be able to nest up to two levels deep in our metrics payload, so we need two scratch - // buffers to handle that. - // - // The fourth buffer, `packed_scratch_buf`, is used for writing out packed repeated fields. This is - // similar to the situation describe above, except it's not _exactly_ the same as an additional level - // of nesting.. so I just decided to give it a somewhat more descriptive name. - encode_single_metric( - input, - &self.additional_tags, - buffer, - &mut self.primary_scratch_buf, - &mut self.secondary_scratch_buf, - &mut self.packed_scratch_buf, - &mut self.tags_deduplicator, - )?; - Ok(()) + context.telemetry.events_dropped_encoder().increment(1); + current_points = 0; + continue; + } + + if !context.payload_limits.point_count_fits(metric_points) { + // This metric exceeds the point limit by itself, so it cannot fit in any V3 payload request. + // Close the current range before dropping this oversized metric. + if let Some(start) = current_start.take() { + if start < idx { + ranges.push_back(start..idx); + } } + warn!( + payload_kind, + data_points = metric_points, + point_limit = context.payload_limits.max_points_per_payload, + "Dropping oversized V3 metric that exceeds the point-count limit." 
+ ); + context.telemetry.events_dropped_encoder().increment(1); + current_points = 0; + continue; } - } - fn endpoint_uri(&self) -> Uri { - match self.endpoint { - MetricsEndpoint::SeriesV1 => PathAndQuery::from_static(METRICS_SERIES_V1_PATH).into(), - MetricsEndpoint::SeriesV2 => PathAndQuery::from_static(METRICS_SERIES_V2_PATH).into(), - MetricsEndpoint::Sketches => PathAndQuery::from_static(METRICS_SKETCHES_PATH).into(), + let would_exceed_point_limit = + current_points > 0 && !context.payload_limits.point_count_fits(current_points + metric_points); + if would_exceed_point_limit { + // This metric fits by itself, but not together with the current range. + // Adding this metric would overflow the current range, so start a new range at this metric. + if let Some(start) = current_start { + ranges.push_back(start..idx); + } + current_start = Some(idx); + current_points = 0; + } else if current_start.is_none() { + current_start = Some(idx); } - } - fn endpoint_method(&self) -> Method { - // All endpoints use POST. - Method::POST + current_points += metric_points; } - fn content_type(&self) -> HeaderValue { - match self.endpoint { - MetricsEndpoint::SeriesV1 => CONTENT_TYPE_JSON.clone(), - MetricsEndpoint::SeriesV2 | MetricsEndpoint::Sketches => CONTENT_TYPE_PROTOBUF.clone(), + if let Some(start) = current_start { + if start < metrics.len() { + ranges.push_back(start..metrics.len()); } } + + ranges } -fn field_number_for_metric_type(metric: &Metric) -> u32 { - match metric.values() { - MetricValues::Counter(..) | MetricValues::Rate(..) | MetricValues::Gauge(..) | MetricValues::Set(..) => 1, - MetricValues::Histogram(..) | MetricValues::Distribution(..) => 1, - } +/// Converts a `Uuid` to a `HeaderValue`. +fn uuid_to_header_value(uuid: &Uuid) -> HeaderValue { + let s = uuid.as_hyphenated().to_string(); + // SAFETY: UUID hyphenated format only contains [0-9a-f-], all valid ASCII header chars. 
+ unsafe { HeaderValue::from_maybe_shared_unchecked(s) } } -fn get_message_size(raw_msg_size: usize) -> Result { - const MAX_MESSAGE_SIZE: u64 = i32::MAX as u64; +/// Converts a `usize` to a `HeaderValue`. +fn usize_to_header_value(value: usize) -> HeaderValue { + let s = value.to_string(); + // SAFETY: Integer strings only contain ASCII digits [0-9], all valid header chars. + unsafe { HeaderValue::from_maybe_shared_unchecked(s) } +} - // Individual messages cannot be larger than `i32::MAX`, so check that here before proceeding. - if raw_msg_size as u64 > MAX_MESSAGE_SIZE { - return Err(std::io::Error::other("message size exceeds limit (2147483648 bytes)").into()); +async fn flush_payload( + mut request: Request, event_count: usize, data_point_count: usize, + payloads_tx: &mut mpsc::Sender, batch_id: Option<&Uuid>, batch_seq: usize, batch_len: usize, + payload_info: Option, +) -> Result<(), GenericError> { + // Attach the validation batch UUID and sequence headers if present. + if let Some(batch_id) = batch_id { + let headers = request.headers_mut(); + headers.insert("X-Metrics-Request-ID", uuid_to_header_value(batch_id)); + headers.insert("X-Metrics-Request-Seq", usize_to_header_value(batch_seq)); + headers.insert("X-Metrics-Request-Len", usize_to_header_value(batch_len)); } - Ok(raw_msg_size as u32) + let mut payload_meta = PayloadMetadata::from_event_and_data_point_count(event_count, data_point_count); + if let Some(info) = payload_info { + payload_meta = payload_meta.with(info); + } + let http_payload = HttpPayload::new(payload_meta, request); + let payload = Payload::Http(http_payload); + + payloads_tx + .send(payload) + .await + .error_context("Failed to send payload.")?; + + Ok(()) } -fn get_message_size_from_buffer(buf: &[u8]) -> Result { - get_message_size(buf.len()) +// Encodes a batch of metrics to V3 columnar format. 
+fn encode_v3_metrics_batch(metrics: &[Metric], additional_tags: &SharedTagSet) -> Result, GenericError> { + let mut writer = v3::V3Writer::new(); + + for metric in metrics { + write_metric_to_v3(&mut writer, metric, additional_tags); + } + + let mut output = Vec::new(); + writer + .finalize(&mut output) + .map_err(|e| generic_error!("Failed to serialize V3 payload: {}", e))?; + + Ok(output) } -fn encode_single_metric( - metric: &Metric, additional_tags: &SharedTagSet, output_buf: &mut Vec, primary_scratch_buf: &mut Vec, - secondary_scratch_buf: &mut Vec, packed_scratch_buf: &mut Vec, - tags_deduplicator: &mut ReusableDeduplicator, -) -> Result<(), protobuf::Error> { - let mut output_stream = CodedOutputStream::vec(output_buf); - let field_number = field_number_for_metric_type(metric); - - write_nested_message(&mut output_stream, primary_scratch_buf, field_number, |os| { - // Depending on the metric type, we write out the appropriate fields. - match metric.values() { - MetricValues::Counter(..) | MetricValues::Rate(..) | MetricValues::Gauge(..) | MetricValues::Set(..) => { - encode_series_v2_metric(metric, additional_tags, os, secondary_scratch_buf, tags_deduplicator) +/// Writes a single metric to the V3 writer. +fn write_metric_to_v3(writer: &mut v3::V3Writer, metric: &Metric, additional_tags: &SharedTagSet) { + let metric_type = match metric.values() { + MetricValues::Counter(..) => v3::V3MetricType::Count, + MetricValues::Rate(..) => v3::V3MetricType::Rate, + MetricValues::Gauge(..) | MetricValues::Set(..) => v3::V3MetricType::Gauge, + MetricValues::Histogram(..) | MetricValues::Distribution(..) 
=> v3::V3MetricType::Sketch, + }; + let is_sketch = metric_type == v3::V3MetricType::Sketch; + + let mut builder = writer.write(metric_type, metric.context().name()); + + // Tags - chain instrumented + additional + origin tags + let all_tags = metric + .context() + .tags() + .into_iter() + .chain(additional_tags) + .chain(metric.context().origin_tags()) + .filter(|t| is_sketch || !is_v3_series_resource_tag(t) && !is_v3_series_device_tag(t)) + .map(|t| t.as_str()); + builder.set_tags(all_tags); + + // Resources - extract host and, for series, promoted resource tags. + let mut resources = Vec::new(); + if let Some(host) = metric.metadata().hostname().filter(|host| !host.is_empty()) { + resources.push(("host", host)); + } + if !is_sketch { + let mut device_resource = None; + for tag in metric + .context() + .origin_tags() + .into_iter() + .chain(metric.context().tags()) + .chain(additional_tags) + { + if is_v3_series_device_tag(tag) { + device_resource = tag.value().filter(|device| !device.is_empty()); + } else if is_v3_series_resource_tag(tag) { + if let Some((rtype, rname)) = tag.value().and_then(|value| value.split_once(':')) { + if !rtype.is_empty() && !rname.is_empty() { + resources.push((rtype, rname)); + } + } } - MetricValues::Histogram(..) | MetricValues::Distribution(..) => encode_sketch_metric( - metric, - additional_tags, - os, - secondary_scratch_buf, - packed_scratch_buf, - tags_deduplicator, - ), } - }) -} + if let Some(device) = device_resource { + let device_idx = usize::from(metric.metadata().hostname().is_some_and(|host| !host.is_empty())); + resources.insert(device_idx, ("device", device)); + } + } + builder.set_resources(&resources); -fn encode_series_v2_metric( - metric: &Metric, additional_tags: &SharedTagSet, output_stream: &mut CodedOutputStream<'_>, - scratch_buf: &mut Vec, tags_deduplicator: &mut ReusableDeduplicator, -) -> Result<(), protobuf::Error> { - // Write the metric name and tags. 
- output_stream.write_string(SERIES_METRIC_FIELD_NUMBER, metric.context().name())?; - - let deduplicated_tags = get_deduplicated_tags(metric, additional_tags, tags_deduplicator); - write_series_tags(deduplicated_tags, output_stream, scratch_buf)?; - - // Set the host resource. - write_resource( - output_stream, - scratch_buf, - "host", - metric.metadata().hostname().unwrap_or_default(), - )?; - - // Write the origin metadata, if it exists. + // Origin metadata if let Some(origin) = metric.metadata().origin() { match origin { MetricOrigin::SourceType(source_type) => { - output_stream.write_string(SERIES_SOURCE_TYPE_NAME_FIELD_NUMBER, source_type.as_ref())?; + builder.set_source_type(source_type.as_ref()); } MetricOrigin::OriginMetadata { product, subproduct, product_detail, } => { - write_origin_metadata( - output_stream, - scratch_buf, - SERIES_METADATA_FIELD_NUMBER, - *product, - *subproduct, - *product_detail, - )?; + builder.set_origin(*product, *subproduct, *product_detail, false); } } } - // Now write out our metric type, points, and interval (if applicable). - let (metric_type, points, maybe_interval) = match metric.values() { - MetricValues::Counter(points) => (proto::MetricType::COUNT, points.into_iter(), None), - MetricValues::Rate(points, interval) => (proto::MetricType::RATE, points.into_iter(), Some(interval)), - MetricValues::Gauge(points) => (proto::MetricType::GAUGE, points.into_iter(), None), - MetricValues::Set(points) => (proto::MetricType::GAUGE, points.into_iter(), None), - _ => unreachable!("encode_series_v2_metric called with non-series metric"), - }; - - output_stream.write_enum(SERIES_TYPE_FIELD_NUMBER, metric_type.value())?; - - if let Some(unit) = metric.metadata().unit() { - output_stream.write_string(SERIES_UNIT_FIELD_NUMBER, unit)?; - } - - for (timestamp, value) in points { - // If this is a rate metric, scale our value by the interval, in seconds. 
- let value = maybe_interval - .map(|interval| value / interval.as_secs_f64()) - .unwrap_or(value); - let timestamp = timestamp.map(|ts| ts.get()).unwrap_or(0) as i64; - - write_point(output_stream, scratch_buf, value, timestamp)?; - } - - if let Some(interval) = maybe_interval { - output_stream.write_int64(SERIES_INTERVAL_FIELD_NUMBER, interval.as_secs() as i64)?; + if metric_type != v3::V3MetricType::Sketch { + if let Some(unit) = metric.metadata().unit() { + builder.set_unit(unit); + } } - Ok(()) -} - -fn encode_series_v1_metric( - metric: &Metric, additional_tags: &SharedTagSet, buffer: &mut Vec, - tags_deduplicator: &mut ReusableDeduplicator, -) -> Result<(), serde_json::Error> { - let mut obj = JsonMap::new(); - - obj.insert("metric".into(), JsonValue::String(metric.context().name().to_string())); - - let (type_str, points_iter, maybe_interval) = match metric.values() { - MetricValues::Counter(points) => ("count", points.into_iter(), None), - MetricValues::Rate(points, interval) => ("rate", points.into_iter(), Some(*interval)), - MetricValues::Gauge(points) => ("gauge", points.into_iter(), None), - MetricValues::Set(points) => ("gauge", points.into_iter(), None), - _ => unreachable!("encode_series_v1_metric called with non-series metric"), - }; - - let mut points = Vec::new(); - for (timestamp, value) in points_iter { - // For rates, value is scaled by interval seconds — same as the V2 encoder. - let value = maybe_interval - .map(|interval| value / interval.as_secs_f64()) - .unwrap_or(value); - let timestamp = timestamp.map(|ts| ts.get()).unwrap_or(0) as i64; - - // V1 emits each point as a [timestamp, value] tuple — not a nested object. 
- let value_json = JsonNumber::from_f64(value) - .map(JsonValue::Number) - .unwrap_or_else(|| JsonValue::from(0)); - points.push(JsonValue::Array(vec![JsonValue::from(timestamp), value_json])); - } - obj.insert("points".into(), JsonValue::Array(points)); - - // Walk the deduplicated tag set once, extracting the first `device:` tag into the device JSON field while - // dropping `dd.internal.resource` (which is a V2-protobuf-only concept with no V1 representation). - let deduplicated = get_deduplicated_tags(metric, additional_tags, tags_deduplicator); - let mut tags_out = Vec::new(); - let mut device: Option = None; - for tag in deduplicated { - if tag.name() == "dd.internal.resource" { - continue; + // Points based on metric type + match metric.values() { + MetricValues::Counter(points) | MetricValues::Gauge(points) => { + for (ts, val) in points { + let timestamp = ts.map(|t| t.get() as i64).unwrap_or(0); + builder.add_point(timestamp, val); + } } - if device.is_none() && tag.name() == "device" { - if let Some(v) = tag.value() { - device = Some(v.to_string()); - continue; + MetricValues::Rate(points, interval) => { + builder.set_interval(interval.as_secs()); + for (ts, val) in points { + let timestamp = ts.map(|t| t.get() as i64).unwrap_or(0); + // Scale by interval as done in V2 + let scaled = val / interval.as_secs_f64(); + builder.add_point(timestamp, scaled); } } - tags_out.push(JsonValue::String(tag.as_str().to_string())); - } - obj.insert("tags".into(), JsonValue::Array(tags_out)); - - // V1 always emits `host` and `interval`, even when empty/zero — matches the Agent encoder. 
- obj.insert( - "host".into(), - JsonValue::String(metric.metadata().hostname().unwrap_or_default().to_string()), - ); - - if let Some(d) = device.filter(|s| !s.is_empty()) { - obj.insert("device".into(), JsonValue::String(d)); - } - - obj.insert("type".into(), JsonValue::String(type_str.into())); - - let interval_secs = maybe_interval.map(|iv| iv.as_secs() as i64).unwrap_or(0); - obj.insert("interval".into(), JsonValue::from(interval_secs)); - - // V1 only emits `source_type_name` from `MetricOrigin::SourceType`. - if let Some(MetricOrigin::SourceType(s)) = metric.metadata().origin() { - obj.insert("source_type_name".into(), JsonValue::String(s.as_ref().to_string())); - } - - if let Some(unit) = metric.metadata().unit() { - if !unit.is_empty() { - obj.insert("unit".into(), JsonValue::String(unit.to_string())); + MetricValues::Set(points) => { + // Set values are already converted to count in the iterator + for (ts, count) in points { + let timestamp = ts.map(|t| t.get() as i64).unwrap_or(0); + builder.add_point(timestamp, count); + } } - } - - serde_json::to_writer(buffer, &JsonValue::Object(obj)) -} - -fn encode_sketch_metric( - metric: &Metric, additional_tags: &SharedTagSet, output_stream: &mut CodedOutputStream<'_>, - scratch_buf: &mut Vec, packed_scratch_buf: &mut Vec, tags_deduplicator: &mut ReusableDeduplicator, -) -> Result<(), protobuf::Error> { - // Write the metric name and tags. - output_stream.write_string(SKETCH_METRIC_FIELD_NUMBER, metric.context().name())?; - - let deduplicated_tags = get_deduplicated_tags(metric, additional_tags, tags_deduplicator); - write_sketch_tags(deduplicated_tags, output_stream, scratch_buf)?; - - // Write the host. - output_stream.write_string( - SKETCH_HOST_FIELD_NUMBER, - metric.metadata().hostname().unwrap_or_default(), - )?; - - // Set the origin metadata, if it exists. 
- if let Some(MetricOrigin::OriginMetadata { - product, - subproduct, - product_detail, - }) = metric.metadata().origin() - { - write_origin_metadata( - output_stream, - scratch_buf, - SKETCH_METADATA_FIELD_NUMBER, - *product, - *subproduct, - *product_detail, - )?; - } - - // TODO: emit `metric.metadata().unit()` in the sketch payload once the upstream `agent-payload` proto defines a - // unit field on `SketchPayload.Sketch`. - - // Write out our sketches. - match metric.values() { MetricValues::Distribution(sketches) => { - for (timestamp, value) in sketches { - write_dogsketch(output_stream, scratch_buf, packed_scratch_buf, timestamp, value)?; + for (ts, sketch) in sketches { + let timestamp = ts.map(|t| t.get() as i64).unwrap_or(0); + if !sketch.is_empty() { + let bin_keys: Vec = sketch.bins().iter().map(|b| b.key()).collect(); + let bin_counts: Vec = sketch.bins().iter().map(|b| b.count()).collect(); + builder.add_sketch( + timestamp, + sketch.count() as i64, + sketch.sum().unwrap_or(0.0), + sketch.min().unwrap_or(0.0), + sketch.max().unwrap_or(0.0), + &bin_keys, + &bin_counts, + ); + } } } - MetricValues::Histogram(points) => { - for (timestamp, histogram) in points { - // We convert histograms to sketches to be able to write them out in the payload. 
- let mut ddsketch = DDSketch::default(); + MetricValues::Histogram(histograms) => { + for (ts, histogram) in histograms { + let timestamp = ts.map(|t| t.get() as i64).unwrap_or(0); + // Convert histogram to DDSketch + let mut sketch = DDSketch::default(); for sample in histogram.samples() { - ddsketch.insert_n(sample.value.into_inner(), sample.weight.0 as u64); + sketch.insert_n(sample.value.into_inner(), sample.weight.0 as u64); + } + if !sketch.is_empty() { + let bin_keys: Vec = sketch.bins().iter().map(|b| b.key()).collect(); + let bin_counts: Vec = sketch.bins().iter().map(|b| b.count()).collect(); + builder.add_sketch( + timestamp, + sketch.count() as i64, + sketch.sum().unwrap_or(0.0), + sketch.min().unwrap_or(0.0), + sketch.max().unwrap_or(0.0), + &bin_keys, + &bin_counts, + ); } - - write_dogsketch(output_stream, scratch_buf, packed_scratch_buf, timestamp, &ddsketch)?; } } - _ => unreachable!("encode_sketch_metric called with non-sketch metric"), } - Ok(()) + builder.close(); } -fn write_resource( - output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, resource_type: &str, resource_name: &str, -) -> Result<(), protobuf::Error> { - write_nested_message(output_stream, scratch_buf, SERIES_RESOURCES_FIELD_NUMBER, |os| { - os.write_string(RESOURCES_TYPE_FIELD_NUMBER, resource_type)?; - os.write_string(RESOURCES_NAME_FIELD_NUMBER, resource_name) - }) +fn is_v3_series_device_tag(tag: &Tag) -> bool { + tag.name() == "device" && tag.value().is_some() } -fn write_origin_metadata( - output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, field_number: u32, origin_product: u32, - origin_category: u32, origin_service: u32, -) -> Result<(), protobuf::Error> { - // TODO: Figure out how to cleanly use `write_nested_message` here. 
- - scratch_buf.clear(); - - { - let mut origin_output_stream = CodedOutputStream::vec(scratch_buf); - origin_output_stream.write_uint32(ORIGIN_ORIGIN_PRODUCT_FIELD_NUMBER, origin_product)?; - origin_output_stream.write_uint32(ORIGIN_ORIGIN_CATEGORY_FIELD_NUMBER, origin_category)?; - origin_output_stream.write_uint32(ORIGIN_ORIGIN_SERVICE_FIELD_NUMBER, origin_service)?; - origin_output_stream.flush()?; - } +fn is_v3_series_resource_tag(tag: &Tag) -> bool { + tag.name() == "dd.internal.resource" && tag.value().is_some() +} - // We do a little song and dance here because the `Origin` message is embedded inside of `Metadata`, so we need to - // write out field numbers/length delimiters in order: `Metadata`, and then `Origin`... but we write out origin - // message to the scratch buffer first... so we write out our `Metadata` preamble stuff to get its length, and then - // use that in conjunction with the `Origin` message size to write out the full `Metadata` message. - let origin_message_size = get_message_size_from_buffer(scratch_buf)?; - - let mut metadata_preamble_buf = [0; 64]; - let metadata_preamble_len = { - let mut metadata_output_stream = CodedOutputStream::bytes(&mut metadata_preamble_buf[..]); - metadata_output_stream.write_tag(METADATA_ORIGIN_FIELD_NUMBER, WireType::LengthDelimited)?; - metadata_output_stream.write_raw_varint32(origin_message_size)?; - metadata_output_stream.flush()?; - metadata_output_stream.total_bytes_written() as usize +/// Creates a V3 HTTP request from encoded payload data. +async fn create_v3_request( + endpoint_uri: &str, payload: Vec, compression_scheme: CompressionScheme, +) -> Result { + // Our `payload` is the inner `MetricData` message structure at this point, so we just manually write out the + // `Payload` message framing before writing the metric data. 
+ let mut header_buf = [0; 16]; + let header_len = { + let mut header_writer = CodedOutputStream::bytes(&mut header_buf); + header_writer.write_tag(3, WireType::LengthDelimited)?; + header_writer.write_uint64_no_tag(payload.len() as u64)?; + header_writer.flush()?; + header_writer.total_bytes_written() as usize }; - let metadata_message_size = get_message_size(scratch_buf.len() + metadata_preamble_len)?; + let uncompressed_len = header_len + payload.len(); + let buffer = ChunkedBytesBuffer::new(RB_BUFFER_CHUNK_SIZE); + let mut compressor = Compressor::from_scheme(compression_scheme, buffer); + compressor + .write_all(&header_buf[..header_len]) + .await + .error_context("Failed to compress V3 payload.")?; + compressor + .write_all(&payload) + .await + .error_context("Failed to compress V3 payload.")?; + compressor + .flush() + .await + .error_context("Failed to flush V3 compressor.")?; + compressor + .shutdown() + .await + .error_context("Failed to shutdown V3 compressor.")?; - output_stream.write_tag(field_number, WireType::LengthDelimited)?; - output_stream.write_raw_varint32(metadata_message_size)?; - output_stream.write_raw_bytes(&metadata_preamble_buf[..metadata_preamble_len])?; - output_stream.write_raw_bytes(scratch_buf) -} + let content_encoding = compressor.content_encoding(); + let compressed_buf = compressor.into_inner().freeze(); + let compressed_len = compressed_buf.len(); -fn write_point( - output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, value: f64, timestamp: i64, -) -> Result<(), protobuf::Error> { - write_nested_message(output_stream, scratch_buf, SERIES_POINTS_FIELD_NUMBER, |os| { - os.write_double(METRIC_POINT_VALUE_FIELD_NUMBER, value)?; - os.write_int64(METRIC_POINT_TIMESTAMP_FIELD_NUMBER, timestamp) - }) -} + let mut builder = Request::builder() + .method(Method::POST) + .uri(endpoint_uri) + .header(http::header::CONTENT_TYPE, "application/x-protobuf"); -fn write_dogsketch( - output_stream: &mut CodedOutputStream<'_>, 
scratch_buf: &mut Vec, packed_scratch_buf: &mut Vec, - timestamp: Option, sketch: &DDSketch, -) -> Result<(), protobuf::Error> { - // If the sketch is empty, we don't write it out. - if sketch.is_empty() { - warn!("Attempted to write an empty sketch to sketches payload, skipping."); - return Ok(()); + if let Some(encoding) = content_encoding { + builder = builder.header(http::header::CONTENT_ENCODING, encoding); } - write_nested_message(output_stream, scratch_buf, SKETCH_DOGSKETCHES_FIELD_NUMBER, |os| { - os.write_int64(DOGSKETCH_TS_FIELD_NUMBER, timestamp.map_or(0, |ts| ts.get() as i64))?; - os.write_int64(DOGSKETCH_CNT_FIELD_NUMBER, sketch.count() as i64)?; - os.write_double(DOGSKETCH_MIN_FIELD_NUMBER, sketch.min().unwrap())?; - os.write_double(DOGSKETCH_MAX_FIELD_NUMBER, sketch.max().unwrap())?; - os.write_double(DOGSKETCH_AVG_FIELD_NUMBER, sketch.avg().unwrap())?; - os.write_double(DOGSKETCH_SUM_FIELD_NUMBER, sketch.sum().unwrap())?; - - let bin_keys = sketch.bins().iter().map(|bin| bin.key()); - write_repeated_packed_from_iter( - os, - packed_scratch_buf, - DOGSKETCH_K_FIELD_NUMBER, - bin_keys, - |inner_os, value| inner_os.write_sint32_no_tag(value), - )?; - - let bin_counts = sketch.bins().iter().map(|bin| bin.count()); - write_repeated_packed_from_iter( - os, - packed_scratch_buf, - DOGSKETCH_N_FIELD_NUMBER, - bin_counts, - |inner_os, value| inner_os.write_uint32_no_tag(value), - ) + let request = builder + .body(compressed_buf) + .map_err(|e| generic_error!("Failed to build V3 request: {}", e))?; + + Ok(V3EncodedRequest { + request, + compressed_len, + uncompressed_len, }) } -fn get_deduplicated_tags<'a>( - metric: &'a Metric, additional_tags: &'a SharedTagSet, tags_deduplicator: &'a mut ReusableDeduplicator, -) -> impl Iterator { - let chained_tags = metric - .context() - .tags() - .into_iter() - .chain(additional_tags) - .chain(metric.context().origin_tags()); - - tags_deduplicator.deduplicated(chained_tags) -} +#[cfg(test)] +mod tests { + use 
std::sync::Arc; -fn write_tags<'a, I, F>( - tags: I, output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, tag_encoder: F, -) -> Result<(), protobuf::Error> -where - I: Iterator, - F: Fn(&Tag, &mut CodedOutputStream<'_>, &mut Vec) -> Result<(), protobuf::Error>, -{ - for tag in tags { - tag_encoder(tag, output_stream, scratch_buf)?; - } + use saluki_context::{ + tags::{Tag, TagSet}, + Context, + }; + use saluki_core::data_model::{ + event::{metric::MetricMetadata, Event}, + payload::Payload, + }; + use stringtheory::MetaString; + use tokio::time::timeout; - Ok(()) -} + use super::*; -fn write_series_tags<'a, I>( - tags: I, output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, -) -> Result<(), protobuf::Error> -where - I: Iterator, -{ - write_tags(tags, output_stream, scratch_buf, |tag, os, buf| { - // If this is a resource tag, we'll convert it directly to a resource entry. - if tag.name() == "dd.internal.resource" { - if let Some((resource_type, resource_name)) = tag.value().and_then(|s| s.split_once(':')) { - write_resource(os, buf, resource_type, resource_name) - } else { - Ok(()) - } - } else { - // We're dealing with a normal tag. 
- os.write_string(SERIES_TAGS_FIELD_NUMBER, tag.as_str()) - } - }) -} + #[test] + fn deser_agent_v3_api_nested_settings() { + let raw = r#" +serializer_experimental_use_v3_api: + compression_level: 7 + series: + endpoints: + - https://app.datadoghq.com + validate: true + use_beta: true + beta_route: /api/intake/metrics/custom/series + sketches: + endpoints: + - https://app.datadoghq.eu +"#; + + let config = + serde_yaml::from_str::(raw).expect("configuration should deserialize"); + + assert_eq!(7, config.v3_api.compression_level); + assert_eq!( + Some("https://app.datadoghq.com"), + config.v3_api.series.endpoints.first().map(String::as_str) + ); + assert!(config.v3_api.series.validate); + assert!(config.v3_api.series.use_beta); + assert_eq!("/api/intake/metrics/custom/series", config.v3_api.series.beta_route); + assert_eq!( + Some("https://app.datadoghq.eu"), + config.v3_api.sketches.endpoints.first().map(String::as_str) + ); + } -fn write_sketch_tags<'a, I>( - tags: I, output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, -) -> Result<(), protobuf::Error> -where - I: Iterator, -{ - write_tags(tags, output_stream, scratch_buf, |tag, os, _buf| { - // We always write the tags as-is, without any special handling for resource tags. 
- os.write_string(SKETCH_TAGS_FIELD_NUMBER, tag.as_str()) - }) -} + #[tokio::test] + async fn create_v3_request_uses_configured_endpoint_uri() { + let request = create_v3_request( + "/api/intake/metrics/custom/series", + Vec::new(), + CompressionScheme::noop(), + ) + .await + .expect("request should be created"); -fn write_nested_message( - output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, field_number: u32, writer: F, -) -> Result<(), protobuf::Error> -where - F: FnOnce(&mut CodedOutputStream<'_>) -> Result<(), protobuf::Error>, -{ - scratch_buf.clear(); - - { - let mut nested_output_stream = CodedOutputStream::vec(scratch_buf); - writer(&mut nested_output_stream)?; - nested_output_stream.flush()?; + assert_eq!("/api/intake/metrics/custom/series", request.request.uri()); } - output_stream.write_tag(field_number, WireType::LengthDelimited)?; - - let nested_message_size = get_message_size_from_buffer(scratch_buf)?; - output_stream.write_raw_varint32(nested_message_size)?; - output_stream.write_raw_bytes(scratch_buf) -} + async fn create_v3_test_request(metrics: &[Metric]) -> V3EncodedRequest { + let encoded = encode_v3_metrics_batch(metrics, &SharedTagSet::default()).expect("metrics should encode to V3"); + create_v3_request(V3_SERIES_ENDPOINT_URI, encoded, CompressionScheme::noop()) + .await + .expect("request should be created") + } -fn write_repeated_packed_from_iter( - output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, field_number: u32, values: I, writer: F, -) -> Result<(), protobuf::Error> -where - I: Iterator, - F: Fn(&mut CodedOutputStream<'_>, T) -> Result<(), protobuf::Error>, -{ - // This is a helper function that lets us write out a packed repeated field from an iterator of values. 
- // `CodedOutputStream` has similar functions to handle this, but they require a slice of values, which would mean we - // need to either allocate a new vector each time to hold the values, or thread through two additional vectors (one - // for `i32`, one for `u32`) to reuse the allocation... both of which are not great options. - // - // We've simply opted to pass through a _single_ vector that we can reuse, and write the packed values directly to - // that, almost identically to how `CodedOutputStream::write_repeated_packed_*` methods would do it. - - scratch_buf.clear(); - - { - let mut packed_output_stream = CodedOutputStream::vec(scratch_buf); - for value in values { - writer(&mut packed_output_stream, value)?; + fn test_v3_flush_context<'a>( + ep_config: &'a EndpointConfiguration, payload_limits: V3PayloadLimits, telemetry: &'a ComponentTelemetry, + ) -> V3FlushContext<'a> { + V3FlushContext { + endpoint_config: ep_config, + payload_limits, + series_endpoint_uri: V3_SERIES_ENDPOINT_URI, + telemetry, } - packed_output_stream.flush()?; } - let data_size = get_message_size_from_buffer(scratch_buf)?; - - output_stream.write_tag(field_number, WireType::LengthDelimited)?; - output_stream.write_raw_varint32(data_size)?; - output_stream.write_raw_bytes(scratch_buf) -} - -#[cfg(test)] -mod tests { - use std::{sync::Arc, time::Duration}; - - use protobuf::CodedOutputStream; - use saluki_common::iter::ReusableDeduplicator; - use saluki_context::{tags::SharedTagSet, Context}; - use saluki_core::data_model::event::metric::{Metric, MetricMetadata, MetricOrigin, MetricValues}; - use serde_json::Value as JsonValue; - use stringtheory::MetaString; - - use super::{ - encode_series_v1_metric, encode_series_v2_metric, encode_sketch_metric, MetricsEndpoint, - MetricsEndpointEncoder, SERIES_V1_INPUT_SEPARATOR, SERIES_V1_PAYLOAD_PREFIX, SERIES_V1_PAYLOAD_SUFFIX, - }; - use crate::common::datadog::{ - request_builder::EndpointEncoder as _, DEFAULT_SERIALIZER_COMPRESSED_SIZE_LIMIT, 
- DEFAULT_SERIALIZER_UNCOMPRESSED_SIZE_LIMIT, - }; + #[tokio::test] + async fn v3_payload_requests_split_by_compressed_size_limit() { + let metrics = vec![ + Metric::counter("v3.compressed.split.one", 1.0), + Metric::counter("v3.compressed.split.two", 2.0), + ]; + let single_request = create_v3_test_request(&metrics[..1]).await; + let combined_request = create_v3_test_request(&metrics).await; + assert!(combined_request.compressed_len > single_request.compressed_len); + + let limits = V3PayloadLimits::new(single_request.compressed_len, usize::MAX, 10_000, 10_000); + let ep_config = EndpointConfiguration::new(CompressionScheme::noop(), 10_000, None); + let telemetry = ComponentTelemetry::from_builder(&MetricsBuilder::default()); + + let context = test_v3_flush_context(&ep_config, limits, &telemetry); + let requests = encode_v3_payload_requests(V3_SERIES_ENDPOINT_URI, &metrics, context, "series").await; + + assert_eq!(2, requests.len()); + assert_eq!( + vec![1, 1], + requests.iter().map(|request| request.event_count).collect::>() + ); + assert!(requests + .iter() + .all(|request| request.request.body().len() <= limits.max_compressed_size)); + } - fn encode_one_v1(metric: &Metric) -> JsonValue { - let mut buf = Vec::new(); - let host_tags = SharedTagSet::default(); - let mut tags_deduplicator = ReusableDeduplicator::new(); - encode_series_v1_metric(metric, &host_tags, &mut buf, &mut tags_deduplicator) - .expect("encode_series_v1_metric should succeed"); - serde_json::from_slice(&buf).expect("encoder produced invalid JSON") + #[tokio::test] + async fn v3_payload_requests_split_by_uncompressed_size_limit() { + let metrics = vec![ + Metric::counter("v3.uncompressed.split.one", 1.0), + Metric::counter("v3.uncompressed.split.two", 2.0), + ]; + let single_request = create_v3_test_request(&metrics[..1]).await; + let combined_request = create_v3_test_request(&metrics).await; + assert!(combined_request.uncompressed_len > single_request.uncompressed_len); + + let limits = 
V3PayloadLimits::new(usize::MAX, single_request.uncompressed_len, 10_000, 10_000); + let ep_config = EndpointConfiguration::new(CompressionScheme::noop(), 10_000, None); + let telemetry = ComponentTelemetry::from_builder(&MetricsBuilder::default()); + + let context = test_v3_flush_context(&ep_config, limits, &telemetry); + let requests = encode_v3_payload_requests(V3_SERIES_ENDPOINT_URI, &metrics, context, "series").await; + + assert_eq!(2, requests.len()); + assert_eq!( + vec![1, 1], + requests.iter().map(|request| request.event_count).collect::>() + ); } #[test] - fn histogram_vs_sketch_identical_payload() { - // For the same exact set of points, we should be able to construct either a histogram or distribution from - // those points, and when encoded as a sketch payload, end up with the same exact payload. - // - // They should be identical because the goal is that we convert histograms into sketches in the same way we - // would have originally constructed a sketch based on the same samples. 
- let samples = &[1.0, 2.0, 3.0, 4.0, 5.0]; - let histogram = Metric::histogram("simple_samples", samples); - let distribution = Metric::distribution("simple_samples", samples); - let host_tags = SharedTagSet::default(); - - let mut buf1 = Vec::new(); - let mut buf2 = Vec::new(); - let mut tags_deduplicator = ReusableDeduplicator::new(); - - let mut histogram_payload = Vec::new(); - { - let mut histogram_writer = CodedOutputStream::vec(&mut histogram_payload); - encode_sketch_metric( - &histogram, - &host_tags, - &mut histogram_writer, - &mut buf1, - &mut buf2, - &mut tags_deduplicator, - ) - .expect("Failed to encode histogram as sketch"); - } - - let mut distribution_payload = Vec::new(); - { - let mut distribution_writer = CodedOutputStream::vec(&mut distribution_payload); - encode_sketch_metric( - &distribution, - &host_tags, - &mut distribution_writer, - &mut buf1, - &mut buf2, - &mut tags_deduplicator, - ) - .expect("Failed to encode distribution as sketch"); - } - - assert_eq!(histogram_payload, distribution_payload); + fn v3_metric_ranges_split_by_point_limit() { + let metrics = vec![ + Metric::counter("v3.points.split.one", [(123, 1.0), (124, 2.0)]), + Metric::counter("v3.points.split.two", [(123, 3.0), (124, 4.0)]), + Metric::counter("v3.points.split.three", 5.0), + ]; + let limits = V3PayloadLimits::new(usize::MAX, usize::MAX, 10_000, 3); + let ep_config = EndpointConfiguration::new(CompressionScheme::noop(), 10_000, None); + let telemetry = ComponentTelemetry::from_builder(&MetricsBuilder::default()); + let context = test_v3_flush_context(&ep_config, limits, &telemetry); + + let ranges = split_v3_metric_ranges_by_point_limit(&metrics, context, "series") + .into_iter() + .collect::>(); + + assert_eq!(vec![0..1, 1..3], ranges); } #[test] - fn input_valid() { - // Our encoder should always consider series metrics valid when set to either series endpoint, and similarly - // for sketch metrics when set to the sketches endpoint. 
- let counter = Metric::counter("counter", 1.0); - let rate = Metric::rate("rate", 1.0, Duration::from_secs(1)); - let gauge = Metric::gauge("gauge", 1.0); - let set = Metric::set("set", "foo"); - let histogram = Metric::histogram("histogram", [1.0, 2.0, 3.0]); - let distribution = Metric::distribution("distribution", [1.0, 2.0, 3.0]); - - let series_v1 = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::SeriesV1); - let series_v2 = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::SeriesV2); - let sketches_endpoint = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::Sketches); - - for series_endpoint in [&series_v1, &series_v2] { - assert!(series_endpoint.is_valid_input(&counter)); - assert!(series_endpoint.is_valid_input(&rate)); - assert!(series_endpoint.is_valid_input(&gauge)); - assert!(series_endpoint.is_valid_input(&set)); - assert!(!series_endpoint.is_valid_input(&histogram)); - assert!(!series_endpoint.is_valid_input(&distribution)); + fn v3_metric_ranges_skip_zero_point_metrics() { + let metrics = vec![ + Metric::counter("v3.points.zero.before", 1.0), + Metric::counter("v3.points.zero.empty", &[] as &[f64]), + Metric::counter("v3.points.zero.after", 2.0), + ]; + let limits = V3PayloadLimits::new(usize::MAX, usize::MAX, 10_000, 10_000); + let ep_config = EndpointConfiguration::new(CompressionScheme::noop(), 10_000, None); + let telemetry = ComponentTelemetry::from_builder(&MetricsBuilder::default()); + let context = test_v3_flush_context(&ep_config, limits, &telemetry); + + let ranges = split_v3_metric_ranges_by_point_limit(&metrics, context, "series") + .into_iter() + .collect::>(); + + assert_eq!(vec![0..1, 2..3], ranges); + } + + #[tokio::test] + async fn v3_split_flush_uses_payload_request_batch_headers() { + let mut metrics = vec![ + Metric::counter("v3.headers.split.one", 1.0), + Metric::counter("v3.headers.split.two", 2.0), + ]; + let single_request = create_v3_test_request(&metrics[..1]).await; + let combined_request = 
create_v3_test_request(&metrics).await; + assert!(combined_request.compressed_len > single_request.compressed_len); + + let limits = V3PayloadLimits::new(single_request.compressed_len, usize::MAX, 10_000, 10_000); + let ep_config = EndpointConfiguration::new(CompressionScheme::noop(), 10_000, None); + let telemetry = ComponentTelemetry::from_builder(&MetricsBuilder::default()); + let batch_id = Uuid::now_v7(); + let (mut payloads_tx, mut payloads_rx) = tokio::sync::mpsc::channel(8); + + let context = test_v3_flush_context(&ep_config, limits, &telemetry); + encode_and_flush_v3_series_metrics( + context, + &mut metrics, + &mut payloads_tx, + Some(&batch_id), + Some(MetricsPayloadInfo::v3_series()), + ) + .await + .expect("V3 metrics should flush"); + + for expected_seq in 0..2 { + let payload = payloads_rx.recv().await.expect("payload should be emitted"); + let Payload::Http(http_payload) = payload else { + panic!("expected HTTP payload"); + }; + let (_, request) = http_payload.into_parts(); + assert_eq!( + batch_id.as_hyphenated().to_string(), + request + .headers() + .get("X-Metrics-Request-ID") + .expect("batch ID header should be present") + .to_str() + .expect("batch ID header should be valid") + ); + assert_eq!( + expected_seq.to_string(), + request + .headers() + .get("X-Metrics-Request-Seq") + .expect("batch sequence header should be present") + .to_str() + .expect("batch sequence header should be valid") + ); + assert_eq!( + "2", + request + .headers() + .get("X-Metrics-Request-Len") + .expect("batch length header should be present") + .to_str() + .expect("batch length header should be valid") + ); } - assert!(!sketches_endpoint.is_valid_input(&counter)); - assert!(!sketches_endpoint.is_valid_input(&rate)); - assert!(!sketches_endpoint.is_valid_input(&gauge)); - assert!(!sketches_endpoint.is_valid_input(&set)); - assert!(sketches_endpoint.is_valid_input(&histogram)); - assert!(sketches_endpoint.is_valid_input(&distribution)); + assert!(metrics.is_empty()); } 
- #[test] - fn input_data_point_count_tracks_metric_values() { - let counter = Metric::counter("counter", [(123, 1.0), (124, 2.0)]); - let histogram = Metric::histogram("histogram", [1.0, 2.0, 3.0]); - - let series_endpoint = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::SeriesV2); - let sketches_endpoint = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::Sketches); + #[tokio::test] + async fn v3_sketch_flush_uses_split_payload_requests() { + let mut metrics = vec![ + Metric::distribution("v3.sketch.split.one", [1.0, 2.0, 3.0]), + Metric::distribution("v3.sketch.split.two", [4.0, 5.0, 6.0]), + ]; + let single_request = create_v3_test_request(&metrics[..1]).await; + let combined_request = create_v3_test_request(&metrics).await; + assert!(combined_request.compressed_len > single_request.compressed_len); + + let limits = V3PayloadLimits::new(single_request.compressed_len, usize::MAX, 10_000, 10_000); + let ep_config = EndpointConfiguration::new(CompressionScheme::noop(), 10_000, None); + let telemetry = ComponentTelemetry::from_builder(&MetricsBuilder::default()); + let batch_id = Uuid::now_v7(); + let (mut payloads_tx, mut payloads_rx) = tokio::sync::mpsc::channel(8); + + let context = test_v3_flush_context(&ep_config, limits, &telemetry); + encode_and_flush_v3_sketch_metrics( + context, + &mut metrics, + &mut payloads_tx, + Some(&batch_id), + Some(MetricsPayloadInfo::v3_sketches()), + ) + .await + .expect("V3 sketches should flush"); + + for expected_seq in 0..2 { + let payload = payloads_rx.recv().await.expect("payload should be emitted"); + let Payload::Http(http_payload) = payload else { + panic!("expected HTTP payload"); + }; + let (_, request) = http_payload.into_parts(); + assert_eq!(V3_SKETCHES_ENDPOINT_URI, request.uri()); + assert_eq!( + expected_seq.to_string(), + request + .headers() + .get("X-Metrics-Request-Seq") + .expect("batch sequence header should be present") + .to_str() + .expect("batch sequence header should be valid") + ); + 
assert_eq!( + "2", + request + .headers() + .get("X-Metrics-Request-Len") + .expect("batch length header should be present") + .to_str() + .expect("batch length header should be valid") + ); + } - assert_eq!(series_endpoint.input_data_point_count(&counter), 2); - assert_eq!(sketches_endpoint.input_data_point_count(&histogram), 1); + assert!(metrics.is_empty()); } #[test] - fn series_metric_unit_encoded() { - // A gauge with a unit in its metadata must produce a series protobuf payload that contains the unit string - // in field 6 (MetricSeries.unit), which the Datadog backend already accepts. - // - // In production this state is reached when histogram aggregation flushes timer (`ms`) statistics as gauges, - // each carrying unit = "millisecond" propagated through MetricMetadata. + fn v3_series_metric_unit_refs_are_encoded_sparsely() { let context = Context::from_static_parts("my.timer.avg", &[]); let metadata = MetricMetadata::default().with_unit(MetaString::from_static("millisecond")); let gauge = Metric::from_parts(context, MetricValues::gauge([1.0_f64]), metadata); + let context = Context::from_static_parts("my.counter", &[]); + let no_unit = Metric::from_parts(context, MetricValues::gauge([2.0_f64]), MetricMetadata::default()); + let context = Context::from_static_parts("my.timer.max", &[]); + let metadata = MetricMetadata::default().with_unit(MetaString::from_static("millisecond")); + let same_unit = Metric::from_parts(context, MetricValues::gauge([3.0_f64]), metadata); - let host_tags = SharedTagSet::default(); - let mut scratch_buf = Vec::new(); - let mut tags_deduplicator = ReusableDeduplicator::new(); - - let mut payload = Vec::new(); - { - let mut writer = CodedOutputStream::vec(&mut payload); - encode_series_v2_metric( - &gauge, - &host_tags, - &mut writer, - &mut scratch_buf, - &mut tags_deduplicator, - ) - .expect("Failed to encode gauge as series metric"); - writer.flush().expect("Failed to flush"); - } - - // In the protobuf wire format, a string 
field with field number 6 has tag byte 0x32 ((6 << 3) | 2). - // The tag is followed by a varint length and then the UTF-8 bytes of the string. - let expected_tag: u8 = (6 << 3) | 2; // 0x32 - let expected_value = b"millisecond"; - - let tag_pos = payload - .windows(1 + 1 + expected_value.len()) - .position(|w| w[0] == expected_tag && w[1] == expected_value.len() as u8 && &w[2..] == expected_value); + let payload = encode_v3_metrics_batch(&[gauge, no_unit, same_unit], &SharedTagSet::default()) + .expect("V3 metric should encode successfully"); + let expected_unit_dict = [ + 0xca, 0x01, // field 25, length-delimited. + 0x0c, // field payload length: varint string length + string bytes. + 0x0b, b'm', b'i', b'l', b'l', b'i', b's', b'e', b'c', b'o', b'n', b'd', + ]; assert!( - tag_pos.is_some(), - "series payload should contain unit field (field 6 = 'millisecond'), got bytes: {:?}", + payload + .windows(expected_unit_dict.len()) + .any(|window| window == expected_unit_dict), + "V3 payload should contain DictUnitStr field for 'millisecond', got bytes: {:?}", payload ); - } - #[test] - fn series_v1_basic_payload_shape() { - // Each metric variant maps to the right `type` string, points are emitted as [ts, value] tuples, - // and `interval`/`host` are always present (zero/empty when not set). - let counter = Metric::counter("my.count", 5.0); - let counter_json = encode_one_v1(&counter); - assert_eq!(counter_json["metric"], "my.count"); - assert_eq!(counter_json["type"], "count"); - assert_eq!(counter_json["interval"], 0); - assert_eq!(counter_json["host"], ""); - assert_eq!(counter_json["tags"], JsonValue::Array(vec![])); - let points = counter_json["points"].as_array().expect("points is array"); - assert_eq!(points.len(), 1); - assert_eq!(points[0][0], 0); - assert_eq!(points[0][1], 5.0); - // Optional fields must be absent when not set. 
- assert!(counter_json.get("unit").is_none()); - assert!(counter_json.get("source_type_name").is_none()); - assert!(counter_json.get("device").is_none()); - - let rate = Metric::rate("my.rate", 30.0, Duration::from_secs(10)); - let rate_json = encode_one_v1(&rate); - assert_eq!(rate_json["type"], "rate"); - assert_eq!(rate_json["interval"], 10); - // Rate value scaled by interval seconds: 30 / 10 = 3. - let rate_points = rate_json["points"].as_array().expect("rate points is array"); - assert_eq!(rate_points[0][1], 3.0); - - let gauge = Metric::gauge("my.gauge", 42.0); - let gauge_json = encode_one_v1(&gauge); - assert_eq!(gauge_json["type"], "gauge"); - - // Sets are encoded as gauges with the set cardinality as the value (consistent with V2). - let set = Metric::set("my.set", "alpha"); - let set_json = encode_one_v1(&set); - assert_eq!(set_json["type"], "gauge"); - let set_points = set_json["points"].as_array().expect("set points is array"); - assert_eq!(set_points[0][1], 1.0); + let expected_unit_ref = [ + 0xd2, 0x01, // field 26, length-delimited. + 0x02, // packed field payload length. + 0x02, 0x00, // sparse unit refs for metrics 1 and 3 only: refs [1, 1] -> deltas [1, 0]. 
+ ]; + assert!( + payload + .windows(expected_unit_ref.len()) + .any(|window| window == expected_unit_ref), + "V3 payload should contain UnitRef field for 'millisecond', got bytes: {:?}", + payload + ); } #[test] - fn series_v1_unit_and_hostname_emitted() { - let context = Context::from_static_parts("my.timer.avg", &[]); - let metadata = MetricMetadata::default() - .with_unit(MetaString::from_static("millisecond")) - .with_hostname(Some(Arc::from("host-1"))); - let gauge = Metric::from_parts(context, MetricValues::gauge([1.0_f64]), metadata); + fn v3_sketch_metric_unit_not_encoded() { + let context = Context::from_static_parts("my.histogram", &[]); + let metadata = MetricMetadata::default().with_unit(MetaString::from_static("millisecond")); + let histogram = Metric::from_parts(context, MetricValues::histogram([1.0_f64]), metadata); - let json = encode_one_v1(&gauge); - assert_eq!(json["unit"], "millisecond"); - assert_eq!(json["host"], "host-1"); - } + let payload = encode_v3_metrics_batch(&[histogram], &SharedTagSet::default()) + .expect("V3 sketch metric should encode successfully"); - #[test] - fn series_v1_device_tag_extraction() { - // A `device:` tag is extracted into the `device` JSON field and dropped from `tags`. 
- let context = Context::from_static_parts("my.metric", &["device:eth0", "env:prod"]); - let counter = Metric::from_parts(context, MetricValues::counter([1.0_f64]), MetricMetadata::default()); - - let json = encode_one_v1(&counter); - assert_eq!(json["device"], "eth0"); - let tags = json["tags"].as_array().expect("tags is array"); - let tag_strs: Vec<&str> = tags.iter().filter_map(|v| v.as_str()).collect(); assert!( - !tag_strs.iter().any(|t| t.starts_with("device:")), - "device tag must be removed: {:?}", - tag_strs + !payload + .windows(b"millisecond".len()) + .any(|window| window == b"millisecond"), + "V3 sketch payload should not contain unit bytes, matching the Agent V3 sketch builder: {:?}", + payload ); - assert!(tag_strs.contains(&"env:prod")); } #[test] - fn series_v1_source_type_name_from_source_type_origin() { - let context = Context::from_static_parts("my.metric", &[]); - let metadata = MetricMetadata::default().with_source_type(Some(Arc::from("integration_x"))); - let counter = Metric::from_parts(context, MetricValues::counter([1.0_f64]), metadata); - - let json = encode_one_v1(&counter); - assert_eq!(json["source_type_name"], "integration_x"); + fn v3_series_promotes_device_and_internal_resource_tags_to_resources() { + let context = Context::from_static_parts( + "series.resources", + &[ + "env:prod", + "device:switch1", + "dd.internal.resource:pod:pod-a", + "dd.internal.resource:malformed", + ], + ); + let metadata = MetricMetadata::default().with_hostname(Some(Arc::from("host-a"))); + let metric = Metric::from_parts(context, MetricValues::gauge([1.0_f64]), metadata); + + let payload = + encode_v3_metrics_batch(&[metric], &SharedTagSet::default()).expect("V3 series should encode successfully"); + + assert_contains_bytes(&payload, b"env:prod"); + assert!(!contains_bytes(&payload, b"device:switch1")); + assert!(!contains_bytes(&payload, b"dd.internal.resource:pod:pod-a")); + assert!(!contains_bytes(&payload, b"dd.internal.resource:malformed")); + + let 
expected_resource_dict = [ + 0x22, // field 4, length-delimited. + 0x25, // field payload length. + 0x04, b'h', b'o', b's', b't', 0x06, b'h', b'o', b's', b't', b'-', b'a', 0x06, b'd', b'e', b'v', b'i', b'c', + b'e', 0x07, b's', b'w', b'i', b't', b'c', b'h', b'1', 0x03, b'p', b'o', b'd', 0x05, b'p', b'o', b'd', b'-', + b'a', + ]; + assert_contains_bytes(&payload, &expected_resource_dict); } #[test] - fn series_v1_origin_metadata_dropped() { - // OriginMetadata is V2-protobuf only; V1 must drop it. - let context = Context::from_static_parts("my.metric", &[]); - let metadata = MetricMetadata::default().with_origin(Some(MetricOrigin::dogstatsd())); - let counter = Metric::from_parts(context, MetricValues::counter([1.0_f64]), metadata); - - let json = encode_one_v1(&counter); - assert!(json.get("source_type_name").is_none()); + fn v3_series_promotes_additional_and_origin_resource_tags_without_empty_host() { + let context = Context::from_static_parts("series.additional_origin_resources", &["env:prod"]) + .with_origin_tags(tag_set(["dd.internal.resource:pod:pod-origin"])); + let additional_tags = SharedTagSet::from(tag_set([ + "team:core", + "device:switch1", + "dd.internal.resource:container:container-a", + ])); + let metadata = MetricMetadata::default().with_hostname(Some(Arc::from(""))); + let metric = Metric::from_parts(context, MetricValues::gauge([1.0_f64]), metadata); + + let payload = + encode_v3_metrics_batch(&[metric], &additional_tags).expect("V3 series should encode successfully"); + + assert_contains_bytes(&payload, b"env:prod"); + assert_contains_bytes(&payload, b"team:core"); + assert!(!contains_bytes(&payload, b"device:switch1")); + assert!(!contains_bytes(&payload, b"dd.internal.resource:container:container-a")); + assert!(!contains_bytes(&payload, b"dd.internal.resource:pod:pod-origin")); + + let expected_resource_dict = [ + 0x22, // field 4, length-delimited. + 0x34, // field payload length. 
+ 0x06, b'd', b'e', b'v', b'i', b'c', b'e', 0x07, b's', b'w', b'i', b't', b'c', b'h', b'1', 0x03, b'p', b'o', + b'd', 0x0a, b'p', b'o', b'd', b'-', b'o', b'r', b'i', b'g', b'i', b'n', 0x09, b'c', b'o', b'n', b't', b'a', + b'i', b'n', b'e', b'r', 0x0b, b'c', b'o', b'n', b't', b'a', b'i', b'n', b'e', b'r', b'-', b'a', + ]; + assert_contains_bytes(&payload, &expected_resource_dict); + assert!(!contains_bytes(&payload, b"host")); } #[test] - fn series_v1_dd_internal_resource_dropped() { - // `dd.internal.resource` is V2-protobuf-only; V1 must drop these tags silently. - let context = Context::from_static_parts("my.metric", &["dd.internal.resource:host:foo", "env:prod"]); - let counter = Metric::from_parts(context, MetricValues::counter([1.0_f64]), MetricMetadata::default()); - - let json = encode_one_v1(&counter); - let tags = json["tags"].as_array().expect("tags is array"); - let tag_strs: Vec<&str> = tags.iter().filter_map(|v| v.as_str()).collect(); - assert!( - !tag_strs.iter().any(|t| t.starts_with("dd.internal.resource:")), - "dd.internal.resource tag must be dropped: {:?}", - tag_strs + fn v3_sketch_keeps_device_and_internal_resource_tags_as_tags() { + let context = Context::from_static_parts( + "sketch.resources", + &["env:prod", "device:switch1", "dd.internal.resource:pod:pod-a"], ); - assert!(tag_strs.contains(&"env:prod")); + let metadata = MetricMetadata::default().with_hostname(Some(Arc::from("host-a"))); + let metric = Metric::from_parts(context, MetricValues::histogram([1.0_f64]), metadata); + + let payload = + encode_v3_metrics_batch(&[metric], &SharedTagSet::default()).expect("V3 sketch should encode successfully"); + + assert_contains_bytes(&payload, b"env:prod"); + assert_contains_bytes(&payload, b"device:switch1"); + assert_contains_bytes(&payload, b"dd.internal.resource:pod:pod-a"); + + let expected_resource_dict = [ + 0x22, // field 4, length-delimited. + 0x0c, // field payload length. 
+ 0x04, b'h', b'o', b's', b't', 0x06, b'h', b'o', b's', b't', b'-', b'a', + ]; + assert_contains_bytes(&payload, &expected_resource_dict); } - #[test] - fn series_v1_endpoint_routing() { - // SeriesV1 advertises the V1 URI, JSON content type, and the {"series":[...]} framing. - let encoder = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::SeriesV1); - assert_eq!(encoder.endpoint_uri().path(), "/api/v1/series"); - assert_eq!(encoder.content_type(), "application/json"); - assert_eq!(encoder.get_payload_prefix(), Some(SERIES_V1_PAYLOAD_PREFIX)); - assert_eq!(encoder.get_payload_suffix(), Some(SERIES_V1_PAYLOAD_SUFFIX)); - assert_eq!(encoder.get_input_separator(), Some(SERIES_V1_INPUT_SEPARATOR)); - assert_eq!( - encoder.compressed_size_limit(), - DEFAULT_SERIALIZER_COMPRESSED_SIZE_LIMIT - ); - assert_eq!( - encoder.uncompressed_size_limit(), - DEFAULT_SERIALIZER_UNCOMPRESSED_SIZE_LIMIT + #[tokio::test] + async fn validation_split_flush_assigns_batch_id_to_carried_metric() { + let v2_endpoint_config = EndpointConfiguration::new(CompressionScheme::noop(), 1, None); + let v2_series_builder = Some( + v2::create_v2_request_builder(MetricsEndpoint::SeriesV2, &v2_endpoint_config) + .await + .expect("V2 request builder should be created"), ); + let v3_endpoint_config = EndpointConfiguration::new(CompressionScheme::noop(), 10_000, None); + let telemetry = ComponentTelemetry::from_builder(&MetricsBuilder::default()); + let (events_tx, events_rx) = tokio::sync::mpsc::channel(1); + let (payloads_tx, mut payloads_rx) = tokio::sync::mpsc::channel(8); + + let request_builder_handle = tokio::spawn(run_request_builder( + v2_series_builder, + None, + MetricsEncoderMode::Validation, + MetricsEncoderMode::V2Only, + v3_endpoint_config, + V3PayloadLimits::new(usize::MAX, usize::MAX, 10_000, 10_000), + V3_SERIES_ENDPOINT_URI.to_string(), + telemetry, + events_rx, + payloads_tx, + Duration::from_millis(10), + )); + + let mut events = EventsBuffer::default(); + assert!(events + 
.try_push(Event::Metric(Metric::counter("validation.split.one", 1.0))) + .is_none()); + assert!(events + .try_push(Event::Metric(Metric::counter("validation.split.two", 2.0))) + .is_none()); + events_tx + .send(events) + .await + .expect("events should be sent to request builder"); + + let mut flushed_requests = Vec::new(); + for _ in 0..4 { + let payload = timeout(Duration::from_secs(1), payloads_rx.recv()) + .await + .expect("payload should arrive before timeout") + .expect("payload channel should remain open"); + let Payload::Http(http_payload) = payload else { + panic!("expected HTTP payload"); + }; + let (_, request) = http_payload.into_parts(); + let batch_id = request + .headers() + .get("X-Metrics-Request-ID") + .expect("validation batch ID header should be present") + .to_str() + .expect("validation batch ID should be valid header text") + .to_string(); + flushed_requests.push((request.uri().to_string(), batch_id)); + } - // Sketches use the generic serializer payload limits in the Datadog Agent. 
- let sketches = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::Sketches); - assert_eq!( - sketches.compressed_size_limit(), - DEFAULT_SERIALIZER_COMPRESSED_SIZE_LIMIT - ); - assert_eq!( - sketches.uncompressed_size_limit(), - DEFAULT_SERIALIZER_UNCOMPRESSED_SIZE_LIMIT + assert_eq!("/api/v2/series", flushed_requests[0].0); + assert_eq!(V3_SERIES_ENDPOINT_URI, flushed_requests[1].0); + assert_eq!("/api/v2/series", flushed_requests[2].0); + assert_eq!(V3_SERIES_ENDPOINT_URI, flushed_requests[3].0); + + assert_eq!(flushed_requests[0].1, flushed_requests[1].1); + assert_eq!(flushed_requests[2].1, flushed_requests[3].1); + assert_ne!(flushed_requests[0].1, flushed_requests[2].1); + + drop(events_tx); + request_builder_handle + .await + .expect("request builder task should complete") + .expect("request builder should stop cleanly"); + } + + fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool { + haystack.windows(needle.len()).any(|window| window == needle) + } + + fn assert_contains_bytes(haystack: &[u8], needle: &[u8]) { + assert!( + contains_bytes(haystack, needle), + "expected payload to contain bytes {:?}, got {:?}", + needle, + haystack ); + } - // V2 series stays on protobuf with no framing. 
- let v2 = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::SeriesV2); - assert_eq!(v2.endpoint_uri().path(), "/api/v2/series"); - assert_eq!(v2.content_type(), "application/x-protobuf"); - assert!(v2.get_payload_prefix().is_none()); + fn tag_set(tags: [&'static str; N]) -> TagSet { + tags.into_iter().map(Tag::from_static).collect() } } @@ -1592,10 +1857,18 @@ mod config_smoke { #[tokio::test] async fn smoke_test() { - run_config_smoke_tests(structs::DATADOG_METRICS_CONFIGURATION, &[], json!({}), |cfg| { - cfg.as_typed::() - .expect("DatadogMetricsConfiguration should deserialize") - }) + run_config_smoke_tests( + structs::DATADOG_METRICS_CONFIGURATION, + &[ + "serializer_experimental_use_v3_api.sketches.beta_route", + "serializer_experimental_use_v3_api.sketches.use_beta", + ], + json!({}), + |cfg| { + cfg.as_typed::() + .expect("DatadogMetricsConfiguration should deserialize") + }, + ) .await } } @@ -1605,12 +1878,10 @@ mod use_v2_api_series_default { use saluki_config::ConfigurationLoader; use serde_json::json; - use super::{DatadogMetricsConfiguration, SERIES_V2_COMPRESSED_SIZE_LIMIT, SERIES_V2_UNCOMPRESSED_SIZE_LIMIT}; + use super::{v2, DatadogMetricsConfiguration}; use crate::{common::datadog::clamp_payload_limits, config::KEY_ALIASES}; - /// `use_v2_api_series` defaults to `true` (preserves V2 protobuf behavior when the flag is absent). - /// The nested-form (`use_v2_api.series`) and env-var (`DD_USE_V2_API_SERIES`) paths to the flat key - /// are exercised end-to-end by the `config_smoke::smoke_test` runner via `KEY_ALIASES`. + /// `use_v2_api_series` defaults to `true`, preserving V2 protobuf behavior when the flag is absent. 
#[tokio::test] async fn defaults_to_true_when_absent() { let cfg = ConfigurationLoader::default() @@ -1647,19 +1918,19 @@ mod use_v2_api_series_default { #[test] fn clamps_series_payload_limit_keys_to_api_limits() { let (uncompressed_limit, compressed_limit) = clamp_payload_limits( - SERIES_V2_UNCOMPRESSED_SIZE_LIMIT + 1, - SERIES_V2_COMPRESSED_SIZE_LIMIT + 1, - SERIES_V2_UNCOMPRESSED_SIZE_LIMIT, - SERIES_V2_COMPRESSED_SIZE_LIMIT, + v2::SERIES_V2_UNCOMPRESSED_SIZE_LIMIT + 1, + v2::SERIES_V2_COMPRESSED_SIZE_LIMIT + 1, + v2::SERIES_V2_UNCOMPRESSED_SIZE_LIMIT, + v2::SERIES_V2_COMPRESSED_SIZE_LIMIT, ); - assert_eq!(uncompressed_limit, SERIES_V2_UNCOMPRESSED_SIZE_LIMIT); - assert_eq!(compressed_limit, SERIES_V2_COMPRESSED_SIZE_LIMIT); + assert_eq!(uncompressed_limit, v2::SERIES_V2_UNCOMPRESSED_SIZE_LIMIT); + assert_eq!(compressed_limit, v2::SERIES_V2_COMPRESSED_SIZE_LIMIT); let (uncompressed_limit, compressed_limit) = clamp_payload_limits( 5678, 1234, - SERIES_V2_UNCOMPRESSED_SIZE_LIMIT, - SERIES_V2_COMPRESSED_SIZE_LIMIT, + v2::SERIES_V2_UNCOMPRESSED_SIZE_LIMIT, + v2::SERIES_V2_COMPRESSED_SIZE_LIMIT, ); assert_eq!(uncompressed_limit, 5678); assert_eq!(compressed_limit, 1234); diff --git a/lib/saluki-components/src/encoders/datadog/metrics/v1/mod.rs b/lib/saluki-components/src/encoders/datadog/metrics/v1/mod.rs new file mode 100644 index 00000000000..e345c9c66e8 --- /dev/null +++ b/lib/saluki-components/src/encoders/datadog/metrics/v1/mod.rs @@ -0,0 +1,235 @@ +use http::HeaderValue; +use saluki_common::iter::ReusableDeduplicator; +use saluki_context::tags::{SharedTagSet, Tag}; +use saluki_core::data_model::event::metric::{Metric, MetricOrigin, MetricValues}; +use serde_json::{Map as JsonMap, Number as JsonNumber, Value as JsonValue}; + +pub(super) const SERIES_COMPRESSED_SIZE_LIMIT: usize = 2_000_000; // ~2 MiB +pub(super) const SERIES_UNCOMPRESSED_SIZE_LIMIT: usize = 4_000_000; // ~4 MiB + +pub(super) static CONTENT_TYPE: HeaderValue = 
HeaderValue::from_static("application/json"); + +// JSON framing for the V1 series payload, which wraps the array of `Serie` objects in a top-level object. +pub(super) const SERIES_PAYLOAD_PREFIX: &[u8] = b"{\"series\":["; +pub(super) const SERIES_PAYLOAD_SUFFIX: &[u8] = b"]}"; +pub(super) const SERIES_INPUT_SEPARATOR: &[u8] = b","; + +pub(super) fn encode_series_metric( + metric: &Metric, additional_tags: &SharedTagSet, buffer: &mut Vec, + tags_deduplicator: &mut ReusableDeduplicator, +) -> Result<(), serde_json::Error> { + let mut obj = JsonMap::new(); + + obj.insert("metric".into(), JsonValue::String(metric.context().name().to_string())); + + let (type_str, points_iter, maybe_interval) = match metric.values() { + MetricValues::Counter(points) => ("count", points.into_iter(), None), + MetricValues::Rate(points, interval) => ("rate", points.into_iter(), Some(*interval)), + MetricValues::Gauge(points) => ("gauge", points.into_iter(), None), + MetricValues::Set(points) => ("gauge", points.into_iter(), None), + _ => unreachable!("encode_series_metric called with non-series metric"), + }; + + let mut points = Vec::new(); + for (timestamp, value) in points_iter { + let value = maybe_interval + .map(|interval| value / interval.as_secs_f64()) + .unwrap_or(value); + let timestamp = timestamp.map(|ts| ts.get()).unwrap_or(0) as i64; + let value_json = JsonNumber::from_f64(value) + .map(JsonValue::Number) + .unwrap_or_else(|| JsonValue::from(0)); + points.push(JsonValue::Array(vec![JsonValue::from(timestamp), value_json])); + } + obj.insert("points".into(), JsonValue::Array(points)); + + let deduplicated = get_deduplicated_tags(metric, additional_tags, tags_deduplicator); + let mut tags_out = Vec::new(); + let mut device: Option = None; + for tag in deduplicated { + if tag.name() == "dd.internal.resource" { + continue; + } + if device.is_none() && tag.name() == "device" { + if let Some(v) = tag.value() { + device = Some(v.to_string()); + continue; + } + } + 
tags_out.push(JsonValue::String(tag.as_str().to_string())); + } + obj.insert("tags".into(), JsonValue::Array(tags_out)); + + obj.insert( + "host".into(), + JsonValue::String(metric.metadata().hostname().unwrap_or_default().to_string()), + ); + + if let Some(device) = device.filter(|device| !device.is_empty()) { + obj.insert("device".into(), JsonValue::String(device)); + } + + obj.insert("type".into(), JsonValue::String(type_str.into())); + + let interval_secs = maybe_interval.map(|interval| interval.as_secs() as i64).unwrap_or(0); + obj.insert("interval".into(), JsonValue::from(interval_secs)); + + if let Some(MetricOrigin::SourceType(source_type)) = metric.metadata().origin() { + obj.insert( + "source_type_name".into(), + JsonValue::String(source_type.as_ref().to_string()), + ); + } + + if let Some(unit) = metric.metadata().unit() { + if !unit.is_empty() { + obj.insert("unit".into(), JsonValue::String(unit.to_string())); + } + } + + serde_json::to_writer(buffer, &JsonValue::Object(obj)) +} + +fn get_deduplicated_tags<'a>( + metric: &'a Metric, additional_tags: &'a SharedTagSet, tags_deduplicator: &'a mut ReusableDeduplicator, +) -> impl Iterator { + let chained_tags = metric + .context() + .tags() + .into_iter() + .chain(additional_tags) + .chain(metric.context().origin_tags()); + + tags_deduplicator.deduplicated(chained_tags) +} + +#[cfg(test)] +mod tests { + use std::{sync::Arc, time::Duration}; + + use saluki_common::iter::ReusableDeduplicator; + use saluki_context::{tags::SharedTagSet, Context}; + use saluki_core::data_model::event::metric::{Metric, MetricMetadata, MetricOrigin, MetricValues}; + use serde_json::Value as JsonValue; + use stringtheory::MetaString; + + use super::encode_series_metric; + + fn encode_one(metric: &Metric) -> JsonValue { + let mut buf = Vec::new(); + let host_tags = SharedTagSet::default(); + let mut tags_deduplicator = ReusableDeduplicator::new(); + encode_series_metric(metric, &host_tags, &mut buf, &mut tags_deduplicator) + 
.expect("encode_series_metric should succeed"); + serde_json::from_slice(&buf).expect("encoder produced invalid JSON") + } + + #[test] + fn basic_payload_shape() { + // Each metric variant maps to the right `type` string, points are emitted as [ts, value] tuples, + // and `interval`/`host` are always present (zero/empty when not set). + let counter = Metric::counter("my.count", 5.0); + let counter_json = encode_one(&counter); + assert_eq!(counter_json["metric"], "my.count"); + assert_eq!(counter_json["type"], "count"); + assert_eq!(counter_json["interval"], 0); + assert_eq!(counter_json["host"], ""); + assert_eq!(counter_json["tags"], JsonValue::Array(vec![])); + let points = counter_json["points"].as_array().expect("points is array"); + assert_eq!(points.len(), 1); + assert_eq!(points[0][0], 0); + assert_eq!(points[0][1], 5.0); + // Optional fields must be absent when not set. + assert!(counter_json.get("unit").is_none()); + assert!(counter_json.get("source_type_name").is_none()); + assert!(counter_json.get("device").is_none()); + + let rate = Metric::rate("my.rate", 30.0, Duration::from_secs(10)); + let rate_json = encode_one(&rate); + assert_eq!(rate_json["type"], "rate"); + assert_eq!(rate_json["interval"], 10); + // Rate value scaled by interval seconds: 30 / 10 = 3. + let rate_points = rate_json["points"].as_array().expect("rate points is array"); + assert_eq!(rate_points[0][1], 3.0); + + let gauge = Metric::gauge("my.gauge", 42.0); + let gauge_json = encode_one(&gauge); + assert_eq!(gauge_json["type"], "gauge"); + + // Sets are encoded as gauges with the set cardinality as the value, consistent with V2. 
+ let set = Metric::set("my.set", "alpha"); + let set_json = encode_one(&set); + assert_eq!(set_json["type"], "gauge"); + let set_points = set_json["points"].as_array().expect("set points is array"); + assert_eq!(set_points[0][1], 1.0); + } + + #[test] + fn unit_and_hostname_emitted() { + let context = Context::from_static_parts("my.timer.avg", &[]); + let metadata = MetricMetadata::default() + .with_unit(MetaString::from_static("millisecond")) + .with_hostname(Some(Arc::from("host-1"))); + let gauge = Metric::from_parts(context, MetricValues::gauge([1.0_f64]), metadata); + + let json = encode_one(&gauge); + assert_eq!(json["unit"], "millisecond"); + assert_eq!(json["host"], "host-1"); + } + + #[test] + fn device_tag_extraction() { + // A `device:` tag is extracted into the `device` JSON field and dropped from `tags`. + let context = Context::from_static_parts("my.metric", &["device:eth0", "env:prod"]); + let counter = Metric::from_parts(context, MetricValues::counter([1.0_f64]), MetricMetadata::default()); + + let json = encode_one(&counter); + assert_eq!(json["device"], "eth0"); + let tags = json["tags"].as_array().expect("tags is array"); + let tag_strs: Vec<&str> = tags.iter().filter_map(|v| v.as_str()).collect(); + assert!( + !tag_strs.iter().any(|t| t.starts_with("device:")), + "device tag must be removed: {:?}", + tag_strs + ); + assert!(tag_strs.contains(&"env:prod")); + } + + #[test] + fn source_type_name_from_source_type_origin() { + let context = Context::from_static_parts("my.metric", &[]); + let metadata = MetricMetadata::default().with_source_type(Some(Arc::from("integration_x"))); + let counter = Metric::from_parts(context, MetricValues::counter([1.0_f64]), metadata); + + let json = encode_one(&counter); + assert_eq!(json["source_type_name"], "integration_x"); + } + + #[test] + fn origin_metadata_dropped() { + // OriginMetadata is V2-protobuf only; V1 must drop it. 
+ let context = Context::from_static_parts("my.metric", &[]); + let metadata = MetricMetadata::default().with_origin(Some(MetricOrigin::dogstatsd())); + let counter = Metric::from_parts(context, MetricValues::counter([1.0_f64]), metadata); + + let json = encode_one(&counter); + assert!(json.get("source_type_name").is_none()); + } + + #[test] + fn dd_internal_resource_dropped() { + // `dd.internal.resource` is V2-protobuf-only; V1 must drop these tags silently. + let context = Context::from_static_parts("my.metric", &["dd.internal.resource:host:foo", "env:prod"]); + let counter = Metric::from_parts(context, MetricValues::counter([1.0_f64]), MetricMetadata::default()); + + let json = encode_one(&counter); + let tags = json["tags"].as_array().expect("tags is array"); + let tag_strs: Vec<&str> = tags.iter().filter_map(|v| v.as_str()).collect(); + assert!( + !tag_strs.iter().any(|t| t.starts_with("dd.internal.resource:")), + "dd.internal.resource tag must be dropped: {:?}", + tag_strs + ); + assert!(tag_strs.contains(&"env:prod")); + } +} diff --git a/lib/saluki-components/src/encoders/datadog/metrics/v2/constants.rs b/lib/saluki-components/src/encoders/datadog/metrics/v2/constants.rs new file mode 100644 index 00000000000..4b9bdc85570 --- /dev/null +++ b/lib/saluki-components/src/encoders/datadog/metrics/v2/constants.rs @@ -0,0 +1,42 @@ +pub const SERIES_V2_COMPRESSED_SIZE_LIMIT: usize = 512_000; // 500 KiB +pub const SERIES_V2_UNCOMPRESSED_SIZE_LIMIT: usize = 5_242_880; // 5 MiB + +// Protocol Buffers field numbers for series and sketch payload messages in the V2 format. +// +// These field numbers come from the Protocol Buffers definitions in `lib/protos/datadog/proto/agent-payload/agent_payload.proto`. 
+pub const RESOURCES_TYPE_FIELD_NUMBER: u32 = 1; +pub const RESOURCES_NAME_FIELD_NUMBER: u32 = 2; + +pub const METADATA_ORIGIN_FIELD_NUMBER: u32 = 1; + +pub const ORIGIN_ORIGIN_PRODUCT_FIELD_NUMBER: u32 = 4; +pub const ORIGIN_ORIGIN_CATEGORY_FIELD_NUMBER: u32 = 5; +pub const ORIGIN_ORIGIN_SERVICE_FIELD_NUMBER: u32 = 6; + +pub const METRIC_POINT_VALUE_FIELD_NUMBER: u32 = 1; +pub const METRIC_POINT_TIMESTAMP_FIELD_NUMBER: u32 = 2; + +pub const DOGSKETCH_TS_FIELD_NUMBER: u32 = 1; +pub const DOGSKETCH_CNT_FIELD_NUMBER: u32 = 2; +pub const DOGSKETCH_MIN_FIELD_NUMBER: u32 = 3; +pub const DOGSKETCH_MAX_FIELD_NUMBER: u32 = 4; +pub const DOGSKETCH_AVG_FIELD_NUMBER: u32 = 5; +pub const DOGSKETCH_SUM_FIELD_NUMBER: u32 = 6; +pub const DOGSKETCH_K_FIELD_NUMBER: u32 = 7; +pub const DOGSKETCH_N_FIELD_NUMBER: u32 = 8; + +pub const SERIES_RESOURCES_FIELD_NUMBER: u32 = 1; +pub const SERIES_METRIC_FIELD_NUMBER: u32 = 2; +pub const SERIES_TAGS_FIELD_NUMBER: u32 = 3; +pub const SERIES_POINTS_FIELD_NUMBER: u32 = 4; +pub const SERIES_TYPE_FIELD_NUMBER: u32 = 5; +pub const SERIES_UNIT_FIELD_NUMBER: u32 = 6; +pub const SERIES_SOURCE_TYPE_NAME_FIELD_NUMBER: u32 = 7; +pub const SERIES_INTERVAL_FIELD_NUMBER: u32 = 8; +pub const SERIES_METADATA_FIELD_NUMBER: u32 = 9; + +pub const SKETCH_METRIC_FIELD_NUMBER: u32 = 1; +pub const SKETCH_HOST_FIELD_NUMBER: u32 = 2; +pub const SKETCH_TAGS_FIELD_NUMBER: u32 = 4; +pub const SKETCH_DOGSKETCHES_FIELD_NUMBER: u32 = 7; +pub const SKETCH_METADATA_FIELD_NUMBER: u32 = 8; diff --git a/lib/saluki-components/src/encoders/datadog/metrics/v2/mod.rs b/lib/saluki-components/src/encoders/datadog/metrics/v2/mod.rs new file mode 100644 index 00000000000..f50b4fc5ed4 --- /dev/null +++ b/lib/saluki-components/src/encoders/datadog/metrics/v2/mod.rs @@ -0,0 +1,793 @@ +use std::{fmt, num::NonZeroU64}; + +use datadog_protos::metrics as proto; +use ddsketch::DDSketch; +use http::{uri::PathAndQuery, HeaderValue, Method, Uri}; +use protobuf::{rt::WireType, CodedOutputStream, 
Enum}; +use saluki_common::iter::ReusableDeduplicator; +use saluki_context::tags::{SharedTagSet, Tag}; +use saluki_core::data_model::event::metric::{Metric, MetricOrigin, MetricValues}; +use saluki_error::GenericError; +use tracing::warn; + +use super::{ + endpoint::{EndpointConfiguration, MetricsEndpoint}, + v1, +}; +use crate::common::datadog::{ + io::RB_BUFFER_CHUNK_SIZE, + request_builder::{EndpointEncoder, RequestBuilder}, + DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, METRICS_SERIES_V1_PATH, + METRICS_SERIES_V2_PATH, METRICS_SKETCHES_PATH, +}; + +mod constants; +pub(super) use constants::{SERIES_V2_COMPRESSED_SIZE_LIMIT, SERIES_V2_UNCOMPRESSED_SIZE_LIMIT}; + +/// Creates a V2 request builder for the given endpoint. +/// +/// # Errors +/// +/// If the request builder cannot be created, an error is returned. +pub async fn create_v2_request_builder( + endpoint: MetricsEndpoint, endpoint_config: &EndpointConfiguration, +) -> Result, GenericError> { + let encoder = + MetricsEndpointEncoder::from_endpoint(endpoint).with_additional_tags(endpoint_config.additional_tags().clone()); + + let mut request_builder = + RequestBuilder::new(encoder, endpoint_config.compression_scheme(), RB_BUFFER_CHUNK_SIZE).await?; + request_builder.with_max_inputs_per_payload(endpoint_config.max_metrics_per_payload()); + + Ok(request_builder) +} + +/// An encoder for V2 metrics. +/// +/// This also handles the legacy V1 JSON series endpoint when `use_v2_api_series` is disabled. +#[derive(Debug)] +pub struct MetricsEndpointEncoder { + endpoint: MetricsEndpoint, + primary_scratch_buf: Vec, + secondary_scratch_buf: Vec, + packed_scratch_buf: Vec, + additional_tags: SharedTagSet, + tags_deduplicator: ReusableDeduplicator, +} + +impl MetricsEndpointEncoder { + /// Creates a new `MetricsEndpointEncoder` for the given endpoint. 
+ pub fn from_endpoint(endpoint: MetricsEndpoint) -> Self { + Self { + endpoint, + primary_scratch_buf: Vec::new(), + secondary_scratch_buf: Vec::new(), + packed_scratch_buf: Vec::new(), + additional_tags: SharedTagSet::default(), + tags_deduplicator: ReusableDeduplicator::new(), + } + } + + /// Sets the additional tags to be included with every metric encoded by this encoder. + /// + /// These tags are added in a deduplicated fashion, the same as instrumented tags and origin tags. This is an + /// optimized codepath for tag inclusion in high-volume scenarios, where creating new additional contexts + /// through the traditional means (for example, `ContextResolver`) would be too expensive. + pub fn with_additional_tags(mut self, additional_tags: SharedTagSet) -> Self { + self.additional_tags = additional_tags; + self + } +} + +/// Error returned when a metric fails to encode for either the V1 JSON or V2 protobuf intake. +#[derive(Debug)] +pub enum MetricsEncodeError { + /// Protobuf encoding failed. + Protobuf(protobuf::Error), + + /// JSON encoding failed. 
+ Json(serde_json::Error), +} + +impl fmt::Display for MetricsEncodeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Protobuf(e) => write!(f, "protobuf encode error: {}", e), + Self::Json(e) => write!(f, "json encode error: {}", e), + } + } +} + +impl std::error::Error for MetricsEncodeError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Protobuf(e) => Some(e), + Self::Json(e) => Some(e), + } + } +} + +impl From for MetricsEncodeError { + fn from(value: protobuf::Error) -> Self { + Self::Protobuf(value) + } +} + +impl From for MetricsEncodeError { + fn from(value: serde_json::Error) -> Self { + Self::Json(value) + } +} + +impl EndpointEncoder for MetricsEndpointEncoder { + type Input = Metric; + type EncodeError = MetricsEncodeError; + + fn encoder_name() -> &'static str { + "metrics" + } + + fn compressed_size_limit(&self) -> usize { + match self.endpoint { + MetricsEndpoint::SeriesV1 => v1::SERIES_COMPRESSED_SIZE_LIMIT, + MetricsEndpoint::SeriesV2 => constants::SERIES_V2_COMPRESSED_SIZE_LIMIT, + MetricsEndpoint::Sketches => DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, + } + } + + fn uncompressed_size_limit(&self) -> usize { + match self.endpoint { + MetricsEndpoint::SeriesV1 => v1::SERIES_UNCOMPRESSED_SIZE_LIMIT, + MetricsEndpoint::SeriesV2 => constants::SERIES_V2_UNCOMPRESSED_SIZE_LIMIT, + MetricsEndpoint::Sketches => DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, + } + } + + fn input_data_point_count(&self, input: &Self::Input) -> usize { + input.values().len() + } + + fn is_valid_input(&self, input: &Self::Input) -> bool { + let is_series_input = matches!( + input.values(), + MetricValues::Counter(..) | MetricValues::Rate(..) | MetricValues::Gauge(..) | MetricValues::Set(..) 
+ ); + + match self.endpoint { + MetricsEndpoint::SeriesV1 | MetricsEndpoint::SeriesV2 => is_series_input, + MetricsEndpoint::Sketches => !is_series_input, + } + } + + fn get_payload_prefix(&self) -> Option<&'static [u8]> { + match self.endpoint { + MetricsEndpoint::SeriesV1 => Some(v1::SERIES_PAYLOAD_PREFIX), + _ => None, + } + } + + fn get_payload_suffix(&self) -> Option<&'static [u8]> { + match self.endpoint { + MetricsEndpoint::SeriesV1 => Some(v1::SERIES_PAYLOAD_SUFFIX), + _ => None, + } + } + + fn get_input_separator(&self) -> Option<&'static [u8]> { + match self.endpoint { + MetricsEndpoint::SeriesV1 => Some(v1::SERIES_INPUT_SEPARATOR), + _ => None, + } + } + + fn encode(&mut self, input: &Self::Input, buffer: &mut Vec) -> Result<(), Self::EncodeError> { + match self.endpoint { + MetricsEndpoint::SeriesV1 => { + v1::encode_series_metric(input, &self.additional_tags, buffer, &mut self.tags_deduplicator)?; + Ok(()) + } + MetricsEndpoint::SeriesV2 | MetricsEndpoint::Sketches => { + // NOTE: We're passing _four_ buffers to `encode_single_metric`, which is a lot, but with good reason. + encode_single_metric( + input, + &self.additional_tags, + buffer, + &mut self.primary_scratch_buf, + &mut self.secondary_scratch_buf, + &mut self.packed_scratch_buf, + &mut self.tags_deduplicator, + )?; + + Ok(()) + } + } + } + + fn endpoint_uri(&self) -> Uri { + match self.endpoint { + MetricsEndpoint::SeriesV1 => PathAndQuery::from_static(METRICS_SERIES_V1_PATH).into(), + MetricsEndpoint::SeriesV2 => PathAndQuery::from_static(METRICS_SERIES_V2_PATH).into(), + MetricsEndpoint::Sketches => PathAndQuery::from_static(METRICS_SKETCHES_PATH).into(), + } + } + + fn endpoint_method(&self) -> Method { + // All endpoints use POST. 
+ Method::POST + } + + fn content_type(&self) -> HeaderValue { + match self.endpoint { + MetricsEndpoint::SeriesV1 => v1::CONTENT_TYPE.clone(), + MetricsEndpoint::SeriesV2 | MetricsEndpoint::Sketches => HeaderValue::from_static("application/x-protobuf"), + } + } +} + +fn field_number_for_metric_type(metric: &Metric) -> u32 { + match metric.values() { + MetricValues::Counter(..) | MetricValues::Rate(..) | MetricValues::Gauge(..) | MetricValues::Set(..) => 1, + MetricValues::Histogram(..) | MetricValues::Distribution(..) => 1, + } +} + +fn get_message_size(raw_msg_size: usize) -> Result { + const MAX_MESSAGE_SIZE: u64 = i32::MAX as u64; + + // Individual messages cannot be larger than `i32::MAX`, so check that here before proceeding. + if raw_msg_size as u64 > MAX_MESSAGE_SIZE { + return Err(std::io::Error::other("message size exceeds limit (2147483648 bytes)").into()); + } + + Ok(raw_msg_size as u32) +} + +fn get_message_size_from_buffer(buf: &[u8]) -> Result { + get_message_size(buf.len()) +} + +fn encode_single_metric( + metric: &Metric, additional_tags: &SharedTagSet, output_buf: &mut Vec, primary_scratch_buf: &mut Vec, + secondary_scratch_buf: &mut Vec, packed_scratch_buf: &mut Vec, + tags_deduplicator: &mut ReusableDeduplicator, +) -> Result<(), protobuf::Error> { + let mut output_stream = CodedOutputStream::vec(output_buf); + let field_number = field_number_for_metric_type(metric); + + write_nested_message(&mut output_stream, primary_scratch_buf, field_number, |os| { + // Depending on the metric type, we write out the appropriate fields. + match metric.values() { + MetricValues::Counter(..) | MetricValues::Rate(..) | MetricValues::Gauge(..) | MetricValues::Set(..) => { + encode_series_metric(metric, additional_tags, os, secondary_scratch_buf, tags_deduplicator) + } + MetricValues::Histogram(..) | MetricValues::Distribution(..) 
=> encode_sketch_metric( + metric, + additional_tags, + os, + secondary_scratch_buf, + packed_scratch_buf, + tags_deduplicator, + ), + } + }) +} + +fn encode_series_metric( + metric: &Metric, additional_tags: &SharedTagSet, output_stream: &mut CodedOutputStream<'_>, + scratch_buf: &mut Vec, tags_deduplicator: &mut ReusableDeduplicator, +) -> Result<(), protobuf::Error> { + // Write the metric name and tags. + output_stream.write_string(constants::SERIES_METRIC_FIELD_NUMBER, metric.context().name())?; + + let deduplicated_tags = get_deduplicated_tags(metric, additional_tags, tags_deduplicator); + write_series_tags(deduplicated_tags, output_stream, scratch_buf)?; + + // Set the host resource. + write_resource( + output_stream, + scratch_buf, + "host", + metric.metadata().hostname().unwrap_or_default(), + )?; + + // Write the origin metadata, if it exists. + if let Some(origin) = metric.metadata().origin() { + match origin { + MetricOrigin::SourceType(source_type) => { + output_stream.write_string(constants::SERIES_SOURCE_TYPE_NAME_FIELD_NUMBER, source_type.as_ref())?; + } + MetricOrigin::OriginMetadata { + product, + subproduct, + product_detail, + } => { + write_origin_metadata( + output_stream, + scratch_buf, + constants::SERIES_METADATA_FIELD_NUMBER, + *product, + *subproduct, + *product_detail, + )?; + } + } + } + + // Now write out our metric type, points, and interval (if applicable). 
+ let (metric_type, points, maybe_interval) = match metric.values() { + MetricValues::Counter(points) => (proto::MetricType::COUNT, points.into_iter(), None), + MetricValues::Rate(points, interval) => (proto::MetricType::RATE, points.into_iter(), Some(interval)), + MetricValues::Gauge(points) => (proto::MetricType::GAUGE, points.into_iter(), None), + MetricValues::Set(points) => (proto::MetricType::GAUGE, points.into_iter(), None), + _ => unreachable!(), + }; + + output_stream.write_enum(constants::SERIES_TYPE_FIELD_NUMBER, metric_type.value())?; + + if let Some(unit) = metric.metadata().unit() { + output_stream.write_string(constants::SERIES_UNIT_FIELD_NUMBER, unit)?; + } + + for (timestamp, value) in points { + // If this is a rate metric, scale our value by the interval, in seconds. + let value = maybe_interval + .map(|interval| value / interval.as_secs_f64()) + .unwrap_or(value); + let timestamp = timestamp.map(|ts| ts.get()).unwrap_or(0) as i64; + + write_point(output_stream, scratch_buf, value, timestamp)?; + } + + if let Some(interval) = maybe_interval { + output_stream.write_int64(constants::SERIES_INTERVAL_FIELD_NUMBER, interval.as_secs() as i64)?; + } + + Ok(()) +} + +fn encode_sketch_metric( + metric: &Metric, additional_tags: &SharedTagSet, output_stream: &mut CodedOutputStream<'_>, + scratch_buf: &mut Vec, packed_scratch_buf: &mut Vec, tags_deduplicator: &mut ReusableDeduplicator, +) -> Result<(), protobuf::Error> { + // Write the metric name and tags. + output_stream.write_string(constants::SKETCH_METRIC_FIELD_NUMBER, metric.context().name())?; + + let deduplicated_tags = get_deduplicated_tags(metric, additional_tags, tags_deduplicator); + write_sketch_tags(deduplicated_tags, output_stream, scratch_buf)?; + + // Write the host. + output_stream.write_string( + constants::SKETCH_HOST_FIELD_NUMBER, + metric.metadata().hostname().unwrap_or_default(), + )?; + + // Set the origin metadata, if it exists. 
+ if let Some(MetricOrigin::OriginMetadata { + product, + subproduct, + product_detail, + }) = metric.metadata().origin() + { + write_origin_metadata( + output_stream, + scratch_buf, + constants::SKETCH_METADATA_FIELD_NUMBER, + *product, + *subproduct, + *product_detail, + )?; + } + + // Write out our sketches. + match metric.values() { + MetricValues::Distribution(sketches) => { + for (timestamp, value) in sketches { + write_dogsketch(output_stream, scratch_buf, packed_scratch_buf, timestamp, value)?; + } + } + MetricValues::Histogram(points) => { + for (timestamp, histogram) in points { + // We convert histograms to sketches to be able to write them out in the payload. + let mut ddsketch = DDSketch::default(); + for sample in histogram.samples() { + ddsketch.insert_n(sample.value.into_inner(), sample.weight.0 as u64); + } + + write_dogsketch(output_stream, scratch_buf, packed_scratch_buf, timestamp, &ddsketch)?; + } + } + _ => unreachable!(), + } + + Ok(()) +} + +fn write_resource( + output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, resource_type: &str, resource_name: &str, +) -> Result<(), protobuf::Error> { + write_nested_message( + output_stream, + scratch_buf, + constants::SERIES_RESOURCES_FIELD_NUMBER, + |os| { + os.write_string(constants::RESOURCES_TYPE_FIELD_NUMBER, resource_type)?; + os.write_string(constants::RESOURCES_NAME_FIELD_NUMBER, resource_name) + }, + ) +} + +fn write_origin_metadata( + output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, field_number: u32, origin_product: u32, + origin_category: u32, origin_service: u32, +) -> Result<(), protobuf::Error> { + // TODO: Figure out how to cleanly use `write_nested_message` here. 
+ + scratch_buf.clear(); + + { + let mut origin_output_stream = CodedOutputStream::vec(scratch_buf); + origin_output_stream.write_uint32(constants::ORIGIN_ORIGIN_PRODUCT_FIELD_NUMBER, origin_product)?; + origin_output_stream.write_uint32(constants::ORIGIN_ORIGIN_CATEGORY_FIELD_NUMBER, origin_category)?; + origin_output_stream.write_uint32(constants::ORIGIN_ORIGIN_SERVICE_FIELD_NUMBER, origin_service)?; + origin_output_stream.flush()?; + } + + // We do a little song and dance here because the `Origin` message is embedded inside of `Metadata`, so we need to + // write out field numbers/length delimiters in order: `Metadata`, and then `Origin`... but we write out origin + // message to the scratch buffer first... so we write out our `Metadata` preamble stuff to get its length, and then + // use that in conjunction with the `Origin` message size to write out the full `Metadata` message. + let origin_message_size = get_message_size_from_buffer(scratch_buf)?; + + let mut metadata_preamble_buf = [0; 64]; + let metadata_preamble_len = { + let mut metadata_output_stream = CodedOutputStream::bytes(&mut metadata_preamble_buf[..]); + metadata_output_stream.write_tag(constants::METADATA_ORIGIN_FIELD_NUMBER, WireType::LengthDelimited)?; + metadata_output_stream.write_raw_varint32(origin_message_size)?; + metadata_output_stream.flush()?; + metadata_output_stream.total_bytes_written() as usize + }; + + let metadata_message_size = get_message_size(scratch_buf.len() + metadata_preamble_len)?; + + output_stream.write_tag(field_number, WireType::LengthDelimited)?; + output_stream.write_raw_varint32(metadata_message_size)?; + output_stream.write_raw_bytes(&metadata_preamble_buf[..metadata_preamble_len])?; + output_stream.write_raw_bytes(scratch_buf) +} + +fn write_point( + output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, value: f64, timestamp: i64, +) -> Result<(), protobuf::Error> { + write_nested_message( + output_stream, + scratch_buf, + 
/// Writes a single sketch entry for `sketch` to `output_stream`, nested under the sketch payload's
/// `dogsketches` field.
///
/// The entry contains the timestamp (zero when `timestamp` is `None`), the summary statistics (count, min, max,
/// average, sum), and two packed repeated fields holding the bin keys and bin counts. Empty sketches are skipped
/// with a warning and nothing is written.
///
/// `scratch_buf` holds the nested message body and `packed_scratch_buf` holds each packed field while it is
/// being built; both are cleared as part of writing.
///
/// # Errors
///
/// If writing to any of the streams fails, an error is returned.
fn write_dogsketch(
    output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec<u8>, packed_scratch_buf: &mut Vec<u8>,
    timestamp: Option<NonZeroU64>, sketch: &DDSketch,
) -> Result<(), protobuf::Error> {
    // If the sketch is empty, we don't write it out.
    if sketch.is_empty() {
        warn!("Attempted to write an empty sketch to sketches payload, skipping.");
        return Ok(());
    }

    write_nested_message(
        output_stream,
        scratch_buf,
        constants::SKETCH_DOGSKETCHES_FIELD_NUMBER,
        |os| {
            os.write_int64(
                constants::DOGSKETCH_TS_FIELD_NUMBER,
                timestamp.map_or(0, |ts| ts.get() as i64),
            )?;
            os.write_int64(constants::DOGSKETCH_CNT_FIELD_NUMBER, sketch.count() as i64)?;
            // NOTE(review): these unwraps rely on the statistics being present for any non-empty sketch,
            // which the `is_empty` guard above is meant to ensure -- confirm against `DDSketch`.
            os.write_double(constants::DOGSKETCH_MIN_FIELD_NUMBER, sketch.min().unwrap())?;
            os.write_double(constants::DOGSKETCH_MAX_FIELD_NUMBER, sketch.max().unwrap())?;
            os.write_double(constants::DOGSKETCH_AVG_FIELD_NUMBER, sketch.avg().unwrap())?;
            os.write_double(constants::DOGSKETCH_SUM_FIELD_NUMBER, sketch.sum().unwrap())?;

            // Bin keys and bin counts are written as two parallel packed fields, in bin order.
            let bin_keys = sketch.bins().iter().map(|bin| bin.key());
            write_repeated_packed_from_iter(
                os,
                packed_scratch_buf,
                constants::DOGSKETCH_K_FIELD_NUMBER,
                bin_keys,
                |inner_os, value| inner_os.write_sint32_no_tag(value),
            )?;

            let bin_counts = sketch.bins().iter().map(|bin| bin.count());
            write_repeated_packed_from_iter(
                os,
                packed_scratch_buf,
                constants::DOGSKETCH_N_FIELD_NUMBER,
                bin_counts,
                |inner_os, value| inner_os.write_uint32_no_tag(value),
            )
        },
    )
}
+ + tags_deduplicator.deduplicated(chained_tags) +} + +fn write_tags<'a, I, F>( + tags: I, output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, tag_encoder: F, +) -> Result<(), protobuf::Error> +where + I: Iterator, + F: Fn(&Tag, &mut CodedOutputStream<'_>, &mut Vec) -> Result<(), protobuf::Error>, +{ + for tag in tags { + tag_encoder(tag, output_stream, scratch_buf)?; + } + + Ok(()) +} + +fn write_series_tags<'a, I>( + tags: I, output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, +) -> Result<(), protobuf::Error> +where + I: Iterator, +{ + write_tags(tags, output_stream, scratch_buf, |tag, os, buf| { + // If this is a resource tag, we'll convert it directly to a resource entry. + if tag.name() == "dd.internal.resource" { + if let Some((resource_type, resource_name)) = tag.value().and_then(|s| s.split_once(':')) { + write_resource(os, buf, resource_type, resource_name) + } else { + Ok(()) + } + } else { + // We're dealing with a normal tag. + os.write_string(constants::SERIES_TAGS_FIELD_NUMBER, tag.as_str()) + } + }) +} + +fn write_sketch_tags<'a, I>( + tags: I, output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, +) -> Result<(), protobuf::Error> +where + I: Iterator, +{ + write_tags(tags, output_stream, scratch_buf, |tag, os, _buf| { + // We always write the tags as-is, without any special handling for resource tags. 
+ os.write_string(constants::SKETCH_TAGS_FIELD_NUMBER, tag.as_str()) + }) +} + +fn write_nested_message( + output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, field_number: u32, writer: F, +) -> Result<(), protobuf::Error> +where + F: FnOnce(&mut CodedOutputStream<'_>) -> Result<(), protobuf::Error>, +{ + scratch_buf.clear(); + + { + let mut nested_output_stream = CodedOutputStream::vec(scratch_buf); + writer(&mut nested_output_stream)?; + nested_output_stream.flush()?; + } + + output_stream.write_tag(field_number, WireType::LengthDelimited)?; + + let nested_message_size = get_message_size_from_buffer(scratch_buf)?; + output_stream.write_raw_varint32(nested_message_size)?; + output_stream.write_raw_bytes(scratch_buf) +} + +fn write_repeated_packed_from_iter( + output_stream: &mut CodedOutputStream<'_>, scratch_buf: &mut Vec, field_number: u32, values: I, writer: F, +) -> Result<(), protobuf::Error> +where + I: Iterator, + F: Fn(&mut CodedOutputStream<'_>, T) -> Result<(), protobuf::Error>, +{ + // This is a helper function that lets us write out a packed repeated field from an iterator of values. + // `CodedOutputStream` has similar functions to handle this, but they require a slice of values, which would mean we + // need to either allocate a new vector each time to hold the values, or thread through two additional vectors (one + // for `i32`, one for `u32`) to reuse the allocation... both of which are not great options. + // + // We've simply opted to pass through a _single_ vector that we can reuse, and write the packed values directly to + // that, almost identically to how `CodedOutputStream::write_repeated_packed_*` methods would do it. 
+ + scratch_buf.clear(); + + { + let mut packed_output_stream = CodedOutputStream::vec(scratch_buf); + for value in values { + writer(&mut packed_output_stream, value)?; + } + packed_output_stream.flush()?; + } + + let data_size = get_message_size_from_buffer(scratch_buf)?; + + output_stream.write_tag(field_number, WireType::LengthDelimited)?; + output_stream.write_raw_varint32(data_size)?; + output_stream.write_raw_bytes(scratch_buf) +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use protobuf::CodedOutputStream; + use saluki_common::iter::ReusableDeduplicator; + use saluki_context::{tags::SharedTagSet, Context}; + use saluki_core::data_model::event::metric::{Metric, MetricMetadata, MetricValues}; + use stringtheory::MetaString; + + use super::{encode_series_metric, encode_sketch_metric, v1, MetricsEndpoint, MetricsEndpointEncoder}; + use crate::common::datadog::request_builder::EndpointEncoder as _; + + #[test] + fn histogram_vs_sketch_identical_payload() { + // For the same exact set of points, we should be able to construct either a histogram or distribution from + // those points, and when encoded as a sketch payload, end up with the same exact payload. + // + // They should be identical because the goal is that we convert histograms into sketches in the same way we + // would have originally constructed a sketch based on the same samples. 
+ let samples = &[1.0, 2.0, 3.0, 4.0, 5.0]; + let histogram = Metric::histogram("simple_samples", samples); + let distribution = Metric::distribution("simple_samples", samples); + let host_tags = SharedTagSet::default(); + + let mut buf1 = Vec::new(); + let mut buf2 = Vec::new(); + let mut tags_deduplicator = ReusableDeduplicator::new(); + + let mut histogram_payload = Vec::new(); + { + let mut histogram_writer = CodedOutputStream::vec(&mut histogram_payload); + encode_sketch_metric( + &histogram, + &host_tags, + &mut histogram_writer, + &mut buf1, + &mut buf2, + &mut tags_deduplicator, + ) + .expect("Failed to encode histogram as sketch"); + } + + let mut distribution_payload = Vec::new(); + { + let mut distribution_writer = CodedOutputStream::vec(&mut distribution_payload); + encode_sketch_metric( + &distribution, + &host_tags, + &mut distribution_writer, + &mut buf1, + &mut buf2, + &mut tags_deduplicator, + ) + .expect("Failed to encode distribution as sketch"); + } + + assert_eq!(histogram_payload, distribution_payload); + } + + #[test] + fn input_valid() { + // Our encoder should always consider series metrics valid when set to either series endpoint, and similarly + // for sketch metrics when set to the sketches endpoint. 
    #[test]
    fn input_data_point_count_tracks_metric_values() {
        // The data point count reported by the encoder is the number of values held by the metric: a counter
        // built from two (timestamp, value) pairs reports two data points.
        let counter = Metric::counter("counter", [(123, 1.0), (124, 2.0)]);
        // A histogram built from three samples reports a single data point.
        // NOTE(review): presumably the three samples land in one histogram point -- confirm against `Metric`.
        let histogram = Metric::histogram("histogram", [1.0, 2.0, 3.0]);

        let series_endpoint = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::SeriesV2);
        let sketches_endpoint = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::Sketches);

        assert_eq!(series_endpoint.input_data_point_count(&counter), 2);
        assert_eq!(sketches_endpoint.input_data_point_count(&histogram), 1);
    }
string + // in field 6 (MetricSeries.unit), which the Datadog backend already accepts. + let context = Context::from_static_parts("my.timer.avg", &[]); + let metadata = MetricMetadata::default().with_unit(MetaString::from_static("millisecond")); + let gauge = Metric::from_parts(context, MetricValues::gauge([1.0_f64]), metadata); + + let host_tags = SharedTagSet::default(); + let mut scratch_buf = Vec::new(); + let mut tags_deduplicator = ReusableDeduplicator::new(); + + let mut payload = Vec::new(); + { + let mut writer = CodedOutputStream::vec(&mut payload); + encode_series_metric( + &gauge, + &host_tags, + &mut writer, + &mut scratch_buf, + &mut tags_deduplicator, + ) + .expect("Failed to encode gauge as series metric"); + writer.flush().expect("Failed to flush"); + } + + // In the protobuf wire format, a string field with field number 6 has tag byte 0x32 ((6 << 3) | 2). + // The tag is followed by a varint length and then the UTF-8 bytes of the string. + let expected_tag: u8 = (6 << 3) | 2; // 0x32 + let expected_value = b"millisecond"; + + let tag_pos = payload + .windows(1 + 1 + expected_value.len()) + .position(|w| w[0] == expected_tag && w[1] == expected_value.len() as u8 && &w[2..] == expected_value); + + assert!( + tag_pos.is_some(), + "series payload should contain unit field (field 6 = 'millisecond'), got bytes: {:?}", + payload + ); + } + + #[test] + fn series_v1_endpoint_routing() { + // SeriesV1 advertises the V1 URI, JSON content type, and the {"series":[...]} framing. + let encoder = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::SeriesV1); + assert_eq!(encoder.endpoint_uri().path(), "/api/v1/series"); + assert_eq!(encoder.content_type(), "application/json"); + assert_eq!(encoder.get_payload_prefix(), Some(v1::SERIES_PAYLOAD_PREFIX)); + assert_eq!(encoder.get_payload_suffix(), Some(v1::SERIES_PAYLOAD_SUFFIX)); + assert_eq!(encoder.get_input_separator(), Some(v1::SERIES_INPUT_SEPARATOR)); + + // V2 series stays on protobuf with no framing. 
+ let v2 = MetricsEndpointEncoder::from_endpoint(MetricsEndpoint::SeriesV2); + assert_eq!(v2.endpoint_uri().path(), "/api/v2/series"); + assert_eq!(v2.content_type(), "application/x-protobuf"); + assert!(v2.get_payload_prefix().is_none()); + } +} diff --git a/lib/saluki-components/src/encoders/datadog/metrics/v3/constants.rs b/lib/saluki-components/src/encoders/datadog/metrics/v3/constants.rs new file mode 100644 index 00000000000..6f54ea38af4 --- /dev/null +++ b/lib/saluki-components/src/encoders/datadog/metrics/v3/constants.rs @@ -0,0 +1,29 @@ +// Protocol Buffers field numbers for series and sketch payload messages in the V3 format. +// +// These field numbers come from the Protocol Buffers definitions in `lib/protos/datadog/proto/agent-payload/intake_v3.proto`. +pub const DICT_NAME_STR_FIELD_NUMBER: u32 = 1; +pub const DICT_TAGS_STR_FIELD_NUMBER: u32 = 2; +pub const DICT_TAGSETS_FIELD_NUMBER: u32 = 3; +pub const DICT_RESOURCE_STR_FIELD_NUMBER: u32 = 4; +pub const DICT_RESOURCE_LEN_FIELD_NUMBER: u32 = 5; +pub const DICT_RESOURCE_TYPE_FIELD_NUMBER: u32 = 6; +pub const DICT_RESOURCE_NAME_FIELD_NUMBER: u32 = 7; +pub const DICT_SOURCE_TYPE_NAME_FIELD_NUMBER: u32 = 8; +pub const DICT_ORIGIN_INFO_FIELD_NUMBER: u32 = 9; +pub const TYPES_FIELD_NUMBER: u32 = 10; +pub const NAMES_FIELD_NUMBER: u32 = 11; +pub const TAGS_FIELD_NUMBER: u32 = 12; +pub const RESOURCES_FIELD_NUMBER: u32 = 13; +pub const INTERVALS_FIELD_NUMBER: u32 = 14; +pub const NUM_POINTS_FIELD_NUMBER: u32 = 15; +pub const TIMESTAMPS_FIELD_NUMBER: u32 = 16; +pub const VALS_SINT64_FIELD_NUMBER: u32 = 17; +pub const VALS_FLOAT32_FIELD_NUMBER: u32 = 18; +pub const VALS_FLOAT64_FIELD_NUMBER: u32 = 19; +pub const SKETCH_NUM_BINS_FIELD_NUMBER: u32 = 20; +pub const SKETCH_BIN_KEYS_FIELD_NUMBER: u32 = 21; +pub const SKETCH_BIN_CNTS_FIELD_NUMBER: u32 = 22; +pub const SOURCE_TYPE_NAME_FIELD_NUMBER: u32 = 23; +pub const ORIGIN_INFO_FIELD_NUMBER: u32 = 24; +pub const DICT_UNIT_STR_FIELD_NUMBER: u32 = 25; +pub const 
UNIT_REFS_FIELD_NUMBER: u32 = 26; diff --git a/lib/saluki-components/src/encoders/datadog/metrics/v3/interner.rs b/lib/saluki-components/src/encoders/datadog/metrics/v3/interner.rs new file mode 100644 index 00000000000..c0ed8219dd6 --- /dev/null +++ b/lib/saluki-components/src/encoders/datadog/metrics/v3/interner.rs @@ -0,0 +1,94 @@ +//! Generic interning for dictionary deduplication. + +use std::{borrow::Borrow, hash::Hash}; + +use saluki_common::collections::FastHashMap; + +/// Generic interning structure for dictionary deduplication. +/// +/// Assigns unique 1-based IDs to values, returning the same ID for duplicate values. +/// ID 0 is reserved for "empty/none" in the V3 format. +#[derive(Debug)] +pub struct Interner { + index: FastHashMap, + last_id: i64, +} + +impl Default for Interner { + fn default() -> Self { + Self::new() + } +} + +impl Interner { + /// Creates a new empty interner. + pub fn new() -> Self { + Self { + index: FastHashMap::default(), + last_id: 0, + } + } + + /// Gets the ID for a key, inserting it if not present. + /// + /// Returns `(id, is_new)` where `is_new` is true if the key was newly inserted. + /// IDs are 1-based (0 is reserved for empty/none values). + pub fn get_or_insert(&mut self, key: &Q) -> (i64, bool) + where + K: Borrow, + Q: ToOwned + Hash + Eq + ?Sized, + { + if let Some(&id) = self.index.get(key) { + (id, false) + } else { + self.last_id += 1; + self.index.insert(key.to_owned(), self.last_id); + (self.last_id, true) + } + } + + /// Returns the number of interned values. 
+ #[cfg(test)] + pub fn len(&self) -> usize { + self.index.len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_interner_basic() { + let mut interner: Interner = Interner::new(); + + // First insertion returns ID 1 and is_new=true + let (id1, is_new1) = interner.get_or_insert("hello"); + assert_eq!(id1, 1); + assert!(is_new1); + + // Second insertion of same value returns same ID and is_new=false + let (id2, is_new2) = interner.get_or_insert("hello"); + assert_eq!(id2, 1); + assert!(!is_new2); + + // New value gets next ID + let (id3, is_new3) = interner.get_or_insert("world"); + assert_eq!(id3, 2); + assert!(is_new3); + + assert_eq!(interner.len(), 2); + } + + #[test] + fn test_interner_tuples() { + let mut interner: Interner<(i32, i32, i32)> = Interner::new(); + + let (id1, _) = interner.get_or_insert(&(1, 2, 3)); + let (id2, _) = interner.get_or_insert(&(1, 2, 3)); + let (id3, _) = interner.get_or_insert(&(4, 5, 6)); + + assert_eq!(id1, id2); + assert_ne!(id1, id3); + } +} diff --git a/lib/saluki-components/src/encoders/datadog/metrics/v3/mod.rs b/lib/saluki-components/src/encoders/datadog/metrics/v3/mod.rs new file mode 100644 index 00000000000..47cb2c4dc1f --- /dev/null +++ b/lib/saluki-components/src/encoders/datadog/metrics/v3/mod.rs @@ -0,0 +1,26 @@ +//! V3 columnar metrics payload encoder. +//! +//! This module implements the V3 columnar format for Datadog metrics payloads. Unlike the V2 +//! row-based protobuf format where each metric is a complete message, V3 uses a columnar layout +//! with dictionary-based string deduplication for efficient encoding. +//! +//! The key differences from V2: +//! - Dictionary deduplication for metric names, tags, resources, and origin info +//! - Delta encoding for index arrays to reduce payload size +//! - Batch encoding - all metrics must be collected before serialization +//! - Separate value columns for different numeric types (sint64, float32, float64) +//! +//! # Missing +//! +//! 
- Incrementally compressed blocks. This is a centerpiece of the implementation on the Agent side, +//! but we do this in a single shot as part of this initial implementation. + +mod constants; +mod interner; +mod payload; +mod types; +mod writer; + +pub(super) use payload::{V3EncodedRequest, V3PayloadLimits, V3PayloadRequest}; +pub use types::V3MetricType; +pub use writer::V3Writer; diff --git a/lib/saluki-components/src/encoders/datadog/metrics/v3/payload.rs b/lib/saluki-components/src/encoders/datadog/metrics/v3/payload.rs new file mode 100644 index 00000000000..9dbfce7a607 --- /dev/null +++ b/lib/saluki-components/src/encoders/datadog/metrics/v3/payload.rs @@ -0,0 +1,52 @@ +use http::Request; +use saluki_common::buf::FrozenChunkedBytesBuffer; +use saluki_core::data_model::event::metric::Metric; + +/// Limits used when building V3 metrics payloads. +#[derive(Clone, Copy, Debug)] +pub(crate) struct V3PayloadLimits { + pub(crate) max_compressed_size: usize, + pub(crate) max_uncompressed_size: usize, + max_metrics_per_payload: usize, + pub(crate) max_points_per_payload: usize, +} + +impl V3PayloadLimits { + pub(crate) const fn new( + max_compressed_size: usize, max_uncompressed_size: usize, max_metrics_per_payload: usize, + max_points_per_payload: usize, + ) -> Self { + Self { + max_compressed_size, + max_uncompressed_size, + max_metrics_per_payload, + max_points_per_payload, + } + } + + pub(crate) fn request_fits(self, request: &V3EncodedRequest) -> bool { + request.compressed_len <= self.max_compressed_size && request.uncompressed_len <= self.max_uncompressed_size + } + + pub(crate) fn point_count_fits(self, count: usize) -> bool { + count <= self.max_points_per_payload + } + + pub(crate) fn should_flush_metric_count_limit(self, metrics: &[Metric]) -> bool { + metrics.len() >= self.max_metrics_per_payload + } +} + +/// Encoded V3 request with measured payload sizes. 
/// V3 metric type values.
///
/// These match the `metricType` enum in `intake_v3.proto`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum V3MetricType {
    Count = 1,
    Rate = 2,
    Gauge = 3,
    Sketch = 4,
}

impl V3MetricType {
    /// Returns the numeric value for encoding in the types column.
    pub const fn as_u64(self) -> u64 {
        self as u64
    }
}

/// V3 value type values.
///
/// These are encoded in bits 4-7 of the types column and indicate which
/// value array contains the metric's points.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum V3ValueType {
    /// Value is zero, not stored explicitly.
    Zero = 0x00,

    /// Value is stored in vals_sint64.
    Sint64 = 0x10,

    /// Value is stored in vals_float32.
    Float32 = 0x20,

    /// Value is stored in vals_float64.
    Float64 = 0x30,
}

impl V3ValueType {
    /// Returns the numeric value for encoding in the types column.
    ///
    /// `const` so it can be used in constant expressions, mirroring [`V3MetricType::as_u64`].
    pub const fn as_u64(self) -> u64 {
        self as u64
    }
}
In particular, it +/// distinguishes small integers (that fit losslessly in f32) from large integers +/// (that don't), so that mixing a large integer with a Float32 value correctly +/// escalates to Float64 rather than silently truncating the integer. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[repr(u8)] +enum PointKind { + /// Value is zero. + Zero = 0, + /// Integer with |v| <= 2^24, fits losslessly in both sint64 and f32. + Int24 = 1, + /// Integer with |v| > 2^24, fits in sint64 varint but NOT losslessly in f32. + Int48 = 2, + /// Fractional value exactly representable as f32. + Float32 = 3, + /// Everything else - requires full f64 precision. + Float64 = 4, +} + +/// Maximum integer magnitude that fits losslessly in f32 (2^24). +const F32_INT_MAX: i64 = 1 << 24; + +impl PointKind { + /// Classifies a single f64 value. + fn for_value(v: f64) -> Self { + if v == 0.0 { + return Self::Zero; + } + + // Varint range that fits in 7 bytes or less (49 bits). + const VARINT_WIDTH: i32 = 7 * 7 - 1; + const MAX_INT: i64 = 1 << VARINT_WIDTH; + const MIN_INT: i64 = -MAX_INT; + + let i = v as i64; + if (MIN_INT..MAX_INT).contains(&i) && (i as f64) == v { + if (-F32_INT_MAX..=F32_INT_MAX).contains(&i) { + return Self::Int24; + } + return Self::Int48; + } + + if (v as f32 as f64) == v { + return Self::Float32; + } + + Self::Float64 + } + + /// Combines two point kinds into the smallest kind that can represent both. + /// + /// This is `max(self, other)` in all cases **except**: + /// - `Int48 + Float32 = Float64` (and vice versa), because large integers + /// lose precision in f32, and fractional values can't be stored as sint64. + fn union(self, other: Self) -> Self { + match (self, other) { + (Self::Int48, Self::Float32) | (Self::Float32, Self::Int48) => Self::Float64, + _ => self.max(other), + } + } + + /// Converts to the wire-format value type. 
+ fn to_value_type(self) -> V3ValueType { + match self { + Self::Zero => V3ValueType::Zero, + Self::Int24 | Self::Int48 => V3ValueType::Sint64, + Self::Float32 => V3ValueType::Float32, + Self::Float64 => V3ValueType::Float64, + } + } +} + +/// Determines the best [`V3ValueType`] for a set of f64 values. +/// +/// Uses [`PointKind`] internally to avoid precision loss when mixing +/// large integers with fractional float32 values. +pub(super) fn value_type_for_values(values: impl Iterator) -> V3ValueType { + let mut kind = PointKind::Zero; + for v in values { + kind = kind.union(PointKind::for_value(v)); + } + kind.to_value_type() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_point_kind_classification() { + // Zero + assert_eq!(PointKind::for_value(0.0), PointKind::Zero); + + // Small integers (fit in f32) + assert_eq!(PointKind::for_value(100.0), PointKind::Int24); + assert_eq!(PointKind::for_value(-100.0), PointKind::Int24); + assert_eq!(PointKind::for_value((1 << 24) as f64), PointKind::Int24); + assert_eq!(PointKind::for_value(-((1 << 24) as f64)), PointKind::Int24); + + // Large integers (don't fit losslessly in f32) + assert_eq!(PointKind::for_value(((1 << 24) + 1) as f64), PointKind::Int48); + assert_eq!(PointKind::for_value((1i64 << 30) as f64), PointKind::Int48); + + // Float32 + assert_eq!(PointKind::for_value(1.5), PointKind::Float32); + assert_eq!(PointKind::for_value(2.75), PointKind::Float32); + + // Float64 + assert_eq!(PointKind::for_value(std::f64::consts::PI), PointKind::Float64); + let large = ((1i64 << 50) + 1) as f64; + assert_eq!(PointKind::for_value(large), PointKind::Float64); + } + + #[test] + fn test_point_kind_union() { + // Standard widening (max) + assert_eq!(PointKind::Zero.union(PointKind::Int24), PointKind::Int24); + assert_eq!(PointKind::Int24.union(PointKind::Int48), PointKind::Int48); + assert_eq!(PointKind::Int24.union(PointKind::Float32), PointKind::Float32); + 
#[test]
fn test_value_type_for_values() {
    // Small adapter so each case reads as "these points -> that storage type".
    let classify = |points: &[f64]| value_type_for_values(points.iter().copied());
    let big_int = (1i64 << 30) as f64;

    // Nothing but zeros needs no storage at all.
    assert_eq!(classify(&[0.0, 0.0]), V3ValueType::Zero);

    // Integers, small or large, go to the sint64 column.
    assert_eq!(classify(&[100.0, 200.0]), V3ValueType::Sint64);
    assert_eq!(classify(&[big_int, 200.0]), V3ValueType::Sint64);

    // A small integer next to an f32-exact fraction can share the float32 column.
    assert_eq!(classify(&[100.0, 1.5]), V3ValueType::Float32);

    // A large integer next to an f32-exact fraction must widen to float64.
    assert_eq!(classify(&[big_int, 1.5]), V3ValueType::Float64);

    // Any value needing f64 precision forces float64 for the whole metric.
    assert_eq!(classify(&[100.0, std::f64::consts::PI]), V3ValueType::Float64);

    // No points at all behaves like all zeros.
    assert_eq!(classify(&[]), V3ValueType::Zero);
}
/// Encoded V3 payload data ready for protobuf serialization.
///
/// Used primarily as a helper for testing.
///
/// Element types follow the protobuf writer call used for each column when serializing
/// (`sint64`/`int64` -> `i64`, `uint64` -> `u64`, `int32`/`sint32` -> `i32`, `uint32` -> `u32`,
/// `float` -> `f32`, `double` -> `f64`, raw bytes -> `u8`).
#[derive(Debug, Default)]
struct V3EncodedData {
    // Dictionary encoded bytes (varint-length-prefixed strings)
    pub dict_name_bytes: Vec<u8>,
    pub dict_tags_bytes: Vec<u8>,
    pub dict_tagsets: Vec<i64>,
    pub dict_resource_str_bytes: Vec<u8>,
    pub dict_resource_len: Vec<i64>,
    pub dict_resource_type: Vec<i64>,
    pub dict_resource_name: Vec<i64>,
    pub dict_source_type_bytes: Vec<u8>,
    pub dict_origin_info: Vec<i32>,
    pub dict_unit_bytes: Vec<u8>,

    // Per-metric columns (one entry per metric, except conditional columns)
    pub types: Vec<u64>,
    pub names: Vec<i64>,
    pub tags: Vec<i64>,
    pub resources: Vec<i64>,
    pub intervals: Vec<u64>,
    pub num_points: Vec<u64>,
    pub source_type_names: Vec<i64>,
    pub origin_infos: Vec<i64>,
    pub unit_refs: Vec<i64>, // Present only for metrics with FLAG_HAS_UNIT set.

    // Point data (varies per metric based on num_points)
    pub timestamps: Vec<i64>,
    pub vals_sint64: Vec<i64>,
    pub vals_float32: Vec<f32>,
    pub vals_float64: Vec<f64>,

    // Sketch data
    pub sketch_num_bins: Vec<u64>,
    pub sketch_bin_keys: Vec<i32>,
    pub sketch_bin_cnts: Vec<u32>,
}
+#[derive(Debug, Default)] +pub struct V3Writer { + // Interners for dictionary deduplication + name_interner: Interner, + tag_interner: Interner, + tagset_interner: Interner>, + resource_str_interner: Interner, + resource_interner: Interner>, + source_type_interner: Interner, + origin_interner: Interner<(i32, i32, i32)>, + unit_interner: Interner, + + // Dictionary encoded bytes + dict_name_bytes: Vec, + dict_tags_bytes: Vec, + dict_tagsets: Vec, + dict_resource_str_bytes: Vec, + dict_resource_len: Vec, + dict_resource_type: Vec, + dict_resource_name: Vec, + dict_source_type_bytes: Vec, + dict_origin_info: Vec, + dict_unit_bytes: Vec, + + // Per-metric columns (one entry per metric, except conditional columns) + types: Vec, + names: Vec, + tags: Vec, + resources: Vec, + intervals: Vec, + num_points: Vec, + source_type_names: Vec, + origin_infos: Vec, + unit_refs: Vec, // Present only for metrics with FLAG_HAS_UNIT set. + + // Point data + timestamps: Vec, + vals_sint64: Vec, + vals_float32: Vec, + vals_float64: Vec, + + // Sketch data + sketch_num_bins: Vec, + sketch_bin_keys: Vec, + sketch_bin_cnts: Vec, + + // Scratch data + tag_ids: Vec, + resource_ids: Vec<(i64, i64)>, +} + +impl V3Writer { + /// Creates a new V3 writer. + pub fn new() -> Self { + Self::default() + } + + /// Begins writing a new metric. + /// + /// Returns a [`V3MetricBuilder`] that must be used to set the metric's + /// properties and add points, then closed with [`V3MetricBuilder::close`]. 
+ pub fn write(&mut self, metric_type: V3MetricType, name: &str) -> V3MetricBuilder<'_> { + let name_id = self.intern_name(name); + let metric_idx = self.types.len(); + let point_start_idx = self.vals_float64.len(); + let sint64_start_idx = self.vals_sint64.len(); + + // Initialize the per-metric columns with default values + self.types.push(metric_type.as_u64()); + self.names.push(name_id); + self.tags.push(0); + self.resources.push(0); + self.intervals.push(0); + self.num_points.push(0); + self.source_type_names.push(0); + self.origin_infos.push(0); + + V3MetricBuilder { + writer: self, + point_start_idx, + sint64_start_idx, + metric_idx, + unit_ref_idx: None, + } + } + + fn finalize_inner(mut self) -> V3EncodedData { + // Delta encode all of the index arrays first. + delta_encode(&mut self.names); + delta_encode(&mut self.tags); + delta_encode(&mut self.resources); + delta_encode(&mut self.source_type_names); + delta_encode(&mut self.origin_infos); + delta_encode(&mut self.unit_refs); + delta_encode(&mut self.timestamps); + + V3EncodedData { + dict_name_bytes: self.dict_name_bytes, + dict_tags_bytes: self.dict_tags_bytes, + dict_tagsets: self.dict_tagsets, + dict_resource_str_bytes: self.dict_resource_str_bytes, + dict_resource_len: self.dict_resource_len, + dict_resource_type: self.dict_resource_type, + dict_resource_name: self.dict_resource_name, + dict_source_type_bytes: self.dict_source_type_bytes, + dict_origin_info: self.dict_origin_info, + dict_unit_bytes: self.dict_unit_bytes, + types: self.types, + names: self.names, + tags: self.tags, + resources: self.resources, + intervals: self.intervals, + num_points: self.num_points, + source_type_names: self.source_type_names, + origin_infos: self.origin_infos, + unit_refs: self.unit_refs, + timestamps: self.timestamps, + vals_sint64: self.vals_sint64, + vals_float32: self.vals_float32, + vals_float64: self.vals_float64, + sketch_num_bins: self.sketch_num_bins, + sketch_bin_keys: self.sketch_bin_keys, + 
sketch_bin_cnts: self.sketch_bin_cnts, + } + } + + /// Finalizes the writer and serializes the data to the given output buffer. + /// + /// This performs delta encoding on all index arrays. + pub fn finalize(self, output: &mut Vec) -> Result<(), GenericError> { + let data = self.finalize_inner(); + + // Create our writer and start, well.. writing! + let mut os = CodedOutputStream::vec(output); + + // Dictionary fields (bytes - varint-length-prefixed strings concatenated) + if !data.dict_name_bytes.is_empty() { + os.write_bytes(DICT_NAME_STR_FIELD_NUMBER, &data.dict_name_bytes)?; + } + if !data.dict_tags_bytes.is_empty() { + os.write_bytes(DICT_TAGS_STR_FIELD_NUMBER, &data.dict_tags_bytes)?; + } + + // Packed repeated fields for dictionaries + os.write_repeated_packed_sint64(DICT_TAGSETS_FIELD_NUMBER, &data.dict_tagsets)?; + + if !data.dict_resource_str_bytes.is_empty() { + os.write_bytes(DICT_RESOURCE_STR_FIELD_NUMBER, &data.dict_resource_str_bytes)?; + } + + os.write_repeated_packed_int64(DICT_RESOURCE_LEN_FIELD_NUMBER, &data.dict_resource_len)?; + os.write_repeated_packed_sint64(DICT_RESOURCE_TYPE_FIELD_NUMBER, &data.dict_resource_type)?; + os.write_repeated_packed_sint64(DICT_RESOURCE_NAME_FIELD_NUMBER, &data.dict_resource_name)?; + + if !data.dict_source_type_bytes.is_empty() { + os.write_bytes(DICT_SOURCE_TYPE_NAME_FIELD_NUMBER, &data.dict_source_type_bytes)?; + } + + os.write_repeated_packed_int32(DICT_ORIGIN_INFO_FIELD_NUMBER, &data.dict_origin_info)?; + if !data.dict_unit_bytes.is_empty() { + os.write_bytes(DICT_UNIT_STR_FIELD_NUMBER, &data.dict_unit_bytes)?; + } + + // Per-metric columns + os.write_repeated_packed_uint64(TYPES_FIELD_NUMBER, &data.types)?; + os.write_repeated_packed_sint64(NAMES_FIELD_NUMBER, &data.names)?; + os.write_repeated_packed_sint64(TAGS_FIELD_NUMBER, &data.tags)?; + os.write_repeated_packed_sint64(RESOURCES_FIELD_NUMBER, &data.resources)?; + os.write_repeated_packed_uint64(INTERVALS_FIELD_NUMBER, &data.intervals)?; + 
os.write_repeated_packed_uint64(NUM_POINTS_FIELD_NUMBER, &data.num_points)?; + os.write_repeated_packed_sint64(SOURCE_TYPE_NAME_FIELD_NUMBER, &data.source_type_names)?; + os.write_repeated_packed_sint64(ORIGIN_INFO_FIELD_NUMBER, &data.origin_infos)?; + os.write_repeated_packed_sint64(UNIT_REFS_FIELD_NUMBER, &data.unit_refs)?; + + // Point data + os.write_repeated_packed_sint64(TIMESTAMPS_FIELD_NUMBER, &data.timestamps)?; + os.write_repeated_packed_sint64(VALS_SINT64_FIELD_NUMBER, &data.vals_sint64)?; + os.write_repeated_packed_float(VALS_FLOAT32_FIELD_NUMBER, &data.vals_float32)?; + os.write_repeated_packed_double(VALS_FLOAT64_FIELD_NUMBER, &data.vals_float64)?; + + // Sketch data + os.write_repeated_packed_uint64(SKETCH_NUM_BINS_FIELD_NUMBER, &data.sketch_num_bins)?; + os.write_repeated_packed_sint32(SKETCH_BIN_KEYS_FIELD_NUMBER, &data.sketch_bin_keys)?; + os.write_repeated_packed_uint32(SKETCH_BIN_CNTS_FIELD_NUMBER, &data.sketch_bin_cnts)?; + + os.flush()?; + Ok(()) + } + + // Internal helper methods + + fn intern_name(&mut self, name: &str) -> i64 { + if name.is_empty() { + return 0; + } + let (id, is_new) = self.name_interner.get_or_insert(name); + if is_new { + append_len_str(&mut self.dict_name_bytes, name); + } + id + } + + fn intern_tag(&mut self, tag: &str) { + if tag.is_empty() { + self.tag_ids.push(0); + return; + } + + let (id, is_new) = self.tag_interner.get_or_insert(tag); + if is_new { + append_len_str(&mut self.dict_tags_bytes, tag); + } + self.tag_ids.push(id); + } + + fn intern_tagset(&mut self, tags: I) -> i64 + where + I: Iterator, + S: AsRef, + { + self.tag_ids.clear(); + for tag in tags { + self.intern_tag(tag.as_ref()); + } + + if self.tag_ids.is_empty() { + return 0; + } + + let (id, is_new) = self.tagset_interner.get_or_insert(&self.tag_ids); + if is_new { + self.encode_tagset(); + } + id + } + + fn encode_tagset(&mut self) { + // Push the length + self.dict_tagsets.push(self.tag_ids.len() as i64); + + let start = self.dict_tagsets.len(); + 
+ // Add all tag IDs + self.dict_tagsets.extend_from_slice(&self.tag_ids); + + // Sort and delta-encode the tagset portion + self.dict_tagsets[start..].sort_unstable(); + delta_encode(&mut self.dict_tagsets[start..]); + } + + fn intern_resource_str(&mut self, s: &str) -> i64 { + if s.is_empty() { + return 0; + } + let (id, is_new) = self.resource_str_interner.get_or_insert(s); + if is_new { + append_len_str(&mut self.dict_resource_str_bytes, s); + } + id + } + + fn intern_resources(&mut self, resources: &[(&str, &str)]) -> i64 { + self.resource_ids.clear(); + for (resource_type, resource_name) in resources { + let type_id = self.intern_resource_str(resource_type); + let name_id = self.intern_resource_str(resource_name); + self.resource_ids.push((type_id, name_id)); + } + + if self.resource_ids.is_empty() { + return 0; + } + + let (id, is_new) = self.resource_interner.get_or_insert(&self.resource_ids); + if is_new { + self.encode_resources(); + } + id + } + + fn encode_resources(&mut self) { + self.dict_resource_len.push(self.resource_ids.len() as i64); + + let type_start = self.dict_resource_type.len(); + let name_start = self.dict_resource_name.len(); + + for (type_id, name_id) in &self.resource_ids { + self.dict_resource_type.push(*type_id); + self.dict_resource_name.push(*name_id); + } + + delta_encode(&mut self.dict_resource_type[type_start..]); + delta_encode(&mut self.dict_resource_name[name_start..]); + } + + fn intern_source_type(&mut self, s: &str) -> i64 { + if s.is_empty() { + return 0; + } + let (id, is_new) = self.source_type_interner.get_or_insert(s); + if is_new { + append_len_str(&mut self.dict_source_type_bytes, s); + } + id + } + + fn intern_origin(&mut self, product: i32, category: i32, service: i32) -> i64 { + if product == 0 && category == 0 && service == 0 { + return 0; + } + + let (id, is_new) = self.origin_interner.get_or_insert(&(product, category, service)); + if is_new { + self.dict_origin_info.push(product); + 
self.dict_origin_info.push(category); + self.dict_origin_info.push(service); + } + id + } + + fn intern_unit(&mut self, unit: &str) -> i64 { + if unit.is_empty() { + return 0; + } + let (id, is_new) = self.unit_interner.get_or_insert(unit); + if is_new { + append_len_str(&mut self.dict_unit_bytes, unit); + } + id + } +} + +/// Builder for a single metric within a V3 payload. +/// +/// Use the setter methods to configure the metric, add points with [`add_point`](Self::add_point), +/// then call [`close`](Self::close) to finalize. +pub struct V3MetricBuilder<'a> { + writer: &'a mut V3Writer, + point_start_idx: usize, + sint64_start_idx: usize, + metric_idx: usize, + unit_ref_idx: Option, +} + +impl<'a> V3MetricBuilder<'a> { + /// Sets the tags for this metric. + /// + /// Tags should be in "key:value" format. + pub fn set_tags(&mut self, tags: I) + where + I: Iterator, + S: AsRef, + { + let tagset_id = self.writer.intern_tagset(tags); + self.writer.tags[self.metric_idx] = tagset_id; + } + + /// Sets the resources for this metric. + /// + /// Resources are (type, name) pairs, for example, (`host`, `server1`). + pub fn set_resources(&mut self, resources: &[(&str, &str)]) { + let res_id = self.writer.intern_resources(resources); + self.writer.resources[self.metric_idx] = res_id; + } + + /// Sets the interval for this metric (used for rate metrics). + pub fn set_interval(&mut self, interval: u64) { + self.writer.intervals[self.metric_idx] = interval; + } + + /// Sets the source type name for this metric. + pub fn set_source_type(&mut self, source_type: &str) { + if source_type.is_empty() { + self.writer.source_type_names[self.metric_idx] = 0; + return; + } + let id = self.writer.intern_source_type(source_type); + self.writer.source_type_names[self.metric_idx] = id; + } + + /// Sets the origin metadata for this metric. 
+ pub fn set_origin(&mut self, product: u32, category: u32, service: u32, no_index: bool) { + let id = self + .writer + .intern_origin(product as i32, category as i32, service as i32); + self.writer.origin_infos[self.metric_idx] = id; + if no_index { + self.writer.types[self.metric_idx] |= FLAG_NO_INDEX; + } + } + + /// Sets the unit for this metric. + pub fn set_unit(&mut self, unit: &str) { + if unit.is_empty() { + self.writer.types[self.metric_idx] &= !FLAG_HAS_UNIT; + if let Some(unit_ref_idx) = self.unit_ref_idx.take() { + self.writer.unit_refs.remove(unit_ref_idx); + } + return; + } + + let id = self.writer.intern_unit(unit); + if let Some(unit_ref_idx) = self.unit_ref_idx { + self.writer.unit_refs[unit_ref_idx] = id; + } else { + self.unit_ref_idx = Some(self.writer.unit_refs.len()); + self.writer.unit_refs.push(id); + } + self.writer.types[self.metric_idx] |= FLAG_HAS_UNIT; + } + + /// Adds a data point to this metric. + pub fn add_point(&mut self, timestamp: i64, value: f64) { + self.writer.timestamps.push(timestamp); + self.writer.vals_float64.push(value); + self.writer.num_points[self.metric_idx] += 1; + } + + /// Adds sketch data for a distribution metric. + /// + /// For sketches, the summary values (count, sum, min, max) are stored as points, + /// and the bin keys/counts are stored separately. 
+ pub fn add_sketch( + &mut self, timestamp: i64, count: i64, sum: f64, min: f64, max: f64, bin_keys: &[i32], bin_counts: &[u32], + ) { + self.writer.timestamps.push(timestamp); + + // Count goes in sint64, sum/min/max go in float64 + self.writer.vals_sint64.push(count); + self.writer.vals_float64.push(sum); + self.writer.vals_float64.push(min); + self.writer.vals_float64.push(max); + + // Store bin data + self.writer.sketch_num_bins.push(bin_keys.len() as u64); + + let key_start = self.writer.sketch_bin_keys.len(); + self.writer.sketch_bin_keys.extend_from_slice(bin_keys); + self.writer.sketch_bin_cnts.extend_from_slice(bin_counts); + + // Delta-encode this sketch's bin keys + delta_encode_i32(&mut self.writer.sketch_bin_keys[key_start..]); + + self.writer.num_points[self.metric_idx] += 1; + } + + /// Finalizes this metric. + /// + /// This compacts the point values to use the smallest representation + /// that can hold all values without loss. + pub fn close(mut self) { + self.compact_values(); + } + + fn compact_values(&mut self) { + let count = self.writer.num_points[self.metric_idx] as usize; + if count == 0 { + return; + } + + let start = self.point_start_idx; + let end = self.writer.vals_float64.len(); + + // Determine the best value type for all points in this metric. + let val_ty = value_type_for_values(self.writer.vals_float64[start..end].iter().copied()); + + // Update the type field + self.writer.types[self.metric_idx] |= val_ty.as_u64(); + + // Convert values to the appropriate storage + match val_ty { + V3ValueType::Zero => { + // Values are all zero, don't store anything + self.writer.vals_float64.truncate(start); + } + V3ValueType::Sint64 => { + let is_sketch = (self.writer.types[self.metric_idx] & 0x0F) == V3MetricType::Sketch as u64; + if is_sketch { + // For sketches, vals_sint64 already has one count per point (pushed by add_sketch), + // and vals_float64 has 3 values per point (sum, min, max). 
/// Appends `s` to `dst` as a varint length prefix (base-128, least significant group first)
/// followed by the raw UTF-8 bytes.
fn append_len_str(dst: &mut Vec<u8>, s: &str) {
    let mut len = s.len() as u64;
    // Every 7-bit group except the last carries the continuation bit.
    while len >= 0x80 {
        dst.push((len & 0x7F) as u8 | 0x80);
        len >>= 7;
    }
    dst.push(len as u8);
    dst.extend_from_slice(s.as_bytes());
}

/// Delta-encodes `s` in place: the first element is kept, every later element becomes the
/// difference from its predecessor.
///
/// Differences use wrapping arithmetic, so the function never panics on overflow and the
/// original values can be recovered with a wrapping running sum.
fn delta_encode(s: &mut [i64]) {
    // Walk backwards so each subtraction still sees the original predecessor.
    for i in (1..s.len()).rev() {
        s[i] = s[i].wrapping_sub(s[i - 1]);
    }
}

/// `i32` counterpart of [`delta_encode`], with the same wrapping behaviour.
fn delta_encode_i32(s: &mut [i32]) {
    for i in (1..s.len()).rev() {
        s[i] = s[i].wrapping_sub(s[i - 1]);
    }
}
#[test]
fn test_writer_multiple_metrics() {
    let mut writer = V3Writer::new();

    {
        let mut m1 = writer.write(V3MetricType::Count, "metric1");
        m1.add_point(1000, 10.0);
        m1.close();
    }

    {
        let mut m2 = writer.write(V3MetricType::Rate, "metric2");
        m2.set_interval(60);
        m2.add_point(2000, 20.0);
        m2.close();
    }

    let data = writer.finalize_inner();

    assert_eq!(data.types.len(), 2);

    // The names get dictionary IDs 1 and 2; the column is delta-encoded on finalize.
    assert_eq!(data.names, vec![1, 1]);

    // Intervals are not delta-encoded, so the second metric's interval is stored as-is.
    assert_eq!(data.intervals, vec![0, 60]);

    // Timestamps 1000 and 2000 are delta-encoded across the whole column.
    assert_eq!(data.timestamps, vec![1000, 1000]);
}
#[test]
fn test_serialize_basic_metric() {
    let mut writer = V3Writer::new();

    {
        let mut metric = writer.write(V3MetricType::Gauge, "test.metric");
        metric.add_point(1000, 42.0);
        metric.close();
    }

    let mut output = Vec::new();
    writer.finalize(&mut output).unwrap();

    // Should produce non-empty output
    assert!(!output.is_empty());

    // The name dictionary is written as one bytes field, so its content (the varint length
    // 11 followed by the name itself) must appear verbatim somewhere in the payload.
    let needle: &[u8] = b"\x0btest.metric";
    assert!(
        output.windows(needle.len()).any(|window| window == needle),
        "serialized payload should contain the length-prefixed metric name"
    );
}
DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, @@ -151,10 +152,11 @@ impl Forwarder for Datadog { maybe_payload = context.payloads().next() => match maybe_payload { Some(payload) => if let Some(http_payload) = payload.try_into_http_payload() { let (payload_meta, request) = http_payload.into_parts(); - let transaction_meta = Metadata::from_event_and_data_point_count( + let mut transaction_meta = Metadata::from_event_and_data_point_count( payload_meta.event_count(), payload_meta.data_point_count(), ); + transaction_meta.payload_info = payload_meta.get::().copied(); let transaction = Transaction::from_original(transaction_meta, request); forwarder.send_transaction(transaction).await?; @@ -182,6 +184,8 @@ fn get_dd_endpoint_name(uri: &Uri) -> Option { "/api/v1/check_run" => Some(MetaString::from_static("check_run_v1")), "/api/v1/events_batch" => Some(MetaString::from_static("events_batch_v1")), "/api/v0.2/traces" => Some(MetaString::from_static("traces_v0.2")), + "/api/intake/metrics/v3/series" => Some(MetaString::from_static("series_v3")), + "/api/intake/metrics/v3/sketches" => Some(MetaString::from_static("sketches_v3")), _ => None, } } diff --git a/lib/saluki-core/Cargo.toml b/lib/saluki-core/Cargo.toml index b1fc8a7438c..8235c4c020b 100644 --- a/lib/saluki-core/Cargo.toml +++ b/lib/saluki-core/Cargo.toml @@ -9,6 +9,7 @@ repository = { workspace = true } workspace = true [dependencies] +anymap3 = { workspace = true } async-trait = { workspace = true } bitmask-enum = { workspace = true } ddsketch = { workspace = true } diff --git a/lib/saluki-core/src/data_model/payload/metadata.rs b/lib/saluki-core/src/data_model/payload/metadata.rs index 9e8c3458999..fcfced12276 100644 --- a/lib/saluki-core/src/data_model/payload/metadata.rs +++ b/lib/saluki-core/src/data_model/payload/metadata.rs @@ -1,8 +1,16 @@ +use std::any::Any; + +use anymap3::{CloneAny, Map}; + /// Payload metadata. +/// +/// Contains the event count and an extensible map of typed metadata values. 
+/// Components can store and retrieve arbitrary typed data using the `set` and `get` methods. #[derive(Clone)] pub struct PayloadMetadata { event_count: usize, data_point_count: usize, + extensions: Map, } impl PayloadMetadata { @@ -11,6 +19,7 @@ impl PayloadMetadata { PayloadMetadata { event_count, data_point_count: 0, + extensions: Map::new(), } } @@ -19,6 +28,7 @@ impl PayloadMetadata { PayloadMetadata { event_count, data_point_count, + extensions: Map::new(), } } @@ -31,4 +41,20 @@ impl PayloadMetadata { pub fn data_point_count(&self) -> usize { self.data_point_count } + + /// Gets a reference to a typed extension value, if present. + pub fn get(&self) -> Option<&T> { + self.extensions.get::() + } + + /// Sets a typed extension value, returning `self` for chaining. + pub fn with(mut self, value: T) -> Self { + self.extensions.insert(value); + self + } + + /// Sets a typed extension value in place. + pub fn set(&mut self, value: T) { + self.extensions.insert(value); + } } diff --git a/test/correctness/dsd-plain-v3-validation/config.yaml b/test/correctness/dsd-plain-v3-validation/config.yaml new file mode 100644 index 00000000000..c91eb1af1c7 --- /dev/null +++ b/test/correctness/dsd-plain-v3-validation/config.yaml @@ -0,0 +1,22 @@ +analysis_mode: metrics +millstone: + image: saluki-images/millstone:latest + config_path: millstone.yaml +datadog_intake: + image: saluki-images/datadog-intake:latest + config_path: ../datadog-intake.yaml +baseline: + image: saluki-images/datadog-agent:testing-release + files: + - datadog.yaml:/etc/datadog-agent/datadog.yaml + additional_env_vars: + - DD_API_KEY=correctness-test +comparison: + image: saluki-images/datadog-agent:testing-release + files: + - datadog.yaml:/etc/datadog-agent/datadog.yaml + additional_env_vars: + - DD_API_KEY=correctness-test + - DD_DATA_PLANE_ENABLED=true + - DD_DATA_PLANE_DOGSTATSD_ENABLED=true + - DD_AGGREGATE_CONTEXT_LIMIT=500000 diff --git a/test/correctness/dsd-plain-v3-validation/datadog.yaml 
b/test/correctness/dsd-plain-v3-validation/datadog.yaml new file mode 100644 index 00000000000..98525edcdd0 --- /dev/null +++ b/test/correctness/dsd-plain-v3-validation/datadog.yaml @@ -0,0 +1,36 @@ +# Using a fixed hostname is both required to avoid errors, and also will ensure consistent tags between DSD/ADP. +hostname: "correctness-testing" + +# Dummy API key. +api_key: dummy-api-key-correctness-testing + +# We have to specifically configure the health port to use. +health_port: 5555 + +# Point ourselves at the datadog-intake service. +dd_url: "http://datadog-intake:2049" + +# Turn off UDP and listen on a UDS socket instead. +dogstatsd_port: 0 +dogstatsd_socket: /airlock/metrics.sock + +# Ensure origin detection is disabled since we can't support it with ADP in standalone mode. +dogstatsd_origin_detection: false + +# Gauges can be processed out-of-order when multiple workers are used, while ADP does not use multiple workers, so ADP +# always ends up with the correct (last seen) value, while DSD might return the last seen value... or the value seen +# four updates ago, etc etc. +dogstatsd_workers_count: 1 + +# Enable V3 metrics encoding in validation mode: both V2 and V3 payloads are sent simultaneously, +# paired by X-Metrics-Request-ID. V3 payloads are counted in the metrics dump; V2 payloads are +# used only for comparison against V3 to validate encoding correctness. 
+serializer_experimental_use_v3_api: + series: + endpoints: + - "http://datadog-intake:2049" + validate: true + sketches: + endpoints: + - "http://datadog-intake:2049" + validate: true diff --git a/test/correctness/dsd-plain-v3-validation/millstone.yaml b/test/correctness/dsd-plain-v3-validation/millstone.yaml new file mode 100644 index 00000000000..3e0b309eeb5 --- /dev/null +++ b/test/correctness/dsd-plain-v3-validation/millstone.yaml @@ -0,0 +1,91 @@ +seed: + [ + 2, + 3, + 5, + 7, + 11, + 13, + 17, + 19, + 23, + 29, + 31, + 37, + 41, + 43, + 47, + 53, + 59, + 61, + 67, + 71, + 73, + 79, + 83, + 89, + 97, + 101, + 103, + 107, + 109, + 113, + 127, + 131, + ] +target: "unixgram:///airlock/metrics.sock" +aggregation_bucket_width_secs: 10 +volume: 10000 +corpus: + # TODO: This is a little confusing, because we're specifying the number of metrics to generate (which we _will_ + # honor faithfully) but since we're specifying the contexts count in the payload definition, we might not + # actually generate 10,000 unique contexts, but instead somewhere below 3,000, where each of them is repeated a + # few times to reach the total count. + # + # We need to figure that out, since the intent is that specifying a fixed count should lead to that many metrics + # (and no more) being generated, such that you could depend on that for testing purposes. + size: 10000 + payload: + dogstatsd: + contexts: + constant: 3000 + name_length: + inclusive: + min: 4 + max: 8 + tag_length: + inclusive: + min: 4 + max: 8 + tags_per_msg: + inclusive: + min: 2 + max: 4 + value: + float_probability: 0.5 + range: + inclusive: + min: -9999999 + max: 9999999 + multivalue_count: + inclusive: + min: 2 + max: 32 + multivalue_pack_probability: 0.08 + kind_weights: + metric: 100 + event: 0 + service_check: 0 + # Weights based on analyzing internal Datadog usage data of metric type for metrics sent to the Agent over DogStatsD. 
+ metric_weights: + count: 208 + gauge: 66 + timer: 0 + distribution: 72 + # We specifically _don't_ want to generate sets, because we can't assert their correctness once they've been + # aggregated: a gauge is generated for each aggregator flush that represents the unique number of values in a + # given set, but in general, gauges are meant to be last-write-wins, so unless the metric names/tags can + # indicate that they're for a set, we can't know that it's safe for us to _aggregate_ the gauge values, and with + # our default behavior of taking the latest gauge value... we end up with non-deterministic results. + set: 0 + histogram: 1 diff --git a/test/correctness/dsd-plain-v3/config.yaml b/test/correctness/dsd-plain-v3/config.yaml new file mode 100644 index 00000000000..c91eb1af1c7 --- /dev/null +++ b/test/correctness/dsd-plain-v3/config.yaml @@ -0,0 +1,22 @@ +analysis_mode: metrics +millstone: + image: saluki-images/millstone:latest + config_path: millstone.yaml +datadog_intake: + image: saluki-images/datadog-intake:latest + config_path: ../datadog-intake.yaml +baseline: + image: saluki-images/datadog-agent:testing-release + files: + - datadog.yaml:/etc/datadog-agent/datadog.yaml + additional_env_vars: + - DD_API_KEY=correctness-test +comparison: + image: saluki-images/datadog-agent:testing-release + files: + - datadog.yaml:/etc/datadog-agent/datadog.yaml + additional_env_vars: + - DD_API_KEY=correctness-test + - DD_DATA_PLANE_ENABLED=true + - DD_DATA_PLANE_DOGSTATSD_ENABLED=true + - DD_AGGREGATE_CONTEXT_LIMIT=500000 diff --git a/test/correctness/dsd-plain-v3/datadog.yaml b/test/correctness/dsd-plain-v3/datadog.yaml new file mode 100644 index 00000000000..f2bed674a08 --- /dev/null +++ b/test/correctness/dsd-plain-v3/datadog.yaml @@ -0,0 +1,35 @@ +# Using a fixed hostname is both required to avoid errors, and also will ensure consistent tags between DSD/ADP. +hostname: "correctness-testing" + +# Dummy API key. 
+api_key: dummy-api-key-correctness-testing + +# We have to specifically configure the health port to use. +health_port: 5555 + +# Point ourselves at the datadog-intake service. +dd_url: "http://datadog-intake:2049" + +# Turn off UDP and listen on a UDS socket instead. +dogstatsd_port: 0 +dogstatsd_socket: /airlock/metrics.sock + +# Ensure origin detection is disabled since we can't support it with ADP in standalone mode. +dogstatsd_origin_detection: false + +# Gauges can be processed out-of-order when multiple workers are used, while ADP does not use multiple workers, so ADP +# always ends up with the correct (last seen) value, while DSD might return the last seen value... or the value seen +# four updates ago, etc etc. +dogstatsd_workers_count: 1 + +# Enable V3 metrics encoding for all endpoints. +# +# We leave validation mode off since we want to focus on just the V3 metrics, and we don't yet have a way to separate +# the V3 metrics from the V2 metrics in order to emulate validation done on the backend. 
+serializer_experimental_use_v3_api: + series: + endpoints: + - "http://datadog-intake:2049" + sketches: + endpoints: + - "http://datadog-intake:2049" diff --git a/test/correctness/dsd-plain-v3/millstone.yaml b/test/correctness/dsd-plain-v3/millstone.yaml new file mode 100644 index 00000000000..3e0b309eeb5 --- /dev/null +++ b/test/correctness/dsd-plain-v3/millstone.yaml @@ -0,0 +1,91 @@ +seed: + [ + 2, + 3, + 5, + 7, + 11, + 13, + 17, + 19, + 23, + 29, + 31, + 37, + 41, + 43, + 47, + 53, + 59, + 61, + 67, + 71, + 73, + 79, + 83, + 89, + 97, + 101, + 103, + 107, + 109, + 113, + 127, + 131, + ] +target: "unixgram:///airlock/metrics.sock" +aggregation_bucket_width_secs: 10 +volume: 10000 +corpus: + # TODO: This is a little confusing, because we're specifying the number of metrics to generate (which we _will_ + # honor faithfully) but since we're specifying the contexts count in the payload definition, we might not + # actually generate 10,000 unique contexts, but instead somewhere below 3,000, where each of them is repeated a + # few times to reach the total count. + # + # We need to figure that out, since the intent is that specifying a fixed count should lead to that many metrics + # (and no more) being generated, such that you could depend on that for testing purposes. + size: 10000 + payload: + dogstatsd: + contexts: + constant: 3000 + name_length: + inclusive: + min: 4 + max: 8 + tag_length: + inclusive: + min: 4 + max: 8 + tags_per_msg: + inclusive: + min: 2 + max: 4 + value: + float_probability: 0.5 + range: + inclusive: + min: -9999999 + max: 9999999 + multivalue_count: + inclusive: + min: 2 + max: 32 + multivalue_pack_probability: 0.08 + kind_weights: + metric: 100 + event: 0 + service_check: 0 + # Weights based on analyzing internal Datadog usage data of metric type for metrics sent to the Agent over DogStatsD. 
+ metric_weights: + count: 208 + gauge: 66 + timer: 0 + distribution: 72 + # We specifically _don't_ want to generate sets, because we can't assert their correctness once they've been + # aggregated: a gauge is generated for each aggregator flush that represents the unique number of values in a + # given set, but in general, gauges are meant to be last-write-wins, so unless the metric names/tags can + # indicate that they're for a set, we can't know that it's safe for us to _aggregate_ the gauge values, and with + # our default behavior of taking the latest gauge value... we end up with non-deterministic results. + set: 0 + histogram: 1