Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ test-all: test test-property test-docs test-miri test-loom

.PHONY: test-correctness
test-correctness: ## Runs the complete correctness suite
test-correctness: test-correctness-dsd-plain test-correctness-dsd-origin-detection test-correctness-otlp-metrics test-correctness-otlp-traces test-correctness-otlp-traces-ottl-filtering test-correctness-otlp-traces-ottl-transform
test-correctness: test-correctness-dsd-plain test-correctness-dsd-origin-detection test-correctness-dsd-tag-filterlist test-correctness-otlp-metrics test-correctness-otlp-traces test-correctness-otlp-traces-ottl-filtering test-correctness-otlp-traces-ottl-transform

.PHONY: test-correctness-dsd-plain
test-correctness-dsd-plain: build-ground-truth
Expand All @@ -535,6 +535,12 @@ test-correctness-dsd-origin-detection: ## Runs the 'dsd-origin-detection' correc
@echo "[*] Running 'dsd-origin-detection' correctness test case..."
@target/release/ground-truth $(shell pwd)/test/correctness/dsd-origin-detection/config.yaml

.PHONY: test-correctness-dsd-tag-filterlist
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we reduce code duplication by having the correctness test "recipe" stated only once then sending the name into it?

  echo-anything:
        @echo $(NAME)

  hello:
        $(MAKE) echo-anything NAME="Hello, World"

  goodbye:
        $(MAKE) echo-anything NAME="Goodbye, World"

  farewell:
        $(MAKE) echo-anything NAME="Farewell, friend"

test-correctness-dsd-tag-filterlist: build-ground-truth
test-correctness-dsd-tag-filterlist: ## Runs the 'dsd-tag-filterlist' correctness test case
@echo "[*] Running 'dsd-tag-filterlist' correctness test case..."
@target/release/ground-truth $(shell pwd)/test/correctness/dsd-tag-filterlist/config.yaml

.PHONY: test-correctness-otlp-metrics
test-correctness-otlp-metrics: build-ground-truth
test-correctness-otlp-metrics: ## Runs the 'otlp-metrics' correctness test case
Expand Down
54 changes: 47 additions & 7 deletions bin/correctness/ground-truth/src/analysis/metrics/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,16 @@ impl NormalizedMetric {
/// # Errors
///
/// If the raw values are empty, or if the values cannot be normalized, an error is returned.
pub fn try_from_values(
context: MetricContext, mut raw_values: Vec<(u64, MetricValue)>,
fn try_from_normalized_values(
context: NormalizedMetricContext, mut raw_values: Vec<(u64, MetricValue)>,
) -> Result<Self, GenericError> {
// We need to first sort the raw values by timestamp, to ensure we have proper ordering semantics.
raw_values.sort_by(|a, b| a.0.cmp(&b.0));
let value = try_normalize_values(&raw_values)
.with_error_context(|| format!("Failed to normalize values for metric '{}'", context.name()))?;

Ok(Self {
context: NormalizedMetricContext::from_stele_context(context),
context,
raw_values,
value,
})
Expand Down Expand Up @@ -176,23 +176,24 @@ impl NormalizedMetrics {
return Err(generic_error!("Cannot normalize an empty set of metrics."));
}

// Aggregate metric values by (context, type), using a `BTreeMap` so that we can get sorted metrics for ~free.
// Aggregate metric values by normalized (context, type), using a `BTreeMap` so that metric ordering is stable
// even when the original intake payload emits the same tag set in a different order.
//
// We group by type because metrics with the same context but different types (e.g., Count vs Rate) cannot be
// merged together during normalization.
let mut aggregated_context_values = BTreeMap::new();

for metric in metrics {
let context = metric.context();
let context = NormalizedMetricContext::from_stele_context(metric.context().clone());
let metric_type = metric_value_type(metric.values());
let key = (context.clone(), metric_type);
let key = (context, metric_type);
let context_values = aggregated_context_values.entry(key).or_insert_with(Vec::new);
context_values.extend_from_slice(metric.values());
}

let metrics = aggregated_context_values
.into_iter()
.map(|((context, _type), values)| NormalizedMetric::try_from_values(context, values))
.map(|((context, _type), values)| NormalizedMetric::try_from_normalized_values(context, values))
.try_fold(Vec::new(), |mut metrics, maybe_metric| {
metrics.push(maybe_metric?);
Ok::<_, GenericError>(metrics)
Expand Down Expand Up @@ -316,3 +317,42 @@ fn try_normalize_values(raw_values: &[(u64, MetricValue)]) -> Result<MetricValue

Ok(current_value)
}

#[cfg(test)]
mod tests {
use serde_json::json;
use stele::Metric;

use super::NormalizedMetrics;

fn metric(name: &str, tags: &[&str], points: &[(u64, f64)]) -> Metric {
serde_json::from_value(json!({
"context": {
"name": name,
"tags": tags,
},
"values": points
.iter()
.map(|(ts, value)| json!([ts, {"mtype": "Count", "value": value}]))
.collect::<Vec<_>>(),
}))
.expect("metric should deserialize")
}

#[test]
fn normalizes_context_order_before_grouping_metrics() {
let metrics = vec![
metric("tagfilter.miss", &["pod:pod-a", "env:prod"], &[(10, 2.0)]),
metric("tagfilter.miss", &["env:prod", "pod:pod-a"], &[(20, 3.0)]),
];

let normalized = NormalizedMetrics::try_from_stele_metrics(&metrics).expect("metrics should normalize");

assert_eq!(normalized.len(), 1);
assert_eq!(
normalized.metrics()[0].context().to_string(),
"tagfilter.miss[env:prod, pod:pod-a]"
);
assert_eq!(normalized.metrics()[0].raw_values().len(), 2);
}
}
4 changes: 2 additions & 2 deletions lib/saluki-components/src/transforms/tag_filterlist/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use saluki_core::{
transforms::{Transform, TransformBuilder, TransformContext},
ComponentContext,
},
data_model::event::EventType,
data_model::event::{metric::Metric, EventType},
topology::OutputDefinition,
};
use saluki_error::GenericError;
Expand Down Expand Up @@ -215,7 +215,7 @@ fn apply_tag_filter(tags: &SharedTagSet, is_exclude: bool, names: &HashSet<Strin
/// If the metric name is not present in `filters`, the metric is left unchanged.
/// If filtering would not change any tags, the metric context is left untouched (zero allocations).
#[inline]
pub fn filter_metric_tags(metric: &mut saluki_core::data_model::event::metric::Metric, filters: &CompiledFilters) {
pub fn filter_metric_tags(metric: &mut Metric, filters: &CompiledFilters) {
let Some((is_exclude, tag_names)) = filters.get(metric.context().name().as_ref()) else {
return;
};
Expand Down
22 changes: 22 additions & 0 deletions test/correctness/dsd-tag-filterlist/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
analysis_mode: metrics
millstone:
image: saluki-images/millstone:latest
config_path: millstone.yaml
datadog_intake:
image: saluki-images/datadog-intake:latest
config_path: ../datadog-intake.yaml
baseline:
image: registry.datadoghq.com/agent:7.76.2-jmx
files:
- datadog.yaml:/etc/datadog-agent/datadog.yaml
additional_env_vars:
- DD_API_KEY=correctness-test
comparison:
image: saluki-images/datadog-agent:testing-release
files:
- datadog.yaml:/etc/datadog-agent/datadog.yaml
additional_env_vars:
- DD_API_KEY=correctness-test
- DD_DATA_PLANE_ENABLED=true
- DD_DATA_PLANE_DOGSTATSD_ENABLED=true
- DD_AGGREGATE_CONTEXT_LIMIT=500000
38 changes: 38 additions & 0 deletions test/correctness/dsd-tag-filterlist/datadog.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Using a fixed hostname is both required to avoid errors, and also ensures
# consistent tags between the baseline Agent and the ADP comparison target.
hostname: "correctness-testing"

# Dummy API key.
api_key: dummy-api-key-correctness-testing

# We have to specifically configure the health port to use.
health_port: 5555

# Point ourselves at the datadog-intake service.
dd_url: "http://datadog-intake:2049"

# Turn off UDP and listen on a UDS socket instead.
dogstatsd_port: 0
dogstatsd_socket: /airlock/metrics.sock

# Keep origin detection enabled to stay closer to real DogStatsD behavior.
#
# Note: the current corpus still primarily exercises instrumented-tag filtering;
# enabling origin detection did not change the final comparable metric set in the
# latest parity run.
dogstatsd_origin_detection: true

# Gauges can be processed out-of-order when multiple workers are used, while ADP
# does not use multiple workers. Keep worker count at 1 to remove that
# difference from this parity test.
dogstatsd_workers_count: 1

metric_tag_filterlist:
- metric_name: tagfilter.exclude
action: exclude
tags:
- pod
- metric_name: tagfilter.include
action: include
tags:
- env
56 changes: 56 additions & 0 deletions test/correctness/dsd-tag-filterlist/millstone.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
seed: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131]
target: "unixgram:///airlock/metrics.sock"
aggregation_bucket_width_secs: 10
volume: 10000
corpus:
size: 10000
payload:
dogstatsd:
contexts:
constant: 256
metric_names:
- "tagfilter.exclude"
- "tagfilter.include"
- "tagfilter.miss"
tag_names:
- "env"
- "pod"
tag_values:
- "prod"
- "staging"
- "pod-a"
- "pod-b"
- "pod-c"
name_length:
inclusive:
min: 1
max: 32
tag_length:
inclusive:
min: 3
max: 16
tags_per_msg:
constant: 2
unique_tag_ratio: 1.0
value:
float_probability: 0.5
range:
inclusive:
min: -9999999
max: 9999999
multivalue_count:
inclusive:
min: 2
max: 32
multivalue_pack_probability: 0.08
kind_weights:
metric: 100
event: 0
service_check: 0
metric_weights:
count: 0
gauge: 0
timer: 0
distribution: 1
set: 0
histogram: 0
Loading