From bc9e3b977c7b662c8d2e404a4cfd154ab58e62d6 Mon Sep 17 00:00:00 2001 From: "Brian L. Troutwine" Date: Sun, 31 May 2026 03:34:57 +0000 Subject: [PATCH] DogStatsD load generation This commit introduces DogStatsD load generation. If you're familiar with my work in lading you might notice this is very different. The implementation here is responsible for giving Antithesis choices between 'compliant' DogStatsD lines and 'feral' lines, which are _mechanically_ allowable but in practice not sensible. Also of interest: control of contexts and send rates is jettisoned entirely; that's an Antithesis scheduling concern. Metrics, events and service-checks are all present. Depending on the Antithesis shots I may need to adjust the ratio of compliant to feral outcomes. --- Cargo.lock | 2 + test/antithesis/deploy/Dockerfile | 21 +- test/antithesis/deploy/docker-compose.yaml | 1 - test/antithesis/harness/Cargo.toml | 2 + .../src/bin/first_sample_config/config.rs | 10 +- .../src/bin/parallel_driver_send_dogstatsd.rs | 117 +++++----- test/antithesis/harness/src/lib.rs | 1 + test/antithesis/harness/src/payload.rs | 3 + .../harness/src/payload/dogstatsd.rs | 86 +++++++ .../harness/src/payload/dogstatsd/common.rs | 209 ++++++++++++++++++ .../harness/src/payload/dogstatsd/events.rs | 85 +++++++ .../harness/src/payload/dogstatsd/metrics.rs | 116 ++++++++++ .../src/payload/dogstatsd/service_checks.rs | 47 ++++ test/antithesis/harness/src/rand.rs | 158 ++++++++++++- test/antithesis/scratchbook/bug-ledger.md | 18 +- .../scratchbook/deployment-topology.md | 28 +-- .../scratchbook/existing-assertions.md | 55 +++-- .../scratchbook/property-catalog.md | 28 ++- 18 files changed, 865 insertions(+), 122 deletions(-) create mode 100644 test/antithesis/harness/src/payload.rs create mode 100644 test/antithesis/harness/src/payload/dogstatsd.rs create mode 100644 test/antithesis/harness/src/payload/dogstatsd/common.rs create mode 100644 test/antithesis/harness/src/payload/dogstatsd/events.rs 
create mode 100644 test/antithesis/harness/src/payload/dogstatsd/metrics.rs create mode 100644 test/antithesis/harness/src/payload/dogstatsd/service_checks.rs diff --git a/Cargo.lock b/Cargo.lock index 26ac5d08ab..5e8002cb05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1679,9 +1679,11 @@ dependencies = [ "antithesis_sdk", "anyhow", "clap", + "itoa", "num-traits", "rand 0.10.1", "rand_distr", + "ryu", "serde", "serde_json", "serde_yaml", diff --git a/test/antithesis/deploy/Dockerfile b/test/antithesis/deploy/Dockerfile index d8de1bb5c0..05ada256c8 100644 --- a/test/antithesis/deploy/Dockerfile +++ b/test/antithesis/deploy/Dockerfile @@ -5,7 +5,7 @@ # Build context is the repository root. Three named targets: # - adp : agent-data-plane built WITH Antithesis coverage instrumentation + SDK (the SUT) # - intake : datadog-intake mock Datadog intake (dependency) -# - workload : millstone load generator + test templates + setup-complete (the client) +# - workload : DogStatsD driver + test templates + setup-complete (the client) # # ADP is built native x86_64-unknown-linux-gnu (glibc), so no musl cross-compile headers are needed. @@ -67,7 +67,7 @@ RUN --mount=type=cache,target=/adp/target,id=antithesis-adp-target \ echo "Instrumentation symbols present." # --------------------------------------------------------------------------- -# Build the correctness tools (datadog-intake + millstone), uninstrumented. +# Build the correctness tools (datadog-intake) and the test-command binaries, uninstrumented. # These are supporting harness components, not the SUT, so they need no coverage instrumentation. 
# --------------------------------------------------------------------------- FROM build-base AS tools-builder @@ -77,11 +77,10 @@ RUN --mount=type=cache,target=/tools/target,id=antithesis-tools-target \ --mount=type=cache,target=/root/.cargo/registry,id=cargo-registry \ --mount=type=cache,target=/root/.cargo/git,id=cargo-git \ cargo build --release \ - --bin datadog-intake --bin millstone \ + --bin datadog-intake \ --bin parallel_driver_send_dogstatsd --bin finally_verify_delivery --bin eventually_adp_alive \ --bin first_sample_config && \ cp /tools/target/release/datadog-intake /usr/local/bin/datadog-intake && \ - cp /tools/target/release/millstone /usr/local/bin/millstone && \ cp /tools/target/release/parallel_driver_send_dogstatsd /usr/local/bin/parallel_driver_send_dogstatsd && \ cp /tools/target/release/finally_verify_delivery /usr/local/bin/finally_verify_delivery && \ cp /tools/target/release/eventually_adp_alive /usr/local/bin/eventually_adp_alive && \ @@ -91,19 +90,18 @@ RUN --mount=type=cache,target=/tools/target,id=antithesis-tools-target \ # Runtime: Agent Data Plane (SUT). # --------------------------------------------------------------------------- FROM ${APP_IMAGE} AS adp -ENV NO_COLOR=1 \ - RUST_BACKTRACE=1 +ENV NO_COLOR=1 RUN apt-get update && \ apt-get install --no-install-recommends -y ca-certificates openssl && \ rm -rf /var/lib/apt/lists/* COPY --from=adp-builder /usr/local/bin/agent-data-plane /usr/local/bin/agent-data-plane # Expose DWARF/build-id symbols to Antithesis for symbolization (one-hop symlink to the unstripped binary). RUN mkdir -p /symbols && ln -s /usr/local/bin/agent-data-plane /symbols/agent-data-plane -# main.rs requires the bootstrap config file to exist at the default path; ship a minimal standalone -# config as a fallback. The boot wrapper overwrites it with the per-replay config written by the -# `first_sample_config` workload command onto the shared `agent-config` volume. 
+# main.rs requires a config file at the default path. Ship a minimal standalone config as a +# fallback. The boot wrapper overwrites it with the per-timeline config that first_sample_config +# samples onto the shared `agent-config` volume. COPY test/antithesis/deploy/adp/datadog.yaml /etc/datadog-agent/datadog.yaml -# Boot wrapper: waits for the drawn config sentinel, copies the config into place, then execs ADP. +# Boot wrapper waits for the config sentinel, copies the config into place, then execs ADP. COPY --chmod=755 test/antithesis/deploy/adp/entrypoint.sh /entrypoint.sh # ADP's control-plane secure API requires an IPC TLS cert (a single PEM holding both certificate and # private key) that the Core Agent normally generates. In standalone mode there is no Core Agent, so @@ -126,7 +124,7 @@ COPY --from=tools-builder /usr/local/bin/datadog-intake /usr/local/bin/datadog-i ENTRYPOINT ["/usr/local/bin/datadog-intake"] # --------------------------------------------------------------------------- -# Runtime: workload client (millstone load generator + test templates). +# Runtime: workload client (DogStatsD driver + test templates). # --------------------------------------------------------------------------- FROM ${APP_IMAGE} AS workload ENV NO_COLOR=1 @@ -134,7 +132,6 @@ RUN test -d /usr/share/ca-certificates || ( \ apt-get update && \ apt-get install --no-install-recommends -y ca-certificates && \ rm -rf /var/lib/apt/lists/* ) -COPY --from=tools-builder /usr/local/bin/millstone /usr/local/bin/millstone # Antithesis setup-complete helper and test templates (helper files + the "main" template dir). 
COPY --chmod=755 test/antithesis/deploy/workload/setup-complete.sh /opt/antithesis/setup-complete.sh COPY test/antithesis/deploy/workload/test/ /opt/antithesis/test/ diff --git a/test/antithesis/deploy/docker-compose.yaml b/test/antithesis/deploy/docker-compose.yaml index 6e3b3bf9a7..4a54c2b95b 100644 --- a/test/antithesis/deploy/docker-compose.yaml +++ b/test/antithesis/deploy/docker-compose.yaml @@ -33,7 +33,6 @@ services: command: ["run"] environment: NO_COLOR: "1" - RUST_BACKTRACE: "1" DD_API_KEY: "antithesis-test-api-key" DD_DATA_PLANE_ENABLED: "true" DD_DATA_PLANE_STANDALONE_MODE: "true" diff --git a/test/antithesis/harness/Cargo.toml b/test/antithesis/harness/Cargo.toml index 9d35aa056c..c7d13dcd35 100644 --- a/test/antithesis/harness/Cargo.toml +++ b/test/antithesis/harness/Cargo.toml @@ -16,9 +16,11 @@ clap = { workspace = true, features = [ "std", "usage", ] } +itoa = { workspace = true } num-traits = { workspace = true } rand = { workspace = true } rand_distr = { workspace = true } +ryu = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } serde_yaml = { workspace = true } diff --git a/test/antithesis/harness/src/bin/first_sample_config/config.rs b/test/antithesis/harness/src/bin/first_sample_config/config.rs index d84f0756ea..eaff67e535 100644 --- a/test/antithesis/harness/src/bin/first_sample_config/config.rs +++ b/test/antithesis/harness/src/bin/first_sample_config/config.rs @@ -64,17 +64,17 @@ impl Distribution for Probe { #[derive(Debug, Clone, Copy, Serialize)] #[serde(rename_all = "lowercase")] pub(crate) enum LogLevel { - /// Warnings and above. - Warn, - /// Errors only. + /// Errors only — the quietest level that still logs. Error, + /// No logs at all — the floor of the log-output budget. 
+ Off, } impl Distribution for StandardUniform { fn sample(&self, rng: &mut R) -> LogLevel { match rng.random_range(0..2u8) { - 0 => LogLevel::Warn, - _ => LogLevel::Error, + 0 => LogLevel::Error, + _ => LogLevel::Off, } } } diff --git a/test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs b/test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs index 9f19dd7c59..e0d177a886 100644 --- a/test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs +++ b/test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs @@ -1,9 +1,6 @@ -//! Antithesis `parallel_driver_` test command: sends a batch of `DogStatsD` metrics to ADP. -//! -//! Draws a per-timeline cardinality regime (swarm biasing) and a batch size, then sends metrics over -//! UDS. The high-cardinality regime floods distinct aggregation contexts, targeting the -//! `rss-bounded-under-cardinality` property (ADP's memory limiter is disabled by default, so RSS can -//! grow without bound under sustained high cardinality). +//! Feral `DogStatsD` load generator: pick a batch size, then fire that many +//! sampled metric lines at the socket and exit. Antithesis runs many of these +//! in parallel to drive concurrency and push context limits. use std::os::unix::net::UnixDatagram; use std::path::{Path, PathBuf}; @@ -11,10 +8,10 @@ use std::thread::sleep; use std::time::{Duration, Instant}; use antithesis_sdk::prelude::*; -use antithesis_sdk::random::AntithesisRng; -use anyhow::Context as _; +use antithesis_sdk::random::{random_choice, AntithesisRng}; use clap::Parser; -use rand::{rand_core::UnwrapErr, seq::IndexedRandom as _, RngExt as _}; +use harness::payload::dogstatsd; +use rand::{rand_core::UnwrapErr, RngExt}; use serde_json::json; #[derive(Debug, Parser)] @@ -28,11 +25,12 @@ struct Config { dogstatsd_socket: PathBuf, } -#[derive(Clone, Copy, Debug)] -enum Cardinality { - Low, - Medium, - High, +/// Per-batch composition: 50% clean, 25% feral, 25% mixed. 
+#[derive(Clone, Copy)] +enum Batch { + Clean, + Feral, + Mixed, } fn main() -> anyhow::Result<()> { @@ -40,69 +38,72 @@ fn main() -> anyhow::Result<()> { let config = Config::try_parse()?; let mut rng = UnwrapErr(AntithesisRng); - let regimes = [Cardinality::Low, Cardinality::Medium, Cardinality::High]; - let regime = *regimes - .choose(&mut rng) - .context("cardinality regime choices must not be empty")?; - let regime_label = match regime { - Cardinality::Low => "low", - Cardinality::Medium => "medium", - Cardinality::High => "high", - }; - let count: u64 = rng.random_range(50..=2000); - let socket = connect_with_retry(&config.dogstatsd_socket)?; + // Socket unavailable (ADP booting, or a fault). No-op exit, not a failure. + let Some(socket) = connect_with_retry(&config.dogstatsd_socket) else { + return Ok(()); + }; - let names = ["adp.test.foo", "adp.test.bar", "adp.test.balkajsldfkjasdlfkjasdfz"]; - let metric_types = ["c", "g"]; + let batch = match random_choice(&[Batch::Clean, Batch::Clean, Batch::Feral, Batch::Mixed]) { + Some(Batch::Feral) => Batch::Feral, + Some(Batch::Mixed) => Batch::Mixed, + _ => Batch::Clean, + }; + let count = rng.random_range(0..=10_000u64); + let mut line: Vec = Vec::new(); let mut attempted = 0usize; - for i in 0..count { - let name = *names - .choose(&mut rng) - .context("metric name choices must not be empty")?; - let metric_type = *metric_types - .choose(&mut rng) - .context("metric type choices must not be empty")?; - let value: u64 = rng.random_range(0..=1000); - let tag = match regime { - Cardinality::Low => format!("host:h{}", rng.random_range(0..4)), - Cardinality::Medium => format!("host:h{}", rng.random_range(0..256)), - Cardinality::High => format!("uid:{i}-{}", rng.random::()), + for _ in 0..count { + let vibe = match batch { + Batch::Clean => dogstatsd::Vibe::Clean, + Batch::Feral => dogstatsd::Vibe::Feral, + Batch::Mixed => dogstatsd::sample_vibe(), }; - let line = format!("{name}:{value}|{metric_type}|#{tag}\n"); - 
if socket.send(line.as_bytes()).is_ok() { + dogstatsd::send(&mut rng, &mut line, vibe); + if socket.send(&line).is_ok() { attempted += 1; } } assert_reachable!( - "workload sent a dogstatsd batch", - &json!({ - "attempted": attempted, - "regime": regime_label, - "socket": config.dogstatsd_socket.display().to_string(), - }) + "workload ran a dogstatsd batch", + &json!({ "attempted": attempted, "dogstatsd_socket": config.dogstatsd_socket.display().to_string() }) + ); + assert_sometimes!( + attempted > 0, + "workload delivered a dogstatsd line", + &json!({ "attempted": attempted }) ); - - // Confirm timelines sometimes drive a high-cardinality flood (the interesting case for memory). assert_sometimes!( - matches!(regime, Cardinality::High), - "workload drove a high-cardinality dogstatsd flood", + attempted > 0 && matches!(batch, Batch::Clean), + "workload ran a fully clean batch", + &json!({ "attempted": attempted }) + ); + assert_sometimes!( + attempted > 0 && matches!(batch, Batch::Feral), + "workload ran a fully feral batch", + &json!({ "attempted": attempted }) + ); + assert_sometimes!( + attempted > 0 && matches!(batch, Batch::Mixed), + "workload ran a mixed batch", &json!({ "attempted": attempted }) ); Ok(()) } -// Wait for ADP to bind the socket, intentionally naive. -fn connect_with_retry(path: &Path) -> anyhow::Result { +/// Wait for ADP to bind the socket, intentionally naive. 
+fn connect_with_retry(path: &Path) -> Option { let deadline = Instant::now() + Duration::from_secs(30); loop { - let socket = UnixDatagram::unbound()?; - match socket.connect(path) { - Ok(()) => return Ok(socket), - Err(_) if Instant::now() < deadline => sleep(Duration::from_millis(250)), - Err(e) => return Err(e).with_context(|| format!("ADP did not bind {} within 30s", path.display())), + if let Ok(socket) = UnixDatagram::unbound() { + if socket.connect(path).is_ok() { + return Some(socket); + } + } + if Instant::now() >= deadline { + return None; } + sleep(Duration::from_millis(250)); } } diff --git a/test/antithesis/harness/src/lib.rs b/test/antithesis/harness/src/lib.rs index 8c05e117db..ebc75125fa 100644 --- a/test/antithesis/harness/src/lib.rs +++ b/test/antithesis/harness/src/lib.rs @@ -1,4 +1,5 @@ //! Shared helpers for the Antithesis harness, used by the `src/bin/*` test //! commands. +pub mod payload; pub mod rand; diff --git a/test/antithesis/harness/src/payload.rs b/test/antithesis/harness/src/payload.rs new file mode 100644 index 0000000000..515e4ac842 --- /dev/null +++ b/test/antithesis/harness/src/payload.rs @@ -0,0 +1,3 @@ +//! Payload generators for the protocols under test. + +pub mod dogstatsd; diff --git a/test/antithesis/harness/src/payload/dogstatsd.rs b/test/antithesis/harness/src/payload/dogstatsd.rs new file mode 100644 index 0000000000..060bd50b21 --- /dev/null +++ b/test/antithesis/harness/src/payload/dogstatsd.rs @@ -0,0 +1,86 @@ +//! `DogStatsD` payload generation. + +// Here's the basic idea. +// +// Dogstatsd is three message types: +// +// * metric +// * event +// * service check +// +// # Metrics +// +// <NAME>:<VALUE>|<TYPE>|@<RATE>|#<TAG>,...|c:<CID>|T<TS>|e:<EXT>|card:<CARD> +// +// Required: <NAME>:<VALUE>|<TYPE>. +// +// * <NAME> := [^:|\n]+ +// * <VALUE> := <NUM>(:<NUM>)* ':'-packed multi-value, non-set +// | [^|\n]+ raw string, set type +// * <NUM> := [+-]?(\d+\.?\d*|\.\d+)([eE][+-]?\d+)? 
| [+-]?(inf|infinity|nan) +// * <TYPE> := c|g|ms|h|s|d count gauge timer histogram set distribution +// * <RATE> := @<NUM> +// * <TAG> := [^,|\n]+ conventionally <KEY>:<VALUE>, the ':' is not required +// * <CID> := c:[^|\n]+ e.g. ci-<ID>, in-<INODE> +// * <TS> := T\d+ unix seconds +// * <EXT> := e:[^|\n]+ e.g. it-<BOOL>,cn-<NAME>,pu-<UID> +// * <CARD> := card:[^|\n]+ recognized: none|low|orchestrator|high +// +// # Events +// +// _e{<TITLE_LEN>,<TEXT_LEN>}:<TITLE>|<TEXT>|d:<TS>|h:<HOST>|k:<AGGKEY>|p:<PRIO>|s:<SRC>|t:<ALERT>|#<TAGS> +// +// Required: _e{<TITLE_LEN>,<TEXT_LEN>}:<TITLE>|<TEXT>. c: / e: / card: are valid here too. +// +// * <TITLE_LEN>, +// <TEXT_LEN> := \d+ byte length of TITLE / TEXT +// * <TITLE>, +// <TEXT> := [^\n]{LEN} length-delimited, so '|' and ':' are allowed; '\\n' -> newline +// * <TS> := d:\d+ unix seconds +// * <HOST> := h:[^|\n]+ +// * <AGGKEY> := k:[^|\n]+ +// * <PRIO> := p:[^|\n]+ recognized: normal|low (else default) +// * <SRC> := s:[^|\n]+ +// * <ALERT> := t:[^|\n]+ recognized: error|warning|info|success (else default) +// * <TAGS> := #<TAG>(,<TAG>)* +// +// # Service checks +// +// _sc|<NAME>|<STATUS>|d:<TS>|h:<HOST>|#<TAG>,<TAG>...|m:<MESSAGE> +// +// Required: _sc|<NAME>|<STATUS>. c: / e: / card: are valid here too. +// +// * <NAME> := [^|\n]+ +// * <STATUS> := [0-3] OK warning critical unknown +// * <TS> := d:\d+ unix seconds +// * <HOST> := h:[^|\n]+ +// * <TAGS> := #<TAG>(,<TAG>)* +// * <MESSAGE> := m:[^|\n]+ + +use antithesis_sdk::random::random_choice; +use rand::Rng; + +mod common; +mod events; +mod metrics; +mod service_checks; + +pub use common::{sample_vibe, Vibe}; + +/// The three `DogStatsD` message types. +#[derive(Clone, Copy)] +enum Message { + Metric, + Event, + ServiceCheck, +} + +/// Write one `DogStatsD` message of a random type to `buf` at the given vibe. 
+pub fn send<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe) { + buf.clear(); + match random_choice(&[Message::Metric, Message::Event, Message::ServiceCheck]) { + Some(Message::Event) => events::write(rng, buf, vibe), + Some(Message::ServiceCheck) => service_checks::write(rng, buf, vibe), + _ => metrics::write(rng, buf, vibe), + } +} diff --git a/test/antithesis/harness/src/payload/dogstatsd/common.rs b/test/antithesis/harness/src/payload/dogstatsd/common.rs new file mode 100644 index 0000000000..1d18ef959b --- /dev/null +++ b/test/antithesis/harness/src/payload/dogstatsd/common.rs @@ -0,0 +1,209 @@ +//! Shared `DogStatsD` payload sampling: vibe, segment and number builders, tags. + +use antithesis_sdk::random::random_choice; +use rand::distr::Distribution; +use rand::Rng; + +use crate::rand::Boundary; + +/// Clean by-the-book output, or feral. +#[derive(Clone, Copy, Debug)] +pub enum Vibe { + /// Well-formed. + Clean, + /// Aberrant. + Feral, +} + +/// Sample a per-line vibe, evenly. +#[must_use] +pub fn sample_vibe() -> Vibe { + match random_choice(&[Vibe::Clean, Vibe::Feral]) { + Some(Vibe::Feral) => Vibe::Feral, + _ => Vibe::Clean, + } +} + +/// The Agent's name-legal separators, for joining name-like segments. +pub(crate) const NAME_SEPARATORS: &[u8] = b"._- "; + +/// Compliant identifier segments: names, hosts, keys, source types. +pub(crate) const COMPLIANT_WORD: &[&[u8]] = &[ + b"adp", + b"dogstatsd", + b"requests", + b"latency", + b"errors", + b"count", + b"total", + b"bytes", + b"queue", + b"workers", +]; + +/// Aberrant identifier segments: empty, whitespace, NUL, embedded delimiters, +/// invalid UTF-8, message-type prefixes. 
+pub(crate) const ABERRANT_WORD: &[&[u8]] = &[ + b"", + b" ", + b"\t", + b"\0", + b"a:b", + b"a|b", + b"a,b", + b"#hash", + b"@at", + b"_sc", + b"_e{1,1}", + b"\x80", + b"\xc3", + b"\xed\xa0\x80", + b"\xc0\x80", + b"\xff\xfe", + b"emoji\xf0\x9f\x92\xa9", +]; + +/// Values that break number parsers, including long encodings and unicode that +/// looks numeric: infinity, fullwidth and Arabic-Indic digits. +pub(crate) const ABERRANT_VALUES: &[&[u8]] = &[ + b"0", + b"-0", + b"inf", + b"-inf", + b"+inf", + b"nan", + b"infinity", + b"1e999999", + b"-1e999999", + b"0x1p4", + b"1_000", + b".", + b"+", + b"-", + b"1.", + b".5", + b"1:2:3:4:5", + b"00000000000000000000000000000000000000000000000000000001.5", + b"3.141592653589793115997963468544185161590576171875000000000000000000000000", + "\u{221e}".as_bytes(), + "-\u{221e}".as_bytes(), + "\u{ff11}\u{ff12}\u{ff13}".as_bytes(), + "\u{0664}\u{0662}".as_bytes(), +]; + +/// Unix-timestamp payloads (the `d:` / `T` fields). +pub(crate) const COMPLIANT_TS: &[&[u8]] = &[b"1700000000", b"1", b"1609459200"]; + +const COMPLIANT_TAG_KEYS: &[&[u8]] = &[b"env", b"service", b"region", b"version", b"team", b"host", b"shard"]; +const ABERRANT_TAG_KEYS: &[&[u8]] = &[b"", b" ", b":", b",", b"#", b"\0", b"\x80"]; +const COMPLIANT_TAG_VALUES: &[&[u8]] = &[ + b"prod", + b"staging", + b"adp", + b"us-east-1", + b"eu-west-1", + b"1.2.3", + b"web01", + b"0", +]; +const ABERRANT_TAG_VALUES: &[&[u8]] = &[b"", b",", b"|", b":", b"\xff", b"\xed\xa0\x80", b"a,b"]; + +/// Compact, or a cursed-but-equivalent padded encoding. +#[derive(Clone, Copy)] +enum Form { + Compact, + Expanded, +} + +/// Extend `buf` with one item. Clean draws from `compliant`; feral chooses +/// between compliant and aberrant — a choice, never a coin flip. 
+pub(crate) fn extend_choice(buf: &mut Vec<u8>, vibe: Vibe, compliant: &[&[u8]], aberrant: &[&[u8]]) { + let pools: &[&[&[u8]]] = match vibe { + Vibe::Clean => &[compliant], + Vibe::Feral => &[compliant, aberrant], + }; + if let Some(&pool) = random_choice(pools) { + if let Some(&item) = random_choice(pool) { + buf.extend_from_slice(item); + } + } +} + +/// Sample a count of segments and join them with sampled `separators`. A pool of +/// `N` segments over a count `c` gives `N^c` results. +pub(crate) fn write_segments<R: Rng + ?Sized>( + rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe, compliant: &[&[u8]], aberrant: &[&[u8]], separators: &[u8], +) { + let count = Boundary::<u8>::new().sample(rng); + for i in 0..count { + if i > 0 { + if let Some(&sep) = random_choice(separators) { + buf.push(sep); + } + } + extend_choice(buf, vibe, compliant, aberrant); + } +} + +/// An identifier (name, host, key, source) built from word segments. +pub(crate) fn write_words<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe) { + write_segments(rng, buf, vibe, COMPLIANT_WORD, ABERRANT_WORD, NAME_SEPARATORS); +} + +/// Append `|<prefix><item>`, the item chosen for the vibe. +pub(crate) fn write_field(buf: &mut Vec<u8>, vibe: Vibe, prefix: &[u8], compliant: &[&[u8]], aberrant: &[&[u8]]) { + buf.push(b'|'); + buf.extend_from_slice(prefix); + extend_choice(buf, vibe, compliant, aberrant); +} + +/// A boundary-sampled count of `key:value` tags joined by ','. A count of zero +/// writes no tags. Clean draws compliant keys and values; feral mixes aberrant +/// ones in, key and value independently. 
+pub(crate) fn write_tags<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe) { + let count = Boundary::<u8>::new().sample(rng); + for t in 0..count { + if t == 0 { + buf.extend_from_slice(b"|#"); + } else { + buf.push(b','); + } + write_segments(rng, buf, vibe, COMPLIANT_TAG_KEYS, ABERRANT_TAG_KEYS, NAME_SEPARATORS); + buf.push(b':'); + write_segments( + rng, + buf, + vibe, + COMPLIANT_TAG_VALUES, + ABERRANT_TAG_VALUES, + NAME_SEPARATORS, + ); + } +} + +/// Write `digits` to `buf` as-is, or padded with equivalent leading zeros (and +/// trailing zeros when there is a fractional part). Same value, cursed encoding. +pub(crate) fn write_number<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, digits: &[u8]) { + match random_choice(&[Form::Compact, Form::Expanded]) { + Some(Form::Expanded) => { + let (sign, rest) = match digits.first() { + Some(&(b'-' | b'+')) => (&digits[..1], &digits[1..]), + _ => (&digits[..0], digits), + }; + buf.extend_from_slice(sign); + pad_zeros(rng, buf); + buf.extend_from_slice(rest); + let fractional = rest.contains(&b'.') && !rest.iter().any(|&c| c == b'e' || c == b'E'); + if fractional { + pad_zeros(rng, buf); + } + } + _ => buf.extend_from_slice(digits), + } +} + +/// Append a boundary-sampled run of '0' bytes to `buf`. +fn pad_zeros<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>) { + let zeros = usize::from(Boundary::<u8>::new().sample(rng)); + buf.resize(buf.len() + zeros, b'0'); +} diff --git a/test/antithesis/harness/src/payload/dogstatsd/events.rs b/test/antithesis/harness/src/payload/dogstatsd/events.rs new file mode 100644 index 0000000000..8e14be614c --- /dev/null +++ b/test/antithesis/harness/src/payload/dogstatsd/events.rs @@ -0,0 +1,85 @@ +//! Feral `DogStatsD` event generation. + +use antithesis_sdk::random::random_choice; +use rand::distr::Distribution; +use rand::Rng; + +use super::common::{self, Vibe}; +use crate::rand::Boundary; + +/// Priority payloads (the `p:` field). 
+const COMPLIANT_PRIO: &[&[u8]] = &[b"normal", b"low"]; + +/// Alert-type payloads (the `t:` field). +const COMPLIANT_ALERT: &[&[u8]] = &[b"error", b"warning", b"info", b"success"]; + +/// An event optional field. +#[derive(Clone, Copy)] +enum Opt { + Timestamp, + Hostname, + AggKey, + Priority, + Source, + Alert, +} + +/// Append one event `_e{<TLEN>,<XLEN>}:<TITLE>|<TEXT>[|opt...]` to `buf`. +pub(crate) fn write<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe) { + let mut title = Vec::new(); + common::write_words(rng, &mut title, vibe); + let mut text = Vec::new(); + common::write_words(rng, &mut text, vibe); + + buf.extend_from_slice(b"_e{"); + write_len(rng, buf, vibe, title.len()); + buf.push(b','); + write_len(rng, buf, vibe, text.len()); + buf.extend_from_slice(b"}:"); + buf.extend_from_slice(&title); + buf.push(b'|'); + buf.extend_from_slice(&text); + + let count = Boundary::<u8>::new().sample(rng); + for _ in 0..count { + match random_choice(&[ + Opt::Timestamp, + Opt::Hostname, + Opt::AggKey, + Opt::Priority, + Opt::Source, + Opt::Alert, + ]) { + Some(Opt::Timestamp) => { + common::write_field(buf, vibe, b"d:", common::COMPLIANT_TS, common::ABERRANT_VALUES); + } + Some(Opt::Hostname) => { + buf.extend_from_slice(b"|h:"); + common::write_words(rng, buf, vibe); + } + Some(Opt::AggKey) => { + buf.extend_from_slice(b"|k:"); + common::write_words(rng, buf, vibe); + } + Some(Opt::Priority) => common::write_field(buf, vibe, b"p:", COMPLIANT_PRIO, common::ABERRANT_WORD), + Some(Opt::Source) => { + buf.extend_from_slice(b"|s:"); + common::write_words(rng, buf, vibe); + } + _ => common::write_field(buf, vibe, b"t:", COMPLIANT_ALERT, common::ABERRANT_WORD), + } + } + + common::write_tags(rng, buf, vibe); + buf.push(b'\n'); +} + +/// The event header length. Clean writes the true byte length; feral writes a +/// boundary-sampled lie — the malformed-event surface. 
+fn write_len<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe, actual: usize) { + let mut itoa = itoa::Buffer::new(); + match vibe { + Vibe::Clean => buf.extend_from_slice(itoa.format(actual).as_bytes()), + Vibe::Feral => buf.extend_from_slice(itoa.format(Boundary::<u64>::new().sample(rng)).as_bytes()), + } +} diff --git a/test/antithesis/harness/src/payload/dogstatsd/metrics.rs b/test/antithesis/harness/src/payload/dogstatsd/metrics.rs new file mode 100644 index 0000000000..6660ceb0a3 --- /dev/null +++ b/test/antithesis/harness/src/payload/dogstatsd/metrics.rs @@ -0,0 +1,116 @@ +//! Feral `DogStatsD` metric-line generation. + +use antithesis_sdk::random::random_choice; +use rand::distr::Distribution; +use rand::Rng; + +use super::common::{self, Vibe}; +use crate::rand::{Boundary, Probe}; + +const METRIC_TYPES: &[&[u8]] = &[b"c", b"g", b"ms", b"h", b"s", b"d"]; + +/// Sample-rate payloads (the `@` field). +const COMPLIANT_RATE: &[&[u8]] = &[b"1", b"0.5", b"0.25", b"0.1", b"0.001"]; + +/// Container-id payloads (the `c:` field). +const COMPLIANT_CONTAINER: &[&[u8]] = &[b"ci-0a1b2c3d4e5f", b"cid-deadbeef", b"in-4026531840"]; + +/// External-data items (the `e:` field), joined by ',' at runtime. +const COMPLIANT_EXT: &[&[u8]] = &[ + b"it-true", + b"it-false", + b"cn-redis", + b"cn-web", + b"pu-810fe89d", + b"pu-abc", +]; + +/// Cardinality payloads (the `card:` field). +const COMPLIANT_CARD: &[&[u8]] = &[b"none", b"low", b"orchestrator", b"high"]; + +/// The `e:` external-data item separator. +const EXT_SEPARATORS: &[u8] = b","; + +/// How to build a value. +#[derive(Clone, Copy)] +enum ValueKind { + Aberrant, + Int, + Float, +} + +/// A metric extension field. +#[derive(Clone, Copy)] +enum Ext { + Rate, + Container, + Timestamp, + External, + Cardinality, +} + +/// Append one metric line `<NAME>:<VALUE>|<TYPE>[|ext...]` to `buf`. 
+pub(crate) fn write<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe) { + common::write_words(rng, buf, vibe); + buf.push(b':'); + write_value(rng, buf, vibe); + buf.push(b'|'); + if let Some(&t) = random_choice(METRIC_TYPES) { + buf.extend_from_slice(t); + } + common::write_tags(rng, buf, vibe); + write_extensions(rng, buf, vibe); + buf.push(b'\n'); +} + +/// Clean: a compact integer. Feral: an aberrant literal, or an int/float in a +/// compact or cursed-but-equivalent expanded encoding. +fn write_value<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe) { + let mut itoa = itoa::Buffer::new(); + match vibe { + Vibe::Clean => { + let v = Boundary::<i64>::new().sample(rng); + buf.extend_from_slice(itoa.format(v).as_bytes()); + } + Vibe::Feral => match random_choice(&[ValueKind::Aberrant, ValueKind::Int, ValueKind::Float]) { + Some(ValueKind::Aberrant) => { + if let Some(&v) = random_choice(common::ABERRANT_VALUES) { + buf.extend_from_slice(v); + } + } + Some(ValueKind::Float) => { + let v: f64 = Probe.sample(rng); + let mut ryu = ryu::Buffer::new(); + common::write_number(rng, buf, ryu.format(v).as_bytes()); + } + _ => { + let v = Boundary::<i64>::new().sample(rng); + common::write_number(rng, buf, itoa.format(v).as_bytes()); + } + }, + } +} + +/// A boundary-sampled count of extension fields, each a random kind. Repeats and +/// zero are allowed. 
+fn write_extensions<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe) { + let count = Boundary::<u8>::new().sample(rng); + for _ in 0..count { + match random_choice(&[ + Ext::Rate, + Ext::Container, + Ext::Timestamp, + Ext::External, + Ext::Cardinality, + ]) { + Some(Ext::Rate) => common::write_field(buf, vibe, b"@", COMPLIANT_RATE, common::ABERRANT_VALUES), + Some(Ext::Container) => common::write_field(buf, vibe, b"c:", COMPLIANT_CONTAINER, common::ABERRANT_WORD), + Some(Ext::Timestamp) => common::write_field(buf, vibe, b"T", common::COMPLIANT_TS, common::ABERRANT_VALUES), + Some(Ext::External) => { + buf.extend_from_slice(b"|e:"); + common::write_segments(rng, buf, vibe, COMPLIANT_EXT, common::ABERRANT_WORD, EXT_SEPARATORS); + } + _ => common::write_field(buf, vibe, b"card:", COMPLIANT_CARD, common::ABERRANT_WORD), + } + } +} diff --git a/test/antithesis/harness/src/payload/dogstatsd/service_checks.rs b/test/antithesis/harness/src/payload/dogstatsd/service_checks.rs new file mode 100644 index 0000000000..ee8ba784a9 --- /dev/null +++ b/test/antithesis/harness/src/payload/dogstatsd/service_checks.rs @@ -0,0 +1,47 @@ +//! Feral `DogStatsD` service-check generation. + +use antithesis_sdk::random::random_choice; +use rand::distr::Distribution; +use rand::Rng; + +use super::common::{self, Vibe}; +use crate::rand::Boundary; + +/// Status payloads: OK, warning, critical, unknown. +const COMPLIANT_STATUS: &[&[u8]] = &[b"0", b"1", b"2", b"3"]; + +/// A service-check optional field. +#[derive(Clone, Copy)] +enum Opt { + Timestamp, + Hostname, + Message, +} + +/// Append one service check `_sc|<NAME>|<STATUS>[|opt...]` to `buf`. 
+pub(crate) fn write<R: Rng + ?Sized>(rng: &mut R, buf: &mut Vec<u8>, vibe: Vibe) { + buf.extend_from_slice(b"_sc|"); + common::write_words(rng, buf, vibe); + buf.push(b'|'); + common::extend_choice(buf, vibe, COMPLIANT_STATUS, common::ABERRANT_VALUES); + + let count = Boundary::<u8>::new().sample(rng); + for _ in 0..count { + match random_choice(&[Opt::Timestamp, Opt::Hostname, Opt::Message]) { + Some(Opt::Timestamp) => { + common::write_field(buf, vibe, b"d:", common::COMPLIANT_TS, common::ABERRANT_VALUES); + } + Some(Opt::Hostname) => { + buf.extend_from_slice(b"|h:"); + common::write_words(rng, buf, vibe); + } + _ => { + buf.extend_from_slice(b"|m:"); + common::write_words(rng, buf, vibe); + } + } + } + + common::write_tags(rng, buf, vibe); + buf.push(b'\n'); +} diff --git a/test/antithesis/harness/src/rand.rs b/test/antithesis/harness/src/rand.rs index 125babecfa..674710a626 100644 --- a/test/antithesis/harness/src/rand.rs +++ b/test/antithesis/harness/src/rand.rs @@ -1,11 +1,18 @@ //! Randomness utilities. +use std::marker::PhantomData; + use rand::distr::Distribution; use rand::{Rng, RngExt}; use rand_distr::LogNormal; -/// Boundary values for the u64 field. -const BOUNDARIES: &[u64] = &[ +// =========================================================================== +// Probe — a boundary-biased magnitude sampler. ~1/8 of draws are a boundary +// value, the rest a typical log-normal magnitude. +// =========================================================================== + +/// `u64` boundary values: 0, 1, and each fixed-width max ±1. +const BOUNDARIES_U64: &[u64] = &[ 0, 1, i8::MAX as u64 - 1, @@ -33,21 +40,82 @@ const BOUNDARIES: &[u64] = &[ u64::MAX, ]; -/// Produces `u64` values that are generally 'normal' and with some being -/// boundary values. +/// `i64` boundary values: 0, ±1, and each signed-width min/max. 
+const BOUNDARIES_I64: &[i64] = &[ + i64::MIN, + i64::MIN + 1, + i32::MIN as i64, + i16::MIN as i64, + i8::MIN as i64, + -1, + 0, + 1, + i8::MAX as i64, + i16::MAX as i64, + i32::MAX as i64, + i64::MAX - 1, + i64::MAX, +]; + +/// `f64` boundary values (no NaN/inf — those break frame parsing and belong to a +/// dedicated malformed-input driver). +const BOUNDARIES_F64: &[f64] = &[ + 0.0, + 1.0, + -1.0, + f64::MIN_POSITIVE, + -f64::MIN_POSITIVE, + f64::MAX, + f64::MIN, +]; + +/// A boundary-biased distribution: ~1/8 of draws are a boundary value, the rest a +/// "typical" log-normal magnitude. Implemented for `u64`, `i64` and `f64`, so a draw +/// site reads `let v: i64 = Probe.sample(rng)` and gets type-appropriate +/// boundaries. Non-boundary `i64`/`f64` draws carry a random sign. #[derive(Debug, Clone, Copy)] pub struct Probe; impl Distribution<u64> for Probe { fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 { if rng.random_ratio(1, 8) { - BOUNDARIES[rng.random_range(0..BOUNDARIES.len())] + BOUNDARIES_U64[rng.random_range(0..BOUNDARIES_U64.len())] } else { typical(rng) } } } +impl Distribution<i64> for Probe { + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> i64 { + if rng.random_ratio(1, 8) { + BOUNDARIES_I64[rng.random_range(0..BOUNDARIES_I64.len())] + } else { + let magnitude = num_traits::cast::<u64, i64>(typical(rng)).unwrap_or(i64::MAX); + if rng.random_ratio(1, 2) { + -magnitude + } else { + magnitude + } + } + } +} + +impl Distribution<f64> for Probe { + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> f64 { + if rng.random_ratio(1, 8) { + BOUNDARIES_F64[rng.random_range(0..BOUNDARIES_F64.len())] + } else { + let magnitude = num_traits::cast::<u64, f64>(typical(rng)).unwrap_or(f64::MAX); + if rng.random_ratio(1, 2) { + -magnitude + } else { + magnitude + } + } + } +} + /// Approximate probability of a typical draw landing in each range: /// /// | Value range | Probability | @@ -63,3 +131,83 @@ fn typical<R: Rng + ?Sized>(rng: &mut R) -> u64 { let dist =
LogNormal::new(1024.0_f64.ln(), 4.0).expect("median > 0 and sigma >= 0"); num_traits::cast::<f64, u64>(dist.sample(rng).round()).unwrap_or(u64::MAX) } + +// =========================================================================== +// Boundary<T> — a finite type-boundary sampler: each fixed-width max ±1 and the +// half-range midpoint ±1, the same idea as Probe's arrays but for one type. +// =========================================================================== + +/// A boundary-value sampler for `T`: each fixed-width max ±1 and the half-range +/// midpoint ±1. `Boundary::<T>::new().sample(rng)` returns one. +#[derive(Clone, Copy, Debug, Default)] +pub struct Boundary<T>(PhantomData<T>); + +impl<T> Boundary<T> { + /// A boundary sampler for `T`. + #[must_use] + pub const fn new() -> Self { + Boundary(PhantomData) + } +} + +const BOUNDARY_U8: &[u8] = &[0, 1, 2, 126, 127, 128, 129, 254, 255]; + +impl Distribution<u8> for Boundary<u8> { + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 { + BOUNDARY_U8[rng.random_range(0..BOUNDARY_U8.len())] + } +} + +const BOUNDARY_U64: &[u64] = &[ + 0, + 1, + 2, + u8::MAX as u64 - 1, + u8::MAX as u64, + u8::MAX as u64 + 1, + u16::MAX as u64 - 1, + u16::MAX as u64, + u16::MAX as u64 + 1, + u32::MAX as u64 - 1, + u32::MAX as u64, + u32::MAX as u64 + 1, + u64::MAX / 2 - 1, + u64::MAX / 2, + u64::MAX / 2 + 1, + u64::MAX - 1, + u64::MAX, +]; + +impl Distribution<u64> for Boundary<u64> { + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 { + BOUNDARY_U64[rng.random_range(0..BOUNDARY_U64.len())] + } +} + +const BOUNDARY_I64: &[i64] = &[ + i64::MIN, + i64::MIN + 1, + i64::MIN / 2 - 1, + i64::MIN / 2, + i64::MIN / 2 + 1, + i32::MIN as i64, + i16::MIN as i64, + i8::MIN as i64, + -1, + 0, + 1, + i8::MAX as i64, + i16::MAX as i64, + i32::MAX as i64, + i64::MAX / 2 - 1, + i64::MAX / 2, + i64::MAX / 2 + 1, + i64::MAX - 1, + i64::MAX, +]; + +impl Distribution<i64> for Boundary<i64> { + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) 
-> i64 { + BOUNDARY_I64[rng.random_range(0..BOUNDARY_I64.len())] + } +} diff --git a/test/antithesis/scratchbook/bug-ledger.md b/test/antithesis/scratchbook/bug-ledger.md index 75cd0b8ce3..ab63265666 100644 --- a/test/antithesis/scratchbook/bug-ledger.md +++ b/test/antithesis/scratchbook/bug-ledger.md @@ -1,7 +1,7 @@ --- sut_path: /home/ssm-user/src/saluki -commit: fc4bb29728814ddf9321572b954ec28f58faeb53 -updated: 2026-05-30 +commit: 21b2072b4743ddbf4c84891d93abac7299dc4ce8 +updated: 2026-06-01 external_references: - path: https://datadoghq.atlassian.net/wiki/spaces/DADP/ why: ADP Confluence space — headline guarantees that frame these defects as bugs. @@ -31,6 +31,18 @@ bug happens. Run all five (expect five FAILURES — the failing tests are the demonstrations): `cargo nextest run --no-fail-fast -E 'test(/bug_nan_sample_poisons_sum_and_avg|bug_corrupt_length_prefix_silently_drops_following_records|bug_forward_clock_jump_floods_zero_value_points|bug_default_heap_fallback_makes_context_resolution_unbounded|bug_config_ready_hangs_forever_without_snapshot/)'` +> **Workload reach (2026-06-01):** the live `parallel_driver_send_dogstatsd` feral DSD-line generator +> exercises only **one** of these five repros under a run — #4, the high-cardinality interner +> heap-fallback (`rss-bounded-under-cardinality`) — and even that needs a memory-capped `adp` container +> or a SUT-side RSS assertion to be *caught* (neither yet wired). The other four are off the DSD-socket +> input path: +> - **#1 `ddsketch-no-nan-poison`** — DSD drops non-finite at the codec; needs a `checks_ipc` gRPC +> Histogram feeder. +> - **#2 `replay-corruption-not-silent-eof`** — needs the `agent-data-plane dogstatsd replay` CLI plus +> crafted capture files. +> - **#3 `aggregate-clock-skew-stable` (forward-jump)** — needs a clock-skip fault. +> - **#5 `config-stall-no-deadlock`** — needs a config-stream stub that withholds the snapshot. 
+ ## Resolved upstream on main (repro now stale) - **`aggregate-no-panic-any-window` — sub-second window `% 0` panic (was bug #1).** Fixed on main: @@ -46,7 +58,7 @@ Run all five (expect five FAILURES — the failing tests are the demonstrations) ## Burned into an Antithesis triage shot (submitted run) -- **`rss-bounded-under-cardinality` (behavioral)** and **`forwarder-eventual-delivery` (baseline liveness)** — run id (redacted; tracked internally) (test-name `saluki-adp-bug-hunt`, 30 min, submitted 2026-05-29). The `parallel_driver_send_dogstatsd` high-cardinality regime drives memory growth; `finally_verify_delivery` checks delivery. Triage with the `antithesis-triage` skill once it completes. +- **`rss-bounded-under-cardinality` (behavioral)** and **`forwarder-eventual-delivery` (baseline liveness)** — run id (redacted; tracked internally) (test-name `saluki-adp-bug-hunt`, 30 min, submitted 2026-05-29). The `parallel_driver_send_dogstatsd` driver (a sampled batch of feral DSD lines whose names/tags/values are built combinatorially from finite segment pools) floods distinct contexts and drives memory growth; `finally_verify_delivery` checks delivery. Triage with the `antithesis-triage` skill once it completes. **Caveat:** `rss-bounded-under-cardinality` only becomes a *caught* failure with a memory-capped `adp` container (OOM ⇒ `eventually_adp_alive`) or a SUT-side RSS assertion — neither yet wired. ## Antithesis-shot-only — blocked on harness infrastructure (not locally reproducible) diff --git a/test/antithesis/scratchbook/deployment-topology.md b/test/antithesis/scratchbook/deployment-topology.md index fcef2890c0..d0ba82ad5f 100644 --- a/test/antithesis/scratchbook/deployment-topology.md +++ b/test/antithesis/scratchbook/deployment-topology.md @@ -44,7 +44,7 @@ containers so every link is faultable. 
```text +------------------------+ DogStatsD +------------------------+ HTTP (Datadog +------------------------+ | workload-client | (UDP/TCP, faultable) | adp | intake API, | mock-intake | -| - millstone load gen | ------------------------> | agent-data-plane | faultable, retryable) | datadog-intake | +| - dogstatsd driver | ------------------------> | agent-data-plane | faultable, retryable) | datadog-intake | | - Antithesis SDK | | (standalone mode) | ----------------------> | (mock fakeintake) | | - test template | <------------------------ | UDP/TCP/UDS listeners | <---------------------- | records payloads, | +------------------------+ backpressure / health +------------------------+ acks / 5xx / hang | queryable for asserts | @@ -55,22 +55,24 @@ containers so every link is faultable. |---|---|---|---|---|---| | `adp` | Service (SUT) | reuse `docker/Dockerfile.agent-data-plane` (standalone build) | `agent-data-plane run` in **standalone mode** (`DD_DATA_PLANE_STANDALONE_MODE=true`, `DD_DATA_PLANE_DOGSTATSD_ENABLED=true`), no Core Agent dependency | receives DogStatsD from `workload-client`; forwards to `mock-intake` over HTTP | 1 | | `mock-intake` | Dependency | reuse `docker/Dockerfile.correctness-tools` (the `datadog-intake` binary) | mock Datadog intake; record + count forwarded payloads; expose a query API the workload reads for assertions | receives ADP forwarder traffic; queried by `workload-client` | 1 | -| `workload-client` | Client (test driver) | new thin Dockerfile layering the `millstone` binary + test template + Antithesis Rust SDK | emits `setup_complete`, then test commands drive `millstone` load and run assertions against `mock-intake` | sends DogStatsD to `adp`; queries `mock-intake` | 1 | +| `workload-client` | Client (test driver) | thin Dockerfile layering the compiled test-command binaries + test templates + Antithesis Rust SDK | emits `setup_complete`, then `parallel_driver_send_dogstatsd` samples DogStatsD load (the 
`harness::payload::dogstatsd` feral/clean generator) and `finally_verify_delivery` checks the intake | sends DogStatsD to `adp`; queries `mock-intake` | 1 | Notes: -- **Use UDP or TCP, not UDS, between `workload-client` and `adp`.** UDS requires a shared volume - (same fate / no faulting), and it couples origin-detection credentials. UDP/TCP keeps the intake - *and* the DSD-intake links independently faultable and lets `malformed-dsd-no-crash` exercise the - network listeners. (UDS-specific listener behavior can be a secondary case with a shared-volume - sidecar — see "Listener-coverage variant".) +- **The DSD link between `workload-client` and `adp` currently uses UDS** via a shared + `dogstatsd-socket` volume (`DSD_SOCKET`). The tradeoff: the ingress link is no longer independently + faultable (shared volume, same fate) and it couples origin-detection credentials. A UDP/TCP + variant would keep the intake *and* the DSD-intake links independently faultable and let + `malformed-dsd-no-crash` exercise the network listeners; track it as a follow-up (see + "Listener-coverage variant"). - **Point ADP's forwarder at `mock-intake`** via `DD_URL` / forwarder endpoint config; set a real (fake) API key. This is the link that unlocks the entire egress data-loss cluster. -- `millstone` already supports deterministic seeds and fixed payload counts (`millstone.yaml`), - so the workload is reproducible; Antithesis adds the fault dimension on top. +- The driver samples all randomness through `AntithesisRng` (boundary-biased `Probe`/`Boundary` + samples and `random_choice` selections), so the workload is deterministic and simulator-steerable; + Antithesis adds the fault dimension on top. 
### What the primary topology covers -- **Memory & resource bounds (Cat A):** high-cardinality / many-timestamp `millstone` corpus + +- **Memory & resource bounds (Cat A):** high-cardinality / many-timestamp load from the driver + `memory_mode`/`memory_limit` set on `adp`; node-throttling on `adp` to stress the limiter timing; observe RSS vs grant. `rss-bounded-under-cardinality`, `aggregate-context-limit-enforced`, `interner-full-bounded`, `memory-limiter-survives-rss-read-failure` (needs `/proc` fault — see @@ -91,8 +93,8 @@ Notes: SUT-side assertions (`interner-reclamation-no-corruption`, `non-finite-values-handled-consistently`). - **Events & service-checks (Cat B/E additions):** the workload must emit well-formed *and* malformed events + service-checks so `events-sc-no-silent-loss`, `malformed-event-sc-no-crash`, and - the anti-vacuity anchor `events-sc-pipeline-reachable` are exercised — a metrics-only `millstone` - corpus leaves these vacuous. + the anti-vacuity anchor `events-sc-pipeline-reachable` are exercised — a metrics-only workload + leaves these vacuous. - **Transformer correctness (Cat G, primary-runnable subset):** `mapper-interner-bounded` rides a high-cardinality flood of distinct *mappable* names against a small `dogstatsd_mapper_string_interner_size`. The differential Cat G properties (`mapper-output-matches-agent`, `prefix-filter-ordering-matches-agent`) @@ -209,7 +211,7 @@ needs a script. Confirm whether the existing binary supports this or needs a small extension. - **A minimal Core Agent config-stub** must be built (or the full `datadog-agent` image adapted) to send adversarial config the real Agent wouldn't — needed for Add-on 1. 
-- Whether the workload can drive DogStatsD over **UDP/TCP at the volume `millstone` targets** without +- Whether the workload can drive DogStatsD over **UDP/TCP at the volume the driver targets** without loss confounding the assertions (UDP is lossy by nature; for no-loss assertions prefer TCP/UDS, and scope UDP cases to no-crash rather than no-loss). - The `checks_ipc` Histogram NaN bypass (`ddsketch-no-nan-poison`) needs a **checks-IPC producer** in diff --git a/test/antithesis/scratchbook/existing-assertions.md b/test/antithesis/scratchbook/existing-assertions.md index 479c17494f..540b34c450 100644 --- a/test/antithesis/scratchbook/existing-assertions.md +++ b/test/antithesis/scratchbook/existing-assertions.md @@ -1,7 +1,7 @@ --- sut_path: /home/ssm-user/src/saluki -commit: 2e4ae1b8be45143882f0dbeb5e74998021c5faf9 -updated: 2026-05-31 +commit: 21b2072b4743ddbf4c84891d93abac7299dc4ce8 +updated: 2026-06-01 external_references: - path: https://datadoghq.atlassian.net/wiki/spaces/DADP/ why: Datadog ADP Confluence space (design notes, weekly summaries, gap analyses) consulted for grounding. @@ -16,19 +16,22 @@ external_references: ## Summary **A bootstrap-and-workload assertion set exists, now with the first liveness instrumentation.** It -comprises **8 SDK call sites**: one lifecycle init and one bootstrap reachability probe in ADP, two -workload-side `assert_reachable!`/`assert_sometimes!` pairs in the harness drivers, and — added -2026-05-31 — the external `eventually_adp_alive` liveness `assert_always!` plus the **first in-SUT -property assertion**, an `assert_sometimes!` at the forwarder 2xx site in `saluki-components`. All -ADP/`saluki-components` sites are gated behind an `antithesis` cargo feature (no-op in production). 
-The bootstrap probe and the two driver anchors remain **integration probes / anti-vacuity anchors**; -the two new sites are real liveness instrumentation (Category H `adp-stays-alive` and the -good-function half of `adp-keeps-delivering` / in-SUT seed of `forwarder-eventual-delivery`). +comprises **11 SDK call sites**: one lifecycle init and one bootstrap reachability probe in ADP, a +`finally_verify_delivery` `assert_reachable!`/`assert_sometimes!` pair, the +`parallel_driver_send_dogstatsd` anchors (one `assert_reachable!` plus four `assert_sometimes!` — +delivered, clean, feral, mixed batch composition), the external `eventually_adp_alive` liveness +`assert_always!`, and the **first in-SUT property assertion**, an `assert_sometimes!` at the +forwarder 2xx site in `saluki-components`. All ADP/`saluki-components` sites are gated behind an +`antithesis` cargo feature (no-op in production). The bootstrap probe and the driver anchors remain +**integration probes / anti-vacuity anchors**; the liveness sites are real liveness instrumentation +(Category H `adp-stays-alive` and the good-function half of `adp-keeps-delivering` / in-SUT seed of +`forwarder-eventual-delivery`). > [!NOTE] > History: an early version of this file claimed no SDK assertions existed (true before the harness -> commit; corrected 2026-05-30). Updated again 2026-05-31 when the liveness pieces landed (6 → 8 -> sites). +> commit; corrected 2026-05-30). Updated 2026-05-31 when the liveness pieces landed (6 → 8 sites), +> and again when `parallel_driver_send_dogstatsd` added the clean/feral/mixed batch assertions +> (8 → 11 sites). ## Assertions present @@ -38,10 +41,25 @@ good-function half of `adp-keeps-delivering` / in-SUT seed of `forwarder-eventua | `bin/agent-data-plane/src/main.rs:100` | `assert_reachable!` | "agent-data-plane completed bootstrap" | `#[cfg(feature = "antithesis")]` | Bootstrap-integration probe — proves the SDK is linked, cataloging works, the instrumentation path is wired. 
| | `test/antithesis/harness/src/bin/finally_verify_delivery.rs:54` | `assert_reachable!` | "intake metrics dump query succeeded" | harness binary | Confirms the delivery-verification query path ran. | | `test/antithesis/harness/src/bin/finally_verify_delivery.rs:59` | `assert_sometimes!` | "metrics delivered end-to-end to the intake" (`delivered > 0`) | harness binary | Workload-side liveness anchor — partially seeds `forwarder-eventual-delivery`. | -| `test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs:77` | `assert_reachable!` | "workload sent a dogstatsd batch" | harness binary | Confirms the DSD driver actually emitted load. | -| `test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs:87` | `assert_sometimes!` | "workload drove a high-cardinality dogstatsd flood" (`regime == High`) | harness binary | Anti-vacuity anchor that timelines reach the high-cardinality regime — seeds `rss-bounded-under-cardinality`. | -| `test/antithesis/harness/src/bin/eventually_adp_alive.rs:62` | `assert_always!` | "ADP booted: API reachable and DogStatsD socket present" | harness binary (`eventually_`, faults-paused) | Death-liveness for `adp-stays-alive` — fails the branch when ADP self-crashed (config panic / load) but stayed down through the quiet period. | -| `lib/saluki-components/src/common/datadog/io.rs:553` | `assert_sometimes!` | "ADP forwarded a payload to the intake" (`{ domain }`) | `#[cfg(feature = "antithesis")]` | First in-SUT property assertion — good-function liveness (the full pipeline ran to a 2xx) + replay checkpoint; good-function half of `adp-keeps-delivering`, in-SUT seed of `forwarder-eventual-delivery`. | +| `test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs:67` | `assert_reachable!` | "workload ran a dogstatsd batch" | harness binary | Confirms the DSD driver ran a batch; details carry the attempted-line count and socket path. 
| +| `test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs:68` | `assert_sometimes!` | "workload delivered a dogstatsd line" (`attempted > 0`) | harness binary | Anti-vacuity anchor: a batch can sample count == 0, so "ran" does not imply "sent"; this proves a timeline sometimes actually delivers a line, else delivery checks are vacuous. | +| `test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs:73` | `assert_sometimes!` | "workload ran a fully clean batch" (`attempted > 0 && Clean`) | harness binary | Composition anchor: proves the clean branch is sometimes exercised, so the clean delivery surface is non-vacuous. | +| `test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs:78` | `assert_sometimes!` | "workload ran a fully feral batch" (`attempted > 0 && Feral`) | harness binary | Composition anchor: proves the feral branch is sometimes exercised. | +| `test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs:83` | `assert_sometimes!` | "workload ran a mixed batch" (`attempted > 0 && Mixed`) | harness binary | Composition anchor: proves the mixed branch is sometimes exercised. | +| `test/antithesis/harness/src/bin/eventually_adp_alive.rs:63` | `assert_always!` | "ADP booted: API reachable and DogStatsD socket present" | harness binary (`eventually_`, faults-paused) | Death-liveness for `adp-stays-alive` — fails the branch when ADP self-crashed (config panic / load) but stayed down through the quiet period. | +| `lib/saluki-components/src/common/datadog/io.rs:556` | `assert_sometimes!` | "ADP forwarded a payload to the intake" (`{ domain }`) | `#[cfg(feature = "antithesis")]` | First in-SUT property assertion — good-function liveness (the full pipeline ran to a 2xx) + replay checkpoint; good-function half of `adp-keeps-delivering`, in-SUT seed of `forwarder-eventual-delivery`. 
| + +> **Load driver (2026-06-01):** `parallel_driver_send_dogstatsd` replaced the `parallel_driver_load` +> driver (the four-profile C1–C4 ladder and the `harness::load_gen` Generator/Profile module are gone). +> The driver samples a batch size (`random_range(0..=10_000)`), and for each line calls +> `harness::payload::dogstatsd::send`, which picks a message type via `random_choice` and dispatches to +> `metrics`/`events`/`service_checks`, then writes the bytes to the DSD UDS socket and exits. The +> generator builds names, tags, values, and headers combinatorially from finite segment pools joined by +> sampled separators (`harness::payload::dogstatsd::common`), with counts from the finite +> `harness::rand::Boundary` sampler. A per-message `Vibe` toggle is either clean (by-the-book) or feral +> (aberrant bytes, cursed-but-equivalent number encodings, skewed `_e{len,len}` event header lengths). +> Its five assertions above are the `assert_reachable!` batch anchor plus four `assert_sometimes!` +> anchors (delivered, and the clean/feral/mixed batch-composition checks). Dependency wiring: ADP gains the SDK only under the `antithesis` feature (`bin/agent-data-plane/Cargo.toml:14` → `dep:antithesis_sdk`, `antithesis_sdk/full`, @@ -59,14 +77,15 @@ Searched the repository with ripgrep over `*.rs` and `*.toml`: - `rg -li "antithesis" -g '*.rs' -g '*.toml'` — matches in ADP `main.rs`, the two harness binaries, and the `Cargo.toml` files above. 
- `rg "assert_always|assert_sometimes|assert_reachable|assert_unreachable|antithesis_sdk" -g '*.rs'` - — the 6 call sites tabled above; **no `assert_always!` and no `assert_unreachable!` anywhere yet.** + — the 11 call sites tabled above (`assert_always!` now present in `eventually_adp_alive`); **no + `assert_unreachable!` anywhere yet.** ## Implication for property work Most catalog invariants are still **net-new instrumentation**, but the pattern is now proven in-SUT: - `forwarder-eventual-delivery` now has an **in-SUT** `Sometimes(forwarded a payload)` at the 2xx - site (io.rs:553) in addition to the workload-side `Sometimes(delivered > 0)`. The full no-loss + site (io.rs:556) in addition to the workload-side `Sometimes(delivered > 0)`. The full no-loss `Always`/accounting reconciliation (delivered == accepted-and-retryable after a transient outage) is still net-new. - `rss-bounded-under-cardinality` has its high-cardinality `Sometimes` anchor but no SUT-side RSS or diff --git a/test/antithesis/scratchbook/property-catalog.md b/test/antithesis/scratchbook/property-catalog.md index 73160b962a..76f8a02cfe 100644 --- a/test/antithesis/scratchbook/property-catalog.md +++ b/test/antithesis/scratchbook/property-catalog.md @@ -1,7 +1,7 @@ --- sut_path: /home/ssm-user/src/saluki -commit: 2e4ae1b8be45143882f0dbeb5e74998021c5faf9 -updated: 2026-05-31 +commit: 21b2072b4743ddbf4c84891d93abac7299dc4ce8 +updated: 2026-06-01 external_references: - path: https://datadoghq.atlassian.net/wiki/spaces/DADP/ why: ADP Confluence space — headline guarantees and gap analyses that seed properties. @@ -33,6 +33,16 @@ assertion** — an `assert_sometimes!` at the forwarder 2xx site in `saluki-comp fail by design** under default config (memory limiter disabled, interner heap-fallback enabled, disk persistence off) — these are flagged; they are the highest-value findings, not catalog errors. 
+> **Workload-reach note (2026-06-01):** the live `parallel_driver_send_dogstatsd` feral DSD-line +> generator exercises only **one** of the five still-unfixed reproduced bugs (branch +> `blt/antithesis-bug-tests`): the high-cardinality interner heap-fallback +> (`rss-bounded-under-cardinality`). The other four are off the DSD-socket input path entirely: +> `ddsketch-no-nan-poison` needs a `checks_ipc` gRPC Histogram feeder (DSD drops non-finite at the +> codec); `replay-corruption-not-silent-eof` needs the `agent-data-plane dogstatsd replay` CLI plus +> crafted capture files; `aggregate-clock-skew-stable` (forward-jump) needs a clock-skip fault; +> `config-stall-no-deadlock` needs a config-stream stub that withholds the snapshot. The sub-second +> window `%0` panic is fixed upstream. See `bug-ledger.md`. + Provenance tags `[Fn]` after each slug name the discovery focus that surfaced it: `[RB]` resource boundaries, `[DL]` data-loss/recovery, `[AG]` aggregation/sketch, `[LC]` lifecycle/config, `[RC]` replay/codec/concurrency, `[WC]` wildcard (from SUT analysis). @@ -47,12 +57,16 @@ limiter is advisory (≤25ms backoff, 250ms sampling, cooperative), disabled by interner spills to the heap by default. This category probes whether RSS is *actually* bounded. 
### rss-bounded-under-cardinality — RSS bounded under high cardinality -> **Status (2026-05-29): WORKLOAD WIRED + ROOT CAUSE REPRO'D** — `parallel_driver_send_dogstatsd` -> (high-cardinality regime) floods distinct contexts in the Antithesis harness to drive this -> behavioral bug under a run; and the root cause is reproduced as a unit test in -> `lib/saluki-context/src/resolver.rs` +> **Status (2026-06-01): WORKLOAD WIRED + ROOT CAUSE REPRO'D** — `parallel_driver_send_dogstatsd` +> sends a sampled batch of feral DSD lines whose names/tags/values are built combinatorially from +> finite segment pools, flooding distinct contexts in ADP to drive this behavioral bug under a run; and +> the root cause is reproduced as a unit test in `lib/saluki-context/src/resolver.rs` > `tests::bug_default_heap_fallback_makes_context_resolution_unbounded` (default heap fallback ⇒ -> resolution never refuses ⇒ unbounded memory). Not fixed. +> resolution never refuses ⇒ unbounded memory). Not fixed. **This is the only one of the five +> still-unfixed reproduced bugs that the live DSD-line generator exercises**, and even here it becomes a +> *caught* failure only with a memory-capped `adp` container (OOM ⇒ `eventually_adp_alive`) or a +> SUT-side RSS assertion — neither yet wired. The other four bugs are off the DSD-socket input path +> (see the note below the catalog header / bug-ledger). | | | |---|---| | **Type** | Safety (expected to FAIL by design under default config) |