From a4c93b6ccc2748d6d03212df6c0772ca71201a76 Mon Sep 17 00:00:00 2001 From: "Brian L. Troutwine" Date: Sat, 30 May 2026 17:39:46 +0000 Subject: [PATCH] Vary datadog.yaml in test/antithesis This PR introduces variation in the datadog.yaml we use under test in the antithesis rig. The goal here is to explore variation in buffer sizes etc and also startup panics on truly weird configs. --- .claude/scheduled_tasks.lock | 1 + Cargo.lock | 4 + test/antithesis/deploy/Dockerfile | 26 +- test/antithesis/deploy/adp/entrypoint.sh | 26 ++ test/antithesis/deploy/docker-compose.yaml | 24 ++ test/antithesis/deploy/workload/entrypoint.sh | 13 +- test/antithesis/harness/Cargo.toml | 4 + .../src/bin/datadog_yaml_config_gen/config.rs | 302 ++++++++++++++++++ .../src/bin/datadog_yaml_config_gen/main.rs | 79 +++++ test/antithesis/harness/src/lib.rs | 4 + test/antithesis/harness/src/rand.rs | 72 +++++ 11 files changed, 546 insertions(+), 9 deletions(-) create mode 100644 .claude/scheduled_tasks.lock create mode 100644 test/antithesis/deploy/adp/entrypoint.sh create mode 100644 test/antithesis/harness/src/bin/datadog_yaml_config_gen/config.rs create mode 100644 test/antithesis/harness/src/bin/datadog_yaml_config_gen/main.rs create mode 100644 test/antithesis/harness/src/lib.rs create mode 100644 test/antithesis/harness/src/rand.rs diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock new file mode 100644 index 0000000000..5b120af615 --- /dev/null +++ b/.claude/scheduled_tasks.lock @@ -0,0 +1 @@ +{"sessionId":"477c57ec-b319-4603-b0be-745a689f9811","pid":2891480,"procStart":"153930906","acquiredAt":1780165254895} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index b8d5db24c1..a7e076bcb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1676,8 +1676,12 @@ dependencies = [ "antithesis_sdk", "anyhow", "clap", + "num-traits", "rand 0.10.1", + "rand_distr", + "serde", "serde_json", + "serde_yaml", ] [[package]] diff --git a/test/antithesis/deploy/Dockerfile 
b/test/antithesis/deploy/Dockerfile index d8fe97108d..9630de71d3 100644 --- a/test/antithesis/deploy/Dockerfile +++ b/test/antithesis/deploy/Dockerfile @@ -74,11 +74,13 @@ RUN --mount=type=cache,target=/tools/target,id=antithesis-tools-target \ --mount=type=cache,target=/root/.cargo/git,id=cargo-git \ cargo build --release \ --bin datadog-intake --bin millstone \ - --bin parallel_driver_send_dogstatsd --bin finally_verify_delivery && \ + --bin parallel_driver_send_dogstatsd --bin finally_verify_delivery \ + --bin datadog_yaml_config_gen && \ cp /tools/target/release/datadog-intake /usr/local/bin/datadog-intake && \ cp /tools/target/release/millstone /usr/local/bin/millstone && \ cp /tools/target/release/parallel_driver_send_dogstatsd /usr/local/bin/parallel_driver_send_dogstatsd && \ - cp /tools/target/release/finally_verify_delivery /usr/local/bin/finally_verify_delivery + cp /tools/target/release/finally_verify_delivery /usr/local/bin/finally_verify_delivery && \ + cp /tools/target/release/datadog_yaml_config_gen /usr/local/bin/datadog_yaml_config_gen # --------------------------------------------------------------------------- # Runtime: Agent Data Plane (SUT). @@ -92,8 +94,12 @@ RUN apt-get update && \ COPY --from=adp-builder /usr/local/bin/agent-data-plane /usr/local/bin/agent-data-plane # Expose DWARF/build-id symbols to Antithesis for symbolization (one-hop symlink to the unstripped binary). RUN mkdir -p /symbols && ln -s /usr/local/bin/agent-data-plane /symbols/agent-data-plane -# main.rs requires the bootstrap config file to exist at the default path; ship a minimal standalone config. +# main.rs requires the bootstrap config file to exist at the default path; ship a minimal standalone +# config as a fallback. The boot wrapper overwrites it with the per-replay config drawn by the +# datadog-yaml-config-gen service onto the shared `agent-config` volume. 
COPY test/antithesis/deploy/adp/datadog.yaml /etc/datadog-agent/datadog.yaml +# Boot wrapper: waits for the drawn config sentinel, copies the config into place, then execs ADP. +COPY --chmod=755 test/antithesis/deploy/adp/entrypoint.sh /entrypoint.sh # ADP's control-plane secure API requires an IPC TLS cert (a single PEM holding both certificate and # private key) that the Core Agent normally generates. In standalone mode there is no Core Agent, so # generate a self-signed cert+key. An empty auth_token satisfies the IPC auth config at startup. @@ -103,7 +109,7 @@ RUN openssl req -x509 -newkey rsa:2048 -nodes -days 3650 \ cat /tmp/ipc_cert.pem /tmp/ipc_key.pem > /etc/datadog-agent/ipc_cert.pem && \ rm -f /tmp/ipc_cert.pem /tmp/ipc_key.pem && \ touch /etc/datadog-agent/auth_token -ENTRYPOINT ["/usr/local/bin/agent-data-plane"] +ENTRYPOINT ["/entrypoint.sh"] CMD ["run"] # --------------------------------------------------------------------------- @@ -114,6 +120,18 @@ ENV NO_COLOR=1 COPY --from=tools-builder /usr/local/bin/datadog-intake /usr/local/bin/datadog-intake ENTRYPOINT ["/usr/local/bin/datadog-intake"] +# --------------------------------------------------------------------------- +# Runtime: datadog-yaml-config-gen (one-shot per-replay datadog.yaml generator). +# +# Fires the Antithesis setup_complete snapshot, generates a randomized datadog.yaml using SDK +# randomness, writes it to the shared `agent-config` volume with a `ready` sentinel, then exits. ADP +# gates its boot on the sentinel. Uninstrumented harness tooling, not the SUT. 
+# --------------------------------------------------------------------------- +FROM ${APP_IMAGE} AS datadog-yaml-config-gen +ENV NO_COLOR=1 +COPY --from=tools-builder /usr/local/bin/datadog_yaml_config_gen /usr/local/bin/datadog_yaml_config_gen +ENTRYPOINT ["/usr/local/bin/datadog_yaml_config_gen"] + # --------------------------------------------------------------------------- # Runtime: workload client (millstone load generator + test templates). # --------------------------------------------------------------------------- diff --git a/test/antithesis/deploy/adp/entrypoint.sh b/test/antithesis/deploy/adp/entrypoint.sh new file mode 100644 index 0000000000..cc0c89f60a --- /dev/null +++ b/test/antithesis/deploy/adp/entrypoint.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Agent Data Plane boot wrapper. +# +# The datadog-yaml-config-gen service generates a per-replay datadog.yaml (post-snapshot) onto +# the shared `agent-config` volume and touches a `ready` sentinel. We block on +# that sentinel, copy the config into place, then exec ADP. docker-compose also +# gates this container on datadog-yaml-config-gen completing successfully, so the wait below +# is a defensive belt-and-suspenders for the file actually landing. + +CONFIG_DIR="${AGENT_CONFIG_DIR:-/agent-config}" + +tries=120 +while [ ! 
-f "${CONFIG_DIR}/ready" ]; do + tries=$((tries - 1)) + if [ "${tries}" -le 0 ]; then + echo "timeout waiting for ${CONFIG_DIR}/ready" >&2 + exit 1 + fi + sleep 1 +done + +cp "${CONFIG_DIR}/datadog.yaml" /etc/datadog-agent/datadog.yaml + +exec /usr/local/bin/agent-data-plane "$@" diff --git a/test/antithesis/deploy/docker-compose.yaml b/test/antithesis/deploy/docker-compose.yaml index 335c5b9b1e..c7bb1b5903 100644 --- a/test/antithesis/deploy/docker-compose.yaml +++ b/test/antithesis/deploy/docker-compose.yaml @@ -20,6 +20,25 @@ services: timeout: 2s retries: 30 + datadog-yaml-config-gen: + container_name: datadog-yaml-config-gen + hostname: datadog-yaml-config-gen + platform: linux/amd64 + init: true + build: + context: ../../.. + dockerfile: test/antithesis/deploy/Dockerfile + target: datadog-yaml-config-gen + image: datadog-yaml-config-gen:latest + environment: + NO_COLOR: "1" + volumes: + - agent-config:/agent-config + depends_on: + intake: + condition: service_healthy + # One-shot: fires setup_complete, generates the per-replay datadog.yaml, then exits 0. + adp: container_name: adp hostname: adp @@ -40,9 +59,13 @@ services: DD_DATA_PLANE_DOGSTATSD_ENABLED: "true" volumes: - dogstatsd-socket:/var/run/datadog + # Read-only: the boot wrapper copies the drawn datadog.yaml from here into /etc/datadog-agent. + - agent-config:/agent-config:ro depends_on: intake: condition: service_healthy + datadog-yaml-config-gen: + condition: service_completed_successfully healthcheck: # ADP's unprivileged API listens on TCP :5100 once the internal supervisor is up. 
test: ["CMD-SHELL", "bash -c 'exec 3<>/dev/tcp/localhost/5100'"] @@ -78,3 +101,4 @@ services: volumes: dogstatsd-socket: + agent-config: diff --git a/test/antithesis/deploy/workload/entrypoint.sh b/test/antithesis/deploy/workload/entrypoint.sh index 3ff5e46908..7165e2aab9 100644 --- a/test/antithesis/deploy/workload/entrypoint.sh +++ b/test/antithesis/deploy/workload/entrypoint.sh @@ -4,9 +4,13 @@ set -euo pipefail # Workload client entrypoint. # # By the time this runs, docker-compose has gated startup on the `adp` and `intake` services being -# healthy (depends_on: condition: service_healthy). We re-confirm reachability defensively, emit the -# Antithesis `setup_complete` signal, then idle so Antithesis can run test commands from the test -# template at /opt/antithesis/test/v1/. +# healthy (depends_on: condition: service_healthy). We re-confirm reachability defensively, then idle +# so Antithesis can run test commands from the test template at /opt/antithesis/test/v1/. +# +# The Antithesis `setup_complete` signal is NOT emitted here. The datadog-yaml-config-gen service owns it: it +# fires `setup_complete` (the snapshot boundary) before ADP boots, then draws a per-replay +# datadog.yaml. Emitting setup_complete here (after ADP is healthy) would freeze ADP's config across +# every replay branch. ADP_HOST="${ADP_HOST:-adp}" ADP_API_PORT="${ADP_API_PORT:-5100}" @@ -46,8 +50,7 @@ wait_for_tcp "${ADP_HOST}" "${ADP_API_PORT}" "agent-data-plane API" wait_for_socket "${DSD_SOCKET}" "agent-data-plane DogStatsD socket" wait_for_tcp "${INTAKE_HOST}" "${INTAKE_PORT}" "datadog-intake" -echo "System is ready. Emitting setup_complete." -/opt/antithesis/setup-complete.sh +echo "System is ready. setup_complete is emitted by the datadog-yaml-config-gen service, not here." echo "Workload client idle; awaiting Antithesis test commands." 
exec tail -f /dev/null diff --git a/test/antithesis/harness/Cargo.toml b/test/antithesis/harness/Cargo.toml index 0190ffc517..9d35aa056c 100644 --- a/test/antithesis/harness/Cargo.toml +++ b/test/antithesis/harness/Cargo.toml @@ -16,8 +16,12 @@ clap = { workspace = true, features = [ "std", "usage", ] } +num-traits = { workspace = true } rand = { workspace = true } +rand_distr = { workspace = true } +serde = { workspace = true } serde_json = { workspace = true } +serde_yaml = { workspace = true } [lints.clippy] all = "deny" diff --git a/test/antithesis/harness/src/bin/datadog_yaml_config_gen/config.rs b/test/antithesis/harness/src/bin/datadog_yaml_config_gen/config.rs new file mode 100644 index 0000000000..1120d1513d --- /dev/null +++ b/test/antithesis/harness/src/bin/datadog_yaml_config_gen/config.rs @@ -0,0 +1,302 @@ +//! Configuration model and rendering for Datadog Agent configuration. +//! +//! Primary focus is currently `DogStatsD` but this is, hopefully, easy to expand +//! in the future. + +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use anyhow::Context as _; +use harness::rand::Probe; +use rand::distr::{Distribution, StandardUniform}; +use rand::{Rng, RngExt}; +use serde::{Serialize, Serializer}; + +/// Yaml flags the Agent reads at boot that never vary. +const STATIC_YAML_TAIL: &str = "use_dogstatsd: true +use_v2_api_series: true +inventories_enabled: false +enable_metadata_collection: false +cloud_provider_metadata: [] +"; + +/// A Go `time.Duration`, rendered as a Go duration string (for example `100ms`) +/// — the form the Agent's duration config keys parse. 
+#[derive(Debug, Clone, Copy)] +struct GoDuration(Duration); + +impl Serialize for GoDuration { + fn serialize(&self, serializer: S) -> Result { + serializer.collect_str(&format_args!("{}ms", self.0.as_millis())) + } +} + +impl Distribution for Probe { + fn sample(&self, rng: &mut R) -> GoDuration { + let millis: u64 = self.sample(rng); + GoDuration(Duration::from_millis(millis)) + } +} + +/// A duration the Agent reads as a plain integer number of seconds (`GetInt`), +/// rendered as that integer. +#[derive(Debug, Clone, Copy)] +struct DurationSeconds(Duration); + +impl Serialize for DurationSeconds { + fn serialize(&self, serializer: S) -> Result { + serializer.serialize_u64(self.0.as_secs()) + } +} + +impl Distribution for Probe { + fn sample(&self, rng: &mut R) -> DurationSeconds { + let secs: u64 = self.sample(rng); + DurationSeconds(Duration::from_secs(secs)) + } +} + +/// Agent log level +/// +/// Restricted to quiet levels on purpose. Antithesis enforces a per-hour +/// log-output budget per run and `info`/`debug`/`trace` is a whole awful lot of +/// logs. +#[derive(Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "lowercase")] +pub(crate) enum LogLevel { + /// Warnings and above. + Warn, + /// Errors only. + Error, +} + +impl Distribution for StandardUniform { + fn sample(&self, rng: &mut R) -> LogLevel { + match rng.random_range(0..2u8) { + 0 => LogLevel::Warn, + _ => LogLevel::Error, + } + } +} + +/// Tag granularity for origin-detected `DogStatsD` tags. +#[derive(Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "lowercase")] +pub(crate) enum TagCardinality { + /// Low-cardinality objects: clusters, hosts, deployments, images. Agent + /// default. + Low, + /// Orchestrator-level: pod (Kubernetes) or task (ECS/Mesos) cardinality. + Orchestrator, + /// High-cardinality objects: individual containers, request user IDs, etc. 
+ High, +} + +impl Distribution for StandardUniform { + fn sample(&self, rng: &mut R) -> TagCardinality { + match rng.random_range(0..3u8) { + 0 => TagCardinality::Low, + 1 => TagCardinality::Orchestrator, + _ => TagCardinality::High, + } + } +} + +/// The Agent's `DogStatsD` configuration surface. `dogstatsd_socket` is +/// supplied by the environment; the rest are drawn. +/// +/// Numeric fields are drawn with [`Probe`]: usually a typical value (so ADP +/// boots and runs), occasionally a boundary value to probe overflow and +/// wraparound. +#[allow(clippy::struct_field_names, clippy::struct_excessive_bools)] +#[derive(Debug, Serialize)] +pub(crate) struct DogStatsdConfig { + /// Unix socket the server listens on. Supplied by the environment. + dogstatsd_socket: PathBuf, + /// Buffer used to receive statsd packets, in bytes. + dogstatsd_buffer_size: u64, + /// Bytes for the socket receive buffer (`POSIX`); `0` keeps the OS default. + dogstatsd_so_rcvbuf: u64, + /// Packets buffered before flushing to the processing queue. + dogstatsd_packet_buffer_size: u64, + /// Maximum time packets sit in the packet buffer before a flush. + dogstatsd_packet_buffer_flush_timeout: GoDuration, + /// Internal queue size of the server; smaller caps memory but risks packet + /// drops. + dogstatsd_queue_size: u64, + /// Number of processing pipelines. + dogstatsd_pipeline_count: u64, + /// Worker count processing packets; `0` lets the Agent choose. + dogstatsd_workers_count: u64, + /// Seconds a counter is sampled to `0` after its last value before expiring. + dogstatsd_expiry_seconds: DurationSeconds, + /// Seconds a metric context is kept in memory after its last sample. + dogstatsd_context_expiry_seconds: DurationSeconds, + /// Maximum entries in the string interner cache. + dogstatsd_string_interner_size: u64, + /// Max number of metric-mapping results cached by the mapper. + dogstatsd_mapper_cache_size: u64, + /// Max metrics per payload from the no-aggregation pipeline. 
+ dogstatsd_no_aggregation_pipeline_batch_size: u64, + /// Tag granularity for origin-detected tags. + dogstatsd_tag_cardinality: TagCardinality, + /// Listen for non-local UDP traffic (binds `0.0.0.0`). + dogstatsd_non_local_traffic: bool, + /// Tag metrics with container metadata from the Unix socket peer. + dogstatsd_origin_detection: bool, + /// Use a client-provided container ID to enrich metrics. + dogstatsd_origin_detection_client: bool, + /// Let clients opt out of origin detection via cardinality `none`. + dogstatsd_origin_optout_enabled: bool, + /// Collect basic per-metric statistics (count / last seen). + dogstatsd_metrics_stats_enable: bool, + /// When an `Entity-ID` is set, skip origin-detection tag enrichment. + dogstatsd_entity_id_precedence: bool, + /// Enable the no-aggregation pipeline (forward timestamped metrics with + /// tagging only). + dogstatsd_no_aggregation_pipeline: bool, + /// Flush incomplete metric time buckets on shutdown. + dogstatsd_flush_incomplete_buckets: bool, + /// Automatically adjust the number of processing pipelines. + dogstatsd_pipeline_autoadjust: bool, + /// Publish `DogStatsD` internal stats as Go expvars. + dogstatsd_stats_enable: bool, +} + +impl DogStatsdConfig { + /// Draw the `DogStatsD` options from `rng`, taking the socket from the + /// environment. 
+ fn gen(rng: &mut R, dogstatsd_socket: &Path) -> Self { + Self { + dogstatsd_socket: dogstatsd_socket.to_path_buf(), + dogstatsd_buffer_size: Probe.sample(rng), + dogstatsd_so_rcvbuf: Probe.sample(rng), + dogstatsd_packet_buffer_size: Probe.sample(rng), + dogstatsd_packet_buffer_flush_timeout: Probe.sample(rng), + dogstatsd_queue_size: Probe.sample(rng), + dogstatsd_pipeline_count: Probe.sample(rng), + dogstatsd_workers_count: Probe.sample(rng), + dogstatsd_expiry_seconds: Probe.sample(rng), + dogstatsd_context_expiry_seconds: Probe.sample(rng), + dogstatsd_string_interner_size: Probe.sample(rng), + dogstatsd_mapper_cache_size: Probe.sample(rng), + dogstatsd_no_aggregation_pipeline_batch_size: Probe.sample(rng), + dogstatsd_tag_cardinality: rng.random(), + dogstatsd_non_local_traffic: rng.random(), + dogstatsd_origin_detection: rng.random(), + dogstatsd_origin_detection_client: rng.random(), + dogstatsd_origin_optout_enabled: rng.random(), + dogstatsd_metrics_stats_enable: rng.random(), + dogstatsd_entity_id_precedence: rng.random(), + dogstatsd_no_aggregation_pipeline: rng.random(), + dogstatsd_flush_incomplete_buckets: rng.random(), + dogstatsd_pipeline_autoadjust: rng.random(), + dogstatsd_stats_enable: rng.random(), + } + } +} + +/// Agent-facing config. `hostname`, `api_key`, `dd_url`, and the socket are +/// supplied by the environment; `log_level` and the `DogStatsD` options are +/// drawn per branch. The static flags are appended by [`Self::to_yaml`], not +/// fields here. +#[derive(Debug, Serialize)] +pub(crate) struct DatadogConfig { + /// Agent hostname. Supplied by the environment. ADP requires it + /// (`FixedHostProvider`); absent it refuses to boot. + hostname: String, + /// Agent API key. Supplied by the environment. + api_key: String, + /// Metrics intake base URL. Supplied by the environment. + dd_url: String, + /// Agent log verbosity. Drawn; restricted to quiet levels (see [`LogLevel`]). 
+ log_level: LogLevel, + /// `DogStatsD` options, flattened to top-level `dogstatsd_*` keys. + #[serde(flatten)] + dogstatsd: DogStatsdConfig, +} + +impl DatadogConfig { + /// Generate a config: the environmental fields come from the caller, the + /// rest are drawn from `rng`. With an Antithesis-backed rng, each call after + /// the snapshot yields an independent draw per replay branch. + pub(crate) fn gen( + rng: &mut R, hostname: &str, api_key: &str, dd_url: &str, dogstatsd_socket: &Path, + ) -> Self { + Self { + hostname: hostname.to_owned(), + api_key: api_key.to_owned(), + dd_url: dd_url.to_owned(), + log_level: rng.random(), + dogstatsd: DogStatsdConfig::gen(rng, dogstatsd_socket), + } + } + + /// Render `self` as a `datadog.yaml` string, followed by the static-tail + /// flags. + /// + /// # Errors + /// + /// Returns an error if serialization fails. + pub(crate) fn to_yaml(&self) -> anyhow::Result { + let mut yaml = serde_yaml::to_string(self).context("serialize datadog.yaml")?; + yaml.push_str(STATIC_YAML_TAIL); + Ok(yaml) + } +} + +#[cfg(test)] +mod tests { + use rand::rngs::SmallRng; + use rand::SeedableRng as _; + + use super::*; + + #[test] + fn renders_expected_yaml_for_fixed_seed() { + let mut rng = SmallRng::seed_from_u64(0); + let config = DatadogConfig::gen( + &mut rng, + "antithesis-adp", + "antithesis-test-api-key", + "http://intake:2049", + Path::new("/var/run/datadog/dsd.socket"), + ); + let expected = "\ +hostname: antithesis-adp +api_key: antithesis-test-api-key +dd_url: http://intake:2049 +log_level: warn +dogstatsd_socket: /var/run/datadog/dsd.socket +dogstatsd_buffer_size: 672 +dogstatsd_so_rcvbuf: 65535 +dogstatsd_packet_buffer_size: 9223372036854775807 +dogstatsd_packet_buffer_flush_timeout: 82ms +dogstatsd_queue_size: 126 +dogstatsd_pipeline_count: 1 +dogstatsd_workers_count: 3673 +dogstatsd_expiry_seconds: 32766 +dogstatsd_context_expiry_seconds: 219 +dogstatsd_string_interner_size: 48 +dogstatsd_mapper_cache_size: 469 
+dogstatsd_no_aggregation_pipeline_batch_size: 54 +dogstatsd_tag_cardinality: orchestrator +dogstatsd_non_local_traffic: true +dogstatsd_origin_detection: false +dogstatsd_origin_detection_client: false +dogstatsd_origin_optout_enabled: false +dogstatsd_metrics_stats_enable: false +dogstatsd_entity_id_precedence: true +dogstatsd_no_aggregation_pipeline: true +dogstatsd_flush_incomplete_buckets: false +dogstatsd_pipeline_autoadjust: true +dogstatsd_stats_enable: false +use_dogstatsd: true +use_v2_api_series: true +inventories_enabled: false +enable_metadata_collection: false +cloud_provider_metadata: [] +"; + assert_eq!(config.to_yaml().expect("render datadog.yaml"), expected); + } +} diff --git a/test/antithesis/harness/src/bin/datadog_yaml_config_gen/main.rs b/test/antithesis/harness/src/bin/datadog_yaml_config_gen/main.rs new file mode 100644 index 0000000000..66e6ba3cae --- /dev/null +++ b/test/antithesis/harness/src/bin/datadog_yaml_config_gen/main.rs @@ -0,0 +1,79 @@ +//! Generates a randomized, valid `datadog.yaml` for the Agent Data Plane (ADP) +//! per Antithesis replay branch. +//! +//! After `setup_complete`, this generates a `datadog.yaml` (see [`config`]) +//! whose `DogStatsD` options are drawn with Antithesis SDK randomness, writes it +//! to the shared `agent-config` volume, and touches a `ready` sentinel. ADP's +//! boot wrapper (`deploy/adp/entrypoint.sh`) copies the yaml into place and +//! execs the agent. +//! +//! The draw runs *after* the snapshot boundary so each Antithesis replay branch +//! gets a different config and ADP boots fresh with it. Deployment-specific +//! fields come from the environment (see [`Cli`]). + +mod config; + +use std::fs; +use std::path::PathBuf; + +use antithesis_sdk::prelude::*; +use antithesis_sdk::random::AntithesisRng; +use anyhow::Context as _; +use clap::Parser; +use config::DatadogConfig; +use rand::rand_core::UnwrapErr; +use serde_json::json; + +/// Deployment inputs, sourced from the environment (or flags). 
+#[derive(Debug, Parser)] +#[command(name = "datadog-yaml-config-gen")] +struct Cli { + /// Directory to write `datadog.yaml` and the `ready` sentinel into. + #[arg(long, env = "CONFIG_DIR", default_value = "/agent-config")] + config_dir: PathBuf, + /// Agent hostname written into the config. (`DD_HOSTNAME`, not the ambient + /// `HOSTNAME`, so a container's own hostname does not leak in.) + #[arg(long, env = "DD_HOSTNAME", default_value = "antithesis-adp")] + hostname: String, + /// Agent API key written into the config. + #[arg(long, env = "API_KEY", default_value = "antithesis-test-api-key")] + api_key: String, + /// Metrics intake base URL. + #[arg(long, env = "DD_URL", default_value = "http://intake:2049")] + dd_url: String, + /// `DogStatsD` unix datagram socket path. + #[arg(long, env = "DOGSTATSD_SOCKET", default_value = "/var/run/datadog/dsd.socket")] + dogstatsd_socket: PathBuf, +} + +fn main() -> anyhow::Result<()> { + antithesis_init(); + let cli = Cli::parse(); + + // Snapshot boundary. The draw below must happen AFTER this so each replay + // branch generates an independent config. 
+ lifecycle::setup_complete(&json!({ "component": "datadog-yaml-config-gen" })); + + fs::create_dir_all(&cli.config_dir) + .with_context(|| format!("create agent config dir {}", cli.config_dir.display()))?; + + let mut rng = UnwrapErr(AntithesisRng); + let config = DatadogConfig::gen( + &mut rng, + &cli.hostname, + &cli.api_key, + &cli.dd_url, + &cli.dogstatsd_socket, + ); + + let yaml_path = cli.config_dir.join("datadog.yaml"); + fs::write(&yaml_path, config.to_yaml()?.as_bytes()) + .with_context(|| format!("write agent config {}", yaml_path.display()))?; + + let details = serde_json::to_value(&config).unwrap_or_else(|_| json!({})); + assert_reachable!("datadog_yaml_config_gen.datadog_yaml_generated", &details); + + let ready_path = cli.config_dir.join("ready"); + fs::write(&ready_path, b"ready\n").with_context(|| format!("write sentinel {}", ready_path.display()))?; + Ok(()) +} diff --git a/test/antithesis/harness/src/lib.rs b/test/antithesis/harness/src/lib.rs new file mode 100644 index 0000000000..8c05e117db --- /dev/null +++ b/test/antithesis/harness/src/lib.rs @@ -0,0 +1,4 @@ +//! Shared helpers for the Antithesis harness, used by the `src/bin/*` test +//! commands. + +pub mod rand; diff --git a/test/antithesis/harness/src/rand.rs b/test/antithesis/harness/src/rand.rs new file mode 100644 index 0000000000..ceaf2e6ecd --- /dev/null +++ b/test/antithesis/harness/src/rand.rs @@ -0,0 +1,72 @@ +//! Randomness utilities. + +use rand::distr::Distribution; +use rand::seq::IndexedRandom as _; +use rand::{Rng, RngExt}; +use rand_distr::LogNormal; + +/// Boundary values for the u64 field. 
+const BOUNDARIES: &[u64] = &[ + 0, + 1, + i8::MAX as u64 - 1, + i8::MAX as u64, + i8::MAX as u64 + 1, + u8::MAX as u64 - 1, + u8::MAX as u64, + u8::MAX as u64 + 1, + i16::MAX as u64 - 1, + i16::MAX as u64, + i16::MAX as u64 + 1, + u16::MAX as u64 - 1, + u16::MAX as u64, + u16::MAX as u64 + 1, + i32::MAX as u64 - 1, + i32::MAX as u64, + i32::MAX as u64 + 1, + u32::MAX as u64 - 1, + u32::MAX as u64, + u32::MAX as u64 + 1, + i64::MAX as u64 - 1, + i64::MAX as u64, + i64::MAX as u64 + 1, + u64::MAX - 1, + u64::MAX, +]; + +/// Distribution over `u64`: one draw in eight is a uniformly chosen entry of +/// `BOUNDARIES`; every other draw comes from `typical`. +#[derive(Debug, Clone, Copy)] +pub struct Probe; + +impl Distribution for Probe { + fn sample(&self, rng: &mut R) -> u64 { + if rng.random_ratio(1, 8) { + BOUNDARIES.choose(rng).copied().unwrap_or(0) + } else { + typical(rng) + } + } +} + +/// Typical draw: a [`LogNormal`] (median `1024`, sigma `4`) — symmetric about the +/// median on a log scale, with a heavy tail spanning many orders of magnitude. The +/// float draw is rounded with `f64::round` (draws below `0.5` become `0`); a value +/// too large for `u64` becomes `u64::MAX`. [`num_traits::cast`] is range-checked, +/// so the conversion is not a lossy `as`. +/// +/// Approximate probability of a typical draw landing in each range: +/// +/// | Value range | Probability | +/// |------------------------|-------------| +/// | `<= 16` | ~15% | +/// | `16 ..= 256` | ~21% | +/// | `256 ..= 1_024` | ~14% | +/// | `1_024 ..= 4_096` | ~14% | +/// | `4_096 ..= 65_536` | ~22% | +/// | `65_536 ..= 1_048_576` | ~11% | +/// | `> 1_048_576` | ~4% | +fn typical(rng: &mut R) -> u64 { + let dist = LogNormal::new(1024.0_f64.ln(), 4.0).expect("median > 0 and sigma >= 0"); + num_traits::cast::(dist.sample(rng).round()).unwrap_or(u64::MAX) +}