diff --git a/.dockerignore b/.dockerignore index a901b5abf17..ab596ebc934 100644 --- a/.dockerignore +++ b/.dockerignore @@ -8,6 +8,7 @@ node_modules/ target/ **/fuzz/target test/ +!test/antithesis/ .gitignore .gitlab-ci.yml CONTRIBUTING.md diff --git a/Cargo.lock b/Cargo.lock index 8f5dbc3b1e6..0fc68028370 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,6 +21,8 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" name = "agent-data-plane" version = "1.2.0" dependencies = [ + "antithesis-instrumentation", + "antithesis_sdk", "argh", "async-trait", "bytesize", @@ -124,6 +126,31 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" +[[package]] +name = "antithesis-instrumentation" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb6b548668212c6d3a942a4ac7be13bc4fc25715bf661328438f30e3fd342cf0" +dependencies = [ + "cc", +] + +[[package]] +name = "antithesis_sdk" +version = "0.2.8" +source = "git+https://github.com/antithesishq/antithesis-sdk-rust?rev=6829a946e7e970cc743ffe17c3cee7d2bc25425a#6829a946e7e970cc743ffe17c3cee7d2bc25425a" +dependencies = [ + "libc", + "libloading", + "linkme", + "once_cell", + "rand 0.8.6", + "rand_core 0.10.1", + "rustc_version_runtime", + "serde", + "serde_json", +] + [[package]] name = "anyhow" version = "1.0.102" @@ -702,6 +729,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -714,6 +742,18 @@ dependencies = [ "clap_lex", ] +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + 
[[package]] name = "clap_lex" version = "1.1.0" @@ -1631,6 +1671,17 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "harness" +version = "0.1.0" +dependencies = [ + "antithesis_sdk", + "anyhow", + "clap", + "rand 0.10.1", + "serde_json", +] + [[package]] name = "hash32" version = "0.3.1" @@ -2452,6 +2503,26 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" +[[package]] +name = "linkme" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83272d46373fb8decca684579ac3e7c8f3d71d4cc3aa693df8759e260ae41cf" +dependencies = [ + "linkme-impl", +] + +[[package]] +name = "linkme-impl" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32d59e20403c7d08fe62b4376edfe5c7fb2ef1e6b1465379686d0f21c8df444b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -3873,6 +3944,16 @@ dependencies = [ "semver", ] +[[package]] +name = "rustc_version_runtime" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dd18cd2bae1820af0b6ad5e54f4a51d0f3fcc53b05f845675074efcc7af071d" +dependencies = [ + "rustc_version", + "semver", +] + [[package]] name = "rusticata-macros" version = "4.1.0" diff --git a/Cargo.toml b/Cargo.toml index b2ab748ced0..58a24749f50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ members = [ "lib/saluki-metrics", "lib/saluki-tls", "lib/stringtheory", + "test/antithesis/harness", ] resolver = "2" @@ -66,6 +67,7 @@ async-trait = { version = "0.1", default-features = false } atty = { version = "0.2", default-features = false } axum = { version = "0.8", default-features = false } bytes = { version = "1", default-features = false } +clap = { version = "4", default-features = false, features = [] } protobuf = { version = "3.7", default-features = 
false, features = [ "with-bytes", ] } @@ -223,6 +225,8 @@ chumsky = { version = "0.13", default-features = false } logos = { version = "0.16", default-features = false } lru-slab = { version = "0.1.2", default-features = false } hickory-resolver = { version = "0.26", default-features = false } +antithesis-instrumentation = { version = "0.1" } +antithesis_sdk = { git = "https://github.com/antithesishq/antithesis-sdk-rust", rev = "6829a946e7e970cc743ffe17c3cee7d2bc25425a", default-features = false } # 0.2.8 is pinned to rand 0.8, rev version allows us to select workspace rand [patch.crates-io] # Forked version of `hyper-http-proxy` that removes an unused dependency on `rustls-native-certs`, which transitively depends diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 6490abcd2b5..1d9925d94ac 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -7,6 +7,8 @@ allocator-api2,https://github.com/zakarumych/allocator-api2,MIT OR Apache-2.0,Za android_system_properties,https://github.com/nical/android_system_properties,MIT OR Apache-2.0,Nicolas Silva anes,https://github.com/zrzka/anes-rs,MIT OR Apache-2.0,Robert Vojta anstyle,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The anstyle Authors +antithesis-instrumentation,https://github.com/antithesishq/antithesis-instrumentation-rust,MIT,The antithesis-instrumentation Authors +antithesis_sdk,https://github.com/antithesishq/antithesis-sdk-rust,MIT,The antithesis_sdk Authors anyhow,https://github.com/dtolnay/anyhow,MIT OR Apache-2.0,David Tolnay arc-swap,https://github.com/vorner/arc-swap,MIT OR Apache-2.0,Michal 'vorner' Vaner argh,https://github.com/google/argh,BSD-3-Clause,"Taylor Cramer , Benjamin Brittain , Erick Tryzelaar " @@ -58,6 +60,7 @@ ciborium-ll,https://github.com/enarx/ciborium,Apache-2.0,Nathaniel McCallum clap,https://github.com/clap-rs/clap,MIT OR Apache-2.0,The clap Authors clap_builder,https://github.com/clap-rs/clap,MIT OR Apache-2.0,The clap_builder Authors 
+clap_derive,https://github.com/clap-rs/clap,MIT OR Apache-2.0,The clap_derive Authors clap_lex,https://github.com/clap-rs/clap,MIT OR Apache-2.0,The clap_lex Authors colored,https://github.com/mackwic/colored,MPL-2.0,Thomas Wickham combine,https://github.com/Marwes/combine,MIT,Markus Westerlind @@ -196,6 +199,8 @@ leb128fmt,https://github.com/bluk/leb128fmt,MIT OR Apache-2.0,Bryant Luk libm,https://github.com/rust-lang/compiler-builtins,MIT,"Alex Crichton , Amanieu d'Antras , Jorge Aparicio , Trevor Gross " +linkme,https://github.com/dtolnay/linkme,MIT OR Apache-2.0,David Tolnay +linkme-impl,https://github.com/dtolnay/linkme,MIT OR Apache-2.0,David Tolnay linux-raw-sys,https://github.com/sunfishcode/linux-raw-sys,Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT,Dan Gohman litemap,https://github.com/unicode-org/icu4x,Unicode-3.0,The ICU4X Project Developers litrs,https://github.com/LukasKalbertodt/litrs,MIT OR Apache-2.0,Lukas Kalbertodt @@ -316,6 +321,8 @@ rust_decimal,https://github.com/paupino/rust-decimal,MIT,Paul Mason rustc-hash,https://github.com/rust-lang-nursery/rustc-hash,Apache-2.0 OR MIT,The Rust Project Developers rustc-hash,https://github.com/rust-lang/rustc-hash,Apache-2.0 OR MIT,The Rust Project Developers +rustc_version,https://github.com/djc/rustc-version-rs,MIT OR Apache-2.0,The rustc_version Authors +rustc_version_runtime,https://github.com/seppo0010/rustc-version-runtime-rs,MIT,Sebastian Waisbrot rusticata-macros,https://github.com/rusticata/rusticata-macros,MIT OR Apache-2.0,Pierre Chifflier rustix,https://github.com/bytecodealliance/rustix,Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT,"Dan Gohman , Jakub Konka " rustls,https://github.com/rustls/rustls,Apache-2.0 OR ISC OR MIT,The rustls Authors diff --git a/Makefile b/Makefile index 01f06e5643d..6484145bc7e 100644 --- a/Makefile +++ b/Makefile @@ -312,7 +312,7 @@ run-adp-standalone: build-adp create-dummy-agent-config create-dummy-ipc-cert run-adp-standalone: ## Runs ADP locally in 
standalone mode (debug) @echo "[*] Running ADP..." @DD_DATA_PLANE_STANDALONE_MODE=true DD_DATA_PLANE_DOGSTATSD_ENABLED=true \ - DD_API_KEY=api-key-adp-standalone DD_HOSTNAME=adp-standalone \ + DD_API_KEY=api-key-adp-standalone DD_HOSTNAME=adp-standalone \ DD_DOGSTATSD_PORT=9191 DD_DOGSTATSD_SOCKET=/tmp/adp-dogstatsd-dgram.sock DD_DOGSTATSD_STREAM_SOCKET=/tmp/adp-dogstatsd-stream.sock \ DD_IPC_CERT_FILE_PATH=$(ADP_STANDALONE_IPC_CERT_FILE) \ target/devel/agent-data-plane --config /tmp/adp-empty-config.yaml run @@ -675,6 +675,28 @@ endif @echo "[*] Ensuring Miri is setup..." @cargo +nightly-2025-06-16 miri setup +##@ Antithesis + +ANTITHESIS_CONFIG_DIR := test/antithesis/deploy +ANTITHESIS_COMPOSE_FILE := $(ANTITHESIS_CONFIG_DIR)/docker-compose.yaml + +.PHONY: check-antithesis-tools +check-antithesis-tools: +ifeq ($(shell command -v snouty >/dev/null || echo not-found), not-found) + $(error "snouty must be present to validate the Antithesis harness, see https://github.com/antithesishq/snouty") +endif + +.PHONY: antithesis-build +antithesis-build: ## Builds the Antithesis harness container images + @echo "[*] Building Antithesis harness images..." + @docker compose -f $(ANTITHESIS_COMPOSE_FILE) build + +.PHONY: antithesis-validate +antithesis-validate: check-antithesis-tools antithesis-build +antithesis-validate: ## Validates the Antithesis harness: builds images, runs 'snouty validate' + @echo "[*] Validating Antithesis harness with snouty..." 
+ @snouty validate $(ANTITHESIS_CONFIG_DIR) + ##@ Profiling .PHONY: profile-run-blackhole diff --git a/bin/agent-data-plane/Cargo.toml b/bin/agent-data-plane/Cargo.toml index 57eb9d20464..7b479635d4e 100644 --- a/bin/agent-data-plane/Cargo.toml +++ b/bin/agent-data-plane/Cargo.toml @@ -11,8 +11,11 @@ workspace = true [features] default = [] fips = ["saluki-app/tls-fips", "saluki-components/fips"] +antithesis = ["dep:antithesis_sdk", "antithesis_sdk/full", "dep:antithesis-instrumentation"] [dependencies] +antithesis-instrumentation = { workspace = true, optional = true } +antithesis_sdk = { workspace = true, optional = true } argh = { workspace = true, features = ["help"] } async-trait = { workspace = true } bytesize = { workspace = true } diff --git a/bin/agent-data-plane/src/main.rs b/bin/agent-data-plane/src/main.rs index 7f1940ee118..25864a1e89a 100644 --- a/bin/agent-data-plane/src/main.rs +++ b/bin/agent-data-plane/src/main.rs @@ -7,6 +7,11 @@ #![deny(missing_docs)] use std::time::Instant; +// Pull in the Antithesis coverage-instrumentation runtime shim only when +// building for antithesis. Load-bearing: required to avoid the shim being dropped +// as unused. +#[cfg(feature = "antithesis")] +use antithesis_instrumentation as _; use datadog_agent_commons::platform::PlatformSettings; use metrics::Level; use saluki_app::bootstrap::{AppBootstrapper, Bootstrap, BootstrapGuard}; @@ -39,6 +44,12 @@ static ALLOC: resource_accounting::TrackingAllocator = #[tokio::main] async fn main() -> Result<(), GenericError> { let started = Instant::now(); + + // Initialize the Antithesis SDK as early as possible so assertions and lifecycle hooks register + // their catalog before any are evaluated. No-op outside Antithesis and absent in production builds. + #[cfg(feature = "antithesis")] + antithesis_sdk::antithesis_init(); + let cli: Cli = argh::from_env(); // Print version and exit early without requiring config. 
@@ -83,6 +94,11 @@ async fn main() -> Result<(), GenericError> { .await .error_context("Failed to complete bootstrap phase.")?; + // Bootstrap-integration probe: proves the Antithesis SDK is linked, cataloging works, and the + // instrumentation path is wired. + #[cfg(feature = "antithesis")] + antithesis_sdk::assert_reachable!("agent-data-plane completed bootstrap", &serde_json::json!({})); + // Run the given subcommand. The bootstrap supervisor is forwarded by value; only the long-lived `run` // subcommand actually drives it (it is added as a child of the internal supervisor inside // `handle_run_command`). All other subcommands drop it on entry. diff --git a/test/antithesis/AGENTS.md b/test/antithesis/AGENTS.md new file mode 100644 index 00000000000..b8bebf772bd --- /dev/null +++ b/test/antithesis/AGENTS.md @@ -0,0 +1,83 @@ +This directory contains files relevant to running tests in Antithesis. + +# Skills + +Use the `antithesis-setup` skill to scaffold and manage this directory. Use the +`antithesis-research` skill to analyze the system and build a property +catalog. Use the `antithesis-workload` skill to implement assertions and test +commands. Use the `antithesis-launch` skill to build, validate, and submit +Antithesis runs — do not run `snouty launch` directly. + +**snouty launch** + +Use `snouty launch --json --webhook basic_test --config test/antithesis/deploy` +to start an Antithesis run. Always run `compose build` first to ensure images +are up to date. + +**snouty validate** + +Use this command to quickly validate changes to the Antithesis scaffolding. See +`snouty validate --help` for details. + +**setup-complete.sh** + +Inject this script into a Dockerfile to notify Antithesis that setup is +complete. This script should only run once the system under test is ready for +testing. Antithesis will not run any test commands until it receives this event. 
+ +**Directory layout** + +- `harness/` — the harness Rust crate (`harness`), a member of the + repository-root workspace. `src/lib.rs` holds shared helpers; each + `src/bin/*.rs` is an Antithesis test command named after its file. Run cargo + from this directory; it is built, fmt'd, Clippy'd, and tested from the repo + root via the usual `make check-all` / `make test`. +- `deploy/` — all Antithesis/Docker infrastructure: the `Dockerfile`, + `docker-compose.yaml`, and per-container build inputs grouped by service + (`deploy/adp/`, `deploy/workload/`). This is the directory snouty consumes as + `--config`; it contains `docker-compose.yaml` at its top. Snouty will push + tagged images, consume this directory, and launch the run. + +**scratchbook** + +This directory is the Antithesis scratchbook for the codebase. It contains +documents such as system analysis, property catalogs, topology plans, +per-property evidence files (in `scratchbook/properties/`), property +relationship maps, and other persistent integration notes. Keep it up to date as +Antithesis-related decisions change. + +**test templates** (`deploy/workload/test/`) + +This directory contains test templates. A test template is a directory +containing test command executable files. Each test command must have a valid +prefix: `parallel_driver_, singleton_driver_, serial_driver_, first_, +eventually_, finally_, anytime_`. Prefixes constrain when and how commands are +composed in a single timeline. Files or subdirectories prefixed with `helper_` +are ignored by Antithesis and can be used for helper scripts kept alongside the +commands. + +# Agent Behavior + +Agent behavior will be governed by the following dictums: + +- **The human is primary.** If you run into any confusion, pause and ask for + clarification. +- When you are faced with a choice between doing the right, time-consuming thing + or the wrong, fast thing do the right thing. +- Code is liability. 
The status quo is not worth preserving if it does not have + utility. Be unsentimental and delete what is not needed. +- **Truth over comfort.** Say what is true regardless of the presumed comfort of + the receiver. Do not soften findings, hedge claims or omit bad news. To do so + is _not kindness_. It is, rather, an insidious form of lie. Note that this + dictum should be understood less in terms of Kim Scott's "Radical Candor" -- a + gift from the elite to the undeserving common -- but more in Walter + Brueggemann's "Prophetic Imagination" where truth erodes a "royal + consciousness" that ablates one's ability to do new and interesting things + _and_ shouts a path toward those new and interesting things, against the + status quo. Consider in this same vein Tony Hoare's "The Emperor's Old + Clothes". +- **Honor the spirit of a request, not just its letter.** A "random string + pool" requires actual variation. Returning `["foo", "bar"]` is technically + a pool but a semantic mismatch. When the literal reading is unusually + narrow or cheap, reach for the generous reading. Hostile compliance is + worse than asking. diff --git a/test/antithesis/README.md b/test/antithesis/README.md new file mode 100644 index 00000000000..edb64d4424e --- /dev/null +++ b/test/antithesis/README.md @@ -0,0 +1,19 @@ +# Antithesis Tests + +This directory contains a sub-project to run Antithesis tests for the saluki +project. Primary focus is on establishing that ADP and DogStatsD behave +'equivalently', which is to say if ADP is dropped in for Datadog Agent's +DogStatsD, users will not notice shifts in their telemetry. Better operational +behavior is an acceptable deviation. + +## Prerequisites + +* snouty -- https://github.com/antithesishq/snouty +* antithesis-skills + claude -- https://github.com/antithesishq/antithesis-skills + +## Running Scenarios + +This effort is extremely early. Today we assume claude drives scenario runs; +command it to do so with `/antithesis-launch`. 
In order for this to work you +must already have credentials available. Eventually we will have CI rigged up to +do nightly shots. diff --git a/test/antithesis/deploy/Dockerfile b/test/antithesis/deploy/Dockerfile new file mode 100644 index 00000000000..d8fe97108dd --- /dev/null +++ b/test/antithesis/deploy/Dockerfile @@ -0,0 +1,134 @@ +# syntax=docker/dockerfile:1.24@sha256:87999aa3d42bdc6bea60565083ee17e86d1f3339802f543c0d03998580f9cb89 +# +# Antithesis harness image for Agent Data Plane (ADP). +# +# Build context is the repository root. Three named targets: +# - adp : agent-data-plane built WITH Antithesis coverage instrumentation + SDK (the SUT) +# - intake : datadog-intake mock Datadog intake (dependency) +# - workload : millstone load generator + test templates + setup-complete (the client) +# +# ADP is built native x86_64-unknown-linux-gnu (glibc), so no musl cross-compile headers are needed. + +ARG BUILD_IMAGE=ubuntu:24.04@sha256:c4a8d5503dfb2a3eb8ab5f807da5bc69a85730fb49b5cfca2330194ebcc41c7b +ARG APP_IMAGE=ubuntu:24.04@sha256:c4a8d5503dfb2a3eb8ab5f807da5bc69a85730fb49b5cfca2330194ebcc41c7b + +# --------------------------------------------------------------------------- +# Shared build environment: Rust toolchain + native build dependencies. +# --------------------------------------------------------------------------- +FROM ${BUILD_IMAGE} AS build-base +ENV DEBIAN_FRONTEND=noninteractive \ + NO_COLOR=1 \ + CARGO_TERM_COLOR=never +RUN apt-get update && \ + apt-get install --no-install-recommends -y \ + build-essential ca-certificates make cmake gcc g++ perl protobuf-compiler curl unzip rustup && \ + rm -rf /var/lib/apt/lists/* +RUN rustup set profile minimal +ENV PATH="/root/.cargo/bin:${PATH}" +# Pre-install the pinned toolchain (cache key is just rust-toolchain.toml). 
+RUN --mount=type=bind,source=rust-toolchain.toml,target=/tmp/rust-toolchain.toml \ + cd /tmp && rustup show active-toolchain + +# --------------------------------------------------------------------------- +# Build the instrumented Agent Data Plane. +# +# Coverage instrumentation uses the modern Antithesis Rust flow (post-2026-05-22): the +# `antithesis-instrumentation` crate (referenced once in main.rs behind the `antithesis` feature) +# provides the runtime shim, and these RUSTFLAGS enable LLVM sancov coverage. `--build-id` is +# required for symbolization; the release profile sets `debug = true`, so the binary keeps DWARF +# for /symbols. LTO is disabled to keep sancov instrumentation predictable. +# --------------------------------------------------------------------------- +FROM build-base AS adp-builder +ENV APP_FULL_NAME="Agent Data Plane" \ + APP_SHORT_NAME="agent-data-plane" \ + APP_IDENTIFIER="adp" \ + CARGO_PROFILE_RELEASE_LTO=off +WORKDIR /adp +COPY . /adp +# The sancov RUSTFLAGS are passed via `--config target.<triple>.rustflags` with an explicit +# `--target`, NOT via the RUSTFLAGS env var. With an explicit target, Cargo builds host artifacts +# (build scripts, proc-macros) for the host and does NOT apply the target rustflags to them — so they +# are not instrumented and link cleanly. Using the RUSTFLAGS env var instead instruments build +# scripts too, which then fail to link (`undefined symbol: __sanitizer_cov_trace_pc_guard_init`). 
+RUN --mount=type=cache,target=/adp/target,id=antithesis-adp-target \ + --mount=type=cache,target=/root/.cargo/registry,id=cargo-registry \ + --mount=type=cache,target=/root/.cargo/git,id=cargo-git \ + cargo build --release --package agent-data-plane --features antithesis \ + --target x86_64-unknown-linux-gnu \ + --config 'target.x86_64-unknown-linux-gnu.rustflags=["--cfg","tokio_unstable","-Ccodegen-units=1","-Cpasses=sancov-module","-Cllvm-args=-sanitizer-coverage-level=3","-Cllvm-args=-sanitizer-coverage-trace-pc-guard","-Clink-args=-Wl,--build-id"]' && \ + cp /adp/target/x86_64-unknown-linux-gnu/release/agent-data-plane /usr/local/bin/agent-data-plane && \ + echo "Validating Antithesis instrumentation symbols..." && \ + nm /usr/local/bin/agent-data-plane | grep -q "antithesis_load_libvoidstar" && \ + nm /usr/local/bin/agent-data-plane | grep -q "sanitizer_cov_trace_pc_guard" && \ + echo "Instrumentation symbols present." + +# --------------------------------------------------------------------------- +# Build the correctness tools (datadog-intake + millstone), uninstrumented. +# These are supporting harness components, not the SUT, so they need no coverage instrumentation. +# --------------------------------------------------------------------------- +FROM build-base AS tools-builder +WORKDIR /tools +COPY . 
/tools +RUN --mount=type=cache,target=/tools/target,id=antithesis-tools-target \ + --mount=type=cache,target=/root/.cargo/registry,id=cargo-registry \ + --mount=type=cache,target=/root/.cargo/git,id=cargo-git \ + cargo build --release \ + --bin datadog-intake --bin millstone \ + --bin parallel_driver_send_dogstatsd --bin finally_verify_delivery && \ + cp /tools/target/release/datadog-intake /usr/local/bin/datadog-intake && \ + cp /tools/target/release/millstone /usr/local/bin/millstone && \ + cp /tools/target/release/parallel_driver_send_dogstatsd /usr/local/bin/parallel_driver_send_dogstatsd && \ + cp /tools/target/release/finally_verify_delivery /usr/local/bin/finally_verify_delivery + +# --------------------------------------------------------------------------- +# Runtime: Agent Data Plane (SUT). +# --------------------------------------------------------------------------- +FROM ${APP_IMAGE} AS adp +ENV NO_COLOR=1 \ + RUST_BACKTRACE=1 +RUN apt-get update && \ + apt-get install --no-install-recommends -y ca-certificates openssl && \ + rm -rf /var/lib/apt/lists/* +COPY --from=adp-builder /usr/local/bin/agent-data-plane /usr/local/bin/agent-data-plane +# Expose DWARF/build-id symbols to Antithesis for symbolization (one-hop symlink to the unstripped binary). +RUN mkdir -p /symbols && ln -s /usr/local/bin/agent-data-plane /symbols/agent-data-plane +# main.rs requires the bootstrap config file to exist at the default path; ship a minimal standalone config. +COPY test/antithesis/deploy/adp/datadog.yaml /etc/datadog-agent/datadog.yaml +# ADP's control-plane secure API requires an IPC TLS cert (a single PEM holding both certificate and +# private key) that the Core Agent normally generates. In standalone mode there is no Core Agent, so +# generate a self-signed cert+key. An empty auth_token satisfies the IPC auth config at startup. 
+RUN openssl req -x509 -newkey rsa:2048 -nodes -days 3650 \ + -subj "/CN=agent-data-plane" \ + -keyout /tmp/ipc_key.pem -out /tmp/ipc_cert.pem && \ + cat /tmp/ipc_cert.pem /tmp/ipc_key.pem > /etc/datadog-agent/ipc_cert.pem && \ + rm -f /tmp/ipc_cert.pem /tmp/ipc_key.pem && \ + touch /etc/datadog-agent/auth_token +ENTRYPOINT ["/usr/local/bin/agent-data-plane"] +CMD ["run"] + +# --------------------------------------------------------------------------- +# Runtime: datadog-intake (mock Datadog intake dependency). +# --------------------------------------------------------------------------- +FROM ${APP_IMAGE} AS intake +ENV NO_COLOR=1 +COPY --from=tools-builder /usr/local/bin/datadog-intake /usr/local/bin/datadog-intake +ENTRYPOINT ["/usr/local/bin/datadog-intake"] + +# --------------------------------------------------------------------------- +# Runtime: workload client (millstone load generator + test templates). +# --------------------------------------------------------------------------- +FROM ${APP_IMAGE} AS workload +ENV NO_COLOR=1 +RUN test -d /usr/share/ca-certificates || ( \ + apt-get update && \ + apt-get install --no-install-recommends -y ca-certificates && \ + rm -rf /var/lib/apt/lists/* ) +COPY --from=tools-builder /usr/local/bin/millstone /usr/local/bin/millstone +# Antithesis setup-complete helper and test templates (helper files + the "main" template dir). +COPY --chmod=755 test/antithesis/deploy/workload/setup-complete.sh /opt/antithesis/setup-complete.sh +COPY test/antithesis/deploy/workload/test/ /opt/antithesis/test/ +# Inject the compiled test-command binaries into the "main" test template. 
+COPY --from=tools-builder --chmod=755 /usr/local/bin/parallel_driver_send_dogstatsd /opt/antithesis/test/v1/main/parallel_driver_send_dogstatsd +COPY --from=tools-builder --chmod=755 /usr/local/bin/finally_verify_delivery /opt/antithesis/test/v1/main/finally_verify_delivery +COPY --chmod=755 test/antithesis/deploy/workload/entrypoint.sh /entrypoint.sh +ENTRYPOINT ["/entrypoint.sh"] diff --git a/test/antithesis/deploy/adp/datadog.yaml b/test/antithesis/deploy/adp/datadog.yaml new file mode 100644 index 00000000000..2db76088f6f --- /dev/null +++ b/test/antithesis/deploy/adp/datadog.yaml @@ -0,0 +1,14 @@ +# Minimal Agent Data Plane configuration for the Antithesis harness. +# +# /etc/datadog-agent/datadog.yaml is baked into the `adp` image. DD_* +# environment variables are in docker-compose.yaml. + +hostname: "antithesis-adp" +dd_url: "http://intake:2049" + +dogstatsd_port: 0 +dogstatsd_socket: "/var/run/datadog/dsd.socket" +dogstatsd_non_local_traffic: false +dogstatsd_origin_detection: false + +dogstatsd_workers_count: 1 diff --git a/test/antithesis/deploy/docker-compose.yaml b/test/antithesis/deploy/docker-compose.yaml new file mode 100644 index 00000000000..335c5b9b1e2 --- /dev/null +++ b/test/antithesis/deploy/docker-compose.yaml @@ -0,0 +1,80 @@ +name: saluki + +services: + intake: + container_name: intake + hostname: intake + platform: linux/amd64 + init: true + build: + context: ../../.. + dockerfile: test/antithesis/deploy/Dockerfile + target: intake + image: intake:latest + environment: + NO_COLOR: "1" + healthcheck: + # datadog-intake serves HTTP on :2049. /dev/tcp avoids needing curl in the image. + test: ["CMD-SHELL", "bash -c 'exec 3<>/dev/tcp/localhost/2049'"] + interval: 2s + timeout: 2s + retries: 30 + + adp: + container_name: adp + hostname: adp + platform: linux/amd64 + init: true + build: + context: ../../.. 
+ dockerfile: test/antithesis/deploy/Dockerfile + target: adp + image: adp:latest + command: ["run"] + environment: + NO_COLOR: "1" + RUST_BACKTRACE: "1" + DD_API_KEY: "antithesis-test-api-key" + DD_DATA_PLANE_ENABLED: "true" + DD_DATA_PLANE_STANDALONE_MODE: "true" + DD_DATA_PLANE_DOGSTATSD_ENABLED: "true" + volumes: + - dogstatsd-socket:/var/run/datadog + depends_on: + intake: + condition: service_healthy + healthcheck: + # ADP's unprivileged API listens on TCP :5100 once the internal supervisor is up. + test: ["CMD-SHELL", "bash -c 'exec 3<>/dev/tcp/localhost/5100'"] + interval: 2s + timeout: 2s + retries: 60 + + workload: + container_name: workload + hostname: workload + platform: linux/amd64 + init: true + build: + context: ../../.. + dockerfile: test/antithesis/deploy/Dockerfile + target: workload + image: workload:latest + environment: + NO_COLOR: "1" + ADP_HOST: "adp" + ADP_API_PORT: "5100" + DSD_SOCKET: "/var/run/datadog/dsd.socket" + INTAKE_ADDR: "intake:2049" + INTAKE_HOST: "intake" + INTAKE_PORT: "2049" + volumes: + - dogstatsd-socket:/var/run/datadog + depends_on: + adp: + condition: service_healthy + intake: + condition: service_healthy + +volumes: + dogstatsd-socket: diff --git a/test/antithesis/deploy/workload/entrypoint.sh b/test/antithesis/deploy/workload/entrypoint.sh new file mode 100644 index 00000000000..3ff5e46908b --- /dev/null +++ b/test/antithesis/deploy/workload/entrypoint.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Workload client entrypoint. +# +# By the time this runs, docker-compose has gated startup on the `adp` and `intake` services being +# healthy (depends_on: condition: service_healthy). We re-confirm reachability defensively, emit the +# Antithesis `setup_complete` signal, then idle so Antithesis can run test commands from the test +# template at /opt/antithesis/test/v1/. 
+ +ADP_HOST="${ADP_HOST:-adp}" +ADP_API_PORT="${ADP_API_PORT:-5100}" +DSD_SOCKET="${DSD_SOCKET:-/var/run/datadog/dsd.socket}" +INTAKE_HOST="${INTAKE_HOST:-intake}" +INTAKE_PORT="${INTAKE_PORT:-2049}" + +wait_for_tcp() { + local host="$1" port="$2" name="$3" tries=60 + echo "Waiting for ${name} (${host}:${port})..." + while (( tries-- > 0 )); do + if (exec 3<>"/dev/tcp/${host}/${port}") 2>/dev/null; then + echo "${name} is reachable." + return 0 + fi + sleep 1 + done + echo "Timed out waiting for ${name} (${host}:${port})." >&2 + return 1 +} + +wait_for_socket() { + local path="$1" name="$2" tries=60 + echo "Waiting for ${name} (${path})..." + while (( tries-- > 0 )); do + if [[ -S "${path}" ]]; then + echo "${name} is reachable." + return 0 + fi + sleep 1 + done + echo "Timed out waiting for ${name} (${path})." >&2 + return 1 +} + +wait_for_tcp "${ADP_HOST}" "${ADP_API_PORT}" "agent-data-plane API" +wait_for_socket "${DSD_SOCKET}" "agent-data-plane DogStatsD socket" +wait_for_tcp "${INTAKE_HOST}" "${INTAKE_PORT}" "datadog-intake" + +echo "System is ready. Emitting setup_complete." +/opt/antithesis/setup-complete.sh + +echo "Workload client idle; awaiting Antithesis test commands." +exec tail -f /dev/null diff --git a/test/antithesis/deploy/workload/setup-complete.sh b/test/antithesis/deploy/workload/setup-complete.sh new file mode 100755 index 00000000000..23232da77e2 --- /dev/null +++ b/test/antithesis/deploy/workload/setup-complete.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Run this script to inform Antithesis that it can start running test commands. +# You can also use the Antithesis SDK to emit setup-complete from your system if +# that is easier. +# +# Antithesis sets the `ANTITHESIS_OUTPUT_DIR` environment variable +# automatically. This script is setup to emit `setup_complete` to the +# `sdk.jsonl` file in that directory. 
+ +OUTPUT_PATH="/tmp/antithesis_sdk.jsonl" +if [[ -n "${ANTITHESIS_OUTPUT_DIR:-}" ]]; then + OUTPUT_PATH="${ANTITHESIS_OUTPUT_DIR}/sdk.jsonl" + echo "Running in Antithesis, emitting setup_complete to ${OUTPUT_PATH}" +elif [[ -n "${ANTITHESIS_SDK_LOCAL_OUTPUT:-}" ]]; then + OUTPUT_PATH="${ANTITHESIS_SDK_LOCAL_OUTPUT}" + echo "Antithesis SDK local output override detected, emitting setup_complete to ${OUTPUT_PATH}" +fi + +mkdir -p "$(dirname "${OUTPUT_PATH}")" # quoted: the directory may contain spaces or glob characters +echo '{"antithesis_setup":{"status":"complete","details":{"message":"ready to go"}}}' >> "${OUTPUT_PATH}" diff --git a/test/antithesis/.gitkeep b/test/antithesis/deploy/workload/test/v1/.gitkeep similarity index 100% rename from test/antithesis/.gitkeep rename to test/antithesis/deploy/workload/test/v1/.gitkeep diff --git a/test/antithesis/harness/Cargo.toml b/test/antithesis/harness/Cargo.toml new file mode 100644 index 00000000000..0190ffc5173 --- /dev/null +++ b/test/antithesis/harness/Cargo.toml @@ -0,0 +1,49 @@ +[package] +name = "harness" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +publish = false + +[dependencies] +antithesis_sdk = { workspace = true, features = ["full", "rand_v0_10"] } +anyhow = { workspace = true, features = ["std"] } +clap = { workspace = true, features = [ + "derive", + "env", + "error-context", + "help", + "std", + "usage", +] } +rand = { workspace = true } +serde_json = { workspace = true } + +[lints.clippy] +all = "deny" +complexity = "deny" +dbg_macro = "deny" +float_cmp = "deny" +large_futures = "deny" +large_stack_arrays = "deny" +manual_memcpy = "deny" +mod_module_files = "deny" +pedantic = "deny" +perf = "deny" +print_stderr = "deny" +print_stdout = "deny" +rc_buffer = "deny" +redundant_allocation = "deny" +suspicious = "deny" +unnecessary_to_owned = "deny" +unwrap_used = "deny" + +[lints.rust] +missing_copy_implementations = "deny" +missing_debug_implementations = "deny" +missing_docs = "deny" +unreachable_pub = "deny" +unused_allocation = "deny" 
+unused_assignments = "deny" +unused_comparisons = "deny" +unused_extern_crates = "deny" diff --git a/test/antithesis/harness/src/bin/finally_verify_delivery.rs b/test/antithesis/harness/src/bin/finally_verify_delivery.rs new file mode 100644 index 00000000000..903b85e8bc0 --- /dev/null +++ b/test/antithesis/harness/src/bin/finally_verify_delivery.rs @@ -0,0 +1,92 @@ +//! Antithesis `finally_` test command: verifies metrics reached the intake +//! after drivers complete. +//! +//! Runs after the driver(s) finish and fault injection has stopped. Checks the +//! eventual-delivery liveness baseline for the `forwarder-eventual-delivery` +//! property: at least once across the run, metrics submitted to ADP are +//! aggregated, forwarded, and observed at the mock intake. Aggregation flushes +//! on an interval before forwarding, so this polls with retries. +//! +//! This is a fairly weak verification and will be improved in the future. + +use std::io::{Read, Write}; +use std::net::TcpStream; +use std::time::Duration; + +use antithesis_sdk::prelude::*; +use anyhow::{anyhow, bail}; +use clap::{builder::NonEmptyStringValueParser, Parser}; +use serde_json::json; + +#[derive(Debug, Parser)] +#[command(name = "finally_verify_delivery")] +struct Config { + #[arg( + long = "intake-addr", + env = "INTAKE_ADDR", + default_value = "intake:2049", + value_parser = NonEmptyStringValueParser::new() + )] + intake_addr: String, +} + +fn main() -> anyhow::Result<()> { + antithesis_init(); + + let config = Config::try_parse()?; + + let mut delivered = 0usize; + let mut query_ok = false; + // Poll up to 60 times, 1s apart: aggregation flushes periodically, then the forwarder ships to the intake. + for _ in 0..60 { + if let Ok(n) = fetch_intake_metric_count(&config) { + query_ok = true; + delivered = n; + if n > 0 { + break; + } + } else { + // Transient during recovery; keep polling.
+ } + std::thread::sleep(Duration::from_secs(1)); + } + + assert_reachable!( + "intake metrics dump query succeeded", + &json!({ "delivered": delivered, "query_ok": query_ok }) + ); + + assert_sometimes!( + delivered > 0, + "metrics delivered end-to-end to the intake", + &json!({ "delivered": delivered }) + ); + + Ok(()) +} + +fn fetch_intake_metric_count(config: &Config) -> anyhow::Result<usize> { + let mut stream = TcpStream::connect(config.intake_addr.as_str())?; + stream.set_read_timeout(Some(Duration::from_secs(5)))?; + let req = format!( + "GET /metrics/dump HTTP/1.0\r\nHost: {}\r\nConnection: close\r\n\r\n", + config.intake_addr + ); + stream.write_all(req.as_bytes())?; + + let mut buf = Vec::new(); + stream.read_to_end(&mut buf)?; + + let header_end = buf + .windows(4) + .position(|w| w == b"\r\n\r\n") + .ok_or_else(|| anyhow!("intake response did not include an HTTP header terminator"))?; + let body = &buf[header_end + 4..]; + + let value: serde_json::Value = serde_json::from_slice(body)?; + let Some(arr) = value.as_array() else { + bail!("intake /metrics/dump did not return a JSON array"); + }; + + Ok(arr.len()) +} diff --git a/test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs b/test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs new file mode 100644 index 00000000000..ac9f887a3e5 --- /dev/null +++ b/test/antithesis/harness/src/bin/parallel_driver_send_dogstatsd.rs @@ -0,0 +1,94 @@ +//! Antithesis `parallel_driver_` test command: sends a batch of `DogStatsD` metrics to ADP. +//! +//! Draws a per-timeline cardinality regime (swarm biasing) and a batch size, then sends metrics over +//! UDS. The high-cardinality regime floods distinct aggregation contexts, targeting the +//! `rss-bounded-under-cardinality` property (ADP's memory limiter is disabled by default, so RSS can +//! grow without bound under sustained high cardinality).
+ +use std::os::unix::net::UnixDatagram; +use std::path::PathBuf; + +use antithesis_sdk::prelude::*; +use antithesis_sdk::random::AntithesisRng; +use anyhow::Context as _; +use clap::Parser; +use rand::{rand_core::UnwrapErr, seq::IndexedRandom as _, RngExt as _}; +use serde_json::json; + +#[derive(Debug, Parser)] +#[command(name = "parallel_driver_send_dogstatsd")] +struct Config { + #[arg( + long = "dogstatsd-socket", + env = "DSD_SOCKET", + default_value = "/var/run/datadog/dsd.socket" + )] + dogstatsd_socket: PathBuf, +} + +#[derive(Clone, Copy, Debug)] +enum Cardinality { + Low, + Medium, + High, +} + +fn main() -> anyhow::Result<()> { + antithesis_init(); + + let config = Config::try_parse()?; + let mut rng = UnwrapErr(AntithesisRng); + let regimes = [Cardinality::Low, Cardinality::Medium, Cardinality::High]; + let regime = *regimes + .choose(&mut rng) + .context("cardinality regime choices must not be empty")?; + let regime_label = match regime { + Cardinality::Low => "low", + Cardinality::Medium => "medium", + Cardinality::High => "high", + }; + let count: u64 = rng.random_range(50..=2000); + + let socket = UnixDatagram::unbound()?; + socket.connect(&config.dogstatsd_socket)?; + + let names = ["adp.test.foo", "adp.test.bar", "adp.test.balkajsldfkjasdlfkjasdfz"]; + let metric_types = ["c", "g"]; + let mut attempted = 0usize; + for i in 0..count { + let name = *names + .choose(&mut rng) + .context("metric name choices must not be empty")?; + let metric_type = *metric_types + .choose(&mut rng) + .context("metric type choices must not be empty")?; + let value: u64 = rng.random_range(0..=1000); + let tag = match regime { + Cardinality::Low => format!("host:h{}", rng.random_range(0..4)), + Cardinality::Medium => format!("host:h{}", rng.random_range(0..256)), + Cardinality::High => format!("uid:{i}-{}", rng.random::<u64>()), + }; + let line = format!("{name}:{value}|{metric_type}|#{tag}\n"); + if socket.send(line.as_bytes()).is_ok() { + attempted += 1; + } + } + +
assert_reachable!( + "workload sent a dogstatsd batch", + &json!({ + "attempted": attempted, + "regime": regime_label, + "socket": config.dogstatsd_socket.display().to_string(), + }) + ); + + // Confirm timelines sometimes drive a high-cardinality flood (the interesting case for memory). + assert_sometimes!( + matches!(regime, Cardinality::High), + "workload drove a high-cardinality dogstatsd flood", + &json!({ "attempted": attempted }) + ); + + Ok(()) +} diff --git a/test/antithesis/scratchbook/.gitkeep b/test/antithesis/scratchbook/.gitkeep new file mode 100644 index 00000000000..e69de29bb2d