From cf17a0cd17d9c3164c198a2c0d8a403696404203 Mon Sep 17 00:00:00 2001
From: Travis Thieman <travis.thieman@gmail.com>
Date: Fri, 3 Apr 2026 13:15:54 -0400
Subject: [PATCH 1/2] fix(encoder): remove false-positive compressed-size check
 that panicked on first write
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CompressionEstimator::would_write_exceed_threshold had an early exit:
  if len > threshold { return true; }

When a pathological metric (e.g. a sketch with many bins from a very
low sample rate) was the very first input encoded, its uncompressed
encoded length could exceed the configured compressed size limit. The
early exit fired before any compressed data had been written, signalling
encode_inner to flush — but with an empty builder, flush() returned
nothing, hitting the panic:
  "builder told us to flush, but gave us nothing"

The early exit is wrong: uncompressed size > compressed limit says
nothing about whether the compressed output will exceed the limit (many
sketches with near-identical bins compress very well). The estimator's
job is to act as an optimization hint, not a hard gate that can fire
before the first byte has been compressed.

Removing the check means the existing total_compressed_len == 0 guard
is the first thing evaluated, returning false and allowing the write to
proceed. A genuinely oversized payload will be caught during flush via
the existing PayloadTooLarge path instead of panicking.

Fixes the panic reported in #1119 / discussed in #1305.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../src/common/datadog/request_builder.rs     | 26 +++++++++++++++++++
 lib/saluki-io/src/compression.rs              | 21 ++++++++++-----
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/lib/saluki-components/src/common/datadog/request_builder.rs b/lib/saluki-components/src/common/datadog/request_builder.rs
index 1e79e190ba0..83bfa17a45d 100644
--- a/lib/saluki-components/src/common/datadog/request_builder.rs
+++ b/lib/saluki-components/src/common/datadog/request_builder.rs
@@ -1114,4 +1114,30 @@ mod tests {
 
         prop_assert_eq!(original_inputs_len, flushed_inputs_len);
     }
+
+    #[tokio::test]
+    async fn first_input_larger_than_compressed_limit_does_not_panic() {
+        // Regression test for a panic triggered when the very first metric's uncompressed encoded
+        // size exceeded the configured compressed size limit.
+        //
+        // Previously, `CompressionEstimator::would_write_exceed_threshold` had an early exit that
+        // returned `true` when `len > threshold`, even before any compressed data had been written.
+        // This caused `encode_inner` to return `Ok(false)` (signal to flush), but since nothing had
+        // been written, `flush()` returned an empty vec, hitting:
+        //   panic!("builder told us to flush, but gave us nothing")
+        //
+        // The fix removes that early exit. When no compressed data has been written yet, the
+        // estimator returns `false` (allow the write), so the large input proceeds to the compressor
+        // rather than triggering a premature flush of an empty builder.
+        let large_input = "x".repeat(10_000);
+
+        // Compressed limit set below the uncompressed input length to trigger the old bug.
+        let encoder = TestEncoder::new(1_000, usize::MAX, "/submit");
+        let mut request_builder = create_zstd_compression_request_builder(encoder).await;
+
+        // This must not panic. The input may produce an oversized payload on flush, but that is
+        // handled gracefully — the important invariant is that encode itself does not panic.
+        let result = request_builder.encode(large_input).await;
+        assert!(result.is_ok(), "encode returned an error: {:?}", result.err());
+    }
 }
diff --git a/lib/saluki-io/src/compression.rs b/lib/saluki-io/src/compression.rs
index d759271cf2b..7a935dbaeac 100644
--- a/lib/saluki-io/src/compression.rs
+++ b/lib/saluki-io/src/compression.rs
@@ -291,15 +291,20 @@ impl CompressionEstimator {
 
     /// Estimates if writing `len` bytes to the compressor would cause the final compressed size to exceed `threshold`
     /// bytes.
+    ///
+    /// This is a hint only: it intentionally allows writes when no compressed data has been seen yet (since we have no
+    /// compression ratio to estimate with), which means a caller must handle the possibility of an oversized payload on
+    /// flush. The benefit is avoiding a false-positive block on the very first write, which previously caused a panic
+    /// when the builder was empty and asked to flush.
     pub fn would_write_exceed_threshold(&self, len: usize, threshold: usize) -> bool {
-        // If the length of the data to be written exceeds the threshold, then it obviously would exceed the threshold.
-        if len > threshold {
-            return true;
-        }
-
         // If we have yet to see any compressed data, we can't make a meaningful estimate, and this likely means that
         // the compressor is still actively able to compress more data into the first block, which when eventually
         // written, should never exceed the compressed size limit... so we choose to not block writes in this case.
+        //
+        // Importantly, we do not use `len > threshold` as an early exit here: the uncompressed length of the input
+        // may legitimately exceed the compressed size limit (e.g. a sketch with thousands of nearly-identical bins
+        // compresses very well), and short-circuiting before the first write causes a panic because the builder
+        // would be told to flush when it has nothing buffered.
         if self.total_compressed_len == 0 {
             return false;
         }
@@ -362,8 +367,12 @@ mod tests {
     fn compression_estimator_no_output() {
         let estimator = CompressionEstimator::default();
 
+        // Without any compressed data, we cannot estimate whether a write would exceed the threshold,
+        // so we always return false to allow the write. This includes the case where the uncompressed
+        // size exceeds the threshold, because many inputs compress significantly (e.g. sketches with
+        // many near-identical bins).
         assert!(!estimator.would_write_exceed_threshold(10, 100));
-        assert!(estimator.would_write_exceed_threshold(100, 90));
+        assert!(!estimator.would_write_exceed_threshold(100, 90));
     }
 
     #[test]

From 3c85d434aa07d5b077d41e3d34a33ae93c4201e7 Mon Sep 17 00:00:00 2001
From: Travis Thieman <travis.thieman@gmail.com>
Date: Fri, 3 Apr 2026 13:27:30 -0400
Subject: [PATCH 2/2] refactor(encoder): remove extraneous comments from
 would_write_exceed_threshold

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lib/saluki-io/src/compression.rs | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/lib/saluki-io/src/compression.rs b/lib/saluki-io/src/compression.rs
index 7a935dbaeac..496d2b9730c 100644
--- a/lib/saluki-io/src/compression.rs
+++ b/lib/saluki-io/src/compression.rs
@@ -291,20 +291,10 @@ impl CompressionEstimator {
 
     /// Estimates if writing `len` bytes to the compressor would cause the final compressed size to exceed `threshold`
     /// bytes.
-    ///
-    /// This is a hint only: it intentionally allows writes when no compressed data has been seen yet (since we have no
-    /// compression ratio to estimate with), which means a caller must handle the possibility of an oversized payload on
-    /// flush. The benefit is avoiding a false-positive block on the very first write, which previously caused a panic
-    /// when the builder was empty and asked to flush.
     pub fn would_write_exceed_threshold(&self, len: usize, threshold: usize) -> bool {
         // If we have yet to see any compressed data, we can't make a meaningful estimate, and this likely means that
         // the compressor is still actively able to compress more data into the first block, which when eventually
         // written, should never exceed the compressed size limit... so we choose to not block writes in this case.
-        //
-        // Importantly, we do not use `len > threshold` as an early exit here: the uncompressed length of the input
-        // may legitimately exceed the compressed size limit (e.g. a sketch with thousands of nearly-identical bins
-        // compresses very well), and short-circuiting before the first write causes a panic because the builder
-        // would be told to flush when it has nothing buffered.
         if self.total_compressed_len == 0 {
             return false;
         }