DataDog · thieman · Apr 2, 2026 · Apr 2, 2026 · Apr 3, 2026 · Apr 3, 2026
@@ -681,13 +681,17 @@ fn trim_left(bins: &mut SmallVec<[Bin; 4]>, bin_limit: u16) {
         generate_bins(&mut overflow, bin_remove.k, missing);
     }
 
-    let overflow_len = overflow.len();
     let (_, bins_end) = bins.split_at(num_to_remove);
     overflow.extend_from_slice(bins_end);
 
-    // I still don't yet understand how this works, since you'd think bin limit should be the overall limit of the
-    // number of bins, but we're allowing more than that.. :thinkies:
-    overflow.truncate(bin_limit + overflow_len);
+    // Truncate to bin_limit so the total bin count stays within the configured limit. Overflow bins created
+    // above (when collapsed counts exceed MAX_BIN_WIDTH) are prepended before bins_end, and together the
+    // combined slice is capped at bin_limit. This may discard some higher-key bins from bins_end when
+    // overflow is large, which is the expected precision trade-off for a bounded sketch.
+    //
+    // As of April 2026, this is an intentional divergence from the Datadog Agent implementation,
+    // which does not truncate bins to stay under a limit.
+    overflow.truncate(bin_limit);
 
     mem::swap(bins, &mut overflow);
 }
@@ -712,3 +716,46 @@ fn generate_bins(bins: &mut SmallVec<[Bin; 4]>, k: i16, n: u64) {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Regression test for the `trim_left` bin count explosion with large per-sample weights.
+    ///
+    /// When a sample weight exceeds MAX_BIN_WIDTH (65535), a single `insert_n` call generates
+    /// multiple bins with the same key (one per 65535 units of count). Before the fix,
+    /// `trim_left` accumulated these overflow bins into its output without truncating to
+    /// `bin_limit`, causing the bin count to grow without bound across insertions. After
+    /// enough inserts the sketch could have millions of bins rather than the expected 4096,
+    /// eventually producing a serialized payload that exceeded the encoder's compressed size
+    /// limit and triggering a panic in the request builder.
+    ///
+    /// This test inserts several values with a weight representative of what ADP receives when
+    /// clamping an incoming sample rate of 3e-9 to its minimum of 3.845e-9 (~260M per sample),
+    /// and asserts after every insert that the bin count stays within DDSKETCH_CONF_BIN_LIMIT.
+    #[test]
+    fn trim_left_respects_bin_limit_with_large_weights() {
+        // Weight corresponding to ADP's minimum safe sample rate (1 / 3.845e-9 ≈ 260_078_024).
+        // Each `insert_n` call with this weight generates ceil(260_078_024 / 65535) = 3969 bins
+        // for a single key, so two distinct values are already enough to trigger `trim_left`.
+        let weight: u64 = 260_078_024;
+        let bin_limit = usize::from(DDSKETCH_CONF_BIN_LIMIT);
+
+        let mut sketch = DDSketch::default();
+
+        // Insert enough distinct values to repeatedly trigger `trim_left`. Ten values are more
+        // than sufficient; two already exceed the bin limit without the fix.
+        for i in 1..=10_i32 {
+            sketch.insert_n(f64::from(i), weight);
+            assert!(
+                sketch.bins().len() <= bin_limit,
+                "bin count {} exceeded limit {} after inserting {} value(s) at weight {}",
+                sketch.bins().len(),
+                bin_limit,
+                i,
+                weight,
+            );
+        }
+    }
+}
@@ -104,6 +104,15 @@ where
         encoded_len: usize,
         uncompressed_len_limit: usize,
     },
+    #[snafu(display(
+        "input encoded size ({} bytes) exceeds compressed payload limit ({} bytes) and can never fit",
+        encoded_len,
+        compressed_len_limit
+    ))]
+    InputExceedsCompressedSizeLimit {
+        encoded_len: usize,
+        compressed_len_limit: usize,
+    },
     #[snafu(display("input was invalid for request builder: {:?}'", input))]
     InvalidInput { input: E::Input },
     #[snafu(display("failed to encode/write payload: {}", source))]
@@ -333,6 +342,16 @@ where
             });
         }
 
+        // If the input's encoded size already exceeds the compressed payload limit, it can never fit regardless of
+        // what else is in the payload, so there is no point in asking the caller to flush and retry. Return an error
+        // so the input is dropped rather than looping forever.
+        if self.scratch_buf.len() > self.compressed_len_limit {
+            return Err(RequestBuilderError::InputExceedsCompressedSizeLimit {
+                encoded_len: self.scratch_buf.len(),
+                compressed_len_limit: self.compressed_len_limit,
+            });
+        }
+
         // If the input can't fit into the current request payload based on the uncompressed size limit, or isn't likely
         // to fit into the current request payload based on the estimated compressed size limit, then return it to the
         // caller: this indicates that a flush must happen before trying to encode the same input again.
@@ -947,6 +966,40 @@ mod tests {
         // size limits.
     }
 
+    #[tokio::test]
+    async fn input_exceeds_compressed_size_limit() {
+        // Regression test: when a single input's encoded size exceeds the compressed payload limit, the request
+        // builder previously returned Ok(Some(input)) (signalling "flush and retry") even though the builder
+        // was empty and had nothing to flush. The caller (run_request_builder in the metrics encoder) would
+        // then panic because flush() returned an empty vec on a supposedly non-empty builder.
+        //
+        // The fix returns Err(InputExceedsCompressedSizeLimit) instead, letting the caller drop the input
+        // and continue rather than entering an unresolvable flush loop.
+        let compressed_limit = 64;
+        let encoder = TestEncoder::new(compressed_limit, usize::MAX, "/submit");
+        let mut request_builder = create_no_compression_request_builder(encoder).await;
+
+        // One byte over the compressed limit (the encoded length is asserted below), so it can never fit.
+        let oversized_input = "x".repeat(compressed_limit + 1);
+
+        match request_builder.encode(oversized_input).await {
+            Err(RequestBuilderError::InputExceedsCompressedSizeLimit {
+                encoded_len,
+                compressed_len_limit,
+            }) => {
+                assert_eq!(encoded_len, compressed_limit + 1);
+                assert_eq!(compressed_len_limit, compressed_limit);
+            }
+            other => panic!("expected InputExceedsCompressedSizeLimit, got: {:?}", other),
+        }
+
+        // After the error, a flush must yield nothing and the builder must still accept a small input.
+        assert!(request_builder.flush().await.is_empty());
+
+        let small_input = "hello".to_string();
+        assert_eq!(None, request_builder.encode(small_input).await.unwrap());
+    }
+
     #[tokio::test]
     async fn uncompressed_size_limit_too_small() {
         // Make sure that we can't build a request builder with an uncompressed size limit that is smaller than the