From 221e1afd69805cd45dc7fd74a3bcb591b1e342c0 Mon Sep 17 00:00:00 2001
From: David Christle
Date: Tue, 21 Apr 2026 14:53:41 -0700
Subject: [PATCH 1/2] feat(treemap): add contains_range and range_cardinality

RoaringBitmap has had both methods; this closes the same gap on
RoaringTreemap using BTreeMap::range to iterate only the relevant
high-word buckets.
---
 roaring/src/treemap/inherent.rs       |  89 ++++++++++++
 roaring/tests/treemap_range_checks.rs | 200 ++++++++++++++++++++++++++
 2 files changed, 289 insertions(+)
 create mode 100644 roaring/tests/treemap_range_checks.rs

diff --git a/roaring/src/treemap/inherent.rs b/roaring/src/treemap/inherent.rs
index bbc89f0a..0d746878 100644
--- a/roaring/src/treemap/inherent.rs
+++ b/roaring/src/treemap/inherent.rs
@@ -272,6 +272,95 @@ impl RoaringTreemap {
         }
     }
 
+    /// Returns `true` if all values in the range are present in this set.
+    ///
+    /// An empty range is always contained.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use roaring::RoaringTreemap;
+    ///
+    /// let mut rb = RoaringTreemap::new();
+    /// // An empty range is always contained
+    /// assert!(rb.contains_range(7..7));
+    ///
+    /// rb.insert_range(1..0x1_0000_0000);
+    /// assert!(rb.contains_range(1..0x1_0000_0000));
+    /// assert!(rb.contains_range(2..0x1_0000_0000));
+    /// // 0 is not contained
+    /// assert!(!rb.contains_range(0..2));
+    /// // 0x1_0000_0000 is not contained
+    /// assert!(!rb.contains_range(1..=0x1_0000_0000));
+    /// ```
+    pub fn contains_range<R>(&self, range: R) -> bool
+    where
+        R: RangeBounds<u64>,
+    {
+        let (start, end) = match util::convert_range_to_inclusive(range) {
+            Some(range) => (*range.start(), *range.end()),
+            None => return true,
+        };
+        let (start_hi, start_lo) = util::split(start);
+        let (end_hi, end_lo) = util::split(end);
+
+        let mut expected_key = start_hi;
+        for (&key, bitmap) in self.map.range(start_hi..=end_hi) {
+            if key != expected_key {
+                return false;
+            }
+            let lo_start = if key == start_hi { start_lo } else { 0 };
+            let lo_end = if key == end_hi { end_lo } else { u32::MAX };
+            if !bitmap.contains_range(lo_start..=lo_end) {
+                return false;
+            }
+            match key.checked_add(1) {
+                Some(k) => expected_key = k,
+                None => return true,
+            }
+        }
+        expected_key > end_hi
+    }
+
+    /// Returns the number of elements in this set which are in the passed range.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use roaring::RoaringTreemap;
+    ///
+    /// let mut rb = RoaringTreemap::new();
+    /// rb.insert_range(0x1_0000_0000..0x4_0000_0000);
+    /// rb.insert(0x5_0000_0001);
+    /// rb.insert(0x5_0000_0005);
+    /// rb.insert(u64::MAX);
+    ///
+    /// assert_eq!(rb.range_cardinality(0..0x1_0000_0000), 0);
+    /// assert_eq!(rb.range_cardinality(0x1_0000_0000..0x4_0000_0000), 0x3_0000_0000);
+    /// assert_eq!(rb.range_cardinality(0x5_0000_0000..0x6_0000_0000), 2);
+    /// assert_eq!(rb.range_cardinality(0x1_0000_0000..0x1_0000_0000), 0);
+    /// assert_eq!(rb.range_cardinality(0x5_0000_0000..=u64::MAX), 3);
+    /// ```
+    pub fn range_cardinality<R>(&self, range: R) -> u64
+    where
+        R: RangeBounds<u64>,
+    {
+        let (start, end) = match util::convert_range_to_inclusive(range) {
+            Some(range) => (*range.start(), *range.end()),
+            None => return 0,
+        };
+        let (start_hi, start_lo) = util::split(start);
+        let (end_hi, end_lo) = util::split(end);
+
+        let mut cardinality = 0u64;
+        for (&key, bitmap) in self.map.range(start_hi..=end_hi) {
+            let lo_start = if key == start_hi { start_lo } else { 0 };
+            let lo_end = if key == end_hi { end_lo } else { u32::MAX };
+            cardinality += bitmap.range_cardinality(lo_start..=lo_end);
+        }
+        cardinality
+    }
+
     /// Clears all integers in this set.
     ///
     /// # Examples
diff --git a/roaring/tests/treemap_range_checks.rs b/roaring/tests/treemap_range_checks.rs
new file mode 100644
index 00000000..62dbd8c2
--- /dev/null
+++ b/roaring/tests/treemap_range_checks.rs
@@ -0,0 +1,200 @@
+use proptest::collection::hash_set;
+use proptest::prelude::*;
+use roaring::RoaringTreemap;
+
+// The bucket boundary is at every multiple of 2^32. Values with the same high 32 bits
+// share a bucket; adjacent high keys are adjacent buckets.
+const BUCKET: u64 = 1 << 32;
+
+#[test]
+fn empty_range_always_contained() {
+    let rb = RoaringTreemap::new();
+    assert!(rb.contains_range(7..7));
+    assert!(rb.contains_range(0..0));
+    assert!(rb.contains_range(u64::MAX..u64::MAX));
+}
+
+#[test]
+fn empty_range_cardinality_is_zero() {
+    let rb = RoaringTreemap::new();
+    assert_eq!(rb.range_cardinality(7..7), 0);
+    assert_eq!(rb.range_cardinality(u64::MAX..u64::MAX), 0);
+}
+
+#[test]
+fn empty_treemap_not_contained() {
+    let rb = RoaringTreemap::new();
+    assert!(!rb.contains_range(0..1));
+    assert!(!rb.contains_range(0..=u64::MAX));
+    assert_eq!(rb.range_cardinality(0..=u64::MAX), 0);
+}
+
+#[test]
+fn single_bucket_contained() {
+    let mut rb = RoaringTreemap::new();
+    rb.insert_range(10..20);
+    assert!(rb.contains_range(10..20));
+    assert!(rb.contains_range(11..19));
+    assert!(!rb.contains_range(9..20));
+    assert!(!rb.contains_range(10..21));
+    assert_eq!(rb.range_cardinality(10..20), 10);
+    assert_eq!(rb.range_cardinality(10..15), 5);
+    assert_eq!(rb.range_cardinality(0..10), 0);
+    assert_eq!(rb.range_cardinality(20..30), 0);
+}
+
+#[test]
+fn cross_bucket_boundary() {
+    // Values straddle the hi=0 / hi=1 bucket boundary
+    let lo_max = u32::MAX as u64;
+    let hi_min = BUCKET;
+
+    let mut rb = RoaringTreemap::new();
+    rb.insert(lo_max);
+    rb.insert(hi_min);
+
+    assert!(rb.contains_range(lo_max..=lo_max));
+    assert!(rb.contains_range(hi_min..=hi_min));
+    // Widening the span by one on either side reaches an absent value, so it isn't contained
+    assert!(!rb.contains_range(lo_max - 1..=hi_min));
+    assert!(!rb.contains_range(lo_max..=hi_min + 1));
+
+    // But range_cardinality counts the values present
+    assert_eq!(rb.range_cardinality(lo_max..=hi_min), 2);
+    assert_eq!(rb.range_cardinality(lo_max..=lo_max), 1);
+    assert_eq!(rb.range_cardinality(hi_min..=hi_min), 1);
+    assert_eq!(rb.range_cardinality(lo_max - 1..lo_max), 0);
+
+    // Insert the full span and verify containment
+    rb.insert_range(lo_max..=hi_min);
+    assert!(rb.contains_range(lo_max..=hi_min));
+    assert_eq!(rb.range_cardinality(lo_max..=hi_min), 2);
+}
+
+#[test]
+fn multi_bucket_gap_not_contained() {
+    // Insert values in buckets 0 and 2, leaving bucket 1 empty.
+    let mut rb = RoaringTreemap::new();
+    rb.insert_range(0..BUCKET); // hi=0, full
+    rb.insert_range(2 * BUCKET..3 * BUCKET); // hi=2, full
+
+    assert!(!rb.contains_range(0..3 * BUCKET));
+    assert!(rb.contains_range(0..BUCKET));
+    assert!(rb.contains_range(2 * BUCKET..3 * BUCKET));
+    assert_eq!(rb.range_cardinality(0..3 * BUCKET), 2 * BUCKET);
+}
+
+#[test]
+fn u64_max_boundary() {
+    let mut rb = RoaringTreemap::new();
+    rb.insert(u64::MAX);
+    assert!(rb.contains_range(u64::MAX..=u64::MAX));
+    assert!(!rb.contains_range(u64::MAX - 1..=u64::MAX));
+    assert_eq!(rb.range_cardinality(u64::MAX..=u64::MAX), 1);
+    assert_eq!(rb.range_cardinality(u64::MAX - 1..u64::MAX), 0);
+
+    // Insert the last two values
+    rb.insert(u64::MAX - 1);
+    assert!(rb.contains_range(u64::MAX - 1..=u64::MAX));
+    assert_eq!(rb.range_cardinality(u64::MAX - 1..=u64::MAX), 2);
+}
+
+#[test]
+fn unbounded_range() {
+    // Use a start value in the last bucket (hi = u32::MAX) to avoid allocating
+    // billions of buckets, which would happen if the high word of start is small.
+    let last_bucket_start = (u32::MAX as u64) << 32; // hi=u32::MAX, lo=0
+    let mut rb = RoaringTreemap::new();
+    rb.insert_range(last_bucket_start..);
+    assert!(rb.contains_range(last_bucket_start..));
+    assert!(rb.contains_range(last_bucket_start + 1..=u64::MAX));
+    assert!(!rb.contains_range(last_bucket_start - 1..=u64::MAX));
+}
+
+proptest! {
+    #[test]
+    fn proptest_range(
+        // Keep values well within a single bucket to avoid very slow tests from
+        // inserting billions of values across bucket boundaries.
+        start in ..=262_143_u64,
+        len in ..=262_143_u64,
+        extra in hash_set(..=462_143_u64, ..=100),
+    ) {
+        let end = start + len;
+        let range = start..end;
+        let inverse_empty_range = (start + len)..start;
+
+        let mut rb = RoaringTreemap::new();
+        rb.insert_range(range.clone());
+        assert!(rb.contains_range(range.clone()));
+        assert!(rb.contains_range(inverse_empty_range.clone()));
+        assert_eq!(rb.range_cardinality(range.clone()), len);
+
+        for &val in &extra {
+            rb.insert(val);
+            assert!(rb.contains_range(range.clone()));
+            assert!(rb.contains_range(inverse_empty_range.clone()));
+            assert_eq!(rb.range_cardinality(range.clone()), len);
+        }
+
+        for (i, &val) in extra.iter().filter(|&&x| range.contains(&x)).enumerate() {
+            rb.remove(val);
+            assert!(!rb.contains_range(range.clone()));
+            assert!(rb.contains_range(inverse_empty_range.clone()));
+            assert_eq!(rb.range_cardinality(range.clone()), len - i as u64 - 1);
+        }
+    }
+
+    #[test]
+    fn proptest_range_boundaries(
+        start in 1..=262_143_u64,
+        len in 1..=262_143_u64, // >= 1: an empty range is always contained
+    ) {
+        let mut rb = RoaringTreemap::new();
+        let end = start + len;
+        let half = start + len / 2;
+        rb.insert_range(start..end);
+
+        assert!(rb.contains_range(start..end));
+        assert!(rb.contains_range(start + 1..end));
+        assert!(rb.contains_range(start..end.saturating_sub(1)));
+        assert!(rb.contains_range(start + 1..end.saturating_sub(1)));
+
+        assert!(!rb.contains_range(start - 1..end));
+        assert!(!rb.contains_range(start - 1..end.saturating_sub(1)));
+        assert!(!rb.contains_range(start..end + 1));
+        assert!(!rb.contains_range(start + 1..end + 1));
+        assert!(!rb.contains_range(start - 1..end + 1));
+
+        assert!(!rb.contains_range(start - 1..half));
+        assert!(!rb.contains_range(half..end + 1));
+    }
+
+    #[test]
+    fn proptest_cross_bucket(
+        // start_lo: low 32 bits of start value, in bucket hi=0
+        start_lo in 0_u32..=u32::MAX / 2,
+        // end_lo: low 32 bits of end value, in bucket hi=1
+        end_lo in u32::MAX / 2..=u32::MAX,
+    ) {
+        let start = start_lo as u64;
+        let end = BUCKET | end_lo as u64;
+
+        let mut rb = RoaringTreemap::new();
+        rb.insert_range(start..=end);
+
+        assert!(rb.contains_range(start..=end));
+        assert_eq!(
+            rb.range_cardinality(start..=end),
+            (u32::MAX as u64 - start_lo as u64 + 1) + (end_lo as u64 + 1),
+        );
+
+        // One element past the end should break containment
+        if end < u64::MAX {
+            assert!(!rb.contains_range(start..=end + 1));
+        }
+        if start > 0 {
+            assert!(!rb.contains_range(start - 1..=end));
+        }
+    }
+}

From bc151e4bc44c135a609051ffb4968a7dbe7aa097 Mon Sep 17 00:00:00 2001
From: Kerollmops
Date: Thu, 23 Apr 2026 16:22:37 +0200
Subject: [PATCH 2/2] Use iterator combinators instead of for loop and mutable

---
 roaring/src/treemap/inherent.rs | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/roaring/src/treemap/inherent.rs b/roaring/src/treemap/inherent.rs
index 0d746878..aa9a31ab 100644
--- a/roaring/src/treemap/inherent.rs
+++ b/roaring/src/treemap/inherent.rs
@@ -352,13 +352,14 @@ impl RoaringTreemap {
         let (start_hi, start_lo) = util::split(start);
         let (end_hi, end_lo) = util::split(end);
 
-        let mut cardinality = 0u64;
-        for (&key, bitmap) in self.map.range(start_hi..=end_hi) {
-            let lo_start = if key == start_hi { start_lo } else { 0 };
-            let lo_end = if key == end_hi { end_lo } else { u32::MAX };
-            cardinality += bitmap.range_cardinality(lo_start..=lo_end);
-        }
-        cardinality
+        self.map
+            .range(start_hi..=end_hi)
+            .map(|(&key, bitmap)| {
+                let lo_start = if key == start_hi { start_lo } else { 0 };
+                let lo_end = if key == end_hi { end_lo } else { u32::MAX };
+                bitmap.range_cardinality(lo_start..=lo_end)
+            })
+            .sum::<u64>()
     }
 
     /// Clears all integers in this set.