Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions roaring/src/treemap/inherent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,96 @@ impl RoaringTreemap {
}
}

/// Returns `true` if all values in the range are present in this set.
///
/// An empty range is always contained.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringTreemap;
///
/// let mut rb = RoaringTreemap::new();
/// // An empty range is always contained
/// assert!(rb.contains_range(7..7));
///
/// rb.insert_range(1..0x1_0000_0000);
/// assert!(rb.contains_range(1..0x1_0000_0000));
/// assert!(rb.contains_range(2..0x1_0000_0000));
/// // 0 is not contained
/// assert!(!rb.contains_range(0..2));
/// // 0x1_0000_0000 is not contained
/// assert!(!rb.contains_range(1..=0x1_0000_0000));
/// ```
pub fn contains_range<R>(&self, range: R) -> bool
where
R: RangeBounds<u64>,
{
let (start, end) = match util::convert_range_to_inclusive(range) {
Some(range) => (*range.start(), *range.end()),
None => return true,
};
let (start_hi, start_lo) = util::split(start);
let (end_hi, end_lo) = util::split(end);

let mut expected_key = start_hi;
for (&key, bitmap) in self.map.range(start_hi..=end_hi) {
if key != expected_key {
return false;
}
let lo_start = if key == start_hi { start_lo } else { 0 };
let lo_end = if key == end_hi { end_lo } else { u32::MAX };
if !bitmap.contains_range(lo_start..=lo_end) {
return false;
}
match key.checked_add(1) {
Some(k) => expected_key = k,
None => return true,
}
}
expected_key > end_hi
}

/// Returns the number of elements in this set which are in the passed range.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringTreemap;
///
/// let mut rb = RoaringTreemap::new();
/// rb.insert_range(0x1_0000_0000..0x4_0000_0000);
/// rb.insert(0x5_0000_0001);
/// rb.insert(0x5_0000_0005);
/// rb.insert(u64::MAX);
///
/// assert_eq!(rb.range_cardinality(0..0x1_0000_0000), 0);
/// assert_eq!(rb.range_cardinality(0x1_0000_0000..0x4_0000_0000), 0x3_0000_0000);
/// assert_eq!(rb.range_cardinality(0x5_0000_0000..0x6_0000_0000), 2);
/// assert_eq!(rb.range_cardinality(0x1_0000_0000..0x1_0000_0000), 0);
/// assert_eq!(rb.range_cardinality(0x5_0000_0000..=u64::MAX), 3);
/// ```
pub fn range_cardinality<R>(&self, range: R) -> u64
where
R: RangeBounds<u64>,
{
let (start, end) = match util::convert_range_to_inclusive(range) {
Some(range) => (*range.start(), *range.end()),
None => return 0,
};
let (start_hi, start_lo) = util::split(start);
let (end_hi, end_lo) = util::split(end);

self.map
.range(start_hi..=end_hi)
.map(|(&key, bitmap)| {
let lo_start = if key == start_hi { start_lo } else { 0 };
let lo_end = if key == end_hi { end_lo } else { u32::MAX };
bitmap.range_cardinality(lo_start..=lo_end)
})
.sum::<u64>()
}

/// Clears all integers in this set.
///
/// # Examples
Expand Down
200 changes: 200 additions & 0 deletions roaring/tests/treemap_range_checks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
use proptest::collection::hash_set;
use proptest::prelude::*;
use roaring::RoaringTreemap;

// The bucket boundary is at every multiple of 2^32. Values with the same high 32 bits
// share a bucket; adjacent high keys are adjacent buckets.
const BUCKET: u64 = 1 << 32;

#[test]
fn empty_range_always_contained() {
let rb = RoaringTreemap::new();
assert!(rb.contains_range(7..7));
assert!(rb.contains_range(0..0));
assert!(rb.contains_range(u64::MAX..u64::MAX));
}

#[test]
fn empty_range_cardinality_is_zero() {
let rb = RoaringTreemap::new();
assert_eq!(rb.range_cardinality(7..7), 0);
assert_eq!(rb.range_cardinality(u64::MAX..u64::MAX), 0);
}

#[test]
fn empty_treemap_not_contained() {
let rb = RoaringTreemap::new();
assert!(!rb.contains_range(0..1));
assert!(!rb.contains_range(0..=u64::MAX));
assert_eq!(rb.range_cardinality(0..=u64::MAX), 0);
}

#[test]
fn single_bucket_contained() {
let mut rb = RoaringTreemap::new();
rb.insert_range(10..20);
assert!(rb.contains_range(10..20));
assert!(rb.contains_range(11..19));
assert!(!rb.contains_range(9..20));
assert!(!rb.contains_range(10..21));
assert_eq!(rb.range_cardinality(10..20), 10);
assert_eq!(rb.range_cardinality(10..15), 5);
assert_eq!(rb.range_cardinality(0..10), 0);
assert_eq!(rb.range_cardinality(20..30), 0);
}

#[test]
fn cross_bucket_boundary() {
// Values straddle the hi=0 / hi=1 bucket boundary
let lo_max = u32::MAX as u64;
let hi_min = BUCKET;

let mut rb = RoaringTreemap::new();
rb.insert(lo_max);
rb.insert(hi_min);

assert!(rb.contains_range(lo_max..=lo_max));
assert!(rb.contains_range(hi_min..=hi_min));
// Range spanning the two values — only those two exist, so the full span isn't contained
assert!(!rb.contains_range(lo_max - 1..=hi_min));
assert!(!rb.contains_range(lo_max..=hi_min + 1));

// But range_cardinality counts the values present
assert_eq!(rb.range_cardinality(lo_max..=hi_min), 2);
assert_eq!(rb.range_cardinality(lo_max..=lo_max), 1);
assert_eq!(rb.range_cardinality(hi_min..=hi_min), 1);
assert_eq!(rb.range_cardinality(lo_max - 1..lo_max), 0);

// Insert the full span and verify containment
rb.insert_range(lo_max..=hi_min);
assert!(rb.contains_range(lo_max..=hi_min));
assert_eq!(rb.range_cardinality(lo_max..=hi_min), 2);
}

#[test]
fn multi_bucket_gap_not_contained() {
// Insert values in buckets 0 and 2, leaving bucket 1 empty.
let mut rb = RoaringTreemap::new();
rb.insert_range(0..BUCKET); // hi=0, full
rb.insert_range(2 * BUCKET..3 * BUCKET); // hi=2, full

assert!(!rb.contains_range(0..3 * BUCKET));
assert!(rb.contains_range(0..BUCKET));
assert!(rb.contains_range(2 * BUCKET..3 * BUCKET));
assert_eq!(rb.range_cardinality(0..3 * BUCKET), 2 * BUCKET);
}

#[test]
fn u64_max_boundary() {
let mut rb = RoaringTreemap::new();
rb.insert(u64::MAX);
assert!(rb.contains_range(u64::MAX..=u64::MAX));
assert!(!rb.contains_range(u64::MAX - 1..=u64::MAX));
assert_eq!(rb.range_cardinality(u64::MAX..=u64::MAX), 1);
assert_eq!(rb.range_cardinality(u64::MAX - 1..u64::MAX), 0);

// Insert the last two values
rb.insert(u64::MAX - 1);
assert!(rb.contains_range(u64::MAX - 1..=u64::MAX));
assert_eq!(rb.range_cardinality(u64::MAX - 1..=u64::MAX), 2);
}

#[test]
fn unbounded_range() {
// Use a start value in the last bucket (hi = u32::MAX) to avoid allocating
// billions of buckets, which would happen if the high word of start is small.
let last_bucket_start = (u32::MAX as u64) << 32; // hi=u32::MAX, lo=0
let mut rb = RoaringTreemap::new();
rb.insert_range(last_bucket_start..);
assert!(rb.contains_range(last_bucket_start..));
assert!(rb.contains_range(last_bucket_start + 1..=u64::MAX));
assert!(!rb.contains_range(last_bucket_start - 1..=u64::MAX));
}

proptest! {
#[test]
fn proptest_range(
// Keep values well within a single bucket to avoid very slow tests from
// inserting billions of values across bucket boundaries.
start in ..=262_143_u64,
len in ..=262_143_u64,
extra in hash_set(..=462_143_u64, ..=100),
) {
let end = start + len;
let range = start..end;
let inverse_empty_range = (start + len)..start;

let mut rb = RoaringTreemap::new();
rb.insert_range(range.clone());
assert!(rb.contains_range(range.clone()));
assert!(rb.contains_range(inverse_empty_range.clone()));
assert_eq!(rb.range_cardinality(range.clone()), len);

for &val in &extra {
rb.insert(val);
assert!(rb.contains_range(range.clone()));
assert!(rb.contains_range(inverse_empty_range.clone()));
assert_eq!(rb.range_cardinality(range.clone()), len);
}

for (i, &val) in extra.iter().filter(|&&x| range.contains(&x)).enumerate() {
rb.remove(val);
assert!(!rb.contains_range(range.clone()));
assert!(rb.contains_range(inverse_empty_range.clone()));
assert_eq!(rb.range_cardinality(range.clone()), len - i as u64 - 1);
}
}

#[test]
fn proptest_range_boundaries(
start in 1..=262_143_u64,
len in 0..=262_143_u64,
) {
let mut rb = RoaringTreemap::new();
let end = start + len;
let half = start + len / 2;
rb.insert_range(start..end);

assert!(rb.contains_range(start..end));
assert!(rb.contains_range(start + 1..end));
assert!(rb.contains_range(start..end.saturating_sub(1)));
assert!(rb.contains_range(start + 1..end.saturating_sub(1)));

assert!(!rb.contains_range(start - 1..end));
assert!(!rb.contains_range(start - 1..end.saturating_sub(1)));
assert!(!rb.contains_range(start..end + 1));
assert!(!rb.contains_range(start + 1..end + 1));
assert!(!rb.contains_range(start - 1..end + 1));

assert!(!rb.contains_range(start - 1..half));
assert!(!rb.contains_range(half..end + 1));
}

#[test]
fn proptest_cross_bucket(
// start_lo: low 32 bits of start value, in bucket hi=0
start_lo in 0_u32..=u32::MAX / 2,
// end_lo: low 32 bits of end value, in bucket hi=1
end_lo in u32::MAX / 2..=u32::MAX,
) {
let start = start_lo as u64;
let end = BUCKET | end_lo as u64;

let mut rb = RoaringTreemap::new();
rb.insert_range(start..=end);

assert!(rb.contains_range(start..=end));
assert_eq!(
rb.range_cardinality(start..=end),
(u32::MAX as u64 - start_lo as u64 + 1) + (end_lo as u64 + 1),
);

// One element past the end should break containment
if end < u64::MAX {
assert!(!rb.contains_range(start..=end + 1));
}
if start > 0 {
assert!(!rb.contains_range(start - 1..=end));
}
}
}
Loading