rstar/algorithm/bulk_load/
bulk_load_sequential.rs

1use crate::envelope::Envelope;
2use crate::node::{ParentNode, RTreeNode};
3use crate::object::RTreeObject;
4use crate::params::RTreeParams;
5use crate::point::Point;
6
7#[cfg(not(test))]
8use alloc::{vec, vec::Vec};
9
10#[allow(unused_imports)] // Import is required when building without std
11use num_traits::Float;
12
13use super::cluster_group_iterator::{calculate_number_of_clusters_on_axis, ClusterGroupIterator};
14
15fn bulk_load_recursive<T, Params>(elements: Vec<T>) -> ParentNode<T>
16where
17    T: RTreeObject,
18    <T::Envelope as Envelope>::Point: Point,
19    Params: RTreeParams,
20{
21    let m = Params::MAX_SIZE;
22    if elements.len() <= m {
23        // Reached leaf level
24        let elements: Vec<_> = elements.into_iter().map(RTreeNode::Leaf).collect();
25        return ParentNode::new_parent(elements);
26    }
27    let number_of_clusters_on_axis =
28        calculate_number_of_clusters_on_axis::<T, Params>(elements.len()).max(2);
29
30    let iterator = PartitioningTask::<_, Params> {
31        number_of_clusters_on_axis,
32        work_queue: vec![PartitioningState {
33            current_axis: <T::Envelope as Envelope>::Point::DIMENSIONS,
34            elements,
35        }],
36        _params: Default::default(),
37    };
38    ParentNode::new_parent(iterator.collect())
39}
40
41/// Represents a partitioning task that still needs to be done.
42///
43/// A partitioning iterator will take this item from its work queue and start partitioning "elements"
44/// along "current_axis" .
45struct PartitioningState<T: RTreeObject> {
46    elements: Vec<T>,
47    current_axis: usize,
48}
49
50/// Successively partitions the given elements into  cluster groups and finally into clusters.
51struct PartitioningTask<T: RTreeObject, Params: RTreeParams> {
52    work_queue: Vec<PartitioningState<T>>,
53    number_of_clusters_on_axis: usize,
54    _params: core::marker::PhantomData<Params>,
55}
56
57impl<T: RTreeObject, Params: RTreeParams> Iterator for PartitioningTask<T, Params> {
58    type Item = RTreeNode<T>;
59
60    fn next(&mut self) -> Option<Self::Item> {
61        while let Some(next) = self.work_queue.pop() {
62            let PartitioningState {
63                elements,
64                current_axis,
65            } = next;
66            if current_axis == 0 {
67                // Partitioning finished successfully on all axis. The remaining cluster forms a new node
68                let data = bulk_load_recursive::<_, Params>(elements);
69                return RTreeNode::Parent(data).into();
70            } else {
71                // The cluster group needs to be partitioned further along the next axis
72                let iterator = ClusterGroupIterator::new(
73                    elements,
74                    self.number_of_clusters_on_axis,
75                    current_axis - 1,
76                );
77                self.work_queue
78                    .extend(iterator.map(|slab| PartitioningState {
79                        elements: slab,
80                        current_axis: current_axis - 1,
81                    }));
82            }
83        }
84        None
85    }
86}
87
88/// A multi dimensional implementation of the OMT bulk loading algorithm.
89///
90/// See http://ceur-ws.org/Vol-74/files/FORUM_18.pdf
91pub fn bulk_load_sequential<T, Params>(elements: Vec<T>) -> ParentNode<T>
92where
93    T: RTreeObject,
94    <T::Envelope as Envelope>::Point: Point,
95    Params: RTreeParams,
96{
97    bulk_load_recursive::<_, Params>(elements)
98}
99
100#[cfg(test)]
101mod test {
102    use crate::test_utilities::*;
103    use crate::{Point, RTree, RTreeObject};
104    use std::collections::HashSet;
105    use std::fmt::Debug;
106    use std::hash::Hash;
107
108    #[test]
109    fn test_bulk_load_small() {
110        let random_points = create_random_integers::<[i32; 2]>(50, SEED_1);
111        create_and_check_bulk_loading_with_points(&random_points);
112    }
113
114    #[test]
115    fn test_bulk_load_large() {
116        let random_points = create_random_integers::<[i32; 2]>(3000, SEED_1);
117        create_and_check_bulk_loading_with_points(&random_points);
118    }
119
120    #[test]
121    fn test_bulk_load_with_different_sizes() {
122        for size in (0..100).map(|i| i * 7) {
123            test_bulk_load_with_size_and_dimension::<[i32; 2]>(size);
124            test_bulk_load_with_size_and_dimension::<[i32; 3]>(size);
125            test_bulk_load_with_size_and_dimension::<[i32; 4]>(size);
126        }
127    }
128
129    fn test_bulk_load_with_size_and_dimension<P>(size: usize)
130    where
131        P: Point<Scalar = i32> + RTreeObject + Send + Sync + Eq + Clone + Debug + Hash + 'static,
132        P::Envelope: Send + Sync,
133    {
134        let random_points = create_random_integers::<P>(size, SEED_1);
135        create_and_check_bulk_loading_with_points(&random_points);
136    }
137
138    fn create_and_check_bulk_loading_with_points<P>(points: &[P])
139    where
140        P: RTreeObject + Send + Sync + Eq + Clone + Debug + Hash + 'static,
141        P::Envelope: Send + Sync,
142    {
143        let tree = RTree::bulk_load(points.into());
144        let set1: HashSet<_> = tree.iter().collect();
145        let set2: HashSet<_> = points.iter().collect();
146        assert_eq!(set1, set2);
147        assert_eq!(tree.size(), points.len());
148    }
149}