// obvhs/bvh2/reinsertion.rs

1// Reinsertion optimizer based on "Parallel Reinsertion for Bounding Volume Hierarchy Optimization", by D. Meister and J. Bittner:
2// https://meistdan.github.io/publications/prbvh/paper.pdf
3// https://jcgt.org/published/0011/04/01/paper.pdf
4// Reference: https://github.com/madmann91/bvh/blob/3490634ae822e5081e41f09498fcce03bc1419e3/src/bvh/v2/reinsertion_optimizer.h
5
6// Note: Most asserts exist to try to elide bounds checks
7
8#[cfg(feature = "parallel")]
9use rayon::{
10    iter::{IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator},
11    slice::ParallelSliceMut,
12};
13
14use rdst::{RadixKey, RadixSort};
15
16use crate::{
17    bvh2::{Bvh2, Bvh2Node},
18    fast_stack,
19    faststack::FastStack,
20};
21
22use super::update_primitives_to_nodes_for_node;
23
/// Restructures the BVH, optimizing node locations within the BVH hierarchy per SAH cost.
///
/// Holds reusable scratch buffers so repeated optimization runs can reuse allocations.
#[derive(Default)]
pub struct ReinsertionOptimizer {
    // Nodes under consideration for reinsertion in the current pass,
    // sorted by cost (AABB half area) via the RadixKey impl below.
    candidates: Vec<Candidate>,
    // Best reinsertion found for each candidate in the current pass.
    reinsertions: Vec<Reinsertion>,
    // Per-node flags marking nodes already affected by an applied reinsertion
    // in the current pass; used to reject conflicting moves.
    touched: Vec<bool>,
    // Fraction of the number of nodes to optimize per iteration.
    batch_size_ratio: f32,
}
32
impl ReinsertionOptimizer {
    /// Restructures the BVH, optimizing node locations within the BVH hierarchy per SAH cost.
    ///
    /// # Arguments
    /// * `batch_size_ratio` - Fraction of the number of nodes to optimize per iteration.
    /// * `ratio_sequence` - A sequence of ratios to perform reinsertion at. These are a
    ///   proportion of the `batch_size_ratio`. If None, the following sequence is used:
    ///   `(1..32).step_by(2).map(|n| 1.0 / n as f32)` or
    ///   1/1, 1/3, 1/5, 1/7, 1/9, 1/11, 1/13, 1/15, 1/17, 1/19, 1/21, 1/23, 1/25, 1/27, 1/29, 1/31
    pub fn run(&mut self, bvh: &mut Bvh2, batch_size_ratio: f32, ratio_sequence: Option<Vec<f32>>) {
        crate::scope!("reinsertion_optimize");

        // Nothing to optimize for an empty bvh, a root-leaf-only bvh, or a non-positive ratio.
        if bvh.nodes.is_empty() || bvh.nodes[0].is_leaf() || batch_size_ratio <= 0.0 {
            return;
        }

        // Reinsertion requires the child -> parent mapping.
        bvh.init_parents_if_uninit();

        // Upper bound on candidates/reinsertions per pass, used to pre-reserve scratch space.
        let cap = (bvh.nodes.len() as f32 * batch_size_ratio.min(1.0)).ceil() as usize;

        self.candidates.reserve(cap);
        self.reinsertions.reserve(cap);
        self.touched.clear();
        self.touched.resize(bvh.nodes.len(), false);
        self.batch_size_ratio = batch_size_ratio;
        self.optimize_impl(bvh, ratio_sequence);
    }

    /// Restructures the BVH, optimizing given node locations within the BVH hierarchy per SAH cost.
    ///
    /// # Arguments
    /// * `candidates` - A list of ids for nodes that need to be re-inserted.
    /// * `iterations` - The number of times reinsertion is run. Parallel reinsertion passes can result in conflicts
    ///   that potentially limit the proportion of reinsertions in a single pass.
    pub fn run_with_candidates(&mut self, bvh: &mut Bvh2, candidates: &[u32], iterations: u32) {
        crate::scope!("reinsertion_optimize_candidates");

        if bvh.nodes.is_empty() || bvh.nodes[0].is_leaf() {
            return;
        }

        // Reinsertion requires the child -> parent mapping.
        bvh.init_parents_if_uninit();

        let cap = candidates.len();

        // Seed the candidate list directly from the caller-provided node ids.
        self.candidates = candidates
            .iter()
            .map(|node_id| {
                let cost = bvh.nodes[*node_id as usize].aabb().half_area();
                Candidate {
                    cost,
                    node_id: *node_id,
                }
            })
            .collect::<Vec<_>>();
        self.reinsertions.reserve(cap);
        self.touched.clear();
        self.touched.resize(bvh.nodes.len(), false);
        self.optimize_specific_candidates(bvh, iterations);
    }

    /// Runs one reinsertion pass per entry in `ratio_sequence` at progressively
    /// scaled batch sizes.
    pub fn optimize_impl(&mut self, bvh: &mut Bvh2, ratio_sequence: Option<Vec<f32>>) {
        // Reinsertion moves nodes around arbitrarily, invalidating this ordering guarantee.
        bvh.children_are_ordered_after_parents = false;
        // This initially performs reinsertion at the specified ratio, then at progressively smaller ratios,
        // focusing more reinsertion time at the top of the bvh. The original method would perform reinsertion
        // for a fixed ratio a fixed number of times.
        let ratio_sequence = ratio_sequence.unwrap_or(
            (1..32)
                .step_by(2)
                .map(|n| 1.0 / n as f32)
                .collect::<Vec<_>>(),
        );

        ratio_sequence.iter().for_each(|ratio| {
            // Batch size for this pass; at least 1, never more than the node count.
            let batch_size =
                (((bvh.nodes.len() as f32 * self.batch_size_ratio) * ratio) as usize).max(1);
            let node_count = bvh.nodes.len().min(batch_size + 1);
            self.find_candidates(bvh, node_count);
            // node_count - 1 because the root (index 0) is skipped as a candidate.
            self.optimize_candidates(bvh, node_count - 1);
        });
    }

    /// Runs `iterations` reinsertion passes over the already-populated candidate list
    /// (see `run_with_candidates`).
    pub fn optimize_specific_candidates(&mut self, bvh: &mut Bvh2, iterations: u32) {
        // Reinsertion moves nodes around arbitrarily, invalidating this ordering guarantee.
        bvh.children_are_ordered_after_parents = false;
        for _ in 0..iterations {
            self.optimize_candidates(bvh, self.candidates.len());
        }
    }

    /// Find potential candidates for reinsertion.
    ///
    /// Fills `self.candidates` with the first `node_count * 2` nodes (root excluded),
    /// sorted by AABB half area, largest first.
    fn find_candidates(&mut self, bvh: &mut Bvh2, node_count: usize) {
        // This method just takes the first node_count*2 nodes in the bvh and sorts them by their half area.
        // This seemed to find candidates much faster while resulting in similar bvh traversal performance vs the original method:
        // https://github.com/madmann91/bvh/blob/3490634ae822e5081e41f09498fcce03bc1419e3/src/bvh/v2/reinsertion_optimizer.h#L88
        // Taking the first node_count * 2 seemed to work nearly as well as sorting all the nodes, but builds much faster.
        self.candidates.clear();
        bvh.nodes
            .iter()
            .take(node_count * 2)
            .enumerate()
            .skip(1) // Skip the root; it can never be reinserted.
            .for_each(|(i, node)| {
                self.candidates.push(Candidate {
                    cost: node.aabb().half_area(),
                    node_id: i as u32,
                });
            });
        // Descending cost order comes from the negated key in Candidate's RadixKey impl.
        self.candidates.radix_sort_unstable();
    }

    /// For the first `count` candidates: find the best reinsertion for each, then apply
    /// the non-conflicting moves, most beneficial first.
    #[allow(unused_variables)]
    fn optimize_candidates(&mut self, bvh: &mut Bvh2, count: usize) {
        self.touched.fill(false);

        // Phase 1: search (read-only on the bvh) for the best reinsertion per candidate.
        #[cfg(feature = "parallel")]
        {
            // Compute a reinsertion for every candidate slot in parallel; entries with
            // no gain keep area_diff == 0.0 and are skipped in the apply loop below.
            self.reinsertions.resize(count, Default::default());
            self.reinsertions
                .par_iter_mut()
                .enumerate()
                .for_each(|(i, reinsertion)| {
                    *reinsertion = find_reinsertion(bvh, self.candidates[i].node_id as usize)
                });
        }
        #[cfg(not(feature = "parallel"))]
        {
            self.reinsertions.clear();
            // Try to elide bounds checks in the loop below.
            assert!(count <= self.candidates.len());
            (0..count).for_each(|i| {
                let r = find_reinsertion(bvh, self.candidates[i].node_id as usize);
                // Serially we can filter out no-gain reinsertions up front.
                if r.area_diff > 0.0 {
                    self.reinsertions.push(r)
                }
            });
        }

        // Phase 2: sort so the largest area reductions are applied first.
        #[cfg(feature = "parallel")]
        self.reinsertions
            .par_sort_unstable_by(|a, b| b.area_diff.partial_cmp(&a.area_diff).unwrap());

        #[cfg(not(feature = "parallel"))]
        self.reinsertions
            .sort_unstable_by(|a, b| b.area_diff.partial_cmp(&a.area_diff).unwrap());

        // Try to elide bounds checks in the loop below.
        assert!(self.reinsertions.len() <= self.touched.len());
        (0..self.reinsertions.len()).for_each(|i| {
            let reinsertion = &self.reinsertions[i];

            // The parallel path kept zero-gain entries; skip them here.
            #[cfg(feature = "parallel")]
            if reinsertion.area_diff <= 0.0 {
                return;
            }

            let conflicts = self.get_conflicts(bvh, reinsertion.from, reinsertion.to);

            // Skip this move if any node it would touch was already modified this pass;
            // the earlier (larger-gain) move wins.
            if conflicts.iter().any(|&i| self.touched[i]) {
                return;
            }

            conflicts.iter().for_each(|&conflict| {
                self.touched[conflict] = true;
            });

            reinsert_node(bvh, reinsertion.from as usize, reinsertion.to as usize);
        });
    }

    /// Returns the ids of every node whose contents or parent link is modified by
    /// moving `from` next to `to`; used to detect conflicts between reinsertions
    /// applied in the same pass.
    #[inline(always)]
    fn get_conflicts(&self, bvh: &mut Bvh2, from: u32, to: u32) -> [usize; 5] {
        [
            to as usize,
            from as usize,
            Bvh2Node::get_sibling_id(from as usize),
            // NOTE(review): bvh.parents must be initialized before this runs; the public
            // entry points (run / run_with_candidates) call init_parents_if_uninit.
            // Indexing here is bounds-checked Vec indexing, not unsafe.
            bvh.parents[to as usize] as usize,
            bvh.parents[from as usize] as usize,
        ]
    }
}
210
/// A single proposed reinsertion: remove node `from` and reinsert it as the
/// sibling of node `to`, decreasing total surface area by `area_diff`.
#[derive(Default, Clone, Copy)]
pub struct Reinsertion {
    /// Index of the node to be removed and reinserted.
    pub from: u32,
    /// Index of the node that will become the new sibling of `from`.
    pub to: u32,
    /// SAH surface-area decrease gained by this move; larger is better,
    /// 0.0 means no improving position was found.
    pub area_diff: f32,
}
217
/// A node considered for reinsertion, ranked by `cost`.
#[derive(Clone, Copy, Debug)]
struct Candidate {
    // Index of the candidate node in Bvh2::nodes.
    node_id: u32,
    // Half area of the node's AABB; larger nodes are more promising candidates.
    cost: f32,
}
223
impl RadixKey for Candidate {
    // f32 keys are 4 bytes -> 4 radix levels.
    const LEVELS: usize = 4;

    #[inline]
    fn get_level(&self, level: usize) -> u8 {
        // Negate the cost so radix_sort_unstable() orders candidates by
        // descending half area (largest first).
        (-self.cost).get_level(level)
    }
}
232
233pub fn find_reinsertion(bvh: &Bvh2, node_id: usize) -> Reinsertion {
234    if bvh.parents.is_empty() {
235        panic!("Parents mapping required. Please run Bvh2::init_parents() before reinsert_node()")
236    }
237
238    debug_assert_ne!(node_id, 0);
239    // Try to elide bounds checks
240    assert!(node_id < bvh.nodes.len());
241    assert!(node_id < bvh.parents.len());
242
243    /*
244     * Here is an example that explains how the cost of a reinsertion is computed. For the
245     * reinsertion from A to C, in the figure below, we need to remove P1, replace it by B,
246     * and create a node that holds A and C and place it where C was.
247     *
248     *             R
249     *            / \
250     *          Pn   Q1
251     *          /     \
252     *        ...     ...
253     *        /         \
254     *       P1          C
255     *      / \
256     *     A   B
257     *
258     * The resulting area *decrease* is (SA(x) means the surface area of x):
259     *
260     *     SA(P1) +                                                : P1 was removed
261     *     SA(P2) - SA(B) +                                        : P2 now only contains B
262     *     SA(P3) - SA(B U sibling(P2)) +                          : Same but for P3
263     *     ... +
264     *     SA(Pn) - SA(B U sibling(P2) U ... U sibling(P(n - 1)) + : Same but for Pn
265     *     0 +                                                     : R does not change
266     *     SA(Q1) - SA(Q1 U A) +                                   : Q1 now contains A
267     *     SA(Q2) - SA(Q2 U A) +                                   : Q2 now contains A
268     *     ... +
269     *     -SA(A U C)                                              : For the parent of A and C
270     */
271    let mut best_reinsertion = Reinsertion {
272        from: node_id as u32,
273        to: 0,
274        area_diff: 0.0,
275    };
276    let node_area = bvh.nodes[node_id].aabb().half_area();
277
278    let parent_area = bvh.nodes[bvh.parents[node_id] as usize].aabb().half_area();
279    let mut area_diff = parent_area;
280    let mut sibling_id = Bvh2Node::get_sibling_id(node_id);
281    let mut pivot_bbox = *bvh.nodes[sibling_id].aabb();
282    let parent_id = bvh.parents[node_id] as usize;
283    let mut pivot_id = parent_id;
284    let aabb = bvh.nodes[node_id].aabb();
285    let mut longest = 0;
286    // TODO is it possible to push only the left pair and reduce the stack size?
287    fast_stack!((f32, u32), (96, 192), bvh.max_depth * 2, stack, {
288        stack.clear();
289        loop {
290            stack.push((area_diff, sibling_id as u32));
291            while !stack.is_empty() {
292                longest = stack.len().max(longest);
293                let (top_area_diff, top_sibling_id) = stack.pop_fast();
294                if top_area_diff - node_area <= best_reinsertion.area_diff {
295                    continue;
296                }
297
298                let dst_node = &bvh.nodes[top_sibling_id as usize];
299                let merged_area = dst_node.aabb().union(aabb).half_area();
300                let reinsert_area = top_area_diff - merged_area;
301                if reinsert_area > best_reinsertion.area_diff {
302                    best_reinsertion.to = top_sibling_id;
303                    best_reinsertion.area_diff = reinsert_area;
304                }
305
306                if !dst_node.is_leaf() {
307                    let child_area = reinsert_area + dst_node.aabb().half_area();
308                    stack.push((child_area, dst_node.first_index));
309                    stack.push((child_area, dst_node.first_index + 1));
310                }
311            }
312
313            if pivot_id != parent_id {
314                pivot_bbox = pivot_bbox.union(bvh.nodes[sibling_id].aabb());
315                area_diff += bvh.nodes[pivot_id].aabb().half_area() - pivot_bbox.half_area();
316            }
317
318            if pivot_id == 0 {
319                break;
320            }
321
322            sibling_id = Bvh2Node::get_sibling_id(pivot_id);
323            pivot_id = bvh.parents[pivot_id] as usize;
324        }
325    });
326
327    if best_reinsertion.to == Bvh2Node::get_sibling_id32(best_reinsertion.from)
328        || best_reinsertion.to == bvh.parents[best_reinsertion.from as usize]
329    {
330        best_reinsertion = Reinsertion::default();
331    }
332
333    best_reinsertion
334}
335
336pub fn reinsert_node(bvh: &mut Bvh2, from: usize, to: usize) {
337    if bvh.parents.is_empty() {
338        panic!("Parents mapping required. Please run Bvh2::init_parents() before reinsert_node()")
339    }
340
341    let sibling_id = Bvh2Node::get_sibling_id(from);
342    let parent_id = bvh.parents[from] as usize;
343    let sibling_node = bvh.nodes[sibling_id];
344    let dst_node = bvh.nodes[to];
345
346    bvh.nodes[to].make_inner(Bvh2Node::get_left_sibling_id(from) as u32);
347    bvh.nodes[sibling_id] = dst_node;
348    bvh.nodes[parent_id] = sibling_node;
349
350    let sibling_node = &bvh.nodes[sibling_id];
351    if sibling_node.is_leaf() {
352        // Tell primitives where their node went.
353        update_primitives_to_nodes_for_node(
354            sibling_node,
355            sibling_id,
356            &bvh.primitive_indices,
357            &mut bvh.primitives_to_nodes,
358        );
359    } else {
360        bvh.parents[sibling_node.first_index as usize] = sibling_id as u32;
361        bvh.parents[sibling_node.first_index as usize + 1] = sibling_id as u32;
362    }
363
364    let parent_node = &bvh.nodes[parent_id];
365    if bvh.nodes[parent_id].is_leaf() {
366        // Tell primitives where their node went.
367        update_primitives_to_nodes_for_node(
368            parent_node,
369            parent_id,
370            &bvh.primitive_indices,
371            &mut bvh.primitives_to_nodes,
372        );
373    } else {
374        bvh.parents[parent_node.first_index as usize] = parent_id as u32;
375        bvh.parents[parent_node.first_index as usize + 1] = parent_id as u32;
376    }
377
378    bvh.parents[sibling_id] = to as u32;
379    bvh.parents[from] = to as u32;
380    bvh.refit_from_fast(to);
381    bvh.refit_from_fast(parent_id);
382}
383
#[cfg(test)]
mod tests {

    use crate::{
        ploc::{PlocBuilder, PlocSearchDistance, SortPrecision},
        test_util::geometry::demoscene,
    };

    use super::*;

    #[test]
    fn test_reinsertion() {
        let tris = demoscene(32, 0);
        // Collect primitive indices and AABBs in one pass.
        let (indices, aabbs): (Vec<u32>, Vec<_>) = tris
            .iter()
            .enumerate()
            .map(|(i, primitive)| (i as u32, primitive.aabb()))
            .unzip();

        // Case 1: optimize without init_primitives_to_nodes & init_parents.
        {
            let mut bvh = PlocBuilder::new().build(
                PlocSearchDistance::VeryLow,
                &aabbs,
                indices.clone(),
                SortPrecision::U64,
                1,
            );
            bvh.validate(&tris, false, false);
            ReinsertionOptimizer::default().run(&mut bvh, 0.25, None);
            bvh.validate(&tris, false, false);
            bvh.reorder_in_stack_traversal_order();
            ReinsertionOptimizer::default().run(&mut bvh, 0.5, None);
            bvh.validate(&tris, false, false);
        }
        // Case 2: optimize with init_primitives_to_nodes & init_parents.
        {
            let mut bvh = PlocBuilder::new().build(
                PlocSearchDistance::VeryLow,
                &aabbs,
                indices,
                SortPrecision::U64,
                1,
            );
            bvh.validate(&tris, false, false);
            bvh.init_primitives_to_nodes_if_uninit();
            bvh.init_parents_if_uninit();
            bvh.validate(&tris, false, false);
            ReinsertionOptimizer::default().run(&mut bvh, 0.25, None);
            bvh.validate(&tris, false, false);
            bvh.reorder_in_stack_traversal_order();
            ReinsertionOptimizer::default().run(&mut bvh, 0.5, None);
            bvh.validate(&tris, false, false);
        }
    }
}