obvhs/cwbvh/
bvh2_to_cwbvh.rs

1// Uses cost / merging from cwbvh paper
2
3use glam::{UVec3, Vec3A, vec3a};
4
5use crate::{
6    PerComponent,
7    aabb::Aabb,
8    bvh2::Bvh2,
9    cwbvh::{BRANCHING, CwBvh, CwBvhNode, DENOM},
10};
11
12use super::DIRECTIONS;
13
14/// Convert a bvh2 to CwBvh
15pub struct Bvh2Converter<'a> {
16    pub bvh2: &'a Bvh2,
17    pub nodes: Vec<CwBvhNode>,
18    pub primitive_indices: Vec<u32>,
19    pub decisions: Vec<Decision>,
20    pub order_children_during_build: bool,
21    pub include_exact_node_aabbs: bool,
22    pub exact_node_aabbs: Option<Vec<Aabb>>,
23    direction_lut: [Vec3A; 8],
24}
25
26const INVALID: u8 = u8::MAX;
27const INVALID32: u32 = u32::MAX;
28const INVALID_USIZE: usize = INVALID32 as usize;
29
30const PRIM_COST: f32 = 0.3;
31
32impl<'a> Bvh2Converter<'a> {
33    /// Initialize the Bvh2 to CwBvh converter.
34    pub fn new(bvh2: &'a Bvh2, order_children: bool, include_exact_node_aabbs: bool) -> Self {
35        let capacity = bvh2.primitive_indices.len();
36
37        let mut nodes = Vec::with_capacity(capacity);
38        nodes.push(Default::default());
39
40        let mut direction_lut = [Vec3A::ZERO; DIRECTIONS];
41        direction_lut
42            .iter_mut()
43            .enumerate()
44            .for_each(|(s, direction)| {
45                *direction = vec3a(
46                    if (s & 0b100) != 0 { -1.0 } else { 1.0 },
47                    if (s & 0b010) != 0 { -1.0 } else { 1.0 },
48                    if (s & 0b001) != 0 { -1.0 } else { 1.0 },
49                );
50            });
51
52        Self {
53            bvh2,
54            nodes,
55            primitive_indices: Vec::with_capacity(capacity),
56            decisions: vec![Decision::default(); bvh2.nodes.len() * 7],
57            order_children_during_build: order_children,
58            direction_lut,
59            include_exact_node_aabbs,
60            exact_node_aabbs: if include_exact_node_aabbs {
61                Some(vec![Aabb::empty(); bvh2.nodes.len()])
62            } else {
63                None
64            },
65        }
66    }
67
68    /// Convert the bvh2 to CwBvh
69    pub fn convert_to_cwbvh(&mut self) {
70        crate::scope!("convert_to_cwbvh");
71        debug_assert_eq!(std::mem::size_of::<CwBvhNode>(), 80);
72        self.convert_to_cwbvh_impl(0, 0);
73    }
74
75    pub fn convert_to_cwbvh_impl(&mut self, node_index_bvh8: usize, node_index_bvh2: usize) {
76        let mut node = self.nodes[node_index_bvh8];
77        let aabb = self.bvh2.nodes[node_index_bvh2].aabb();
78        if let Some(exact_node_aabbs) = &mut self.exact_node_aabbs {
79            exact_node_aabbs[node_index_bvh8] = *aabb;
80        }
81
82        let node_p = aabb.min;
83        node.p = node_p.into();
84
85        let e = ((aabb.max - aabb.min).max(Vec3A::splat(1e-20)) * DENOM)
86            .log2()
87            .ceil()
88            .exp2();
89        debug_assert!(e.cmpgt(Vec3A::ZERO).all(), "aabb: {aabb:?} e: {e}");
90
91        let rcp_e = 1.0 / e;
92        let e: UVec3 = e.per_comp(|c: f32| {
93            let bits = c.to_bits();
94            // Only the exponent bits can be non-zero
95            debug_assert_eq!(bits & 0b10000000011111111111111111111111, 0);
96            bits >> 23
97        });
98        node.e = [e.x as u8, e.y as u8, e.z as u8];
99
100        let children = &mut [INVALID32; 8];
101
102        let child_count = &mut 0;
103        self.get_children(node_index_bvh2, children, child_count, 0);
104
105        if self.order_children_during_build {
106            self.order_children(node_index_bvh2, children, *child_count as usize);
107        }
108
109        node.imask = 0;
110
111        node.primitive_base_idx = self.primitive_indices.len() as u32;
112        node.child_base_idx = self.nodes.len() as u32;
113
114        let mut num_internal_nodes = 0;
115        let mut num_primitives = 0_u32;
116
117        for (i, child_index) in children.iter().enumerate() {
118            if *child_index == INVALID32 {
119                continue; // Empty slot
120            };
121
122            let child_aabb = self.bvh2.nodes[*child_index as usize].aabb();
123
124            // const PAD: f32 = 1e-20;
125            // Use to force non-zero volumes.
126            const PAD: f32 = 0.0;
127
128            let mut child_min = ((child_aabb.min - node_p - PAD) * rcp_e).floor();
129            let mut child_max = ((child_aabb.max - node_p + PAD) * rcp_e).ceil();
130
131            child_min = child_min.clamp(Vec3A::ZERO, Vec3A::splat(255.0));
132            child_max = child_max.clamp(Vec3A::ZERO, Vec3A::splat(255.0));
133
134            debug_assert!((child_min.cmple(child_max)).all());
135
136            node.child_min_x[i] = child_min.x as u8;
137            node.child_min_y[i] = child_min.y as u8;
138            node.child_min_z[i] = child_min.z as u8;
139            node.child_max_x[i] = child_max.x as u8;
140            node.child_max_y[i] = child_max.y as u8;
141            node.child_max_z[i] = child_max.z as u8;
142
143            match self.decisions[(child_index * 7) as usize].kind {
144                DecisionKind::LEAF => {
145                    let primitive_count = self.count_primitives(*child_index as usize, self.bvh2);
146                    debug_assert!(primitive_count > 0 && primitive_count <= 3);
147
148                    // Three highest bits contain unary representation of primitive count
149
150                    node.child_meta[i] = num_primitives as u8
151                        | match primitive_count {
152                            1 => 0b0010_0000,
153                            2 => 0b0110_0000,
154                            3 => 0b1110_0000,
155                            _ => panic!("Incorrect leaf primitive count: {primitive_count}"),
156                        };
157
158                    num_primitives += primitive_count;
159                    debug_assert!(num_primitives <= 24);
160                }
161                DecisionKind::INTERNAL => {
162                    node.imask |= 1u8 << i;
163
164                    node.child_meta[i] = (24 + i as u8) | 0b0010_0000;
165
166                    num_internal_nodes += 1;
167                }
168                DecisionKind::DISTRIBUTE => unreachable!(),
169            }
170        }
171
172        self.nodes
173            .resize(self.nodes.len() + num_internal_nodes, Default::default());
174        self.nodes[node_index_bvh8] = node;
175
176        debug_assert!(node.child_base_idx as usize + num_internal_nodes == self.nodes.len());
177        debug_assert!(
178            node.primitive_base_idx + num_primitives == self.primitive_indices.len() as u32
179        );
180
181        // Recurse on Internal Nodes
182        let mut offset = 0;
183        for (i, child_index) in children.iter().enumerate() {
184            if *child_index != INVALID32 && (node.imask & (1 << i)) != 0 {
185                self.convert_to_cwbvh_impl(
186                    (node.child_base_idx + offset) as usize,
187                    *child_index as usize,
188                );
189                offset += 1;
190            }
191        }
192        //self.nodes[node_index_bvh8] = node;
193    }
194
195    // Recursively count primitives in subtree of the given Node
196    // Simultaneously fills the indices buffer of the BVH8
197    fn count_primitives(&mut self, node_index: usize, bvh2: &Bvh2) -> u32 {
198        let node = bvh2.nodes[node_index];
199
200        if node.is_leaf() {
201            debug_assert!(node.prim_count == 1);
202
203            self.primitive_indices
204                .push(bvh2.primitive_indices[node.first_index as usize]);
205
206            return node.prim_count;
207        }
208
209        self.count_primitives(node.first_index as usize, bvh2)
210            + self.count_primitives((node.first_index + 1) as usize, bvh2)
211    }
212
213    /// Fill cost table for bvh2 -> bvh8 conversion
214    pub fn calculate_cost(&mut self, max_prims_per_leaf: u32) {
215        crate::scope!("calculate_cost");
216        self.calculate_cost_impl(0, max_prims_per_leaf, 0);
217    }
218
219    // Based on https://github.com/jan-van-bergen/GPU-Raytracer/blob/6559ae2241c8fdea0ddaec959fe1a47ec9b3ab0d/Src/BVH/Converters/BVH8Converter.cpp#L24
220    pub fn calculate_cost_impl(
221        &mut self,
222        node_index: usize,
223        max_prims_per_leaf: u32,
224        _current_depth: i32,
225    ) -> u32 {
226        let node = &self.bvh2.nodes[node_index];
227        let half_area = node.aabb().half_area();
228        let first_index = node.first_index;
229        let prim_count = node.prim_count;
230
231        let node_dec_idx = node_index * 7;
232        let first_index_7 = (first_index * 7) as usize;
233        let next_index_7 = ((first_index + 1) * 7) as usize;
234
235        let num_primitives;
236
237        // TODO possibly merge as much as possible past a specified depth
238        // let depth_cost = if current_depth > 15 { 1.0 } else { 1.0 };
239
240        //if is_leaf()
241        if prim_count != 0 {
242            num_primitives = prim_count;
243            if num_primitives != 1 {
244                panic!(
245                    "ERROR: BVH8 Builder expects BVH with leaf Nodes containing only 1 primitive!\n"
246                );
247            }
248
249            // SAH cost
250            let cost_leaf = half_area * (num_primitives as f32) * PRIM_COST;
251
252            for i in 0..7 {
253                let decision = &mut self.decisions[node_dec_idx + i];
254                decision.kind = DecisionKind::LEAF;
255                decision.cost = cost_leaf;
256            }
257        } else {
258            num_primitives = self.calculate_cost_impl(
259                first_index as usize,
260                max_prims_per_leaf,
261                _current_depth + 1,
262            ) + self.calculate_cost_impl(
263                (first_index + 1) as usize,
264                max_prims_per_leaf,
265                _current_depth + 1,
266            );
267
268            // Separate case: i=0 (i=1 in the paper)
269            {
270                let cost_leaf = if num_primitives <= max_prims_per_leaf {
271                    (num_primitives as f32) * half_area * PRIM_COST
272                } else {
273                    f32::INFINITY
274                };
275
276                let mut cost_distribute = f32::INFINITY;
277
278                let mut distribute_left = INVALID;
279                let mut distribute_right = INVALID;
280
281                for k in 0..7 {
282                    let c = self.decisions[first_index_7 + k].cost
283                        + self.decisions[next_index_7 + 6 - k].cost;
284
285                    if c < cost_distribute {
286                        cost_distribute = c;
287
288                        distribute_left = k as u8;
289                        distribute_right = 6 - k as u8;
290                    }
291                }
292
293                let cost_internal = cost_distribute + half_area;
294
295                let decision = &mut self.decisions[node_dec_idx];
296                if cost_leaf < cost_internal {
297                    decision.kind = DecisionKind::LEAF;
298                    decision.cost = cost_leaf;
299                } else {
300                    decision.kind = DecisionKind::INTERNAL;
301                    decision.cost = cost_internal;
302                }
303
304                decision.distribute_left = distribute_left;
305                decision.distribute_right = distribute_right;
306            }
307
308            // In the paper i=2..7
309            let mut node_i;
310            for i in 1..7 {
311                node_i = node_dec_idx + i;
312                let mut cost_distribute = self.decisions[node_i - 1].cost;
313
314                let mut distribute_left = INVALID;
315                let mut distribute_right = INVALID;
316
317                for k in 0..i {
318                    let c = self.decisions[first_index_7 + k].cost
319                        + self.decisions[next_index_7 + i - k - 1].cost;
320
321                    if c < cost_distribute {
322                        cost_distribute = c;
323
324                        let k_u8 = k as u8;
325                        distribute_left = k_u8;
326                        distribute_right = i as u8 - k_u8 - 1;
327                    }
328                }
329
330                let decision = &mut self.decisions[node_i];
331                decision.cost = cost_distribute;
332
333                if distribute_left != INVALID {
334                    decision.kind = DecisionKind::DISTRIBUTE;
335                    decision.distribute_left = distribute_left;
336                    decision.distribute_right = distribute_right;
337                } else {
338                    self.decisions[node_i] = self.decisions[node_i - 1];
339                }
340            }
341        }
342
343        num_primitives
344    }
345
346    pub fn get_children(
347        &mut self,
348        node_index: usize,
349        children: &mut [u32; 8],
350        child_count: &mut u32,
351        i: usize,
352    ) {
353        let node = &self.bvh2.nodes[node_index];
354
355        if node.is_leaf() {
356            children[*child_count as usize] = node_index as u32;
357            *child_count += 1;
358            return;
359        }
360
361        let decision = &self.decisions[node_index * 7 + i];
362        let distribute_left = decision.distribute_left;
363        let distribute_right = decision.distribute_right;
364
365        debug_assert!(distribute_left < 7);
366        debug_assert!(distribute_right < 7);
367
368        // Recurse on left child if it needs to distribute
369        if self.decisions[(node.first_index * 7 + distribute_left as u32) as usize].kind
370            == DecisionKind::DISTRIBUTE
371        {
372            self.get_children(
373                node.first_index as usize,
374                children,
375                child_count,
376                distribute_left as usize,
377            );
378        } else {
379            children[*child_count as usize] = node.first_index;
380            *child_count += 1;
381        }
382
383        // Recurse on right child if it needs to distribute
384        if self.decisions[((node.first_index + 1) * 7 + distribute_right as u32) as usize].kind
385            == DecisionKind::DISTRIBUTE
386        {
387            self.get_children(
388                (node.first_index + 1) as usize,
389                children,
390                child_count,
391                distribute_right as usize,
392            );
393        } else {
394            children[*child_count as usize] = node.first_index + 1;
395            *child_count += 1;
396        }
397    }
398
399    /// Arrange child nodes in Morton order according to their centroids so that the order in which the intersected
400    /// children are traversed can be determined by the ray octant.
401    // Based on https://github.com/jan-van-bergen/GPU-Raytracer/blob/6559ae2241c8fdea0ddaec959fe1a47ec9b3ab0d/Src/BVH/Converters/BVH8Converter.cpp#L148
402    pub fn order_children(
403        &mut self,
404        node_index: usize,
405        children: &mut [u32; 8],
406        child_count: usize,
407    ) {
408        let node = &self.bvh2.nodes[node_index];
409        let p = node.aabb().center();
410
411        let mut cost = [[f32::MAX; DIRECTIONS]; BRANCHING];
412
413        assert!(child_count <= BRANCHING);
414        assert!(cost.len() >= child_count);
415        // Fill cost table
416        for s in 0..DIRECTIONS {
417            let d = self.direction_lut[s];
418            for c in 0..child_count {
419                let v = self.bvh2.nodes[children[c] as usize].aabb().center() - p;
420                let cost_slot = unsafe { cost.get_unchecked_mut(c).get_unchecked_mut(s) };
421                *cost_slot = d.dot(v); // No benefit from normalizing
422            }
423        }
424
425        let mut assignment = [INVALID_USIZE; BRANCHING];
426        let mut slot_filled = [false; DIRECTIONS];
427
428        // The paper suggests the auction method, but greedy is almost as good.
429        loop {
430            let mut min_cost = f32::MAX;
431
432            let mut min_slot = INVALID_USIZE;
433            let mut min_index = INVALID_USIZE;
434
435            // Find cheapest unfilled slot of any unassigned child
436            for c in 0..child_count {
437                if assignment[c] == INVALID_USIZE {
438                    for (s, &slot_filled) in slot_filled.iter().enumerate() {
439                        let cost = unsafe { *cost.get_unchecked(c).get_unchecked(s) };
440                        if !slot_filled && cost < min_cost {
441                            min_cost = cost;
442
443                            min_slot = s;
444                            min_index = c;
445                        }
446                    }
447                }
448            }
449
450            if min_slot == INVALID_USIZE {
451                break;
452            }
453
454            slot_filled[min_slot] = true;
455            assignment[min_index] = min_slot;
456        }
457
458        let original_order = std::mem::replace(children, [INVALID32; 8]);
459
460        assert!(assignment.len() >= child_count); // Allow compiler to skip bounds check
461        assert!(original_order.len() >= child_count); // Allow compiler to skip bounds check
462        for i in 0..child_count {
463            debug_assert!(assignment[i] != INVALID_USIZE);
464            debug_assert!(original_order[i] != INVALID32);
465            children[assignment[i]] = original_order[i];
466        }
467    }
468}
469
470#[derive(Copy, Clone, PartialEq, Default)]
471pub enum DecisionKind {
472    LEAF,
473    INTERNAL,
474    #[default]
475    DISTRIBUTE,
476}
477
478#[derive(Copy, Clone, Default)]
479pub struct Decision {
480    pub cost: f32,
481    pub kind: DecisionKind,
482    pub distribute_left: u8,
483    pub distribute_right: u8,
484}
485
486/// Convert the given bvh2 to cwbvh
487/// # Arguments
488/// * `bvh2` - Source BVH
489/// * `max_prims_per_leaf` - 0..=3 The maximum number of primitives per leaf.
490pub fn bvh2_to_cwbvh(
491    bvh2: &Bvh2,
492    max_prims_per_leaf: u32,
493    order_children: bool,
494    include_exact_node_aabbs: bool,
495) -> CwBvh {
496    if bvh2.nodes.is_empty() {
497        return CwBvh::default();
498    }
499    let mut converter = Bvh2Converter::new(bvh2, order_children, include_exact_node_aabbs);
500    converter.calculate_cost(max_prims_per_leaf);
501    converter.convert_to_cwbvh();
502
503    CwBvh {
504        nodes: converter.nodes,
505        primitive_indices: converter.primitive_indices,
506        total_aabb: *bvh2.nodes[0].aabb(),
507        exact_node_aabbs: converter.exact_node_aabbs,
508        uses_spatial_splits: bvh2.uses_spatial_splits,
509    }
510}