obvhs/
lib.rs

1// TODO re-enable needless_range_loop lint and evaluate performance / clarity
2#![allow(clippy::needless_range_loop)]
3
4//! # BVH Construction and Traversal Library
5//!
6//! - [PLOC](https://meistdan.github.io/publications/ploc/paper.pdf) BVH2 builder with [Parallel Reinsertion](https://meistdan.github.io/publications/prbvh/paper.pdf) and spatial pre-splits.
7//! - [CWBVH](https://research.nvidia.com/sites/default/files/publications/ylitie2017hpg-paper.pdf) An eight-way compressed wide BVH8 builder. Each BVH Node is compressed so that it takes up only 80 bytes per node.
8//! - Tools for dynamically updating and optimizing the BVH2. ([Added in 0.3](https://github.com/DGriffin91/obvhs/pull/8))
9//! - CPU traversal for both BVH2 and CWBVH (SIMD traversal, intersecting 4 nodes at a time)
10//! - For GPU traversal example, see the [Tray Racing](https://github.com/DGriffin91/tray_racing) benchmark
11//!
12//! OBVHS optionally uses [rayon](https://github.com/rayon-rs/rayon) to parallelize building.
13//!
14//! ## Example
15//!
16//! ```
17//! use glam::*;
18//! use obvhs::{
19//!     cwbvh::builder::build_cwbvh_from_tris,
20//!     ray::{Ray, RayHit},
21//!     test_util::geometry::{icosphere, PLANE},
22//!     triangle::Triangle,
23//!     BvhBuildParams,
24//! };
25//! use std::time::Duration;
26//!
27//!
28//! // Build a scene with an icosphere and a plane
29//! // BVH primitives do not need to be triangles, the BVH builder is only concerned with AABBs.
30//! // (With the exception of optional precise triangle aabb splitting)
31//! let mut tris: Vec<Triangle> = Vec::new();
32//! tris.extend(icosphere(1));
33//! tris.extend(PLANE);
34//!
35//! // Build the BVH.
36//! // build_cwbvh_from_tris is just a helper that can build from BvhBuildParams and the
37//! // respective presets. Feel free to copy the contents of build_cwbvh_from_tris or
38//! // build_cwbvh. They are very straightforward. If you don't want to use Triangles as the
39//! // primitive, use  build_cwbvh instead. build_cwbvh_from_tris just adds support for
40//! // splitting tris.
41//! let bvh = build_cwbvh_from_tris(
42//!     &tris,
43//!     BvhBuildParams::medium_build(),
44//!     &mut Duration::default(),
45//! );
46//!
47//! // Create a new ray
48//! let ray = Ray::new_inf(vec3a(0.1, 0.1, 4.0), vec3a(0.0, 0.0, -1.0));
49//!
50//! // Traverse the BVH, finding the closest hit.
51//! let mut ray_hit = RayHit::none();
52//! if bvh.ray_traverse(ray, &mut ray_hit, |ray, id| {
53//!     // Use primitive_indices to look up the original primitive id.
54//!     // (Could reorder tris per bvh.primitive_indices to avoid this lookup, see
55//!     // cornell_box_cwbvh example)
56//!     tris[bvh.primitive_indices[id] as usize].intersect(ray)
57//! }) {
58//!     println!(
59//!         "Hit Triangle {}",
60//!         bvh.primitive_indices[ray_hit.primitive_id as usize]
61//!     );
62//!     println!("Distance to hit: {}", ray_hit.t);
63//! } else {
64//!     println!("Miss");
65//! }
66//!
67//! ```
68
69use std::time::Duration;
70
71use aabb::Aabb;
72use glam::Mat4;
73use ploc::{PlocSearchDistance, SortPrecision};
74use triangle::Triangle;
75
76pub mod aabb;
77pub mod bvh2;
78pub mod cwbvh;
79pub mod faststack;
80pub mod ploc;
81pub mod ray;
82pub mod rt_triangle;
83pub mod splits;
84pub mod test_util;
85pub mod triangle;
86
87/// Used to indicate a vacant slot in various contexts.
88#[doc(hidden)]
89pub const INVALID: u32 = u32::MAX;
90
91/// A trait for types that can be bounded by an axis-aligned bounding box (AABB). Used in Bvh2/CwBvh validation.
92#[cfg(feature = "parallel")]
93pub trait Boundable: Send + Sync {
94    fn aabb(&self) -> Aabb;
95}
96
97/// A trait for types that can be bounded by an axis-aligned bounding box (AABB). Used in Bvh2/CwBvh validation.
98#[cfg(not(feature = "parallel"))]
99pub trait Boundable {
100    fn aabb(&self) -> Aabb;
101}
102
103/// A trait for types that can have a matrix transform applied. Primarily for testing/examples.
104pub trait Transformable {
105    fn transform(&mut self, matrix: &Mat4);
106}
107
108/// Apply a function to each component of a type.
109#[doc(hidden)]
110pub trait PerComponent<C1, C2 = C1, Output = Self> {
111    fn per_comp(self, f: impl Fn(C1) -> C2) -> Output;
112}
113
114impl<Input, C1, C2, Output> PerComponent<C1, C2, Output> for Input
115where
116    Input: Into<[C1; 3]>,
117    Output: From<[C2; 3]>,
118{
119    /// Applies a function to each component of the input.
120    fn per_comp(self, f: impl Fn(C1) -> C2) -> Output {
121        let [x, y, z] = self.into();
122        Output::from([f(x), f(y), f(z)])
123    }
124}
125
126#[allow(unused)]
127fn as_slice_of_atomic_u32(slice: &mut [u32]) -> &mut [core::sync::atomic::AtomicU32] {
128    assert_eq!(size_of::<AtomicU32>(), size_of::<u32>());
129    assert_eq!(align_of::<AtomicU32>(), align_of::<u32>());
130    use core::sync::atomic::AtomicU32;
131    let parents: &mut [AtomicU32] = unsafe {
132        core::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut AtomicU32, slice.len())
133    };
134    // Alternatively:
135    //let slice: &mut [AtomicU32] = unsafe { &mut *((slice.as_mut_slice() as *mut [u32]) as *mut [AtomicU32]) };
136    parents
137}
138
139/// A macro to measure and print the execution time of a block of code.
140///
141/// # Arguments
142/// * `$label` - A string label to identify the code block being timed.
143/// * `$($code:tt)*` - The code block whose execution time is to be measured.
144///
145/// # Usage
146/// ```rust
147/// use obvhs::timeit;
148/// timeit!["example",
149///     // code to measure
150/// ];
151/// ```
152///
153/// # Note
154/// The macro purposefully doesn't include a scope so variables don't need to
155/// be passed out of it. This allows it to be trivially added to existing code.
156///
157/// This macro only measures time when the `timeit` feature is enabled.
158#[macro_export]
159#[doc(hidden)]
160macro_rules! timeit {
161    [$label:expr, $($code:tt)*] => {
162        #[cfg(feature = "timeit")]
163        let timeit_start = std::time::Instant::now();
164        $($code)*
165        #[cfg(feature = "timeit")]
166        println!("{:>8} {}", format!("{}", $crate::PrettyDuration(timeit_start.elapsed())), $label);
167    };
168}
169
170/// A wrapper struct for `std::time::Duration` to provide pretty-printing of durations.
171#[doc(hidden)]
172pub struct PrettyDuration(pub Duration);
173
174impl std::fmt::Display for PrettyDuration {
175    /// Durations are formatted as follows:
176    /// - If the duration is greater than or equal to 1 second, it is formatted in seconds (s).
177    /// - If the duration is greater than or equal to 1 millisecond but less than 1 second, it is formatted in milliseconds (ms).
178    /// - If the duration is less than 1 millisecond, it is formatted in microseconds (µs).
179    ///   In the case of seconds & milliseconds, the duration is always printed with a precision of two decimal places.
180    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
181        let duration = self.0;
182        if duration.as_secs() > 0 {
183            let seconds =
184                duration.as_secs() as f64 + f64::from(duration.subsec_nanos()) / 1_000_000_000.0;
185            write!(f, "{seconds:.2}s ")
186        } else if duration.subsec_millis() > 0 {
187            let milliseconds =
188                duration.as_millis() as f64 + f64::from(duration.subsec_micros() % 1_000) / 1_000.0;
189            write!(f, "{milliseconds:.2}ms")
190        } else {
191            let microseconds = duration.as_micros();
192            write!(f, "{microseconds}µs")
193        }
194    }
195}
196
197/// Add profile scope. Nesting the macro allows us to make the profiling crate optional.
198#[doc(hidden)]
199#[macro_export]
200macro_rules! scope {
201    [$label:expr] => {
202        #[cfg(feature = "profile")]
203        profiling::scope!($label);
204    };
205}
206
207/// General build parameters for Bvh2 & CwBvh
208#[derive(Clone, Copy, Debug)]
209pub struct BvhBuildParams {
210    /// Split large tris into multiple AABBs
211    pub pre_split: bool,
212    /// In ploc, the number of nodes before and after the current one that are evaluated for pairing. 1 has a
213    /// fast path in building and still results in decent quality BVHs esp. when paired with a bit of reinsertion.
214    pub ploc_search_distance: PlocSearchDistance,
215    /// Below this depth a search distance of 1 will be used for ploc.
216    pub search_depth_threshold: usize,
217    /// Typically 0..1: ratio of nodes considered as candidates for reinsertion. Above 1 to evaluate the whole set
218    /// multiple times. A little goes a long way. Try 0.01 or even 0.001 before disabling for build performance.
219    pub reinsertion_batch_ratio: f32,
220    /// For Bvh2 only, a second pass of reinsertion after collapse. Since collapse reduces the node count,
221    /// this reinsertion pass will be faster. 0 to disable. Relative to the initial reinsertion_batch_ratio.
222    pub post_collapse_reinsertion_batch_ratio_multiplier: f32,
223    /// Bits used for ploc radix sort.
224    pub sort_precision: SortPrecision,
225    /// Min 1 (CwBvh will clamp to max 3, Bvh2 will clamp to max 255)
226    pub max_prims_per_leaf: u32,
227    /// Multiplier for traversal cost calculation during Bvh2 collapse (Does not affect CwBvh). A higher value will
228    /// result in more primitives per leaf.
229    pub collapse_traversal_cost: f32,
230}
231
232impl BvhBuildParams {
233    pub const fn fastest_build() -> Self {
234        BvhBuildParams {
235            pre_split: false,
236            ploc_search_distance: PlocSearchDistance::Minimum,
237            search_depth_threshold: 0,
238            reinsertion_batch_ratio: 0.0,
239            post_collapse_reinsertion_batch_ratio_multiplier: 0.0,
240            sort_precision: SortPrecision::U64,
241            max_prims_per_leaf: 1,
242            collapse_traversal_cost: 1.0,
243        }
244    }
245    pub const fn very_fast_build() -> Self {
246        BvhBuildParams {
247            pre_split: false,
248            ploc_search_distance: PlocSearchDistance::Minimum,
249            search_depth_threshold: 0,
250            reinsertion_batch_ratio: 0.01,
251            post_collapse_reinsertion_batch_ratio_multiplier: 0.0,
252            sort_precision: SortPrecision::U64,
253            max_prims_per_leaf: 8,
254            collapse_traversal_cost: 3.0,
255        }
256    }
257    pub const fn fast_build() -> Self {
258        BvhBuildParams {
259            pre_split: false,
260            ploc_search_distance: PlocSearchDistance::Low,
261            search_depth_threshold: 2,
262            reinsertion_batch_ratio: 0.02,
263            post_collapse_reinsertion_batch_ratio_multiplier: 0.0,
264            sort_precision: SortPrecision::U64,
265            max_prims_per_leaf: 8,
266            collapse_traversal_cost: 3.0,
267        }
268    }
269    /// Tries to be around the same build time as embree but with faster traversal
270    pub const fn medium_build() -> Self {
271        BvhBuildParams {
272            pre_split: false,
273            ploc_search_distance: PlocSearchDistance::Medium,
274            search_depth_threshold: 3,
275            reinsertion_batch_ratio: 0.05,
276            post_collapse_reinsertion_batch_ratio_multiplier: 2.0,
277            sort_precision: SortPrecision::U64,
278            max_prims_per_leaf: 8,
279            collapse_traversal_cost: 3.0,
280        }
281    }
282    pub const fn slow_build() -> Self {
283        BvhBuildParams {
284            pre_split: true,
285            ploc_search_distance: PlocSearchDistance::High,
286            search_depth_threshold: 2,
287            reinsertion_batch_ratio: 0.2,
288            post_collapse_reinsertion_batch_ratio_multiplier: 2.0,
289            sort_precision: SortPrecision::U128,
290            max_prims_per_leaf: 8,
291            collapse_traversal_cost: 3.0,
292        }
293    }
294    pub const fn very_slow_build() -> Self {
295        BvhBuildParams {
296            pre_split: true,
297            ploc_search_distance: PlocSearchDistance::Medium,
298            search_depth_threshold: 1,
299            reinsertion_batch_ratio: 1.0,
300            post_collapse_reinsertion_batch_ratio_multiplier: 1.0,
301            sort_precision: SortPrecision::U128,
302            max_prims_per_leaf: 8,
303            collapse_traversal_cost: 3.0,
304        }
305    }
306}