obvhs/lib.rs
1// TODO re-enable needless_range_loop lint and evaluate performance / clarity
2#![allow(clippy::needless_range_loop)]
3
4//! # BVH Construction and Traversal Library
5//!
6//! - [PLOC](https://meistdan.github.io/publications/ploc/paper.pdf) BVH2 builder with [Parallel Reinsertion](https://meistdan.github.io/publications/prbvh/paper.pdf) and spatial pre-splits.
7//! - [CWBVH](https://research.nvidia.com/sites/default/files/publications/ylitie2017hpg-paper.pdf) An eight-way compressed wide BVH8 builder. Each BVH Node is compressed so that it takes up only 80 bytes per node.
8//! - Tools for dynamically updating and optimizing the BVH2. ([Added in 0.3](https://github.com/DGriffin91/obvhs/pull/8))
9//! - CPU traversal for both BVH2 and CWBVH (SIMD traversal, intersecting 4 nodes at a time)
10//! - For GPU traversal example, see the [Tray Racing](https://github.com/DGriffin91/tray_racing) benchmark
11//!
12//! OBVHS optionally uses [rayon](https://github.com/rayon-rs/rayon) to parallelize building.
13//!
14//! ## Example
15//!
16//! ```
17//! use glam::*;
18//! use obvhs::{
19//! cwbvh::builder::build_cwbvh_from_tris,
20//! ray::{Ray, RayHit},
21//! test_util::geometry::{icosphere, PLANE},
22//! triangle::Triangle,
23//! BvhBuildParams,
24//! };
25//! use std::time::Duration;
26//!
27//!
28//! // Build a scene with an icosphere and a plane
29//! // BVH primitives do not need to be triangles, the BVH builder is only concerned with AABBs.
30//! // (With the exception of optional precise triangle aabb splitting)
31//! let mut tris: Vec<Triangle> = Vec::new();
32//! tris.extend(icosphere(1));
33//! tris.extend(PLANE);
34//!
35//! // Build the BVH.
36//! // build_cwbvh_from_tris is just a helper that can build from BvhBuildParams and the
37//! // respective presets. Feel free to copy the contents of build_cwbvh_from_tris or
38//! // build_cwbvh. They are very straightforward. If you don't want to use Triangles as the
39//! // primitive, use build_cwbvh instead. build_cwbvh_from_tris just adds support for
40//! // splitting tris.
41//! let bvh = build_cwbvh_from_tris(
42//! &tris,
43//! BvhBuildParams::medium_build(),
44//! &mut Duration::default(),
45//! );
46//!
47//! // Create a new ray
48//! let ray = Ray::new_inf(vec3a(0.1, 0.1, 4.0), vec3a(0.0, 0.0, -1.0));
49//!
50//! // Traverse the BVH, finding the closest hit.
51//! let mut ray_hit = RayHit::none();
52//! if bvh.ray_traverse(ray, &mut ray_hit, |ray, id| {
53//! // Use primitive_indices to look up the original primitive id.
54//! // (Could reorder tris per bvh.primitive_indices to avoid this lookup, see
55//! // cornell_box_cwbvh example)
56//! tris[bvh.primitive_indices[id] as usize].intersect(ray)
57//! }) {
58//! println!(
59//! "Hit Triangle {}",
60//! bvh.primitive_indices[ray_hit.primitive_id as usize]
61//! );
62//! println!("Distance to hit: {}", ray_hit.t);
63//! } else {
64//! println!("Miss");
65//! }
66//!
67//! ```
68
69use std::time::Duration;
70
71use aabb::Aabb;
72use glam::Mat4;
73use ploc::{PlocSearchDistance, SortPrecision};
74use triangle::Triangle;
75
76pub mod aabb;
77pub mod bvh2;
78pub mod cwbvh;
79pub mod faststack;
80pub mod ploc;
81pub mod ray;
82pub mod rt_triangle;
83pub mod splits;
84pub mod test_util;
85pub mod triangle;
86
87/// Used to indicate a vacant slot in various contexts.
88#[doc(hidden)]
89pub const INVALID: u32 = u32::MAX;
90
91/// A trait for types that can be bounded by an axis-aligned bounding box (AABB). Used in Bvh2/CwBvh validation.
92#[cfg(feature = "parallel")]
93pub trait Boundable: Send + Sync {
94 fn aabb(&self) -> Aabb;
95}
96
97/// A trait for types that can be bounded by an axis-aligned bounding box (AABB). Used in Bvh2/CwBvh validation.
98#[cfg(not(feature = "parallel"))]
99pub trait Boundable {
100 fn aabb(&self) -> Aabb;
101}
102
103/// A trait for types that can have a matrix transform applied. Primarily for testing/examples.
104pub trait Transformable {
105 fn transform(&mut self, matrix: &Mat4);
106}
107
108/// Apply a function to each component of a type.
109#[doc(hidden)]
110pub trait PerComponent<C1, C2 = C1, Output = Self> {
111 fn per_comp(self, f: impl Fn(C1) -> C2) -> Output;
112}
113
114impl<Input, C1, C2, Output> PerComponent<C1, C2, Output> for Input
115where
116 Input: Into<[C1; 3]>,
117 Output: From<[C2; 3]>,
118{
119 /// Applies a function to each component of the input.
120 fn per_comp(self, f: impl Fn(C1) -> C2) -> Output {
121 let [x, y, z] = self.into();
122 Output::from([f(x), f(y), f(z)])
123 }
124}
125
126#[allow(unused)]
127fn as_slice_of_atomic_u32(slice: &mut [u32]) -> &mut [core::sync::atomic::AtomicU32] {
128 assert_eq!(size_of::<AtomicU32>(), size_of::<u32>());
129 assert_eq!(align_of::<AtomicU32>(), align_of::<u32>());
130 use core::sync::atomic::AtomicU32;
131 let parents: &mut [AtomicU32] = unsafe {
132 core::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut AtomicU32, slice.len())
133 };
134 // Alternatively:
135 //let slice: &mut [AtomicU32] = unsafe { &mut *((slice.as_mut_slice() as *mut [u32]) as *mut [AtomicU32]) };
136 parents
137}
138
139/// A macro to measure and print the execution time of a block of code.
140///
141/// # Arguments
142/// * `$label` - A string label to identify the code block being timed.
143/// * `$($code:tt)*` - The code block whose execution time is to be measured.
144///
145/// # Usage
146/// ```rust
147/// use obvhs::timeit;
148/// timeit!["example",
149/// // code to measure
150/// ];
151/// ```
152///
153/// # Note
154/// The macro purposefully doesn't include a scope so variables don't need to
155/// be passed out of it. This allows it to be trivially added to existing code.
156///
157/// This macro only measures time when the `timeit` feature is enabled.
158#[macro_export]
159#[doc(hidden)]
160macro_rules! timeit {
161 [$label:expr, $($code:tt)*] => {
162 #[cfg(feature = "timeit")]
163 let timeit_start = std::time::Instant::now();
164 $($code)*
165 #[cfg(feature = "timeit")]
166 println!("{:>8} {}", format!("{}", $crate::PrettyDuration(timeit_start.elapsed())), $label);
167 };
168}
169
170/// A wrapper struct for `std::time::Duration` to provide pretty-printing of durations.
171#[doc(hidden)]
172pub struct PrettyDuration(pub Duration);
173
174impl std::fmt::Display for PrettyDuration {
175 /// Durations are formatted as follows:
176 /// - If the duration is greater than or equal to 1 second, it is formatted in seconds (s).
177 /// - If the duration is greater than or equal to 1 millisecond but less than 1 second, it is formatted in milliseconds (ms).
178 /// - If the duration is less than 1 millisecond, it is formatted in microseconds (µs).
179 /// In the case of seconds & milliseconds, the duration is always printed with a precision of two decimal places.
180 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
181 let duration = self.0;
182 if duration.as_secs() > 0 {
183 let seconds =
184 duration.as_secs() as f64 + f64::from(duration.subsec_nanos()) / 1_000_000_000.0;
185 write!(f, "{seconds:.2}s ")
186 } else if duration.subsec_millis() > 0 {
187 let milliseconds =
188 duration.as_millis() as f64 + f64::from(duration.subsec_micros() % 1_000) / 1_000.0;
189 write!(f, "{milliseconds:.2}ms")
190 } else {
191 let microseconds = duration.as_micros();
192 write!(f, "{microseconds}µs")
193 }
194 }
195}
196
197/// Add profile scope. Nesting the macro allows us to make the profiling crate optional.
198#[doc(hidden)]
199#[macro_export]
200macro_rules! scope {
201 [$label:expr] => {
202 #[cfg(feature = "profile")]
203 profiling::scope!($label);
204 };
205}
206
207/// General build parameters for Bvh2 & CwBvh
208#[derive(Clone, Copy, Debug)]
209pub struct BvhBuildParams {
210 /// Split large tris into multiple AABBs
211 pub pre_split: bool,
212 /// In ploc, the number of nodes before and after the current one that are evaluated for pairing. 1 has a
213 /// fast path in building and still results in decent quality BVHs esp. when paired with a bit of reinsertion.
214 pub ploc_search_distance: PlocSearchDistance,
215 /// Below this depth a search distance of 1 will be used for ploc.
216 pub search_depth_threshold: usize,
217 /// Typically 0..1: ratio of nodes considered as candidates for reinsertion. Above 1 to evaluate the whole set
218 /// multiple times. A little goes a long way. Try 0.01 or even 0.001 before disabling for build performance.
219 pub reinsertion_batch_ratio: f32,
220 /// For Bvh2 only, a second pass of reinsertion after collapse. Since collapse reduces the node count,
221 /// this reinsertion pass will be faster. 0 to disable. Relative to the initial reinsertion_batch_ratio.
222 pub post_collapse_reinsertion_batch_ratio_multiplier: f32,
223 /// Bits used for ploc radix sort.
224 pub sort_precision: SortPrecision,
225 /// Min 1 (CwBvh will clamp to max 3, Bvh2 will clamp to max 255)
226 pub max_prims_per_leaf: u32,
227 /// Multiplier for traversal cost calculation during Bvh2 collapse (Does not affect CwBvh). A higher value will
228 /// result in more primitives per leaf.
229 pub collapse_traversal_cost: f32,
230}
231
232impl BvhBuildParams {
233 pub const fn fastest_build() -> Self {
234 BvhBuildParams {
235 pre_split: false,
236 ploc_search_distance: PlocSearchDistance::Minimum,
237 search_depth_threshold: 0,
238 reinsertion_batch_ratio: 0.0,
239 post_collapse_reinsertion_batch_ratio_multiplier: 0.0,
240 sort_precision: SortPrecision::U64,
241 max_prims_per_leaf: 1,
242 collapse_traversal_cost: 1.0,
243 }
244 }
245 pub const fn very_fast_build() -> Self {
246 BvhBuildParams {
247 pre_split: false,
248 ploc_search_distance: PlocSearchDistance::Minimum,
249 search_depth_threshold: 0,
250 reinsertion_batch_ratio: 0.01,
251 post_collapse_reinsertion_batch_ratio_multiplier: 0.0,
252 sort_precision: SortPrecision::U64,
253 max_prims_per_leaf: 8,
254 collapse_traversal_cost: 3.0,
255 }
256 }
257 pub const fn fast_build() -> Self {
258 BvhBuildParams {
259 pre_split: false,
260 ploc_search_distance: PlocSearchDistance::Low,
261 search_depth_threshold: 2,
262 reinsertion_batch_ratio: 0.02,
263 post_collapse_reinsertion_batch_ratio_multiplier: 0.0,
264 sort_precision: SortPrecision::U64,
265 max_prims_per_leaf: 8,
266 collapse_traversal_cost: 3.0,
267 }
268 }
269 /// Tries to be around the same build time as embree but with faster traversal
270 pub const fn medium_build() -> Self {
271 BvhBuildParams {
272 pre_split: false,
273 ploc_search_distance: PlocSearchDistance::Medium,
274 search_depth_threshold: 3,
275 reinsertion_batch_ratio: 0.05,
276 post_collapse_reinsertion_batch_ratio_multiplier: 2.0,
277 sort_precision: SortPrecision::U64,
278 max_prims_per_leaf: 8,
279 collapse_traversal_cost: 3.0,
280 }
281 }
282 pub const fn slow_build() -> Self {
283 BvhBuildParams {
284 pre_split: true,
285 ploc_search_distance: PlocSearchDistance::High,
286 search_depth_threshold: 2,
287 reinsertion_batch_ratio: 0.2,
288 post_collapse_reinsertion_batch_ratio_multiplier: 2.0,
289 sort_precision: SortPrecision::U128,
290 max_prims_per_leaf: 8,
291 collapse_traversal_cost: 3.0,
292 }
293 }
294 pub const fn very_slow_build() -> Self {
295 BvhBuildParams {
296 pre_split: true,
297 ploc_search_distance: PlocSearchDistance::Medium,
298 search_depth_threshold: 1,
299 reinsertion_batch_ratio: 1.0,
300 post_collapse_reinsertion_batch_ratio_multiplier: 1.0,
301 sort_precision: SortPrecision::U128,
302 max_prims_per_leaf: 8,
303 collapse_traversal_cost: 3.0,
304 }
305 }
306}