obvhs/
rt_triangle.rs

1//! Triangle types optimized for ray intersection performance.
2
3use bytemuck::{Pod, Zeroable};
4use glam::*;
5
6use half::f16;
7
8use crate::{Boundable, aabb::Aabb, ray::Ray, triangle::Triangle};
9
10#[derive(Clone, Copy, Default, PartialEq)]
11#[repr(C)]
12/// A compressed 3D triangle optimized for GPU ray intersection performance.
13pub struct RtCompressedTriangle {
14    /// Base vertex
15    pub v0: [f32; 3],
16    /// Edges 1 & 2 encoded as IEEE 754 f16 `v1 - v0, v2 - v0`
17    pub e1_e2: [u16; 6],
18}
19
20unsafe impl Pod for RtCompressedTriangle {}
21unsafe impl Zeroable for RtCompressedTriangle {}
22
23impl From<&Triangle> for RtCompressedTriangle {
24    #[inline(always)]
25    fn from(tri: &Triangle) -> Self {
26        RtCompressedTriangle::new(tri.v0, tri.v1, tri.v2)
27    }
28}
29
30impl RtCompressedTriangle {
31    #[inline(always)]
32    pub fn new(v0: Vec3A, v1: Vec3A, v2: Vec3A) -> Self {
33        let e1 = v1 - v0;
34        let e2 = v2 - v0;
35
36        Self {
37            v0: [v0.x, v0.y, v0.z],
38            e1_e2: [
39                f16::from_f32(e1.x).to_bits(),
40                f16::from_f32(e2.x).to_bits(),
41                f16::from_f32(e1.y).to_bits(),
42                f16::from_f32(e2.y).to_bits(),
43                f16::from_f32(e1.z).to_bits(),
44                f16::from_f32(e2.z).to_bits(),
45            ],
46        }
47    }
48
49    #[inline(always)]
50    pub fn vertices(&self) -> [Vec3A; 3] {
51        let (v0, e1, e2) = self.unpack();
52        let v1 = v0 + e1;
53        let v2 = v0 + e2;
54        [v0, v1, v2]
55    }
56
57    #[inline(always)]
58    pub fn aabb(&self) -> Aabb {
59        Aabb::from_points(&self.vertices())
60    }
61
62    #[inline(always)]
63    pub fn compute_normal(&self) -> Vec3A {
64        let (_v0, e1, e2) = self.unpack();
65        ((e1).cross(e2)).normalize_or_zero()
66    }
67
68    /// Find the distance (t) of the intersection of the `Ray` and this Triangle.
69    /// Returns f32::INFINITY for miss.
70    #[inline(always)]
71    pub fn intersect(&self, ray: &Ray) -> f32 {
72        // TODO not very water tight from the back side in some contexts (tris with edges at 0,0,0 show 1px gap)
73        // Find out if this is typical of Möller
74        // Based on Fast Minimum Storage Ray Triangle Intersection by T. Möller and B. Trumbore
75        // https://madmann91.github.io/2021/04/29/an-introduction-to-bvhs.html
76
77        let (v0, e1, e2) = self.unpack();
78        let ng = (-e1).cross(e2);
79
80        let cull_backface = false;
81
82        let c = v0 - ray.origin;
83        let r = ray.direction.cross(c);
84        let inv_det = 1.0 / ng.dot(ray.direction);
85
86        let u = r.dot(e2) * inv_det;
87        let v = r.dot(-e1) * inv_det;
88        let w = 1.0 - u - v;
89
90        // Original:
91        //let hit = u >= 0.0 && v >= 0.0 && w >= 0.0;
92        //let valid = if cull_backface {
93        //    inv_det > 0.0 && hit
94        //} else {
95        //    inv_det != 0.0 && hit
96        //};
97
98        // Note: differs in that if v == -0.0, for example will cause valid to be false
99        let hit = u.to_bits() | v.to_bits() | w.to_bits();
100        let valid = if cull_backface {
101            (inv_det.to_bits() | hit) & 0x8000_0000 == 0
102        } else {
103            inv_det != 0.0 && hit & 0x8000_0000 == 0
104        };
105
106        if valid {
107            let t = ng.dot(c) * inv_det;
108            if t >= ray.tmin && t <= ray.tmax {
109                return t;
110            }
111        }
112
113        f32::INFINITY
114    }
115
116    pub fn unpack(&self) -> (Vec3A, Vec3A, Vec3A) {
117        let v0: Vec3A = self.v0.into();
118        let e1x = f16::from_bits(self.e1_e2[0]).to_f32();
119        let e2x = f16::from_bits(self.e1_e2[1]).to_f32();
120        let e1y = f16::from_bits(self.e1_e2[2]).to_f32();
121        let e2y = f16::from_bits(self.e1_e2[3]).to_f32();
122        let e1z = f16::from_bits(self.e1_e2[4]).to_f32();
123        let e2z = f16::from_bits(self.e1_e2[5]).to_f32();
124        let e1 = Vec3A::new(e1x, e1y, e1z);
125        let e2 = Vec3A::new(e2x, e2y, e2z);
126        (v0, e1, e2)
127    }
128
129    #[inline(always)]
130    pub fn compute_barycentric(&self, ray: &Ray) -> Vec2 {
131        let (v0, e1, e2) = self.unpack();
132        let ng = (-e1).cross(e2);
133        let r = ray.direction.cross(v0 - ray.origin);
134        vec2(r.dot(e2), r.dot(-e1)) / ng.dot(ray.direction)
135    }
136}
137
138impl Boundable for RtCompressedTriangle {
139    #[inline(always)]
140    fn aabb(&self) -> Aabb {
141        self.aabb()
142    }
143}
144
145#[derive(Clone, Copy, Default, PartialEq)]
146/// A 3D triangle optimized for CPU ray intersection performance.
147pub struct RtTriangle {
148    /// Base vertex
149    pub v0: Vec3A,
150    /// Edge 1 `v0 - v1`
151    pub e1: Vec3A,
152    /// Edge 2 `v2 - v0`
153    pub e2: Vec3A,
154    /// Geometric normal `e1.cross(e2)`.
155    /// Optimized for intersection.
156    /// Needs to be inverted for typical normal.
157    pub ng: Vec3A,
158}
159
160impl From<&Triangle> for RtTriangle {
161    #[inline(always)]
162    fn from(tri: &Triangle) -> Self {
163        RtTriangle::new(tri.v0, tri.v1, tri.v2)
164    }
165}
166
167// Uses layout from https://github.com/madmann91/bvh/blob/master/src/bvh/v2/tri.h#L36
168// to optimize for intersection. On the CPU this is a bit faster than e1 = v1 - v0; e2 = v2 - v0;
169impl RtTriangle {
170    #[inline(always)]
171    pub fn new(v0: Vec3A, v1: Vec3A, v2: Vec3A) -> Self {
172        let e1 = v0 - v1;
173        let e2 = v2 - v0;
174        Self {
175            v0,
176            e1,
177            e2,
178            ng: e1.cross(e2),
179        }
180    }
181
182    #[inline(always)]
183    fn vertices(&self) -> [Vec3A; 3] {
184        [self.v0, self.v0 - self.e1, self.v0 + self.e2]
185    }
186
187    #[inline(always)]
188    pub fn aabb(&self) -> Aabb {
189        Aabb::from_points(&self.vertices())
190    }
191
192    #[inline(always)]
193    pub fn compute_normal(&self) -> Vec3A {
194        -self.ng.normalize_or_zero()
195    }
196
197    /// Find the distance (t) of the intersection of the `Ray` and this Triangle.
198    /// Returns f32::INFINITY for miss.
199    #[inline(always)]
200    pub fn intersect(&self, ray: &Ray) -> f32 {
201        // TODO not very water tight from the back side in some contexts (tris with edges at 0,0,0 show 1px gap)
202        // Find out if this is typical of Möller
203        // Based on Fast Minimum Storage Ray Triangle Intersection by T. Möller and B. Trumbore
204        // https://madmann91.github.io/2021/04/29/an-introduction-to-bvhs.html
205        let cull_backface = false;
206
207        let c = self.v0 - ray.origin;
208        let r = ray.direction.cross(c);
209        let inv_det = 1.0 / self.ng.dot(ray.direction);
210
211        let u = r.dot(self.e2) * inv_det;
212        let v = r.dot(self.e1) * inv_det;
213        let w = 1.0 - u - v;
214
215        // Original:
216        //let hit = u >= 0.0 && v >= 0.0 && w >= 0.0;
217        //let valid = if cull_backface {
218        //    inv_det > 0.0 && hit
219        //} else {
220        //    inv_det != 0.0 && hit
221        //};
222
223        // Note: differs in that if v == -0.0, for example will cause valid to be false
224        let hit = u.to_bits() | v.to_bits() | w.to_bits();
225        let valid = if cull_backface {
226            (inv_det.to_bits() | hit) & 0x8000_0000 == 0
227        } else {
228            inv_det != 0.0 && hit & 0x8000_0000 == 0
229        };
230
231        if valid {
232            let t = self.ng.dot(c) * inv_det;
233            if t >= ray.tmin && t <= ray.tmax {
234                return t;
235            }
236        }
237
238        f32::INFINITY
239    }
240
241    // https://github.com/RenderKit/embree/blob/0c236df6f31a8e9c8a48803dada333e9ea0029a6/kernels/geometry/triangle_intersector_moeller.h#L9
242    #[cfg(all(
243        any(target_arch = "x86", target_arch = "x86_64"),
244        target_feature = "sse2"
245    ))]
246    pub fn intersect_embree(&self, ray: &Ray) -> f32 {
247        // Not watertight from the front side? Looks similar to what intersect() above looks like from the back side.
248
249        // This uses the orientation from https://madmann91.github.io/2021/04/29/an-introduction-to-bvhs.html
250
251        let cull_backface = false;
252
253        // Calculate denominator
254        let o = ray.origin;
255        let d = ray.direction;
256        let c = self.v0 - o;
257        let r = c.cross(d);
258        let den = (-self.ng).dot(d);
259        let abs_den = den.abs();
260
261        fn signmsk(x: f32) -> f32 {
262            #[cfg(target_arch = "x86")]
263            use std::arch::x86::*;
264            #[cfg(target_arch = "x86_64")]
265            use std::arch::x86_64::*;
266            unsafe {
267                let mask = _mm_set1_ps(-0.0);
268                let x_vec = _mm_set_ss(x);
269                let sign_bit = _mm_and_ps(x_vec, mask);
270                _mm_cvtss_f32(sign_bit)
271                //_mm_cvtss_f32(_mm_and_ps(
272                //    _mm_set_ss(x),
273                //    _mm_castsi128_ps(_mm_set1_epi32(-2147483648i32)),
274                //))
275            }
276        }
277
278        let sgn_den = signmsk(den).to_bits();
279
280        // Perform edge tests
281        let u = f32::from_bits(r.dot(self.e2).to_bits() ^ sgn_den);
282        let v = f32::from_bits(r.dot(self.e1).to_bits() ^ sgn_den);
283        // TODO simd uv?
284
285        // Perform backface culling
286        // Original:
287        //let valid = if cull_backface {
288        //    den < 0.0 && u >= 0.0 && v >= 0.0 && u + v <= abs_den
289        //} else {
290        //    den != 0.0 && u >= 0.0 && v >= 0.0 && u + v <= abs_den
291        //};
292
293        let w = abs_den - u - v;
294        let valid = if cull_backface {
295            ((-den).to_bits() | u.to_bits() | v.to_bits() | (abs_den - u - v).to_bits())
296                & 0x8000_0000
297                == 0
298        } else {
299            den != 0.0 && ((u.to_bits() | v.to_bits() | w.to_bits()) & 0x8000_0000) == 0
300        };
301
302        if !valid {
303            return f32::INFINITY;
304        }
305
306        // Perform depth test
307        let t = f32::from_bits((-self.ng).dot(c).to_bits() ^ sgn_den);
308
309        if abs_den * ray.tmin < t && t <= abs_den * ray.tmax {
310            return t;
311        }
312
313        f32::INFINITY
314    }
315
316    #[inline(always)]
317    pub fn compute_barycentric(&self, ray: &Ray) -> Vec2 {
318        let r = ray.direction.cross(self.v0 - ray.origin);
319        vec2(r.dot(self.e2), r.dot(self.e1)) / self.ng.dot(ray.direction)
320    }
321}
322
323impl Boundable for RtTriangle {
324    #[inline(always)]
325    fn aabb(&self) -> Aabb {
326        self.aabb()
327    }
328}