wide/
f64x2_.rs

1use super::*;
2
3pick! {
4  if #[cfg(target_feature="sse2")] {
5    #[derive(Default, Clone, Copy, PartialEq)]
6    #[repr(C, align(16))]
7    pub struct f64x2 { pub(crate) sse: m128d }
8  } else if #[cfg(target_feature="simd128")] {
9    use core::arch::wasm32::*;
10
11    #[derive(Clone, Copy)]
12    #[repr(transparent)]
13    pub struct f64x2 { pub(crate) simd: v128 }
14
15    impl Default for f64x2 {
16      fn default() -> Self {
17        Self::splat(0.0)
18      }
19    }
20
21    impl PartialEq for f64x2 {
22      fn eq(&self, other: &Self) -> bool {
23        u64x2_all_true(f64x2_eq(self.simd, other.simd))
24      }
25    }
26  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
27    use core::arch::aarch64::*;
28    #[repr(C)]
29    #[derive(Copy, Clone)]
30    pub struct f64x2 { pub(crate) neon: float64x2_t }
31
32    impl Default for f64x2 {
33      #[inline]
34      #[must_use]
35      fn default() -> Self {
36        unsafe { Self { neon: vdupq_n_f64(0.0)} }
37      }
38    }
39
40    impl PartialEq for f64x2 {
41      #[inline]
42      #[must_use]
43      fn eq(&self, other: &Self) -> bool {
44        unsafe
45        { let e = vceqq_f64(self.neon, other.neon);
46          vgetq_lane_u64(e,0) == u64::MAX && vgetq_lane_u64(e,1) == u64::MAX
47        }
48      }
49
50    }
51  } else {
52    #[derive(Default, Clone, Copy, PartialEq)]
53    #[repr(C, align(16))]
54    pub struct f64x2 { pub(crate) arr: [f64;2] }
55  }
56}
57
/// Declares `$name` as a `pub const` of type `f64x2` whose two lanes both
/// hold `$value`.
macro_rules! const_f64_as_f64x2 {
  ($name:ident, $value:expr) => {
    // Callers use mixed-case names (e.g. polynomial coefficients), so the
    // upper-case-globals lint is silenced for the generated item.
    #[allow(non_upper_case_globals)]
    pub const $name: f64x2 = f64x2::new([$value; 2]);
  };
}
64
65impl f64x2 {
66  const_f64_as_f64x2!(ONE, 1.0);
67  const_f64_as_f64x2!(ZERO, 0.0);
68  const_f64_as_f64x2!(HALF, 0.5);
69  const_f64_as_f64x2!(E, core::f64::consts::E);
70  const_f64_as_f64x2!(FRAC_1_PI, core::f64::consts::FRAC_1_PI);
71  const_f64_as_f64x2!(FRAC_2_PI, core::f64::consts::FRAC_2_PI);
72  const_f64_as_f64x2!(FRAC_2_SQRT_PI, core::f64::consts::FRAC_2_SQRT_PI);
73  const_f64_as_f64x2!(FRAC_1_SQRT_2, core::f64::consts::FRAC_1_SQRT_2);
74  const_f64_as_f64x2!(FRAC_PI_2, core::f64::consts::FRAC_PI_2);
75  const_f64_as_f64x2!(FRAC_PI_3, core::f64::consts::FRAC_PI_3);
76  const_f64_as_f64x2!(FRAC_PI_4, core::f64::consts::FRAC_PI_4);
77  const_f64_as_f64x2!(FRAC_PI_6, core::f64::consts::FRAC_PI_6);
78  const_f64_as_f64x2!(FRAC_PI_8, core::f64::consts::FRAC_PI_8);
79  const_f64_as_f64x2!(LN_2, core::f64::consts::LN_2);
80  const_f64_as_f64x2!(LN_10, core::f64::consts::LN_10);
81  const_f64_as_f64x2!(LOG2_E, core::f64::consts::LOG2_E);
82  const_f64_as_f64x2!(LOG10_E, core::f64::consts::LOG10_E);
83  const_f64_as_f64x2!(LOG10_2, core::f64::consts::LOG10_2);
84  const_f64_as_f64x2!(LOG2_10, core::f64::consts::LOG2_10);
85  const_f64_as_f64x2!(PI, core::f64::consts::PI);
86  const_f64_as_f64x2!(SQRT_2, core::f64::consts::SQRT_2);
87  const_f64_as_f64x2!(TAU, core::f64::consts::TAU);
88}
89
// SAFETY: NOTE(review): relies on every cfg variant of `f64x2` being a
// single-field wrapper over two `f64` lanes for which all-zero bytes are a
// valid value — confirm for the SIMD-backed field types.
unsafe impl Zeroable for f64x2 {}
// SAFETY: NOTE(review): relies on every cfg variant of `f64x2` being `Copy`,
// padding-free and valid for any bit pattern — confirm for the SIMD-backed
// field types.
unsafe impl Pod for f64x2 {}
92
93impl Add for f64x2 {
94  type Output = Self;
95  #[inline]
96  #[must_use]
97  fn add(self, rhs: Self) -> Self::Output {
98    pick! {
99      if #[cfg(target_feature="sse2")] {
100        Self { sse: add_m128d(self.sse, rhs.sse) }
101      } else if #[cfg(target_feature="simd128")] {
102        Self { simd: f64x2_add(self.simd, rhs.simd) }
103      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
104        unsafe { Self { neon: vaddq_f64(self.neon, rhs.neon) } }
105      } else {
106        Self { arr: [
107          self.arr[0] + rhs.arr[0],
108          self.arr[1] + rhs.arr[1],
109        ]}
110      }
111    }
112  }
113}
114
115impl Sub for f64x2 {
116  type Output = Self;
117  #[inline]
118  #[must_use]
119  fn sub(self, rhs: Self) -> Self::Output {
120    pick! {
121      if #[cfg(target_feature="sse2")] {
122        Self { sse: sub_m128d(self.sse, rhs.sse) }
123      } else if #[cfg(target_feature="simd128")] {
124        Self { simd: f64x2_sub(self.simd, rhs.simd) }
125      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
126        unsafe { Self { neon: vsubq_f64(self.neon, rhs.neon) } }
127      } else {
128        Self { arr: [
129          self.arr[0] - rhs.arr[0],
130          self.arr[1] - rhs.arr[1],
131        ]}
132      }
133    }
134  }
135}
136
137impl Mul for f64x2 {
138  type Output = Self;
139  #[inline]
140  #[must_use]
141  fn mul(self, rhs: Self) -> Self::Output {
142    pick! {
143      if #[cfg(target_feature="sse2")] {
144        Self { sse: mul_m128d(self.sse, rhs.sse) }
145      } else if #[cfg(target_feature="simd128")] {
146        Self { simd: f64x2_mul(self.simd, rhs.simd) }
147      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
148        unsafe {Self { neon: vmulq_f64(self.neon, rhs.neon) }}
149      } else {
150        Self { arr: [
151          self.arr[0] * rhs.arr[0],
152          self.arr[1] * rhs.arr[1],
153        ]}
154      }
155    }
156  }
157}
158
159impl Div for f64x2 {
160  type Output = Self;
161  #[inline]
162  #[must_use]
163  fn div(self, rhs: Self) -> Self::Output {
164    pick! {
165      if #[cfg(target_feature="sse2")] {
166        Self { sse: div_m128d(self.sse, rhs.sse) }
167      } else if #[cfg(target_feature="simd128")] {
168        Self { simd: f64x2_div(self.simd, rhs.simd) }
169      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
170        unsafe {Self { neon: vdivq_f64(self.neon, rhs.neon) }}
171      } else {
172        Self { arr: [
173          self.arr[0] / rhs.arr[0],
174          self.arr[1] / rhs.arr[1],
175        ]}
176      }
177    }
178  }
179}
180
181impl Add<f64> for f64x2 {
182  type Output = Self;
183  #[inline]
184  #[must_use]
185  fn add(self, rhs: f64) -> Self::Output {
186    self.add(Self::splat(rhs))
187  }
188}
189
190impl Sub<f64> for f64x2 {
191  type Output = Self;
192  #[inline]
193  #[must_use]
194  fn sub(self, rhs: f64) -> Self::Output {
195    self.sub(Self::splat(rhs))
196  }
197}
198
199impl Mul<f64> for f64x2 {
200  type Output = Self;
201  #[inline]
202  #[must_use]
203  fn mul(self, rhs: f64) -> Self::Output {
204    self.mul(Self::splat(rhs))
205  }
206}
207
208impl Div<f64> for f64x2 {
209  type Output = Self;
210  #[inline]
211  #[must_use]
212  fn div(self, rhs: f64) -> Self::Output {
213    self.div(Self::splat(rhs))
214  }
215}
216
217impl Add<f64x2> for f64 {
218  type Output = f64x2;
219  #[inline]
220  #[must_use]
221  fn add(self, rhs: f64x2) -> Self::Output {
222    f64x2::splat(self).add(rhs)
223  }
224}
225
226impl Sub<f64x2> for f64 {
227  type Output = f64x2;
228  #[inline]
229  #[must_use]
230  fn sub(self, rhs: f64x2) -> Self::Output {
231    f64x2::splat(self).sub(rhs)
232  }
233}
234
235impl Mul<f64x2> for f64 {
236  type Output = f64x2;
237  #[inline]
238  #[must_use]
239  fn mul(self, rhs: f64x2) -> Self::Output {
240    f64x2::splat(self).mul(rhs)
241  }
242}
243
244impl Div<f64x2> for f64 {
245  type Output = f64x2;
246  #[inline]
247  #[must_use]
248  fn div(self, rhs: f64x2) -> Self::Output {
249    f64x2::splat(self).div(rhs)
250  }
251}
252
253impl BitAnd for f64x2 {
254  type Output = Self;
255  #[inline]
256  #[must_use]
257  fn bitand(self, rhs: Self) -> Self::Output {
258    pick! {
259      if #[cfg(target_feature="sse2")] {
260        Self { sse: bitand_m128d(self.sse, rhs.sse) }
261      } else if #[cfg(target_feature="simd128")] {
262        Self { simd: v128_and(self.simd, rhs.simd) }
263      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
264        unsafe {Self { neon: vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
265      } else {
266        Self { arr: [
267          f64::from_bits(self.arr[0].to_bits() & rhs.arr[0].to_bits()),
268          f64::from_bits(self.arr[1].to_bits() & rhs.arr[1].to_bits()),
269        ]}
270      }
271    }
272  }
273}
274
275impl BitOr for f64x2 {
276  type Output = Self;
277  #[inline]
278  #[must_use]
279  fn bitor(self, rhs: Self) -> Self::Output {
280    pick! {
281      if #[cfg(target_feature="sse2")] {
282        Self { sse: bitor_m128d(self.sse, rhs.sse) }
283      } else if #[cfg(target_feature="simd128")] {
284        Self { simd: v128_or(self.simd, rhs.simd) }
285      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
286        unsafe {Self { neon: vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
287      } else {
288        Self { arr: [
289          f64::from_bits(self.arr[0].to_bits() | rhs.arr[0].to_bits()),
290          f64::from_bits(self.arr[1].to_bits() | rhs.arr[1].to_bits()),
291        ]}
292      }
293    }
294  }
295}
296
297impl BitXor for f64x2 {
298  type Output = Self;
299  #[inline]
300  #[must_use]
301  fn bitxor(self, rhs: Self) -> Self::Output {
302    pick! {
303      if #[cfg(target_feature="sse2")] {
304        Self { sse: bitxor_m128d(self.sse, rhs.sse) }
305      } else if #[cfg(target_feature="simd128")] {
306        Self { simd: v128_xor(self.simd, rhs.simd) }
307      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
308        unsafe {Self { neon: vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
309      } else {
310        Self { arr: [
311          f64::from_bits(self.arr[0].to_bits() ^ rhs.arr[0].to_bits()),
312          f64::from_bits(self.arr[1].to_bits() ^ rhs.arr[1].to_bits()),
313        ]}
314      }
315    }
316  }
317}
318
319impl CmpEq for f64x2 {
320  type Output = Self;
321  #[inline]
322  #[must_use]
323  fn cmp_eq(self, rhs: Self) -> Self::Output {
324    pick! {
325      if #[cfg(target_feature="sse2")] {
326        Self { sse: cmp_eq_mask_m128d(self.sse, rhs.sse) }
327      } else if #[cfg(target_feature="simd128")] {
328        Self { simd: f64x2_eq(self.simd, rhs.simd) }
329      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
330        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }}
331      } else {
332        Self { arr: [
333          if self.arr[0] == rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
334          if self.arr[1] == rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
335        ]}
336      }
337    }
338  }
339}
340
341impl CmpGe for f64x2 {
342  type Output = Self;
343  #[inline]
344  #[must_use]
345  fn cmp_ge(self, rhs: Self) -> Self::Output {
346    pick! {
347      if #[cfg(target_feature="sse2")] {
348        Self { sse: cmp_ge_mask_m128d(self.sse, rhs.sse) }
349      } else if #[cfg(target_feature="simd128")] {
350        Self { simd: f64x2_ge(self.simd, rhs.simd) }
351      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
352        unsafe {Self { neon: vreinterpretq_f64_u64(vcgeq_f64(self.neon, rhs.neon)) }}
353      } else {
354        Self { arr: [
355          if self.arr[0] >= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
356          if self.arr[1] >= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
357        ]}
358      }
359    }
360  }
361}
362
363impl CmpGt for f64x2 {
364  type Output = Self;
365  #[inline]
366  #[must_use]
367  fn cmp_gt(self, rhs: Self) -> Self::Output {
368    pick! {
369      if #[cfg(target_feature="avx")] {
370        Self { sse: cmp_op_mask_m128d::<{cmp_op!(GreaterThanOrdered)}>(self.sse, rhs.sse) }
371      } else if #[cfg(target_feature="sse2")] {
372        Self { sse: cmp_gt_mask_m128d(self.sse, rhs.sse) }
373      } else if #[cfg(target_feature="simd128")] {
374        Self { simd: f64x2_gt(self.simd, rhs.simd) }
375      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
376        unsafe {Self { neon: vreinterpretq_f64_u64(vcgtq_f64(self.neon, rhs.neon)) }}
377      } else {
378        Self { arr: [
379          if self.arr[0] > rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
380          if self.arr[1] > rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
381        ]}
382      }
383    }
384  }
385}
386
387impl CmpNe for f64x2 {
388  type Output = Self;
389  #[inline]
390  #[must_use]
391  fn cmp_ne(self, rhs: Self) -> Self::Output {
392    pick! {
393      if #[cfg(target_feature="sse2")] {
394        Self { sse: cmp_neq_mask_m128d(self.sse, rhs.sse) }
395      } else if #[cfg(target_feature="simd128")] {
396        Self { simd: f64x2_ne(self.simd, rhs.simd) }
397      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
398        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }.not() }
399      } else {
400        Self { arr: [
401          if self.arr[0] != rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
402          if self.arr[1] != rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
403        ]}
404      }
405    }
406  }
407}
408
409impl CmpLe for f64x2 {
410  type Output = Self;
411  #[inline]
412  #[must_use]
413  fn cmp_le(self, rhs: Self) -> Self::Output {
414    pick! {
415      if #[cfg(target_feature="sse2")] {
416        Self { sse: cmp_le_mask_m128d(self.sse, rhs.sse) }
417      } else if #[cfg(target_feature="simd128")] {
418        Self { simd: f64x2_le(self.simd, rhs.simd) }
419      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
420        unsafe {Self { neon: vreinterpretq_f64_u64(vcleq_f64(self.neon, rhs.neon)) }}
421      } else {
422        Self { arr: [
423          if self.arr[0] <= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
424          if self.arr[1] <= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
425        ]}
426      }
427    }
428  }
429}
430
431impl CmpLt for f64x2 {
432  type Output = Self;
433  #[inline]
434  #[must_use]
435  fn cmp_lt(self, rhs: Self) -> Self::Output {
436    pick! {
437      if #[cfg(target_feature="sse2")] {
438        Self { sse: cmp_lt_mask_m128d(self.sse, rhs.sse) }
439      } else if #[cfg(target_feature="simd128")] {
440        Self { simd: f64x2_lt(self.simd, rhs.simd) }
441      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
442        unsafe {Self { neon: vreinterpretq_f64_u64(vcltq_f64(self.neon, rhs.neon)) }}
443      } else {
444        Self { arr: [
445          if self.arr[0] < rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
446          if self.arr[1] < rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
447        ]}
448      }
449    }
450  }
451}
452
453impl f64x2 {
  /// Builds a vector from an array holding its two `f64` lanes.
  ///
  /// Usable in `const` context, which is why it reinterprets the array
  /// instead of going through the per-target field.
  #[inline]
  #[must_use]
  pub const fn new(array: [f64; 2]) -> Self {
    // SAFETY: NOTE(review): relies on every cfg variant of `f64x2` having
    // the same size as `[f64; 2]` (the transmute checks size at compile time)
    // and accepting any `f64` bit pattern — confirm for the SIMD-backed
    // field types.
    unsafe { core::mem::transmute(array) }
  }
459  #[inline]
460  #[must_use]
461  pub fn blend(self, t: Self, f: Self) -> Self {
462    pick! {
463      if #[cfg(target_feature="sse4.1")] {
464        Self { sse: blend_varying_m128d(f.sse, t.sse, self.sse) }
465      } else if #[cfg(target_feature="simd128")] {
466        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
467      } else {
468        generic_bit_blend(self, t, f)
469      }
470    }
471  }
472  #[inline]
473  #[must_use]
474  pub fn abs(self) -> Self {
475    pick! {
476      if #[cfg(target_feature="simd128")] {
477        Self { simd: f64x2_abs(self.simd) }
478      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
479        unsafe {Self { neon: vabsq_f64(self.neon) }}
480      } else {
481        let non_sign_bits = f64x2::from(f64::from_bits(i64::MAX as u64));
482        self & non_sign_bits
483      }
484    }
485  }
486  #[inline]
487  #[must_use]
488  pub fn floor(self) -> Self {
489    pick! {
490      if #[cfg(target_feature="simd128")] {
491        Self { simd: f64x2_floor(self.simd) }
492      } else if #[cfg(target_feature="sse4.1")] {
493        Self { sse: floor_m128d(self.sse) }
494      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
495        unsafe {Self { neon: vrndmq_f64(self.neon) }}
496      } else if #[cfg(feature="std")] {
497        let base: [f64; 2] = cast(self);
498        cast(base.map(|val| val.floor()))
499      } else {
500        let base: [f64; 2] = cast(self);
501        let rounded: [f64; 2] = cast(self.round());
502        cast([
503          if base[0] < rounded[0] { rounded[0] - 1.0 } else { rounded[0] },
504          if base[1] < rounded[1] { rounded[1] - 1.0 } else { rounded[1] },
505        ])
506      }
507    }
508  }
509  #[inline]
510  #[must_use]
511  pub fn ceil(self) -> Self {
512    pick! {
513      if #[cfg(target_feature="simd128")] {
514        Self { simd: f64x2_ceil(self.simd) }
515      } else if #[cfg(target_feature="sse4.1")] {
516        Self { sse: ceil_m128d(self.sse) }
517      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
518        unsafe {Self { neon: vrndpq_f64(self.neon) }}
519      } else if #[cfg(feature="std")] {
520        let base: [f64; 2] = cast(self);
521        cast(base.map(|val| val.ceil()))
522      } else {
523        let base: [f64; 2] = cast(self);
524        let rounded: [f64; 2] = cast(self.round());
525        cast([
526          if base[0] > rounded[0] { rounded[0] + 1.0 } else { rounded[0] },
527          if base[1] > rounded[1] { rounded[1] + 1.0 } else { rounded[1] },
528        ])
529      }
530    }
531  }
532
533  /// Calculates the lanewise maximum of both vectors. This is a faster
534  /// implementation than `max`, but it doesn't specify any behavior if NaNs are
535  /// involved.
536  #[inline]
537  #[must_use]
538  pub fn fast_max(self, rhs: Self) -> Self {
539    pick! {
540      if #[cfg(target_feature="sse2")] {
541        Self { sse: max_m128d(self.sse, rhs.sse) }
542      } else if #[cfg(target_feature="simd128")] {
543        Self {
544          simd: f64x2_pmax(self.simd, rhs.simd),
545        }
546      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
547        unsafe {Self { neon: vmaxq_f64(self.neon, rhs.neon) }}
548      } else {
549        Self { arr: [
550          if self.arr[0] < rhs.arr[0] { rhs.arr[0] } else { self.arr[0] },
551          if self.arr[1] < rhs.arr[1] { rhs.arr[1] } else { self.arr[1] },
552        ]}
553      }
554    }
555  }
556
557  /// Calculates the lanewise maximum of both vectors. If either lane is NaN,
558  /// the other lane gets chosen. Use `fast_max` for a faster implementation
559  /// that doesn't handle NaNs.
560  #[inline]
561  #[must_use]
562  pub fn max(self, rhs: Self) -> Self {
563    pick! {
564      if #[cfg(target_feature="sse2")] {
565        // max_m128d seems to do rhs < self ? self : rhs. So if there's any NaN
566        // involved, it chooses rhs, so we need to specifically check rhs for
567        // NaN.
568        rhs.is_nan().blend(self, Self { sse: max_m128d(self.sse, rhs.sse) })
569      } else if #[cfg(target_feature="simd128")] {
570        // WASM has two max intrinsics:
571        // - max: This propagates NaN, that's the opposite of what we need.
572        // - pmax: This is defined as self < rhs ? rhs : self, which basically
573        //   chooses self if either is NaN.
574        //
575        // pmax is what we want, but we need to specifically check self for NaN.
576        Self {
577          simd: v128_bitselect(
578            rhs.simd,
579            f64x2_pmax(self.simd, rhs.simd),
580            f64x2_ne(self.simd, self.simd), // NaN check
581          )
582        }
583      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
584        unsafe {Self { neon: vmaxnmq_f64(self.neon, rhs.neon) }}
585            } else {
586        Self { arr: [
587          self.arr[0].max(rhs.arr[0]),
588          self.arr[1].max(rhs.arr[1]),
589        ]}
590      }
591    }
592  }
593
594  /// Calculates the lanewise minimum of both vectors. This is a faster
595  /// implementation than `min`, but it doesn't specify any behavior if NaNs are
596  /// involved.
597  #[inline]
598  #[must_use]
599  pub fn fast_min(self, rhs: Self) -> Self {
600    pick! {
601      if #[cfg(target_feature="sse2")] {
602        Self { sse: min_m128d(self.sse, rhs.sse) }
603      } else if #[cfg(target_feature="simd128")] {
604        Self {
605          simd: f64x2_pmin(self.simd, rhs.simd),
606        }
607      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
608        unsafe {Self { neon: vminq_f64(self.neon, rhs.neon) }}
609      } else {
610        Self { arr: [
611          if self.arr[0] < rhs.arr[0] { self.arr[0] } else { rhs.arr[0] },
612          if self.arr[1] < rhs.arr[1] { self.arr[1] } else { rhs.arr[1] },
613        ]}
614      }
615    }
616  }
617
618  /// Calculates the lanewise minimum of both vectors. If either lane is NaN,
619  /// the other lane gets chosen. Use `fast_min` for a faster implementation
620  /// that doesn't handle NaNs.
621  #[inline]
622  #[must_use]
623  pub fn min(self, rhs: Self) -> Self {
624    pick! {
625      if #[cfg(target_feature="sse2")] {
626        // min_m128d seems to do rhs < self ? rhs : self. So if there's any NaN
627        // involved, it chooses rhs, so we need to specifically check rhs for
628        // NaN.
629        rhs.is_nan().blend(self, Self { sse: min_m128d(self.sse, rhs.sse) })
630      } else if #[cfg(target_feature="simd128")] {
631        // WASM has two min intrinsics:
632        // - min: This propagates NaN, that's the opposite of what we need.
633        // - pmin: This is defined as rhs < self ? rhs : self, which basically
634        //   chooses self if either is NaN.
635        //
636        // pmin is what we want, but we need to specifically check self for NaN.
637        Self {
638          simd: v128_bitselect(
639            rhs.simd,
640            f64x2_pmin(self.simd, rhs.simd),
641            f64x2_ne(self.simd, self.simd), // NaN check
642          )
643        }
644      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
645        unsafe {Self { neon: vminnmq_f64(self.neon, rhs.neon) }}
646      } else {
647        Self { arr: [
648          self.arr[0].min(rhs.arr[0]),
649          self.arr[1].min(rhs.arr[1]),
650        ]}
651      }
652    }
653  }
654
655  #[inline]
656  #[must_use]
657  pub fn is_nan(self) -> Self {
658    pick! {
659      if #[cfg(target_feature="sse2")] {
660        Self { sse: cmp_unord_mask_m128d(self.sse, self.sse) }
661      } else if #[cfg(target_feature="simd128")] {
662        Self { simd: f64x2_ne(self.simd, self.simd) }
663      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
664        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, self.neon)) }.not() }
665      } else {
666        Self { arr: [
667          if self.arr[0].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
668          if self.arr[1].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
669        ]}
670      }
671    }
672  }
673  #[inline]
674  #[must_use]
675  pub fn is_finite(self) -> Self {
676    let shifted_exp_mask = u64x2::from(0xFFE0000000000000);
677    let u: u64x2 = cast(self);
678    let shift_u = u << 1_u64;
679    let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
680    cast(out)
681  }
682  #[inline]
683  #[must_use]
684  pub fn is_inf(self) -> Self {
685    let shifted_inf = u64x2::from(0xFFE0000000000000);
686    let u: u64x2 = cast(self);
687    let shift_u = u << 1_u64;
688    let out = (shift_u).cmp_eq(shifted_inf);
689    cast(out)
690  }
691
692  #[inline]
693  #[must_use]
694  pub fn round(self) -> Self {
695    pick! {
696      if #[cfg(target_feature="sse4.1")] {
697        Self { sse: round_m128d::<{round_op!(Nearest)}>(self.sse) }
698      } else if #[cfg(target_feature="simd128")] {
699        Self { simd: f64x2_nearest(self.simd) }
700      } else {
701        let sign_mask = f64x2::from(-0.0);
702        let magic = f64x2::from(f64::from_bits(0x43300000_00000000));
703        let sign = self & sign_mask;
704        let signed_magic = magic | sign;
705        self + signed_magic - signed_magic
706      }
707    }
708  }
709  #[inline]
710  #[must_use]
711  pub fn round_int(self) -> i64x2 {
712    let rounded: [f64; 2] = cast(self.round());
713    cast([rounded[0] as i64, rounded[1] as i64])
714  }
715  #[inline]
716  #[must_use]
717  pub fn mul_add(self, m: Self, a: Self) -> Self {
718    pick! {
719      if #[cfg(all(target_feature="fma"))] {
720        Self { sse: fused_mul_add_m128d(self.sse, m.sse, a.sse) }
721      } else {
722        (self * m) + a
723      }
724    }
725  }
726
727  #[inline]
728  #[must_use]
729  pub fn mul_sub(self, m: Self, a: Self) -> Self {
730    pick! {
731      if #[cfg(all(target_feature="fma"))] {
732        Self { sse: fused_mul_sub_m128d(self.sse, m.sse, a.sse) }
733      } else {
734        (self * m) - a
735      }
736    }
737  }
738
739  #[inline]
740  #[must_use]
741  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
742    pick! {
743        if #[cfg(all(target_feature="fma"))] {
744          Self { sse: fused_mul_neg_add_m128d(self.sse, m.sse, a.sse) }
745        } else {
746          a - (self * m)
747        }
748    }
749  }
750
751  #[inline]
752  #[must_use]
753  pub fn mul_neg_sub(self, m: Self, a: Self) -> Self {
754    pick! {
755        if #[cfg(all(target_feature="fma"))] {
756          Self { sse: fused_mul_neg_sub_m128d(self.sse, m.sse, a.sse) }
757        } else {
758          -(self * m) - a
759        }
760    }
761  }
762
763  #[inline]
764  #[must_use]
765  pub fn flip_signs(self, signs: Self) -> Self {
766    self ^ (signs & Self::from(-0.0))
767  }
768
769  #[inline]
770  #[must_use]
771  pub fn copysign(self, sign: Self) -> Self {
772    let magnitude_mask = Self::from(f64::from_bits(u64::MAX >> 1));
773    (self & magnitude_mask) | (sign & Self::from(-0.0))
774  }
775
776  #[inline]
777  pub fn asin_acos(self) -> (Self, Self) {
778    // Based on the Agner Fog "vector class library":
779    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
780    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
781    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
782    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
783    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
784    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
785
786    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
787    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
788    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
789    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
790
791    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
792    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
793    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
794    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
795    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
796    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
797
798    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
799    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
800    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
801    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
802    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
803
804    let xa = self.abs();
805
806    let big = xa.cmp_ge(f64x2::splat(0.625));
807
808    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
809
810    let x2 = x1 * x1;
811    let x3 = x2 * x1;
812    let x4 = x2 * x2;
813    let x5 = x4 * x1;
814
815    let do_big = big.any();
816    let do_small = !big.all();
817
818    let mut rx = f64x2::default();
819    let mut sx = f64x2::default();
820    let mut px = f64x2::default();
821    let mut qx = f64x2::default();
822
823    if do_big {
824      rx = x3.mul_add(R3asin, x2 * R2asin)
825        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
826      sx =
827        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
828    }
829    if do_small {
830      px = x3.mul_add(P3asin, P0asin)
831        + x4.mul_add(P4asin, x1 * P1asin)
832        + x5.mul_add(P5asin, x2 * P2asin);
833      qx = x4.mul_add(Q4asin, x5)
834        + x3.mul_add(Q3asin, x1 * Q1asin)
835        + x2.mul_add(Q2asin, Q0asin);
836    };
837
838    let vx = big.blend(rx, px);
839    let wx = big.blend(sx, qx);
840
841    let y1 = vx / wx * x1;
842
843    let mut z1 = f64x2::default();
844    let mut z2 = f64x2::default();
845    if do_big {
846      let xb = (x1 + x1).sqrt();
847      z1 = xb.mul_add(y1, xb);
848    }
849
850    if do_small {
851      z2 = xa.mul_add(y1, xa);
852    }
853
854    // asin
855    let z3 = f64x2::FRAC_PI_2 - z1;
856    let asin = big.blend(z3, z2);
857    let asin = asin.flip_signs(self);
858
859    // acos
860    let z3 = self.cmp_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
861    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
862    let acos = big.blend(z3, z4);
863
864    (asin, acos)
865  }
866
867  #[inline]
868  pub fn acos(self) -> Self {
869    // Based on the Agner Fog "vector class library":
870    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
871    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
872    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
873    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
874    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
875    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
876
877    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
878    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
879    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
880    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
881
882    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
883    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
884    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
885    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
886    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
887    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
888
889    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
890    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
891    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
892    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
893    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
894
895    let xa = self.abs();
896
897    let big = xa.cmp_ge(f64x2::splat(0.625));
898
899    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
900
901    let x2 = x1 * x1;
902    let x3 = x2 * x1;
903    let x4 = x2 * x2;
904    let x5 = x4 * x1;
905
906    let do_big = big.any();
907    let do_small = !big.all();
908
909    let mut rx = f64x2::default();
910    let mut sx = f64x2::default();
911    let mut px = f64x2::default();
912    let mut qx = f64x2::default();
913
914    if do_big {
915      rx = x3.mul_add(R3asin, x2 * R2asin)
916        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
917      sx =
918        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
919    }
920    if do_small {
921      px = x3.mul_add(P3asin, P0asin)
922        + x4.mul_add(P4asin, x1 * P1asin)
923        + x5.mul_add(P5asin, x2 * P2asin);
924      qx = x4.mul_add(Q4asin, x5)
925        + x3.mul_add(Q3asin, x1 * Q1asin)
926        + x2.mul_add(Q2asin, Q0asin);
927    };
928
929    let vx = big.blend(rx, px);
930    let wx = big.blend(sx, qx);
931
932    let y1 = vx / wx * x1;
933
934    let mut z1 = f64x2::default();
935    let mut z2 = f64x2::default();
936    if do_big {
937      let xb = (x1 + x1).sqrt();
938      z1 = xb.mul_add(y1, xb);
939    }
940
941    if do_small {
942      z2 = xa.mul_add(y1, xa);
943    }
944
945    // acos
946    let z3 = self.cmp_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
947    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
948    let acos = big.blend(z3, z4);
949
950    acos
951  }
952
953  #[inline]
954  pub fn asin(self) -> Self {
955    // Based on the Agner Fog "vector class library":
956    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
957    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
958    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
959    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
960    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
961    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
962
963    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
964    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
965    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
966    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
967
968    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
969    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
970    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
971    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
972    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
973    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
974
975    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
976    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
977    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
978    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
979    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
980
981    let xa = self.abs();
982
983    let big = xa.cmp_ge(f64x2::splat(0.625));
984
985    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
986
987    let x2 = x1 * x1;
988    let x3 = x2 * x1;
989    let x4 = x2 * x2;
990    let x5 = x4 * x1;
991
992    let do_big = big.any();
993    let do_small = !big.all();
994
995    let mut rx = f64x2::default();
996    let mut sx = f64x2::default();
997    let mut px = f64x2::default();
998    let mut qx = f64x2::default();
999
1000    if do_big {
1001      rx = x3.mul_add(R3asin, x2 * R2asin)
1002        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
1003      sx =
1004        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
1005    }
1006    if do_small {
1007      px = x3.mul_add(P3asin, P0asin)
1008        + x4.mul_add(P4asin, x1 * P1asin)
1009        + x5.mul_add(P5asin, x2 * P2asin);
1010      qx = x4.mul_add(Q4asin, x5)
1011        + x3.mul_add(Q3asin, x1 * Q1asin)
1012        + x2.mul_add(Q2asin, Q0asin);
1013    };
1014
1015    let vx = big.blend(rx, px);
1016    let wx = big.blend(sx, qx);
1017
1018    let y1 = vx / wx * x1;
1019
1020    let mut z1 = f64x2::default();
1021    let mut z2 = f64x2::default();
1022    if do_big {
1023      let xb = (x1 + x1).sqrt();
1024      z1 = xb.mul_add(y1, xb);
1025    }
1026
1027    if do_small {
1028      z2 = xa.mul_add(y1, xa);
1029    }
1030
1031    // asin
1032    let z3 = f64x2::FRAC_PI_2 - z1;
1033    let asin = big.blend(z3, z2);
1034    let asin = asin.flip_signs(self);
1035
1036    asin
1037  }
1038
1039  #[inline]
1040  pub fn atan(self) -> Self {
1041    // Based on the Agner Fog "vector class library":
1042    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
1043    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
1044    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
1045    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
1046
1047    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
1048    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
1049    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
1050    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
1051    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);
1052
1053    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
1054    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
1055    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
1056    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
1057    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);
1058
1059    let t = self.abs();
1060
1061    // small:  t < 0.66
1062    // medium: t <= t <= 2.4142 (1+sqrt(2))
1063    // big:    t > 2.4142
1064    let notbig = t.cmp_le(T3PO8);
1065    let notsmal = t.cmp_ge(Self::splat(0.66));
1066
1067    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1068    s = notsmal & s;
1069    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1070    fac = notsmal & fac;
1071
1072    // small:  z = t / 1.0;
1073    // medium: z = (t-1.0) / (t+1.0);
1074    // big:    z = -1.0 / t;
1075    let mut a = notbig & t;
1076    a = notsmal.blend(a - Self::ONE, a);
1077    let mut b = notbig & Self::ONE;
1078    b = notsmal.blend(b + t, b);
1079    let z = a / b;
1080
1081    let zz = z * z;
1082
1083    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1084    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1085
1086    let mut re = (px / qx).mul_add(z * zz, z);
1087    re += s + fac;
1088
1089    // get sign bit
1090    re = (self.sign_bit()).blend(-re, re);
1091
1092    re
1093  }
1094
  /// Lane-wise two-argument arctangent, with `self` as the `y` coordinate
  /// and the argument `x` as the `x` coordinate.
  ///
  /// The inputs are folded into the first octant (absolute values, swapped
  /// when `|y| > |x|`), the ratio is evaluated with the same range reduction
  /// and rational approximation as `atan`, and the result is then moved back
  /// using the swap flag and the sign bits of `x` and `y`.
  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    // Based on the Agner Fog "vector class library":
    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);

    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);

    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);

    let y = self;

    // move in first octant
    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.cmp_gt(x1);
    // swap x and y if y1 > x1
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    // check for special case: x and y are both +/- INF
    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      // Bitwise AND with -1.0 is applied only to the lanes where both inputs
      // are infinite, so that the division below is not INF / INF.
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    // x = y = 0 gives NAN here
    let t = y2 / x2;

    // small:  t < 0.66
    // medium: 0.66 <= t <= 2.4142 (1+sqrt(2))
    // big:    t > 2.4142
    let notbig = t.cmp_le(T3PO8);
    let notsmal = t.cmp_ge(Self::splat(0.66));

    // Additive offset (0, pi/4 or pi/2) and its low-order correction term,
    // zeroed for the "small" lanes by the AND with `notsmal`.
    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;
    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
    fac = notsmal & fac;

    // small:  z = t / 1.0;
    // medium: z = (t-1.0) / (t+1.0);
    // big:    z = -1.0 / t;
    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);

    let mut re = (px / qx).mul_add(z * zz, z);
    re += s + fac;

    // move back in place
    // Undo the octant swap first.
    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    // Lanes where `x | y` compares equal to zero are reset to 0 (this
    // replaces the NAN produced by the 0 / 0 division above).
    re = ((x | y).cmp_eq(Self::ZERO)).blend(Self::ZERO, re);
    // Lanes where the sign bit of x is set are reflected to `PI - re`.
    re = (x.sign_bit()).blend(Self::PI - re, re);

    // get sign bit
    re = (y.sign_bit()).blend(-re, re);

    re
  }
1174
  /// Computes the sine and cosine of each lane in one pass and returns them
  /// as `(sin, cos)`.
  ///
  /// The absolute value of the input is reduced by a multiple of pi/2, both
  /// polynomials are evaluated on the reduced argument, and the quadrant
  /// number decides which polynomial feeds which output and with what sign.
  #[inline]
  #[must_use]
  pub fn sin_cos(self) -> (Self, Self) {
    // Based on the Agner Fog "vector class library":
    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h

    const_f64_as_f64x2!(P0sin, -1.66666666666666307295E-1);
    const_f64_as_f64x2!(P1sin, 8.33333333332211858878E-3);
    const_f64_as_f64x2!(P2sin, -1.98412698295895385996E-4);
    const_f64_as_f64x2!(P3sin, 2.75573136213857245213E-6);
    const_f64_as_f64x2!(P4sin, -2.50507477628578072866E-8);
    const_f64_as_f64x2!(P5sin, 1.58962301576546568060E-10);

    const_f64_as_f64x2!(P0cos, 4.16666666666665929218E-2);
    const_f64_as_f64x2!(P1cos, -1.38888888888730564116E-3);
    const_f64_as_f64x2!(P2cos, 2.48015872888517045348E-5);
    const_f64_as_f64x2!(P3cos, -2.75573141792967388112E-7);
    const_f64_as_f64x2!(P4cos, 2.08757008419747316778E-9);
    const_f64_as_f64x2!(P5cos, -1.13585365213876817300E-11);

    // pi/2 split into three parts of decreasing magnitude, subtracted one
    // after another during argument reduction.
    const_f64_as_f64x2!(DP1, 7.853981554508209228515625E-1 * 2.);
    const_f64_as_f64x2!(DP2, 7.94662735614792836714E-9 * 2.);
    const_f64_as_f64x2!(DP3, 3.06161699786838294307E-17 * 2.);

    const_f64_as_f64x2!(TWO_OVER_PI, 2.0 / core::f64::consts::PI);

    let xa = self.abs();

    // Quadrant number: `y` as a float, `q` as an integer.
    let y = (xa * TWO_OVER_PI).round();
    let q = y.round_int();

    // Reduced argument.
    // NOTE(review): presumably `a.mul_neg_add(b, c)` is `c - a * b`, making
    // this `xa - y*DP1 - y*DP2 - y*DP3` -- confirm against `mul_neg_add`.
    let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa)));

    let x2 = x * x;
    let mut s = polynomial_5!(x2, P0sin, P1sin, P2sin, P3sin, P4sin, P5sin);
    let mut c = polynomial_5!(x2, P0cos, P1cos, P2cos, P3cos, P4cos, P5cos);
    s = (x * x2).mul_add(s, x);
    c =
      (x2 * x2).mul_add(c, x2.mul_neg_add(f64x2::from(0.5), f64x2::from(1.0)));

    // Lanes whose quadrant number is odd exchange sine and cosine.
    let swap = !((q & i64x2::from(1)).cmp_eq(i64x2::from(0)));

    // Lanes with a finite input whose quadrant number exceeds
    // 0x80000000000000 are forced to sin = 0, cos = 1.
    let mut overflow: f64x2 = cast(q.cmp_gt(i64x2::from(0x80000000000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f64x2::from(0.0), s);
    c = overflow.blend(f64x2::from(1.0), c);

    // calc sin
    let mut sin1 = cast::<_, f64x2>(swap).blend(c, s);
    // `q << 62` moves bit 1 of the quadrant into the sign-bit position; it
    // is XOR-ed with the raw bits of the original input.
    let sign_sin: i64x2 = (q << 62) ^ cast::<_, i64x2>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    // calc cos
    let mut cos1 = cast::<_, f64x2>(swap).blend(s, c);
    // Bit 1 of `q + 1`, moved into the sign-bit position and XOR-ed in.
    let sign_cos: i64x2 = ((q + i64x2::from(1)) & i64x2::from(2)) << 62;
    cos1 ^= cast::<_, f64x2>(sign_cos);

    (sin1, cos1)
  }
1234  #[inline]
1235  #[must_use]
1236  pub fn sin(self) -> Self {
1237    let (s, _) = self.sin_cos();
1238    s
1239  }
1240  #[inline]
1241  #[must_use]
1242  pub fn cos(self) -> Self {
1243    let (_, c) = self.sin_cos();
1244    c
1245  }
1246  #[inline]
1247  #[must_use]
1248  pub fn tan(self) -> Self {
1249    let (s, c) = self.sin_cos();
1250    s / c
1251  }
1252  #[inline]
1253  #[must_use]
1254  pub fn to_degrees(self) -> Self {
1255    const_f64_as_f64x2!(RAD_TO_DEG_RATIO, 180.0_f64 / core::f64::consts::PI);
1256    self * RAD_TO_DEG_RATIO
1257  }
1258  #[inline]
1259  #[must_use]
1260  pub fn to_radians(self) -> Self {
1261    const_f64_as_f64x2!(DEG_TO_RAD_RATIO, core::f64::consts::PI / 180.0_f64);
1262    self * DEG_TO_RAD_RATIO
1263  }
1264  #[inline]
1265  #[must_use]
1266  pub fn sqrt(self) -> Self {
1267    pick! {
1268      if #[cfg(target_feature="sse2")] {
1269        Self { sse: sqrt_m128d(self.sse) }
1270      } else if #[cfg(target_feature="simd128")] {
1271        Self { simd: f64x2_sqrt(self.simd) }
1272      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1273        unsafe {Self { neon: vsqrtq_f64(self.neon) }}
1274      } else if #[cfg(feature="std")] {
1275        Self { arr: [
1276          self.arr[0].sqrt(),
1277          self.arr[1].sqrt(),
1278        ]}
1279      } else {
1280        Self { arr: [
1281          software_sqrt(self.arr[0]),
1282          software_sqrt(self.arr[1]),
1283        ]}
1284      }
1285    }
1286  }
1287  #[inline]
1288  #[must_use]
1289  pub fn move_mask(self) -> i32 {
1290    pick! {
1291      if #[cfg(target_feature="sse2")] {
1292        move_mask_m128d(self.sse)
1293      } else if #[cfg(target_feature="simd128")] {
1294        u64x2_bitmask(self.simd) as i32
1295      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1296        unsafe
1297        {
1298          let e = vreinterpretq_u64_f64(self.neon);
1299
1300          (vgetq_lane_u64(e,0) >> 63 | ((vgetq_lane_u64(e,1) >> 62) & 0x2)) as i32
1301        }
1302      } else {
1303        (((self.arr[0].to_bits() as i64) < 0) as i32) << 0 |
1304        (((self.arr[1].to_bits() as i64) < 0) as i32) << 1
1305      }
1306    }
1307  }
1308  #[inline]
1309  #[must_use]
1310  pub fn any(self) -> bool {
1311    pick! {
1312      if #[cfg(target_feature="simd128")] {
1313        v128_any_true(self.simd)
1314      } else {
1315        self.move_mask() != 0
1316      }
1317    }
1318  }
1319  #[inline]
1320  #[must_use]
1321  pub fn all(self) -> bool {
1322    pick! {
1323      if #[cfg(target_feature="simd128")] {
1324        u64x2_all_true(self.simd)
1325      } else {
1326        // two lanes
1327        self.move_mask() == 0b11
1328      }
1329    }
1330  }
1331  #[inline]
1332  #[must_use]
1333  pub fn none(self) -> bool {
1334    !self.any()
1335  }
1336
1337  #[inline]
1338  fn vm_pow2n(self) -> Self {
1339    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
1340    const_f64_as_f64x2!(bias, 1023.0);
1341    let a = self + (bias + pow2_52);
1342    let c = cast::<_, i64x2>(a) << 52;
1343    cast::<_, f64x2>(c)
1344  }
1345
1346  /// Calculate the exponent of a packed `f64x2`
1347  #[inline]
1348  #[must_use]
1349  pub fn exp(self) -> Self {
1350    const_f64_as_f64x2!(P2, 1.0 / 2.0);
1351    const_f64_as_f64x2!(P3, 1.0 / 6.0);
1352    const_f64_as_f64x2!(P4, 1. / 24.);
1353    const_f64_as_f64x2!(P5, 1. / 120.);
1354    const_f64_as_f64x2!(P6, 1. / 720.);
1355    const_f64_as_f64x2!(P7, 1. / 5040.);
1356    const_f64_as_f64x2!(P8, 1. / 40320.);
1357    const_f64_as_f64x2!(P9, 1. / 362880.);
1358    const_f64_as_f64x2!(P10, 1. / 3628800.);
1359    const_f64_as_f64x2!(P11, 1. / 39916800.);
1360    const_f64_as_f64x2!(P12, 1. / 479001600.);
1361    const_f64_as_f64x2!(P13, 1. / 6227020800.);
1362    const_f64_as_f64x2!(LN2D_HI, 0.693145751953125);
1363    const_f64_as_f64x2!(LN2D_LO, 1.42860682030941723212E-6);
1364    let max_x = f64x2::from(708.39);
1365    let r = (self * Self::LOG2_E).round();
1366    let x = r.mul_neg_add(LN2D_HI, self);
1367    let x = r.mul_neg_add(LN2D_LO, x);
1368    let z =
1369      polynomial_13!(x, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13);
1370    let n2 = Self::vm_pow2n(r);
1371    let z = (z + Self::ONE) * n2;
1372    // check for overflow
1373    let in_range = self.abs().cmp_lt(max_x);
1374    let in_range = in_range & self.is_finite();
1375    in_range.blend(z, Self::ZERO)
1376  }
1377
1378  #[inline]
1379  fn exponent(self) -> f64x2 {
1380    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
1381    const_f64_as_f64x2!(bias, 1023.0);
1382    let a = cast::<_, u64x2>(self);
1383    let b = a >> 52;
1384    let c = b | cast::<_, u64x2>(pow2_52);
1385    let d = cast::<_, f64x2>(c);
1386    let e = d - (pow2_52 + bias);
1387    e
1388  }
1389
1390  #[inline]
1391  fn fraction_2(self) -> Self {
1392    let t1 = cast::<_, u64x2>(self);
1393    let t2 = cast::<_, u64x2>(
1394      (t1 & u64x2::from(0x000FFFFFFFFFFFFF)) | u64x2::from(0x3FE0000000000000),
1395    );
1396    cast::<_, f64x2>(t2)
1397  }
1398
1399  #[inline]
1400  fn is_zero_or_subnormal(self) -> Self {
1401    let t = cast::<_, i64x2>(self);
1402    let t = t & i64x2::splat(0x7FF0000000000000);
1403    i64x2::round_float(t.cmp_eq(i64x2::splat(0)))
1404  }
1405
1406  #[inline]
1407  fn infinity() -> Self {
1408    cast::<_, f64x2>(i64x2::splat(0x7FF0000000000000))
1409  }
1410
1411  #[inline]
1412  fn nan_log() -> Self {
1413    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
1414  }
1415
1416  #[inline]
1417  fn nan_pow() -> Self {
1418    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
1419  }
1420
1421  #[inline]
1422  fn sign_bit(self) -> Self {
1423    let t1 = cast::<_, i64x2>(self);
1424    let t2 = t1 >> 63;
1425    !cast::<_, f64x2>(t2).cmp_eq(f64x2::ZERO)
1426  }
1427
1428  /// horizontal add of all the elements of the vector
1429  #[inline]
1430  #[must_use]
1431  pub fn reduce_add(self) -> f64 {
1432    pick! {
1433      if #[cfg(target_feature="ssse3")] {
1434        let a = add_horizontal_m128d(self.sse, self.sse);
1435        a.to_array()[0]
1436      } else if #[cfg(any(target_feature="sse2", target_feature="simd128"))] {
1437        let a: [f64;2] = cast(self);
1438        a.iter().sum()
1439      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1440        unsafe { vgetq_lane_f64(self.neon,0) + vgetq_lane_f64(self.neon,1) }
1441      } else {
1442        self.arr.iter().sum()
1443      }
1444    }
1445  }
1446
1447  #[inline]
1448  #[must_use]
1449  pub fn ln(self) -> Self {
1450    const_f64_as_f64x2!(P0, 7.70838733755885391666E0);
1451    const_f64_as_f64x2!(P1, 1.79368678507819816313E1);
1452    const_f64_as_f64x2!(P2, 1.44989225341610930846E1);
1453    const_f64_as_f64x2!(P3, 4.70579119878881725854E0);
1454    const_f64_as_f64x2!(P4, 4.97494994976747001425E-1);
1455    const_f64_as_f64x2!(P5, 1.01875663804580931796E-4);
1456
1457    const_f64_as_f64x2!(Q0, 2.31251620126765340583E1);
1458    const_f64_as_f64x2!(Q1, 7.11544750618563894466E1);
1459    const_f64_as_f64x2!(Q2, 8.29875266912776603211E1);
1460    const_f64_as_f64x2!(Q3, 4.52279145837532221105E1);
1461    const_f64_as_f64x2!(Q4, 1.12873587189167450590E1);
1462    const_f64_as_f64x2!(LN2F_HI, 0.693359375);
1463    const_f64_as_f64x2!(LN2F_LO, -2.12194440e-4);
1464    const_f64_as_f64x2!(VM_SQRT2, 1.414213562373095048801);
1465    const_f64_as_f64x2!(VM_SMALLEST_NORMAL, 1.17549435E-38);
1466
1467    let x1 = self;
1468    let x = Self::fraction_2(x1);
1469    let e = Self::exponent(x1);
1470    let mask = x.cmp_gt(VM_SQRT2 * f64x2::HALF);
1471    let x = (!mask).blend(x + x, x);
1472    let fe = mask.blend(e + Self::ONE, e);
1473    let x = x - Self::ONE;
1474    let px = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
1475    let x2 = x * x;
1476    let px = x2 * x * px;
1477    let qx = polynomial_5n!(x, Q0, Q1, Q2, Q3, Q4);
1478    let res = px / qx;
1479    let res = fe.mul_add(LN2F_LO, res);
1480    let res = res + x2.mul_neg_add(f64x2::HALF, x);
1481    let res = fe.mul_add(LN2F_HI, res);
1482    let overflow = !self.is_finite();
1483    let underflow = x1.cmp_lt(VM_SMALLEST_NORMAL);
1484    let mask = overflow | underflow;
1485    if !mask.any() {
1486      res
1487    } else {
1488      let is_zero = self.is_zero_or_subnormal();
1489      let res = underflow.blend(Self::nan_log(), res);
1490      let res = is_zero.blend(Self::infinity(), res);
1491      let res = overflow.blend(self, res);
1492      res
1493    }
1494  }
1495
1496  #[inline]
1497  #[must_use]
1498  pub fn log2(self) -> Self {
1499    Self::ln(self) * Self::LOG2_E
1500  }
1501  #[inline]
1502  #[must_use]
1503  pub fn log10(self) -> Self {
1504    Self::ln(self) * Self::LOG10_E
1505  }
1506
1507  #[inline]
1508  #[must_use]
1509  pub fn pow_f64x2(self, y: Self) -> Self {
1510    const_f64_as_f64x2!(ln2d_hi, 0.693145751953125);
1511    const_f64_as_f64x2!(ln2d_lo, 1.42860682030941723212E-6);
1512    const_f64_as_f64x2!(P0log, 2.0039553499201281259648E1);
1513    const_f64_as_f64x2!(P1log, 5.7112963590585538103336E1);
1514    const_f64_as_f64x2!(P2log, 6.0949667980987787057556E1);
1515    const_f64_as_f64x2!(P3log, 2.9911919328553073277375E1);
1516    const_f64_as_f64x2!(P4log, 6.5787325942061044846969E0);
1517    const_f64_as_f64x2!(P5log, 4.9854102823193375972212E-1);
1518    const_f64_as_f64x2!(P6log, 4.5270000862445199635215E-5);
1519    const_f64_as_f64x2!(Q0log, 6.0118660497603843919306E1);
1520    const_f64_as_f64x2!(Q1log, 2.1642788614495947685003E2);
1521    const_f64_as_f64x2!(Q2log, 3.0909872225312059774938E2);
1522    const_f64_as_f64x2!(Q3log, 2.2176239823732856465394E2);
1523    const_f64_as_f64x2!(Q4log, 8.3047565967967209469434E1);
1524    const_f64_as_f64x2!(Q5log, 1.5062909083469192043167E1);
1525
1526    // Taylor expansion constants
1527    const_f64_as_f64x2!(p2, 1.0 / 2.0); // coefficients for Taylor expansion of exp
1528    const_f64_as_f64x2!(p3, 1.0 / 6.0);
1529    const_f64_as_f64x2!(p4, 1.0 / 24.0);
1530    const_f64_as_f64x2!(p5, 1.0 / 120.0);
1531    const_f64_as_f64x2!(p6, 1.0 / 720.0);
1532    const_f64_as_f64x2!(p7, 1.0 / 5040.0);
1533    const_f64_as_f64x2!(p8, 1.0 / 40320.0);
1534    const_f64_as_f64x2!(p9, 1.0 / 362880.0);
1535    const_f64_as_f64x2!(p10, 1.0 / 3628800.0);
1536    const_f64_as_f64x2!(p11, 1.0 / 39916800.0);
1537    const_f64_as_f64x2!(p12, 1.0 / 479001600.0);
1538    const_f64_as_f64x2!(p13, 1.0 / 6227020800.0);
1539
1540    let x1 = self.abs();
1541    let x = x1.fraction_2();
1542    let mask = x.cmp_gt(f64x2::SQRT_2 * f64x2::HALF);
1543    let x = (!mask).blend(x + x, x);
1544    let x = x - f64x2::ONE;
1545    let x2 = x * x;
1546    let px = polynomial_6!(x, P0log, P1log, P2log, P3log, P4log, P5log, P6log);
1547    let px = px * x * x2;
1548    let qx = polynomial_6n!(x, Q0log, Q1log, Q2log, Q3log, Q4log, Q5log);
1549    let lg1 = px / qx;
1550
1551    let ef = x1.exponent();
1552    let ef = mask.blend(ef + f64x2::ONE, ef);
1553    let e1 = (ef * y).round();
1554    let yr = ef.mul_sub(y, e1);
1555
1556    let lg = f64x2::HALF.mul_neg_add(x2, x) + lg1;
1557    let x2err = (f64x2::HALF * x).mul_sub(x, f64x2::HALF * x2);
1558    let lg_err = f64x2::HALF.mul_add(x2, lg - x) - lg1;
1559
1560    let e2 = (lg * y * f64x2::LOG2_E).round();
1561    let v = lg.mul_sub(y, e2 * ln2d_hi);
1562    let v = e2.mul_neg_add(ln2d_lo, v);
1563    let v = v - (lg_err + x2err).mul_sub(y, yr * f64x2::LN_2);
1564
1565    let x = v;
1566    let e3 = (x * f64x2::LOG2_E).round();
1567    let x = e3.mul_neg_add(f64x2::LN_2, x);
1568    let z =
1569      polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
1570        + f64x2::ONE;
1571    let ee = e1 + e2 + e3;
1572    let ei = cast::<_, i64x2>(ee.round_int());
1573    let ej = cast::<_, i64x2>(ei + (cast::<_, i64x2>(z) >> 52));
1574
1575    let overflow = cast::<_, f64x2>(!ej.cmp_lt(i64x2::splat(0x07FF)))
1576      | ee.cmp_gt(f64x2::splat(3000.0));
1577    let underflow = cast::<_, f64x2>(!ej.cmp_gt(i64x2::splat(0x000)))
1578      | ee.cmp_lt(f64x2::splat(-3000.0));
1579
1580    // Add exponent by integer addition
1581    let z = cast::<_, f64x2>(cast::<_, i64x2>(z) + (ei << 52));
1582
1583    // Check for overflow/underflow
1584    let z = if (overflow | underflow).any() {
1585      let z = underflow.blend(f64x2::ZERO, z);
1586      overflow.blend(Self::infinity(), z)
1587    } else {
1588      z
1589    };
1590
1591    // Check for self == 0
1592    let x_zero = self.is_zero_or_subnormal();
1593    let z = x_zero.blend(
1594      y.cmp_lt(f64x2::ZERO).blend(
1595        Self::infinity(),
1596        y.cmp_eq(f64x2::ZERO).blend(f64x2::ONE, f64x2::ZERO),
1597      ),
1598      z,
1599    );
1600
1601    let x_sign = self.sign_bit();
1602    let z = if x_sign.any() {
1603      // Y into an integer
1604      let yi = y.cmp_eq(y.round());
1605      // Is y odd?
1606      let y_odd = cast::<_, i64x2>(y.round_int() << 63).round_float();
1607
1608      let z1 =
1609        yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow()));
1610      x_sign.blend(z1, z)
1611    } else {
1612      z
1613    };
1614
1615    let x_finite = self.is_finite();
1616    let y_finite = y.is_finite();
1617    let e_finite = ee.is_finite();
1618
1619    if (x_finite & y_finite & (e_finite | x_zero)).all() {
1620      return z;
1621    }
1622
1623    (self.is_nan() | y.is_nan()).blend(self + y, z)
1624  }
1625
1626  #[inline]
1627  pub fn powf(self, y: f64) -> Self {
1628    Self::pow_f64x2(self, f64x2::splat(y))
1629  }
1630
1631  #[inline]
1632  pub fn to_array(self) -> [f64; 2] {
1633    cast(self)
1634  }
1635
1636  #[inline]
1637  pub fn as_array_ref(&self) -> &[f64; 2] {
1638    cast_ref(self)
1639  }
1640
1641  #[inline]
1642  pub fn as_array_mut(&mut self) -> &mut [f64; 2] {
1643    cast_mut(self)
1644  }
1645
1646  /// Converts the lower two `i32` lanes to two `f64` lanes (and dropping the
1647  /// higher two `i32` lanes)
1648  #[inline]
1649  pub fn from_i32x4_lower2(v: i32x4) -> Self {
1650    pick! {
1651      if #[cfg(target_feature="sse2")] {
1652        Self { sse: convert_to_m128d_from_lower2_i32_m128i(v.sse) }
1653      } else if #[cfg(target_feature="simd128")] {
1654        Self { simd: f64x2_convert_low_i32x4(v.simd)}
1655      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
1656        Self { neon: unsafe { vcvtq_f64_s64(vmovl_s32(vget_low_s32(v.neon))) }}
1657      } else {
1658        Self { arr: [
1659            v.as_array_ref()[0] as f64,
1660            v.as_array_ref()[1] as f64,
1661        ]}
1662      }
1663    }
1664  }
1665}
1666
1667impl From<i32x4> for f64x2 {
1668  /// Converts the lower two `i32` lanes to two `f64` lanes (and dropping the
1669  /// higher two `i32` lanes)
1670  #[inline]
1671  fn from(v: i32x4) -> Self {
1672    Self::from_i32x4_lower2(v)
1673  }
1674}
1675
1676impl Not for f64x2 {
1677  type Output = Self;
1678  #[inline]
1679  fn not(self) -> Self {
1680    pick! {
1681      if #[cfg(target_feature="sse2")] {
1682        Self { sse: self.sse.not() }
1683      } else if #[cfg(target_feature="simd128")] {
1684        Self { simd: v128_not(self.simd) }
1685      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1686        unsafe {Self { neon: vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(self.neon))) }}
1687      } else {
1688        Self { arr: [
1689          f64::from_bits(!self.arr[0].to_bits()),
1690          f64::from_bits(!self.arr[1].to_bits()),
1691        ]}
1692      }
1693    }
1694  }
1695}
wide/f64x2_.rs

wide/
f64x2_.rs