wide/
i32x4_.rs

1use super::*;
2
3pick! {
4  if #[cfg(target_feature="sse2")] {
5    #[derive(Default, Clone, Copy, PartialEq, Eq)]
6    #[repr(C, align(16))]
7    pub struct i32x4 { pub(crate) sse: m128i }
8  } else if #[cfg(target_feature="simd128")] {
9    use core::arch::wasm32::*;
10
11    #[derive(Clone, Copy)]
12    #[repr(transparent)]
13    pub struct i32x4 { pub(crate) simd: v128 }
14
15    impl Default for i32x4 {
16      fn default() -> Self {
17        Self::splat(0)
18      }
19    }
20
21    impl PartialEq for i32x4 {
22      fn eq(&self, other: &Self) -> bool {
23        u32x4_all_true(i32x4_eq(self.simd, other.simd))
24      }
25    }
26
27    impl Eq for i32x4 { }
28  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
29    use core::arch::aarch64::*;
30    #[repr(C)]
31    #[derive(Copy, Clone)]
32    pub struct i32x4 { pub(crate) neon : int32x4_t }
33
34    impl Default for i32x4 {
35      #[inline]
36      #[must_use]
37      fn default() -> Self {
38        Self::splat(0)
39      }
40    }
41
42    impl PartialEq for i32x4 {
43      #[inline]
44      #[must_use]
45      fn eq(&self, other: &Self) -> bool {
46        unsafe { vminvq_u32(vceqq_s32(self.neon, other.neon))==u32::MAX }
47      }
48    }
49
50    impl Eq for i32x4 { }
51  } else {
52    #[derive(Default, Clone, Copy, PartialEq, Eq)]
53    #[repr(C, align(16))]
54    pub struct i32x4 { pub(crate) arr: [i32;4] }
55  }
56}
57
58int_uint_consts!(i32, 4, i32x4, 128);
59
60unsafe impl Zeroable for i32x4 {}
61unsafe impl Pod for i32x4 {}
62
63impl Add for i32x4 {
64  type Output = Self;
65  #[inline]
66  #[must_use]
67  fn add(self, rhs: Self) -> Self::Output {
68    pick! {
69      if #[cfg(target_feature="sse2")] {
70        Self { sse: add_i32_m128i(self.sse, rhs.sse) }
71      } else if #[cfg(target_feature="simd128")] {
72        Self { simd: i32x4_add(self.simd, rhs.simd) }
73      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
74        unsafe { Self { neon: vaddq_s32(self.neon, rhs.neon) } }
75      } else {
76        Self { arr: [
77          self.arr[0].wrapping_add(rhs.arr[0]),
78          self.arr[1].wrapping_add(rhs.arr[1]),
79          self.arr[2].wrapping_add(rhs.arr[2]),
80          self.arr[3].wrapping_add(rhs.arr[3]),
81        ]}
82      }
83    }
84  }
85}
86
87impl Sub for i32x4 {
88  type Output = Self;
89  #[inline]
90  #[must_use]
91  fn sub(self, rhs: Self) -> Self::Output {
92    pick! {
93      if #[cfg(target_feature="sse2")] {
94        Self { sse: sub_i32_m128i(self.sse, rhs.sse) }
95      } else if #[cfg(target_feature="simd128")] {
96        Self { simd: i32x4_sub(self.simd, rhs.simd) }
97      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
98        unsafe {Self { neon: vsubq_s32(self.neon, rhs.neon) }}
99      } else {
100        Self { arr: [
101          self.arr[0].wrapping_sub(rhs.arr[0]),
102          self.arr[1].wrapping_sub(rhs.arr[1]),
103          self.arr[2].wrapping_sub(rhs.arr[2]),
104          self.arr[3].wrapping_sub(rhs.arr[3]),
105        ]}
106      }
107    }
108  }
109}
110
111impl Mul for i32x4 {
112  type Output = Self;
113  #[inline]
114  #[must_use]
115  fn mul(self, rhs: Self) -> Self::Output {
116    pick! {
117      if #[cfg(target_feature="sse4.1")] {
118        Self { sse: mul_32_m128i(self.sse, rhs.sse) }
119      } else if #[cfg(target_feature="simd128")] {
120        Self { simd: i32x4_mul(self.simd, rhs.simd) }
121      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
122        unsafe {Self { neon: vmulq_s32(self.neon, rhs.neon) }}
123      } else {
124        let arr1: [i32; 4] = cast(self);
125        let arr2: [i32; 4] = cast(rhs);
126        cast([
127          arr1[0].wrapping_mul(arr2[0]),
128          arr1[1].wrapping_mul(arr2[1]),
129          arr1[2].wrapping_mul(arr2[2]),
130          arr1[3].wrapping_mul(arr2[3]),
131        ])
132      }
133    }
134  }
135}
136
137impl Add<i32> for i32x4 {
138  type Output = Self;
139  #[inline]
140  #[must_use]
141  fn add(self, rhs: i32) -> Self::Output {
142    self.add(Self::splat(rhs))
143  }
144}
145
146impl Sub<i32> for i32x4 {
147  type Output = Self;
148  #[inline]
149  #[must_use]
150  fn sub(self, rhs: i32) -> Self::Output {
151    self.sub(Self::splat(rhs))
152  }
153}
154
155impl Mul<i32> for i32x4 {
156  type Output = Self;
157  #[inline]
158  #[must_use]
159  fn mul(self, rhs: i32) -> Self::Output {
160    self.mul(Self::splat(rhs))
161  }
162}
163
164impl Add<i32x4> for i32 {
165  type Output = i32x4;
166  #[inline]
167  #[must_use]
168  fn add(self, rhs: i32x4) -> Self::Output {
169    i32x4::splat(self).add(rhs)
170  }
171}
172
173impl Sub<i32x4> for i32 {
174  type Output = i32x4;
175  #[inline]
176  #[must_use]
177  fn sub(self, rhs: i32x4) -> Self::Output {
178    i32x4::splat(self).sub(rhs)
179  }
180}
181
182impl Mul<i32x4> for i32 {
183  type Output = i32x4;
184  #[inline]
185  #[must_use]
186  fn mul(self, rhs: i32x4) -> Self::Output {
187    i32x4::splat(self).mul(rhs)
188  }
189}
190
191impl BitAnd for i32x4 {
192  type Output = Self;
193  #[inline]
194  #[must_use]
195  fn bitand(self, rhs: Self) -> Self::Output {
196    pick! {
197      if #[cfg(target_feature="sse2")] {
198        Self { sse: bitand_m128i(self.sse, rhs.sse) }
199      } else if #[cfg(target_feature="simd128")] {
200        Self { simd: v128_and(self.simd, rhs.simd) }
201      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
202        unsafe {Self { neon: vandq_s32(self.neon, rhs.neon) }}
203      } else {
204        Self { arr: [
205          self.arr[0].bitand(rhs.arr[0]),
206          self.arr[1].bitand(rhs.arr[1]),
207          self.arr[2].bitand(rhs.arr[2]),
208          self.arr[3].bitand(rhs.arr[3]),
209        ]}
210      }
211    }
212  }
213}
214
215impl BitOr for i32x4 {
216  type Output = Self;
217  #[inline]
218  #[must_use]
219  fn bitor(self, rhs: Self) -> Self::Output {
220    pick! {
221      if #[cfg(target_feature="sse2")] {
222        Self { sse: bitor_m128i(self.sse, rhs.sse) }
223      } else if #[cfg(target_feature="simd128")] {
224        Self { simd: v128_or(self.simd, rhs.simd) }
225      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
226        unsafe {Self { neon: vorrq_s32(self.neon, rhs.neon) }}
227      } else {
228        Self { arr: [
229          self.arr[0].bitor(rhs.arr[0]),
230          self.arr[1].bitor(rhs.arr[1]),
231          self.arr[2].bitor(rhs.arr[2]),
232          self.arr[3].bitor(rhs.arr[3]),
233        ]}
234      }
235    }
236  }
237}
238
239impl BitXor for i32x4 {
240  type Output = Self;
241  #[inline]
242  #[must_use]
243  fn bitxor(self, rhs: Self) -> Self::Output {
244    pick! {
245      if #[cfg(target_feature="sse2")] {
246        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
247      } else if #[cfg(target_feature="simd128")] {
248        Self { simd: v128_xor(self.simd, rhs.simd) }
249      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
250        unsafe {Self { neon: veorq_s32(self.neon, rhs.neon) }}
251      } else {
252        Self { arr: [
253          self.arr[0].bitxor(rhs.arr[0]),
254          self.arr[1].bitxor(rhs.arr[1]),
255          self.arr[2].bitxor(rhs.arr[2]),
256          self.arr[3].bitxor(rhs.arr[3]),
257        ]}
258      }
259    }
260  }
261}
262
263macro_rules! impl_shl_t_for_i32x4 {
264  ($($shift_type:ty),+ $(,)?) => {
265    $(impl Shl<$shift_type> for i32x4 {
266      type Output = Self;
267      /// Shifts all lanes by the value given.
268      #[inline]
269      #[must_use]
270      fn shl(self, rhs: $shift_type) -> Self::Output {
271        pick! {
272          if #[cfg(target_feature="sse2")] {
273            let shift = cast([rhs as u64, 0]);
274            Self { sse: shl_all_u32_m128i(self.sse, shift) }
275          } else if #[cfg(target_feature="simd128")] {
276            Self { simd: i32x4_shl(self.simd, rhs as u32) }
277          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
278            unsafe {Self { neon: vshlq_s32(self.neon, vmovq_n_s32(rhs as i32)) }}
279          } else {
280            let u = rhs as u64;
281            Self { arr: [
282              self.arr[0] << u,
283              self.arr[1] << u,
284              self.arr[2] << u,
285              self.arr[3] << u,
286            ]}
287          }
288        }
289      }
290    })+
291  };
292}
293impl_shl_t_for_i32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
294
295macro_rules! impl_shr_t_for_i32x4 {
296  ($($shift_type:ty),+ $(,)?) => {
297    $(impl Shr<$shift_type> for i32x4 {
298      type Output = Self;
299      /// Shifts all lanes by the value given.
300      #[inline]
301      #[must_use]
302      fn shr(self, rhs: $shift_type) -> Self::Output {
303        pick! {
304          if #[cfg(target_feature="sse2")] {
305            let shift = cast([rhs as u64, 0]);
306            Self { sse: shr_all_i32_m128i(self.sse, shift) }
307          } else if #[cfg(target_feature="simd128")] {
308            Self { simd: i32x4_shr(self.simd, rhs as u32) }
309          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
310            unsafe {Self { neon: vshlq_s32(self.neon, vmovq_n_s32( -(rhs as i32))) }}
311          } else {
312            let u = rhs as u64;
313            Self { arr: [
314              self.arr[0] >> u,
315              self.arr[1] >> u,
316              self.arr[2] >> u,
317              self.arr[3] >> u,
318            ]}
319          }
320        }
321      }
322    })+
323  };
324}
325impl_shr_t_for_i32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
326
327/// Shifts lanes by the corresponding lane.
328///
329/// Bitwise shift-right; yields `self >> mask(rhs)`, where mask removes any
330/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
331/// of the type. (same as `wrapping_shr`)
332impl Shr<i32x4> for i32x4 {
333  type Output = Self;
334
335  #[inline]
336  #[must_use]
337  fn shr(self, rhs: i32x4) -> Self::Output {
338    pick! {
339      if #[cfg(target_feature="avx2")] {
340        // mask the shift count to 31 to have same behavior on all platforms
341        let shift_by = bitand_m128i(rhs.sse, set_splat_i32_m128i(31));
342        Self { sse: shr_each_i32_m128i(self.sse, shift_by) }
343      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
344        unsafe {
345          // mask the shift count to 31 to have same behavior on all platforms
346          // no right shift, have to pass negative value to left shift on neon
347          let shift_by = vnegq_s32(vandq_s32(rhs.neon, vmovq_n_s32(31)));
348          Self { neon: vshlq_s32(self.neon, shift_by) }
349        }
350      } else {
351        let arr: [i32; 4] = cast(self);
352        let rhs: [i32; 4] = cast(rhs);
353        cast([
354          arr[0].wrapping_shr(rhs[0] as u32),
355          arr[1].wrapping_shr(rhs[1] as u32),
356          arr[2].wrapping_shr(rhs[2] as u32),
357          arr[3].wrapping_shr(rhs[3] as u32),
358        ])
359      }
360    }
361  }
362}
363
364/// Shifts lanes by the corresponding lane.
365///
366/// Bitwise shift-left; yields `self << mask(rhs)`, where mask removes any
367/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
368/// of the type. (same as `wrapping_shl`)
369impl Shl<i32x4> for i32x4 {
370  type Output = Self;
371
372  #[inline]
373  #[must_use]
374  fn shl(self, rhs: i32x4) -> Self::Output {
375    pick! {
376      if #[cfg(target_feature="avx2")] {
377        // mask the shift count to 31 to have same behavior on all platforms
378        let shift_by = bitand_m128i(rhs.sse, set_splat_i32_m128i(31));
379        Self { sse: shl_each_u32_m128i(self.sse, shift_by) }
380      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
381        unsafe {
382          // mask the shift count to 31 to have same behavior on all platforms
383          let shift_by = vandq_s32(rhs.neon, vmovq_n_s32(31));
384          Self { neon: vshlq_s32(self.neon, shift_by) }
385        }
386      } else {
387        let arr: [i32; 4] = cast(self);
388        let rhs: [i32; 4] = cast(rhs);
389        cast([
390          arr[0].wrapping_shl(rhs[0] as u32),
391          arr[1].wrapping_shl(rhs[1] as u32),
392          arr[2].wrapping_shl(rhs[2] as u32),
393          arr[3].wrapping_shl(rhs[3] as u32),
394        ])
395      }
396    }
397  }
398}
399
400impl CmpEq for i32x4 {
401  type Output = Self;
402  #[inline]
403  #[must_use]
404  fn cmp_eq(self, rhs: Self) -> Self::Output {
405    pick! {
406      if #[cfg(target_feature="sse2")] {
407        Self { sse: cmp_eq_mask_i32_m128i(self.sse, rhs.sse) }
408      } else if #[cfg(target_feature="simd128")] {
409        Self { simd: i32x4_eq(self.simd, rhs.simd) }
410      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
411        unsafe {Self { neon: vreinterpretq_s32_u32(vceqq_s32(self.neon, rhs.neon)) }}
412      } else {
413        Self { arr: [
414          if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
415          if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
416          if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
417          if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
418        ]}
419      }
420    }
421  }
422}
423
424impl CmpGt for i32x4 {
425  type Output = Self;
426  #[inline]
427  #[must_use]
428  fn cmp_gt(self, rhs: Self) -> Self::Output {
429    pick! {
430      if #[cfg(target_feature="sse2")] {
431        Self { sse: cmp_gt_mask_i32_m128i(self.sse, rhs.sse) }
432      } else if #[cfg(target_feature="simd128")] {
433        Self { simd: i32x4_gt(self.simd, rhs.simd) }
434      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
435        unsafe {Self { neon: vreinterpretq_s32_u32(vcgtq_s32(self.neon, rhs.neon)) }}
436      } else {
437        Self { arr: [
438          if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
439          if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
440          if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
441          if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
442        ]}
443      }
444    }
445  }
446}
447
448impl CmpLt for i32x4 {
449  type Output = Self;
450  #[inline]
451  #[must_use]
452  fn cmp_lt(self, rhs: Self) -> Self::Output {
453    pick! {
454      if #[cfg(target_feature="sse2")] {
455        Self { sse: cmp_lt_mask_i32_m128i(self.sse, rhs.sse) }
456      } else if #[cfg(target_feature="simd128")] {
457        Self { simd: i32x4_lt(self.simd, rhs.simd) }
458      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
459        unsafe {Self { neon: vreinterpretq_s32_u32(vcltq_s32(self.neon, rhs.neon)) }}
460      } else {
461        Self { arr: [
462          if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
463          if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
464          if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
465          if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
466        ]}
467      }
468    }
469  }
470}
471
472impl i32x4 {
473  #[inline]
474  #[must_use]
475  pub const fn new(array: [i32; 4]) -> Self {
476    unsafe { core::mem::transmute(array) }
477  }
478  #[inline]
479  #[must_use]
480  pub fn blend(self, t: Self, f: Self) -> Self {
481    pick! {
482      if #[cfg(target_feature="sse4.1")] {
483        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
484      } else if #[cfg(target_feature="simd128")] {
485        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
486      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
487        unsafe {Self { neon: vbslq_s32(vreinterpretq_u32_s32(self.neon), t.neon, f.neon) }}
488      } else {
489        generic_bit_blend(self, t, f)
490      }
491    }
492  }
493
494  /// Multiplies corresponding 32 bit lanes and returns the 64 bit result
495  /// on the corresponding lanes.
496  ///
497  /// Effectively does two multiplies on 128 bit platforms, but is easier
498  /// to use than wrapping mul_widen_i32_odd_m128i individually.
499  #[inline]
500  #[must_use]
501  pub fn mul_widen(self, rhs: Self) -> i64x4 {
502    pick! {
503      if #[cfg(target_feature="avx2")] {
504        let a = convert_to_i64_m256i_from_i32_m128i(self.sse);
505        let b = convert_to_i64_m256i_from_i32_m128i(rhs.sse);
506        cast(mul_i64_low_bits_m256i(a, b))
507      } else if #[cfg(target_feature="sse4.1")] {
508          let evenp = mul_widen_i32_odd_m128i(self.sse, rhs.sse);
509
510          let oddp = mul_widen_i32_odd_m128i(
511            shr_imm_u64_m128i::<32>(self.sse),
512            shr_imm_u64_m128i::<32>(rhs.sse));
513
514          i64x4 {
515            a: i64x2 { sse: unpack_low_i64_m128i(evenp, oddp)},
516            b: i64x2 { sse: unpack_high_i64_m128i(evenp, oddp)}
517          }
518      } else if #[cfg(target_feature="simd128")] {
519          i64x4 {
520            a: i64x2 { simd: i64x2_extmul_low_i32x4(self.simd, rhs.simd) },
521            b: i64x2 { simd: i64x2_extmul_high_i32x4(self.simd, rhs.simd) },
522          }
523      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
524        unsafe {
525          i64x4 { a: i64x2 { neon: vmull_s32(vget_low_s32(self.neon), vget_low_s32(rhs.neon)) },
526                  b: i64x2 { neon: vmull_s32(vget_high_s32(self.neon), vget_high_s32(rhs.neon)) } }
527        }
528      } else {
529        let a: [i32; 4] = cast(self);
530        let b: [i32; 4] = cast(rhs);
531        cast([
532          i64::from(a[0]) * i64::from(b[0]),
533          i64::from(a[1]) * i64::from(b[1]),
534          i64::from(a[2]) * i64::from(b[2]),
535          i64::from(a[3]) * i64::from(b[3]),
536        ])
537      }
538    }
539  }
540
541  #[inline]
542  #[must_use]
543  pub fn abs(self) -> Self {
544    pick! {
545      if #[cfg(target_feature="ssse3")] {
546        Self { sse: abs_i32_m128i(self.sse) }
547      } else if #[cfg(target_feature="simd128")] {
548        Self { simd: i32x4_abs(self.simd) }
549      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
550        unsafe {Self { neon: vabsq_s32(self.neon) }}
551      } else {
552        let arr: [i32; 4] = cast(self);
553        cast([
554          arr[0].wrapping_abs(),
555          arr[1].wrapping_abs(),
556          arr[2].wrapping_abs(),
557          arr[3].wrapping_abs(),
558        ])
559      }
560    }
561  }
562
563  #[inline]
564  #[must_use]
565  pub fn unsigned_abs(self) -> u32x4 {
566    pick! {
567      if #[cfg(target_feature="ssse3")] {
568        u32x4 { sse: abs_i32_m128i(self.sse) }
569      } else if #[cfg(target_feature="simd128")] {
570        u32x4 { simd: i32x4_abs(self.simd) }
571      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
572        unsafe {u32x4 { neon: vreinterpretq_u32_s32(vabsq_s32(self.neon)) }}
573      } else {
574        let arr: [i32; 4] = cast(self);
575        cast([
576          arr[0].unsigned_abs(),
577          arr[1].unsigned_abs(),
578          arr[2].unsigned_abs(),
579          arr[3].unsigned_abs(),
580        ])
581      }
582    }
583  }
584
585  /// horizontal add of all the elements of the vector
586  #[inline]
587  #[must_use]
588  pub fn reduce_add(self) -> i32 {
589    pick! {
590      if #[cfg(target_feature="sse2")] {
591        let hi64  = unpack_high_i64_m128i(self.sse, self.sse);
592        let sum64 = add_i32_m128i(hi64, self.sse);
593        let hi32  = shuffle_ai_f32_all_m128i::<0b10_11_00_01>(sum64);    // Swap the low two elements
594        let sum32 = add_i32_m128i(sum64, hi32);
595        get_i32_from_m128i_s(sum32)
596      } else {
597        let arr: [i32; 4] = cast(self);
598        arr[0].wrapping_add(arr[1]).wrapping_add(
599        arr[2].wrapping_add(arr[3]))
600      }
601    }
602  }
603
604  /// horizontal max of all the elements of the vector
605  #[inline]
606  #[must_use]
607  pub fn reduce_max(self) -> i32 {
608    let arr: [i32; 4] = cast(self);
609    arr[0].max(arr[1]).max(arr[2].max(arr[3]))
610  }
611
612  /// horizontal min of all the elements of the vector
613  #[inline]
614  #[must_use]
615  pub fn reduce_min(self) -> i32 {
616    let arr: [i32; 4] = cast(self);
617    arr[0].min(arr[1]).min(arr[2].min(arr[3]))
618  }
619
620  #[inline]
621  #[must_use]
622  pub fn max(self, rhs: Self) -> Self {
623    pick! {
624      if #[cfg(target_feature="sse4.1")] {
625        Self { sse: max_i32_m128i(self.sse, rhs.sse) }
626      } else if #[cfg(target_feature="simd128")] {
627        Self { simd: i32x4_max(self.simd, rhs.simd) }
628      } else {
629        self.cmp_lt(rhs).blend(rhs, self)
630      }
631    }
632  }
633  #[inline]
634  #[must_use]
635  pub fn min(self, rhs: Self) -> Self {
636    pick! {
637      if #[cfg(target_feature="sse4.1")] {
638        Self { sse: min_i32_m128i(self.sse, rhs.sse) }
639      } else if #[cfg(target_feature="simd128")] {
640        Self { simd: i32x4_min(self.simd, rhs.simd) }
641      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
642        unsafe {Self { neon: vminq_s32(self.neon, rhs.neon) }}
643      } else {
644        self.cmp_lt(rhs).blend(self, rhs)
645      }
646    }
647  }
648  #[inline]
649  #[must_use]
650  pub fn round_float(self) -> f32x4 {
651    pick! {
652      if #[cfg(target_feature="sse2")] {
653        cast(convert_to_m128_from_i32_m128i(self.sse))
654      } else if #[cfg(target_feature="simd128")] {
655        cast(Self { simd: f32x4_convert_i32x4(self.simd) })
656      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
657        cast(unsafe {Self { neon: vreinterpretq_s32_f32(vcvtq_f32_s32(self.neon)) }})
658      } else {
659        let arr: [i32; 4] = cast(self);
660        cast([
661          arr[0] as f32,
662          arr[1] as f32,
663          arr[2] as f32,
664          arr[3] as f32,
665        ])
666      }
667    }
668  }
669
670  #[inline]
671  #[must_use]
672  pub fn move_mask(self) -> i32 {
673    pick! {
674      if #[cfg(target_feature="sse2")] {
675        // use f32 move_mask since it is the same size as i32
676        move_mask_m128(cast(self.sse))
677      } else if #[cfg(target_feature="simd128")] {
678        u32x4_bitmask(self.simd) as i32
679      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
680        unsafe
681        {
682          // set all to 1 if top bit is set, else 0
683          let masked = vcltq_s32(self.neon, vdupq_n_s32(0));
684
685          // select the right bit out of each lane
686          let selectbit : uint32x4_t = core::mem::transmute([1u32, 2, 4, 8]);
687          let r = vandq_u32(masked, selectbit);
688
689          // horizontally add the 32-bit lanes
690          vaddvq_u32(r) as i32
691         }
692      } else {
693        ((self.arr[0] < 0) as i32) << 0 |
694        ((self.arr[1] < 0) as i32) << 1 |
695        ((self.arr[2] < 0) as i32) << 2 |
696        ((self.arr[3] < 0) as i32) << 3
697      }
698    }
699  }
700
701  #[inline]
702  #[must_use]
703  pub fn any(self) -> bool {
704    pick! {
705      if #[cfg(target_feature="sse2")] {
706        // use f32 move_mask since it is the same size as i32
707        move_mask_m128(cast(self.sse)) != 0
708      } else if #[cfg(target_feature="simd128")] {
709        u32x4_bitmask(self.simd) != 0
710      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
711        // some lanes are negative
712        unsafe {
713          vminvq_s32(self.neon) < 0
714        }
715      } else {
716        let v : [u64;2] = cast(self);
717        ((v[0] | v[1]) & 0x8000000080000000) != 0
718      }
719    }
720  }
721
722  #[inline]
723  #[must_use]
724  pub fn all(self) -> bool {
725    pick! {
726      if #[cfg(target_feature="sse2")] {
727        // use f32 move_mask since it is the same size as i32
728        move_mask_m128(cast(self.sse)) == 0b1111
729      } else if #[cfg(target_feature="simd128")] {
730        u32x4_bitmask(self.simd) == 0b1111
731      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
732        // all lanes are negative
733        unsafe {
734          vmaxvq_s32(self.neon) < 0
735        }
736      } else {
737        let v : [u64;2] = cast(self);
738        (v[0] & v[1] & 0x8000000080000000) == 0x8000000080000000
739      }
740    }
741  }
742
743  #[inline]
744  #[must_use]
745  pub fn none(self) -> bool {
746    !self.any()
747  }
748
749  /// Transpose matrix of 4x4 `i32` matrix. Currently only accelerated on SSE.
750  #[must_use]
751  #[inline]
752  pub fn transpose(data: [i32x4; 4]) -> [i32x4; 4] {
753    pick! {
754      if #[cfg(target_feature="sse")] {
755        let mut e0 = data[0];
756        let mut e1 = data[1];
757        let mut e2 = data[2];
758        let mut e3 = data[3];
759
760        transpose_four_m128(
761          cast_mut(&mut e0.sse),
762          cast_mut(&mut e1.sse),
763          cast_mut(&mut e2.sse),
764          cast_mut(&mut e3.sse),
765        );
766
767        [e0, e1, e2, e3]
768      } else {
769        #[inline(always)]
770        fn transpose_column(data: &[i32x4; 4], index: usize) -> i32x4 {
771          i32x4::new([
772            data[0].as_array_ref()[index],
773            data[1].as_array_ref()[index],
774            data[2].as_array_ref()[index],
775            data[3].as_array_ref()[index],
776          ])
777        }
778
779        [
780          transpose_column(&data, 0),
781          transpose_column(&data, 1),
782          transpose_column(&data, 2),
783          transpose_column(&data, 3),
784        ]
785      }
786    }
787  }
788
789  #[inline]
790  pub fn to_array(self) -> [i32; 4] {
791    cast(self)
792  }
793
794  #[inline]
795  pub fn as_array_ref(&self) -> &[i32; 4] {
796    cast_ref(self)
797  }
798
799  #[inline]
800  pub fn as_array_mut(&mut self) -> &mut [i32; 4] {
801    cast_mut(self)
802  }
803}
wide/i32x4_.rs

wide/
i32x4_.rs