wide/
u32x16_.rs

1use super::*;
2
3pick! {
4  if #[cfg(target_feature="avx512f")] {
5    #[derive(Default, Clone, Copy, PartialEq, Eq)]
6    #[repr(C, align(64))]
7    pub struct u32x16 { pub(crate) avx512: m512i }
8  } else {
9    #[derive(Default, Clone, Copy, PartialEq, Eq)]
10    #[repr(C, align(64))]
11    pub struct u32x16 { pub(crate) a : u32x8, pub(crate) b : u32x8 }
12  }
13}
14
15int_uint_consts!(u32, 16, u32x16, 512);
16
17unsafe impl Zeroable for u32x16 {}
18unsafe impl Pod for u32x16 {}
19
20impl AlignTo for u32x16 {
21  type Elem = u32;
22}
23
24impl Add for u32x16 {
25  type Output = Self;
26  #[inline]
27  fn add(self, rhs: Self) -> Self::Output {
28    pick! {
29      if #[cfg(target_feature="avx512f")] {
30        Self { avx512: add_i32_m512i(self.avx512, rhs.avx512) }
31      } else {
32        Self {
33          a : self.a.add(rhs.a),
34          b : self.b.add(rhs.b),
35        }
36      }
37    }
38  }
39}
40
41impl Sub for u32x16 {
42  type Output = Self;
43  #[inline]
44  fn sub(self, rhs: Self) -> Self::Output {
45    pick! {
46      if #[cfg(target_feature="avx512f")] {
47        Self { avx512: sub_i32_m512i(self.avx512, rhs.avx512) }
48      } else {
49        Self {
50          a : self.a.sub(rhs.a),
51          b : self.b.sub(rhs.b),
52        }
53      }
54    }
55  }
56}
57
58impl Add<u32> for u32x16 {
59  type Output = Self;
60  #[inline]
61  fn add(self, rhs: u32) -> Self::Output {
62    self.add(Self::splat(rhs))
63  }
64}
65
66impl Sub<u32> for u32x16 {
67  type Output = Self;
68  #[inline]
69  fn sub(self, rhs: u32) -> Self::Output {
70    self.sub(Self::splat(rhs))
71  }
72}
73
74impl Add<u32x16> for u32 {
75  type Output = u32x16;
76  #[inline]
77  fn add(self, rhs: u32x16) -> Self::Output {
78    u32x16::splat(self).add(rhs)
79  }
80}
81
82impl Sub<u32x16> for u32 {
83  type Output = u32x16;
84  #[inline]
85  fn sub(self, rhs: u32x16) -> Self::Output {
86    u32x16::splat(self).sub(rhs)
87  }
88}
89
90impl Mul for u32x16 {
91  type Output = Self;
92  #[inline]
93  fn mul(self, rhs: Self) -> Self::Output {
94    pick! {
95      if #[cfg(target_feature="avx512f")] {
96        Self { avx512: mul_i32_keep_low_m512i(self.avx512, rhs.avx512) }
97      } else {
98        Self {
99          a : self.a.mul(rhs.a),
100          b : self.b.mul(rhs.b),
101        }
102      }
103    }
104  }
105}
106
107impl BitAnd for u32x16 {
108  type Output = Self;
109  #[inline]
110  fn bitand(self, rhs: Self) -> Self::Output {
111    pick! {
112      if #[cfg(target_feature="avx512f")] {
113        Self { avx512: bitand_m512i(self.avx512, rhs.avx512) }
114      } else {
115        Self {
116          a : self.a.bitand(rhs.a),
117          b : self.b.bitand(rhs.b),
118        }
119      }
120    }
121  }
122}
123
124impl BitOr for u32x16 {
125  type Output = Self;
126  #[inline]
127  fn bitor(self, rhs: Self) -> Self::Output {
128    pick! {
129    if #[cfg(target_feature="avx512f")] {
130        Self { avx512: bitor_m512i(self.avx512, rhs.avx512) }
131      } else {
132        Self {
133          a : self.a.bitor(rhs.a),
134          b : self.b.bitor(rhs.b),
135        }
136      }
137    }
138  }
139}
140
141impl BitXor for u32x16 {
142  type Output = Self;
143  #[inline]
144  fn bitxor(self, rhs: Self) -> Self::Output {
145    pick! {
146      if #[cfg(target_feature="avx512f")] {
147        Self { avx512: bitxor_m512i(self.avx512, rhs.avx512) }
148      } else {
149        Self {
150          a : self.a.bitxor(rhs.a),
151          b : self.b.bitxor(rhs.b),
152        }
153      }
154    }
155  }
156}
157
158impl From<u16x16> for u32x16 {
159  /// Widens and zero-extends each u16 lane to u32
160  #[inline]
161  fn from(v: u16x16) -> Self {
162    pick! {
163      if #[cfg(target_feature = "avx512f")] {
164        Self {
165          avx512: convert_to_u32_m512i_from_u16_m256i(v.avx2)
166        }
167      } else if #[cfg(target_feature = "avx2")] {
168        let lo: m128i = extract_m128i_from_m256i::<0>(v.avx2);
169        let hi: m128i = extract_m128i_from_m256i::<1>(v.avx2);
170        Self {
171          a: u32x8 { avx2: convert_to_i32_m256i_from_u16_m128i(lo) },
172          b: u32x8 { avx2: convert_to_i32_m256i_from_u16_m128i(hi) },
173        }
174      } else if #[cfg(target_feature = "sse2")] {
175        Self {
176          a: u32x8 {
177            a: u32x4 {
178              sse: shr_imm_u32_m128i::<16>(unpack_low_i16_m128i(v.a.sse, v.a.sse))
179            },
180            b: u32x4 {
181              sse: shr_imm_u32_m128i::<16>(unpack_high_i16_m128i(v.a.sse, v.a.sse))
182            },
183          },
184          b: u32x8 {
185            a: u32x4 {
186              sse: shr_imm_u32_m128i::<16>(unpack_low_i16_m128i(v.b.sse, v.b.sse))
187            },
188            b: u32x4 {
189              sse: shr_imm_u32_m128i::<16>(unpack_high_i16_m128i(v.b.sse, v.b.sse))
190            },
191          },
192        }
193      } else {
194        // Portable fallback
195        let arr = v.as_array();
196        Self::new([
197          arr[0] as u32,  arr[1] as u32,  arr[2] as u32,  arr[3] as u32,
198          arr[4] as u32,  arr[5] as u32,  arr[6] as u32,  arr[7] as u32,
199          arr[8] as u32,  arr[9] as u32,  arr[10] as u32, arr[11] as u32,
200          arr[12] as u32, arr[13] as u32, arr[14] as u32, arr[15] as u32,
201        ])
202      }
203    }
204  }
205}
206
207macro_rules! impl_shl_t_for_u32x16 {
208  ($($shift_type:ty),+ $(,)?) => {
209    $(impl Shl<$shift_type> for u32x16 {
210      type Output = Self;
211      /// Shifts all lanes by the value given.
212      #[inline]
213      fn shl(self, rhs: $shift_type) -> Self::Output {
214        pick! {
215          if #[cfg(target_feature="avx512f")] {
216            let shift = cast(rhs as u32);
217            Self { avx512: shl_all_u32_m512i(self.avx512, shift) }
218          } else {
219            Self {
220              a : self.a.shl(rhs),
221              b : self.b.shl(rhs),
222            }
223          }
224        }
225      }
226    })+
227  };
228}
229impl_shl_t_for_u32x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
230
231macro_rules! impl_shr_t_for_u32x16 {
232  ($($shift_type:ty),+ $(,)?) => {
233    $(impl Shr<$shift_type> for u32x16 {
234      type Output = Self;
235      /// Shifts all lanes by the value given.
236      #[inline]
237      fn shr(self, rhs: $shift_type) -> Self::Output {
238        pick! {
239          if #[cfg(target_feature="avx512f")] {
240            let shift = cast(rhs as u32);
241            Self { avx512: shr_all_u32_m512i(self.avx512, shift) }
242          } else {
243            Self {
244              a : self.a.shr(rhs),
245              b : self.b.shr(rhs),
246            }
247          }
248        }
249      }
250    })+
251  };
252}
253impl_shr_t_for_u32x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
254
255/// Shifts lanes by the corresponding lane.
256///
257/// Bitwise shift-right; yields `self >> mask(rhs)`, where mask removes any
258/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
259/// of the type. (same as `wrapping_shr`)
260impl Shr<u32x16> for u32x16 {
261  type Output = Self;
262
263  #[inline]
264  fn shr(self, rhs: u32x16) -> Self::Output {
265    pick! {
266      if #[cfg(target_feature="avx512f")] {
267        let shift_by = bitand_m512i(rhs.avx512, set_splat_i32_m512i(31));
268        Self { avx512: shr_each_u32_m512i(self.avx512, shift_by ) }
269      } else {
270        Self {
271          a : self.a.shr(rhs.a),
272          b : self.b.shr(rhs.b),
273        }
274      }
275    }
276  }
277}
278
279/// Shifts lanes by the corresponding lane.
280///
281/// Bitwise shift-left; yields `self << mask(rhs)`, where mask removes any
282/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
283/// of the type. (same as `wrapping_shl`)
284impl Shl<u32x16> for u32x16 {
285  type Output = Self;
286
287  #[inline]
288  fn shl(self, rhs: u32x16) -> Self::Output {
289    pick! {
290      if #[cfg(target_feature="avx512f")] {
291        let shift_by = bitand_m512i(rhs.avx512, set_splat_i32_m512i(31));
292        Self { avx512: shl_each_u32_m512i(self.avx512, shift_by) }
293      } else {
294        Self {
295          a : self.a.shl(rhs.a),
296          b : self.b.shl(rhs.b),
297        }
298      }
299    }
300  }
301}
302
303impl CmpEq for u32x16 {
304  type Output = Self;
305  #[inline]
306  fn simd_eq(self, rhs: Self) -> Self::Output {
307    Self::simd_eq(self, rhs)
308  }
309}
310
311impl CmpGt for u32x16 {
312  type Output = Self;
313  #[inline]
314  fn simd_gt(self, rhs: Self) -> Self::Output {
315    Self::simd_gt(self, rhs)
316  }
317}
318
319impl CmpLt for u32x16 {
320  type Output = Self;
321  #[inline]
322  fn simd_lt(self, rhs: Self) -> Self::Output {
323    // no gt, so just reverse to get same answer
324    Self::simd_gt(rhs, self)
325  }
326}
327
328impl u32x16 {
329  #[inline]
330  #[must_use]
331  pub const fn new(array: [u32; 16]) -> Self {
332    unsafe { core::mem::transmute(array) }
333  }
334
335  #[inline]
336  #[must_use]
337  pub fn simd_eq(self, rhs: Self) -> Self {
338    pick! {
339      if #[cfg(target_feature="avx512f")] {
340        Self { avx512: cmp_op_mask_u32_m512i::<{cmp_int_op!(Eq)}>(self.avx512, rhs.avx512) }
341      } else {
342        Self {
343          a : self.a.simd_eq(rhs.a),
344          b : self.b.simd_eq(rhs.b),
345        }
346      }
347    }
348  }
349
350  #[inline]
351  #[must_use]
352  pub fn simd_gt(self, rhs: Self) -> Self {
353    pick! {
354      if #[cfg(target_feature="avx512f")] {
355        Self { avx512: cmp_op_mask_u32_m512i::<{cmp_int_op!(Nle)}>(self.avx512, rhs.avx512) }
356      } else {
357        Self {
358          a : self.a.simd_gt(rhs.a),
359          b : self.b.simd_gt(rhs.b),
360        }
361      }
362    }
363  }
364
365  #[inline]
366  #[must_use]
367  pub fn simd_lt(self, rhs: Self) -> Self {
368    pick! {
369      if #[cfg(target_feature="avx512f")] {
370        Self { avx512: cmp_op_mask_u32_m512i::<{cmp_int_op!(Lt)}>(self.avx512, rhs.avx512) }
371      } else {
372        Self {
373          a : rhs.a.simd_gt(self.a),
374          b : rhs.b.simd_gt(self.b),
375        }
376      }
377    }
378  }
379
380  #[inline]
381  #[must_use]
382  pub fn blend(self, t: Self, f: Self) -> Self {
383    pick! {
384      if #[cfg(target_feature="avx512f")] {
385        Self { avx512: blend_varying_i8_m512i(f.avx512,t.avx512,movepi8_mask_m512i(self.avx512)) }
386      } else {
387        Self {
388          a : self.a.blend(t.a, f.a),
389          b : self.b.blend(t.b, f.b),
390        }
391      }
392    }
393  }
394
395  #[inline]
396  #[must_use]
397  pub fn min(self, rhs: Self) -> Self {
398    pick! {
399      if #[cfg(target_feature="avx512f")] {
400        Self { avx512: min_u32_m512i(self.avx512, rhs.avx512) }
401      } else {
402        Self {
403          a: self.a.min(rhs.a),
404          b: self.b.min(rhs.b),
405        }
406      }
407    }
408  }
409
410  #[inline]
411  #[must_use]
412  pub fn max(self, rhs: Self) -> Self {
413    pick! {
414      if #[cfg(target_feature="avx512f")] {
415        Self { avx512: max_u32_m512i(self.avx512, rhs.avx512) }
416      } else {
417        Self {
418          a: self.a.max(rhs.a),
419          b: self.b.max(rhs.b),
420        }
421      }
422    }
423  }
424
425  #[inline]
426  #[must_use]
427  pub fn mul_keep_high(self, rhs: Self) -> Self {
428    pick! {
429      if #[cfg(target_feature="avx512f")] {
430        let alo = extract_m256i32_from_m512i::<0>(self.avx512);
431        let ahi = extract_m256i32_from_m512i::<1>(self.avx512);
432        let blo = extract_m256i32_from_m512i::<0>(rhs.avx512);
433        let bhi = extract_m256i32_from_m512i::<1>(rhs.avx512);
434
435        let lo_res: m256i = {
436          let a8 = u32x8 { avx2: alo };
437          let b8 = u32x8 { avx2: blo };
438          a8.mul_keep_high(b8).avx2
439        };
440        let hi_res: m256i = {
441          let a8 = u32x8 { avx2: ahi };
442          let b8 = u32x8 { avx2: bhi };
443          a8.mul_keep_high(b8).avx2
444        };
445
446        let zero = zeroed_m512i();
447        let with_lo = insert_m256i32_to_m512i::<0>(zero, lo_res);
448        let combined = insert_m256i32_to_m512i::<1>(with_lo, hi_res);
449
450        Self { avx512: combined }
451      } else {
452        Self {
453          a: self.a.mul_keep_high(rhs.a),
454          b: self.b.mul_keep_high(rhs.b),
455        }
456      }
457    }
458  }
459  
460  #[inline]
461  #[must_use]
462  #[doc(alias("movemask", "move_mask"))]
463  pub fn to_bitmask(self) -> u32 {
464    i32x16::to_bitmask(cast(self))
465  }
466
467  #[inline]
468  #[must_use]
469  pub fn any(self) -> bool {
470    pick! {
471      if #[cfg(target_feature="avx512f")] {
472        ((movepi8_mask_m512i(self.avx512) as u32) &
473          0b10001000100010001000100010001000) != 0
474      } else {
475        (self.a | self.b).any()
476      }
477    }
478  }
479
480  #[inline]
481  #[must_use]
482  pub fn all(self) -> bool {
483    pick! {
484      if #[cfg(target_feature="avx512f")] {
485        ((movepi8_mask_m512i(self.avx512) as u32) &
486          0b10001000100010001000100010001000) ==
487          0b10001000100010001000100010001000
488      } else {
489        (self.a & self.b).all()
490      }
491    }
492  }
493
494  #[inline]
495  #[must_use]
496  pub fn none(self) -> bool {
497    !self.any()
498  }
499
500  #[inline]
501  pub fn to_array(self) -> [u32; 16] {
502    cast(self)
503  }
504
505  #[inline]
506  pub fn as_array(&self) -> &[u32; 16] {
507    cast_ref(self)
508  }
509
510  #[inline]
511  pub fn as_mut_array(&mut self) -> &mut [u32; 16] {
512    cast_mut(self)
513  }
514}
515
516impl Not for u32x16 {
517  type Output = Self;
518  #[inline]
519  fn not(self) -> Self::Output {
520    pick! {
521      if #[cfg(target_feature="avx512f")] {
522        Self { avx512: bitxor_m512i(self.avx512, set_splat_i32_m512i(-1)) }
523      } else {
524        Self {
525          a : self.a.not(),
526          b : self.b.not(),
527        }
528      }
529    }
530  }
531}