1use super::*;
2
pick! {
  if #[cfg(target_feature="avx512f")] {
    /// Sixteen `u32` lanes in one 64-byte-aligned value.
    ///
    /// With AVX-512F enabled at compile time the lanes live in a single
    /// 512-bit register wrapper (`m512i`).
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(64))]
    pub struct u32x16 { pub(crate) avx512: m512i }
  } else {
    /// Sixteen `u32` lanes in one 64-byte-aligned value.
    ///
    /// Without AVX-512F the vector is stored as two `u32x8` halves; because
    /// the struct is `repr(C)`, `a` comes first in memory (the low eight
    /// lanes) and `b` follows (the high eight lanes).
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(64))]
    pub struct u32x16 { pub(crate) a : u32x8, pub(crate) b : u32x8 }
  }
}
14
// Macro defined elsewhere in the crate.
// NOTE(review): presumably generates the shared integer-vector constants and
// helpers for this type (e.g. the `splat` used by the scalar operator impls
// below) from the lane type, lane count, vector type and bit width — confirm
// against the macro definition.
int_uint_consts!(u32, 16, u32x16, 512);

// SAFETY: `u32x16` is a `repr(C)` wrapper around either one `m512i` or two
// `u32x8` fields.
// NOTE(review): soundness relies on those field types (declared outside this
// file) being `Zeroable`/`Pod` themselves and on the struct containing no
// padding bytes — confirm there.
unsafe impl Zeroable for u32x16 {}
unsafe impl Pod for u32x16 {}
19
// Declares `u32` as the element type of this vector for the `AlignTo` trait.
// NOTE(review): the trait is defined elsewhere; presumably it is used to split
// `[u32]` slices into aligned `u32x16` chunks — confirm against the trait.
impl AlignTo for u32x16 {
  type Elem = u32;
}
23
24impl Add for u32x16 {
25 type Output = Self;
26 #[inline]
27 fn add(self, rhs: Self) -> Self::Output {
28 pick! {
29 if #[cfg(target_feature="avx512f")] {
30 Self { avx512: add_i32_m512i(self.avx512, rhs.avx512) }
31 } else {
32 Self {
33 a : self.a.add(rhs.a),
34 b : self.b.add(rhs.b),
35 }
36 }
37 }
38 }
39}
40
41impl Sub for u32x16 {
42 type Output = Self;
43 #[inline]
44 fn sub(self, rhs: Self) -> Self::Output {
45 pick! {
46 if #[cfg(target_feature="avx512f")] {
47 Self { avx512: sub_i32_m512i(self.avx512, rhs.avx512) }
48 } else {
49 Self {
50 a : self.a.sub(rhs.a),
51 b : self.b.sub(rhs.b),
52 }
53 }
54 }
55 }
56}
57
58impl Add<u32> for u32x16 {
59 type Output = Self;
60 #[inline]
61 fn add(self, rhs: u32) -> Self::Output {
62 self.add(Self::splat(rhs))
63 }
64}
65
66impl Sub<u32> for u32x16 {
67 type Output = Self;
68 #[inline]
69 fn sub(self, rhs: u32) -> Self::Output {
70 self.sub(Self::splat(rhs))
71 }
72}
73
74impl Add<u32x16> for u32 {
75 type Output = u32x16;
76 #[inline]
77 fn add(self, rhs: u32x16) -> Self::Output {
78 u32x16::splat(self).add(rhs)
79 }
80}
81
82impl Sub<u32x16> for u32 {
83 type Output = u32x16;
84 #[inline]
85 fn sub(self, rhs: u32x16) -> Self::Output {
86 u32x16::splat(self).sub(rhs)
87 }
88}
89
90impl Mul for u32x16 {
91 type Output = Self;
92 #[inline]
93 fn mul(self, rhs: Self) -> Self::Output {
94 pick! {
95 if #[cfg(target_feature="avx512f")] {
96 Self { avx512: mul_i32_keep_low_m512i(self.avx512, rhs.avx512) }
97 } else {
98 Self {
99 a : self.a.mul(rhs.a),
100 b : self.b.mul(rhs.b),
101 }
102 }
103 }
104 }
105}
106
107impl BitAnd for u32x16 {
108 type Output = Self;
109 #[inline]
110 fn bitand(self, rhs: Self) -> Self::Output {
111 pick! {
112 if #[cfg(target_feature="avx512f")] {
113 Self { avx512: bitand_m512i(self.avx512, rhs.avx512) }
114 } else {
115 Self {
116 a : self.a.bitand(rhs.a),
117 b : self.b.bitand(rhs.b),
118 }
119 }
120 }
121 }
122}
123
124impl BitOr for u32x16 {
125 type Output = Self;
126 #[inline]
127 fn bitor(self, rhs: Self) -> Self::Output {
128 pick! {
129 if #[cfg(target_feature="avx512f")] {
130 Self { avx512: bitor_m512i(self.avx512, rhs.avx512) }
131 } else {
132 Self {
133 a : self.a.bitor(rhs.a),
134 b : self.b.bitor(rhs.b),
135 }
136 }
137 }
138 }
139}
140
141impl BitXor for u32x16 {
142 type Output = Self;
143 #[inline]
144 fn bitxor(self, rhs: Self) -> Self::Output {
145 pick! {
146 if #[cfg(target_feature="avx512f")] {
147 Self { avx512: bitxor_m512i(self.avx512, rhs.avx512) }
148 } else {
149 Self {
150 a : self.a.bitxor(rhs.a),
151 b : self.b.bitxor(rhs.b),
152 }
153 }
154 }
155 }
156}
157
158impl From<u16x16> for u32x16 {
159 #[inline]
161 fn from(v: u16x16) -> Self {
162 pick! {
163 if #[cfg(target_feature = "avx512f")] {
164 Self {
165 avx512: convert_to_u32_m512i_from_u16_m256i(v.avx2)
166 }
167 } else if #[cfg(target_feature = "avx2")] {
168 let lo: m128i = extract_m128i_from_m256i::<0>(v.avx2);
169 let hi: m128i = extract_m128i_from_m256i::<1>(v.avx2);
170 Self {
171 a: u32x8 { avx2: convert_to_i32_m256i_from_u16_m128i(lo) },
172 b: u32x8 { avx2: convert_to_i32_m256i_from_u16_m128i(hi) },
173 }
174 } else if #[cfg(target_feature = "sse2")] {
175 Self {
176 a: u32x8 {
177 a: u32x4 {
178 sse: shr_imm_u32_m128i::<16>(unpack_low_i16_m128i(v.a.sse, v.a.sse))
179 },
180 b: u32x4 {
181 sse: shr_imm_u32_m128i::<16>(unpack_high_i16_m128i(v.a.sse, v.a.sse))
182 },
183 },
184 b: u32x8 {
185 a: u32x4 {
186 sse: shr_imm_u32_m128i::<16>(unpack_low_i16_m128i(v.b.sse, v.b.sse))
187 },
188 b: u32x4 {
189 sse: shr_imm_u32_m128i::<16>(unpack_high_i16_m128i(v.b.sse, v.b.sse))
190 },
191 },
192 }
193 } else {
194 let arr = v.as_array();
196 Self::new([
197 arr[0] as u32, arr[1] as u32, arr[2] as u32, arr[3] as u32,
198 arr[4] as u32, arr[5] as u32, arr[6] as u32, arr[7] as u32,
199 arr[8] as u32, arr[9] as u32, arr[10] as u32, arr[11] as u32,
200 arr[12] as u32, arr[13] as u32, arr[14] as u32, arr[15] as u32,
201 ])
202 }
203 }
204 }
205}
206
207macro_rules! impl_shl_t_for_u32x16 {
208 ($($shift_type:ty),+ $(,)?) => {
209 $(impl Shl<$shift_type> for u32x16 {
210 type Output = Self;
211 #[inline]
213 fn shl(self, rhs: $shift_type) -> Self::Output {
214 pick! {
215 if #[cfg(target_feature="avx512f")] {
216 let shift = cast(rhs as u32);
217 Self { avx512: shl_all_u32_m512i(self.avx512, shift) }
218 } else {
219 Self {
220 a : self.a.shl(rhs),
221 b : self.b.shl(rhs),
222 }
223 }
224 }
225 }
226 })+
227 };
228}
229impl_shl_t_for_u32x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
230
231macro_rules! impl_shr_t_for_u32x16 {
232 ($($shift_type:ty),+ $(,)?) => {
233 $(impl Shr<$shift_type> for u32x16 {
234 type Output = Self;
235 #[inline]
237 fn shr(self, rhs: $shift_type) -> Self::Output {
238 pick! {
239 if #[cfg(target_feature="avx512f")] {
240 let shift = cast(rhs as u32);
241 Self { avx512: shr_all_u32_m512i(self.avx512, shift) }
242 } else {
243 Self {
244 a : self.a.shr(rhs),
245 b : self.b.shr(rhs),
246 }
247 }
248 }
249 }
250 })+
251 };
252}
253impl_shr_t_for_u32x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
254
255impl Shr<u32x16> for u32x16 {
261 type Output = Self;
262
263 #[inline]
264 fn shr(self, rhs: u32x16) -> Self::Output {
265 pick! {
266 if #[cfg(target_feature="avx512f")] {
267 let shift_by = bitand_m512i(rhs.avx512, set_splat_i32_m512i(31));
268 Self { avx512: shr_each_u32_m512i(self.avx512, shift_by ) }
269 } else {
270 Self {
271 a : self.a.shr(rhs.a),
272 b : self.b.shr(rhs.b),
273 }
274 }
275 }
276 }
277}
278
279impl Shl<u32x16> for u32x16 {
285 type Output = Self;
286
287 #[inline]
288 fn shl(self, rhs: u32x16) -> Self::Output {
289 pick! {
290 if #[cfg(target_feature="avx512f")] {
291 let shift_by = bitand_m512i(rhs.avx512, set_splat_i32_m512i(31));
292 Self { avx512: shl_each_u32_m512i(self.avx512, shift_by) }
293 } else {
294 Self {
295 a : self.a.shl(rhs.a),
296 b : self.b.shl(rhs.b),
297 }
298 }
299 }
300 }
301}
302
303impl CmpEq for u32x16 {
304 type Output = Self;
305 #[inline]
306 fn simd_eq(self, rhs: Self) -> Self::Output {
307 Self::simd_eq(self, rhs)
308 }
309}
310
311impl CmpGt for u32x16 {
312 type Output = Self;
313 #[inline]
314 fn simd_gt(self, rhs: Self) -> Self::Output {
315 Self::simd_gt(self, rhs)
316 }
317}
318
319impl CmpLt for u32x16 {
320 type Output = Self;
321 #[inline]
322 fn simd_lt(self, rhs: Self) -> Self::Output {
323 Self::simd_gt(rhs, self)
325 }
326}
327
328impl u32x16 {
329 #[inline]
330 #[must_use]
331 pub const fn new(array: [u32; 16]) -> Self {
332 unsafe { core::mem::transmute(array) }
333 }
334
335 #[inline]
336 #[must_use]
337 pub fn simd_eq(self, rhs: Self) -> Self {
338 pick! {
339 if #[cfg(target_feature="avx512f")] {
340 Self { avx512: cmp_op_mask_u32_m512i::<{cmp_int_op!(Eq)}>(self.avx512, rhs.avx512) }
341 } else {
342 Self {
343 a : self.a.simd_eq(rhs.a),
344 b : self.b.simd_eq(rhs.b),
345 }
346 }
347 }
348 }
349
350 #[inline]
351 #[must_use]
352 pub fn simd_gt(self, rhs: Self) -> Self {
353 pick! {
354 if #[cfg(target_feature="avx512f")] {
355 Self { avx512: cmp_op_mask_u32_m512i::<{cmp_int_op!(Nle)}>(self.avx512, rhs.avx512) }
356 } else {
357 Self {
358 a : self.a.simd_gt(rhs.a),
359 b : self.b.simd_gt(rhs.b),
360 }
361 }
362 }
363 }
364
365 #[inline]
366 #[must_use]
367 pub fn simd_lt(self, rhs: Self) -> Self {
368 pick! {
369 if #[cfg(target_feature="avx512f")] {
370 Self { avx512: cmp_op_mask_u32_m512i::<{cmp_int_op!(Lt)}>(self.avx512, rhs.avx512) }
371 } else {
372 Self {
373 a : rhs.a.simd_gt(self.a),
374 b : rhs.b.simd_gt(self.b),
375 }
376 }
377 }
378 }
379
380 #[inline]
381 #[must_use]
382 pub fn blend(self, t: Self, f: Self) -> Self {
383 pick! {
384 if #[cfg(target_feature="avx512f")] {
385 Self { avx512: blend_varying_i8_m512i(f.avx512,t.avx512,movepi8_mask_m512i(self.avx512)) }
386 } else {
387 Self {
388 a : self.a.blend(t.a, f.a),
389 b : self.b.blend(t.b, f.b),
390 }
391 }
392 }
393 }
394
395 #[inline]
396 #[must_use]
397 pub fn min(self, rhs: Self) -> Self {
398 pick! {
399 if #[cfg(target_feature="avx512f")] {
400 Self { avx512: min_u32_m512i(self.avx512, rhs.avx512) }
401 } else {
402 Self {
403 a: self.a.min(rhs.a),
404 b: self.b.min(rhs.b),
405 }
406 }
407 }
408 }
409
410 #[inline]
411 #[must_use]
412 pub fn max(self, rhs: Self) -> Self {
413 pick! {
414 if #[cfg(target_feature="avx512f")] {
415 Self { avx512: max_u32_m512i(self.avx512, rhs.avx512) }
416 } else {
417 Self {
418 a: self.a.max(rhs.a),
419 b: self.b.max(rhs.b),
420 }
421 }
422 }
423 }
424
425 #[inline]
426 #[must_use]
427 pub fn mul_keep_high(self, rhs: Self) -> Self {
428 pick! {
429 if #[cfg(target_feature="avx512f")] {
430 let alo = extract_m256i32_from_m512i::<0>(self.avx512);
431 let ahi = extract_m256i32_from_m512i::<1>(self.avx512);
432 let blo = extract_m256i32_from_m512i::<0>(rhs.avx512);
433 let bhi = extract_m256i32_from_m512i::<1>(rhs.avx512);
434
435 let lo_res: m256i = {
436 let a8 = u32x8 { avx2: alo };
437 let b8 = u32x8 { avx2: blo };
438 a8.mul_keep_high(b8).avx2
439 };
440 let hi_res: m256i = {
441 let a8 = u32x8 { avx2: ahi };
442 let b8 = u32x8 { avx2: bhi };
443 a8.mul_keep_high(b8).avx2
444 };
445
446 let zero = zeroed_m512i();
447 let with_lo = insert_m256i32_to_m512i::<0>(zero, lo_res);
448 let combined = insert_m256i32_to_m512i::<1>(with_lo, hi_res);
449
450 Self { avx512: combined }
451 } else {
452 Self {
453 a: self.a.mul_keep_high(rhs.a),
454 b: self.b.mul_keep_high(rhs.b),
455 }
456 }
457 }
458 }
459
460 #[inline]
461 #[must_use]
462 #[doc(alias("movemask", "move_mask"))]
463 pub fn to_bitmask(self) -> u32 {
464 i32x16::to_bitmask(cast(self))
465 }
466
467 #[inline]
468 #[must_use]
469 pub fn any(self) -> bool {
470 pick! {
471 if #[cfg(target_feature="avx512f")] {
472 ((movepi8_mask_m512i(self.avx512) as u32) &
473 0b10001000100010001000100010001000) != 0
474 } else {
475 (self.a | self.b).any()
476 }
477 }
478 }
479
480 #[inline]
481 #[must_use]
482 pub fn all(self) -> bool {
483 pick! {
484 if #[cfg(target_feature="avx512f")] {
485 ((movepi8_mask_m512i(self.avx512) as u32) &
486 0b10001000100010001000100010001000) ==
487 0b10001000100010001000100010001000
488 } else {
489 (self.a & self.b).all()
490 }
491 }
492 }
493
494 #[inline]
495 #[must_use]
496 pub fn none(self) -> bool {
497 !self.any()
498 }
499
500 #[inline]
501 pub fn to_array(self) -> [u32; 16] {
502 cast(self)
503 }
504
505 #[inline]
506 pub fn as_array(&self) -> &[u32; 16] {
507 cast_ref(self)
508 }
509
510 #[inline]
511 pub fn as_mut_array(&mut self) -> &mut [u32; 16] {
512 cast_mut(self)
513 }
514}
515
516impl Not for u32x16 {
517 type Output = Self;
518 #[inline]
519 fn not(self) -> Self::Output {
520 pick! {
521 if #[cfg(target_feature="avx512f")] {
522 Self { avx512: bitxor_m512i(self.avx512, set_splat_i32_m512i(-1)) }
523 } else {
524 Self {
525 a : self.a.not(),
526 b : self.b.not(),
527 }
528 }
529 }
530 }
531}