wide/
u64x8_.rs

1use super::*;
2
pick! {
  if #[cfg(target_feature="avx512f")] {
    /// A 64-byte-aligned SIMD vector type over `u64`.
    ///
    /// With the `avx512f` target feature enabled at compile time the value is
    /// stored in a single `m512i`.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(64))]
    pub struct u64x8 { pub(crate) avx512: m512i }
  } else {
    /// A 64-byte-aligned SIMD vector type over `u64`.
    ///
    /// Without the `avx512f` target feature the value is stored as two
    /// `u64x4` halves, `a` followed by `b` (`repr(C)` keeps that field order).
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(64))]
    pub struct u64x8 { pub(crate) a : u64x4, pub(crate) b : u64x4 }
  }
}
14
// Invokes the crate's integer-constants macro for this type. The macro is
// defined outside this file; the arguments look like element type, lane
// count, vector type and bit width -- confirm against the macro definition.
int_uint_consts!(u64, 8, u64x8, 512);

// NOTE(review): these unsafe impls assert that every bit pattern (including
// all zeroes) is a valid `u64x8` and that the type has no padding. That
// depends on the layouts of `m512i` / `u64x4`, which are not visible here.
unsafe impl Zeroable for u64x8 {}
unsafe impl Pod for u64x8 {}

// Declares `u64` as the element type used when aligning slices to `u64x8`.
impl AlignTo for u64x8 {
  type Elem = u64;
}
23
24impl Add for u64x8 {
25  type Output = Self;
26  #[inline]
27  fn add(self, rhs: Self) -> Self::Output {
28    pick! {
29      if #[cfg(target_feature="avx512f")] {
30        Self { avx512: add_i64_m512i(self.avx512, rhs.avx512) }
31      } else {
32        Self {
33          a : self.a.add(rhs.a),
34          b : self.b.add(rhs.b),
35        }
36      }
37    }
38  }
39}
40
41impl Sub for u64x8 {
42  type Output = Self;
43  #[inline]
44  fn sub(self, rhs: Self) -> Self::Output {
45    pick! {
46      if #[cfg(target_feature="avx512f")] {
47        Self { avx512: sub_i64_m512i(self.avx512, rhs.avx512) }
48      } else {
49        Self {
50          a : self.a.sub(rhs.a),
51          b : self.b.sub(rhs.b),
52        }
53      }
54    }
55  }
56}
57
58impl Mul for u64x8 {
59  type Output = Self;
60  #[inline]
61  fn mul(self, rhs: Self) -> Self::Output {
62    pick! {
63      if #[cfg(target_feature="avx512f")] {
64        let arr1: [u64; 8] = cast(self);
65        let arr2: [u64; 8] = cast(rhs);
66        cast([
67          arr1[0].wrapping_mul(arr2[0]),
68          arr1[1].wrapping_mul(arr2[1]),
69          arr1[2].wrapping_mul(arr2[2]),
70          arr1[3].wrapping_mul(arr2[3]),
71          arr1[4].wrapping_mul(arr2[4]),
72          arr1[5].wrapping_mul(arr2[5]),
73          arr1[6].wrapping_mul(arr2[6]),
74          arr1[7].wrapping_mul(arr2[7]),
75        ])
76      } else {
77        Self { a: self.a.mul(rhs.a), b: self.b.mul(rhs.b) }
78      }
79    }
80  }
81}
82
83impl Add<u64> for u64x8 {
84  type Output = Self;
85  #[inline]
86  fn add(self, rhs: u64) -> Self::Output {
87    self.add(Self::splat(rhs))
88  }
89}
90
91impl Sub<u64> for u64x8 {
92  type Output = Self;
93  #[inline]
94  fn sub(self, rhs: u64) -> Self::Output {
95    self.sub(Self::splat(rhs))
96  }
97}
98
99impl Mul<u64> for u64x8 {
100  type Output = Self;
101  #[inline]
102  fn mul(self, rhs: u64) -> Self::Output {
103    self.mul(Self::splat(rhs))
104  }
105}
106
107impl Add<u64x8> for u64 {
108  type Output = u64x8;
109  #[inline]
110  fn add(self, rhs: u64x8) -> Self::Output {
111    u64x8::splat(self).add(rhs)
112  }
113}
114
115impl Sub<u64x8> for u64 {
116  type Output = u64x8;
117  #[inline]
118  fn sub(self, rhs: u64x8) -> Self::Output {
119    u64x8::splat(self).sub(rhs)
120  }
121}
122
123impl Mul<u64x8> for u64 {
124  type Output = u64x8;
125  #[inline]
126  fn mul(self, rhs: u64x8) -> Self::Output {
127    u64x8::splat(self).mul(rhs)
128  }
129}
130
131impl BitAnd for u64x8 {
132  type Output = Self;
133  #[inline]
134  fn bitand(self, rhs: Self) -> Self::Output {
135    pick! {
136      if #[cfg(target_feature="avx512f")] {
137        Self { avx512: bitand_m512i(self.avx512, rhs.avx512) }
138      } else {
139        Self {
140          a : self.a.bitand(rhs.a),
141          b : self.b.bitand(rhs.b),
142        }
143      }
144    }
145  }
146}
147
148impl BitOr for u64x8 {
149  type Output = Self;
150  #[inline]
151  fn bitor(self, rhs: Self) -> Self::Output {
152    pick! {
153    if #[cfg(target_feature="avx512f")] {
154        Self { avx512: bitor_m512i(self.avx512, rhs.avx512) }
155      } else {
156        Self {
157          a : self.a.bitor(rhs.a),
158          b : self.b.bitor(rhs.b),
159        }
160      }
161    }
162  }
163}
164
165impl BitXor for u64x8 {
166  type Output = Self;
167  #[inline]
168  fn bitxor(self, rhs: Self) -> Self::Output {
169    pick! {
170      if #[cfg(target_feature="avx512f")] {
171        Self { avx512: bitxor_m512i(self.avx512, rhs.avx512) }
172      } else {
173        Self {
174          a : self.a.bitxor(rhs.a),
175          b : self.b.bitxor(rhs.b),
176        }
177      }
178    }
179  }
180}
181
182macro_rules! impl_shl_t_for_u64x8 {
183  ($($shift_type:ty),+ $(,)?) => {
184    $(impl Shl<$shift_type> for u64x8 {
185      type Output = Self;
186      /// Shifts all lanes by the value given.
187      #[inline]
188      fn shl(self, rhs: $shift_type) -> Self::Output {
189        pick! {
190          if #[cfg(target_feature="avx512f")] {
191            let shift = cast(rhs as u64);
192            Self { avx512: shl_all_u64_m512i(self.avx512, shift) }
193          } else {
194            Self {
195              a : self.a.shl(rhs),
196              b : self.b.shl(rhs),
197            }
198          }
199        }
200      }
201    })+
202  };
203}
204impl_shl_t_for_u64x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
205
206macro_rules! impl_shr_t_for_u64x8 {
207  ($($shift_type:ty),+ $(,)?) => {
208    $(impl Shr<$shift_type> for u64x8 {
209      type Output = Self;
210      /// Shifts all lanes by the value given.
211      #[inline]
212      fn shr(self, rhs: $shift_type) -> Self::Output {
213        pick! {
214          if #[cfg(target_feature="avx512f")] {
215            let shift = cast(rhs as u64);
216            Self { avx512: shr_all_u64_m512i(self.avx512, shift) }
217          } else {
218            Self {
219              a : self.a.shr(rhs),
220              b : self.b.shr(rhs),
221            }
222          }
223        }
224      }
225    })+
226  };
227}
228impl_shr_t_for_u64x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
229
230impl Shr for u64x8 {
231  type Output = Self;
232
233  #[inline]
234  fn shr(self, rhs: Self) -> Self::Output {
235    pick! {
236      if #[cfg(target_feature="avx512f")] {
237        Self { avx512: shr_each_u64_m512i(self.avx512, rhs.avx512) }
238      } else {
239        Self {
240          a : self.a.shr(rhs.a),
241          b : self.b.shr(rhs.b),
242        }
243      }
244    }
245  }
246}
247
248impl Shl for u64x8 {
249  type Output = Self;
250
251  #[inline]
252  fn shl(self, rhs: Self) -> Self::Output {
253    pick! {
254      if #[cfg(target_feature="avx512f")] {
255        Self { avx512: shl_each_u64_m512i(self.avx512, rhs.avx512) }
256      } else {
257        Self {
258          a : self.a.shl(rhs.a),
259          b : self.b.shl(rhs.b),
260        }
261      }
262    }
263  }
264}
265
266impl CmpEq for u64x8 {
267  type Output = Self;
268  #[inline]
269  fn simd_eq(self, rhs: Self) -> Self::Output {
270    Self::simd_eq(self, rhs)
271  }
272}
273
274impl CmpGt for u64x8 {
275  type Output = Self;
276  #[inline]
277  fn simd_gt(self, rhs: Self) -> Self::Output {
278    Self::simd_gt(self, rhs)
279  }
280}
281
282impl CmpLt for u64x8 {
283  type Output = Self;
284  #[inline]
285  fn simd_lt(self, rhs: Self) -> Self::Output {
286    // no lt, so just call gt with swapped args
287    Self::simd_gt(rhs, self)
288  }
289}
290
291impl u64x8 {
292  #[inline]
293  #[must_use]
294  pub const fn new(array: [u64; 8]) -> Self {
295    unsafe { core::mem::transmute(array) }
296  }
297  #[inline]
298  #[must_use]
299  pub fn simd_eq(self, rhs: Self) -> Self {
300    pick! {
301      if #[cfg(target_feature="avx512f")] {
302        Self { avx512: cmp_op_mask_i64_m512i::<{cmp_int_op!(Eq)}>(self.avx512, rhs.avx512) }
303      } else {
304        Self {
305          a : self.a.simd_eq(rhs.a),
306          b : self.b.simd_eq(rhs.b),
307        }
308      }
309    }
310  }
311  #[inline]
312  #[must_use]
313  pub fn simd_gt(self, rhs: Self) -> Self {
314    pick! {
315      if #[cfg(target_feature="avx512f")] {
316        // no unsigned gt than so inverting the high bit will get the correct result
317        let highbit = u64x8::splat(1 << 63);
318        Self { avx512: cmp_op_mask_i64_m512i::<{cmp_int_op!(Nle)}>((self ^ highbit).avx512, (rhs ^ highbit).avx512) }
319      } else {
320        Self {
321          a : self.a.simd_gt(rhs.a),
322          b : self.b.simd_gt(rhs.b),
323        }
324      }
325    }
326  }
327
328  #[inline]
329  #[must_use]
330  pub fn simd_lt(self, rhs: Self) -> Self {
331    pick! {
332      if #[cfg(target_feature="avx512f")] {
333        // no unsigned gt than so inverting the high bit will get the correct result
334        let highbit = u64x8::splat(1 << 63);
335        Self { avx512: cmp_op_mask_i64_m512i::<{cmp_int_op!(Lt)}>((self ^ highbit).avx512, (rhs ^ highbit).avx512) }
336      } else {
337        Self {
338          a : self.a.simd_gt(rhs.a),
339          b : self.b.simd_gt(rhs.b),
340        }
341      }
342    }
343  }
344
345  #[inline]
346  #[must_use]
347  pub fn blend(self, t: Self, f: Self) -> Self {
348    pick! {
349      if #[cfg(target_feature="avx512f")] {
350        Self { avx512: blend_varying_i8_m512i(f.avx512,t.avx512,movepi8_mask_m512i(self.avx512)) }
351      } else {
352        Self {
353          a : self.a.blend(t.a, f.a),
354          b : self.b.blend(t.b, f.b),
355        }
356      }
357    }
358  }
359
360  #[inline]
361  #[must_use]
362  #[doc(alias("movemask", "move_mask"))]
363  pub fn to_bitmask(self) -> u32 {
364    i64x8::to_bitmask(cast(self))
365  }
366
367  #[inline]
368  pub fn to_array(self) -> [u64; 8] {
369    cast(self)
370  }
371
372  #[inline]
373  pub fn as_array(&self) -> &[u64; 8] {
374    cast_ref(self)
375  }
376
377  #[inline]
378  pub fn as_mut_array(&mut self) -> &mut [u64; 8] {
379    cast_mut(self)
380  }
381
382  #[inline]
383  #[must_use]
384  pub fn min(self, rhs: Self) -> Self {
385    pick! {
386      if #[cfg(target_feature="avx512f")] {
387        Self { avx512: min_u64_m512i(self.avx512, rhs.avx512) }
388      } else {
389        Self {
390          a: self.a.min(rhs.a),
391          b: self.b.min(rhs.b),
392        }
393      }
394    }
395  }
396
397  #[inline]
398  #[must_use]
399  pub fn max(self, rhs: Self) -> Self {
400    pick! {
401      if #[cfg(target_feature="avx512f")] {
402        Self { avx512: max_u64_m512i(self.avx512, rhs.avx512) }
403      } else {
404        Self {
405          a: self.a.max(rhs.a),
406          b: self.b.max(rhs.b),
407        }
408      }
409    }
410  }
411
412  #[inline]
413  #[must_use]
414  pub fn mul_keep_high(self, rhs: Self) -> Self {
415    pick! {
416      if #[cfg(target_feature="avx512f")] {
417        let arr1: [u64; 8] = cast(self);
418        let arr2: [u64; 8] = cast(rhs);
419        cast([
420          (arr1[0] as u128 * arr2[0] as u128 >> 64) as u64,
421          (arr1[1] as u128 * arr2[1] as u128 >> 64) as u64,
422          (arr1[2] as u128 * arr2[2] as u128 >> 64) as u64,
423          (arr1[3] as u128 * arr2[3] as u128 >> 64) as u64,
424          (arr1[4] as u128 * arr2[4] as u128 >> 64) as u64,
425          (arr1[5] as u128 * arr2[5] as u128 >> 64) as u64,
426          (arr1[6] as u128 * arr2[6] as u128 >> 64) as u64,
427          (arr1[7] as u128 * arr2[7] as u128 >> 64) as u64,
428        ])
429      } else {
430        Self {
431          a: self.a.mul_keep_high(rhs.a),
432          b: self.b.mul_keep_high(rhs.b),
433        }
434      }
435    }
436  }
437}
438
439impl Not for u64x8 {
440  type Output = Self;
441  #[inline]
442  fn not(self) -> Self::Output {
443    pick! {
444      if #[cfg(target_feature="avx512f")] {
445        Self { avx512: bitxor_m512i(self.avx512, set_splat_i64_m512i(-1)) }
446      } else {
447        Self {
448          a : self.a.not(),
449          b : self.b.not(),
450        }
451      }
452    }
453  }
454}