wide/
u8x32_.rs

1use super::*;
2
3pick! {
4  if #[cfg(target_feature="avx2")] {
5    #[derive(Default, Clone, Copy, PartialEq, Eq)]
6    #[repr(C, align(32))]
7    pub struct u8x32 { pub(crate) avx: m256i }
8  } else {
9    #[derive(Default, Clone, Copy, PartialEq, Eq)]
10    #[repr(C, align(32))]
11    pub struct u8x32 { pub(crate) a : u8x16, pub(crate) b : u8x16 }
12  }
13}
14
15int_uint_consts!(u8, 32, u8x32, 256);
16
17unsafe impl Zeroable for u8x32 {}
18unsafe impl Pod for u8x32 {}
19
20impl AlignTo for u8x32 {
21  type Elem = u8;
22}
23
24impl Add for u8x32 {
25  type Output = Self;
26  #[inline]
27  fn add(self, rhs: Self) -> Self::Output {
28    pick! {
29      if #[cfg(target_feature="avx2")] {
30        Self { avx: add_i8_m256i(self.avx,rhs.avx) }
31      } else {
32        Self {
33          a : self.a.add(rhs.a),
34          b : self.b.add(rhs.b),
35        }
36      }
37    }
38  }
39}
40
41impl Sub for u8x32 {
42  type Output = Self;
43  #[inline]
44  fn sub(self, rhs: Self) -> Self::Output {
45    pick! {
46      if #[cfg(target_feature="avx2")] {
47        Self { avx: sub_i8_m256i(self.avx,rhs.avx) }
48      } else {
49        Self {
50          a : self.a.sub(rhs.a),
51          b : self.b.sub(rhs.b),
52        }
53      }
54    }
55  }
56}
57
58impl Add<u8> for u8x32 {
59  type Output = Self;
60  #[inline]
61  fn add(self, rhs: u8) -> Self::Output {
62    self.add(Self::splat(rhs))
63  }
64}
65
66impl Sub<u8> for u8x32 {
67  type Output = Self;
68  #[inline]
69  fn sub(self, rhs: u8) -> Self::Output {
70    self.sub(Self::splat(rhs))
71  }
72}
73
74impl Add<u8x32> for u8 {
75  type Output = u8x32;
76  #[inline]
77  fn add(self, rhs: u8x32) -> Self::Output {
78    u8x32::splat(self).add(rhs)
79  }
80}
81
82impl Sub<u8x32> for u8 {
83  type Output = u8x32;
84  #[inline]
85  fn sub(self, rhs: u8x32) -> Self::Output {
86    u8x32::splat(self).sub(rhs)
87  }
88}
89
90impl BitAnd for u8x32 {
91  type Output = Self;
92  #[inline]
93  fn bitand(self, rhs: Self) -> Self::Output {
94    pick! {
95      if #[cfg(target_feature="avx2")] {
96          Self { avx : bitand_m256i(self.avx,rhs.avx) }
97      } else {
98          Self {
99            a : self.a.bitand(rhs.a),
100            b : self.b.bitand(rhs.b),
101          }
102      }
103    }
104  }
105}
106
107impl BitOr for u8x32 {
108  type Output = Self;
109  #[inline]
110  fn bitor(self, rhs: Self) -> Self::Output {
111    pick! {
112      if #[cfg(target_feature="avx2")] {
113        Self { avx : bitor_m256i(self.avx,rhs.avx) }
114      } else {
115        Self {
116          a : self.a.bitor(rhs.a),
117          b : self.b.bitor(rhs.b),
118        }
119      }
120    }
121  }
122}
123
124impl BitXor for u8x32 {
125  type Output = Self;
126  #[inline]
127  fn bitxor(self, rhs: Self) -> Self::Output {
128    pick! {
129      if #[cfg(target_feature="avx2")] {
130        Self { avx : bitxor_m256i(self.avx,rhs.avx) }
131      } else {
132        Self {
133          a : self.a.bitxor(rhs.a),
134          b : self.b.bitxor(rhs.b),
135        }
136      }
137    }
138  }
139}
140
141impl CmpEq for u8x32 {
142  type Output = Self;
143  #[inline]
144  fn simd_eq(self, rhs: Self) -> Self::Output {
145    pick! {
146      if #[cfg(target_feature="avx2")] {
147        Self { avx : cmp_eq_mask_i8_m256i(self.avx,rhs.avx) }
148      } else {
149        Self {
150          a : self.a.simd_eq(rhs.a),
151          b : self.b.simd_eq(rhs.b),
152        }
153      }
154    }
155  }
156}
157
158impl CmpLt for u8x32 {
159  type Output = Self;
160  #[inline]
161  fn simd_lt(self, rhs: Self) -> Self::Output {
162    pick! {
163      if #[cfg(target_feature="avx2")] {
164        // Convert from u8 to i8.
165        let offset = Self::splat(0x80);
166        let self_i8 = self.bitxor(offset).avx;
167        let rhs_i8 = rhs.bitxor(offset).avx;
168        Self { avx: cmp_gt_mask_i8_m256i(rhs_i8, self_i8)}
169      } else {
170        Self { a: self.a.simd_lt(rhs.a), b: self.b.simd_lt(rhs.b) }
171      }
172    }
173  }
174}
175
176impl CmpLe for u8x32 {
177  type Output = Self;
178  #[inline]
179  fn simd_le(self, rhs: Self) -> Self::Output {
180    pick! {
181      if #[cfg(target_feature="avx2")] {
182        // Convert from u8 to i8.
183        let offset = Self::splat(0x80);
184        let self_i8 = self.bitxor(offset).avx;
185        let rhs_i8 = rhs.bitxor(offset).avx;
186        let gt_mask = Self { avx : cmp_gt_mask_i8_m256i(self_i8,rhs_i8) };
187        Self { avx: gt_mask.bitxor(Self::splat(0xFF)).avx }
188      } else {
189        Self { a: self.a.simd_le(rhs.a), b: self.b.simd_le(rhs.b) }
190      }
191    }
192  }
193}
194
195impl CmpGe for u8x32 {
196  type Output = Self;
197  #[inline]
198  fn simd_ge(self, rhs: Self) -> Self::Output {
199    pick! {
200      if #[cfg(target_feature="avx2")] {
201        // Convert from u8 to i8.
202        let offset = Self::splat(0x80);
203        let self_i8 = self.bitxor(offset).avx;
204        let rhs_i8 = rhs.bitxor(offset).avx;
205        let lt_mask = Self { avx: cmp_gt_mask_i8_m256i(rhs_i8, self_i8)};
206        Self { avx: lt_mask.bitxor(Self::splat(0xFF)).avx }
207      } else {
208        Self { a: self.a.simd_ge(rhs.a), b: self.b.simd_ge(rhs.b) }
209      }
210    }
211  }
212}
213
214impl CmpGt for u8x32 {
215  type Output = Self;
216  #[inline]
217  fn simd_gt(self, rhs: Self) -> Self::Output {
218    pick! {
219      if #[cfg(target_feature="avx2")] {
220        // Convert from u8 to i8.
221        let offset = Self::splat(0x80);
222        let self_i8 = self.bitxor(offset).avx;
223        let rhs_i8 = rhs.bitxor(offset).avx;
224        Self { avx : cmp_gt_mask_i8_m256i(self_i8,rhs_i8) }
225      } else {
226        Self { a: self.a.simd_gt(rhs.a), b: self.b.simd_gt(rhs.b) }
227      }
228    }
229  }
230}
231
232impl Not for u8x32 {
233  type Output = Self;
234  #[inline]
235  fn not(self) -> Self {
236    pick! {
237      if #[cfg(target_feature="avx2")] {
238        Self { avx: self.avx.not()  }
239      } else {
240        Self {
241          a : self.a.not(),
242          b : self.b.not(),
243        }
244      }
245    }
246  }
247}
248
249impl u8x32 {
250  #[inline]
251  #[must_use]
252  pub const fn new(array: [u8; 32]) -> Self {
253    unsafe { core::mem::transmute(array) }
254  }
255  #[inline]
256  #[must_use]
257  pub fn blend(self, t: Self, f: Self) -> Self {
258    pick! {
259      if #[cfg(target_feature="avx2")] {
260        Self { avx: blend_varying_i8_m256i(f.avx, t.avx, self.avx) }
261      } else {
262        Self {
263          a : self.a.blend(t.a, f.a),
264          b : self.b.blend(t.b, f.b),
265        }
266      }
267    }
268  }
269  #[inline]
270  #[must_use]
271  pub fn max(self, rhs: Self) -> Self {
272    pick! {
273      if #[cfg(target_feature="avx2")] {
274        Self { avx: max_u8_m256i(self.avx,rhs.avx) }
275      } else {
276        Self {
277          a : self.a.max(rhs.a),
278          b : self.b.max(rhs.b),
279        }
280      }
281    }
282  }
283  #[inline]
284  #[must_use]
285  pub fn min(self, rhs: Self) -> Self {
286    pick! {
287      if #[cfg(target_feature="avx2")] {
288        Self { avx: min_u8_m256i(self.avx,rhs.avx) }
289      } else {
290        Self {
291          a : self.a.min(rhs.a),
292          b : self.b.min(rhs.b),
293        }
294      }
295    }
296  }
297
298  #[inline]
299  #[must_use]
300  pub fn saturating_add(self, rhs: Self) -> Self {
301    pick! {
302      if #[cfg(target_feature="avx2")] {
303        Self { avx: add_saturating_u8_m256i(self.avx, rhs.avx) }
304      } else {
305        Self {
306          a : self.a.saturating_add(rhs.a),
307          b : self.b.saturating_add(rhs.b),
308        }
309      }
310    }
311  }
312  #[inline]
313  #[must_use]
314  pub fn saturating_sub(self, rhs: Self) -> Self {
315    pick! {
316      if #[cfg(target_feature="avx2")] {
317        Self { avx: sub_saturating_u8_m256i(self.avx, rhs.avx) }
318      } else {
319        Self {
320          a : self.a.saturating_sub(rhs.a),
321          b : self.b.saturating_sub(rhs.b),
322        }
323      }
324    }
325  }
326
327  #[inline]
328  #[must_use]
329  #[doc(alias("movemask", "move_mask"))]
330  pub fn to_bitmask(self) -> u32 {
331    i8x32::to_bitmask(cast(self)) as u32
332  }
333
334  #[inline]
335  #[must_use]
336  pub fn any(self) -> bool {
337    i8x32::any(cast(self))
338  }
339
340  #[inline]
341  #[must_use]
342  pub fn all(self) -> bool {
343    i8x32::all(cast(self))
344  }
345
346  /// Returns a new vector with lanes selected from the lanes of the first input
347  /// vector a specified in the second input vector `rhs`.
348  /// The indices i in range `[0, 15]` select the i-th element of `self`. For
349  /// indices outside of the range the resulting lane is `0`.
350  ///
351  /// This note that is the equivalent of two parallel swizzle operations on the
352  /// two halves of the vector, and the indexes each refer to the
353  /// corresponding half.
354  #[inline]
355  pub fn swizzle_half(self, rhs: i8x32) -> i8x32 {
356    cast(i8x32::swizzle_half(cast(self), cast(rhs)))
357  }
358
359  /// Indices in the range `[0, 15]` will select the i-th element of `self`. If
360  /// the high bit of any element of `rhs` is set (negative) then the
361  /// corresponding output lane is guaranteed to be zero. Otherwise if the
362  /// element of `rhs` is within the range `[32, 127]` then the output lane is
363  /// either `0` or `self[rhs[i] % 16]` depending on the implementation.
364  ///
365  /// This is the equivalent to two parallel swizzle operations on the two
366  /// halves of the vector, and the indexes each refer to their corresponding
367  /// half.
368  #[inline]
369  pub fn swizzle_half_relaxed(self, rhs: u8x32) -> u8x32 {
370    cast(i8x32::swizzle_half_relaxed(cast(self), cast(rhs)))
371  }
372
373  #[inline]
374  #[must_use]
375  pub fn none(self) -> bool {
376    !self.any()
377  }
378
379  #[inline]
380  pub fn to_array(self) -> [u8; 32] {
381    cast(self)
382  }
383
384  #[inline]
385  pub fn as_array(&self) -> &[u8; 32] {
386    cast_ref(self)
387  }
388
389  #[inline]
390  pub fn as_mut_array(&mut self) -> &mut [u8; 32] {
391    cast_mut(self)
392  }
393}
wide/u8x32_.rs

wide/
u8x32_.rs