wide/
u8x16_.rs

1use super::*;
2
3pick! {
4  if #[cfg(target_feature="sse2")] {
5    #[derive(Default, Clone, Copy, PartialEq, Eq)]
6    #[repr(C, align(16))]
7    pub struct u8x16 { pub(crate) sse: m128i }
8  } else if #[cfg(target_feature="simd128")] {
9    use core::arch::wasm32::*;
10
11    #[derive(Clone, Copy)]
12    #[repr(transparent)]
13    pub struct u8x16 { pub(crate) simd: v128 }
14
15    impl Default for u8x16 {
16      fn default() -> Self {
17        Self::splat(0)
18      }
19    }
20
21    impl PartialEq for u8x16 {
22      fn eq(&self, other: &Self) -> bool {
23        u8x16_all_true(u8x16_eq(self.simd, other.simd))
24      }
25    }
26
27    impl Eq for u8x16 { }
28  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
29    use core::arch::aarch64::*;
30    #[repr(C)]
31    #[derive(Copy, Clone)]
32    pub struct u8x16 { pub(crate) neon : uint8x16_t }
33
34    impl Default for u8x16 {
35      #[inline]
36      fn default() -> Self {
37        Self::splat(0)
38      }
39    }
40
41    impl PartialEq for u8x16 {
42      #[inline]
43      fn eq(&self, other: &Self) -> bool {
44        unsafe { vminvq_u8(vceqq_u8(self.neon, other.neon))==u8::MAX }
45      }
46    }
47
48    impl Eq for u8x16 { }
49  } else {
50    #[derive(Default, Clone, Copy, PartialEq, Eq)]
51    #[repr(C, align(16))]
52    pub struct u8x16 { pub(crate) arr: [u8;16] }
53  }
54}
55
56int_uint_consts!(u8, 16, u8x16, 128);
57
58unsafe impl Zeroable for u8x16 {}
59unsafe impl Pod for u8x16 {}
60
61impl AlignTo for u8x16 {
62  type Elem = u8;
63}
64
65impl Add for u8x16 {
66  type Output = Self;
67  #[inline]
68  fn add(self, rhs: Self) -> Self::Output {
69    pick! {
70      if #[cfg(target_feature="sse2")] {
71        Self { sse: add_i8_m128i(self.sse, rhs.sse) }
72      } else if #[cfg(target_feature="simd128")] {
73        Self { simd: u8x16_add(self.simd, rhs.simd) }
74      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
75        unsafe { Self { neon: vaddq_u8(self.neon, rhs.neon) } }
76      } else {
77        Self { arr: [
78          self.arr[0].wrapping_add(rhs.arr[0]),
79          self.arr[1].wrapping_add(rhs.arr[1]),
80          self.arr[2].wrapping_add(rhs.arr[2]),
81          self.arr[3].wrapping_add(rhs.arr[3]),
82          self.arr[4].wrapping_add(rhs.arr[4]),
83          self.arr[5].wrapping_add(rhs.arr[5]),
84          self.arr[6].wrapping_add(rhs.arr[6]),
85          self.arr[7].wrapping_add(rhs.arr[7]),
86          self.arr[8].wrapping_add(rhs.arr[8]),
87          self.arr[9].wrapping_add(rhs.arr[9]),
88          self.arr[10].wrapping_add(rhs.arr[10]),
89          self.arr[11].wrapping_add(rhs.arr[11]),
90          self.arr[12].wrapping_add(rhs.arr[12]),
91          self.arr[13].wrapping_add(rhs.arr[13]),
92          self.arr[14].wrapping_add(rhs.arr[14]),
93          self.arr[15].wrapping_add(rhs.arr[15]),
94        ]}
95      }
96    }
97  }
98}
99
100impl Sub for u8x16 {
101  type Output = Self;
102  #[inline]
103  fn sub(self, rhs: Self) -> Self::Output {
104    pick! {
105      if #[cfg(target_feature="sse2")] {
106        Self { sse: sub_i8_m128i(self.sse, rhs.sse) }
107      } else if #[cfg(target_feature="simd128")] {
108        Self { simd: u8x16_sub(self.simd, rhs.simd) }
109      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
110        unsafe {Self { neon: vsubq_u8(self.neon, rhs.neon) }}
111      } else {
112        Self { arr: [
113          self.arr[0].wrapping_sub(rhs.arr[0]),
114          self.arr[1].wrapping_sub(rhs.arr[1]),
115          self.arr[2].wrapping_sub(rhs.arr[2]),
116          self.arr[3].wrapping_sub(rhs.arr[3]),
117          self.arr[4].wrapping_sub(rhs.arr[4]),
118          self.arr[5].wrapping_sub(rhs.arr[5]),
119          self.arr[6].wrapping_sub(rhs.arr[6]),
120          self.arr[7].wrapping_sub(rhs.arr[7]),
121          self.arr[8].wrapping_sub(rhs.arr[8]),
122          self.arr[9].wrapping_sub(rhs.arr[9]),
123          self.arr[10].wrapping_sub(rhs.arr[10]),
124          self.arr[11].wrapping_sub(rhs.arr[11]),
125          self.arr[12].wrapping_sub(rhs.arr[12]),
126          self.arr[13].wrapping_sub(rhs.arr[13]),
127          self.arr[14].wrapping_sub(rhs.arr[14]),
128          self.arr[15].wrapping_sub(rhs.arr[15]),
129        ]}
130      }
131    }
132  }
133}
134
135impl Add<u8> for u8x16 {
136  type Output = Self;
137  #[inline]
138  fn add(self, rhs: u8) -> Self::Output {
139    self.add(Self::splat(rhs))
140  }
141}
142
143impl Sub<u8> for u8x16 {
144  type Output = Self;
145  #[inline]
146  fn sub(self, rhs: u8) -> Self::Output {
147    self.sub(Self::splat(rhs))
148  }
149}
150
151impl Add<u8x16> for u8 {
152  type Output = u8x16;
153  #[inline]
154  fn add(self, rhs: u8x16) -> Self::Output {
155    u8x16::splat(self).add(rhs)
156  }
157}
158
159impl Sub<u8x16> for u8 {
160  type Output = u8x16;
161  #[inline]
162  fn sub(self, rhs: u8x16) -> Self::Output {
163    u8x16::splat(self).sub(rhs)
164  }
165}
166
167impl BitAnd for u8x16 {
168  type Output = Self;
169  #[inline]
170  fn bitand(self, rhs: Self) -> Self::Output {
171    pick! {
172      if #[cfg(target_feature="sse2")] {
173        Self { sse: bitand_m128i(self.sse, rhs.sse) }
174      } else if #[cfg(target_feature="simd128")] {
175        Self { simd: v128_and(self.simd, rhs.simd) }
176      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
177        unsafe {Self { neon: vandq_u8(self.neon, rhs.neon) }}
178      } else {
179        Self { arr: [
180          self.arr[0].bitand(rhs.arr[0]),
181          self.arr[1].bitand(rhs.arr[1]),
182          self.arr[2].bitand(rhs.arr[2]),
183          self.arr[3].bitand(rhs.arr[3]),
184          self.arr[4].bitand(rhs.arr[4]),
185          self.arr[5].bitand(rhs.arr[5]),
186          self.arr[6].bitand(rhs.arr[6]),
187          self.arr[7].bitand(rhs.arr[7]),
188          self.arr[8].bitand(rhs.arr[8]),
189          self.arr[9].bitand(rhs.arr[9]),
190          self.arr[10].bitand(rhs.arr[10]),
191          self.arr[11].bitand(rhs.arr[11]),
192          self.arr[12].bitand(rhs.arr[12]),
193          self.arr[13].bitand(rhs.arr[13]),
194          self.arr[14].bitand(rhs.arr[14]),
195          self.arr[15].bitand(rhs.arr[15]),
196        ]}
197      }
198    }
199  }
200}
201
202impl BitOr for u8x16 {
203  type Output = Self;
204  #[inline]
205  fn bitor(self, rhs: Self) -> Self::Output {
206    pick! {
207      if #[cfg(target_feature="sse2")] {
208        Self { sse: bitor_m128i(self.sse, rhs.sse) }
209      } else if #[cfg(target_feature="simd128")] {
210        Self { simd: v128_or(self.simd, rhs.simd) }
211      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
212        unsafe {Self { neon: vorrq_u8(self.neon, rhs.neon) }}
213      } else {
214        Self { arr: [
215          self.arr[0].bitor(rhs.arr[0]),
216          self.arr[1].bitor(rhs.arr[1]),
217          self.arr[2].bitor(rhs.arr[2]),
218          self.arr[3].bitor(rhs.arr[3]),
219          self.arr[4].bitor(rhs.arr[4]),
220          self.arr[5].bitor(rhs.arr[5]),
221          self.arr[6].bitor(rhs.arr[6]),
222          self.arr[7].bitor(rhs.arr[7]),
223          self.arr[8].bitor(rhs.arr[8]),
224          self.arr[9].bitor(rhs.arr[9]),
225          self.arr[10].bitor(rhs.arr[10]),
226          self.arr[11].bitor(rhs.arr[11]),
227          self.arr[12].bitor(rhs.arr[12]),
228          self.arr[13].bitor(rhs.arr[13]),
229          self.arr[14].bitor(rhs.arr[14]),
230          self.arr[15].bitor(rhs.arr[15]),
231        ]}
232      }
233    }
234  }
235}
236
237impl BitXor for u8x16 {
238  type Output = Self;
239  #[inline]
240  fn bitxor(self, rhs: Self) -> Self::Output {
241    pick! {
242      if #[cfg(target_feature="sse2")] {
243        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
244      } else if #[cfg(target_feature="simd128")] {
245        Self { simd: v128_xor(self.simd, rhs.simd) }
246      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
247        unsafe {Self { neon: veorq_u8(self.neon, rhs.neon) }}
248      } else {
249        Self { arr: [
250          self.arr[0].bitxor(rhs.arr[0]),
251          self.arr[1].bitxor(rhs.arr[1]),
252          self.arr[2].bitxor(rhs.arr[2]),
253          self.arr[3].bitxor(rhs.arr[3]),
254          self.arr[4].bitxor(rhs.arr[4]),
255          self.arr[5].bitxor(rhs.arr[5]),
256          self.arr[6].bitxor(rhs.arr[6]),
257          self.arr[7].bitxor(rhs.arr[7]),
258          self.arr[8].bitxor(rhs.arr[8]),
259          self.arr[9].bitxor(rhs.arr[9]),
260          self.arr[10].bitxor(rhs.arr[10]),
261          self.arr[11].bitxor(rhs.arr[11]),
262          self.arr[12].bitxor(rhs.arr[12]),
263          self.arr[13].bitxor(rhs.arr[13]),
264          self.arr[14].bitxor(rhs.arr[14]),
265          self.arr[15].bitxor(rhs.arr[15]),
266        ]}
267      }
268    }
269  }
270}
271
272impl CmpEq for u8x16 {
273  type Output = Self;
274  #[inline]
275  fn simd_eq(self, rhs: Self) -> Self::Output {
276    Self::simd_eq(self, rhs)
277  }
278}
279
280impl CmpLt for u8x16 {
281  type Output = Self;
282  #[inline]
283  fn simd_lt(self, rhs: Self) -> Self::Output {
284    Self::simd_lt(self, rhs)
285  }
286}
287
288impl CmpLe for u8x16 {
289  type Output = Self;
290  #[inline]
291  fn simd_le(self, rhs: Self) -> Self::Output {
292    Self::simd_le(self, rhs)
293  }
294}
295
296impl CmpGe for u8x16 {
297  type Output = Self;
298  #[inline]
299  fn simd_ge(self, rhs: Self) -> Self::Output {
300    Self::simd_ge(self, rhs)
301  }
302}
303
304impl CmpGt for u8x16 {
305  type Output = Self;
306  #[inline]
307  fn simd_gt(self, rhs: Self) -> Self::Output {
308    Self::simd_gt(self, rhs)
309  }
310}
311
312impl u8x16 {
313  #[inline]
314  #[must_use]
315  pub const fn new(array: [u8; 16]) -> Self {
316    unsafe { core::mem::transmute(array) }
317  }
318  #[inline]
319  #[must_use]
320  pub fn simd_eq(self, rhs: Self) -> Self {
321    pick! {
322      if #[cfg(target_feature="sse2")] {
323        Self { sse: cmp_eq_mask_i8_m128i(self.sse, rhs.sse) }
324      } else if #[cfg(target_feature="simd128")] {
325        Self { simd: u8x16_eq(self.simd, rhs.simd) }
326      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
327        unsafe {Self { neon: vceqq_u8(self.neon, rhs.neon) }}
328      } else {
329        Self { arr: [
330          if self.arr[0] == rhs.arr[0] { u8::MAX } else { 0 },
331          if self.arr[1] == rhs.arr[1] { u8::MAX } else { 0 },
332          if self.arr[2] == rhs.arr[2] { u8::MAX } else { 0 },
333          if self.arr[3] == rhs.arr[3] { u8::MAX } else { 0 },
334          if self.arr[4] == rhs.arr[4] { u8::MAX } else { 0 },
335          if self.arr[5] == rhs.arr[5] { u8::MAX } else { 0 },
336          if self.arr[6] == rhs.arr[6] { u8::MAX } else { 0 },
337          if self.arr[7] == rhs.arr[7] { u8::MAX } else { 0 },
338          if self.arr[8] == rhs.arr[8] { u8::MAX } else { 0 },
339          if self.arr[9] == rhs.arr[9] { u8::MAX } else { 0 },
340          if self.arr[10] == rhs.arr[10] { u8::MAX } else { 0 },
341          if self.arr[11] == rhs.arr[11] { u8::MAX } else { 0 },
342          if self.arr[12] == rhs.arr[12] { u8::MAX } else { 0 },
343          if self.arr[13] == rhs.arr[13] { u8::MAX } else { 0 },
344          if self.arr[14] == rhs.arr[14] { u8::MAX } else { 0 },
345          if self.arr[15] == rhs.arr[15] { u8::MAX } else { 0 },
346        ]}
347      }
348    }
349  }
350  #[inline]
351  #[must_use]
352  pub fn simd_lt(self, rhs: Self) -> Self {
353    pick! {
354      if #[cfg(target_feature="sse2")] {
355        // Convert from u8 to i8.
356        let offset = Self::splat(0x80);
357        let self_i8 = self.bitxor(offset).sse;
358        let rhs_i8 = rhs.bitxor(offset).sse;
359        Self { sse: cmp_lt_mask_i8_m128i(self_i8, rhs_i8) }
360      } else if #[cfg(target_feature="simd128")] {
361        Self { simd: u8x16_lt(self.simd, rhs.simd) }
362      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
363        unsafe {Self { neon: vcltq_u8(self.neon, rhs.neon) }}
364      } else {
365        Self { arr: [
366          if self.arr[0] < rhs.arr[0] { u8::MAX } else { 0 },
367          if self.arr[1] < rhs.arr[1] { u8::MAX } else { 0 },
368          if self.arr[2] < rhs.arr[2] { u8::MAX } else { 0 },
369          if self.arr[3] < rhs.arr[3] { u8::MAX } else { 0 },
370          if self.arr[4] < rhs.arr[4] { u8::MAX } else { 0 },
371          if self.arr[5] < rhs.arr[5] { u8::MAX } else { 0 },
372          if self.arr[6] < rhs.arr[6] { u8::MAX } else { 0 },
373          if self.arr[7] < rhs.arr[7] { u8::MAX } else { 0 },
374          if self.arr[8] < rhs.arr[8] { u8::MAX } else { 0 },
375          if self.arr[9] < rhs.arr[9] { u8::MAX } else { 0 },
376          if self.arr[10] < rhs.arr[10] { u8::MAX } else { 0 },
377          if self.arr[11] < rhs.arr[11] { u8::MAX } else { 0 },
378          if self.arr[12] < rhs.arr[12] { u8::MAX } else { 0 },
379          if self.arr[13] < rhs.arr[13] { u8::MAX } else { 0 },
380          if self.arr[14] < rhs.arr[14] { u8::MAX } else { 0 },
381          if self.arr[15] < rhs.arr[15] { u8::MAX } else { 0 },
382        ]}
383      }
384    }
385  }
386  #[inline]
387  #[must_use]
388  pub fn simd_le(self, rhs: Self) -> Self {
389    pick! {
390      if #[cfg(target_feature="sse2")] {
391        // Convert from u8 to i8.
392        let offset = Self::splat(0x80);
393        let self_i8 = self.bitxor(offset).sse;
394        let rhs_i8 = rhs.bitxor(offset).sse;
395        // a <= b  is equivalent to  !(b < a)  or  !(a > b)
396        let gt_mask = u8x16 { sse: cmp_gt_mask_i8_m128i(self_i8, rhs_i8) };
397        Self { sse: gt_mask.bitxor(u8x16::splat(0xFF)).sse }
398      } else if #[cfg(target_feature="simd128")] {
399        Self { simd: u8x16_le(self.simd, rhs.simd) }
400      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
401        unsafe {Self { neon: vcleq_u8(self.neon, rhs.neon) }}
402      } else {
403        Self { arr: [
404          if self.arr[0] <= rhs.arr[0] { u8::MAX } else { 0 },
405          if self.arr[1] <= rhs.arr[1] { u8::MAX } else { 0 },
406          if self.arr[2] <= rhs.arr[2] { u8::MAX } else { 0 },
407          if self.arr[3] <= rhs.arr[3] { u8::MAX } else { 0 },
408          if self.arr[4] <= rhs.arr[4] { u8::MAX } else { 0 },
409          if self.arr[5] <= rhs.arr[5] { u8::MAX } else { 0 },
410          if self.arr[6] <= rhs.arr[6] { u8::MAX } else { 0 },
411          if self.arr[7] <= rhs.arr[7] { u8::MAX } else { 0 },
412          if self.arr[8] <= rhs.arr[8] { u8::MAX } else { 0 },
413          if self.arr[9] <= rhs.arr[9] { u8::MAX } else { 0 },
414          if self.arr[10] <= rhs.arr[10] { u8::MAX } else { 0 },
415          if self.arr[11] <= rhs.arr[11] { u8::MAX } else { 0 },
416          if self.arr[12] <= rhs.arr[12] { u8::MAX } else { 0 },
417          if self.arr[13] <= rhs.arr[13] { u8::MAX } else { 0 },
418          if self.arr[14] <= rhs.arr[14] { u8::MAX } else { 0 },
419          if self.arr[15] <= rhs.arr[15] { u8::MAX } else { 0 },
420        ]}
421      }
422    }
423  }
424  #[inline]
425  #[must_use]
426  pub fn simd_ge(self, rhs: Self) -> Self {
427    pick! {
428      if #[cfg(target_feature="sse2")] {
429        // Convert from u8 to i8.
430        let offset = Self::splat(0x80);
431        let self_i8 = self.bitxor(offset).sse;
432        let rhs_i8 = rhs.bitxor(offset).sse;
433        // a >= b  is equivalent to  !(b > a)  or  !(a < b)
434        let lt_mask = u8x16 { sse: cmp_lt_mask_i8_m128i(self_i8, rhs_i8) };
435        Self { sse: lt_mask.bitxor(u8x16::splat(0xFF)).sse }
436      } else if #[cfg(target_feature="simd128")] {
437        Self { simd: u8x16_ge(self.simd, rhs.simd) }
438      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
439        unsafe {Self { neon: vcgeq_u8(self.neon, rhs.neon) }}
440      } else {
441        Self { arr: [
442          if self.arr[0] >= rhs.arr[0] { u8::MAX } else { 0 },
443          if self.arr[1] >= rhs.arr[1] { u8::MAX } else { 0 },
444          if self.arr[2] >= rhs.arr[2] { u8::MAX } else { 0 },
445          if self.arr[3] >= rhs.arr[3] { u8::MAX } else { 0 },
446          if self.arr[4] >= rhs.arr[4] { u8::MAX } else { 0 },
447          if self.arr[5] >= rhs.arr[5] { u8::MAX } else { 0 },
448          if self.arr[6] >= rhs.arr[6] { u8::MAX } else { 0 },
449          if self.arr[7] >= rhs.arr[7] { u8::MAX } else { 0 },
450          if self.arr[8] >= rhs.arr[8] { u8::MAX } else { 0 },
451          if self.arr[9] >= rhs.arr[9] { u8::MAX } else { 0 },
452          if self.arr[10] >= rhs.arr[10] { u8::MAX } else { 0 },
453          if self.arr[11] >= rhs.arr[11] { u8::MAX } else { 0 },
454          if self.arr[12] >= rhs.arr[12] { u8::MAX } else { 0 },
455          if self.arr[13] >= rhs.arr[13] { u8::MAX } else { 0 },
456          if self.arr[14] >= rhs.arr[14] { u8::MAX } else { 0 },
457          if self.arr[15] >= rhs.arr[15] { u8::MAX } else { 0 },
458        ]}
459      }
460    }
461  }
462  #[inline]
463  #[must_use]
464  pub fn simd_gt(self, rhs: Self) -> Self {
465    pick! {
466      if #[cfg(target_feature="sse2")] {
467        // Convert from u8 to i8.
468        let offset = Self::splat(0x80);
469        let self_i8 = self.bitxor(offset).sse;
470        let rhs_i8 = rhs.bitxor(offset).sse;
471        Self { sse: cmp_gt_mask_i8_m128i(self_i8, rhs_i8) }
472      } else if #[cfg(target_feature="simd128")] {
473        Self { simd: u8x16_gt(self.simd, rhs.simd) }
474      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
475        unsafe {Self { neon: vcgtq_u8(self.neon, rhs.neon) }}
476      } else {
477        Self { arr: [
478          if self.arr[0] > rhs.arr[0] { u8::MAX } else { 0 },
479          if self.arr[1] > rhs.arr[1] { u8::MAX } else { 0 },
480          if self.arr[2] > rhs.arr[2] { u8::MAX } else { 0 },
481          if self.arr[3] > rhs.arr[3] { u8::MAX } else { 0 },
482          if self.arr[4] > rhs.arr[4] { u8::MAX } else { 0 },
483          if self.arr[5] > rhs.arr[5] { u8::MAX } else { 0 },
484          if self.arr[6] > rhs.arr[6] { u8::MAX } else { 0 },
485          if self.arr[7] > rhs.arr[7] { u8::MAX } else { 0 },
486          if self.arr[8] > rhs.arr[8] { u8::MAX } else { 0 },
487          if self.arr[9] > rhs.arr[9] { u8::MAX } else { 0 },
488          if self.arr[10] > rhs.arr[10] { u8::MAX } else { 0 },
489          if self.arr[11] > rhs.arr[11] { u8::MAX } else { 0 },
490          if self.arr[12] > rhs.arr[12] { u8::MAX } else { 0 },
491          if self.arr[13] > rhs.arr[13] { u8::MAX } else { 0 },
492          if self.arr[14] > rhs.arr[14] { u8::MAX } else { 0 },
493          if self.arr[15] > rhs.arr[15] { u8::MAX } else { 0 },
494        ]}
495      }
496    }
497  }
498  #[inline]
499  #[must_use]
500  pub fn blend(self, t: Self, f: Self) -> Self {
501    pick! {
502      if #[cfg(target_feature="sse4.1")] {
503        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
504      } else if #[cfg(target_feature="simd128")] {
505        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
506      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
507        unsafe {Self { neon: vbslq_u8(self.neon, t.neon, f.neon) }}
508      } else {
509        generic_bit_blend(self, t, f)
510      }
511    }
512  }
513  #[inline]
514  #[must_use]
515  pub fn max(self, rhs: Self) -> Self {
516    pick! {
517      if #[cfg(target_feature="sse2")] {
518        Self { sse: max_u8_m128i(self.sse, rhs.sse) }
519      } else if #[cfg(target_feature="simd128")] {
520        Self { simd: u8x16_max(self.simd, rhs.simd) }
521      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
522        unsafe {Self { neon: vmaxq_u8(self.neon, rhs.neon) }}
523      } else {
524        Self { arr: [
525          self.arr[0].max(rhs.arr[0]),
526          self.arr[1].max(rhs.arr[1]),
527          self.arr[2].max(rhs.arr[2]),
528          self.arr[3].max(rhs.arr[3]),
529          self.arr[4].max(rhs.arr[4]),
530          self.arr[5].max(rhs.arr[5]),
531          self.arr[6].max(rhs.arr[6]),
532          self.arr[7].max(rhs.arr[7]),
533          self.arr[8].max(rhs.arr[8]),
534          self.arr[9].max(rhs.arr[9]),
535          self.arr[10].max(rhs.arr[10]),
536          self.arr[11].max(rhs.arr[11]),
537          self.arr[12].max(rhs.arr[12]),
538          self.arr[13].max(rhs.arr[13]),
539          self.arr[14].max(rhs.arr[14]),
540          self.arr[15].max(rhs.arr[15]),
541        ]}
542      }
543    }
544  }
545  #[inline]
546  #[must_use]
547  pub fn min(self, rhs: Self) -> Self {
548    pick! {
549      if #[cfg(target_feature="sse2")] {
550        Self { sse: min_u8_m128i(self.sse, rhs.sse) }
551      } else if #[cfg(target_feature="simd128")] {
552        Self { simd: u8x16_min(self.simd, rhs.simd) }
553      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
554        unsafe {Self { neon: vminq_u8(self.neon, rhs.neon) }}
555      } else {
556        Self { arr: [
557          self.arr[0].min(rhs.arr[0]),
558          self.arr[1].min(rhs.arr[1]),
559          self.arr[2].min(rhs.arr[2]),
560          self.arr[3].min(rhs.arr[3]),
561          self.arr[4].min(rhs.arr[4]),
562          self.arr[5].min(rhs.arr[5]),
563          self.arr[6].min(rhs.arr[6]),
564          self.arr[7].min(rhs.arr[7]),
565          self.arr[8].min(rhs.arr[8]),
566          self.arr[9].min(rhs.arr[9]),
567          self.arr[10].min(rhs.arr[10]),
568          self.arr[11].min(rhs.arr[11]),
569          self.arr[12].min(rhs.arr[12]),
570          self.arr[13].min(rhs.arr[13]),
571          self.arr[14].min(rhs.arr[14]),
572          self.arr[15].min(rhs.arr[15]),
573        ]}
574      }
575    }
576  }
577
578  #[inline]
579  #[must_use]
580  pub fn saturating_add(self, rhs: Self) -> Self {
581    pick! {
582      if #[cfg(target_feature="sse2")] {
583        Self { sse: add_saturating_u8_m128i(self.sse, rhs.sse) }
584      } else if #[cfg(target_feature="simd128")] {
585        Self { simd: u8x16_add_sat(self.simd, rhs.simd) }
586      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
587        unsafe {Self { neon: vqaddq_u8(self.neon, rhs.neon) }}
588      } else {
589        Self { arr: [
590          self.arr[0].saturating_add(rhs.arr[0]),
591          self.arr[1].saturating_add(rhs.arr[1]),
592          self.arr[2].saturating_add(rhs.arr[2]),
593          self.arr[3].saturating_add(rhs.arr[3]),
594          self.arr[4].saturating_add(rhs.arr[4]),
595          self.arr[5].saturating_add(rhs.arr[5]),
596          self.arr[6].saturating_add(rhs.arr[6]),
597          self.arr[7].saturating_add(rhs.arr[7]),
598          self.arr[8].saturating_add(rhs.arr[8]),
599          self.arr[9].saturating_add(rhs.arr[9]),
600          self.arr[10].saturating_add(rhs.arr[10]),
601          self.arr[11].saturating_add(rhs.arr[11]),
602          self.arr[12].saturating_add(rhs.arr[12]),
603          self.arr[13].saturating_add(rhs.arr[13]),
604          self.arr[14].saturating_add(rhs.arr[14]),
605          self.arr[15].saturating_add(rhs.arr[15]),
606        ]}
607      }
608    }
609  }
610  #[inline]
611  #[must_use]
612  pub fn saturating_sub(self, rhs: Self) -> Self {
613    pick! {
614      if #[cfg(target_feature="sse2")] {
615        Self { sse: sub_saturating_u8_m128i(self.sse, rhs.sse) }
616      } else if #[cfg(target_feature="simd128")] {
617        Self { simd: u8x16_sub_sat(self.simd, rhs.simd) }
618      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
619        unsafe { Self { neon: vqsubq_u8(self.neon, rhs.neon) } }
620      } else {
621        Self { arr: [
622          self.arr[0].saturating_sub(rhs.arr[0]),
623          self.arr[1].saturating_sub(rhs.arr[1]),
624          self.arr[2].saturating_sub(rhs.arr[2]),
625          self.arr[3].saturating_sub(rhs.arr[3]),
626          self.arr[4].saturating_sub(rhs.arr[4]),
627          self.arr[5].saturating_sub(rhs.arr[5]),
628          self.arr[6].saturating_sub(rhs.arr[6]),
629          self.arr[7].saturating_sub(rhs.arr[7]),
630          self.arr[8].saturating_sub(rhs.arr[8]),
631          self.arr[9].saturating_sub(rhs.arr[9]),
632          self.arr[10].saturating_sub(rhs.arr[10]),
633          self.arr[11].saturating_sub(rhs.arr[11]),
634          self.arr[12].saturating_sub(rhs.arr[12]),
635          self.arr[13].saturating_sub(rhs.arr[13]),
636          self.arr[14].saturating_sub(rhs.arr[14]),
637          self.arr[15].saturating_sub(rhs.arr[15]),
638        ]}
639      }
640    }
641  }
642
643  /// Unpack and interleave low lanes of two `u8x16`
644  #[inline]
645  #[must_use]
646  pub fn unpack_low(lhs: u8x16, rhs: u8x16) -> u8x16 {
647    pick! {
648        if #[cfg(target_feature = "sse2")] {
649            u8x16 { sse: unpack_low_i8_m128i(lhs.sse, rhs.sse) }
650        } else if #[cfg(target_feature = "simd128")] {
651          u8x16 { simd: u8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(lhs.simd, rhs.simd) }
652        } else if #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] {
653            let lhs = unsafe { vget_low_u8(lhs.neon) };
654            let rhs = unsafe { vget_low_u8(rhs.neon) };
655
656            let zipped = unsafe { vzip_u8(lhs, rhs) };
657            u8x16 { neon: unsafe { vcombine_u8(zipped.0, zipped.1) } }
658        } else {
659            u8x16::new([
660                lhs.as_array()[0], rhs.as_array()[0],
661                lhs.as_array()[1], rhs.as_array()[1],
662                lhs.as_array()[2], rhs.as_array()[2],
663                lhs.as_array()[3], rhs.as_array()[3],
664                lhs.as_array()[4], rhs.as_array()[4],
665                lhs.as_array()[5], rhs.as_array()[5],
666                lhs.as_array()[6], rhs.as_array()[6],
667                lhs.as_array()[7], rhs.as_array()[7],
668            ])
669        }
670    }
671  }
672
673  /// Unpack and interleave high lanes of two `u8x16`
674  #[inline]
675  #[must_use]
676  pub fn unpack_high(lhs: u8x16, rhs: u8x16) -> u8x16 {
677    pick! {
678        if #[cfg(target_feature = "sse2")] {
679            u8x16 { sse: unpack_high_i8_m128i(lhs.sse, rhs.sse) }
680        } else if #[cfg(target_feature = "simd128")] {
681            u8x16 { simd: u8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>(lhs.simd, rhs.simd) }
682        } else if #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] {
683            let lhs = unsafe { vget_high_u8(lhs.neon) };
684            let rhs = unsafe { vget_high_u8(rhs.neon) };
685
686            let zipped = unsafe { vzip_u8(lhs, rhs) };
687            u8x16 { neon: unsafe { vcombine_u8(zipped.0, zipped.1) } }
688        } else {
689            u8x16::new([
690                lhs.as_array()[8], rhs.as_array()[8],
691                lhs.as_array()[9], rhs.as_array()[9],
692                lhs.as_array()[10], rhs.as_array()[10],
693                lhs.as_array()[11], rhs.as_array()[11],
694                lhs.as_array()[12], rhs.as_array()[12],
695                lhs.as_array()[13], rhs.as_array()[13],
696                lhs.as_array()[14], rhs.as_array()[14],
697                lhs.as_array()[15], rhs.as_array()[15],
698            ])
699        }
700    }
701  }
702
703  /// Pack and saturate two `i16x8` to `u8x16`
704  #[inline]
705  #[must_use]
706  pub fn narrow_i16x8(lhs: i16x8, rhs: i16x8) -> Self {
707    pick! {
708        if #[cfg(target_feature = "sse2")] {
709            u8x16 { sse: pack_i16_to_u8_m128i(lhs.sse, rhs.sse) }
710        } else if #[cfg(target_feature = "simd128")] {
711            u8x16 { simd: u8x16_narrow_i16x8(lhs.simd, rhs.simd) }
712        } else if #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] {
713            let lhs = unsafe { vqmovun_s16(lhs.neon) };
714            let rhs = unsafe { vqmovun_s16(rhs.neon) };
715            u8x16 { neon: unsafe { vcombine_u8(lhs, rhs) } }
716        } else {
717            fn clamp(a: i16) -> u8 {
718                  if a < u8::MIN as i16 {
719                      u8::MIN
720                  } else if a > u8::MAX as i16 {
721                      u8::MAX
722                  } else {
723                      a as u8
724                  }
725            }
726
727            Self { arr: [
728                clamp(lhs.as_array()[0]),
729                clamp(lhs.as_array()[1]),
730                clamp(lhs.as_array()[2]),
731                clamp(lhs.as_array()[3]),
732                clamp(lhs.as_array()[4]),
733                clamp(lhs.as_array()[5]),
734                clamp(lhs.as_array()[6]),
735                clamp(lhs.as_array()[7]),
736                clamp(rhs.as_array()[0]),
737                clamp(rhs.as_array()[1]),
738                clamp(rhs.as_array()[2]),
739                clamp(rhs.as_array()[3]),
740                clamp(rhs.as_array()[4]),
741                clamp(rhs.as_array()[5]),
742                clamp(rhs.as_array()[6]),
743                clamp(rhs.as_array()[7]),
744            ]}
745        }
746    }
747  }
748
749  /// Returns a new vector where each element is based on the index values in
750  /// `rhs`.
751  ///
752  /// * Index values in the range `[0, 15]` select the i-th element of `self`.
753  /// * Index values that are out of range will cause that output lane to be
754  ///   `0`.
755  #[inline]
756  pub fn swizzle(self, rhs: i8x16) -> i8x16 {
757    cast(i8x16::swizzle(cast(self), rhs))
758  }
759
760  /// Works like [`swizzle`](Self::swizzle) with the following additional
761  /// details
762  ///
763  /// * Indices in the range `[0, 15]` will select the i-th element of `self`.
764  /// * If the high bit of any index is set (meaning that the index is
765  ///   negative), then the corresponding output lane is guaranteed to be zero.
766  /// * Otherwise the output lane is either `0` or `self[rhs[i] % 16]`,
767  ///   depending on the implementation.
768  #[inline]
769  pub fn swizzle_relaxed(self, rhs: u8x16) -> u8x16 {
770    cast(i8x16::swizzle_relaxed(cast(self), cast(rhs)))
771  }
772
773  #[inline]
774  #[must_use]
775  #[doc(alias("movemask", "move_mask"))]
776  pub fn to_bitmask(self) -> u32 {
777    i8x16::to_bitmask(cast(self)) as u32
778  }
779
780  #[inline]
781  #[must_use]
782  pub fn any(self) -> bool {
783    i8x16::any(cast(self))
784  }
785
786  #[inline]
787  #[must_use]
788  pub fn all(self) -> bool {
789    i8x16::all(cast(self))
790  }
791
792  #[inline]
793  #[must_use]
794  pub fn none(self) -> bool {
795    i8x16::none(cast(self))
796  }
797
798  #[inline]
799  pub fn to_array(self) -> [u8; 16] {
800    cast(self)
801  }
802
803  #[inline]
804  pub fn as_array(&self) -> &[u8; 16] {
805    cast_ref(self)
806  }
807
808  #[inline]
809  pub fn as_mut_array(&mut self) -> &mut [u8; 16] {
810    cast_mut(self)
811  }
812}
wide/u8x16_.rs

wide/
u8x16_.rs