wide/
i8x16_.rs

1use super::*;
2
3pick! {
4  if #[cfg(target_feature="sse2")] {
5    #[derive(Default, Clone, Copy, PartialEq, Eq)]
6    #[repr(C, align(16))]
7    pub struct i8x16 { pub(crate) sse: m128i }
8  } else if #[cfg(target_feature="simd128")] {
9    use core::arch::wasm32::*;
10
11    #[derive(Clone, Copy)]
12    #[repr(transparent)]
13    pub struct i8x16 { pub(crate) simd: v128 }
14
15    impl Default for i8x16 {
16      fn default() -> Self {
17        Self::splat(0)
18      }
19    }
20
21    impl PartialEq for i8x16 {
22      fn eq(&self, other: &Self) -> bool {
23        u8x16_all_true(i8x16_eq(self.simd, other.simd))
24      }
25    }
26
27    impl Eq for i8x16 { }
28  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
29    use core::arch::aarch64::*;
30    #[repr(C)]
31    #[derive(Copy, Clone)]
32    pub struct i8x16 { pub(crate) neon : int8x16_t }
33
34    impl Default for i8x16 {
35      #[inline]
36      fn default() -> Self {
37        Self::splat(0)
38      }
39    }
40
41    impl PartialEq for i8x16 {
42      #[inline]
43      fn eq(&self, other: &Self) -> bool {
44        unsafe { vminvq_u8(vceqq_s8(self.neon, other.neon))==u8::MAX }
45      }
46    }
47
48    impl Eq for i8x16 { }
49  } else {
50    #[derive(Default, Clone, Copy, PartialEq, Eq)]
51    #[repr(C, align(16))]
52    pub struct i8x16 { arr: [i8;16] }
53  }
54}
55
// Invokes the `int_uint_consts!` macro (defined elsewhere in the crate) with
// the element type `i8`, the literal `16`, the vector type `i8x16`, and the
// literal `128`.
// NOTE(review): the numeric arguments look like the lane count and the total
// bit width -- confirm against the macro definition.
int_uint_consts!(i8, 16, i8x16, 128);
57
// Marker-trait impls for `i8x16`.
// NOTE(review): `Zeroable` and `Pod` come in through `use super::*` and look
// like the `bytemuck` traits required by the `cast` helpers this file calls --
// confirm. Their soundness relies on every backing representation of `i8x16`
// being plain 16-byte data with no padding.
unsafe impl Zeroable for i8x16 {}
unsafe impl Pod for i8x16 {}
60
// Declares `i8` as the element type (`Elem`) of `i8x16` for the `AlignTo`
// trait (defined elsewhere in the crate).
impl AlignTo for i8x16 {
  type Elem = i8;
}
64
65impl Add for i8x16 {
66  type Output = Self;
67  #[inline]
68  fn add(self, rhs: Self) -> Self::Output {
69    pick! {
70      if #[cfg(target_feature="sse2")] {
71        Self { sse: add_i8_m128i(self.sse, rhs.sse) }
72      } else if #[cfg(target_feature="simd128")] {
73        Self { simd: i8x16_add(self.simd, rhs.simd) }
74      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
75        unsafe { Self { neon: vaddq_s8(self.neon, rhs.neon) } }
76      } else {
77        Self { arr: [
78          self.arr[0].wrapping_add(rhs.arr[0]),
79          self.arr[1].wrapping_add(rhs.arr[1]),
80          self.arr[2].wrapping_add(rhs.arr[2]),
81          self.arr[3].wrapping_add(rhs.arr[3]),
82          self.arr[4].wrapping_add(rhs.arr[4]),
83          self.arr[5].wrapping_add(rhs.arr[5]),
84          self.arr[6].wrapping_add(rhs.arr[6]),
85          self.arr[7].wrapping_add(rhs.arr[7]),
86          self.arr[8].wrapping_add(rhs.arr[8]),
87          self.arr[9].wrapping_add(rhs.arr[9]),
88          self.arr[10].wrapping_add(rhs.arr[10]),
89          self.arr[11].wrapping_add(rhs.arr[11]),
90          self.arr[12].wrapping_add(rhs.arr[12]),
91          self.arr[13].wrapping_add(rhs.arr[13]),
92          self.arr[14].wrapping_add(rhs.arr[14]),
93          self.arr[15].wrapping_add(rhs.arr[15]),
94        ]}
95      }
96    }
97  }
98}
99
100impl Sub for i8x16 {
101  type Output = Self;
102  #[inline]
103  fn sub(self, rhs: Self) -> Self::Output {
104    pick! {
105      if #[cfg(target_feature="sse2")] {
106        Self { sse: sub_i8_m128i(self.sse, rhs.sse) }
107      } else if #[cfg(target_feature="simd128")] {
108        Self { simd: i8x16_sub(self.simd, rhs.simd) }
109      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
110        unsafe {Self { neon: vsubq_s8(self.neon, rhs.neon) }}
111      } else {
112        Self { arr: [
113          self.arr[0].wrapping_sub(rhs.arr[0]),
114          self.arr[1].wrapping_sub(rhs.arr[1]),
115          self.arr[2].wrapping_sub(rhs.arr[2]),
116          self.arr[3].wrapping_sub(rhs.arr[3]),
117          self.arr[4].wrapping_sub(rhs.arr[4]),
118          self.arr[5].wrapping_sub(rhs.arr[5]),
119          self.arr[6].wrapping_sub(rhs.arr[6]),
120          self.arr[7].wrapping_sub(rhs.arr[7]),
121          self.arr[8].wrapping_sub(rhs.arr[8]),
122          self.arr[9].wrapping_sub(rhs.arr[9]),
123          self.arr[10].wrapping_sub(rhs.arr[10]),
124          self.arr[11].wrapping_sub(rhs.arr[11]),
125          self.arr[12].wrapping_sub(rhs.arr[12]),
126          self.arr[13].wrapping_sub(rhs.arr[13]),
127          self.arr[14].wrapping_sub(rhs.arr[14]),
128          self.arr[15].wrapping_sub(rhs.arr[15]),
129        ]}
130      }
131    }
132  }
133}
134
135impl Add<i8> for i8x16 {
136  type Output = Self;
137  #[inline]
138  fn add(self, rhs: i8) -> Self::Output {
139    self.add(Self::splat(rhs))
140  }
141}
142
143impl Sub<i8> for i8x16 {
144  type Output = Self;
145  #[inline]
146  fn sub(self, rhs: i8) -> Self::Output {
147    self.sub(Self::splat(rhs))
148  }
149}
150
151impl Add<i8x16> for i8 {
152  type Output = i8x16;
153  #[inline]
154  fn add(self, rhs: i8x16) -> Self::Output {
155    i8x16::splat(self).add(rhs)
156  }
157}
158
159impl Sub<i8x16> for i8 {
160  type Output = i8x16;
161  #[inline]
162  fn sub(self, rhs: i8x16) -> Self::Output {
163    i8x16::splat(self).sub(rhs)
164  }
165}
166
167impl BitAnd for i8x16 {
168  type Output = Self;
169  #[inline]
170  fn bitand(self, rhs: Self) -> Self::Output {
171    pick! {
172      if #[cfg(target_feature="sse2")] {
173        Self { sse: bitand_m128i(self.sse, rhs.sse) }
174      } else if #[cfg(target_feature="simd128")] {
175        Self { simd: v128_and(self.simd, rhs.simd) }
176      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
177        unsafe {Self { neon: vandq_s8(self.neon, rhs.neon) }}
178      } else {
179        Self { arr: [
180          self.arr[0].bitand(rhs.arr[0]),
181          self.arr[1].bitand(rhs.arr[1]),
182          self.arr[2].bitand(rhs.arr[2]),
183          self.arr[3].bitand(rhs.arr[3]),
184          self.arr[4].bitand(rhs.arr[4]),
185          self.arr[5].bitand(rhs.arr[5]),
186          self.arr[6].bitand(rhs.arr[6]),
187          self.arr[7].bitand(rhs.arr[7]),
188          self.arr[8].bitand(rhs.arr[8]),
189          self.arr[9].bitand(rhs.arr[9]),
190          self.arr[10].bitand(rhs.arr[10]),
191          self.arr[11].bitand(rhs.arr[11]),
192          self.arr[12].bitand(rhs.arr[12]),
193          self.arr[13].bitand(rhs.arr[13]),
194          self.arr[14].bitand(rhs.arr[14]),
195          self.arr[15].bitand(rhs.arr[15]),
196        ]}
197      }
198    }
199  }
200}
201
202impl BitOr for i8x16 {
203  type Output = Self;
204  #[inline]
205  fn bitor(self, rhs: Self) -> Self::Output {
206    pick! {
207      if #[cfg(target_feature="sse2")] {
208        Self { sse: bitor_m128i(self.sse, rhs.sse) }
209      } else if #[cfg(target_feature="simd128")] {
210        Self { simd: v128_or(self.simd, rhs.simd) }
211      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
212        unsafe {Self { neon: vorrq_s8(self.neon, rhs.neon) }}
213      } else {
214        Self { arr: [
215          self.arr[0].bitor(rhs.arr[0]),
216          self.arr[1].bitor(rhs.arr[1]),
217          self.arr[2].bitor(rhs.arr[2]),
218          self.arr[3].bitor(rhs.arr[3]),
219          self.arr[4].bitor(rhs.arr[4]),
220          self.arr[5].bitor(rhs.arr[5]),
221          self.arr[6].bitor(rhs.arr[6]),
222          self.arr[7].bitor(rhs.arr[7]),
223          self.arr[8].bitor(rhs.arr[8]),
224          self.arr[9].bitor(rhs.arr[9]),
225          self.arr[10].bitor(rhs.arr[10]),
226          self.arr[11].bitor(rhs.arr[11]),
227          self.arr[12].bitor(rhs.arr[12]),
228          self.arr[13].bitor(rhs.arr[13]),
229          self.arr[14].bitor(rhs.arr[14]),
230          self.arr[15].bitor(rhs.arr[15]),
231        ]}
232      }
233    }
234  }
235}
236
237impl BitXor for i8x16 {
238  type Output = Self;
239  #[inline]
240  fn bitxor(self, rhs: Self) -> Self::Output {
241    pick! {
242      if #[cfg(target_feature="sse2")] {
243        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
244      } else if #[cfg(target_feature="simd128")] {
245        Self { simd: v128_xor(self.simd, rhs.simd) }
246      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
247        unsafe {Self { neon: veorq_s8(self.neon, rhs.neon) }}
248      } else {
249        Self { arr: [
250          self.arr[0].bitxor(rhs.arr[0]),
251          self.arr[1].bitxor(rhs.arr[1]),
252          self.arr[2].bitxor(rhs.arr[2]),
253          self.arr[3].bitxor(rhs.arr[3]),
254          self.arr[4].bitxor(rhs.arr[4]),
255          self.arr[5].bitxor(rhs.arr[5]),
256          self.arr[6].bitxor(rhs.arr[6]),
257          self.arr[7].bitxor(rhs.arr[7]),
258          self.arr[8].bitxor(rhs.arr[8]),
259          self.arr[9].bitxor(rhs.arr[9]),
260          self.arr[10].bitxor(rhs.arr[10]),
261          self.arr[11].bitxor(rhs.arr[11]),
262          self.arr[12].bitxor(rhs.arr[12]),
263          self.arr[13].bitxor(rhs.arr[13]),
264          self.arr[14].bitxor(rhs.arr[14]),
265          self.arr[15].bitxor(rhs.arr[15]),
266        ]}
267      }
268    }
269  }
270}
271
272impl CmpEq for i8x16 {
273  type Output = Self;
274  #[inline]
275  fn simd_eq(self, rhs: Self) -> Self::Output {
276    pick! {
277      if #[cfg(target_feature="sse2")] {
278        Self { sse: cmp_eq_mask_i8_m128i(self.sse, rhs.sse) }
279      } else if #[cfg(target_feature="simd128")] {
280        Self { simd: i8x16_eq(self.simd, rhs.simd) }
281      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
282        unsafe {Self { neon: vreinterpretq_s8_u8(vceqq_s8(self.neon, rhs.neon)) }}
283      } else {
284        Self { arr: [
285          if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
286          if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
287          if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
288          if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
289          if self.arr[4] == rhs.arr[4] { -1 } else { 0 },
290          if self.arr[5] == rhs.arr[5] { -1 } else { 0 },
291          if self.arr[6] == rhs.arr[6] { -1 } else { 0 },
292          if self.arr[7] == rhs.arr[7] { -1 } else { 0 },
293          if self.arr[8] == rhs.arr[8] { -1 } else { 0 },
294          if self.arr[9] == rhs.arr[9] { -1 } else { 0 },
295          if self.arr[10] == rhs.arr[10] { -1 } else { 0 },
296          if self.arr[11] == rhs.arr[11] { -1 } else { 0 },
297          if self.arr[12] == rhs.arr[12] { -1 } else { 0 },
298          if self.arr[13] == rhs.arr[13] { -1 } else { 0 },
299          if self.arr[14] == rhs.arr[14] { -1 } else { 0 },
300          if self.arr[15] == rhs.arr[15] { -1 } else { 0 },
301        ]}
302      }
303    }
304  }
305}
306
307impl CmpGt for i8x16 {
308  type Output = Self;
309  #[inline]
310  fn simd_gt(self, rhs: Self) -> Self::Output {
311    pick! {
312      if #[cfg(target_feature="sse2")] {
313        Self { sse: cmp_gt_mask_i8_m128i(self.sse, rhs.sse) }
314      } else if #[cfg(target_feature="simd128")] {
315        Self { simd: i8x16_gt(self.simd, rhs.simd) }
316      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
317        unsafe {Self { neon: vreinterpretq_s8_u8(vcgtq_s8(self.neon, rhs.neon)) }}
318      } else {
319        Self { arr: [
320          if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
321          if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
322          if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
323          if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
324          if self.arr[4] > rhs.arr[4] { -1 } else { 0 },
325          if self.arr[5] > rhs.arr[5] { -1 } else { 0 },
326          if self.arr[6] > rhs.arr[6] { -1 } else { 0 },
327          if self.arr[7] > rhs.arr[7] { -1 } else { 0 },
328          if self.arr[8] > rhs.arr[8] { -1 } else { 0 },
329          if self.arr[9] > rhs.arr[9] { -1 } else { 0 },
330          if self.arr[10] > rhs.arr[10] { -1 } else { 0 },
331          if self.arr[11] > rhs.arr[11] { -1 } else { 0 },
332          if self.arr[12] > rhs.arr[12] { -1 } else { 0 },
333          if self.arr[13] > rhs.arr[13] { -1 } else { 0 },
334          if self.arr[14] > rhs.arr[14] { -1 } else { 0 },
335          if self.arr[15] > rhs.arr[15] { -1 } else { 0 },
336        ]}
337      }
338    }
339  }
340}
341
342impl CmpLt for i8x16 {
343  type Output = Self;
344  #[inline]
345  fn simd_lt(self, rhs: Self) -> Self::Output {
346    pick! {
347      if #[cfg(target_feature="sse2")] {
348        Self { sse: cmp_lt_mask_i8_m128i(self.sse, rhs.sse) }
349      } else if #[cfg(target_feature="simd128")] {
350        Self { simd: i8x16_lt(self.simd, rhs.simd) }
351      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
352        unsafe {Self { neon: vreinterpretq_s8_u8(vcltq_s8(self.neon, rhs.neon)) }}
353      } else {
354        Self { arr: [
355          if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
356          if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
357          if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
358          if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
359          if self.arr[4] < rhs.arr[4] { -1 } else { 0 },
360          if self.arr[5] < rhs.arr[5] { -1 } else { 0 },
361          if self.arr[6] < rhs.arr[6] { -1 } else { 0 },
362          if self.arr[7] < rhs.arr[7] { -1 } else { 0 },
363          if self.arr[8] < rhs.arr[8] { -1 } else { 0 },
364          if self.arr[9] < rhs.arr[9] { -1 } else { 0 },
365          if self.arr[10] < rhs.arr[10] { -1 } else { 0 },
366          if self.arr[11] < rhs.arr[11] { -1 } else { 0 },
367          if self.arr[12] < rhs.arr[12] { -1 } else { 0 },
368          if self.arr[13] < rhs.arr[13] { -1 } else { 0 },
369          if self.arr[14] < rhs.arr[14] { -1 } else { 0 },
370          if self.arr[15] < rhs.arr[15] { -1 } else { 0 },
371        ]}
372      }
373    }
374  }
375}
376
377impl i8x16 {
378  #[inline]
379  #[must_use]
380  pub const fn new(array: [i8; 16]) -> Self {
381    unsafe { core::mem::transmute(array) }
382  }
383
384  /// converts `i16` to `i8`, saturating values that are too large
385  #[inline]
386  #[must_use]
387  pub fn from_i16x16_saturate(v: i16x16) -> i8x16 {
388    pick! {
389      if #[cfg(target_feature="avx2")] {
390        i8x16 { sse: pack_i16_to_i8_m128i( extract_m128i_from_m256i::<0>(v.avx2), extract_m128i_from_m256i::<1>(v.avx2))  }
391      } else if #[cfg(target_feature="sse2")] {
392        i8x16 { sse: pack_i16_to_i8_m128i( v.a.sse, v.b.sse ) }
393      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
394        use core::arch::aarch64::*;
395
396        unsafe {
397          i8x16 { neon: vcombine_s8(vqmovn_s16(v.a.neon), vqmovn_s16(v.b.neon)) }
398        }
399      } else if #[cfg(target_feature="simd128")] {
400        use core::arch::wasm32::*;
401
402        i8x16 { simd: i8x16_narrow_i16x8(v.a.simd, v.b.simd) }
403      } else {
404        fn clamp(a : i16) -> i8 {
405            if a < i8::MIN as i16 {
406              i8::MIN
407            }
408            else if a > i8::MAX as i16 {
409              i8::MAX
410            } else {
411                a as i8
412            }
413        }
414
415        i8x16::new([
416          clamp(v.as_array()[0]),
417          clamp(v.as_array()[1]),
418          clamp(v.as_array()[2]),
419          clamp(v.as_array()[3]),
420          clamp(v.as_array()[4]),
421          clamp(v.as_array()[5]),
422          clamp(v.as_array()[6]),
423          clamp(v.as_array()[7]),
424          clamp(v.as_array()[8]),
425          clamp(v.as_array()[9]),
426          clamp(v.as_array()[10]),
427          clamp(v.as_array()[11]),
428          clamp(v.as_array()[12]),
429          clamp(v.as_array()[13]),
430          clamp(v.as_array()[14]),
431          clamp(v.as_array()[15]),
432        ])
433      }
434    }
435  }
436
437  /// converts `i16` to `i8`, truncating the upper bits if they are set
438  #[inline]
439  #[must_use]
440  pub fn from_i16x16_truncate(v: i16x16) -> i8x16 {
441    pick! {
442      if #[cfg(target_feature="avx2")] {
443        let a = v.avx2.bitand(set_splat_i16_m256i(0xff));
444        i8x16 { sse: pack_i16_to_u8_m128i( extract_m128i_from_m256i::<0>(a), extract_m128i_from_m256i::<1>(a))  }
445      } else if #[cfg(target_feature="sse2")] {
446        let mask = set_splat_i16_m128i(0xff);
447        i8x16 { sse: pack_i16_to_u8_m128i( v.a.sse.bitand(mask), v.b.sse.bitand(mask) ) }
448      } else {
449        // no super good intrinsics on other platforms... plain old codegen does a reasonable job
450        i8x16::new([
451          v.as_array()[0] as i8,
452          v.as_array()[1] as i8,
453          v.as_array()[2] as i8,
454          v.as_array()[3] as i8,
455          v.as_array()[4] as i8,
456          v.as_array()[5] as i8,
457          v.as_array()[6] as i8,
458          v.as_array()[7] as i8,
459          v.as_array()[8] as i8,
460          v.as_array()[9] as i8,
461          v.as_array()[10] as i8,
462          v.as_array()[11] as i8,
463          v.as_array()[12] as i8,
464          v.as_array()[13] as i8,
465          v.as_array()[14] as i8,
466          v.as_array()[15] as i8,
467        ])
468      }
469    }
470  }
471
472  #[inline]
473  #[must_use]
474  pub fn blend(self, t: Self, f: Self) -> Self {
475    pick! {
476      if #[cfg(target_feature="sse4.1")] {
477        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
478      } else if #[cfg(target_feature="simd128")] {
479        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
480      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
481        unsafe {Self { neon: vbslq_s8(vreinterpretq_u8_s8(self.neon), t.neon, f.neon) }}
482      } else {
483        generic_bit_blend(self, t, f)
484      }
485    }
486  }
487  #[inline]
488  #[must_use]
489  pub fn abs(self) -> Self {
490    pick! {
491      if #[cfg(target_feature="ssse3")] {
492        Self { sse: abs_i8_m128i(self.sse) }
493      } else if #[cfg(target_feature="simd128")] {
494        Self { simd: i8x16_abs(self.simd) }
495      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
496        unsafe {Self { neon: vabsq_s8(self.neon) }}
497      } else {
498        let arr: [i8; 16] = cast(self);
499        cast([
500          arr[0].wrapping_abs(),
501          arr[1].wrapping_abs(),
502          arr[2].wrapping_abs(),
503          arr[3].wrapping_abs(),
504          arr[4].wrapping_abs(),
505          arr[5].wrapping_abs(),
506          arr[6].wrapping_abs(),
507          arr[7].wrapping_abs(),
508          arr[8].wrapping_abs(),
509          arr[9].wrapping_abs(),
510          arr[10].wrapping_abs(),
511          arr[11].wrapping_abs(),
512          arr[12].wrapping_abs(),
513          arr[13].wrapping_abs(),
514          arr[14].wrapping_abs(),
515          arr[15].wrapping_abs(),
516        ])
517      }
518    }
519  }
520
521  #[inline]
522  #[must_use]
523  pub fn unsigned_abs(self) -> u8x16 {
524    pick! {
525      if #[cfg(target_feature="ssse3")] {
526        u8x16 { sse: abs_i8_m128i(self.sse) }
527      } else if #[cfg(target_feature="simd128")] {
528        u8x16 { simd: i8x16_abs(self.simd) }
529      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
530        unsafe { u8x16 { neon: vreinterpretq_u8_s8(vabsq_s8(self.neon)) }}
531      } else {
532        let arr: [i8; 16] = cast(self);
533        cast(
534          [
535            arr[0].unsigned_abs(),
536            arr[1].unsigned_abs(),
537            arr[2].unsigned_abs(),
538            arr[3].unsigned_abs(),
539            arr[4].unsigned_abs(),
540            arr[5].unsigned_abs(),
541            arr[6].unsigned_abs(),
542            arr[7].unsigned_abs(),
543            arr[8].unsigned_abs(),
544            arr[9].unsigned_abs(),
545            arr[10].unsigned_abs(),
546            arr[11].unsigned_abs(),
547            arr[12].unsigned_abs(),
548            arr[13].unsigned_abs(),
549            arr[14].unsigned_abs(),
550            arr[15].unsigned_abs(),
551            ])
552      }
553    }
554  }
555
556  #[inline]
557  #[must_use]
558  pub fn max(self, rhs: Self) -> Self {
559    pick! {
560      if #[cfg(target_feature="sse4.1")] {
561        Self { sse: max_i8_m128i(self.sse, rhs.sse) }
562      } else if #[cfg(target_feature="simd128")] {
563        Self { simd: i8x16_max(self.simd, rhs.simd) }
564      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
565        unsafe {Self { neon: vmaxq_s8(self.neon, rhs.neon) }}
566      } else {
567        self.simd_lt(rhs).blend(rhs, self)
568      }
569    }
570  }
571  #[inline]
572  #[must_use]
573  pub fn min(self, rhs: Self) -> Self {
574    pick! {
575      if #[cfg(target_feature="sse4.1")] {
576        Self { sse: min_i8_m128i(self.sse, rhs.sse) }
577      } else if #[cfg(target_feature="simd128")] {
578        Self { simd: i8x16_min(self.simd, rhs.simd) }
579      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
580        unsafe {Self { neon: vminq_s8(self.neon, rhs.neon) }}
581      } else {
582        self.simd_lt(rhs).blend(self, rhs)
583      }
584    }
585  }
586
587  #[inline]
588  #[must_use]
589  pub fn from_slice_unaligned(input: &[i8]) -> Self {
590    assert!(input.len() >= 16);
591
592    pick! {
593      if #[cfg(target_feature="sse2")] {
594        unsafe { Self { sse: load_unaligned_m128i( &*(input.as_ptr() as * const [u8;16]) ) } }
595      } else if #[cfg(target_feature="simd128")] {
596        unsafe { Self { simd: v128_load(input.as_ptr() as *const v128 ) } }
597      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
598        unsafe { Self { neon: vld1q_s8( input.as_ptr() as *const i8 ) } }
599      } else {
600        // 2018 edition doesn't have try_into
601        unsafe { Self::new( *(input.as_ptr() as * const [i8;16]) ) }
602      }
603    }
604  }
605
606  #[inline]
607  #[must_use]
608  #[doc(alias("movemask", "move_mask"))]
609  pub fn to_bitmask(self) -> u32 {
610    pick! {
611      if #[cfg(target_feature="sse2")] {
612        move_mask_i8_m128i(self.sse) as u32
613      } else if #[cfg(target_feature="simd128")] {
614        i8x16_bitmask(self.simd) as u32
615      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
616        unsafe
617        {
618          // set all to 1 if top bit is set, else 0
619          let masked = vcltq_s8(self.neon, vdupq_n_s8(0));
620
621          // select the right bit out of each lane
622          let selectbit : uint8x16_t = core::mem::transmute([1u8, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128]);
623          let out = vandq_u8(masked, selectbit);
624
625          // interleave the lanes so that a 16-bit sum accumulates the bits in the right order
626          let table : uint8x16_t = core::mem::transmute([0u8, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15]);
627          let r = vqtbl1q_u8(out, table);
628
629          // horizontally add the 16-bit lanes
630          vaddvq_u16(vreinterpretq_u16_u8(r)) as u32
631        }
632       } else {
633        ((self.arr[0] < 0) as u32) << 0 |
634        ((self.arr[1] < 0) as u32) << 1 |
635        ((self.arr[2] < 0) as u32) << 2 |
636        ((self.arr[3] < 0) as u32) << 3 |
637        ((self.arr[4] < 0) as u32) << 4 |
638        ((self.arr[5] < 0) as u32) << 5 |
639        ((self.arr[6] < 0) as u32) << 6 |
640        ((self.arr[7] < 0) as u32) << 7 |
641        ((self.arr[8] < 0) as u32) << 8 |
642        ((self.arr[9] < 0) as u32) << 9 |
643        ((self.arr[10] < 0) as u32) << 10 |
644        ((self.arr[11] < 0) as u32) << 11 |
645        ((self.arr[12] < 0) as u32) << 12 |
646        ((self.arr[13] < 0) as u32) << 13 |
647        ((self.arr[14] < 0) as u32) << 14 |
648        ((self.arr[15] < 0) as u32) << 15
649      }
650    }
651  }
652
653  #[inline]
654  #[must_use]
655  pub fn any(self) -> bool {
656    pick! {
657      if #[cfg(target_feature="sse2")] {
658        move_mask_i8_m128i(self.sse) != 0
659      } else if #[cfg(target_feature="simd128")] {
660        u8x16_bitmask(self.simd) != 0
661      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
662        unsafe {
663          vminvq_s8(self.neon) < 0
664        }
665      } else {
666        let v : [u64;2] = cast(self);
667        ((v[0] | v[1]) & 0x80808080808080) != 0
668      }
669    }
670  }
671  #[inline]
672  #[must_use]
673  pub fn all(self) -> bool {
674    pick! {
675      if #[cfg(target_feature="sse2")] {
676        move_mask_i8_m128i(self.sse) == 0b1111_1111_1111_1111
677      } else if #[cfg(target_feature="simd128")] {
678        u8x16_bitmask(self.simd) == 0b1111_1111_1111_1111
679      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
680        unsafe {
681          vmaxvq_s8(self.neon) < 0
682        }
683      } else {
684        let v : [u64;2] = cast(self);
685        (v[0] & v[1] & 0x80808080808080) == 0x80808080808080
686      }
687    }
688  }
689
690  /// Returns a new vector where each element is based on the index values in
691  /// `rhs`.
692  ///
693  /// * Index values in the range `[0, 15]` select the i-th element of `self`.
694  /// * Index values that are out of range will cause that output lane to be
695  ///   `0`.
696  #[inline]
697  pub fn swizzle(self, rhs: i8x16) -> i8x16 {
698    pick! {
699      if #[cfg(target_feature="ssse3")] {
700        Self { sse: shuffle_av_i8z_all_m128i(self.sse, add_saturating_u8_m128i(rhs.sse, set_splat_i8_m128i(0x70))) }
701      } else if #[cfg(target_feature="simd128")] {
702        Self { simd: i8x16_swizzle(self.simd, rhs.simd) }
703      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
704        unsafe { Self { neon: vqtbl1q_s8(self.neon, vreinterpretq_u8_s8(rhs.neon)) } }
705      } else {
706        let idxs = rhs.to_array();
707        let arr = self.to_array();
708        let mut out = [0i8;16];
709        for i in 0..16 {
710          let idx = idxs[i] as usize;
711          if idx >= 16 {
712            out[i] = 0;
713          } else {
714            out[i] = arr[idx];
715          }
716        }
717        Self::new(out)
718      }
719    }
720  }
721
722  /// Works like [`swizzle`](Self::swizzle) with the following additional
723  /// details
724  ///
725  /// * Indices in the range `[0, 15]` will select the i-th element of `self`.
726  /// * If the high bit of any index is set (meaning that the index is
727  ///   negative), then the corresponding output lane is guaranteed to be zero.
728  /// * Otherwise the output lane is either `0` or `self[rhs[i] % 16]`,
729  ///   depending on the implementation.
730  #[inline]
731  pub fn swizzle_relaxed(self, rhs: i8x16) -> i8x16 {
732    pick! {
733      if #[cfg(target_feature="ssse3")] {
734        Self { sse: shuffle_av_i8z_all_m128i(self.sse, rhs.sse) }
735      } else if #[cfg(target_feature="simd128")] {
736        Self { simd: i8x16_swizzle(self.simd, rhs.simd) }
737      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
738        unsafe { Self { neon: vqtbl1q_s8(self.neon, vreinterpretq_u8_s8(rhs.neon)) } }
739      } else {
740        let idxs = rhs.to_array();
741        let arr = self.to_array();
742        let mut out = [0i8;16];
743        for i in 0..16 {
744          let idx = idxs[i] as usize;
745          if idx >= 16 {
746            out[i] = 0;
747          } else {
748            out[i] = arr[idx];
749          }
750        }
751        Self::new(out)
752      }
753    }
754  }
755
756  #[inline]
757  #[must_use]
758  pub fn none(self) -> bool {
759    !self.any()
760  }
761
762  #[inline]
763  #[must_use]
764  pub fn saturating_add(self, rhs: Self) -> Self {
765    pick! {
766      if #[cfg(target_feature="sse2")] {
767        Self { sse: add_saturating_i8_m128i(self.sse, rhs.sse) }
768      } else if #[cfg(target_feature="simd128")] {
769        Self { simd: i8x16_add_sat(self.simd, rhs.simd) }
770      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
771        unsafe {Self { neon: vqaddq_s8(self.neon, rhs.neon) }}
772      } else {
773        Self { arr: [
774          self.arr[0].saturating_add(rhs.arr[0]),
775          self.arr[1].saturating_add(rhs.arr[1]),
776          self.arr[2].saturating_add(rhs.arr[2]),
777          self.arr[3].saturating_add(rhs.arr[3]),
778          self.arr[4].saturating_add(rhs.arr[4]),
779          self.arr[5].saturating_add(rhs.arr[5]),
780          self.arr[6].saturating_add(rhs.arr[6]),
781          self.arr[7].saturating_add(rhs.arr[7]),
782          self.arr[8].saturating_add(rhs.arr[8]),
783          self.arr[9].saturating_add(rhs.arr[9]),
784          self.arr[10].saturating_add(rhs.arr[10]),
785          self.arr[11].saturating_add(rhs.arr[11]),
786          self.arr[12].saturating_add(rhs.arr[12]),
787          self.arr[13].saturating_add(rhs.arr[13]),
788          self.arr[14].saturating_add(rhs.arr[14]),
789          self.arr[15].saturating_add(rhs.arr[15]),
790        ]}
791      }
792    }
793  }
794  #[inline]
795  #[must_use]
796  pub fn saturating_sub(self, rhs: Self) -> Self {
797    pick! {
798      if #[cfg(target_feature="sse2")] {
799        Self { sse: sub_saturating_i8_m128i(self.sse, rhs.sse) }
800      } else if #[cfg(target_feature="simd128")] {
801        Self { simd: i8x16_sub_sat(self.simd, rhs.simd) }
802      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
803        unsafe { Self { neon: vqsubq_s8(self.neon, rhs.neon) } }
804      } else {
805        Self { arr: [
806          self.arr[0].saturating_sub(rhs.arr[0]),
807          self.arr[1].saturating_sub(rhs.arr[1]),
808          self.arr[2].saturating_sub(rhs.arr[2]),
809          self.arr[3].saturating_sub(rhs.arr[3]),
810          self.arr[4].saturating_sub(rhs.arr[4]),
811          self.arr[5].saturating_sub(rhs.arr[5]),
812          self.arr[6].saturating_sub(rhs.arr[6]),
813          self.arr[7].saturating_sub(rhs.arr[7]),
814          self.arr[8].saturating_sub(rhs.arr[8]),
815          self.arr[9].saturating_sub(rhs.arr[9]),
816          self.arr[10].saturating_sub(rhs.arr[10]),
817          self.arr[11].saturating_sub(rhs.arr[11]),
818          self.arr[12].saturating_sub(rhs.arr[12]),
819          self.arr[13].saturating_sub(rhs.arr[13]),
820          self.arr[14].saturating_sub(rhs.arr[14]),
821          self.arr[15].saturating_sub(rhs.arr[15]),
822        ]}
823      }
824    }
825  }
826
827  #[inline]
828  pub fn to_array(self) -> [i8; 16] {
829    cast(self)
830  }
831
832  #[inline]
833  pub fn as_array(&self) -> &[i8; 16] {
834    cast_ref(self)
835  }
836
837  #[inline]
838  pub fn as_mut_array(&mut self) -> &mut [i8; 16] {
839    cast_mut(self)
840  }
841}