wide/
u16x8_.rs

1use super::*;
2
// Chooses the backing storage of `u16x8` at compile time. The branches are
// tried in order (SSE2, WASM simd128, AArch64 NEON); the last one is a plain
// array used when none of those target features is enabled.
pick! {
  if #[cfg(target_feature="sse2")] {
    // SSE2: wraps an `m128i`; every listed trait is derived from the field.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u16x8 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    // WASM simd128: wraps a `v128`. `Default`, `PartialEq` and `Eq` are
    // written by hand below instead of being derived.
    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct u16x8 { pub(crate) simd: v128 }

    impl Default for u16x8 {
      // The default value has every lane set to zero.
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for u16x8 {
      // Equal only when the lane-wise comparison is true in all lanes.
      fn eq(&self, other: &Self) -> bool {
        u16x8_all_true(u16x8_eq(self.simd, other.simd))
      }
    }

    impl Eq for u16x8 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      use core::arch::aarch64::*;
      // AArch64 NEON: wraps a `uint16x8_t`. `Default`, `PartialEq` and `Eq`
      // are written by hand below instead of being derived.
      #[repr(C)]
      #[derive(Copy, Clone)]
      pub struct u16x8 { pub(crate) neon : uint16x8_t }

      impl Default for u16x8 {
        // The default value has every lane set to zero.
        #[inline]
        fn default() -> Self {
          Self::splat(0)
        }
      }

      impl PartialEq for u16x8 {
        // Compares lane-wise, then takes the minimum across the resulting
        // mask: it is `u16::MAX` only if no lane reported a mismatch.
        #[inline]
        fn eq(&self, other: &Self) -> bool {
          unsafe { vminvq_u16(vceqq_u16(self.neon, other.neon))==u16::MAX }
        }
      }

      impl Eq for u16x8 { }
  } else {
    // Portable fallback: eight `u16` lanes in a 16-byte aligned array.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u16x8 { pub(crate) arr: [u16;8] }
  }
}
55
// Invokes the shared constant-generating macro for this type with lane type
// `u16`, lane count 8 and a total width of 128 bits.
// NOTE(review): presumably this is where `splat` and the associated constants
// come from -- confirm against the macro definition.
int_uint_consts!(u16, 8, u16x8, 128);

// SAFETY (assumed, TODO confirm per backend): every representation selected
// above is a single field that is either `[u16; 8]` or a 128-bit vector type,
// so there is no padding and the all-zero / any bit pattern is a valid value.
unsafe impl Zeroable for u16x8 {}
unsafe impl Pod for u16x8 {}
60
// Registers `u16` as the element type of `u16x8` for the `AlignTo` trait.
// NOTE(review): the trait itself is defined elsewhere; its exact contract is
// not visible from this file.
impl AlignTo for u16x8 {
  type Elem = u16;
}
64
65impl Add for u16x8 {
66  type Output = Self;
67  #[inline]
68  fn add(self, rhs: Self) -> Self::Output {
69    pick! {
70      if #[cfg(target_feature="sse2")] {
71        Self { sse: add_i16_m128i(self.sse, rhs.sse) }
72      } else if #[cfg(target_feature="simd128")] {
73        Self { simd: u16x8_add(self.simd, rhs.simd) }
74      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
75        unsafe { Self { neon: vaddq_u16(self.neon, rhs.neon) } }
76      } else {
77        Self { arr: [
78          self.arr[0].wrapping_add(rhs.arr[0]),
79          self.arr[1].wrapping_add(rhs.arr[1]),
80          self.arr[2].wrapping_add(rhs.arr[2]),
81          self.arr[3].wrapping_add(rhs.arr[3]),
82          self.arr[4].wrapping_add(rhs.arr[4]),
83          self.arr[5].wrapping_add(rhs.arr[5]),
84          self.arr[6].wrapping_add(rhs.arr[6]),
85          self.arr[7].wrapping_add(rhs.arr[7]),
86        ]}
87      }
88    }
89  }
90}
91
92impl Sub for u16x8 {
93  type Output = Self;
94  #[inline]
95  fn sub(self, rhs: Self) -> Self::Output {
96    pick! {
97      if #[cfg(target_feature="sse2")] {
98        Self { sse: sub_i16_m128i(self.sse, rhs.sse) }
99      } else if #[cfg(target_feature="simd128")] {
100        Self { simd: u16x8_sub(self.simd, rhs.simd) }
101      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
102        unsafe {Self { neon: vsubq_u16(self.neon, rhs.neon) }}
103      } else {
104        Self { arr: [
105          self.arr[0].wrapping_sub(rhs.arr[0]),
106          self.arr[1].wrapping_sub(rhs.arr[1]),
107          self.arr[2].wrapping_sub(rhs.arr[2]),
108          self.arr[3].wrapping_sub(rhs.arr[3]),
109          self.arr[4].wrapping_sub(rhs.arr[4]),
110          self.arr[5].wrapping_sub(rhs.arr[5]),
111          self.arr[6].wrapping_sub(rhs.arr[6]),
112          self.arr[7].wrapping_sub(rhs.arr[7]),
113        ]}
114      }
115    }
116  }
117}
118
119impl Mul for u16x8 {
120  type Output = Self;
121  #[inline]
122  fn mul(self, rhs: Self) -> Self::Output {
123    pick! {
124      if #[cfg(target_feature="sse2")] {
125        Self { sse: mul_i16_keep_low_m128i(self.sse, rhs.sse) }
126      } else if #[cfg(target_feature="simd128")] {
127        Self { simd: u16x8_mul(self.simd, rhs.simd) }
128      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
129        unsafe {Self { neon: vmulq_u16(self.neon, rhs.neon) }}
130      } else {
131        Self { arr: [
132          self.arr[0].wrapping_mul(rhs.arr[0]),
133          self.arr[1].wrapping_mul(rhs.arr[1]),
134          self.arr[2].wrapping_mul(rhs.arr[2]),
135          self.arr[3].wrapping_mul(rhs.arr[3]),
136          self.arr[4].wrapping_mul(rhs.arr[4]),
137          self.arr[5].wrapping_mul(rhs.arr[5]),
138          self.arr[6].wrapping_mul(rhs.arr[6]),
139          self.arr[7].wrapping_mul(rhs.arr[7]),
140        ]}
141      }
142    }
143  }
144}
145
146impl Add<u16> for u16x8 {
147  type Output = Self;
148  #[inline]
149  fn add(self, rhs: u16) -> Self::Output {
150    self.add(Self::splat(rhs))
151  }
152}
153
154impl Sub<u16> for u16x8 {
155  type Output = Self;
156  #[inline]
157  fn sub(self, rhs: u16) -> Self::Output {
158    self.sub(Self::splat(rhs))
159  }
160}
161
162impl Mul<u16> for u16x8 {
163  type Output = Self;
164  #[inline]
165  fn mul(self, rhs: u16) -> Self::Output {
166    self.mul(Self::splat(rhs))
167  }
168}
169
170impl Add<u16x8> for u16 {
171  type Output = u16x8;
172  #[inline]
173  fn add(self, rhs: u16x8) -> Self::Output {
174    u16x8::splat(self).add(rhs)
175  }
176}
177
178impl Sub<u16x8> for u16 {
179  type Output = u16x8;
180  #[inline]
181  fn sub(self, rhs: u16x8) -> Self::Output {
182    u16x8::splat(self).sub(rhs)
183  }
184}
185
186impl Mul<u16x8> for u16 {
187  type Output = u16x8;
188  #[inline]
189  fn mul(self, rhs: u16x8) -> Self::Output {
190    u16x8::splat(self).mul(rhs)
191  }
192}
193
194impl BitAnd for u16x8 {
195  type Output = Self;
196  #[inline]
197  fn bitand(self, rhs: Self) -> Self::Output {
198    pick! {
199      if #[cfg(target_feature="sse2")] {
200        Self { sse: bitand_m128i(self.sse, rhs.sse) }
201      } else if #[cfg(target_feature="simd128")] {
202        Self { simd: v128_and(self.simd, rhs.simd) }
203      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
204        unsafe {Self { neon: vandq_u16(self.neon, rhs.neon) }}
205      } else {
206        Self { arr: [
207          self.arr[0].bitand(rhs.arr[0]),
208          self.arr[1].bitand(rhs.arr[1]),
209          self.arr[2].bitand(rhs.arr[2]),
210          self.arr[3].bitand(rhs.arr[3]),
211          self.arr[4].bitand(rhs.arr[4]),
212          self.arr[5].bitand(rhs.arr[5]),
213          self.arr[6].bitand(rhs.arr[6]),
214          self.arr[7].bitand(rhs.arr[7]),
215        ]}
216      }
217    }
218  }
219}
220
221impl BitOr for u16x8 {
222  type Output = Self;
223  #[inline]
224  fn bitor(self, rhs: Self) -> Self::Output {
225    pick! {
226      if #[cfg(target_feature="sse2")] {
227        Self { sse: bitor_m128i(self.sse, rhs.sse) }
228      } else if #[cfg(target_feature="simd128")] {
229        Self { simd: v128_or(self.simd, rhs.simd) }
230      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
231        unsafe {Self { neon: vorrq_u16(self.neon, rhs.neon) }}
232      } else {
233        Self { arr: [
234          self.arr[0].bitor(rhs.arr[0]),
235          self.arr[1].bitor(rhs.arr[1]),
236          self.arr[2].bitor(rhs.arr[2]),
237          self.arr[3].bitor(rhs.arr[3]),
238          self.arr[4].bitor(rhs.arr[4]),
239          self.arr[5].bitor(rhs.arr[5]),
240          self.arr[6].bitor(rhs.arr[6]),
241          self.arr[7].bitor(rhs.arr[7]),
242        ]}
243      }
244    }
245  }
246}
247
248impl BitXor for u16x8 {
249  type Output = Self;
250  #[inline]
251  fn bitxor(self, rhs: Self) -> Self::Output {
252    pick! {
253      if #[cfg(target_feature="sse2")] {
254        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
255      } else if #[cfg(target_feature="simd128")] {
256        Self { simd: v128_xor(self.simd, rhs.simd) }
257      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
258        unsafe {Self { neon: veorq_u16(self.neon, rhs.neon) }}
259      } else {
260        Self { arr: [
261          self.arr[0].bitxor(rhs.arr[0]),
262          self.arr[1].bitxor(rhs.arr[1]),
263          self.arr[2].bitxor(rhs.arr[2]),
264          self.arr[3].bitxor(rhs.arr[3]),
265          self.arr[4].bitxor(rhs.arr[4]),
266          self.arr[5].bitxor(rhs.arr[5]),
267          self.arr[6].bitxor(rhs.arr[6]),
268          self.arr[7].bitxor(rhs.arr[7]),
269        ]}
270      }
271    }
272  }
273}
274
275macro_rules! impl_shl_t_for_u16x8 {
276  ($($shift_type:ty),+ $(,)?) => {
277    $(impl Shl<$shift_type> for u16x8 {
278      type Output = Self;
279      /// Shifts all lanes by the value given.
280      #[inline]
281      fn shl(self, rhs: $shift_type) -> Self::Output {
282        pick! {
283          if #[cfg(target_feature="sse2")] {
284            let shift = cast([rhs as u64, 0]);
285            Self { sse: shl_all_u16_m128i(self.sse, shift) }
286          } else if #[cfg(target_feature="simd128")] {
287            Self { simd: u16x8_shl(self.simd, rhs as u32) }
288          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
289            unsafe {Self { neon: vshlq_u16(self.neon, vmovq_n_s16(rhs as i16)) }}
290          } else {
291            let u = rhs as u32;
292            Self { arr: [
293              self.arr[0].wrapping_shl(u),
294              self.arr[1].wrapping_shl(u),
295              self.arr[2].wrapping_shl(u),
296              self.arr[3].wrapping_shl(u),
297              self.arr[4].wrapping_shl(u),
298              self.arr[5].wrapping_shl(u),
299              self.arr[6].wrapping_shl(u),
300              self.arr[7].wrapping_shl(u),
301            ]}
302          }
303        }
304      }
305    })+
306  };
307}
308impl_shl_t_for_u16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
309
310macro_rules! impl_shr_t_for_u16x8 {
311  ($($shift_type:ty),+ $(,)?) => {
312    $(impl Shr<$shift_type> for u16x8 {
313      type Output = Self;
314      /// Shifts all lanes by the value given.
315      #[inline]
316      fn shr(self, rhs: $shift_type) -> Self::Output {
317        pick! {
318          if #[cfg(target_feature="sse2")] {
319            let shift = cast([rhs as u64, 0]);
320            Self { sse: shr_all_u16_m128i(self.sse, shift) }
321          } else if #[cfg(target_feature="simd128")] {
322            Self { simd: u16x8_shr(self.simd, rhs as u32) }
323          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
324            unsafe {Self { neon: vshlq_u16(self.neon, vmovq_n_s16( -(rhs as i16))) }}
325          } else {
326            let u = rhs as u32;
327            Self { arr: [
328              self.arr[0].wrapping_shr(u),
329              self.arr[1].wrapping_shr(u),
330              self.arr[2].wrapping_shr(u),
331              self.arr[3].wrapping_shr(u),
332              self.arr[4].wrapping_shr(u),
333              self.arr[5].wrapping_shr(u),
334              self.arr[6].wrapping_shr(u),
335              self.arr[7].wrapping_shr(u),
336            ]}
337          }
338        }
339      }
340    })+
341  };
342}
343impl_shr_t_for_u16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
344
345impl CmpEq for u16x8 {
346  type Output = Self;
347  #[inline]
348  fn simd_eq(self, rhs: Self) -> Self::Output {
349    Self::simd_eq(self, rhs)
350  }
351}
352
353impl CmpGt for u16x8 {
354  type Output = Self;
355  #[inline]
356  fn simd_gt(self, rhs: Self) -> Self::Output {
357    Self::simd_gt(self, rhs)
358  }
359}
360
361impl CmpLt for u16x8 {
362  type Output = Self;
363  #[inline]
364  fn simd_lt(self, rhs: Self) -> Self::Output {
365    // no lt, so reverse gt
366    Self::simd_gt(rhs, self)
367  }
368}
369
370impl u16x8 {
371  #[inline]
372  #[must_use]
373  pub const fn new(array: [u16; 8]) -> Self {
374    unsafe { core::mem::transmute(array) }
375  }
376  #[inline]
377  #[must_use]
378  pub fn simd_eq(self, rhs: Self) -> Self {
379    pick! {
380      if #[cfg(target_feature="sse2")] {
381        Self { sse: cmp_eq_mask_i16_m128i(self.sse, rhs.sse) }
382      } else if #[cfg(target_feature="simd128")] {
383        Self { simd: u16x8_eq(self.simd, rhs.simd) }
384      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
385        unsafe {Self { neon: vceqq_u16(self.neon, rhs.neon) }}
386      } else {
387        Self { arr: [
388          if self.arr[0] == rhs.arr[0] { u16::MAX } else { 0 },
389          if self.arr[1] == rhs.arr[1] { u16::MAX } else { 0 },
390          if self.arr[2] == rhs.arr[2] { u16::MAX } else { 0 },
391          if self.arr[3] == rhs.arr[3] { u16::MAX } else { 0 },
392          if self.arr[4] == rhs.arr[4] { u16::MAX } else { 0 },
393          if self.arr[5] == rhs.arr[5] { u16::MAX } else { 0 },
394          if self.arr[6] == rhs.arr[6] { u16::MAX } else { 0 },
395          if self.arr[7] == rhs.arr[7] { u16::MAX } else { 0 },
396        ]}
397      }
398    }
399  }
400  #[inline]
401  #[must_use]
402  pub fn simd_gt(self, rhs: Self) -> Self {
403    pick! {
404      if #[cfg(target_feature = "sse2")] {
405        use safe_arch::*;
406
407        let bias = m128i::from([0x8000u16; 8]);
408
409        let a_biased = sub_i16_m128i(self.sse, bias);
410        let b_biased = sub_i16_m128i(rhs.sse, bias);
411        let mask = cmp_gt_mask_i16_m128i(a_biased, b_biased);
412
413        Self { sse: mask }
414      } else if #[cfg(target_feature="simd128")] {
415        Self { simd: u16x8_gt(self.simd, rhs.simd) }
416      } else if #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] {
417        unsafe {
418          use core::arch::aarch64::*;
419          Self {
420            neon: vcgtq_u16(self.neon, rhs.neon),
421          }
422        }
423      } else {
424        Self {
425          arr: [
426            if self.arr[0] > rhs.arr[0] { u16::MAX } else { 0 },
427            if self.arr[1] > rhs.arr[1] { u16::MAX } else { 0 },
428            if self.arr[2] > rhs.arr[2] { u16::MAX } else { 0 },
429            if self.arr[3] > rhs.arr[3] { u16::MAX } else { 0 },
430            if self.arr[4] > rhs.arr[4] { u16::MAX } else { 0 },
431            if self.arr[5] > rhs.arr[5] { u16::MAX } else { 0 },
432            if self.arr[6] > rhs.arr[6] { u16::MAX } else { 0 },
433            if self.arr[7] > rhs.arr[7] { u16::MAX } else { 0 },
434          ]
435        }
436      }
437    }
438  }
439  #[inline]
440  #[must_use]
441  pub fn blend(self, t: Self, f: Self) -> Self {
442    pick! {
443      if #[cfg(target_feature="sse4.1")] {
444        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
445      } else if #[cfg(target_feature="simd128")] {
446        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
447      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
448        unsafe {Self { neon: vbslq_u16(self.neon, t.neon, f.neon) }}
449      } else {
450        generic_bit_blend(self, t, f)
451      }
452    }
453  }
454  #[inline]
455  #[must_use]
456  pub fn max(self, rhs: Self) -> Self {
457    pick! {
458      if #[cfg(target_feature="sse4.1")] {
459        Self { sse: max_u16_m128i(self.sse, rhs.sse) }
460      } else if #[cfg(target_feature="simd128")] {
461        Self { simd: u16x8_max(self.simd, rhs.simd) }
462      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
463        unsafe {Self { neon: vmaxq_u16(self.neon, rhs.neon) }}
464      } else {
465        let arr: [u16; 8] = cast(self);
466        let rhs: [u16; 8] = cast(rhs);
467        cast([
468          arr[0].max(rhs[0]),
469          arr[1].max(rhs[1]),
470          arr[2].max(rhs[2]),
471          arr[3].max(rhs[3]),
472          arr[4].max(rhs[4]),
473          arr[5].max(rhs[5]),
474          arr[6].max(rhs[6]),
475          arr[7].max(rhs[7]),
476        ])
477      }
478    }
479  }
480  #[inline]
481  #[must_use]
482  pub fn min(self, rhs: Self) -> Self {
483    pick! {
484      if #[cfg(target_feature="sse4.1")] {
485        Self { sse: min_u16_m128i(self.sse, rhs.sse) }
486      } else if #[cfg(target_feature="simd128")] {
487        Self { simd: u16x8_min(self.simd, rhs.simd) }
488      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
489        unsafe {Self { neon: vminq_u16(self.neon, rhs.neon) }}
490      } else {
491        let arr: [u16; 8] = cast(self);
492        let rhs: [u16; 8] = cast(rhs);
493        cast([
494          arr[0].min(rhs[0]),
495          arr[1].min(rhs[1]),
496          arr[2].min(rhs[2]),
497          arr[3].min(rhs[3]),
498          arr[4].min(rhs[4]),
499          arr[5].min(rhs[5]),
500          arr[6].min(rhs[6]),
501          arr[7].min(rhs[7]),
502        ])
503      }
504    }
505  }
506
507  #[inline]
508  #[must_use]
509  pub fn saturating_add(self, rhs: Self) -> Self {
510    pick! {
511      if #[cfg(target_feature="sse2")] {
512        Self { sse: add_saturating_u16_m128i(self.sse, rhs.sse) }
513      } else if #[cfg(target_feature="simd128")] {
514        Self { simd: u16x8_add_sat(self.simd, rhs.simd) }
515      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
516        unsafe {Self { neon: vqaddq_u16(self.neon, rhs.neon) }}
517      } else {
518        Self { arr: [
519          self.arr[0].saturating_add(rhs.arr[0]),
520          self.arr[1].saturating_add(rhs.arr[1]),
521          self.arr[2].saturating_add(rhs.arr[2]),
522          self.arr[3].saturating_add(rhs.arr[3]),
523          self.arr[4].saturating_add(rhs.arr[4]),
524          self.arr[5].saturating_add(rhs.arr[5]),
525          self.arr[6].saturating_add(rhs.arr[6]),
526          self.arr[7].saturating_add(rhs.arr[7]),
527        ]}
528      }
529    }
530  }
531  #[inline]
532  #[must_use]
533  pub fn saturating_sub(self, rhs: Self) -> Self {
534    pick! {
535      if #[cfg(target_feature="sse2")] {
536        Self { sse: sub_saturating_u16_m128i(self.sse, rhs.sse) }
537      } else if #[cfg(target_feature="simd128")] {
538        Self { simd: u16x8_sub_sat(self.simd, rhs.simd) }
539      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
540        unsafe {Self { neon: vqsubq_u16(self.neon, rhs.neon) }}
541      } else {
542        Self { arr: [
543          self.arr[0].saturating_sub(rhs.arr[0]),
544          self.arr[1].saturating_sub(rhs.arr[1]),
545          self.arr[2].saturating_sub(rhs.arr[2]),
546          self.arr[3].saturating_sub(rhs.arr[3]),
547          self.arr[4].saturating_sub(rhs.arr[4]),
548          self.arr[5].saturating_sub(rhs.arr[5]),
549          self.arr[6].saturating_sub(rhs.arr[6]),
550          self.arr[7].saturating_sub(rhs.arr[7]),
551        ]}
552      }
553    }
554  }
555
556  /// Unpack the lower half of the input and zero expand it to `u16` values.
557  #[inline]
558  #[must_use]
559  pub fn from_u8x16_low(u: u8x16) -> Self {
560    pick! {
561      if #[cfg(target_feature="sse2")] {
562        Self{ sse: unpack_low_i8_m128i(u.sse, m128i::zeroed()) }
563      } else {
564        let u_arr: [u8; 16] = cast(u);
565        cast([
566          u_arr[0] as u16,
567          u_arr[1] as u16,
568          u_arr[2] as u16,
569          u_arr[3] as u16,
570          u_arr[4] as u16,
571          u_arr[5] as u16,
572          u_arr[6] as u16,
573          u_arr[7] as u16,
574        ])
575      }
576    }
577  }
578
579  /// Unpack the upper half of the input and zero expand it to `u16` values.
580  #[inline]
581  #[must_use]
582  pub fn from_u8x16_high(u: u8x16) -> Self {
583    pick! {
584      if #[cfg(target_feature="sse2")] {
585        Self{ sse: unpack_high_i8_m128i(u.sse, m128i::zeroed()) }
586      } else {
587        let u_arr: [u8; 16] = cast(u);
588        cast([
589          u_arr[8] as u16,
590          u_arr[9] as u16,
591          u_arr[10] as u16,
592          u_arr[11] as u16,
593          u_arr[12] as u16,
594          u_arr[13] as u16,
595          u_arr[14] as u16,
596          u_arr[15] as u16,
597        ])
598      }
599    }
600  }
601
602  /// multiplies two u16x8 and returns the result as a widened u32x8
603  #[inline]
604  #[must_use]
605  pub fn mul_widen(self, rhs: Self) -> u32x8 {
606    pick! {
607      if #[cfg(target_feature="avx2")] {
608        let a = convert_to_i32_m256i_from_u16_m128i(self.sse);
609        let b = convert_to_i32_m256i_from_u16_m128i(rhs.sse);
610        u32x8 { avx2: mul_i32_keep_low_m256i(a,b) }
611      } else if #[cfg(target_feature="sse2")] {
612         let low = mul_i16_keep_low_m128i(self.sse, rhs.sse);
613         let high = mul_u16_keep_high_m128i(self.sse, rhs.sse);
614         u32x8 {
615          a: u32x4 { sse:unpack_low_i16_m128i(low, high) },
616          b: u32x4 { sse:unpack_high_i16_m128i(low, high) }
617        }
618      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
619         let lhs_low = unsafe { vget_low_u16(self.neon) };
620         let rhs_low = unsafe { vget_low_u16(rhs.neon) };
621
622         let lhs_high = unsafe { vget_high_u16(self.neon) };
623         let rhs_high = unsafe { vget_high_u16(rhs.neon) };
624
625         let low = unsafe { vmull_u16(lhs_low, rhs_low) };
626         let high = unsafe { vmull_u16(lhs_high, rhs_high) };
627
628         u32x8 { a: u32x4 { neon: low }, b: u32x4 {neon: high } }
629       } else {
630        let a = self.as_array();
631        let b = rhs.as_array();
632         u32x8::new([
633           u32::from(a[0]) * u32::from(b[0]),
634           u32::from(a[1]) * u32::from(b[1]),
635           u32::from(a[2]) * u32::from(b[2]),
636           u32::from(a[3]) * u32::from(b[3]),
637           u32::from(a[4]) * u32::from(b[4]),
638           u32::from(a[5]) * u32::from(b[5]),
639           u32::from(a[6]) * u32::from(b[6]),
640           u32::from(a[7]) * u32::from(b[7]),
641         ])
642       }
643    }
644  }
645
646  /// Multiples two `u16x8` and return the high part of intermediate `u32x8`
647  #[inline]
648  #[must_use]
649  pub fn mul_keep_high(self, rhs: Self) -> Self {
650    pick! {
651      if #[cfg(target_feature="sse2")] {
652        Self { sse: mul_u16_keep_high_m128i(self.sse, rhs.sse) }
653      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
654        let lhs_low = unsafe { vget_low_u16(self.neon) };
655        let rhs_low = unsafe { vget_low_u16(rhs.neon) };
656
657        let lhs_high = unsafe { vget_high_u16(self.neon) };
658        let rhs_high = unsafe { vget_high_u16(rhs.neon) };
659
660        let low = unsafe { vmull_u16(lhs_low, rhs_low) };
661        let high = unsafe { vmull_u16(lhs_high, rhs_high) };
662
663        u16x8 { neon: unsafe { vuzpq_u16(vreinterpretq_u16_u32(low), vreinterpretq_u16_u32(high)).1 } }
664      } else if #[cfg(target_feature="simd128")] {
665        let low =  u32x4_extmul_low_u16x8(self.simd, rhs.simd);
666        let high = u32x4_extmul_high_u16x8(self.simd, rhs.simd);
667
668        Self { simd: u16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(low, high) }
669      } else {
670        u16x8::new([
671          ((u32::from(rhs.as_array()[0]) * u32::from(self.as_array()[0])) >> 16) as u16,
672          ((u32::from(rhs.as_array()[1]) * u32::from(self.as_array()[1])) >> 16) as u16,
673          ((u32::from(rhs.as_array()[2]) * u32::from(self.as_array()[2])) >> 16) as u16,
674          ((u32::from(rhs.as_array()[3]) * u32::from(self.as_array()[3])) >> 16) as u16,
675          ((u32::from(rhs.as_array()[4]) * u32::from(self.as_array()[4])) >> 16) as u16,
676          ((u32::from(rhs.as_array()[5]) * u32::from(self.as_array()[5])) >> 16) as u16,
677          ((u32::from(rhs.as_array()[6]) * u32::from(self.as_array()[6])) >> 16) as u16,
678          ((u32::from(rhs.as_array()[7]) * u32::from(self.as_array()[7])) >> 16) as u16,
679        ])
680      }
681    }
682  }
683
684  #[inline]
685  #[must_use]
686  #[doc(alias("movemask", "move_mask"))]
687  pub fn to_bitmask(self) -> u32 {
688    i16x8::to_bitmask(cast(self))
689  }
690
691  #[inline]
692  pub fn to_array(self) -> [u16; 8] {
693    cast(self)
694  }
695
696  #[inline]
697  pub fn as_array(&self) -> &[u16; 8] {
698    cast_ref(self)
699  }
700
701  #[inline]
702  pub fn as_mut_array(&mut self) -> &mut [u16; 8] {
703    cast_mut(self)
704  }
705}