wide/
i16x16_.rs

1use super::*;
2
pick! {
  if #[cfg(target_feature="avx2")] {
    /// Sixteen `i16` lanes stored as one 32-byte-aligned value.
    ///
    /// This is the AVX2 layout: the lanes live in a single `m256i`.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i16x16 { pub(crate) avx2: m256i }
  } else {
    /// Sixteen `i16` lanes stored as one 32-byte-aligned value.
    ///
    /// This is the layout used without AVX2: two `i16x8` halves, `a` placed
    /// first in memory and `b` second (`repr(C)` keeps the declared order).
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i16x16 { pub(crate) a : i16x8, pub(crate) b : i16x8 }
  }
}
14
// NOTE(review): `int_uint_consts!` is defined elsewhere in the crate; from the
// arguments it presumably generates constants for a 16-lane, 256-bit vector of
// `i16` — confirm against the macro definition.
int_uint_consts!(i16, 16, i16x16, 256);

// SAFETY: `i16x16` is `repr(C)` and consists solely of either one `m256i` or
// two `i16x8` fields. This relies on those field types themselves being
// `Zeroable`/`Pod` with no padding — TODO confirm at their definitions.
unsafe impl Zeroable for i16x16 {}
unsafe impl Pod for i16x16 {}

// Declares `i16` as the element type for the crate's `AlignTo` helper trait.
impl AlignTo for i16x16 {
  type Elem = i16;
}
23
24impl Add for i16x16 {
25  type Output = Self;
26  #[inline]
27  fn add(self, rhs: Self) -> Self::Output {
28    pick! {
29      if #[cfg(target_feature="avx2")] {
30        Self { avx2: add_i16_m256i(self.avx2, rhs.avx2) }
31      } else {
32        Self {
33          a : self.a.add(rhs.a),
34          b : self.b.add(rhs.b),
35        }
36      }
37    }
38  }
39}
40
41impl Sub for i16x16 {
42  type Output = Self;
43  #[inline]
44  fn sub(self, rhs: Self) -> Self::Output {
45    pick! {
46      if #[cfg(target_feature="avx2")] {
47        Self { avx2: sub_i16_m256i(self.avx2, rhs.avx2) }
48      } else {
49        Self {
50          a : self.a.sub(rhs.a),
51          b : self.b.sub(rhs.b),
52        }
53      }
54    }
55  }
56}
57
58impl Mul for i16x16 {
59  type Output = Self;
60  #[inline]
61  fn mul(self, rhs: Self) -> Self::Output {
62    pick! {
63      if #[cfg(target_feature="avx2")] {
64        Self { avx2: mul_i16_keep_low_m256i(self.avx2, rhs.avx2) }
65      } else {
66        Self {
67          a : self.a.mul(rhs.a),
68          b : self.b.mul(rhs.b),
69        }
70      }
71    }
72  }
73}
74
75impl Add<i16> for i16x16 {
76  type Output = Self;
77  #[inline]
78  fn add(self, rhs: i16) -> Self::Output {
79    self.add(Self::splat(rhs))
80  }
81}
82
83impl Sub<i16> for i16x16 {
84  type Output = Self;
85  #[inline]
86  fn sub(self, rhs: i16) -> Self::Output {
87    self.sub(Self::splat(rhs))
88  }
89}
90
91impl Mul<i16> for i16x16 {
92  type Output = Self;
93  #[inline]
94  fn mul(self, rhs: i16) -> Self::Output {
95    self.mul(Self::splat(rhs))
96  }
97}
98
99impl Add<i16x16> for i16 {
100  type Output = i16x16;
101  #[inline]
102  fn add(self, rhs: i16x16) -> Self::Output {
103    i16x16::splat(self).add(rhs)
104  }
105}
106
107impl Sub<i16x16> for i16 {
108  type Output = i16x16;
109  #[inline]
110  fn sub(self, rhs: i16x16) -> Self::Output {
111    i16x16::splat(self).sub(rhs)
112  }
113}
114
115impl Mul<i16x16> for i16 {
116  type Output = i16x16;
117  #[inline]
118  fn mul(self, rhs: i16x16) -> Self::Output {
119    i16x16::splat(self).mul(rhs)
120  }
121}
122
123impl BitAnd for i16x16 {
124  type Output = Self;
125  #[inline]
126  fn bitand(self, rhs: Self) -> Self::Output {
127    pick! {
128      if #[cfg(target_feature="avx2")] {
129        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
130      } else {
131        Self {
132          a : self.a.bitand(rhs.a),
133          b : self.b.bitand(rhs.b),
134        }
135      }
136    }
137  }
138}
139
140impl BitOr for i16x16 {
141  type Output = Self;
142  #[inline]
143  fn bitor(self, rhs: Self) -> Self::Output {
144    pick! {
145      if #[cfg(target_feature="avx2")] {
146        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
147      } else {
148        Self {
149          a : self.a.bitor(rhs.a),
150          b : self.b.bitor(rhs.b),
151        }
152      }
153    }
154  }
155}
156
157impl BitXor for i16x16 {
158  type Output = Self;
159  #[inline]
160  fn bitxor(self, rhs: Self) -> Self::Output {
161    pick! {
162      if #[cfg(target_feature="avx2")] {
163        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
164      } else {
165        Self {
166          a : self.a.bitxor(rhs.a),
167          b : self.b.bitxor(rhs.b),
168        }
169      }
170    }
171  }
172}
173
174macro_rules! impl_shl_t_for_i16x16 {
175  ($($shift_type:ty),+ $(,)?) => {
176    $(impl Shl<$shift_type> for i16x16 {
177      type Output = Self;
178      /// Shifts all lanes by the value given.
179      #[inline]
180      fn shl(self, rhs: $shift_type) -> Self::Output {
181        pick! {
182          if #[cfg(target_feature="avx2")] {
183            let shift = cast([rhs as u64, 0]);
184            Self { avx2: shl_all_u16_m256i(self.avx2, shift) }
185          } else {
186            Self {
187              a : self.a.shl(rhs),
188              b : self.b.shl(rhs),
189            }
190          }
191       }
192     }
193    })+
194  };
195}
196impl_shl_t_for_i16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
197
198macro_rules! impl_shr_t_for_i16x16 {
199  ($($shift_type:ty),+ $(,)?) => {
200    $(impl Shr<$shift_type> for i16x16 {
201      type Output = Self;
202      /// Shifts all lanes by the value given.
203      #[inline]
204      fn shr(self, rhs: $shift_type) -> Self::Output {
205        pick! {
206          if #[cfg(target_feature="avx2")] {
207            let shift = cast([rhs as u64, 0]);
208            Self { avx2: shr_all_i16_m256i(self.avx2, shift) }
209          } else {
210            Self {
211              a : self.a.shr(rhs),
212              b : self.b.shr(rhs),
213            }
214          }
215        }
216      }
217    })+
218  };
219}
220impl_shr_t_for_i16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
221
222impl CmpEq for i16x16 {
223  type Output = Self;
224  #[inline]
225  fn simd_eq(self, rhs: Self) -> Self::Output {
226    pick! {
227      if #[cfg(target_feature="avx2")] {
228        Self { avx2: cmp_eq_mask_i16_m256i(self.avx2, rhs.avx2) }
229      } else {
230        Self {
231          a : self.a.simd_eq(rhs.a),
232          b : self.b.simd_eq(rhs.b),
233        }
234      }
235    }
236  }
237}
238
239impl CmpGt for i16x16 {
240  type Output = Self;
241  #[inline]
242  fn simd_gt(self, rhs: Self) -> Self::Output {
243    pick! {
244      if #[cfg(target_feature="avx2")] {
245        Self { avx2: cmp_gt_mask_i16_m256i(self.avx2, rhs.avx2) }
246      } else {
247        Self {
248          a : self.a.simd_gt(rhs.a),
249          b : self.b.simd_gt(rhs.b),
250        }
251      }
252    }
253  }
254}
255
256impl CmpLt for i16x16 {
257  type Output = Self;
258  #[inline]
259  fn simd_lt(self, rhs: Self) -> Self::Output {
260    pick! {
261      if #[cfg(target_feature="avx2")] {
262        Self { avx2: !cmp_gt_mask_i16_m256i(self.avx2, rhs.avx2) ^ cmp_eq_mask_i16_m256i(self.avx2,rhs.avx2) }
263      } else {
264        Self {
265          a : self.a.simd_lt(rhs.a),
266          b : self.b.simd_lt(rhs.b),
267        }
268      }
269    }
270  }
271}
272
273impl From<i8x16> for i16x16 {
274  /// widen with sign extend from i8 to i16
275  #[inline]
276  fn from(i: i8x16) -> Self {
277    i16x16::from_i8x16(i)
278  }
279}
280
281impl From<u8x16> for i16x16 {
282  /// widen with zero extend from u8 to i16
283  #[inline]
284  fn from(i: u8x16) -> Self {
285    cast(u16x16::from(i))
286  }
287}
288
289impl Not for i16x16 {
290  type Output = Self;
291  #[inline]
292  fn not(self) -> Self {
293    pick! {
294      if #[cfg(target_feature="avx2")] {
295        Self { avx2: self.avx2.not()  }
296      } else {
297        Self {
298          a : self.a.not(),
299          b : self.b.not(),
300        }
301      }
302    }
303  }
304}
305
306impl i16x16 {
307  #[inline]
308  #[must_use]
309  pub const fn new(array: [i16; 16]) -> Self {
310    unsafe { core::mem::transmute(array) }
311  }
312
313  #[inline]
314  #[must_use]
315  #[doc(alias("movemask", "move_mask"))]
316  pub fn to_bitmask(self) -> u32 {
317    pick! {
318      if #[cfg(target_feature="sse2")] {
319          let [a,b] = cast::<_,[m128i;2]>(self);
320          move_mask_i8_m128i( pack_i16_to_i8_m128i(a,b)) as u32
321        } else {
322        self.a.to_bitmask() | (self.b.to_bitmask() << 8)
323      }
324    }
325  }
326
327  #[inline]
328  #[must_use]
329  pub fn any(self) -> bool {
330    pick! {
331      if #[cfg(target_feature="avx2")] {
332        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10101010101010101010101010101010) != 0
333      } else {
334        (self.a | self.b).any()
335      }
336    }
337  }
338  #[inline]
339  #[must_use]
340  pub fn all(self) -> bool {
341    pick! {
342      if #[cfg(target_feature="avx2")] {
343        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10101010101010101010101010101010) == 0b10101010101010101010101010101010
344      } else {
345        (self.a & self.b).all()
346      }
347    }
348  }
349  #[inline]
350  #[must_use]
351  pub fn none(self) -> bool {
352    !self.any()
353  }
354
355  /// widens and sign extends to i16x16
356  #[inline]
357  #[must_use]
358  pub fn from_i8x16(v: i8x16) -> Self {
359    pick! {
360      if #[cfg(target_feature="avx2")] {
361        i16x16 { avx2:convert_to_i16_m256i_from_i8_m128i(v.sse) }
362      } else if #[cfg(target_feature="sse4.1")] {
363        i16x16 {
364          a: i16x8 { sse: convert_to_i16_m128i_from_lower8_i8_m128i(v.sse) },
365          b: i16x8 { sse: convert_to_i16_m128i_from_lower8_i8_m128i(unpack_high_i64_m128i(v.sse, v.sse)) }
366        }
367      } else if #[cfg(target_feature="sse2")] {
368        i16x16 {
369          a: i16x8 { sse: shr_imm_i16_m128i::<8>( unpack_low_i8_m128i(v.sse, v.sse)) },
370          b: i16x8 { sse: shr_imm_i16_m128i::<8>( unpack_high_i8_m128i(v.sse, v.sse)) },
371        }
372      } else {
373
374        i16x16::new([
375          v.as_array()[0] as i16,
376          v.as_array()[1] as i16,
377          v.as_array()[2] as i16,
378          v.as_array()[3] as i16,
379          v.as_array()[4] as i16,
380          v.as_array()[5] as i16,
381          v.as_array()[6] as i16,
382          v.as_array()[7] as i16,
383          v.as_array()[8] as i16,
384          v.as_array()[9] as i16,
385          v.as_array()[10] as i16,
386          v.as_array()[11] as i16,
387          v.as_array()[12] as i16,
388          v.as_array()[13] as i16,
389          v.as_array()[14] as i16,
390          v.as_array()[15] as i16,
391          ])
392      }
393    }
394  }
395
396  #[inline]
397  #[must_use]
398  pub fn blend(self, t: Self, f: Self) -> Self {
399    pick! {
400      if #[cfg(target_feature="avx2")] {
401        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
402      } else {
403        Self {
404          a : self.a.blend(t.a, f.a),
405          b : self.b.blend(t.b, f.b),
406        }
407      }
408    }
409  }
410
411  /// horizontal add of all the elements of the vector
412  #[inline]
413  #[must_use]
414  pub fn reduce_add(self) -> i16 {
415    let arr: [i16x8; 2] = cast(self);
416
417    (arr[0] + arr[1]).reduce_add()
418  }
419
420  /// horizontal min of all the elements of the vector
421  #[inline]
422  #[must_use]
423  pub fn reduce_min(self) -> i16 {
424    let arr: [i16x8; 2] = cast(self);
425
426    arr[0].min(arr[1]).reduce_min()
427  }
428
429  /// horizontal max of all the elements of the vector
430  #[inline]
431  #[must_use]
432  pub fn reduce_max(self) -> i16 {
433    let arr: [i16x8; 2] = cast(self);
434
435    arr[0].max(arr[1]).reduce_max()
436  }
437
438  #[inline]
439  #[must_use]
440  pub fn abs(self) -> Self {
441    pick! {
442      if #[cfg(target_feature="avx2")] {
443        Self { avx2: abs_i16_m256i(self.avx2) }
444      } else {
445        Self {
446          a : self.a.abs(),
447          b : self.b.abs(),
448        }
449      }
450    }
451  }
452  #[inline]
453  #[must_use]
454  pub fn max(self, rhs: Self) -> Self {
455    pick! {
456      if #[cfg(target_feature="avx2")] {
457        Self { avx2: max_i16_m256i(self.avx2, rhs.avx2) }
458      } else {
459        Self {
460          a : self.a.max(rhs.a),
461          b : self.b.max(rhs.b),
462        }
463      }
464    }
465  }
466  #[inline]
467  #[must_use]
468  pub fn min(self, rhs: Self) -> Self {
469    pick! {
470      if #[cfg(target_feature="avx2")] {
471        Self { avx2: min_i16_m256i(self.avx2, rhs.avx2) }
472      } else {
473        Self {
474          a : self.a.min(rhs.a),
475          b : self.b.min(rhs.b),
476        }
477      }
478    }
479  }
480
481  #[inline]
482  #[must_use]
483  pub fn saturating_add(self, rhs: Self) -> Self {
484    pick! {
485      if #[cfg(target_feature="avx2")] {
486        Self { avx2: add_saturating_i16_m256i(self.avx2, rhs.avx2) }
487      } else {
488        Self {
489          a : self.a.saturating_add(rhs.a),
490          b : self.b.saturating_add(rhs.b),
491        }
492      }
493    }
494  }
495  #[inline]
496  #[must_use]
497  pub fn saturating_sub(self, rhs: Self) -> Self {
498    pick! {
499      if #[cfg(target_feature="avx2")] {
500        Self { avx2: sub_saturating_i16_m256i(self.avx2, rhs.avx2) }
501      } else {
502        Self {
503          a : self.a.saturating_sub(rhs.a),
504          b : self.b.saturating_sub(rhs.b),
505        }
506      }
507    }
508  }
509
510  /// Calculates partial dot product.
511  /// Multiplies packed signed 16-bit integers, producing intermediate signed
512  /// 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit
513  /// integers.
514  #[inline]
515  #[must_use]
516  pub fn dot(self, rhs: Self) -> i32x8 {
517    pick! {
518      if #[cfg(target_feature="avx2")] {
519        i32x8 { avx2:  mul_i16_horizontal_add_m256i(self.avx2, rhs.avx2) }
520      } else {
521        i32x8 {
522          a : self.a.dot(rhs.a),
523          b : self.b.dot(rhs.b),
524        }
525      }
526    }
527  }
528
529  /// Multiply and scale equivalent to `((self * rhs) + 0x4000) >> 15` on each
530  /// lane, effectively multiplying by a 16 bit fixed point number between `-1`
531  /// and `1`. This corresponds to the following instructions:
532  /// - `vqrdmulhq_n_s16` instruction on neon
533  /// - `i16x8_q15mulr_sat` on simd128
534  /// - `_mm256_mulhrs_epi16` on avx2
535  /// - emulated via `mul_i16_*` on sse2
536  #[inline]
537  #[must_use]
538  pub fn mul_scale_round(self, rhs: Self) -> Self {
539    pick! {
540      if #[cfg(target_feature="avx2")] {
541        Self { avx2: mul_i16_scale_round_m256i(self.avx2, rhs.avx2) }
542      } else {
543        Self {
544          a : self.a.mul_scale_round(rhs.a),
545          b : self.b.mul_scale_round(rhs.b),
546        }
547      }
548    }
549  }
550
551  /// Multiply and scale equivalent to `((self * rhs) + 0x4000) >> 15` on each
552  /// lane, effectively multiplying by a 16 bit fixed point number between `-1`
553  /// and `1`. This corresponds to the following instructions:
554  /// - `vqrdmulhq_n_s16` instruction on neon
555  /// - `i16x8_q15mulr_sat` on simd128
556  /// - `_mm256_mulhrs_epi16` on avx2
557  /// - emulated via `mul_i16_*` on sse2
558  #[inline]
559  #[must_use]
560  pub fn mul_scale_round_n(self, rhs: i16) -> Self {
561    pick! {
562      if #[cfg(target_feature="avx2")] {
563        Self { avx2: mul_i16_scale_round_m256i(self.avx2, set_splat_i16_m256i(rhs)) }
564      } else {
565        Self {
566          a : self.a.mul_scale_round_n(rhs),
567          b : self.b.mul_scale_round_n(rhs),
568        }
569      }
570    }
571  }
572
573  #[inline]
574  pub fn to_array(self) -> [i16; 16] {
575    cast(self)
576  }
577
578  #[inline]
579  pub fn as_array(&self) -> &[i16; 16] {
580    cast_ref(self)
581  }
582
583  #[inline]
584  pub fn as_mut_array(&mut self) -> &mut [i16; 16] {
585    cast_mut(self)
586  }
587}