// wide/i16x32_.rs

1use super::*;
2
3pick! {
4  if #[cfg(target_feature="avx512bw")] {
5    #[derive(Default, Clone, Copy, PartialEq, Eq)]
6    #[repr(C, align(64))]
7    pub struct i16x32 { pub(crate) avx512: m512i }
8  } else {
9    #[derive(Default, Clone, Copy, PartialEq, Eq)]
10    #[repr(C, align(64))]
11    pub struct i16x32 { pub(crate) a : i16x16, pub(crate) b : i16x16 }
12  }
13}
14
15int_uint_consts!(i16, 32, i16x32, 512);
16
17unsafe impl Zeroable for i16x32 {}
18unsafe impl Pod for i16x32 {}
19
20impl AlignTo for i16x32 {
21  type Elem = i16;
22}
23
24impl Add for i16x32 {
25  type Output = Self;
26  #[inline]
27  fn add(self, rhs: Self) -> Self::Output {
28    pick! {
29      if #[cfg(target_feature="avx512bw")] {
30        Self { avx512: add_i16_m512i(self.avx512, rhs.avx512) }
31      } else {
32        Self {
33          a : self.a.add(rhs.a),
34          b : self.b.add(rhs.b),
35        }
36      }
37    }
38  }
39}
40
41impl Sub for i16x32 {
42  type Output = Self;
43  #[inline]
44  fn sub(self, rhs: Self) -> Self::Output {
45    pick! {
46      if #[cfg(target_feature="avx512bw")] {
47        Self { avx512: sub_i16_m512i(self.avx512, rhs.avx512) }
48      } else {
49        Self {
50          a : self.a.sub(rhs.a),
51          b : self.b.sub(rhs.b),
52        }
53      }
54    }
55  }
56}
57
58impl Mul for i16x32 {
59  type Output = Self;
60  #[inline]
61  fn mul(self, rhs: Self) -> Self::Output {
62    pick! {
63      if #[cfg(target_feature="avx512bw")] {
64        Self { avx512: mul_i16_keep_low_m512i(self.avx512, rhs.avx512) }
65      } else {
66        Self { a: self.a.mul(rhs.a), b: self.b.mul(rhs.b) }
67      }
68    }
69  }
70}
71
72impl Add<i16> for i16x32 {
73  type Output = Self;
74  #[inline]
75  fn add(self, rhs: i16) -> Self::Output {
76    self.add(Self::splat(rhs))
77  }
78}
79
80impl Sub<i16> for i16x32 {
81  type Output = Self;
82  #[inline]
83  fn sub(self, rhs: i16) -> Self::Output {
84    self.sub(Self::splat(rhs))
85  }
86}
87
88impl Mul<i16> for i16x32 {
89  type Output = Self;
90  #[inline]
91  fn mul(self, rhs: i16) -> Self::Output {
92    self.mul(Self::splat(rhs))
93  }
94}
95
96impl Add<i16x32> for i16 {
97  type Output = i16x32;
98  #[inline]
99  fn add(self, rhs: i16x32) -> Self::Output {
100    i16x32::splat(self).add(rhs)
101  }
102}
103
104impl Sub<i16x32> for i16 {
105  type Output = i16x32;
106  #[inline]
107  fn sub(self, rhs: i16x32) -> Self::Output {
108    i16x32::splat(self).sub(rhs)
109  }
110}
111
112impl Mul<i16x32> for i16 {
113  type Output = i16x32;
114  #[inline]
115  fn mul(self, rhs: i16x32) -> Self::Output {
116    i16x32::splat(self).mul(rhs)
117  }
118}
119
120impl BitAnd for i16x32 {
121  type Output = Self;
122  #[inline]
123  fn bitand(self, rhs: Self) -> Self::Output {
124    pick! {
125      if #[cfg(target_feature="avx512bw")] {
126        Self { avx512: bitand_m512i(self.avx512, rhs.avx512) }
127      } else {
128        Self {
129          a : self.a.bitand(rhs.a),
130          b : self.b.bitand(rhs.b),
131        }
132      }
133    }
134  }
135}
136
137impl BitOr for i16x32 {
138  type Output = Self;
139  #[inline]
140  fn bitor(self, rhs: Self) -> Self::Output {
141    pick! {
142    if #[cfg(target_feature="avx512bw")] {
143        Self { avx512: bitor_m512i(self.avx512, rhs.avx512) }
144      } else {
145        Self {
146          a : self.a.bitor(rhs.a),
147          b : self.b.bitor(rhs.b),
148        }
149      }
150    }
151  }
152}
153
154impl BitXor for i16x32 {
155  type Output = Self;
156  #[inline]
157  fn bitxor(self, rhs: Self) -> Self::Output {
158    pick! {
159      if #[cfg(target_feature="avx512bw")] {
160        Self { avx512: bitxor_m512i(self.avx512, rhs.avx512) }
161      } else {
162        Self {
163          a : self.a.bitxor(rhs.a),
164          b : self.b.bitxor(rhs.b),
165        }
166      }
167    }
168  }
169}
170
171macro_rules! impl_shl_t_for_i16x32 {
172  ($($shift_type:ty),+ $(,)?) => {
173    $(impl Shl<$shift_type> for i16x32 {
174      type Output = Self;
175      /// Shifts all lanes by the value given.
176      #[inline]
177      fn shl(self, rhs: $shift_type) -> Self::Output {
178        pick! {
179          if #[cfg(target_feature="avx512bw")] {
180            let shift = cast(rhs as u16);
181            Self { avx512: shl_all_u16_m512i(self.avx512, shift) }
182          } else {
183            Self {
184              a : self.a.shl(rhs),
185              b : self.b.shl(rhs),
186            }
187          }
188        }
189      }
190    })+
191  };
192}
193impl_shl_t_for_i16x32!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
194
195macro_rules! impl_shr_t_for_i16x32 {
196  ($($shift_type:ty),+ $(,)?) => {
197    $(impl Shr<$shift_type> for i16x32 {
198      type Output = Self;
199      /// Shifts all lanes by the value given.
200      #[inline]
201      fn shr(self, rhs: $shift_type) -> Self::Output {
202        pick! {
203          if #[cfg(target_feature="avx512bw")] {
204            let shift = cast(rhs as u16);
205            Self { avx512: shr_all_i16_m512i(self.avx512, shift) }
206          } else {
207            Self {
208              a : self.a.shr(rhs),
209              b : self.b.shr(rhs),
210            }
211          }
212        }
213      }
214    })+
215  };
216}
217impl_shr_t_for_i16x32!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
218
219impl CmpEq for i16x32 {
220  type Output = Self;
221  #[inline]
222  fn simd_eq(self, rhs: Self) -> Self::Output {
223    Self::simd_eq(self, rhs)
224  }
225}
226
227impl CmpLt for i16x32 {
228  type Output = Self;
229  #[inline]
230  fn simd_lt(self, rhs: Self) -> Self::Output {
231    Self::simd_lt(self, rhs)
232  }
233}
234impl CmpGt for i16x32 {
235  type Output = Self;
236  #[inline]
237  fn simd_gt(self, rhs: Self) -> Self::Output {
238    Self::simd_gt(self, rhs)
239  }
240}
241
242impl i16x32 {
243  #[inline]
244  #[must_use]
245  pub const fn new(array: [i16; 32]) -> Self {
246    unsafe { core::mem::transmute(array) }
247  }
248
249  #[inline]
250  #[must_use]
251  pub fn simd_eq(self, rhs: Self) -> Self {
252    pick! {
253      if #[cfg(target_feature="avx512bw")] {
254        Self { avx512: cmp_op_mask_i16_m512i::<{cmp_int_op!(Eq)}>(self.avx512, rhs.avx512) }
255      } else {
256        Self {
257          a : self.a.simd_eq(rhs.a),
258          b : self.b.simd_eq(rhs.b),
259        }
260      }
261    }
262  }
263
264  #[inline]
265  #[must_use]
266  pub fn simd_gt(self, rhs: Self) -> Self {
267    pick! {
268      if #[cfg(target_feature="avx512bw")] {
269        Self { avx512: cmp_op_mask_i16_m512i::<{cmp_int_op!(Nle)}>(self.avx512, rhs.avx512) }
270      } else {
271        Self {
272          a : self.a.simd_gt(rhs.a),
273          b : self.b.simd_gt(rhs.b),
274        }
275      }
276    }
277  }
278
279  #[inline]
280  #[must_use]
281  pub fn simd_lt(self, rhs: Self) -> Self {
282    pick! {
283      if #[cfg(target_feature="avx512bw")] {
284        Self { avx512: cmp_op_mask_i16_m512i::<{cmp_int_op!(Lt)}>(self.avx512, rhs.avx512) }
285      } else {
286        Self {
287          a : rhs.a.simd_gt(self.a),
288          b : rhs.b.simd_gt(self.b),
289        }
290      }
291    }
292  }
293
294  #[inline]
295  #[must_use]
296  pub fn blend(self, t: Self, f: Self) -> Self {
297    pick! {
298      if #[cfg(target_feature="avx512bw")] {
299        Self { avx512: blend_varying_i8_m512i(f.avx512,t.avx512,movepi8_mask_m512i(self.avx512)) }
300      } else {
301        Self {
302          a : self.a.blend(t.a, f.a),
303          b : self.b.blend(t.b, f.b),
304        }
305      }
306    }
307  }
308
309  /// horizontal add of all the elements of the vector
310  #[inline]
311  #[must_use]
312  pub fn reduce_add(self) -> i16 {
313    let arr: [i16x16; 2] = cast(self);
314    (arr[0] + arr[1]).reduce_add()
315  }
316
317  /// horizontal min of all the elements of the vector
318  #[inline]
319  #[must_use]
320  pub fn reduce_min(self) -> i16 {
321    let arr: [i16x16; 2] = cast(self);
322    arr[0].min(arr[1]).reduce_min()
323  }
324
325  /// horizontal max of all the elements of the vector
326  #[inline]
327  #[must_use]
328  pub fn reduce_max(self) -> i16 {
329    let arr: [i16x16; 2] = cast(self);
330    arr[0].max(arr[1]).reduce_max()
331  }
332
333  #[inline]
334  #[must_use]
335  pub fn abs(self) -> Self {
336    pick! {
337      if #[cfg(target_feature="avx512bw")] {
338        Self { avx512: abs_i16_m512i(self.avx512) }
339      } else {
340        Self {
341          a : self.a.abs(),
342          b : self.b.abs(),
343        }
344      }
345    }
346  }
347
348  #[inline]
349  #[must_use]
350  pub fn min(self, rhs: Self) -> Self {
351    pick! {
352      if #[cfg(target_feature="avx512bw")] {
353        Self { avx512: min_i16_m512i(self.avx512, rhs.avx512) }
354      } else {
355        Self {
356          a: self.a.min(rhs.a),
357          b: self.b.min(rhs.b),
358        }
359      }
360    }
361  }
362
363  #[inline]
364  #[must_use]
365  pub fn max(self, rhs: Self) -> Self {
366    pick! {
367      if #[cfg(target_feature="avx512bw")] {
368        Self { avx512: max_i16_m512i(self.avx512, rhs.avx512) }
369      } else {
370        Self {
371          a: self.a.max(rhs.a),
372          b: self.b.max(rhs.b),
373        }
374      }
375    }
376  }
377
378  #[inline]
379  #[must_use]
380  pub fn saturating_add(self, rhs: Self) -> Self {
381    pick! {
382      if #[cfg(target_feature="avx512bw")] {
383        Self { avx512: add_saturating_i16_m512i(self.avx512, rhs.avx512) }
384      } else {
385        Self {
386          a: self.a.saturating_add(rhs.a),
387          b: self.b.saturating_add(rhs.b),
388        }
389      }
390    }
391  }
392
393  #[inline]
394  #[must_use]
395  pub fn saturating_sub(self, rhs: Self) -> Self {
396    pick! {
397      if #[cfg(target_feature="avx512bw")] {
398        Self { avx512: sub_saturating_i16_m512i(self.avx512, rhs.avx512) }
399      } else {
400        Self {
401          a: self.a.saturating_sub(rhs.a),
402          b: self.b.saturating_sub(rhs.b),
403        }
404      }
405    }
406  }
407
408  /// Calculates partial dot product.
409  /// Multiplies packed signed 16-bit integers, producing intermediate signed
410  /// 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit
411  /// integers.
412  #[inline]
413  #[must_use]
414  pub fn dot(self, rhs: Self) -> i32x16 {
415    pick! {
416      if #[cfg(target_feature="avx512bw")] {
417        i32x16 { avx512: mul_i16_horizontal_add_m512i(self.avx512, rhs.avx512) }
418      } else {
419        i32x16 {
420          a : self.a.dot(rhs.a),
421          b : self.b.dot(rhs.b),
422        }
423      }
424    }
425  }
426
427  #[inline]
428  #[must_use]
429  #[doc(alias("movemask", "move_mask"))]
430  pub fn to_bitmask(self) -> u32 {
431    pick! {
432      if #[cfg(target_feature="avx512bw")] {
433        // use f16 move_mask since it is the same size as i16
434        movepi16_mask_m512i(self.avx512) as u32
435      } else {
436        self.a.to_bitmask() | (self.b.to_bitmask() << 16)
437      }
438    }
439  }
440
441  #[inline]
442  pub fn to_array(self) -> [i16; 32] {
443    cast(self)
444  }
445
446  #[inline]
447  pub fn as_array(&self) -> &[i16; 32] {
448    cast_ref(self)
449  }
450
451  #[inline]
452  pub fn as_mut_array(&mut self) -> &mut [i16; 32] {
453    cast_mut(self)
454  }
455}
456
457impl Not for i16x32 {
458  type Output = Self;
459  #[inline]
460  fn not(self) -> Self::Output {
461    pick! {
462      if #[cfg(target_feature="avx512bw")] {
463        Self { avx512: bitxor_m512i(self.avx512, set_splat_i16_m512i(-1)) }
464      } else {
465        Self {
466          a : self.a.not(),
467          b : self.b.not(),
468        }
469      }
470    }
471  }
472}