wide/
u16x16_.rs

1use super::*;
2
// A vector of sixteen `u16` lanes, 32-byte aligned with a C-compatible layout.
// The representation is chosen at compile time by `pick!`.
pick! {
  if #[cfg(target_feature="avx2")] {
    // AVX2 build: the sixteen lanes are held in a single `m256i`.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u16x16 { pub(crate) avx2: m256i }
  } else {
    // Fallback build: two `u16x8` halves. With `repr(C)` the field `a` is
    // laid out first, followed by `b`.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u16x16 { pub(crate) a : u16x8, pub(crate) b : u16x8 }
  }
}
14
// Invokes the crate's `int_uint_consts!` macro for this type with the element
// type (`u16`), the lane count (16), the vector type, and the width in bits
// (256). NOTE(review): the macro is defined elsewhere; presumably it generates
// the shared constants/constructors for the type -- confirm at its definition.
int_uint_consts!(u16, 16, u16x16, 256);
16
// SAFETY (NOTE(review)): these impls assert that `u16x16` is plain data --
// all-zero bytes and any other bit pattern are valid and there is no padding.
// The struct is `repr(C, align(32))` and wraps either one `m256i` or two
// `u16x8` values; this assumes those inner types are themselves `Pod` --
// confirm against their definitions.
unsafe impl Zeroable for u16x16 {}
unsafe impl Pod for u16x16 {}
19
// Registers `u16` as the element type of `u16x16` for the crate's `AlignTo`
// trait (the trait itself is defined elsewhere).
impl AlignTo for u16x16 {
  type Elem = u16;
}
23
24impl Add for u16x16 {
25  type Output = Self;
26  #[inline]
27  fn add(self, rhs: Self) -> Self::Output {
28    pick! {
29      if #[cfg(target_feature="avx2")] {
30        Self { avx2: add_i16_m256i(self.avx2, rhs.avx2) }
31      } else {
32        Self {
33          a : self.a.add(rhs.a),
34          b : self.b.add(rhs.b),
35        }
36      }
37    }
38  }
39}
40
41impl Sub for u16x16 {
42  type Output = Self;
43  #[inline]
44  fn sub(self, rhs: Self) -> Self::Output {
45    pick! {
46      if #[cfg(target_feature="avx2")] {
47        Self { avx2: sub_i16_m256i(self.avx2, rhs.avx2) }
48      } else {
49        Self {
50          a : self.a.sub(rhs.a),
51          b : self.b.sub(rhs.b),
52        }
53      }
54    }
55  }
56}
57
58impl Add<u16> for u16x16 {
59  type Output = Self;
60  #[inline]
61  fn add(self, rhs: u16) -> Self::Output {
62    self.add(Self::splat(rhs))
63  }
64}
65
66impl Sub<u16> for u16x16 {
67  type Output = Self;
68  #[inline]
69  fn sub(self, rhs: u16) -> Self::Output {
70    self.sub(Self::splat(rhs))
71  }
72}
73
74impl Add<u16x16> for u16 {
75  type Output = u16x16;
76  #[inline]
77  fn add(self, rhs: u16x16) -> Self::Output {
78    u16x16::splat(self).add(rhs)
79  }
80}
81
82impl Sub<u16x16> for u16 {
83  type Output = u16x16;
84  #[inline]
85  fn sub(self, rhs: u16x16) -> Self::Output {
86    u16x16::splat(self).sub(rhs)
87  }
88}
89
90impl BitAnd for u16x16 {
91  type Output = Self;
92  #[inline]
93  fn bitand(self, rhs: Self) -> Self::Output {
94    pick! {
95      if #[cfg(target_feature="avx2")] {
96        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
97      } else {
98        Self {
99          a : self.a.bitand(rhs.a),
100          b : self.b.bitand(rhs.b),
101        }
102      }
103    }
104  }
105}
106
107impl BitOr for u16x16 {
108  type Output = Self;
109  #[inline]
110  fn bitor(self, rhs: Self) -> Self::Output {
111    pick! {
112      if #[cfg(target_feature="avx2")] {
113        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
114      } else {
115        Self {
116          a : self.a.bitor(rhs.a),
117          b : self.b.bitor(rhs.b),
118        }
119      }
120    }
121  }
122}
123
124impl BitXor for u16x16 {
125  type Output = Self;
126  #[inline]
127  fn bitxor(self, rhs: Self) -> Self::Output {
128    pick! {
129      if #[cfg(target_feature="avx2")] {
130        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
131      } else {
132        Self {
133          a : self.a.bitxor(rhs.a),
134          b : self.b.bitxor(rhs.b),
135        }
136      }
137    }
138  }
139}
140
141impl Not for u16x16 {
142  type Output = Self;
143  #[inline]
144  fn not(self) -> Self {
145    pick! {
146      if #[cfg(target_feature="avx2")] {
147        Self { avx2: self.avx2.not()  }
148      } else {
149        Self {
150          a : self.a.not(),
151          b : self.b.not(),
152        }
153      }
154    }
155  }
156}
157
158macro_rules! impl_shl_t_for_u16x16 {
159  ($($shift_type:ty),+ $(,)?) => {
160    $(impl Shl<$shift_type> for u16x16 {
161      type Output = Self;
162      /// Shifts all lanes by the value given.
163      #[inline]
164      fn shl(self, rhs: $shift_type) -> Self::Output {
165        pick! {
166          if #[cfg(target_feature="avx2")] {
167            let shift = cast([rhs as u64, 0]);
168            Self { avx2: shl_all_u16_m256i(self.avx2, shift) }
169          } else {
170            Self {
171              a : self.a.shl(rhs),
172              b : self.b.shl(rhs),
173            }
174          }
175       }
176     }
177    })+
178  };
179}
180impl_shl_t_for_u16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
181
182macro_rules! impl_shr_t_for_u16x16 {
183  ($($shift_type:ty),+ $(,)?) => {
184    $(impl Shr<$shift_type> for u16x16 {
185      type Output = Self;
186      /// Shifts all lanes by the value given.
187      #[inline]
188      fn shr(self, rhs: $shift_type) -> Self::Output {
189        pick! {
190          if #[cfg(target_feature="avx2")] {
191            let shift = cast([rhs as u64, 0]);
192            Self { avx2: shr_all_u16_m256i(self.avx2, shift) }
193          } else {
194            Self {
195              a : self.a.shr(rhs),
196              b : self.b.shr(rhs),
197            }
198          }
199        }
200      }
201    })+
202  };
203}
204impl_shr_t_for_u16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
205
206impl CmpEq for u16x16 {
207  type Output = Self;
208  #[inline]
209  fn simd_eq(self, rhs: Self) -> Self::Output {
210    pick! {
211      if #[cfg(target_feature="avx2")] {
212        Self { avx2: cmp_eq_mask_i16_m256i(self.avx2, rhs.avx2) }
213      } else {
214        Self {
215          a : self.a.simd_eq(rhs.a),
216          b : self.b.simd_eq(rhs.b),
217        }
218      }
219    }
220  }
221}
222
// Trait-level "greater than" comparison for `u16x16`.
impl CmpGt for u16x16 {
  type Output = Self;
  /// Lane-wise `self > rhs`, forwarded to the inherent `u16x16::simd_gt`.
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    // `Self::simd_gt` names the inherent method declared in `impl u16x16`
    // (inherent items take precedence over trait items in path resolution),
    // so this delegates rather than recursing into this trait method.
    Self::simd_gt(self, rhs)
  }
}
230
// Trait-level "less than" comparison for `u16x16`.
impl CmpLt for u16x16 {
  type Output = Self;
  /// Lane-wise `self < rhs`, computed as `rhs > self`.
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    // There is no separate less-than implementation here: `a < b` is the same
    // as `b > a`, so the operands are swapped and the inherent `simd_gt` is
    // reused.
    Self::simd_gt(rhs, self)
  }
}
239
240impl Mul for u16x16 {
241  type Output = Self;
242  #[inline]
243  fn mul(self, rhs: Self) -> Self::Output {
244    pick! {
245      if #[cfg(target_feature="avx2")] {
246        // non-widening multiplication is the same for unsigned and signed
247        Self { avx2: mul_i16_keep_low_m256i(self.avx2, rhs.avx2) }
248      } else {
249        Self {
250          a : self.a.mul(rhs.a),
251          b : self.b.mul(rhs.b),
252        }
253      }
254    }
255  }
256}
257
258impl From<u8x16> for u16x16 {
259  /// widens and sign extends to u16x16
260  #[inline]
261  fn from(v: u8x16) -> Self {
262    pick! {
263      if #[cfg(target_feature="avx2")] {
264        u16x16 { avx2:convert_to_i16_m256i_from_u8_m128i(v.sse) }
265      } else if #[cfg(target_feature="sse2")] {
266        u16x16 {
267          a: u16x8 { sse: shr_imm_u16_m128i::<8>( unpack_low_i8_m128i(v.sse, v.sse)) },
268          b: u16x8 { sse: shr_imm_u16_m128i::<8>( unpack_high_i8_m128i(v.sse, v.sse)) },
269        }
270      } else {
271
272        u16x16::new([
273          v.as_array()[0] as u16,
274          v.as_array()[1] as u16,
275          v.as_array()[2] as u16,
276          v.as_array()[3] as u16,
277          v.as_array()[4] as u16,
278          v.as_array()[5] as u16,
279          v.as_array()[6] as u16,
280          v.as_array()[7] as u16,
281          v.as_array()[8] as u16,
282          v.as_array()[9] as u16,
283          v.as_array()[10] as u16,
284          v.as_array()[11] as u16,
285          v.as_array()[12] as u16,
286          v.as_array()[13] as u16,
287          v.as_array()[14] as u16,
288          v.as_array()[15] as u16,
289          ])
290      }
291    }
292  }
293}
294
295impl u16x16 {
296  #[inline]
297  #[must_use]
298  pub const fn new(array: [u16; 16]) -> Self {
299    unsafe { core::mem::transmute(array) }
300  }
301
302  #[inline]
303  #[must_use]
304  pub fn blend(self, t: Self, f: Self) -> Self {
305    pick! {
306      if #[cfg(target_feature="avx2")] {
307        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
308      } else {
309        Self {
310          a : self.a.blend(t.a, f.a),
311          b : self.b.blend(t.b, f.b),
312        }
313      }
314    }
315  }
316
317  #[inline]
318  #[must_use]
319  pub fn simd_gt(self, rhs: Self) -> Self {
320    pick! {
321      if #[cfg(target_feature = "avx2")] {
322        let bias = m256i::from([0x8000u16; 16]);
323        let a_biased = sub_i16_m256i(self.avx2, bias);
324        let b_biased = sub_i16_m256i(rhs.avx2, bias);
325        let mask = cmp_gt_mask_i16_m256i(a_biased, b_biased);
326
327        Self { avx2: mask }
328      } else {
329        Self {
330          a: self.a.simd_gt(rhs.a),
331          b: self.b.simd_gt(rhs.b),
332        }
333      }
334    }
335  }
336
337  #[inline]
338  #[must_use]
339  pub fn max(self, rhs: Self) -> Self {
340    pick! {
341      if #[cfg(target_feature="avx2")] {
342        Self { avx2: max_u16_m256i(self.avx2, rhs.avx2) }
343      } else {
344        Self {
345          a : self.a.max(rhs.a),
346          b : self.b.max(rhs.b),
347        }
348      }
349    }
350  }
351  #[inline]
352  #[must_use]
353  pub fn min(self, rhs: Self) -> Self {
354    pick! {
355      if #[cfg(target_feature="avx2")] {
356        Self { avx2: min_u16_m256i(self.avx2, rhs.avx2) }
357      } else {
358        Self {
359          a : self.a.min(rhs.a),
360          b : self.b.min(rhs.b),
361        }
362      }
363    }
364  }
365
366  #[inline]
367  #[must_use]
368  pub fn saturating_add(self, rhs: Self) -> Self {
369    pick! {
370      if #[cfg(target_feature="avx2")] {
371        Self { avx2: add_saturating_u16_m256i(self.avx2, rhs.avx2) }
372      } else {
373        Self {
374          a : self.a.saturating_add(rhs.a),
375          b : self.b.saturating_add(rhs.b),
376        }
377      }
378    }
379  }
380  #[inline]
381  #[must_use]
382  pub fn saturating_sub(self, rhs: Self) -> Self {
383    pick! {
384      if #[cfg(target_feature="avx2")] {
385        Self { avx2: sub_saturating_u16_m256i(self.avx2, rhs.avx2) }
386      } else {
387        Self {
388          a : self.a.saturating_sub(rhs.a),
389          b : self.b.saturating_sub(rhs.b),
390        }
391      }
392    }
393  }
394
395  #[inline]
396  #[must_use]
397  #[doc(alias("movemask", "move_mask"))]
398  pub fn to_bitmask(self) -> u32 {
399    i16x16::to_bitmask(cast(self))
400  }
401
402  #[inline]
403  pub fn to_array(self) -> [u16; 16] {
404    cast(self)
405  }
406
407  #[inline]
408  pub fn as_array(&self) -> &[u16; 16] {
409    cast_ref(self)
410  }
411
412  #[inline]
413  pub fn as_mut_array(&mut self) -> &mut [u16; 16] {
414    cast_mut(self)
415  }
416}
wide/u16x16_.rs

wide/
u16x16_.rs