1use super::*;
2
3pick! {
4 if #[cfg(target_feature="sse2")] {
5 #[derive(Default, Clone, Copy, PartialEq, Eq)]
6 #[repr(C, align(16))]
7 pub struct i16x8 { pub(crate) sse: m128i }
8 } else if #[cfg(target_feature="simd128")] {
9 use core::arch::wasm32::*;
10
11 #[derive(Clone, Copy)]
12 #[repr(transparent)]
13 pub struct i16x8 { pub(crate) simd: v128 }
14
15 impl Default for i16x8 {
16 fn default() -> Self {
17 Self::splat(0)
18 }
19 }
20
21 impl PartialEq for i16x8 {
22 fn eq(&self, other: &Self) -> bool {
23 u16x8_all_true(i16x8_eq(self.simd, other.simd))
24 }
25 }
26
27 impl Eq for i16x8 { }
28 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
29 use core::arch::aarch64::*;
30 #[repr(C)]
31 #[derive(Copy, Clone)]
32 pub struct i16x8 { pub(crate) neon : int16x8_t }
33
34 impl Default for i16x8 {
35 #[inline]
36 fn default() -> Self {
37 Self::splat(0)
38 }
39 }
40
41 impl PartialEq for i16x8 {
42 #[inline]
43 fn eq(&self, other: &Self) -> bool {
44 unsafe { vminvq_u16(vceqq_s16(self.neon, other.neon))==u16::MAX }
45 }
46 }
47
48 impl Eq for i16x8 { }
49 } else {
50 #[derive(Default, Clone, Copy, PartialEq, Eq)]
51 #[repr(C, align(16))]
52 pub struct i16x8 { pub(crate) arr: [i16;8] }
53 }
54}
55
// Expands the crate's shared helper macro for this vector type. The arguments
// are the lane type, the lane count, the vector type and 128 (presumably the
// total width in bits). The macro is defined elsewhere, so consult its
// definition for the exact items it generates.
int_uint_consts!(i16, 8, i16x8, 128);
57
// SAFETY (partly assumed): every cfg variant of `i16x8` wraps exactly one
// field. For the `[i16; 8]` fallback every bit pattern, including all-zero, is
// a valid value and the 16-byte array leaves no padding under `align(16)`.
// The `m128i`, `v128` and `int16x8_t` variants are presumed to share these
// properties — confirm against those types' definitions.
unsafe impl Zeroable for i16x8 {}
unsafe impl Pod for i16x8 {}
60
// `AlignTo` is declared elsewhere in the crate; this impl only names `i16` as
// the element type associated with `i16x8`.
impl AlignTo for i16x8 {
  type Elem = i16;
}
64
65impl Add for i16x8 {
66 type Output = Self;
67 #[inline]
68 fn add(self, rhs: Self) -> Self::Output {
69 pick! {
70 if #[cfg(target_feature="sse2")] {
71 Self { sse: add_i16_m128i(self.sse, rhs.sse) }
72 } else if #[cfg(target_feature="simd128")] {
73 Self { simd: i16x8_add(self.simd, rhs.simd) }
74 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
75 unsafe { Self { neon: vaddq_s16(self.neon, rhs.neon) } }
76 } else {
77 Self { arr: [
78 self.arr[0].wrapping_add(rhs.arr[0]),
79 self.arr[1].wrapping_add(rhs.arr[1]),
80 self.arr[2].wrapping_add(rhs.arr[2]),
81 self.arr[3].wrapping_add(rhs.arr[3]),
82 self.arr[4].wrapping_add(rhs.arr[4]),
83 self.arr[5].wrapping_add(rhs.arr[5]),
84 self.arr[6].wrapping_add(rhs.arr[6]),
85 self.arr[7].wrapping_add(rhs.arr[7]),
86 ]}
87 }
88 }
89 }
90}
91
92impl Sub for i16x8 {
93 type Output = Self;
94 #[inline]
95 fn sub(self, rhs: Self) -> Self::Output {
96 pick! {
97 if #[cfg(target_feature="sse2")] {
98 Self { sse: sub_i16_m128i(self.sse, rhs.sse) }
99 } else if #[cfg(target_feature="simd128")] {
100 Self { simd: i16x8_sub(self.simd, rhs.simd) }
101 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
102 unsafe {Self { neon: vsubq_s16(self.neon, rhs.neon) }}
103 } else {
104 Self { arr: [
105 self.arr[0].wrapping_sub(rhs.arr[0]),
106 self.arr[1].wrapping_sub(rhs.arr[1]),
107 self.arr[2].wrapping_sub(rhs.arr[2]),
108 self.arr[3].wrapping_sub(rhs.arr[3]),
109 self.arr[4].wrapping_sub(rhs.arr[4]),
110 self.arr[5].wrapping_sub(rhs.arr[5]),
111 self.arr[6].wrapping_sub(rhs.arr[6]),
112 self.arr[7].wrapping_sub(rhs.arr[7]),
113 ]}
114 }
115 }
116 }
117}
118
119impl Mul for i16x8 {
120 type Output = Self;
121 #[inline]
122 fn mul(self, rhs: Self) -> Self::Output {
123 pick! {
124 if #[cfg(target_feature="sse2")] {
125 Self { sse: mul_i16_keep_low_m128i(self.sse, rhs.sse) }
126 } else if #[cfg(target_feature="simd128")] {
127 Self { simd: i16x8_mul(self.simd, rhs.simd) }
128 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
129 unsafe {Self { neon: vmulq_s16(self.neon, rhs.neon) }}
130 } else {
131 Self { arr: [
132 self.arr[0].wrapping_mul(rhs.arr[0]),
133 self.arr[1].wrapping_mul(rhs.arr[1]),
134 self.arr[2].wrapping_mul(rhs.arr[2]),
135 self.arr[3].wrapping_mul(rhs.arr[3]),
136 self.arr[4].wrapping_mul(rhs.arr[4]),
137 self.arr[5].wrapping_mul(rhs.arr[5]),
138 self.arr[6].wrapping_mul(rhs.arr[6]),
139 self.arr[7].wrapping_mul(rhs.arr[7]),
140 ]}
141 }
142 }
143 }
144}
145
146impl Add<i16> for i16x8 {
147 type Output = Self;
148 #[inline]
149 fn add(self, rhs: i16) -> Self::Output {
150 self.add(Self::splat(rhs))
151 }
152}
153
154impl Sub<i16> for i16x8 {
155 type Output = Self;
156 #[inline]
157 fn sub(self, rhs: i16) -> Self::Output {
158 self.sub(Self::splat(rhs))
159 }
160}
161
162impl Mul<i16> for i16x8 {
163 type Output = Self;
164 #[inline]
165 fn mul(self, rhs: i16) -> Self::Output {
166 self.mul(Self::splat(rhs))
167 }
168}
169
170impl Add<i16x8> for i16 {
171 type Output = i16x8;
172 #[inline]
173 fn add(self, rhs: i16x8) -> Self::Output {
174 i16x8::splat(self).add(rhs)
175 }
176}
177
178impl Sub<i16x8> for i16 {
179 type Output = i16x8;
180 #[inline]
181 fn sub(self, rhs: i16x8) -> Self::Output {
182 i16x8::splat(self).sub(rhs)
183 }
184}
185
186impl Mul<i16x8> for i16 {
187 type Output = i16x8;
188 #[inline]
189 fn mul(self, rhs: i16x8) -> Self::Output {
190 i16x8::splat(self).mul(rhs)
191 }
192}
193
194impl BitAnd for i16x8 {
195 type Output = Self;
196 #[inline]
197 fn bitand(self, rhs: Self) -> Self::Output {
198 pick! {
199 if #[cfg(target_feature="sse2")] {
200 Self { sse: bitand_m128i(self.sse, rhs.sse) }
201 } else if #[cfg(target_feature="simd128")] {
202 Self { simd: v128_and(self.simd, rhs.simd) }
203 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
204 unsafe {Self { neon: vandq_s16(self.neon, rhs.neon) }}
205 } else {
206 Self { arr: [
207 self.arr[0].bitand(rhs.arr[0]),
208 self.arr[1].bitand(rhs.arr[1]),
209 self.arr[2].bitand(rhs.arr[2]),
210 self.arr[3].bitand(rhs.arr[3]),
211 self.arr[4].bitand(rhs.arr[4]),
212 self.arr[5].bitand(rhs.arr[5]),
213 self.arr[6].bitand(rhs.arr[6]),
214 self.arr[7].bitand(rhs.arr[7]),
215 ]}
216 }
217 }
218 }
219}
220
221impl BitOr for i16x8 {
222 type Output = Self;
223 #[inline]
224 fn bitor(self, rhs: Self) -> Self::Output {
225 pick! {
226 if #[cfg(target_feature="sse2")] {
227 Self { sse: bitor_m128i(self.sse, rhs.sse) }
228 } else if #[cfg(target_feature="simd128")] {
229 Self { simd: v128_or(self.simd, rhs.simd) }
230 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
231 unsafe {Self { neon: vorrq_s16(self.neon, rhs.neon) }}
232 } else {
233 Self { arr: [
234 self.arr[0].bitor(rhs.arr[0]),
235 self.arr[1].bitor(rhs.arr[1]),
236 self.arr[2].bitor(rhs.arr[2]),
237 self.arr[3].bitor(rhs.arr[3]),
238 self.arr[4].bitor(rhs.arr[4]),
239 self.arr[5].bitor(rhs.arr[5]),
240 self.arr[6].bitor(rhs.arr[6]),
241 self.arr[7].bitor(rhs.arr[7]),
242 ]}
243 }
244 }
245 }
246}
247
248impl BitXor for i16x8 {
249 type Output = Self;
250 #[inline]
251 fn bitxor(self, rhs: Self) -> Self::Output {
252 pick! {
253 if #[cfg(target_feature="sse2")] {
254 Self { sse: bitxor_m128i(self.sse, rhs.sse) }
255 } else if #[cfg(target_feature="simd128")] {
256 Self { simd: v128_xor(self.simd, rhs.simd) }
257 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
258 unsafe {Self { neon: veorq_s16(self.neon, rhs.neon) }}
259 } else {
260 Self { arr: [
261 self.arr[0].bitxor(rhs.arr[0]),
262 self.arr[1].bitxor(rhs.arr[1]),
263 self.arr[2].bitxor(rhs.arr[2]),
264 self.arr[3].bitxor(rhs.arr[3]),
265 self.arr[4].bitxor(rhs.arr[4]),
266 self.arr[5].bitxor(rhs.arr[5]),
267 self.arr[6].bitxor(rhs.arr[6]),
268 self.arr[7].bitxor(rhs.arr[7]),
269 ]}
270 }
271 }
272 }
273}
274
275macro_rules! impl_shl_t_for_i16x8 {
276 ($($shift_type:ty),+ $(,)?) => {
277 $(impl Shl<$shift_type> for i16x8 {
278 type Output = Self;
279 #[inline]
281 fn shl(self, rhs: $shift_type) -> Self::Output {
282 pick! {
283 if #[cfg(target_feature="sse2")] {
284 let shift = cast([rhs as u64, 0]);
285 Self { sse: shl_all_u16_m128i(self.sse, shift) }
286 } else if #[cfg(target_feature="simd128")] {
287 Self { simd: i16x8_shl(self.simd, rhs as u32) }
288 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
289 unsafe {Self { neon: vshlq_s16(self.neon, vmovq_n_s16(rhs as i16)) }}
290 } else {
291 let u = rhs as u32;
292 Self { arr: [
293 self.arr[0].wrapping_shl(u),
294 self.arr[1].wrapping_shl(u),
295 self.arr[2].wrapping_shl(u),
296 self.arr[3].wrapping_shl(u),
297 self.arr[4].wrapping_shl(u),
298 self.arr[5].wrapping_shl(u),
299 self.arr[6].wrapping_shl(u),
300 self.arr[7].wrapping_shl(u),
301 ]}
302 }
303 }
304 }
305 })+
306 };
307}
308impl_shl_t_for_i16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
309
310macro_rules! impl_shr_t_for_i16x8 {
311 ($($shift_type:ty),+ $(,)?) => {
312 $(impl Shr<$shift_type> for i16x8 {
313 type Output = Self;
314 #[inline]
316 fn shr(self, rhs: $shift_type) -> Self::Output {
317 pick! {
318 if #[cfg(target_feature="sse2")] {
319 let shift = cast([rhs as u64, 0]);
320 Self { sse: shr_all_i16_m128i(self.sse, shift) }
321 } else if #[cfg(target_feature="simd128")] {
322 Self { simd: i16x8_shr(self.simd, rhs as u32) }
323 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
324 unsafe {Self { neon: vshlq_s16(self.neon, vmovq_n_s16( -(rhs as i16))) }}
325 } else {
326 let u = rhs as u32;
327 Self { arr: [
328 self.arr[0].wrapping_shr(u),
329 self.arr[1].wrapping_shr(u),
330 self.arr[2].wrapping_shr(u),
331 self.arr[3].wrapping_shr(u),
332 self.arr[4].wrapping_shr(u),
333 self.arr[5].wrapping_shr(u),
334 self.arr[6].wrapping_shr(u),
335 self.arr[7].wrapping_shr(u),
336 ]}
337 }
338 }
339 }
340 })+
341 };
342}
343impl_shr_t_for_i16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
344
345impl CmpEq for i16x8 {
346 type Output = Self;
347 #[inline]
348 fn simd_eq(self, rhs: Self) -> Self::Output {
349 pick! {
350 if #[cfg(target_feature="sse2")] {
351 Self { sse: cmp_eq_mask_i16_m128i(self.sse, rhs.sse) }
352 } else if #[cfg(target_feature="simd128")] {
353 Self { simd: i16x8_eq(self.simd, rhs.simd) }
354 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
355 unsafe {Self { neon: vreinterpretq_s16_u16(vceqq_s16(self.neon, rhs.neon)) }}
356 } else {
357 Self { arr: [
358 if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
359 if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
360 if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
361 if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
362 if self.arr[4] == rhs.arr[4] { -1 } else { 0 },
363 if self.arr[5] == rhs.arr[5] { -1 } else { 0 },
364 if self.arr[6] == rhs.arr[6] { -1 } else { 0 },
365 if self.arr[7] == rhs.arr[7] { -1 } else { 0 },
366 ]}
367 }
368 }
369 }
370}
371
372impl CmpGt for i16x8 {
373 type Output = Self;
374 #[inline]
375 fn simd_gt(self, rhs: Self) -> Self::Output {
376 pick! {
377 if #[cfg(target_feature="sse2")] {
378 Self { sse: cmp_gt_mask_i16_m128i(self.sse, rhs.sse) }
379 } else if #[cfg(target_feature="simd128")] {
380 Self { simd: i16x8_gt(self.simd, rhs.simd) }
381 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
382 unsafe {Self { neon: vreinterpretq_s16_u16(vcgtq_s16(self.neon, rhs.neon)) }}
383 } else {
384 Self { arr: [
385 if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
386 if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
387 if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
388 if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
389 if self.arr[4] > rhs.arr[4] { -1 } else { 0 },
390 if self.arr[5] > rhs.arr[5] { -1 } else { 0 },
391 if self.arr[6] > rhs.arr[6] { -1 } else { 0 },
392 if self.arr[7] > rhs.arr[7] { -1 } else { 0 },
393 ]}
394 }
395 }
396 }
397}
398
399impl CmpLt for i16x8 {
400 type Output = Self;
401 #[inline]
402 fn simd_lt(self, rhs: Self) -> Self::Output {
403 pick! {
404 if #[cfg(target_feature="sse2")] {
405 Self { sse: cmp_lt_mask_i16_m128i(self.sse, rhs.sse) }
406 } else if #[cfg(target_feature="simd128")] {
407 Self { simd: i16x8_lt(self.simd, rhs.simd) }
408 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
409 unsafe {Self { neon: vreinterpretq_s16_u16(vcltq_s16(self.neon, rhs.neon)) }}
410 } else {
411 Self { arr: [
412 if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
413 if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
414 if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
415 if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
416 if self.arr[4] < rhs.arr[4] { -1 } else { 0 },
417 if self.arr[5] < rhs.arr[5] { -1 } else { 0 },
418 if self.arr[6] < rhs.arr[6] { -1 } else { 0 },
419 if self.arr[7] < rhs.arr[7] { -1 } else { 0 },
420 ]}
421 }
422 }
423 }
424}
425
426impl i16x8 {
427 #[inline]
428 #[must_use]
429 pub const fn new(array: [i16; 8]) -> Self {
430 unsafe { core::mem::transmute(array) }
431 }
432
433 #[inline]
434 #[must_use]
435 #[doc(alias("movemask", "move_mask"))]
436 pub fn to_bitmask(self) -> u32 {
437 pick! {
438 if #[cfg(target_feature="sse2")] {
439 (move_mask_i8_m128i( pack_i16_to_i8_m128i(self.sse,self.sse)) as u32) & 0xff
440 } else if #[cfg(target_feature="simd128")] {
441 i16x8_bitmask(self.simd) as u32
442 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
443 unsafe
444 {
445 let masked = vcltq_s16(self.neon, vdupq_n_s16(0));
447
448 let selectbit : uint16x8_t = core::mem::transmute([1u16, 2, 4, 8, 16, 32, 64, 128]);
450 let r = vandq_u16(masked, selectbit);
451
452 vaddvq_u16(r) as u32
454 }
455 } else {
456 ((self.arr[0] < 0) as u32) << 0 |
457 ((self.arr[1] < 0) as u32) << 1 |
458 ((self.arr[2] < 0) as u32) << 2 |
459 ((self.arr[3] < 0) as u32) << 3 |
460 ((self.arr[4] < 0) as u32) << 4 |
461 ((self.arr[5] < 0) as u32) << 5 |
462 ((self.arr[6] < 0) as u32) << 6 |
463 ((self.arr[7] < 0) as u32) << 7
464 }
465 }
466 }
467
468 #[inline]
469 #[must_use]
470 pub fn any(self) -> bool {
471 pick! {
472 if #[cfg(target_feature="sse2")] {
473 (move_mask_i8_m128i(self.sse) & 0b1010101010101010) != 0
474 } else if #[cfg(target_feature="simd128")] {
475 u16x8_bitmask(self.simd) != 0
476 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
477 unsafe {
478 vminvq_s16(self.neon) < 0
479 }
480 } else {
481 let v : [u64;2] = cast(self);
482 ((v[0] | v[1]) & 0x8000800080008000) != 0
483 }
484 }
485 }
486
487 #[inline]
488 #[must_use]
489 pub fn all(self) -> bool {
490 pick! {
491 if #[cfg(target_feature="sse2")] {
492 (move_mask_i8_m128i(self.sse) & 0b1010101010101010) == 0b1010101010101010
493 } else if #[cfg(target_feature="simd128")] {
494 u16x8_bitmask(self.simd) == 0b11111111
495 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
496 unsafe {
497 vmaxvq_s16(self.neon) < 0
498 }
499 } else {
500 let v : [u64;2] = cast(self);
501 (v[0] & v[1] & 0x8000800080008000) == 0x8000800080008000
502 }
503 }
504 }
505
506 #[inline]
507 #[must_use]
508 pub fn none(self) -> bool {
509 !self.any()
510 }
511
512 #[inline]
514 #[must_use]
515 pub fn from_u8x16_low(u: u8x16) -> Self {
516 pick! {
517 if #[cfg(target_feature="sse2")] {
518 Self{ sse: unpack_low_i8_m128i(u.sse, m128i::zeroed()) }
519 } else {
520 let u_arr: [u8; 16] = cast(u);
521 cast([
522 u_arr[0] as u16 as i16,
523 u_arr[1] as u16 as i16,
524 u_arr[2] as u16 as i16,
525 u_arr[3] as u16 as i16,
526 u_arr[4] as u16 as i16,
527 u_arr[5] as u16 as i16,
528 u_arr[6] as u16 as i16,
529 u_arr[7] as u16 as i16,
530 ])
531 }
532 }
533 }
534
535 #[inline]
537 #[must_use]
538 pub fn from_u8x16_high(u: u8x16) -> Self {
539 pick! {
540 if #[cfg(target_feature="sse2")] {
541 Self{ sse: unpack_high_i8_m128i(u.sse, m128i::zeroed()) }
542 } else {
543 let u_arr: [u8; 16] = cast(u);
544 cast([
545 u_arr[8] as u16 as i16,
546 u_arr[9] as u16 as i16,
547 u_arr[10] as u16 as i16,
548 u_arr[11] as u16 as i16,
549 u_arr[12] as u16 as i16,
550 u_arr[13] as u16 as i16,
551 u_arr[14] as u16 as i16,
552 u_arr[15] as u16 as i16,
553 ])
554 }
555 }
556 }
557
558 #[inline]
560 #[must_use]
561 pub fn from_i32x8_saturate(v: i32x8) -> Self {
562 pick! {
563 if #[cfg(target_feature="avx2")] {
564 i16x8 { sse: pack_i32_to_i16_m128i( extract_m128i_from_m256i::<0>(v.avx2), extract_m128i_from_m256i::<1>(v.avx2)) }
565 } else if #[cfg(target_feature="sse2")] {
566 i16x8 { sse: pack_i32_to_i16_m128i( v.a.sse, v.b.sse ) }
567 } else if #[cfg(target_feature="simd128")] {
568 use core::arch::wasm32::*;
569
570 i16x8 { simd: i16x8_narrow_i32x4(v.a.simd, v.b.simd) }
571 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
572 use core::arch::aarch64::*;
573
574 unsafe {
575 i16x8 { neon: vcombine_s16(vqmovn_s32(v.a.neon), vqmovn_s32(v.b.neon)) }
576 }
577 } else {
578 fn clamp(a : i32) -> i16 {
579 if a < i16::MIN as i32 {
580 i16::MIN
581 }
582 else if a > i16::MAX as i32 {
583 i16::MAX
584 } else {
585 a as i16
586 }
587 }
588
589 i16x8::new([
590 clamp(v.as_array()[0]),
591 clamp(v.as_array()[1]),
592 clamp(v.as_array()[2]),
593 clamp(v.as_array()[3]),
594 clamp(v.as_array()[4]),
595 clamp(v.as_array()[5]),
596 clamp(v.as_array()[6]),
597 clamp(v.as_array()[7]),
598 ])
599 }
600 }
601 }
602
603 #[inline]
605 #[must_use]
606 pub fn from_i32x8_truncate(v: i32x8) -> Self {
607 pick! {
608 if #[cfg(target_feature="avx2")] {
609 let a = v.avx2.bitand(set_splat_i32_m256i(0xffff));
610 i16x8 { sse: pack_i32_to_u16_m128i( extract_m128i_from_m256i::<0>(a), extract_m128i_from_m256i::<1>(a) ) }
611 } else if #[cfg(target_feature="sse2")] {
612 let a = shr_imm_i32_m128i::<16>(shl_imm_u32_m128i::<16>(v.a.sse));
613 let b = shr_imm_i32_m128i::<16>(shl_imm_u32_m128i::<16>(v.b.sse));
614
615 i16x8 { sse: pack_i32_to_i16_m128i( a, b) }
616 } else {
617 i16x8::new([
618 v.as_array()[0] as i16,
619 v.as_array()[1] as i16,
620 v.as_array()[2] as i16,
621 v.as_array()[3] as i16,
622 v.as_array()[4] as i16,
623 v.as_array()[5] as i16,
624 v.as_array()[6] as i16,
625 v.as_array()[7] as i16,
626 ])
627 }
628 }
629 }
630
631 #[inline]
632 #[must_use]
633 pub fn from_slice_unaligned(input: &[i16]) -> Self {
634 assert!(input.len() >= 8);
635
636 pick! {
637 if #[cfg(target_feature="sse2")] {
638 unsafe { Self { sse: load_unaligned_m128i( &*(input.as_ptr() as * const [u8;16]) ) } }
639 } else if #[cfg(target_feature="simd128")] {
640 unsafe { Self { simd: v128_load(input.as_ptr() as *const v128 ) } }
641 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
642 unsafe { Self { neon: vld1q_s16( input.as_ptr() as *const i16 ) } }
643 } else {
644 unsafe { Self::new( *(input.as_ptr() as * const [i16;8]) ) }
646 }
647 }
648 }
649
650 #[inline]
651 #[must_use]
652 pub fn blend(self, t: Self, f: Self) -> Self {
653 pick! {
654 if #[cfg(target_feature="sse4.1")] {
655 Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
656 } else if #[cfg(target_feature="simd128")] {
657 Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
658 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
659 unsafe {Self { neon: vbslq_s16(vreinterpretq_u16_s16(self.neon), t.neon, f.neon) }}
660 } else {
661 generic_bit_blend(self, t, f)
662 }
663 }
664 }
665 #[inline]
666 #[must_use]
667 pub fn is_negative(self) -> Self {
668 self.simd_lt(Self::zeroed())
669 }
670
671 #[inline]
673 #[must_use]
674 pub fn reduce_add(self) -> i16 {
675 pick! {
676 if #[cfg(target_feature="sse2")] {
677 let hi64 = shuffle_ai_f32_all_m128i::<0b01_00_11_10>(self.sse);
679 let sum64 = add_i16_m128i(self.sse, hi64);
680 let hi32 = shuffle_ai_f32_all_m128i::<0b11_10_00_01>(sum64);
681 let sum32 = add_i16_m128i(sum64, hi32);
682 let lo16 = shr_imm_u32_m128i::<16>(sum32);
683 let sum16 = add_i16_m128i(sum32, lo16);
684 extract_i16_as_i32_m128i::<0>(sum16) as i16
685 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
686 unsafe { vaddvq_s16(self.neon) }
687 } else {
688 let arr: [i16; 8] = cast(self);
689
690 let mut r = arr[0];
692 r = r.wrapping_add(arr[1]);
693 r = r.wrapping_add(arr[2]);
694 r = r.wrapping_add(arr[3]);
695 r = r.wrapping_add(arr[4]);
696 r = r.wrapping_add(arr[5]);
697 r = r.wrapping_add(arr[6]);
698 r.wrapping_add(arr[7])
699 }
700 }
701 }
702
703 #[inline]
705 #[must_use]
706 pub fn reduce_min(self) -> i16 {
707 pick! {
708 if #[cfg(target_feature="sse2")] {
709 let hi64 = shuffle_ai_f32_all_m128i::<0b01_00_11_10>(self.sse);
710 let sum64 = min_i16_m128i(self.sse, hi64);
711 let hi32 = shuffle_ai_f32_all_m128i::<0b11_10_00_01>(sum64);
712 let sum32 = min_i16_m128i(sum64, hi32);
713 let lo16 = shr_imm_u32_m128i::<16>(sum32);
714 let sum16 = min_i16_m128i(sum32, lo16);
715 extract_i16_as_i32_m128i::<0>(sum16) as i16
716 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
717 unsafe { vminvq_s16(self.neon) }
718 } else {
719 let arr: [i16; 8] = cast(self);
720
721 let mut r = arr[0];
723 r = r.min(arr[1]);
724 r = r.min(arr[2]);
725 r = r.min(arr[3]);
726 r = r.min(arr[4]);
727 r = r.min(arr[5]);
728 r = r.min(arr[6]);
729 r.min(arr[7])
730 }
731 }
732 }
733
734 #[inline]
736 #[must_use]
737 pub fn reduce_max(self) -> i16 {
738 pick! {
739 if #[cfg(target_feature="sse2")] {
740 let hi64 = shuffle_ai_f32_all_m128i::<0b01_00_11_10>(self.sse);
741 let sum64 = max_i16_m128i(self.sse, hi64);
742 let hi32 = shuffle_ai_f32_all_m128i::<0b11_10_00_01>(sum64);
743 let sum32 = max_i16_m128i(sum64, hi32);
744 let lo16 = shr_imm_u32_m128i::<16>(sum32);
745 let sum16 = max_i16_m128i(sum32, lo16);
746 extract_i16_as_i32_m128i::<0>(sum16) as i16
747 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
748 unsafe { vmaxvq_s16(self.neon) }
749 } else {
750 let arr: [i16; 8] = cast(self);
751
752 let mut r = arr[0];
754 r = r.max(arr[1]);
755 r = r.max(arr[2]);
756 r = r.max(arr[3]);
757 r = r.max(arr[4]);
758 r = r.max(arr[5]);
759 r = r.max(arr[6]);
760 r.max(arr[7])
761 }
762 }
763 }
764
765 #[inline]
766 #[must_use]
767 pub fn abs(self) -> Self {
768 pick! {
769 if #[cfg(target_feature="sse2")] {
770 let mask = shr_imm_i16_m128i::<15>(self.sse);
771 Self { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
772 } else if #[cfg(target_feature="ssse3")] {
773 Self { sse: abs_i16_m128i(self.sse) }
774 } else if #[cfg(target_feature="simd128")] {
775 Self { simd: i16x8_abs(self.simd) }
776 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
777 unsafe {Self { neon: vabsq_s16(self.neon) }}
778 } else {
779 let arr: [i16; 8] = cast(self);
780 cast(
781 [
782 arr[0].wrapping_abs(),
783 arr[1].wrapping_abs(),
784 arr[2].wrapping_abs(),
785 arr[3].wrapping_abs(),
786 arr[4].wrapping_abs(),
787 arr[5].wrapping_abs(),
788 arr[6].wrapping_abs(),
789 arr[7].wrapping_abs(),
790 ])
791 }
792 }
793 }
794
795 #[inline]
796 #[must_use]
797 pub fn unsigned_abs(self) -> u16x8 {
798 pick! {
799 if #[cfg(target_feature="sse2")] {
800 let mask = shr_imm_i16_m128i::<15>(self.sse);
801 u16x8 { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
802 } else if #[cfg(target_feature="ssse3")] {
803 u16x8 { sse: abs_i16_m128i(self.sse) }
804 } else if #[cfg(target_feature="simd128")] {
805 u16x8 { simd: i16x8_abs(self.simd) }
806 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
807 unsafe {u16x8 { neon: vreinterpretq_u16_s16(vabsq_s16(self.neon)) }}
808 } else {
809 let arr: [i16; 8] = cast(self);
810 cast(
811 [
812 arr[0].unsigned_abs(),
813 arr[1].unsigned_abs(),
814 arr[2].unsigned_abs(),
815 arr[3].unsigned_abs(),
816 arr[4].unsigned_abs(),
817 arr[5].unsigned_abs(),
818 arr[6].unsigned_abs(),
819 arr[7].unsigned_abs(),
820 ])
821 }
822 }
823 }
824
825 #[inline]
826 #[must_use]
827 pub fn max(self, rhs: Self) -> Self {
828 pick! {
829 if #[cfg(target_feature="sse2")] {
830 Self { sse: max_i16_m128i(self.sse, rhs.sse) }
831 } else if #[cfg(target_feature="simd128")] {
832 Self { simd: i16x8_max(self.simd, rhs.simd) }
833 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
834 unsafe {Self { neon: vmaxq_s16(self.neon, rhs.neon) }}
835 } else {
836 self.simd_lt(rhs).blend(rhs, self)
837 }
838 }
839 }
840 #[inline]
841 #[must_use]
842 pub fn min(self, rhs: Self) -> Self {
843 pick! {
844 if #[cfg(target_feature="sse2")] {
845 Self { sse: min_i16_m128i(self.sse, rhs.sse) }
846 } else if #[cfg(target_feature="simd128")] {
847 Self { simd: i16x8_min(self.simd, rhs.simd) }
848 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
849 unsafe {Self { neon: vminq_s16(self.neon, rhs.neon) }}
850 } else {
851 self.simd_lt(rhs).blend(self, rhs)
852 }
853 }
854 }
855
856 #[inline]
857 #[must_use]
858 pub fn saturating_add(self, rhs: Self) -> Self {
859 pick! {
860 if #[cfg(target_feature="sse2")] {
861 Self { sse: add_saturating_i16_m128i(self.sse, rhs.sse) }
862 } else if #[cfg(target_feature="simd128")] {
863 Self { simd: i16x8_add_sat(self.simd, rhs.simd) }
864 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
865 unsafe {Self { neon: vqaddq_s16(self.neon, rhs.neon) }}
866 } else {
867 Self { arr: [
868 self.arr[0].saturating_add(rhs.arr[0]),
869 self.arr[1].saturating_add(rhs.arr[1]),
870 self.arr[2].saturating_add(rhs.arr[2]),
871 self.arr[3].saturating_add(rhs.arr[3]),
872 self.arr[4].saturating_add(rhs.arr[4]),
873 self.arr[5].saturating_add(rhs.arr[5]),
874 self.arr[6].saturating_add(rhs.arr[6]),
875 self.arr[7].saturating_add(rhs.arr[7]),
876 ]}
877 }
878 }
879 }
880 #[inline]
881 #[must_use]
882 pub fn saturating_sub(self, rhs: Self) -> Self {
883 pick! {
884 if #[cfg(target_feature="sse2")] {
885 Self { sse: sub_saturating_i16_m128i(self.sse, rhs.sse) }
886 } else if #[cfg(target_feature="simd128")] {
887 Self { simd: i16x8_sub_sat(self.simd, rhs.simd) }
888 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
889 unsafe { Self { neon: vqsubq_s16(self.neon, rhs.neon) } }
890 } else {
891 Self { arr: [
892 self.arr[0].saturating_sub(rhs.arr[0]),
893 self.arr[1].saturating_sub(rhs.arr[1]),
894 self.arr[2].saturating_sub(rhs.arr[2]),
895 self.arr[3].saturating_sub(rhs.arr[3]),
896 self.arr[4].saturating_sub(rhs.arr[4]),
897 self.arr[5].saturating_sub(rhs.arr[5]),
898 self.arr[6].saturating_sub(rhs.arr[6]),
899 self.arr[7].saturating_sub(rhs.arr[7]),
900 ]}
901 }
902 }
903 }
904
905 #[inline]
910 #[must_use]
911 pub fn dot(self, rhs: Self) -> i32x4 {
912 pick! {
913 if #[cfg(target_feature="sse2")] {
914 i32x4 { sse: mul_i16_horizontal_add_m128i(self.sse, rhs.sse) }
915 } else if #[cfg(target_feature="simd128")] {
916 i32x4 { simd: i32x4_dot_i16x8(self.simd, rhs.simd) }
917 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
918 unsafe {
919 let pl = vmull_s16(vget_low_s16(self.neon), vget_low_s16(rhs.neon));
920 let ph = vmull_high_s16(self.neon, rhs.neon);
921 i32x4 { neon: vpaddq_s32(pl, ph) }
922 }
923 } else {
924 i32x4 { arr: [
925 (i32::from(self.arr[0]) * i32::from(rhs.arr[0])) + (i32::from(self.arr[1]) * i32::from(rhs.arr[1])),
926 (i32::from(self.arr[2]) * i32::from(rhs.arr[2])) + (i32::from(self.arr[3]) * i32::from(rhs.arr[3])),
927 (i32::from(self.arr[4]) * i32::from(rhs.arr[4])) + (i32::from(self.arr[5]) * i32::from(rhs.arr[5])),
928 (i32::from(self.arr[6]) * i32::from(rhs.arr[6])) + (i32::from(self.arr[7]) * i32::from(rhs.arr[7])),
929 ] }
930 }
931 }
932 }
933
934 #[inline]
942 #[must_use]
943 pub fn mul_scale_round(self, rhs: Self) -> Self {
944 pick! {
945 if #[cfg(target_feature="ssse3")] {
946 Self { sse: mul_i16_scale_round_m128i(self.sse, rhs.sse) }
947 } else if #[cfg(target_feature="sse2")] {
948 let hi = mul_i16_keep_high_m128i(self.sse, rhs.sse);
950 let lo = mul_i16_keep_low_m128i(self.sse, rhs.sse);
951 let mut v1 = unpack_low_i16_m128i(lo, hi);
952 let mut v2 = unpack_high_i16_m128i(lo, hi);
953 let a = set_splat_i32_m128i(0x4000);
954 v1 = shr_imm_i32_m128i::<15>(add_i32_m128i(v1, a));
955 v2 = shr_imm_i32_m128i::<15>(add_i32_m128i(v2, a));
956 let s = pack_i32_to_i16_m128i(v1, v2);
957 Self { sse: s }
958 } else if #[cfg(target_feature="simd128")] {
959 Self { simd: i16x8_q15mulr_sat(self.simd, rhs.simd) }
960 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
961 unsafe { Self { neon: vqrdmulhq_s16(self.neon, rhs.neon) } }
962 } else {
963 Self { arr: [
965 ((i32::from(self.arr[0]) * i32::from(rhs.arr[0]) + 0x4000) >> 15) as i16,
966 ((i32::from(self.arr[1]) * i32::from(rhs.arr[1]) + 0x4000) >> 15) as i16,
967 ((i32::from(self.arr[2]) * i32::from(rhs.arr[2]) + 0x4000) >> 15) as i16,
968 ((i32::from(self.arr[3]) * i32::from(rhs.arr[3]) + 0x4000) >> 15) as i16,
969 ((i32::from(self.arr[4]) * i32::from(rhs.arr[4]) + 0x4000) >> 15) as i16,
970 ((i32::from(self.arr[5]) * i32::from(rhs.arr[5]) + 0x4000) >> 15) as i16,
971 ((i32::from(self.arr[6]) * i32::from(rhs.arr[6]) + 0x4000) >> 15) as i16,
972 ((i32::from(self.arr[7]) * i32::from(rhs.arr[7]) + 0x4000) >> 15) as i16,
973 ]}
974 }
975 }
976 }
977
978 #[inline]
980 #[must_use]
981 pub fn mul_keep_high(lhs: Self, rhs: Self) -> Self {
982 pick! {
983 if #[cfg(target_feature="sse2")] {
984 Self { sse: mul_i16_keep_high_m128i(lhs.sse, rhs.sse) }
985 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
986 let lhs_low = unsafe { vget_low_s16(lhs.neon) };
987 let rhs_low = unsafe { vget_low_s16(rhs.neon) };
988
989 let lhs_high = unsafe { vget_high_s16(lhs.neon) };
990 let rhs_high = unsafe { vget_high_s16(rhs.neon) };
991
992 let low = unsafe { vmull_s16(lhs_low, rhs_low) };
993 let high = unsafe { vmull_s16(lhs_high, rhs_high) };
994
995 i16x8 { neon: unsafe { vreinterpretq_s16_u16(vuzpq_u16(vreinterpretq_u16_s32(low), vreinterpretq_u16_s32(high)).1) } }
996 } else if #[cfg(target_feature="simd128")] {
997 let low = i32x4_extmul_low_i16x8(lhs.simd, rhs.simd);
998 let high = i32x4_extmul_high_i16x8(lhs.simd, rhs.simd);
999
1000 Self { simd: i16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(low, high) }
1001 } else {
1002 i16x8::new([
1003 ((i32::from(rhs.as_array()[0]) * i32::from(lhs.as_array()[0])) >> 16) as i16,
1004 ((i32::from(rhs.as_array()[1]) * i32::from(lhs.as_array()[1])) >> 16) as i16,
1005 ((i32::from(rhs.as_array()[2]) * i32::from(lhs.as_array()[2])) >> 16) as i16,
1006 ((i32::from(rhs.as_array()[3]) * i32::from(lhs.as_array()[3])) >> 16) as i16,
1007 ((i32::from(rhs.as_array()[4]) * i32::from(lhs.as_array()[4])) >> 16) as i16,
1008 ((i32::from(rhs.as_array()[5]) * i32::from(lhs.as_array()[5])) >> 16) as i16,
1009 ((i32::from(rhs.as_array()[6]) * i32::from(lhs.as_array()[6])) >> 16) as i16,
1010 ((i32::from(rhs.as_array()[7]) * i32::from(lhs.as_array()[7])) >> 16) as i16,
1011 ])
1012 }
1013 }
1014 }
1015
1016 #[inline]
1018 #[must_use]
1019 pub fn mul_widen(self, rhs: Self) -> i32x8 {
1020 pick! {
1021 if #[cfg(target_feature="avx2")] {
1022 let a = convert_to_i32_m256i_from_i16_m128i(self.sse);
1023 let b = convert_to_i32_m256i_from_i16_m128i(rhs.sse);
1024 i32x8 { avx2: mul_i32_keep_low_m256i(a,b) }
1025 } else if #[cfg(target_feature="sse2")] {
1026 let low = mul_i16_keep_low_m128i(self.sse, rhs.sse);
1027 let high = mul_i16_keep_high_m128i(self.sse, rhs.sse);
1028 i32x8 {
1029 a: i32x4 { sse:unpack_low_i16_m128i(low, high) },
1030 b: i32x4 { sse:unpack_high_i16_m128i(low, high) }
1031 }
1032 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
1033 let lhs_low = unsafe { vget_low_s16(self.neon) };
1034 let rhs_low = unsafe { vget_low_s16(rhs.neon) };
1035
1036 let lhs_high = unsafe { vget_high_s16(self.neon) };
1037 let rhs_high = unsafe { vget_high_s16(rhs.neon) };
1038
1039 let low = unsafe { vmull_s16(lhs_low, rhs_low) };
1040 let high = unsafe { vmull_s16(lhs_high, rhs_high) };
1041
1042 i32x8 { a: i32x4 { neon: low }, b: i32x4 {neon: high } }
1043 } else {
1044 let a = self.as_array();
1045 let b = rhs.as_array();
1046 i32x8::new([
1047 i32::from(a[0]) * i32::from(b[0]),
1048 i32::from(a[1]) * i32::from(b[1]),
1049 i32::from(a[2]) * i32::from(b[2]),
1050 i32::from(a[3]) * i32::from(b[3]),
1051 i32::from(a[4]) * i32::from(b[4]),
1052 i32::from(a[5]) * i32::from(b[5]),
1053 i32::from(a[6]) * i32::from(b[6]),
1054 i32::from(a[7]) * i32::from(b[7]),
1055 ])
1056 }
1057 }
1058 }
1059
1060 #[must_use]
1062 #[inline]
1063 pub fn transpose(data: [i16x8; 8]) -> [i16x8; 8] {
1064 pick! {
1065 if #[cfg(target_feature="sse2")] {
1066 let a1 = unpack_low_i16_m128i(data[0].sse, data[1].sse);
1067 let a2 = unpack_high_i16_m128i(data[0].sse, data[1].sse);
1068 let a3 = unpack_low_i16_m128i(data[2].sse, data[3].sse);
1069 let a4 = unpack_high_i16_m128i(data[2].sse, data[3].sse);
1070 let a5 = unpack_low_i16_m128i(data[4].sse, data[5].sse);
1071 let a6 = unpack_high_i16_m128i(data[4].sse, data[5].sse);
1072 let a7 = unpack_low_i16_m128i(data[6].sse, data[7].sse);
1073 let a8 = unpack_high_i16_m128i(data[6].sse, data[7].sse);
1074
1075 let b1 = unpack_low_i32_m128i(a1, a3);
1076 let b2 = unpack_high_i32_m128i(a1, a3);
1077 let b3 = unpack_low_i32_m128i(a2, a4);
1078 let b4 = unpack_high_i32_m128i(a2, a4);
1079 let b5 = unpack_low_i32_m128i(a5, a7);
1080 let b6 = unpack_high_i32_m128i(a5, a7);
1081 let b7 = unpack_low_i32_m128i(a6, a8);
1082 let b8 = unpack_high_i32_m128i(a6, a8);
1083
1084 [
1085 i16x8 { sse: unpack_low_i64_m128i(b1, b5) },
1086 i16x8 { sse: unpack_high_i64_m128i(b1, b5) },
1087 i16x8 { sse: unpack_low_i64_m128i(b2, b6) },
1088 i16x8 { sse: unpack_high_i64_m128i(b2, b6) },
1089 i16x8 { sse: unpack_low_i64_m128i(b3, b7) },
1090 i16x8 { sse: unpack_high_i64_m128i(b3, b7) },
1091 i16x8 { sse: unpack_low_i64_m128i(b4, b8) },
1092 i16x8 { sse: unpack_high_i64_m128i(b4, b8) } ,
1093 ]
1094 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1095
1096 #[inline] fn vtrq32(a : int16x8_t, b : int16x8_t) -> (int16x8_t, int16x8_t)
1097 {
1098 unsafe {
1099 let r = vtrnq_s32(vreinterpretq_s32_s16(a),vreinterpretq_s32_s16(b));
1100 (vreinterpretq_s16_s32(r.0), vreinterpretq_s16_s32(r.1))
1101 }
1102 }
1103
1104 unsafe {
1105 let (q0,q2) = vtrq32(data[0].neon, data[2].neon);
1106 let (q1,q3) = vtrq32(data[1].neon, data[3].neon);
1107 let (q4,q6) = vtrq32(data[4].neon, data[6].neon);
1108 let (q5,q7) = vtrq32(data[5].neon, data[7].neon);
1109
1110 let b1 = vtrnq_s16(q0, q1);
1111 let b2 = vtrnq_s16(q2, q3);
1112 let b3 = vtrnq_s16(q4, q5);
1113 let b4 = vtrnq_s16(q6, q7);
1114
1115 [
1119 i16x8 { neon: vcombine_s16(vget_low_s16(b1.0), vget_low_s16(b3.0)) },
1120 i16x8 { neon: vcombine_s16(vget_low_s16(b1.1), vget_low_s16(b3.1)) },
1121 i16x8 { neon: vcombine_s16(vget_low_s16(b2.0), vget_low_s16(b4.0)) },
1122 i16x8 { neon: vcombine_s16(vget_low_s16(b2.1), vget_low_s16(b4.1)) },
1123 i16x8 { neon: vcombine_s16(vget_high_s16(b1.0), vget_high_s16(b3.0)) },
1124 i16x8 { neon: vcombine_s16(vget_high_s16(b1.1), vget_high_s16(b3.1)) },
1125 i16x8 { neon: vcombine_s16(vget_high_s16(b2.0), vget_high_s16(b4.0)) },
1126 i16x8 { neon: vcombine_s16(vget_high_s16(b2.1), vget_high_s16(b4.1)) },
1127 ]
1128 }
1129 } else if #[cfg(target_feature="simd128")] {
1130 #[inline] fn lo_i16(a : v128, b : v128) -> v128 { i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(a,b) }
1131 #[inline] fn hi_i16(a : v128, b : v128) -> v128 { i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(a,b) }
1132 #[inline] fn lo_i32(a : v128, b : v128) -> v128 { i32x4_shuffle::<0, 4, 1, 5>(a,b) }
1133 #[inline] fn hi_i32(a : v128, b : v128) -> v128 { i32x4_shuffle::<2, 6, 3, 7>(a,b) }
1134 #[inline] fn lo_i64(a : v128, b : v128) -> v128 { i64x2_shuffle::<0, 2>(a,b) }
1135 #[inline] fn hi_i64(a : v128, b : v128) -> v128 { i64x2_shuffle::<1, 3>(a,b) }
1136
1137 let a1 = lo_i16(data[0].simd, data[1].simd);
1138 let a2 = hi_i16(data[0].simd, data[1].simd);
1139 let a3 = lo_i16(data[2].simd, data[3].simd);
1140 let a4 = hi_i16(data[2].simd, data[3].simd);
1141 let a5 = lo_i16(data[4].simd, data[5].simd);
1142 let a6 = hi_i16(data[4].simd, data[5].simd);
1143 let a7 = lo_i16(data[6].simd, data[7].simd);
1144 let a8 = hi_i16(data[6].simd, data[7].simd);
1145
1146 let b1 = lo_i32(a1, a3);
1147 let b2 = hi_i32(a1, a3);
1148 let b3 = lo_i32(a2, a4);
1149 let b4 = hi_i32(a2, a4);
1150 let b5 = lo_i32(a5, a7);
1151 let b6 = hi_i32(a5, a7);
1152 let b7 = lo_i32(a6, a8);
1153 let b8 = hi_i32(a6, a8);
1154
1155 [
1156 i16x8 { simd: lo_i64(b1, b5) },
1157 i16x8 { simd: hi_i64(b1, b5) },
1158 i16x8 { simd: lo_i64(b2, b6) },
1159 i16x8 { simd: hi_i64(b2, b6) },
1160 i16x8 { simd: lo_i64(b3, b7) },
1161 i16x8 { simd: hi_i64(b3, b7) },
1162 i16x8 { simd: lo_i64(b4, b8) },
1163 i16x8 { simd: hi_i64(b4, b8) } ,
1164 ]
1165
1166 } else {
1167 #[inline(always)]
1168 fn transpose_column(data: &[i16x8; 8], index: usize) -> i16x8 {
1169 i16x8::new([
1170 data[0].as_array()[index],
1171 data[1].as_array()[index],
1172 data[2].as_array()[index],
1173 data[3].as_array()[index],
1174 data[4].as_array()[index],
1175 data[5].as_array()[index],
1176 data[6].as_array()[index],
1177 data[7].as_array()[index],
1178 ])
1179 }
1180
1181 [
1182 transpose_column(&data, 0),
1183 transpose_column(&data, 1),
1184 transpose_column(&data, 2),
1185 transpose_column(&data, 3),
1186 transpose_column(&data, 4),
1187 transpose_column(&data, 5),
1188 transpose_column(&data, 6),
1189 transpose_column(&data, 7),
1190 ]
1191 }
1192 }
1193 }
1194
1195 #[inline]
1196 #[must_use]
1197 pub fn mul_scale_round_n(self, rhs: i16) -> Self {
1205 pick! {
1206 if #[cfg(target_feature="ssse3")] {
1207 Self { sse: mul_i16_scale_round_m128i(self.sse, set_splat_i16_m128i(rhs)) }
1208 } else if #[cfg(target_feature="sse2")] {
1209 let r = set_splat_i16_m128i(rhs);
1211 let hi = mul_i16_keep_high_m128i(self.sse, r);
1212 let lo = mul_i16_keep_low_m128i(self.sse, r);
1213 let mut v1 = unpack_low_i16_m128i(lo, hi);
1214 let mut v2 = unpack_high_i16_m128i(lo, hi);
1215 let a = set_splat_i32_m128i(0x4000);
1216 v1 = shr_imm_i32_m128i::<15>(add_i32_m128i(v1, a));
1217 v2 = shr_imm_i32_m128i::<15>(add_i32_m128i(v2, a));
1218 let s = pack_i32_to_i16_m128i(v1, v2);
1219 Self { sse: s }
1220 } else if #[cfg(target_feature="simd128")] {
1221 Self { simd: i16x8_q15mulr_sat(self.simd, i16x8_splat(rhs)) }
1222 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1223 unsafe { Self { neon: vqrdmulhq_n_s16(self.neon, rhs) } }
1224 } else {
1225 Self { arr: [
1227 ((i32::from(self.arr[0]) * i32::from(rhs) + 0x4000) >> 15) as i16,
1228 ((i32::from(self.arr[1]) * i32::from(rhs) + 0x4000) >> 15) as i16,
1229 ((i32::from(self.arr[2]) * i32::from(rhs) + 0x4000) >> 15) as i16,
1230 ((i32::from(self.arr[3]) * i32::from(rhs) + 0x4000) >> 15) as i16,
1231 ((i32::from(self.arr[4]) * i32::from(rhs) + 0x4000) >> 15) as i16,
1232 ((i32::from(self.arr[5]) * i32::from(rhs) + 0x4000) >> 15) as i16,
1233 ((i32::from(self.arr[6]) * i32::from(rhs) + 0x4000) >> 15) as i16,
1234 ((i32::from(self.arr[7]) * i32::from(rhs) + 0x4000) >> 15) as i16,
1235 ]}
1236 }
1237 }
1238 }
1239
1240 #[inline]
1241 pub fn to_array(self) -> [i16; 8] {
1242 cast(self)
1243 }
1244
1245 #[inline]
1246 pub fn as_array(&self) -> &[i16; 8] {
1247 cast_ref(self)
1248 }
1249
1250 #[inline]
1251 pub fn as_mut_array(&mut self) -> &mut [i16; 8] {
1252 cast_mut(self)
1253 }
1254}