wide/
f64x2_.rs

1use super::*;
2
3pick! {
4  if #[cfg(target_feature="sse2")] {
5    #[derive(Default, Clone, Copy, PartialEq)]
6    #[repr(C, align(16))]
7    pub struct f64x2 { pub(crate) sse: m128d }
8  } else if #[cfg(target_feature="simd128")] {
9    use core::arch::wasm32::*;
10
11    #[derive(Clone, Copy)]
12    #[repr(transparent)]
13    pub struct f64x2 { pub(crate) simd: v128 }
14
15    impl Default for f64x2 {
16      fn default() -> Self {
17        Self::splat(0.0)
18      }
19    }
20
21    impl PartialEq for f64x2 {
22      fn eq(&self, other: &Self) -> bool {
23        u64x2_all_true(f64x2_eq(self.simd, other.simd))
24      }
25    }
26  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
27    use core::arch::aarch64::*;
28    #[repr(C)]
29    #[derive(Copy, Clone)]
30    pub struct f64x2 { pub(crate) neon: float64x2_t }
31
32    impl Default for f64x2 {
33      #[inline]
34      fn default() -> Self {
35        unsafe { Self { neon: vdupq_n_f64(0.0)} }
36      }
37    }
38
39    impl PartialEq for f64x2 {
40      #[inline]
41      fn eq(&self, other: &Self) -> bool {
42        unsafe
43        { let e = vceqq_f64(self.neon, other.neon);
44          vgetq_lane_u64(e,0) == u64::MAX && vgetq_lane_u64(e,1) == u64::MAX
45        }
46      }
47
48    }
49  } else {
50    #[derive(Default, Clone, Copy, PartialEq)]
51    #[repr(C, align(16))]
52    pub struct f64x2 { pub(crate) arr: [f64;2] }
53  }
54}
55
/// Declares a `pub const` of type `f64x2` named `$name` whose two lanes both
/// hold `$value`.
macro_rules! const_f64_as_f64x2 {
  ($name:ident, $value:expr) => {
    // Callers may pass mixed-case names (e.g. polynomial coefficients).
    #[allow(non_upper_case_globals)]
    pub const $name: f64x2 = f64x2::new([$value; 2]);
  };
}
62
63impl f64x2 {
64  const_f64_as_f64x2!(ONE, 1.0);
65  const_f64_as_f64x2!(ZERO, 0.0);
66  const_f64_as_f64x2!(HALF, 0.5);
67  const_f64_as_f64x2!(E, core::f64::consts::E);
68  const_f64_as_f64x2!(FRAC_1_PI, core::f64::consts::FRAC_1_PI);
69  const_f64_as_f64x2!(FRAC_2_PI, core::f64::consts::FRAC_2_PI);
70  const_f64_as_f64x2!(FRAC_2_SQRT_PI, core::f64::consts::FRAC_2_SQRT_PI);
71  const_f64_as_f64x2!(FRAC_1_SQRT_2, core::f64::consts::FRAC_1_SQRT_2);
72  const_f64_as_f64x2!(FRAC_PI_2, core::f64::consts::FRAC_PI_2);
73  const_f64_as_f64x2!(FRAC_PI_3, core::f64::consts::FRAC_PI_3);
74  const_f64_as_f64x2!(FRAC_PI_4, core::f64::consts::FRAC_PI_4);
75  const_f64_as_f64x2!(FRAC_PI_6, core::f64::consts::FRAC_PI_6);
76  const_f64_as_f64x2!(FRAC_PI_8, core::f64::consts::FRAC_PI_8);
77  const_f64_as_f64x2!(LN_2, core::f64::consts::LN_2);
78  const_f64_as_f64x2!(LN_10, core::f64::consts::LN_10);
79  const_f64_as_f64x2!(LOG2_E, core::f64::consts::LOG2_E);
80  const_f64_as_f64x2!(LOG10_E, core::f64::consts::LOG10_E);
81  const_f64_as_f64x2!(LOG10_2, core::f64::consts::LOG10_2);
82  const_f64_as_f64x2!(LOG2_10, core::f64::consts::LOG2_10);
83  const_f64_as_f64x2!(PI, core::f64::consts::PI);
84  const_f64_as_f64x2!(SQRT_2, core::f64::consts::SQRT_2);
85  const_f64_as_f64x2!(TAU, core::f64::consts::TAU);
86}
87
88unsafe impl Zeroable for f64x2 {}
89unsafe impl Pod for f64x2 {}
90
91impl AlignTo for f64x2 {
92  type Elem = f64;
93}
94
95impl Add for f64x2 {
96  type Output = Self;
97  #[inline]
98  fn add(self, rhs: Self) -> Self::Output {
99    pick! {
100      if #[cfg(target_feature="sse2")] {
101        Self { sse: add_m128d(self.sse, rhs.sse) }
102      } else if #[cfg(target_feature="simd128")] {
103        Self { simd: f64x2_add(self.simd, rhs.simd) }
104      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
105        unsafe { Self { neon: vaddq_f64(self.neon, rhs.neon) } }
106      } else {
107        Self { arr: [
108          self.arr[0] + rhs.arr[0],
109          self.arr[1] + rhs.arr[1],
110        ]}
111      }
112    }
113  }
114}
115
116impl Sub for f64x2 {
117  type Output = Self;
118  #[inline]
119  fn sub(self, rhs: Self) -> Self::Output {
120    pick! {
121      if #[cfg(target_feature="sse2")] {
122        Self { sse: sub_m128d(self.sse, rhs.sse) }
123      } else if #[cfg(target_feature="simd128")] {
124        Self { simd: f64x2_sub(self.simd, rhs.simd) }
125      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
126        unsafe { Self { neon: vsubq_f64(self.neon, rhs.neon) } }
127      } else {
128        Self { arr: [
129          self.arr[0] - rhs.arr[0],
130          self.arr[1] - rhs.arr[1],
131        ]}
132      }
133    }
134  }
135}
136
137impl Mul for f64x2 {
138  type Output = Self;
139  #[inline]
140  fn mul(self, rhs: Self) -> Self::Output {
141    pick! {
142      if #[cfg(target_feature="sse2")] {
143        Self { sse: mul_m128d(self.sse, rhs.sse) }
144      } else if #[cfg(target_feature="simd128")] {
145        Self { simd: f64x2_mul(self.simd, rhs.simd) }
146      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
147        unsafe {Self { neon: vmulq_f64(self.neon, rhs.neon) }}
148      } else {
149        Self { arr: [
150          self.arr[0] * rhs.arr[0],
151          self.arr[1] * rhs.arr[1],
152        ]}
153      }
154    }
155  }
156}
157
158impl Div for f64x2 {
159  type Output = Self;
160  #[inline]
161  fn div(self, rhs: Self) -> Self::Output {
162    pick! {
163      if #[cfg(target_feature="sse2")] {
164        Self { sse: div_m128d(self.sse, rhs.sse) }
165      } else if #[cfg(target_feature="simd128")] {
166        Self { simd: f64x2_div(self.simd, rhs.simd) }
167      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
168        unsafe {Self { neon: vdivq_f64(self.neon, rhs.neon) }}
169      } else {
170        Self { arr: [
171          self.arr[0] / rhs.arr[0],
172          self.arr[1] / rhs.arr[1],
173        ]}
174      }
175    }
176  }
177}
178
179impl Neg for f64x2 {
180  type Output = Self;
181  #[inline]
182  fn neg(self) -> Self::Output {
183    pick! {
184      if #[cfg(target_feature="sse")] {
185        Self { sse: bitxor_m128d(self.sse, Self::splat(-0.0).sse) }
186      } else if #[cfg(target_feature="simd128")] {
187        Self { simd: f64x2_neg(self.simd) }
188      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
189        unsafe {Self { neon: vnegq_f64(self.neon) }}
190      } else {
191        Self { arr: [
192          -self.arr[0],
193          -self.arr[1],
194        ]}
195      }
196    }
197  }
198}
199
200impl Add<f64> for f64x2 {
201  type Output = Self;
202  #[inline]
203  fn add(self, rhs: f64) -> Self::Output {
204    self.add(Self::splat(rhs))
205  }
206}
207
208impl Sub<f64> for f64x2 {
209  type Output = Self;
210  #[inline]
211  fn sub(self, rhs: f64) -> Self::Output {
212    self.sub(Self::splat(rhs))
213  }
214}
215
216impl Mul<f64> for f64x2 {
217  type Output = Self;
218  #[inline]
219  fn mul(self, rhs: f64) -> Self::Output {
220    self.mul(Self::splat(rhs))
221  }
222}
223
224impl Div<f64> for f64x2 {
225  type Output = Self;
226  #[inline]
227  fn div(self, rhs: f64) -> Self::Output {
228    self.div(Self::splat(rhs))
229  }
230}
231
232impl Add<f64x2> for f64 {
233  type Output = f64x2;
234  #[inline]
235  fn add(self, rhs: f64x2) -> Self::Output {
236    f64x2::splat(self).add(rhs)
237  }
238}
239
240impl Sub<f64x2> for f64 {
241  type Output = f64x2;
242  #[inline]
243  fn sub(self, rhs: f64x2) -> Self::Output {
244    f64x2::splat(self).sub(rhs)
245  }
246}
247
248impl Mul<f64x2> for f64 {
249  type Output = f64x2;
250  #[inline]
251  fn mul(self, rhs: f64x2) -> Self::Output {
252    f64x2::splat(self).mul(rhs)
253  }
254}
255
256impl Div<f64x2> for f64 {
257  type Output = f64x2;
258  #[inline]
259  fn div(self, rhs: f64x2) -> Self::Output {
260    f64x2::splat(self).div(rhs)
261  }
262}
263
264impl BitAnd for f64x2 {
265  type Output = Self;
266  #[inline]
267  fn bitand(self, rhs: Self) -> Self::Output {
268    pick! {
269      if #[cfg(target_feature="sse2")] {
270        Self { sse: bitand_m128d(self.sse, rhs.sse) }
271      } else if #[cfg(target_feature="simd128")] {
272        Self { simd: v128_and(self.simd, rhs.simd) }
273      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
274        unsafe {Self { neon: vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
275      } else {
276        Self { arr: [
277          f64::from_bits(self.arr[0].to_bits() & rhs.arr[0].to_bits()),
278          f64::from_bits(self.arr[1].to_bits() & rhs.arr[1].to_bits()),
279        ]}
280      }
281    }
282  }
283}
284
285impl BitOr for f64x2 {
286  type Output = Self;
287  #[inline]
288  fn bitor(self, rhs: Self) -> Self::Output {
289    pick! {
290      if #[cfg(target_feature="sse2")] {
291        Self { sse: bitor_m128d(self.sse, rhs.sse) }
292      } else if #[cfg(target_feature="simd128")] {
293        Self { simd: v128_or(self.simd, rhs.simd) }
294      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
295        unsafe {Self { neon: vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
296      } else {
297        Self { arr: [
298          f64::from_bits(self.arr[0].to_bits() | rhs.arr[0].to_bits()),
299          f64::from_bits(self.arr[1].to_bits() | rhs.arr[1].to_bits()),
300        ]}
301      }
302    }
303  }
304}
305
306impl BitXor for f64x2 {
307  type Output = Self;
308  #[inline]
309  fn bitxor(self, rhs: Self) -> Self::Output {
310    pick! {
311      if #[cfg(target_feature="sse2")] {
312        Self { sse: bitxor_m128d(self.sse, rhs.sse) }
313      } else if #[cfg(target_feature="simd128")] {
314        Self { simd: v128_xor(self.simd, rhs.simd) }
315      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
316        unsafe {Self { neon: vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
317      } else {
318        Self { arr: [
319          f64::from_bits(self.arr[0].to_bits() ^ rhs.arr[0].to_bits()),
320          f64::from_bits(self.arr[1].to_bits() ^ rhs.arr[1].to_bits()),
321        ]}
322      }
323    }
324  }
325}
326
327impl CmpEq for f64x2 {
328  type Output = Self;
329  #[inline]
330  fn simd_eq(self, rhs: Self) -> Self::Output {
331    pick! {
332      if #[cfg(target_feature="sse2")] {
333        Self { sse: cmp_eq_mask_m128d(self.sse, rhs.sse) }
334      } else if #[cfg(target_feature="simd128")] {
335        Self { simd: f64x2_eq(self.simd, rhs.simd) }
336      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
337        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }}
338      } else {
339        Self { arr: [
340          if self.arr[0] == rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
341          if self.arr[1] == rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
342        ]}
343      }
344    }
345  }
346}
347
348impl CmpGe for f64x2 {
349  type Output = Self;
350  #[inline]
351  fn simd_ge(self, rhs: Self) -> Self::Output {
352    pick! {
353      if #[cfg(target_feature="sse2")] {
354        Self { sse: cmp_ge_mask_m128d(self.sse, rhs.sse) }
355      } else if #[cfg(target_feature="simd128")] {
356        Self { simd: f64x2_ge(self.simd, rhs.simd) }
357      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
358        unsafe {Self { neon: vreinterpretq_f64_u64(vcgeq_f64(self.neon, rhs.neon)) }}
359      } else {
360        Self { arr: [
361          if self.arr[0] >= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
362          if self.arr[1] >= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
363        ]}
364      }
365    }
366  }
367}
368
369impl CmpGt for f64x2 {
370  type Output = Self;
371  #[inline]
372  fn simd_gt(self, rhs: Self) -> Self::Output {
373    pick! {
374      if #[cfg(target_feature="avx")] {
375        Self { sse: cmp_op_mask_m128d::<{cmp_op!(GreaterThanOrdered)}>(self.sse, rhs.sse) }
376      } else if #[cfg(target_feature="sse2")] {
377        Self { sse: cmp_gt_mask_m128d(self.sse, rhs.sse) }
378      } else if #[cfg(target_feature="simd128")] {
379        Self { simd: f64x2_gt(self.simd, rhs.simd) }
380      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
381        unsafe {Self { neon: vreinterpretq_f64_u64(vcgtq_f64(self.neon, rhs.neon)) }}
382      } else {
383        Self { arr: [
384          if self.arr[0] > rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
385          if self.arr[1] > rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
386        ]}
387      }
388    }
389  }
390}
391
392impl CmpNe for f64x2 {
393  type Output = Self;
394  #[inline]
395  fn simd_ne(self, rhs: Self) -> Self::Output {
396    pick! {
397      if #[cfg(target_feature="sse2")] {
398        Self { sse: cmp_neq_mask_m128d(self.sse, rhs.sse) }
399      } else if #[cfg(target_feature="simd128")] {
400        Self { simd: f64x2_ne(self.simd, rhs.simd) }
401      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
402        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }.not() }
403      } else {
404        Self { arr: [
405          if self.arr[0] != rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
406          if self.arr[1] != rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
407        ]}
408      }
409    }
410  }
411}
412
413impl CmpLe for f64x2 {
414  type Output = Self;
415  #[inline]
416  fn simd_le(self, rhs: Self) -> Self::Output {
417    pick! {
418      if #[cfg(target_feature="sse2")] {
419        Self { sse: cmp_le_mask_m128d(self.sse, rhs.sse) }
420      } else if #[cfg(target_feature="simd128")] {
421        Self { simd: f64x2_le(self.simd, rhs.simd) }
422      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
423        unsafe {Self { neon: vreinterpretq_f64_u64(vcleq_f64(self.neon, rhs.neon)) }}
424      } else {
425        Self { arr: [
426          if self.arr[0] <= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
427          if self.arr[1] <= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
428        ]}
429      }
430    }
431  }
432}
433
434impl CmpLt for f64x2 {
435  type Output = Self;
436  #[inline]
437  fn simd_lt(self, rhs: Self) -> Self::Output {
438    pick! {
439      if #[cfg(target_feature="sse2")] {
440        Self { sse: cmp_lt_mask_m128d(self.sse, rhs.sse) }
441      } else if #[cfg(target_feature="simd128")] {
442        Self { simd: f64x2_lt(self.simd, rhs.simd) }
443      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
444        unsafe {Self { neon: vreinterpretq_f64_u64(vcltq_f64(self.neon, rhs.neon)) }}
445      } else {
446        Self { arr: [
447          if self.arr[0] < rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
448          if self.arr[1] < rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
449        ]}
450      }
451    }
452  }
453}
454
455impl f64x2 {
456  #[inline]
457  #[must_use]
458  pub const fn new(array: [f64; 2]) -> Self {
459    unsafe { core::mem::transmute(array) }
460  }
461  #[inline]
462  #[must_use]
463  pub fn blend(self, t: Self, f: Self) -> Self {
464    pick! {
465      if #[cfg(target_feature="sse4.1")] {
466        Self { sse: blend_varying_m128d(f.sse, t.sse, self.sse) }
467      } else if #[cfg(target_feature="simd128")] {
468        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
469      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
470        unsafe {Self { neon: vbslq_f64(vreinterpretq_u64_f64(self.neon), t.neon, f.neon) }}
471      } else {
472        generic_bit_blend(self, t, f)
473      }
474    }
475  }
476  #[inline]
477  #[must_use]
478  pub fn abs(self) -> Self {
479    pick! {
480      if #[cfg(target_feature="simd128")] {
481        Self { simd: f64x2_abs(self.simd) }
482      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
483        unsafe {Self { neon: vabsq_f64(self.neon) }}
484      } else {
485        let non_sign_bits = f64x2::from(f64::from_bits(i64::MAX as u64));
486        self & non_sign_bits
487      }
488    }
489  }
490  #[inline]
491  #[must_use]
492  pub fn floor(self) -> Self {
493    pick! {
494      if #[cfg(target_feature="simd128")] {
495        Self { simd: f64x2_floor(self.simd) }
496      } else if #[cfg(target_feature="sse4.1")] {
497        Self { sse: floor_m128d(self.sse) }
498      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
499        unsafe {Self { neon: vrndmq_f64(self.neon) }}
500      } else if #[cfg(feature="std")] {
501        let base: [f64; 2] = cast(self);
502        cast(base.map(|val| val.floor()))
503      } else {
504        let base: [f64; 2] = cast(self);
505        let rounded: [f64; 2] = cast(self.round());
506        cast([
507          if base[0] < rounded[0] { rounded[0] - 1.0 } else { rounded[0] },
508          if base[1] < rounded[1] { rounded[1] - 1.0 } else { rounded[1] },
509        ])
510      }
511    }
512  }
513  #[inline]
514  #[must_use]
515  pub fn ceil(self) -> Self {
516    pick! {
517      if #[cfg(target_feature="simd128")] {
518        Self { simd: f64x2_ceil(self.simd) }
519      } else if #[cfg(target_feature="sse4.1")] {
520        Self { sse: ceil_m128d(self.sse) }
521      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
522        unsafe {Self { neon: vrndpq_f64(self.neon) }}
523      } else if #[cfg(feature="std")] {
524        let base: [f64; 2] = cast(self);
525        cast(base.map(|val| val.ceil()))
526      } else {
527        let base: [f64; 2] = cast(self);
528        let rounded: [f64; 2] = cast(self.round());
529        cast([
530          if base[0] > rounded[0] { rounded[0] + 1.0 } else { rounded[0] },
531          if base[1] > rounded[1] { rounded[1] + 1.0 } else { rounded[1] },
532        ])
533      }
534    }
535  }
536
537  /// Calculates the lanewise maximum of both vectors. This is a faster
538  /// implementation than `max`, but it doesn't specify any behavior if NaNs are
539  /// involved.
540  #[inline]
541  #[must_use]
542  pub fn fast_max(self, rhs: Self) -> Self {
543    pick! {
544      if #[cfg(target_feature="sse2")] {
545        Self { sse: max_m128d(self.sse, rhs.sse) }
546      } else if #[cfg(target_feature="simd128")] {
547        Self {
548          simd: f64x2_pmax(self.simd, rhs.simd),
549        }
550      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
551        unsafe {Self { neon: vmaxq_f64(self.neon, rhs.neon) }}
552      } else {
553        Self { arr: [
554          if self.arr[0] < rhs.arr[0] { rhs.arr[0] } else { self.arr[0] },
555          if self.arr[1] < rhs.arr[1] { rhs.arr[1] } else { self.arr[1] },
556        ]}
557      }
558    }
559  }
560
561  /// Calculates the lanewise maximum of both vectors. If either lane is NaN,
562  /// the other lane gets chosen. Use `fast_max` for a faster implementation
563  /// that doesn't handle NaNs.
564  #[inline]
565  #[must_use]
566  pub fn max(self, rhs: Self) -> Self {
567    pick! {
568      if #[cfg(target_feature="sse2")] {
569        // max_m128d seems to do rhs < self ? self : rhs. So if there's any NaN
570        // involved, it chooses rhs, so we need to specifically check rhs for
571        // NaN.
572        rhs.is_nan().blend(self, Self { sse: max_m128d(self.sse, rhs.sse) })
573      } else if #[cfg(target_feature="simd128")] {
574        // WASM has two max intrinsics:
575        // - max: This propagates NaN, that's the opposite of what we need.
576        // - pmax: This is defined as self < rhs ? rhs : self, which basically
577        //   chooses self if either is NaN.
578        //
579        // pmax is what we want, but we need to specifically check self for NaN.
580        Self {
581          simd: v128_bitselect(
582            rhs.simd,
583            f64x2_pmax(self.simd, rhs.simd),
584            f64x2_ne(self.simd, self.simd), // NaN check
585          )
586        }
587      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
588        unsafe {Self { neon: vmaxnmq_f64(self.neon, rhs.neon) }}
589            } else {
590        Self { arr: [
591          self.arr[0].max(rhs.arr[0]),
592          self.arr[1].max(rhs.arr[1]),
593        ]}
594      }
595    }
596  }
597
598  /// Calculates the lanewise minimum of both vectors. This is a faster
599  /// implementation than `min`, but it doesn't specify any behavior if NaNs are
600  /// involved.
601  #[inline]
602  #[must_use]
603  pub fn fast_min(self, rhs: Self) -> Self {
604    pick! {
605      if #[cfg(target_feature="sse2")] {
606        Self { sse: min_m128d(self.sse, rhs.sse) }
607      } else if #[cfg(target_feature="simd128")] {
608        Self {
609          simd: f64x2_pmin(self.simd, rhs.simd),
610        }
611      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
612        unsafe {Self { neon: vminq_f64(self.neon, rhs.neon) }}
613      } else {
614        Self { arr: [
615          if self.arr[0] < rhs.arr[0] { self.arr[0] } else { rhs.arr[0] },
616          if self.arr[1] < rhs.arr[1] { self.arr[1] } else { rhs.arr[1] },
617        ]}
618      }
619    }
620  }
621
622  /// Calculates the lanewise minimum of both vectors. If either lane is NaN,
623  /// the other lane gets chosen. Use `fast_min` for a faster implementation
624  /// that doesn't handle NaNs.
625  #[inline]
626  #[must_use]
627  pub fn min(self, rhs: Self) -> Self {
628    pick! {
629      if #[cfg(target_feature="sse2")] {
630        // min_m128d seems to do rhs < self ? rhs : self. So if there's any NaN
631        // involved, it chooses rhs, so we need to specifically check rhs for
632        // NaN.
633        rhs.is_nan().blend(self, Self { sse: min_m128d(self.sse, rhs.sse) })
634      } else if #[cfg(target_feature="simd128")] {
635        // WASM has two min intrinsics:
636        // - min: This propagates NaN, that's the opposite of what we need.
637        // - pmin: This is defined as rhs < self ? rhs : self, which basically
638        //   chooses self if either is NaN.
639        //
640        // pmin is what we want, but we need to specifically check self for NaN.
641        Self {
642          simd: v128_bitselect(
643            rhs.simd,
644            f64x2_pmin(self.simd, rhs.simd),
645            f64x2_ne(self.simd, self.simd), // NaN check
646          )
647        }
648      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
649        unsafe {Self { neon: vminnmq_f64(self.neon, rhs.neon) }}
650      } else {
651        Self { arr: [
652          self.arr[0].min(rhs.arr[0]),
653          self.arr[1].min(rhs.arr[1]),
654        ]}
655      }
656    }
657  }
658
659  #[inline]
660  #[must_use]
661  pub fn is_nan(self) -> Self {
662    pick! {
663      if #[cfg(target_feature="sse2")] {
664        Self { sse: cmp_unord_mask_m128d(self.sse, self.sse) }
665      } else if #[cfg(target_feature="simd128")] {
666        Self { simd: f64x2_ne(self.simd, self.simd) }
667      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
668        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, self.neon)) }.not() }
669      } else {
670        Self { arr: [
671          if self.arr[0].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
672          if self.arr[1].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
673        ]}
674      }
675    }
676  }
677  #[inline]
678  #[must_use]
679  pub fn is_finite(self) -> Self {
680    let shifted_exp_mask = u64x2::from(0xFFE0000000000000);
681    let u: u64x2 = cast(self);
682    let shift_u = u << 1_u64;
683    let out = !(shift_u & shifted_exp_mask).simd_eq(shifted_exp_mask);
684    cast(out)
685  }
686  #[inline]
687  #[must_use]
688  pub fn is_inf(self) -> Self {
689    let shifted_inf = u64x2::from(0xFFE0000000000000);
690    let u: u64x2 = cast(self);
691    let shift_u = u << 1_u64;
692    let out = (shift_u).simd_eq(shifted_inf);
693    cast(out)
694  }
695
696  #[inline]
697  #[must_use]
698  pub fn round(self) -> Self {
699    pick! {
700      if #[cfg(target_feature="sse4.1")] {
701        Self { sse: round_m128d::<{round_op!(Nearest)}>(self.sse) }
702      } else if #[cfg(target_feature="simd128")] {
703        Self { simd: f64x2_nearest(self.simd) }
704      } else {
705        let sign_mask = f64x2::from(-0.0);
706        let magic = f64x2::from(f64::from_bits(0x43300000_00000000));
707        let sign = self & sign_mask;
708        let signed_magic = magic | sign;
709        self + signed_magic - signed_magic
710      }
711    }
712  }
713  #[inline]
714  #[must_use]
715  pub fn round_int(self) -> i64x2 {
716    let rounded: [f64; 2] = cast(self.round());
717    cast([rounded[0] as i64, rounded[1] as i64])
718  }
719  /// Performs a multiply-add operation: `self * m + a`
720  ///
721  /// When hardware FMA support is available, this computes the result with a
722  /// single rounding operation. Without FMA support, it falls back to separate
723  /// multiply and add operations with two roundings.
724  ///
725  /// # Platform-specific behavior
726  /// - On `x86`/`x86_64` with FMA: Uses `vfmadd` (single rounding, best
727  ///   accuracy)
728  /// - On ARM64 with NEON: Uses `vfmaq_f64` (single rounding, best accuracy)
729  /// - Without FMA support: Uses `(self * m) + a` (two roundings)
730  ///
731  /// # Examples
732  /// ```
733  /// # use wide::f64x2;
734  /// let a = f64x2::from([1.0, 2.0]);
735  /// let b = f64x2::from([3.0, 4.0]);
736  /// let c = f64x2::from([5.0, 6.0]);
737  ///
738  /// let result = a.mul_add(b, c);
739  ///
740  /// let expected = f64x2::from([8.0, 14.0]);
741  /// assert_eq!(result, expected);
742  /// ```
743  #[inline]
744  #[must_use]
745  pub fn mul_add(self, m: Self, a: Self) -> Self {
746    pick! {
747      if #[cfg(all(target_feature="fma"))] {
748        Self { sse: fused_mul_add_m128d(self.sse, m.sse, a.sse) }
749      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
750        unsafe { Self { neon: vfmaq_f64(a.neon, self.neon, m.neon) } }
751      } else {
752        (self * m) + a
753      }
754    }
755  }
756
757  /// Performs a multiply-subtract operation: `self * m - s`
758  ///
759  /// When hardware FMA support is available, this computes the result with a
760  /// single rounding operation. Without FMA support, it falls back to separate
761  /// multiply and subtract operations with two roundings.
762  ///
763  /// # Platform-specific behavior
764  /// - On `x86`/`x86_64` with FMA: Uses `vfmsub` (single rounding, best
765  ///   accuracy)
766  /// - On ARM64 with NEON: Uses `vfmaq_f64(-s, self, m)` (single rounding, best
767  ///   accuracy)
768  /// - Without FMA support: Uses `(self * m) - s` (two roundings)
769  ///
770  /// # Examples
771  /// ```
772  /// # use wide::f64x2;
773  /// let a = f64x2::from([10.0, 20.0]);
774  /// let b = f64x2::from([2.0, 3.0]);
775  /// let c = f64x2::from([5.0, 10.0]);
776  ///
777  /// let result = a.mul_sub(b, c);
778  ///
779  /// let expected = f64x2::from([15.0, 50.0]);
780  /// assert_eq!(result, expected);
781  /// ```
782  #[inline]
783  #[must_use]
784  pub fn mul_sub(self, m: Self, s: Self) -> Self {
785    pick! {
786      if #[cfg(all(target_feature="fma"))] {
787        Self { sse: fused_mul_sub_m128d(self.sse, m.sse, s.sse) }
788      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
789        unsafe { Self { neon: vfmaq_f64(vnegq_f64(s.neon), self.neon, m.neon) } }
790      } else {
791        (self * m) - s
792      }
793    }
794  }
795
796  /// Performs a negative multiply-add operation: `a - (self * m)`
797  ///
798  /// When hardware FMA support is available, this computes the result with a
799  /// single rounding operation. Without FMA support, it falls back to separate
800  /// operations with two roundings.
801  ///
802  /// # Platform-specific behavior
803  /// - On `x86`/`x86_64` with FMA: Uses `vfnmadd` (single rounding, best
804  ///   accuracy)
805  /// - On ARM64 with NEON: Uses `vfmsq_f64` (single rounding, best accuracy)
806  /// - Without FMA support: Uses `a - (self * m)` (two roundings)
807  ///
808  /// # Examples
809  /// ```
810  /// # use wide::f64x2;
811  /// let a = f64x2::from([3.0, 4.0]);
812  /// let b = f64x2::from([2.0, 2.0]);
813  /// let c = f64x2::from([10.0, 20.0]);
814  ///
815  /// let result = a.mul_neg_add(b, c);
816  ///
817  /// let expected = f64x2::from([4.0, 12.0]);
818  /// assert_eq!(result, expected);
819  /// ```
820  #[inline]
821  #[must_use]
822  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
823    pick! {
824        if #[cfg(all(target_feature="fma"))] {
825          Self { sse: fused_mul_neg_add_m128d(self.sse, m.sse, a.sse) }
826        } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
827          unsafe { Self { neon: vfmsq_f64(a.neon, self.neon, m.neon) } }
828        } else {
829          a - (self * m)
830        }
831    }
832  }
833
834  /// Performs a negative multiply-subtract operation: `-(self * m) - s`
835  ///
836  /// When hardware FMA support is available, this computes the result with a
837  /// single rounding operation. Without FMA support, it falls back to separate
838  /// operations with two roundings.
839  ///
840  /// # Platform-specific behavior
841  /// - On `x86`/`x86_64` with FMA: Uses `vfnmsub` (single rounding, best
842  ///   accuracy)
843  /// - On ARM64 with NEON: Uses `-(vfmaq_f64(s, self, m))` (single rounding,
844  ///   best accuracy)
845  /// - Without FMA support: Uses `-(self * m) - s` (two roundings)
846  ///
847  /// # Examples
848  /// ```
849  /// # use wide::f64x2;
850  /// let a = f64x2::from([3.0, 4.0]);
851  /// let b = f64x2::from([2.0, 2.0]);
852  /// let c = f64x2::from([1.0, 2.0]);
853  ///
854  /// let result = a.mul_neg_sub(b, c);
855  ///
856  /// let expected = f64x2::from([-7.0, -10.0]);
857  /// assert_eq!(result, expected);
858  /// ```
859  #[inline]
860  #[must_use]
861  pub fn mul_neg_sub(self, m: Self, s: Self) -> Self {
862    pick! {
863        if #[cfg(all(target_feature="fma"))] {
864          Self { sse: fused_mul_neg_sub_m128d(self.sse, m.sse, s.sse) }
865        } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
866          unsafe { Self { neon: vnegq_f64(vfmaq_f64(s.neon, self.neon, m.neon)) } }
867        } else {
868          -(self * m) - s
869        }
870    }
871  }
872
873  #[inline]
874  #[must_use]
875  pub fn flip_signs(self, signs: Self) -> Self {
876    self ^ (signs & Self::from(-0.0))
877  }
878
879  #[inline]
880  #[must_use]
881  pub fn copysign(self, sign: Self) -> Self {
882    let magnitude_mask = Self::from(f64::from_bits(u64::MAX >> 1));
883    (self & magnitude_mask) | (sign & Self::from(-0.0))
884  }
885
886  #[inline]
887  pub fn asin_acos(self) -> (Self, Self) {
888    // Based on the Agner Fog "vector class library":
889    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
890    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
891    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
892    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
893    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
894    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
895
896    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
897    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
898    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
899    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
900
901    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
902    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
903    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
904    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
905    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
906    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
907
908    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
909    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
910    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
911    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
912    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
913
914    let xa = self.abs();
915
916    let big = xa.simd_ge(f64x2::splat(0.625));
917
918    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
919
920    let x2 = x1 * x1;
921    let x3 = x2 * x1;
922    let x4 = x2 * x2;
923    let x5 = x4 * x1;
924
925    let do_big = big.any();
926    let do_small = !big.all();
927
928    let mut rx = f64x2::default();
929    let mut sx = f64x2::default();
930    let mut px = f64x2::default();
931    let mut qx = f64x2::default();
932
933    if do_big {
934      rx = x3.mul_add(R3asin, x2 * R2asin)
935        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
936      sx =
937        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
938    }
939    if do_small {
940      px = x3.mul_add(P3asin, P0asin)
941        + x4.mul_add(P4asin, x1 * P1asin)
942        + x5.mul_add(P5asin, x2 * P2asin);
943      qx = x4.mul_add(Q4asin, x5)
944        + x3.mul_add(Q3asin, x1 * Q1asin)
945        + x2.mul_add(Q2asin, Q0asin);
946    };
947
948    let vx = big.blend(rx, px);
949    let wx = big.blend(sx, qx);
950
951    let y1 = vx / wx * x1;
952
953    let mut z1 = f64x2::default();
954    let mut z2 = f64x2::default();
955    if do_big {
956      let xb = (x1 + x1).sqrt();
957      z1 = xb.mul_add(y1, xb);
958    }
959
960    if do_small {
961      z2 = xa.mul_add(y1, xa);
962    }
963
964    // asin
965    let z3 = f64x2::FRAC_PI_2 - z1;
966    let asin = big.blend(z3, z2);
967    let asin = asin.flip_signs(self);
968
969    // acos
970    let z3 = self.simd_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
971    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
972    let acos = big.blend(z3, z4);
973
974    (asin, acos)
975  }
976
977  #[inline]
978  pub fn acos(self) -> Self {
979    // Based on the Agner Fog "vector class library":
980    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
981    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
982    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
983    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
984    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
985    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
986
987    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
988    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
989    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
990    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
991
992    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
993    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
994    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
995    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
996    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
997    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
998
999    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
1000    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
1001    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
1002    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
1003    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
1004
1005    let xa = self.abs();
1006
1007    let big = xa.simd_ge(f64x2::splat(0.625));
1008
1009    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
1010
1011    let x2 = x1 * x1;
1012    let x3 = x2 * x1;
1013    let x4 = x2 * x2;
1014    let x5 = x4 * x1;
1015
1016    let do_big = big.any();
1017    let do_small = !big.all();
1018
1019    let mut rx = f64x2::default();
1020    let mut sx = f64x2::default();
1021    let mut px = f64x2::default();
1022    let mut qx = f64x2::default();
1023
1024    if do_big {
1025      rx = x3.mul_add(R3asin, x2 * R2asin)
1026        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
1027      sx =
1028        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
1029    }
1030    if do_small {
1031      px = x3.mul_add(P3asin, P0asin)
1032        + x4.mul_add(P4asin, x1 * P1asin)
1033        + x5.mul_add(P5asin, x2 * P2asin);
1034      qx = x4.mul_add(Q4asin, x5)
1035        + x3.mul_add(Q3asin, x1 * Q1asin)
1036        + x2.mul_add(Q2asin, Q0asin);
1037    };
1038
1039    let vx = big.blend(rx, px);
1040    let wx = big.blend(sx, qx);
1041
1042    let y1 = vx / wx * x1;
1043
1044    let mut z1 = f64x2::default();
1045    let mut z2 = f64x2::default();
1046    if do_big {
1047      let xb = (x1 + x1).sqrt();
1048      z1 = xb.mul_add(y1, xb);
1049    }
1050
1051    if do_small {
1052      z2 = xa.mul_add(y1, xa);
1053    }
1054
1055    // acos
1056    let z3 = self.simd_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
1057    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
1058    let acos = big.blend(z3, z4);
1059
1060    acos
1061  }
1062
1063  #[inline]
1064  pub fn asin(self) -> Self {
1065    // Based on the Agner Fog "vector class library":
1066    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
1067    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
1068    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
1069    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
1070    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
1071    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
1072
1073    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
1074    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
1075    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
1076    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
1077
1078    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
1079    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
1080    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
1081    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
1082    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
1083    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
1084
1085    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
1086    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
1087    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
1088    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
1089    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
1090
1091    let xa = self.abs();
1092
1093    let big = xa.simd_ge(f64x2::splat(0.625));
1094
1095    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
1096
1097    let x2 = x1 * x1;
1098    let x3 = x2 * x1;
1099    let x4 = x2 * x2;
1100    let x5 = x4 * x1;
1101
1102    let do_big = big.any();
1103    let do_small = !big.all();
1104
1105    let mut rx = f64x2::default();
1106    let mut sx = f64x2::default();
1107    let mut px = f64x2::default();
1108    let mut qx = f64x2::default();
1109
1110    if do_big {
1111      rx = x3.mul_add(R3asin, x2 * R2asin)
1112        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
1113      sx =
1114        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
1115    }
1116    if do_small {
1117      px = x3.mul_add(P3asin, P0asin)
1118        + x4.mul_add(P4asin, x1 * P1asin)
1119        + x5.mul_add(P5asin, x2 * P2asin);
1120      qx = x4.mul_add(Q4asin, x5)
1121        + x3.mul_add(Q3asin, x1 * Q1asin)
1122        + x2.mul_add(Q2asin, Q0asin);
1123    };
1124
1125    let vx = big.blend(rx, px);
1126    let wx = big.blend(sx, qx);
1127
1128    let y1 = vx / wx * x1;
1129
1130    let mut z1 = f64x2::default();
1131    let mut z2 = f64x2::default();
1132    if do_big {
1133      let xb = (x1 + x1).sqrt();
1134      z1 = xb.mul_add(y1, xb);
1135    }
1136
1137    if do_small {
1138      z2 = xa.mul_add(y1, xa);
1139    }
1140
1141    // asin
1142    let z3 = f64x2::FRAC_PI_2 - z1;
1143    let asin = big.blend(z3, z2);
1144    let asin = asin.flip_signs(self);
1145
1146    asin
1147  }
1148
1149  #[inline]
1150  pub fn atan(self) -> Self {
1151    // Based on the Agner Fog "vector class library":
1152    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
1153    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
1154    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
1155    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
1156
1157    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
1158    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
1159    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
1160    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
1161    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);
1162
1163    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
1164    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
1165    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
1166    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
1167    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);
1168
1169    let t = self.abs();
1170
1171    // small:  t < 0.66
1172    // medium: t <= t <= 2.4142 (1+sqrt(2))
1173    // big:    t > 2.4142
1174    let notbig = t.simd_le(T3PO8);
1175    let notsmal = t.simd_ge(Self::splat(0.66));
1176
1177    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1178    s = notsmal & s;
1179    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1180    fac = notsmal & fac;
1181
1182    // small:  z = t / 1.0;
1183    // medium: z = (t-1.0) / (t+1.0);
1184    // big:    z = -1.0 / t;
1185    let mut a = notbig & t;
1186    a = notsmal.blend(a - Self::ONE, a);
1187    let mut b = notbig & Self::ONE;
1188    b = notsmal.blend(b + t, b);
1189    let z = a / b;
1190
1191    let zz = z * z;
1192
1193    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1194    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1195
1196    let mut re = (px / qx).mul_add(z * zz, z);
1197    re += s + fac;
1198
1199    // get sign bit
1200    re = (self.sign_bit()).blend(-re, re);
1201
1202    re
1203  }
1204
  /// Computes the two-argument arctangent per lane, with `self` as the `y`
  /// coordinate and `x` as the `x` coordinate.
  ///
  /// The pair is first folded so the quotient `t` is built from absolute
  /// values with the larger magnitude in the denominator. The same
  /// range-reduction and rational polynomial as `atan` is then applied, and
  /// finally the fold, the sign of `x` and the sign of `y` are undone in that
  /// order.
  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    // Based on the Agner Fog "vector class library":
    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);

    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);

    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);

    let y = self;

    // move in first octant
    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.simd_gt(x1);
    // swap x and y if y1 > x1
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    // check for special case: x and y are both +/- INF
    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      // Bitwise AND with -1.0 is applied to both operands in the affected
      // lanes so the division below does not compute INF / INF.
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    // x = y = 0 gives NAN here
    let t = y2 / x2;

    // small:  t < 0.66
    // medium: 0.66 <= t <= 2.4142 (1+sqrt(2))
    // big:    t > 2.4142
    let notbig = t.simd_le(T3PO8);
    let notsmal = t.simd_ge(Self::splat(0.66));

    // Per-range offset; the AND with `notsmal` zeroes it for small lanes.
    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;
    // Small correction term, selected and masked the same way as `s`.
    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
    fac = notsmal & fac;

    // small:  z = t / 1.0;
    // medium: z = (t-1.0) / (t+1.0);
    // big:    z = -1.0 / t;
    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);

    let mut re = (px / qx).mul_add(z * zz, z);
    re += s + fac;

    // move back in place
    // Undo the x/y swap performed above.
    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    // Lanes where both inputs are zero are forced to 0 (this replaces the
    // NAN produced by the 0 / 0 division).
    re = ((x | y).simd_eq(Self::ZERO)).blend(Self::ZERO, re);
    // Lanes where `x` has its sign bit set are mirrored around pi.
    re = (x.sign_bit()).blend(Self::PI - re, re);

    // get sign bit
    // The result takes the sign of `y`.
    re = (y.sign_bit()).blend(-re, re);

    re
  }
1284
  /// Computes the sine and cosine of every lane in one pass.
  ///
  /// Returns `(sin, cos)`. The magnitude of the input is reduced by a rounded
  /// multiple `q` of pi/2, both a sine and a cosine polynomial are evaluated
  /// on the reduced argument, and the low bits of `q` then decide which
  /// polynomial feeds which output and which sign each output gets.
  #[inline]
  #[must_use]
  pub fn sin_cos(self) -> (Self, Self) {
    // Based on the Agner Fog "vector class library":
    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h

    const_f64_as_f64x2!(P0sin, -1.66666666666666307295E-1);
    const_f64_as_f64x2!(P1sin, 8.33333333332211858878E-3);
    const_f64_as_f64x2!(P2sin, -1.98412698295895385996E-4);
    const_f64_as_f64x2!(P3sin, 2.75573136213857245213E-6);
    const_f64_as_f64x2!(P4sin, -2.50507477628578072866E-8);
    const_f64_as_f64x2!(P5sin, 1.58962301576546568060E-10);

    const_f64_as_f64x2!(P0cos, 4.16666666666665929218E-2);
    const_f64_as_f64x2!(P1cos, -1.38888888888730564116E-3);
    const_f64_as_f64x2!(P2cos, 2.48015872888517045348E-5);
    const_f64_as_f64x2!(P3cos, -2.75573141792967388112E-7);
    const_f64_as_f64x2!(P4cos, 2.08757008419747316778E-9);
    const_f64_as_f64x2!(P5cos, -1.13585365213876817300E-11);

    // Three-part constant used for the argument reduction below; each part
    // is subtracted separately.
    const_f64_as_f64x2!(DP1, 7.853981554508209228515625E-1 * 2.);
    const_f64_as_f64x2!(DP2, 7.94662735614792836714E-9 * 2.);
    const_f64_as_f64x2!(DP3, 3.06161699786838294307E-17 * 2.);

    const_f64_as_f64x2!(TWO_OVER_PI, 2.0 / core::f64::consts::PI);

    let xa = self.abs();

    // `y` is |x| * 2/pi rounded to a whole number; `q` is the same value as
    // integer lanes.
    let y = (xa * TWO_OVER_PI).round();
    let q = y.round_int();

    // Reduced argument: the three DP parts times `y` are removed from `xa`
    // one after another.
    // NOTE(review): assumes `a.mul_neg_add(b, c)` computes `c - a * b` — confirm.
    let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa)));

    let x2 = x * x;
    let mut s = polynomial_5!(x2, P0sin, P1sin, P2sin, P3sin, P4sin, P5sin);
    let mut c = polynomial_5!(x2, P0cos, P1cos, P2cos, P3cos, P4cos, P5cos);
    s = (x * x2).mul_add(s, x);
    c =
      (x2 * x2).mul_add(c, x2.mul_neg_add(f64x2::from(0.5), f64x2::from(1.0)));

    // Lanes where `q` is odd use the other polynomial for each output.
    let swap = !((q & i64x2::from(1)).simd_eq(i64x2::from(0)));

    // Lanes whose `q` exceeds the threshold (and whose input is finite) get
    // the fixed results sin = 0, cos = 1.
    let mut overflow: f64x2 = cast(q.simd_gt(i64x2::from(0x80000000000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f64x2::from(0.0), s);
    c = overflow.blend(f64x2::from(1.0), c);

    // calc sin
    let mut sin1 = cast::<_, f64x2>(swap).blend(c, s);
    // Bit 1 of `q` moved into the sign position, XORed with the bits of the
    // original input; `flip_signs` only looks at the resulting sign bit.
    let sign_sin: i64x2 = (q << 62) ^ cast::<_, i64x2>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    // calc cos
    let mut cos1 = cast::<_, f64x2>(swap).blend(s, c);
    // Bit 1 of `q + 1` moved into the sign position; the XOR flips the sign
    // of the cosine in those lanes.
    let sign_cos: i64x2 = ((q + i64x2::from(1)) & i64x2::from(2)) << 62;
    cos1 ^= cast::<_, f64x2>(sign_cos);

    (sin1, cos1)
  }
1344  #[inline]
1345  #[must_use]
1346  pub fn sin(self) -> Self {
1347    let (s, _) = self.sin_cos();
1348    s
1349  }
1350  #[inline]
1351  #[must_use]
1352  pub fn cos(self) -> Self {
1353    let (_, c) = self.sin_cos();
1354    c
1355  }
1356  #[inline]
1357  #[must_use]
1358  pub fn tan(self) -> Self {
1359    let (s, c) = self.sin_cos();
1360    s / c
1361  }
1362  #[inline]
1363  #[must_use]
1364  pub fn to_degrees(self) -> Self {
1365    const_f64_as_f64x2!(RAD_TO_DEG_RATIO, 180.0_f64 / core::f64::consts::PI);
1366    self * RAD_TO_DEG_RATIO
1367  }
1368  #[inline]
1369  #[must_use]
1370  pub fn to_radians(self) -> Self {
1371    const_f64_as_f64x2!(DEG_TO_RAD_RATIO, core::f64::consts::PI / 180.0_f64);
1372    self * DEG_TO_RAD_RATIO
1373  }
1374  #[inline]
1375  #[must_use]
1376  pub fn sqrt(self) -> Self {
1377    pick! {
1378      if #[cfg(target_feature="sse2")] {
1379        Self { sse: sqrt_m128d(self.sse) }
1380      } else if #[cfg(target_feature="simd128")] {
1381        Self { simd: f64x2_sqrt(self.simd) }
1382      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1383        unsafe {Self { neon: vsqrtq_f64(self.neon) }}
1384      } else if #[cfg(feature="std")] {
1385        Self { arr: [
1386          self.arr[0].sqrt(),
1387          self.arr[1].sqrt(),
1388        ]}
1389      } else {
1390        Self { arr: [
1391          software_sqrt(self.arr[0]),
1392          software_sqrt(self.arr[1]),
1393        ]}
1394      }
1395    }
1396  }
1397  #[inline]
1398  #[must_use]
1399  #[doc(alias("movemask", "move_mask"))]
1400  pub fn to_bitmask(self) -> u32 {
1401    pick! {
1402      if #[cfg(target_feature="sse2")] {
1403        move_mask_m128d(self.sse) as u32
1404      } else if #[cfg(target_feature="simd128")] {
1405        u64x2_bitmask(self.simd) as u32
1406      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1407        unsafe
1408        {
1409          let e = vreinterpretq_u64_f64(self.neon);
1410
1411          (vgetq_lane_u64(e,0) >> 63 | ((vgetq_lane_u64(e,1) >> 62) & 0x2)) as u32
1412        }
1413      } else {
1414        (((self.arr[0].to_bits() as i64) < 0) as u32) << 0 |
1415        (((self.arr[1].to_bits() as i64) < 0) as u32) << 1
1416      }
1417    }
1418  }
1419  #[inline]
1420  #[must_use]
1421  pub fn any(self) -> bool {
1422    pick! {
1423      if #[cfg(target_feature="simd128")] {
1424        v128_any_true(self.simd)
1425      } else {
1426        self.to_bitmask() != 0
1427      }
1428    }
1429  }
1430  #[inline]
1431  #[must_use]
1432  pub fn all(self) -> bool {
1433    pick! {
1434      if #[cfg(target_feature="simd128")] {
1435        u64x2_all_true(self.simd)
1436      } else {
1437        // two lanes
1438        self.to_bitmask() == 0b11
1439      }
1440    }
1441  }
1442  #[inline]
1443  #[must_use]
1444  pub fn none(self) -> bool {
1445    !self.any()
1446  }
1447
1448  #[inline]
1449  fn vm_pow2n(self) -> Self {
1450    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
1451    const_f64_as_f64x2!(bias, 1023.0);
1452    let a = self + (bias + pow2_52);
1453    let c = cast::<_, i64x2>(a) << 52;
1454    cast::<_, f64x2>(c)
1455  }
1456
1457  /// Calculate the exponent of a packed `f64x2`
1458  #[inline]
1459  #[must_use]
1460  pub fn exp(self) -> Self {
1461    const_f64_as_f64x2!(P2, 1.0 / 2.0);
1462    const_f64_as_f64x2!(P3, 1.0 / 6.0);
1463    const_f64_as_f64x2!(P4, 1. / 24.);
1464    const_f64_as_f64x2!(P5, 1. / 120.);
1465    const_f64_as_f64x2!(P6, 1. / 720.);
1466    const_f64_as_f64x2!(P7, 1. / 5040.);
1467    const_f64_as_f64x2!(P8, 1. / 40320.);
1468    const_f64_as_f64x2!(P9, 1. / 362880.);
1469    const_f64_as_f64x2!(P10, 1. / 3628800.);
1470    const_f64_as_f64x2!(P11, 1. / 39916800.);
1471    const_f64_as_f64x2!(P12, 1. / 479001600.);
1472    const_f64_as_f64x2!(P13, 1. / 6227020800.);
1473    const_f64_as_f64x2!(LN2D_HI, 0.693145751953125);
1474    const_f64_as_f64x2!(LN2D_LO, 1.42860682030941723212E-6);
1475    let max_x = f64x2::from(708.39);
1476    let r = (self * Self::LOG2_E).round();
1477    let x = r.mul_neg_add(LN2D_HI, self);
1478    let x = r.mul_neg_add(LN2D_LO, x);
1479    let z =
1480      polynomial_13!(x, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13);
1481    let n2 = Self::vm_pow2n(r);
1482    let z = (z + Self::ONE) * n2;
1483    // check for overflow
1484    let in_range = self.abs().simd_lt(max_x);
1485    let in_range = in_range & self.is_finite();
1486    in_range.blend(z, Self::ZERO)
1487  }
1488
1489  #[inline]
1490  fn exponent(self) -> f64x2 {
1491    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
1492    const_f64_as_f64x2!(bias, 1023.0);
1493    let a = cast::<_, u64x2>(self);
1494    let b = a >> 52;
1495    let c = b | cast::<_, u64x2>(pow2_52);
1496    let d = cast::<_, f64x2>(c);
1497    let e = d - (pow2_52 + bias);
1498    e
1499  }
1500
1501  #[inline]
1502  fn fraction_2(self) -> Self {
1503    let t1 = cast::<_, u64x2>(self);
1504    let t2 = cast::<_, u64x2>(
1505      (t1 & u64x2::from(0x000FFFFFFFFFFFFF)) | u64x2::from(0x3FE0000000000000),
1506    );
1507    cast::<_, f64x2>(t2)
1508  }
1509
1510  #[inline]
1511  fn is_zero_or_subnormal(self) -> Self {
1512    let t = cast::<_, i64x2>(self);
1513    let t = t & i64x2::splat(0x7FF0000000000000);
1514    i64x2::round_float(t.simd_eq(i64x2::splat(0)))
1515  }
1516
1517  #[inline]
1518  fn infinity() -> Self {
1519    cast::<_, f64x2>(i64x2::splat(0x7FF0000000000000))
1520  }
1521
1522  #[inline]
1523  fn nan_log() -> Self {
1524    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
1525  }
1526
1527  #[inline]
1528  fn nan_pow() -> Self {
1529    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
1530  }
1531
1532  #[inline]
1533  fn sign_bit(self) -> Self {
1534    let t1 = cast::<_, i64x2>(self);
1535    let t2 = t1 >> 63;
1536    !cast::<_, f64x2>(t2).simd_eq(f64x2::ZERO)
1537  }
1538
1539  /// horizontal add of all the elements of the vector
1540  #[inline]
1541  #[must_use]
1542  pub fn reduce_add(self) -> f64 {
1543    pick! {
1544      if #[cfg(target_feature="ssse3")] {
1545        let a = add_horizontal_m128d(self.sse, self.sse);
1546        a.to_array()[0]
1547      } else if #[cfg(any(target_feature="sse2", target_feature="simd128"))] {
1548        let a: [f64;2] = cast(self);
1549        a.iter().sum()
1550      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1551        unsafe { vgetq_lane_f64(self.neon,0) + vgetq_lane_f64(self.neon,1) }
1552      } else {
1553        self.arr.iter().sum()
1554      }
1555    }
1556  }
1557
1558  #[inline]
1559  #[must_use]
1560  pub fn ln(self) -> Self {
1561    const_f64_as_f64x2!(P0, 7.70838733755885391666E0);
1562    const_f64_as_f64x2!(P1, 1.79368678507819816313E1);
1563    const_f64_as_f64x2!(P2, 1.44989225341610930846E1);
1564    const_f64_as_f64x2!(P3, 4.70579119878881725854E0);
1565    const_f64_as_f64x2!(P4, 4.97494994976747001425E-1);
1566    const_f64_as_f64x2!(P5, 1.01875663804580931796E-4);
1567
1568    const_f64_as_f64x2!(Q0, 2.31251620126765340583E1);
1569    const_f64_as_f64x2!(Q1, 7.11544750618563894466E1);
1570    const_f64_as_f64x2!(Q2, 8.29875266912776603211E1);
1571    const_f64_as_f64x2!(Q3, 4.52279145837532221105E1);
1572    const_f64_as_f64x2!(Q4, 1.12873587189167450590E1);
1573    const_f64_as_f64x2!(LN2F_HI, 0.693359375);
1574    const_f64_as_f64x2!(LN2F_LO, -2.12194440e-4);
1575    const_f64_as_f64x2!(VM_SQRT2, 1.414213562373095048801);
1576    const_f64_as_f64x2!(VM_SMALLEST_NORMAL, 1.17549435E-38);
1577
1578    let x1 = self;
1579    let x = Self::fraction_2(x1);
1580    let e = Self::exponent(x1);
1581    let mask = x.simd_gt(VM_SQRT2 * f64x2::HALF);
1582    let x = (!mask).blend(x + x, x);
1583    let fe = mask.blend(e + Self::ONE, e);
1584    let x = x - Self::ONE;
1585    let px = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
1586    let x2 = x * x;
1587    let px = x2 * x * px;
1588    let qx = polynomial_5n!(x, Q0, Q1, Q2, Q3, Q4);
1589    let res = px / qx;
1590    let res = fe.mul_add(LN2F_LO, res);
1591    let res = res + x2.mul_neg_add(f64x2::HALF, x);
1592    let res = fe.mul_add(LN2F_HI, res);
1593    let overflow = !self.is_finite();
1594    let underflow = x1.simd_lt(VM_SMALLEST_NORMAL);
1595    let mask = overflow | underflow;
1596    if !mask.any() {
1597      res
1598    } else {
1599      let is_zero = self.is_zero_or_subnormal();
1600      let res = underflow.blend(Self::nan_log(), res);
1601      let res = is_zero.blend(Self::infinity(), res);
1602      let res = overflow.blend(self, res);
1603      res
1604    }
1605  }
1606
1607  #[inline]
1608  #[must_use]
1609  pub fn log2(self) -> Self {
1610    Self::ln(self) * Self::LOG2_E
1611  }
1612  #[inline]
1613  #[must_use]
1614  pub fn log10(self) -> Self {
1615    Self::ln(self) * Self::LOG10_E
1616  }
1617
1618  #[inline]
1619  #[must_use]
1620  pub fn pow_f64x2(self, y: Self) -> Self {
1621    const_f64_as_f64x2!(ln2d_hi, 0.693145751953125);
1622    const_f64_as_f64x2!(ln2d_lo, 1.42860682030941723212E-6);
1623    const_f64_as_f64x2!(P0log, 2.0039553499201281259648E1);
1624    const_f64_as_f64x2!(P1log, 5.7112963590585538103336E1);
1625    const_f64_as_f64x2!(P2log, 6.0949667980987787057556E1);
1626    const_f64_as_f64x2!(P3log, 2.9911919328553073277375E1);
1627    const_f64_as_f64x2!(P4log, 6.5787325942061044846969E0);
1628    const_f64_as_f64x2!(P5log, 4.9854102823193375972212E-1);
1629    const_f64_as_f64x2!(P6log, 4.5270000862445199635215E-5);
1630    const_f64_as_f64x2!(Q0log, 6.0118660497603843919306E1);
1631    const_f64_as_f64x2!(Q1log, 2.1642788614495947685003E2);
1632    const_f64_as_f64x2!(Q2log, 3.0909872225312059774938E2);
1633    const_f64_as_f64x2!(Q3log, 2.2176239823732856465394E2);
1634    const_f64_as_f64x2!(Q4log, 8.3047565967967209469434E1);
1635    const_f64_as_f64x2!(Q5log, 1.5062909083469192043167E1);
1636
1637    // Taylor expansion constants
1638    const_f64_as_f64x2!(p2, 1.0 / 2.0); // coefficients for Taylor expansion of exp
1639    const_f64_as_f64x2!(p3, 1.0 / 6.0);
1640    const_f64_as_f64x2!(p4, 1.0 / 24.0);
1641    const_f64_as_f64x2!(p5, 1.0 / 120.0);
1642    const_f64_as_f64x2!(p6, 1.0 / 720.0);
1643    const_f64_as_f64x2!(p7, 1.0 / 5040.0);
1644    const_f64_as_f64x2!(p8, 1.0 / 40320.0);
1645    const_f64_as_f64x2!(p9, 1.0 / 362880.0);
1646    const_f64_as_f64x2!(p10, 1.0 / 3628800.0);
1647    const_f64_as_f64x2!(p11, 1.0 / 39916800.0);
1648    const_f64_as_f64x2!(p12, 1.0 / 479001600.0);
1649    const_f64_as_f64x2!(p13, 1.0 / 6227020800.0);
1650
1651    let x1 = self.abs();
1652    let x = x1.fraction_2();
1653    let mask = x.simd_gt(f64x2::SQRT_2 * f64x2::HALF);
1654    let x = (!mask).blend(x + x, x);
1655    let x = x - f64x2::ONE;
1656    let x2 = x * x;
1657    let px = polynomial_6!(x, P0log, P1log, P2log, P3log, P4log, P5log, P6log);
1658    let px = px * x * x2;
1659    let qx = polynomial_6n!(x, Q0log, Q1log, Q2log, Q3log, Q4log, Q5log);
1660    let lg1 = px / qx;
1661
1662    let ef = x1.exponent();
1663    let ef = mask.blend(ef + f64x2::ONE, ef);
1664    let e1 = (ef * y).round();
1665    let yr = ef.mul_sub(y, e1);
1666
1667    let lg = f64x2::HALF.mul_neg_add(x2, x) + lg1;
1668    let x2err = (f64x2::HALF * x).mul_sub(x, f64x2::HALF * x2);
1669    let lg_err = f64x2::HALF.mul_add(x2, lg - x) - lg1;
1670
1671    let e2 = (lg * y * f64x2::LOG2_E).round();
1672    let v = lg.mul_sub(y, e2 * ln2d_hi);
1673    let v = e2.mul_neg_add(ln2d_lo, v);
1674    let v = v - (lg_err + x2err).mul_sub(y, yr * f64x2::LN_2);
1675
1676    let x = v;
1677    let e3 = (x * f64x2::LOG2_E).round();
1678    let x = e3.mul_neg_add(f64x2::LN_2, x);
1679    let z =
1680      polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
1681        + f64x2::ONE;
1682    let ee = e1 + e2 + e3;
1683    let ei = cast::<_, i64x2>(ee.round_int());
1684    let ej = cast::<_, i64x2>(ei + (cast::<_, i64x2>(z) >> 52));
1685
1686    let overflow = cast::<_, f64x2>(!ej.simd_lt(i64x2::splat(0x07FF)))
1687      | ee.simd_gt(f64x2::splat(3000.0));
1688    let underflow = cast::<_, f64x2>(!ej.simd_gt(i64x2::splat(0x000)))
1689      | ee.simd_lt(f64x2::splat(-3000.0));
1690
1691    // Add exponent by integer addition
1692    let z = cast::<_, f64x2>(cast::<_, i64x2>(z) + (ei << 52));
1693
1694    // Check for overflow/underflow
1695    let z = if (overflow | underflow).any() {
1696      let z = underflow.blend(f64x2::ZERO, z);
1697      overflow.blend(Self::infinity(), z)
1698    } else {
1699      z
1700    };
1701
1702    // Check for self == 0
1703    let x_zero = self.is_zero_or_subnormal();
1704    let z = x_zero.blend(
1705      y.simd_lt(f64x2::ZERO).blend(
1706        Self::infinity(),
1707        y.simd_eq(f64x2::ZERO).blend(f64x2::ONE, f64x2::ZERO),
1708      ),
1709      z,
1710    );
1711
1712    let x_sign = self.sign_bit();
1713    let z = if x_sign.any() {
1714      // Y into an integer
1715      let yi = y.simd_eq(y.round());
1716      // Is y odd?
1717      let y_odd = cast::<_, i64x2>(y.round_int() << 63).round_float();
1718
1719      let z1 =
1720        yi.blend(z | y_odd, self.simd_eq(Self::ZERO).blend(z, Self::nan_pow()));
1721      x_sign.blend(z1, z)
1722    } else {
1723      z
1724    };
1725
1726    let x_finite = self.is_finite();
1727    let y_finite = y.is_finite();
1728    let e_finite = ee.is_finite();
1729
1730    if (x_finite & y_finite & (e_finite | x_zero)).all() {
1731      return z;
1732    }
1733
1734    (self.is_nan() | y.is_nan()).blend(self + y, z)
1735  }
1736
1737  #[inline]
1738  pub fn powf(self, y: f64) -> Self {
1739    Self::pow_f64x2(self, f64x2::splat(y))
1740  }
1741
1742  #[inline]
1743  pub fn to_array(self) -> [f64; 2] {
1744    cast(self)
1745  }
1746
1747  #[inline]
1748  pub fn as_array(&self) -> &[f64; 2] {
1749    cast_ref(self)
1750  }
1751
1752  #[inline]
1753  pub fn as_mut_array(&mut self) -> &mut [f64; 2] {
1754    cast_mut(self)
1755  }
1756
1757  /// Converts the lower two `i32` lanes to two `f64` lanes (and dropping the
1758  /// higher two `i32` lanes)
1759  #[inline]
1760  pub fn from_i32x4_lower2(v: i32x4) -> Self {
1761    pick! {
1762      if #[cfg(target_feature="sse2")] {
1763        Self { sse: convert_to_m128d_from_lower2_i32_m128i(v.sse) }
1764      } else if #[cfg(target_feature="simd128")] {
1765        Self { simd: f64x2_convert_low_i32x4(v.simd)}
1766      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
1767        Self { neon: unsafe { vcvtq_f64_s64(vmovl_s32(vget_low_s32(v.neon))) }}
1768      } else {
1769        Self { arr: [
1770            v.as_array()[0] as f64,
1771            v.as_array()[1] as f64,
1772        ]}
1773      }
1774    }
1775  }
1776}
1777
1778impl From<i32x4> for f64x2 {
1779  /// Converts the lower two `i32` lanes to two `f64` lanes (and dropping the
1780  /// higher two `i32` lanes)
1781  #[inline]
1782  fn from(v: i32x4) -> Self {
1783    Self::from_i32x4_lower2(v)
1784  }
1785}
1786
1787impl Not for f64x2 {
1788  type Output = Self;
1789  #[inline]
1790  fn not(self) -> Self {
1791    pick! {
1792      if #[cfg(target_feature="sse2")] {
1793        Self { sse: self.sse.not() }
1794      } else if #[cfg(target_feature="simd128")] {
1795        Self { simd: v128_not(self.simd) }
1796      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1797        unsafe {Self { neon: vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(self.neon))) }}
1798      } else {
1799        Self { arr: [
1800          f64::from_bits(!self.arr[0].to_bits()),
1801          f64::from_bits(!self.arr[1].to_bits()),
1802        ]}
1803      }
1804    }
1805  }
1806}