1use super::*;
2
3pick! {
4 if #[cfg(target_feature="avx")] {
5 #[derive(Default, Clone, Copy, PartialEq)]
6 #[repr(C, align(32))]
7 pub struct f64x4 { pub(crate) avx: m256d }
8 } else {
9 #[derive(Default, Clone, Copy, PartialEq)]
10 #[repr(C, align(32))]
11 pub struct f64x4 { pub(crate) a: f64x2, pub(crate) b: f64x2 }
12 }
13}
14
15macro_rules! const_f64_as_f64x4 {
16 ($i:ident, $f:expr) => {
17 #[allow(non_upper_case_globals)]
18 pub const $i: f64x4 = f64x4::new([$f; 4]);
19 };
20}
21
22impl f64x4 {
23 const_f64_as_f64x4!(ONE, 1.0);
24 const_f64_as_f64x4!(ZERO, 0.0);
25 const_f64_as_f64x4!(HALF, 0.5);
26 const_f64_as_f64x4!(E, core::f64::consts::E);
27 const_f64_as_f64x4!(FRAC_1_PI, core::f64::consts::FRAC_1_PI);
28 const_f64_as_f64x4!(FRAC_2_PI, core::f64::consts::FRAC_2_PI);
29 const_f64_as_f64x4!(FRAC_2_SQRT_PI, core::f64::consts::FRAC_2_SQRT_PI);
30 const_f64_as_f64x4!(FRAC_1_SQRT_2, core::f64::consts::FRAC_1_SQRT_2);
31 const_f64_as_f64x4!(FRAC_PI_2, core::f64::consts::FRAC_PI_2);
32 const_f64_as_f64x4!(FRAC_PI_3, core::f64::consts::FRAC_PI_3);
33 const_f64_as_f64x4!(FRAC_PI_4, core::f64::consts::FRAC_PI_4);
34 const_f64_as_f64x4!(FRAC_PI_6, core::f64::consts::FRAC_PI_6);
35 const_f64_as_f64x4!(FRAC_PI_8, core::f64::consts::FRAC_PI_8);
36 const_f64_as_f64x4!(LN_2, core::f64::consts::LN_2);
37 const_f64_as_f64x4!(LN_10, core::f64::consts::LN_10);
38 const_f64_as_f64x4!(LOG2_E, core::f64::consts::LOG2_E);
39 const_f64_as_f64x4!(LOG10_E, core::f64::consts::LOG10_E);
40 const_f64_as_f64x4!(LOG10_2, core::f64::consts::LOG10_2);
41 const_f64_as_f64x4!(LOG2_10, core::f64::consts::LOG2_10);
42 const_f64_as_f64x4!(PI, core::f64::consts::PI);
43 const_f64_as_f64x4!(SQRT_2, core::f64::consts::SQRT_2);
44 const_f64_as_f64x4!(TAU, core::f64::consts::TAU);
45}
46
47unsafe impl Zeroable for f64x4 {}
48unsafe impl Pod for f64x4 {}
49
50impl AlignTo for f64x4 {
51 type Elem = f64;
52}
53
54impl Add for f64x4 {
55 type Output = Self;
56 #[inline]
57 fn add(self, rhs: Self) -> Self::Output {
58 pick! {
59 if #[cfg(target_feature="avx")] {
60 Self { avx: add_m256d(self.avx, rhs.avx) }
61 } else {
62 Self {
63 a : self.a.add(rhs.a),
64 b : self.b.add(rhs.b),
65 }
66 }
67 }
68 }
69}
70
71impl Sub for f64x4 {
72 type Output = Self;
73 #[inline]
74 fn sub(self, rhs: Self) -> Self::Output {
75 pick! {
76 if #[cfg(target_feature="avx")] {
77 Self { avx: sub_m256d(self.avx, rhs.avx) }
78 } else {
79 Self {
80 a : self.a.sub(rhs.a),
81 b : self.b.sub(rhs.b),
82 }
83 }
84 }
85 }
86}
87
88impl Mul for f64x4 {
89 type Output = Self;
90 #[inline]
91 fn mul(self, rhs: Self) -> Self::Output {
92 pick! {
93 if #[cfg(target_feature="avx")] {
94 Self { avx: mul_m256d(self.avx, rhs.avx) }
95 } else {
96 Self {
97 a : self.a.mul(rhs.a),
98 b : self.b.mul(rhs.b),
99 }
100 }
101 }
102 }
103}
104
105impl Div for f64x4 {
106 type Output = Self;
107 #[inline]
108 fn div(self, rhs: Self) -> Self::Output {
109 pick! {
110 if #[cfg(target_feature="avx")] {
111 Self { avx: div_m256d(self.avx, rhs.avx) }
112 } else {
113 Self {
114 a : self.a.div(rhs.a),
115 b : self.b.div(rhs.b),
116 }
117 }
118 }
119 }
120}
121
122impl Neg for f64x4 {
123 type Output = Self;
124 #[inline]
125 fn neg(self) -> Self::Output {
126 pick! {
127 if #[cfg(target_feature="avx")] {
128 Self { avx: bitxor_m256d(self.avx, Self::splat(-0.0).avx) }
129 } else {
130 Self {
131 a : self.a.neg(),
132 b : self.b.neg(),
133 }
134 }
135 }
136 }
137}
138
139impl Add<f64> for f64x4 {
140 type Output = Self;
141 #[inline]
142 fn add(self, rhs: f64) -> Self::Output {
143 self.add(Self::splat(rhs))
144 }
145}
146
147impl Sub<f64> for f64x4 {
148 type Output = Self;
149 #[inline]
150 fn sub(self, rhs: f64) -> Self::Output {
151 self.sub(Self::splat(rhs))
152 }
153}
154
155impl Mul<f64> for f64x4 {
156 type Output = Self;
157 #[inline]
158 fn mul(self, rhs: f64) -> Self::Output {
159 self.mul(Self::splat(rhs))
160 }
161}
162
163impl Div<f64> for f64x4 {
164 type Output = Self;
165 #[inline]
166 fn div(self, rhs: f64) -> Self::Output {
167 self.div(Self::splat(rhs))
168 }
169}
170
171impl Add<f64x4> for f64 {
172 type Output = f64x4;
173 #[inline]
174 fn add(self, rhs: f64x4) -> Self::Output {
175 f64x4::splat(self).add(rhs)
176 }
177}
178
179impl Sub<f64x4> for f64 {
180 type Output = f64x4;
181 #[inline]
182 fn sub(self, rhs: f64x4) -> Self::Output {
183 f64x4::splat(self).sub(rhs)
184 }
185}
186
187impl Mul<f64x4> for f64 {
188 type Output = f64x4;
189 #[inline]
190 fn mul(self, rhs: f64x4) -> Self::Output {
191 f64x4::splat(self).mul(rhs)
192 }
193}
194
195impl Div<f64x4> for f64 {
196 type Output = f64x4;
197 #[inline]
198 fn div(self, rhs: f64x4) -> Self::Output {
199 f64x4::splat(self).div(rhs)
200 }
201}
202
203impl BitAnd for f64x4 {
204 type Output = Self;
205 #[inline]
206 fn bitand(self, rhs: Self) -> Self::Output {
207 pick! {
208 if #[cfg(target_feature="avx")] {
209 Self { avx: bitand_m256d(self.avx, rhs.avx) }
210 } else {
211 Self {
212 a : self.a.bitand(rhs.a),
213 b : self.b.bitand(rhs.b),
214 }
215 }
216 }
217 }
218}
219
220impl BitOr for f64x4 {
221 type Output = Self;
222 #[inline]
223 fn bitor(self, rhs: Self) -> Self::Output {
224 pick! {
225 if #[cfg(target_feature="avx")] {
226 Self { avx: bitor_m256d(self.avx, rhs.avx) }
227 } else {
228 Self {
229 a : self.a.bitor(rhs.a),
230 b : self.b.bitor(rhs.b),
231 }
232 }
233 }
234 }
235}
236
237impl BitXor for f64x4 {
238 type Output = Self;
239 #[inline]
240 fn bitxor(self, rhs: Self) -> Self::Output {
241 pick! {
242 if #[cfg(target_feature="avx")] {
243 Self { avx: bitxor_m256d(self.avx, rhs.avx) }
244 } else {
245 Self {
246 a : self.a.bitxor(rhs.a),
247 b : self.b.bitxor(rhs.b),
248 }
249 }
250 }
251 }
252}
253
254impl CmpEq for f64x4 {
255 type Output = Self;
256 #[inline]
257 fn simd_eq(self, rhs: Self) -> Self::Output {
258 pick! {
259 if #[cfg(target_feature="avx")]{
260 Self { avx: cmp_op_mask_m256d::<{cmp_op!(EqualOrdered)}>(self.avx, rhs.avx) }
261 } else {
262 Self {
263 a : self.a.simd_eq(rhs.a),
264 b : self.b.simd_eq(rhs.b),
265 }
266 }
267 }
268 }
269}
270
271impl CmpGe for f64x4 {
272 type Output = Self;
273 #[inline]
274 fn simd_ge(self, rhs: Self) -> Self::Output {
275 pick! {
276 if #[cfg(target_feature="avx")]{
277 Self { avx: cmp_op_mask_m256d::<{cmp_op!(GreaterEqualOrdered)}>(self.avx, rhs.avx) }
278 } else {
279 Self {
280 a : self.a.simd_ge(rhs.a),
281 b : self.b.simd_ge(rhs.b),
282 }
283 }
284 }
285 }
286}
287
288impl CmpGt for f64x4 {
289 type Output = Self;
290 #[inline]
291 fn simd_gt(self, rhs: Self) -> Self::Output {
292 pick! {
293 if #[cfg(target_feature="avx")]{
294 Self { avx: cmp_op_mask_m256d::<{cmp_op!( GreaterThanOrdered)}>(self.avx, rhs.avx) }
295 } else {
296 Self {
297 a : self.a.simd_gt(rhs.a),
298 b : self.b.simd_gt(rhs.b),
299 }
300 }
301 }
302 }
303}
304
305impl CmpNe for f64x4 {
306 type Output = Self;
307 #[inline]
308 fn simd_ne(self, rhs: Self) -> Self::Output {
309 pick! {
310 if #[cfg(target_feature="avx")]{
311 Self { avx: cmp_op_mask_m256d::<{cmp_op!(NotEqualOrdered)}>(self.avx, rhs.avx) }
312 } else {
313 Self {
314 a : self.a.simd_ne(rhs.a),
315 b : self.b.simd_ne(rhs.b),
316 }
317 }
318 }
319 }
320}
321
322impl CmpLe for f64x4 {
323 type Output = Self;
324 #[inline]
325 fn simd_le(self, rhs: Self) -> Self::Output {
326 pick! {
327 if #[cfg(target_feature="avx")]{
328 Self { avx: cmp_op_mask_m256d::<{cmp_op!(LessEqualOrdered)}>(self.avx, rhs.avx) }
329 } else {
330 Self {
331 a : self.a.simd_le(rhs.a),
332 b : self.b.simd_le(rhs.b),
333 }
334 }
335 }
336 }
337}
338
339impl CmpLt for f64x4 {
340 type Output = Self;
341 #[inline]
342 fn simd_lt(self, rhs: Self) -> Self::Output {
343 pick! {
344 if #[cfg(target_feature="avx")]{
345 Self { avx: cmp_op_mask_m256d::<{cmp_op!(LessThanOrdered)}>(self.avx, rhs.avx) }
346 } else {
347 Self {
348 a : self.a.simd_lt(rhs.a),
349 b : self.b.simd_lt(rhs.b),
350 }
351 }
352 }
353 }
354}
355
356impl f64x4 {
357 #[inline]
358 #[must_use]
359 pub const fn new(array: [f64; 4]) -> Self {
360 unsafe { core::mem::transmute(array) }
361 }
362 #[inline]
363 #[must_use]
364 pub fn blend(self, t: Self, f: Self) -> Self {
365 pick! {
366 if #[cfg(target_feature="avx")] {
367 Self { avx: blend_varying_m256d(f.avx, t.avx, self.avx) }
368 } else {
369 Self {
370 a : self.a.blend(t.a, f.a),
371 b : self.b.blend(t.b, f.b),
372 }
373 }
374 }
375 }
376
377 #[inline]
378 #[must_use]
379 pub fn abs(self) -> Self {
380 pick! {
381 if #[cfg(target_feature="avx")] {
382 let non_sign_bits = f64x4::from(f64::from_bits(i64::MAX as u64));
383 self & non_sign_bits
384 } else {
385 Self {
386 a : self.a.abs(),
387 b : self.b.abs(),
388 }
389 }
390 }
391 }
392
393 #[inline]
394 #[must_use]
395 pub fn floor(self) -> Self {
396 pick! {
397 if #[cfg(target_feature="avx")] {
398 Self { avx: floor_m256d(self.avx) }
399 } else {
400 Self {
401 a : self.a.floor(),
402 b : self.b.floor(),
403 }
404 }
405 }
406 }
407 #[inline]
408 #[must_use]
409 pub fn ceil(self) -> Self {
410 pick! {
411 if #[cfg(target_feature="avx")] {
412 Self { avx: ceil_m256d(self.avx) }
413 } else {
414 Self {
415 a : self.a.ceil(),
416 b : self.b.ceil(),
417 }
418 }
419 }
420 }
421
422 #[inline]
426 #[must_use]
427 pub fn fast_max(self, rhs: Self) -> Self {
428 pick! {
429 if #[cfg(target_feature="avx")] {
430 Self { avx: max_m256d(self.avx, rhs.avx) }
431 } else {
432 Self {
433 a : self.a.fast_max(rhs.a),
434 b : self.b.fast_max(rhs.b),
435 }
436 }
437 }
438 }
439
440 #[inline]
444 #[must_use]
445 pub fn max(self, rhs: Self) -> Self {
446 pick! {
447 if #[cfg(target_feature="avx")] {
448 rhs.is_nan().blend(self, Self { avx: max_m256d(self.avx, rhs.avx) })
452 } else {
453 Self {
454 a : self.a.max(rhs.a),
455 b : self.b.max(rhs.b),
456 }
457 }
458 }
459 }
460
461 #[inline]
465 #[must_use]
466 pub fn fast_min(self, rhs: Self) -> Self {
467 pick! {
468 if #[cfg(target_feature="avx")] {
469 Self { avx: min_m256d(self.avx, rhs.avx) }
470 } else {
471 Self {
472 a : self.a.fast_min(rhs.a),
473 b : self.b.fast_min(rhs.b),
474 }
475 }
476 }
477 }
478
479 #[inline]
483 #[must_use]
484 pub fn min(self, rhs: Self) -> Self {
485 pick! {
486 if #[cfg(target_feature="avx")] {
487 rhs.is_nan().blend(self, Self { avx: min_m256d(self.avx, rhs.avx) })
491 } else {
492 Self {
493 a : self.a.min(rhs.a),
494 b : self.b.min(rhs.b),
495 }
496 }
497 }
498 }
499
500 #[inline]
501 #[must_use]
502 pub fn is_nan(self) -> Self {
503 pick! {
504 if #[cfg(target_feature="avx")] {
505 Self { avx: cmp_op_mask_m256d::<{cmp_op!(Unordered)}>(self.avx, self.avx ) }
506 } else {
507 Self {
508 a : self.a.is_nan(),
509 b : self.b.is_nan(),
510 }
511 }
512 }
513 }
514
515 #[inline]
516 #[must_use]
517 pub fn is_finite(self) -> Self {
518 let shifted_exp_mask = u64x4::from(0xFFE0000000000000);
519 let u: u64x4 = cast(self);
520 let shift_u = u << 1_u64;
521 let out = !(shift_u & shifted_exp_mask).simd_eq(shifted_exp_mask);
522 cast(out)
523 }
524
525 #[inline]
526 #[must_use]
527 pub fn is_inf(self) -> Self {
528 let shifted_inf = u64x4::from(0xFFE0000000000000);
529 let u: u64x4 = cast(self);
530 let shift_u = u << 1_u64;
531 let out = (shift_u).simd_eq(shifted_inf);
532 cast(out)
533 }
534
535 #[inline]
536 #[must_use]
537 pub fn round(self) -> Self {
538 pick! {
539 if #[cfg(target_feature="avx")] {
540 Self { avx: round_m256d::<{round_op!(Nearest)}>(self.avx) }
541 } else {
542 Self {
543 a : self.a.round(),
544 b : self.b.round(),
545 }
546 }
547 }
548 }
549
550 #[inline]
551 #[must_use]
552 pub fn round_int(self) -> i64x4 {
553 let rounded: [f64; 4] = cast(self.round());
555 cast([
556 rounded[0] as i64,
557 rounded[1] as i64,
558 rounded[2] as i64,
559 rounded[3] as i64,
560 ])
561 }
562
563 #[inline]
588 #[must_use]
589 pub fn mul_add(self, m: Self, a: Self) -> Self {
590 pick! {
591 if #[cfg(all(target_feature="avx",target_feature="fma"))] {
592 Self { avx: fused_mul_add_m256d(self.avx, m.avx, a.avx) }
593 } else if #[cfg(target_feature="avx")] {
594 (self * m) + a
596 } else {
597 Self {
598 a : self.a.mul_add(m.a, a.a),
599 b : self.b.mul_add(m.b, a.b),
600 }
601 }
602 }
603 }
604
605 #[inline]
630 #[must_use]
631 pub fn mul_sub(self, m: Self, s: Self) -> Self {
632 pick! {
633 if #[cfg(all(target_feature="avx",target_feature="fma"))] {
634 Self { avx: fused_mul_sub_m256d(self.avx, m.avx, s.avx) }
635 } else if #[cfg(target_feature="avx")] {
636 (self * m) - s
638 } else {
639 Self {
640 a : self.a.mul_sub(m.a, s.a),
641 b : self.b.mul_sub(m.b, s.b),
642 }
643 }
644 }
645 }
646
647 #[inline]
672 #[must_use]
673 pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
674 pick! {
675 if #[cfg(all(target_feature="avx",target_feature="fma"))] {
676 Self { avx: fused_mul_neg_add_m256d(self.avx, m.avx, a.avx) }
677 } else if #[cfg(target_feature="avx")] {
678 a - (self * m)
680 } else {
681 Self {
682 a : self.a.mul_neg_add(m.a, a.a),
683 b : self.b.mul_neg_add(m.b, a.b),
684 }
685 }
686 }
687 }
688
689 #[inline]
714 #[must_use]
715 pub fn mul_neg_sub(self, m: Self, s: Self) -> Self {
716 pick! {
717 if #[cfg(all(target_feature="avx",target_feature="fma"))] {
718 Self { avx: fused_mul_neg_sub_m256d(self.avx, m.avx, s.avx) }
719 } else if #[cfg(target_feature="avx")] {
720 -(self * m) - s
722 } else {
723 Self {
724 a : self.a.mul_neg_sub(m.a, s.a),
725 b : self.b.mul_neg_sub(m.b, s.b),
726 }
727 }
728 }
729 }
730
731 #[inline]
732 #[must_use]
733 pub fn flip_signs(self, signs: Self) -> Self {
734 self ^ (signs & Self::from(-0.0))
735 }
736
737 #[inline]
738 #[must_use]
739 pub fn copysign(self, sign: Self) -> Self {
740 let magnitude_mask = Self::from(f64::from_bits(u64::MAX >> 1));
741 (self & magnitude_mask) | (sign & Self::from(-0.0))
742 }
743
744 #[inline]
745 pub fn asin_acos(self) -> (Self, Self) {
746 const_f64_as_f64x4!(R4asin, 2.967721961301243206100E-3);
749 const_f64_as_f64x4!(R3asin, -5.634242780008963776856E-1);
750 const_f64_as_f64x4!(R2asin, 6.968710824104713396794E0);
751 const_f64_as_f64x4!(R1asin, -2.556901049652824852289E1);
752 const_f64_as_f64x4!(R0asin, 2.853665548261061424989E1);
753
754 const_f64_as_f64x4!(S3asin, -2.194779531642920639778E1);
755 const_f64_as_f64x4!(S2asin, 1.470656354026814941758E2);
756 const_f64_as_f64x4!(S1asin, -3.838770957603691357202E2);
757 const_f64_as_f64x4!(S0asin, 3.424398657913078477438E2);
758
759 const_f64_as_f64x4!(P5asin, 4.253011369004428248960E-3);
760 const_f64_as_f64x4!(P4asin, -6.019598008014123785661E-1);
761 const_f64_as_f64x4!(P3asin, 5.444622390564711410273E0);
762 const_f64_as_f64x4!(P2asin, -1.626247967210700244449E1);
763 const_f64_as_f64x4!(P1asin, 1.956261983317594739197E1);
764 const_f64_as_f64x4!(P0asin, -8.198089802484824371615E0);
765
766 const_f64_as_f64x4!(Q4asin, -1.474091372988853791896E1);
767 const_f64_as_f64x4!(Q3asin, 7.049610280856842141659E1);
768 const_f64_as_f64x4!(Q2asin, -1.471791292232726029859E2);
769 const_f64_as_f64x4!(Q1asin, 1.395105614657485689735E2);
770 const_f64_as_f64x4!(Q0asin, -4.918853881490881290097E1);
771
772 let xa = self.abs();
773
774 let big = xa.simd_ge(f64x4::splat(0.625));
775
776 let x1 = big.blend(f64x4::splat(1.0) - xa, xa * xa);
777
778 let x2 = x1 * x1;
779 let x3 = x2 * x1;
780 let x4 = x2 * x2;
781 let x5 = x4 * x1;
782
783 let do_big = big.any();
784 let do_small = !big.all();
785
786 let mut rx = f64x4::default();
787 let mut sx = f64x4::default();
788 let mut px = f64x4::default();
789 let mut qx = f64x4::default();
790
791 if do_big {
792 rx = x3.mul_add(R3asin, x2 * R2asin)
793 + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
794 sx =
795 x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
796 }
797
798 if do_small {
799 px = x3.mul_add(P3asin, P0asin)
800 + x4.mul_add(P4asin, x1 * P1asin)
801 + x5.mul_add(P5asin, x2 * P2asin);
802 qx = x4.mul_add(Q4asin, x5)
803 + x3.mul_add(Q3asin, x1 * Q1asin)
804 + x2.mul_add(Q2asin, Q0asin);
805 };
806
807 let vx = big.blend(rx, px);
808 let wx = big.blend(sx, qx);
809
810 let y1 = vx / wx * x1;
811
812 let mut z1 = f64x4::default();
813 let mut z2 = f64x4::default();
814 if do_big {
815 let xb = (x1 + x1).sqrt();
816 z1 = xb.mul_add(y1, xb);
817 }
818
819 if do_small {
820 z2 = xa.mul_add(y1, xa);
821 }
822
823 let z3 = f64x4::FRAC_PI_2 - z1;
825 let asin = big.blend(z3, z2);
826 let asin = asin.flip_signs(self);
827
828 let z3 = self.simd_lt(f64x4::ZERO).blend(f64x4::PI - z1, z1);
830 let z4 = f64x4::FRAC_PI_2 - z2.flip_signs(self);
831 let acos = big.blend(z3, z4);
832
833 (asin, acos)
834 }
835
836 #[inline]
837 pub fn acos(self) -> Self {
838 const_f64_as_f64x4!(R4asin, 2.967721961301243206100E-3);
841 const_f64_as_f64x4!(R3asin, -5.634242780008963776856E-1);
842 const_f64_as_f64x4!(R2asin, 6.968710824104713396794E0);
843 const_f64_as_f64x4!(R1asin, -2.556901049652824852289E1);
844 const_f64_as_f64x4!(R0asin, 2.853665548261061424989E1);
845
846 const_f64_as_f64x4!(S3asin, -2.194779531642920639778E1);
847 const_f64_as_f64x4!(S2asin, 1.470656354026814941758E2);
848 const_f64_as_f64x4!(S1asin, -3.838770957603691357202E2);
849 const_f64_as_f64x4!(S0asin, 3.424398657913078477438E2);
850
851 const_f64_as_f64x4!(P5asin, 4.253011369004428248960E-3);
852 const_f64_as_f64x4!(P4asin, -6.019598008014123785661E-1);
853 const_f64_as_f64x4!(P3asin, 5.444622390564711410273E0);
854 const_f64_as_f64x4!(P2asin, -1.626247967210700244449E1);
855 const_f64_as_f64x4!(P1asin, 1.956261983317594739197E1);
856 const_f64_as_f64x4!(P0asin, -8.198089802484824371615E0);
857
858 const_f64_as_f64x4!(Q4asin, -1.474091372988853791896E1);
859 const_f64_as_f64x4!(Q3asin, 7.049610280856842141659E1);
860 const_f64_as_f64x4!(Q2asin, -1.471791292232726029859E2);
861 const_f64_as_f64x4!(Q1asin, 1.395105614657485689735E2);
862 const_f64_as_f64x4!(Q0asin, -4.918853881490881290097E1);
863
864 let xa = self.abs();
865
866 let big = xa.simd_ge(f64x4::splat(0.625));
867
868 let x1 = big.blend(f64x4::splat(1.0) - xa, xa * xa);
869
870 let x2 = x1 * x1;
871 let x3 = x2 * x1;
872 let x4 = x2 * x2;
873 let x5 = x4 * x1;
874
875 let do_big = big.any();
876 let do_small = !big.all();
877
878 let mut rx = f64x4::default();
879 let mut sx = f64x4::default();
880 let mut px = f64x4::default();
881 let mut qx = f64x4::default();
882
883 if do_big {
884 rx = x3.mul_add(R3asin, x2 * R2asin)
885 + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
886 sx =
887 x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
888 }
889 if do_small {
890 px = x3.mul_add(P3asin, P0asin)
891 + x4.mul_add(P4asin, x1 * P1asin)
892 + x5.mul_add(P5asin, x2 * P2asin);
893 qx = x4.mul_add(Q4asin, x5)
894 + x3.mul_add(Q3asin, x1 * Q1asin)
895 + x2.mul_add(Q2asin, Q0asin);
896 };
897
898 let vx = big.blend(rx, px);
899 let wx = big.blend(sx, qx);
900
901 let y1 = vx / wx * x1;
902
903 let mut z1 = f64x4::default();
904 let mut z2 = f64x4::default();
905 if do_big {
906 let xb = (x1 + x1).sqrt();
907 z1 = xb.mul_add(y1, xb);
908 }
909
910 if do_small {
911 z2 = xa.mul_add(y1, xa);
912 }
913
914 let z3 = self.simd_lt(f64x4::ZERO).blend(f64x4::PI - z1, z1);
916 let z4 = f64x4::FRAC_PI_2 - z2.flip_signs(self);
917 let acos = big.blend(z3, z4);
918
919 acos
920 }
921 #[inline]
922 #[must_use]
923 pub fn asin(self) -> Self {
924 const_f64_as_f64x4!(R4asin, 2.967721961301243206100E-3);
927 const_f64_as_f64x4!(R3asin, -5.634242780008963776856E-1);
928 const_f64_as_f64x4!(R2asin, 6.968710824104713396794E0);
929 const_f64_as_f64x4!(R1asin, -2.556901049652824852289E1);
930 const_f64_as_f64x4!(R0asin, 2.853665548261061424989E1);
931
932 const_f64_as_f64x4!(S3asin, -2.194779531642920639778E1);
933 const_f64_as_f64x4!(S2asin, 1.470656354026814941758E2);
934 const_f64_as_f64x4!(S1asin, -3.838770957603691357202E2);
935 const_f64_as_f64x4!(S0asin, 3.424398657913078477438E2);
936
937 const_f64_as_f64x4!(P5asin, 4.253011369004428248960E-3);
938 const_f64_as_f64x4!(P4asin, -6.019598008014123785661E-1);
939 const_f64_as_f64x4!(P3asin, 5.444622390564711410273E0);
940 const_f64_as_f64x4!(P2asin, -1.626247967210700244449E1);
941 const_f64_as_f64x4!(P1asin, 1.956261983317594739197E1);
942 const_f64_as_f64x4!(P0asin, -8.198089802484824371615E0);
943
944 const_f64_as_f64x4!(Q4asin, -1.474091372988853791896E1);
945 const_f64_as_f64x4!(Q3asin, 7.049610280856842141659E1);
946 const_f64_as_f64x4!(Q2asin, -1.471791292232726029859E2);
947 const_f64_as_f64x4!(Q1asin, 1.395105614657485689735E2);
948 const_f64_as_f64x4!(Q0asin, -4.918853881490881290097E1);
949
950 let xa = self.abs();
951
952 let big = xa.simd_ge(f64x4::splat(0.625));
953
954 let x1 = big.blend(f64x4::splat(1.0) - xa, xa * xa);
955
956 let x2 = x1 * x1;
957 let x3 = x2 * x1;
958 let x4 = x2 * x2;
959 let x5 = x4 * x1;
960
961 let do_big = big.any();
962 let do_small = !big.all();
963
964 let mut rx = f64x4::default();
965 let mut sx = f64x4::default();
966 let mut px = f64x4::default();
967 let mut qx = f64x4::default();
968
969 if do_big {
970 rx = x3.mul_add(R3asin, x2 * R2asin)
971 + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
972 sx =
973 x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
974 }
975 if do_small {
976 px = x3.mul_add(P3asin, P0asin)
977 + x4.mul_add(P4asin, x1 * P1asin)
978 + x5.mul_add(P5asin, x2 * P2asin);
979 qx = x4.mul_add(Q4asin, x5)
980 + x3.mul_add(Q3asin, x1 * Q1asin)
981 + x2.mul_add(Q2asin, Q0asin);
982 };
983
984 let vx = big.blend(rx, px);
985 let wx = big.blend(sx, qx);
986
987 let y1 = vx / wx * x1;
988
989 let mut z1 = f64x4::default();
990 let mut z2 = f64x4::default();
991 if do_big {
992 let xb = (x1 + x1).sqrt();
993 z1 = xb.mul_add(y1, xb);
994 }
995
996 if do_small {
997 z2 = xa.mul_add(y1, xa);
998 }
999
1000 let z3 = f64x4::FRAC_PI_2 - z1;
1002 let asin = big.blend(z3, z2);
1003 let asin = asin.flip_signs(self);
1004
1005 asin
1006 }
1007
1008 #[inline]
1009 pub fn atan(self) -> Self {
1010 const_f64_as_f64x4!(MORE_BITS, 6.123233995736765886130E-17);
1013 const_f64_as_f64x4!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
1014 const_f64_as_f64x4!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
1015
1016 const_f64_as_f64x4!(P4atan, -8.750608600031904122785E-1);
1017 const_f64_as_f64x4!(P3atan, -1.615753718733365076637E1);
1018 const_f64_as_f64x4!(P2atan, -7.500855792314704667340E1);
1019 const_f64_as_f64x4!(P1atan, -1.228866684490136173410E2);
1020 const_f64_as_f64x4!(P0atan, -6.485021904942025371773E1);
1021
1022 const_f64_as_f64x4!(Q4atan, 2.485846490142306297962E1);
1023 const_f64_as_f64x4!(Q3atan, 1.650270098316988542046E2);
1024 const_f64_as_f64x4!(Q2atan, 4.328810604912902668951E2);
1025 const_f64_as_f64x4!(Q1atan, 4.853903996359136964868E2);
1026 const_f64_as_f64x4!(Q0atan, 1.945506571482613964425E2);
1027
1028 let t = self.abs();
1029
1030 let notbig = t.simd_le(T3PO8);
1034 let notsmal = t.simd_ge(Self::splat(0.66));
1035
1036 let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1037 s = notsmal & s;
1038 let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1039 fac = notsmal & fac;
1040
1041 let mut a = notbig & t;
1045 a = notsmal.blend(a - Self::ONE, a);
1046 let mut b = notbig & Self::ONE;
1047 b = notsmal.blend(b + t, b);
1048 let z = a / b;
1049
1050 let zz = z * z;
1051
1052 let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1053 let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1054
1055 let mut re = (px / qx).mul_add(z * zz, z);
1056 re += s + fac;
1057
1058 re = (self.sign_bit()).blend(-re, re);
1060
1061 re
1062 }
1063
1064 #[inline]
1065 pub fn atan2(self, x: Self) -> Self {
1066 const_f64_as_f64x4!(MORE_BITS, 6.123233995736765886130E-17);
1069 const_f64_as_f64x4!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
1070 const_f64_as_f64x4!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
1071
1072 const_f64_as_f64x4!(P4atan, -8.750608600031904122785E-1);
1073 const_f64_as_f64x4!(P3atan, -1.615753718733365076637E1);
1074 const_f64_as_f64x4!(P2atan, -7.500855792314704667340E1);
1075 const_f64_as_f64x4!(P1atan, -1.228866684490136173410E2);
1076 const_f64_as_f64x4!(P0atan, -6.485021904942025371773E1);
1077
1078 const_f64_as_f64x4!(Q4atan, 2.485846490142306297962E1);
1079 const_f64_as_f64x4!(Q3atan, 1.650270098316988542046E2);
1080 const_f64_as_f64x4!(Q2atan, 4.328810604912902668951E2);
1081 const_f64_as_f64x4!(Q1atan, 4.853903996359136964868E2);
1082 const_f64_as_f64x4!(Q0atan, 1.945506571482613964425E2);
1083
1084 let y = self;
1085
1086 let x1 = x.abs();
1088 let y1 = y.abs();
1089 let swapxy = y1.simd_gt(x1);
1090 let mut x2 = swapxy.blend(y1, x1);
1092 let mut y2 = swapxy.blend(x1, y1);
1093
1094 let both_infinite = x.is_inf() & y.is_inf();
1096 if both_infinite.any() {
1097 let minus_one = -Self::ONE;
1098 x2 = both_infinite.blend(x2 & minus_one, x2);
1099 y2 = both_infinite.blend(y2 & minus_one, y2);
1100 }
1101
1102 let t = y2 / x2;
1104
1105 let notbig = t.simd_le(T3PO8);
1109 let notsmal = t.simd_ge(Self::splat(0.66));
1110
1111 let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1112 s = notsmal & s;
1113 let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1114 fac = notsmal & fac;
1115
1116 let mut a = notbig & t;
1120 a = notsmal.blend(a - Self::ONE, a);
1121 let mut b = notbig & Self::ONE;
1122 b = notsmal.blend(b + t, b);
1123 let z = a / b;
1124
1125 let zz = z * z;
1126
1127 let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1128 let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1129
1130 let mut re = (px / qx).mul_add(z * zz, z);
1131 re += s + fac;
1132
1133 re = swapxy.blend(Self::FRAC_PI_2 - re, re);
1135 re = ((x | y).simd_eq(Self::ZERO)).blend(Self::ZERO, re);
1136 re = (x.sign_bit()).blend(Self::PI - re, re);
1137
1138 re = (y.sign_bit()).blend(-re, re);
1140
1141 re
1142 }
1143
1144 #[inline]
1145 #[must_use]
1146 pub fn sin_cos(self) -> (Self, Self) {
1147 const_f64_as_f64x4!(P0sin, -1.66666666666666307295E-1);
1151 const_f64_as_f64x4!(P1sin, 8.33333333332211858878E-3);
1152 const_f64_as_f64x4!(P2sin, -1.98412698295895385996E-4);
1153 const_f64_as_f64x4!(P3sin, 2.75573136213857245213E-6);
1154 const_f64_as_f64x4!(P4sin, -2.50507477628578072866E-8);
1155 const_f64_as_f64x4!(P5sin, 1.58962301576546568060E-10);
1156
1157 const_f64_as_f64x4!(P0cos, 4.16666666666665929218E-2);
1158 const_f64_as_f64x4!(P1cos, -1.38888888888730564116E-3);
1159 const_f64_as_f64x4!(P2cos, 2.48015872888517045348E-5);
1160 const_f64_as_f64x4!(P3cos, -2.75573141792967388112E-7);
1161 const_f64_as_f64x4!(P4cos, 2.08757008419747316778E-9);
1162 const_f64_as_f64x4!(P5cos, -1.13585365213876817300E-11);
1163
1164 const_f64_as_f64x4!(DP1, 7.853981554508209228515625E-1 * 2.);
1165 const_f64_as_f64x4!(DP2, 7.94662735614792836714E-9 * 2.);
1166 const_f64_as_f64x4!(DP3, 3.06161699786838294307E-17 * 2.);
1167
1168 const_f64_as_f64x4!(TWO_OVER_PI, 2.0 / core::f64::consts::PI);
1169
1170 let xa = self.abs();
1171
1172 let y = (xa * TWO_OVER_PI).round();
1173 let q = y.round_int();
1174
1175 let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa)));
1176
1177 let x2 = x * x;
1178 let mut s = polynomial_5!(x2, P0sin, P1sin, P2sin, P3sin, P4sin, P5sin);
1179 let mut c = polynomial_5!(x2, P0cos, P1cos, P2cos, P3cos, P4cos, P5cos);
1180 s = (x * x2).mul_add(s, x);
1181 c =
1182 (x2 * x2).mul_add(c, x2.mul_neg_add(f64x4::from(0.5), f64x4::from(1.0)));
1183
1184 let swap = !((q & i64x4::from(1)).simd_eq(i64x4::from(0)));
1185
1186 let mut overflow: f64x4 = cast(q.simd_gt(i64x4::from(0x80000000000000)));
1187 overflow &= xa.is_finite();
1188 s = overflow.blend(f64x4::from(0.0), s);
1189 c = overflow.blend(f64x4::from(1.0), c);
1190
1191 let mut sin1 = cast::<_, f64x4>(swap).blend(c, s);
1193 let sign_sin: i64x4 = (q << 62) ^ cast::<_, i64x4>(self);
1194 sin1 = sin1.flip_signs(cast(sign_sin));
1195
1196 let mut cos1 = cast::<_, f64x4>(swap).blend(s, c);
1198 let sign_cos: i64x4 = ((q + i64x4::from(1)) & i64x4::from(2)) << 62;
1199 cos1 ^= cast::<_, f64x4>(sign_cos);
1200
1201 (sin1, cos1)
1202 }
1203 #[inline]
1204 #[must_use]
1205 pub fn sin(self) -> Self {
1206 let (s, _) = self.sin_cos();
1207 s
1208 }
1209 #[inline]
1210 #[must_use]
1211 pub fn cos(self) -> Self {
1212 let (_, c) = self.sin_cos();
1213 c
1214 }
1215 #[inline]
1216 #[must_use]
1217 pub fn tan(self) -> Self {
1218 let (s, c) = self.sin_cos();
1219 s / c
1220 }
1221 #[inline]
1222 #[must_use]
1223 pub fn to_degrees(self) -> Self {
1224 const_f64_as_f64x4!(RAD_TO_DEG_RATIO, 180.0_f64 / core::f64::consts::PI);
1225 self * RAD_TO_DEG_RATIO
1226 }
1227 #[inline]
1228 #[must_use]
1229 pub fn to_radians(self) -> Self {
1230 const_f64_as_f64x4!(DEG_TO_RAD_RATIO, core::f64::consts::PI / 180.0_f64);
1231 self * DEG_TO_RAD_RATIO
1232 }
1233 #[inline]
1234 #[must_use]
1235 pub fn sqrt(self) -> Self {
1236 pick! {
1237 if #[cfg(target_feature="avx")] {
1238 Self { avx: sqrt_m256d(self.avx) }
1239 } else {
1240 Self {
1241 a : self.a.sqrt(),
1242 b : self.b.sqrt(),
1243 }
1244 }
1245 }
1246 }
1247 #[inline]
1248 #[must_use]
1249 #[doc(alias("movemask", "move_mask"))]
1250 pub fn to_bitmask(self) -> u32 {
1251 pick! {
1252 if #[cfg(target_feature="avx")] {
1253 move_mask_m256d(self.avx) as u32
1254 } else {
1255 (self.b.to_bitmask() << 2) | self.a.to_bitmask()
1256 }
1257 }
1258 }
1259 #[inline]
1260 #[must_use]
1261 pub fn any(self) -> bool {
1262 pick! {
1263 if #[cfg(target_feature="avx")] {
1264 move_mask_m256d(self.avx) != 0
1265 } else {
1266 self.a.any() || self.b.any()
1267 }
1268 }
1269 }
1270 #[inline]
1271 #[must_use]
1272 pub fn all(self) -> bool {
1273 pick! {
1274 if #[cfg(target_feature="avx")] {
1275 move_mask_m256d(self.avx) == 0b1111
1276 } else {
1277 self.a.all() && self.b.all()
1278 }
1279 }
1280 }
1281 #[inline]
1282 #[must_use]
1283 pub fn none(self) -> bool {
1284 !self.any()
1285 }
1286
1287 #[inline]
1288 fn vm_pow2n(self) -> Self {
1289 const_f64_as_f64x4!(pow2_52, 4503599627370496.0);
1290 const_f64_as_f64x4!(bias, 1023.0);
1291 let a = self + (bias + pow2_52);
1292 let c = cast::<_, i64x4>(a) << 52;
1293 cast::<_, f64x4>(c)
1294 }
1295
1296 #[inline]
1298 #[must_use]
1299 pub fn exp(self) -> Self {
1300 const_f64_as_f64x4!(P2, 1.0 / 2.0);
1301 const_f64_as_f64x4!(P3, 1.0 / 6.0);
1302 const_f64_as_f64x4!(P4, 1. / 24.);
1303 const_f64_as_f64x4!(P5, 1. / 120.);
1304 const_f64_as_f64x4!(P6, 1. / 720.);
1305 const_f64_as_f64x4!(P7, 1. / 5040.);
1306 const_f64_as_f64x4!(P8, 1. / 40320.);
1307 const_f64_as_f64x4!(P9, 1. / 362880.);
1308 const_f64_as_f64x4!(P10, 1. / 3628800.);
1309 const_f64_as_f64x4!(P11, 1. / 39916800.);
1310 const_f64_as_f64x4!(P12, 1. / 479001600.);
1311 const_f64_as_f64x4!(P13, 1. / 6227020800.);
1312 const_f64_as_f64x4!(LN2D_HI, 0.693145751953125);
1313 const_f64_as_f64x4!(LN2D_LO, 1.42860682030941723212E-6);
1314 let max_x = f64x4::from(708.39);
1315 let r = (self * Self::LOG2_E).round();
1316 let x = r.mul_neg_add(LN2D_HI, self);
1317 let x = r.mul_neg_add(LN2D_LO, x);
1318 let z =
1319 polynomial_13!(x, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13);
1320 let n2 = Self::vm_pow2n(r);
1321 let z = (z + Self::ONE) * n2;
1322 let in_range = self.abs().simd_lt(max_x);
1324 let in_range = in_range & self.is_finite();
1325 in_range.blend(z, Self::ZERO)
1326 }
1327
1328 #[inline]
1329 fn exponent(self) -> f64x4 {
1330 const_f64_as_f64x4!(pow2_52, 4503599627370496.0);
1331 const_f64_as_f64x4!(bias, 1023.0);
1332 let a = cast::<_, u64x4>(self);
1333 let b = a >> 52;
1334 let c = b | cast::<_, u64x4>(pow2_52);
1335 let d = cast::<_, f64x4>(c);
1336 let e = d - (pow2_52 + bias);
1337 e
1338 }
1339
1340 #[inline]
1341 fn fraction_2(self) -> Self {
1342 let t1 = cast::<_, u64x4>(self);
1343 let t2 = cast::<_, u64x4>(
1344 (t1 & u64x4::from(0x000FFFFFFFFFFFFF)) | u64x4::from(0x3FE0000000000000),
1345 );
1346 cast::<_, f64x4>(t2)
1347 }
1348 #[inline]
1349 fn is_zero_or_subnormal(self) -> Self {
1350 let t = cast::<_, i64x4>(self);
1351 let t = t & i64x4::splat(0x7FF0000000000000);
1352 i64x4::round_float(t.simd_eq(i64x4::splat(0)))
1353 }
1354 #[inline]
1355 fn infinity() -> Self {
1356 cast::<_, f64x4>(i64x4::splat(0x7FF0000000000000))
1357 }
1358 #[inline]
1359 fn nan_log() -> Self {
1360 cast::<_, f64x4>(i64x4::splat(0x7FF8000000000000 | 0x101 << 29))
1361 }
1362 #[inline]
1363 fn nan_pow() -> Self {
1364 cast::<_, f64x4>(i64x4::splat(0x7FF8000000000000 | 0x101 << 29))
1365 }
1366 #[inline]
1367 fn sign_bit(self) -> Self {
1368 let t1 = cast::<_, i64x4>(self);
1369 let t2 = t1 >> 63;
1370 !cast::<_, f64x4>(t2).simd_eq(f64x4::ZERO)
1371 }
1372
1373 #[inline]
1375 pub fn reduce_add(self) -> f64 {
1376 pick! {
1377 if #[cfg(target_feature="avx")] {
1378 let lo = cast_to_m128d_from_m256d(self.avx);
1380 let hi = extract_m128d_from_m256d::<1>(self.avx);
1381 let lo = add_m128d(lo,hi);
1382 let hi64 = unpack_high_m128d(lo,lo);
1383 let sum = add_m128d_s(lo,hi64);
1384 get_f64_from_m128d_s(sum)
1385 } else {
1386 self.a.reduce_add() + self.b.reduce_add()
1387 }
1388 }
1389 }
1390
1391 #[inline]
1393 #[must_use]
1394 pub fn ln(self) -> Self {
1395 const_f64_as_f64x4!(HALF, 0.5);
1396 const_f64_as_f64x4!(P0, 7.70838733755885391666E0);
1397 const_f64_as_f64x4!(P1, 1.79368678507819816313E1);
1398 const_f64_as_f64x4!(P2, 1.44989225341610930846E1);
1399 const_f64_as_f64x4!(P3, 4.70579119878881725854E0);
1400 const_f64_as_f64x4!(P4, 4.97494994976747001425E-1);
1401 const_f64_as_f64x4!(P5, 1.01875663804580931796E-4);
1402
1403 const_f64_as_f64x4!(Q0, 2.31251620126765340583E1);
1404 const_f64_as_f64x4!(Q1, 7.11544750618563894466E1);
1405 const_f64_as_f64x4!(Q2, 8.29875266912776603211E1);
1406 const_f64_as_f64x4!(Q3, 4.52279145837532221105E1);
1407 const_f64_as_f64x4!(Q4, 1.12873587189167450590E1);
1408 const_f64_as_f64x4!(LN2F_HI, 0.693359375);
1409 const_f64_as_f64x4!(LN2F_LO, -2.12194440e-4);
1410 const_f64_as_f64x4!(VM_SQRT2, 1.414213562373095048801);
1411 const_f64_as_f64x4!(VM_SMALLEST_NORMAL, 1.17549435E-38);
1412
1413 let x1 = self;
1414 let x = Self::fraction_2(x1);
1415 let e = Self::exponent(x1);
1416 let mask = x.simd_gt(VM_SQRT2 * HALF);
1417 let x = (!mask).blend(x + x, x);
1418 let fe = mask.blend(e + Self::ONE, e);
1419 let x = x - Self::ONE;
1420 let px = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
1421 let x2 = x * x;
1422 let px = x2 * x * px;
1423 let qx = polynomial_5n!(x, Q0, Q1, Q2, Q3, Q4);
1424 let res = px / qx;
1425 let res = fe.mul_add(LN2F_LO, res);
1426 let res = res + x2.mul_neg_add(HALF, x);
1427 let res = fe.mul_add(LN2F_HI, res);
1428 let overflow = !self.is_finite();
1429 let underflow = x1.simd_lt(VM_SMALLEST_NORMAL);
1430 let mask = overflow | underflow;
1431 if !mask.any() {
1432 res
1433 } else {
1434 let is_zero = self.is_zero_or_subnormal();
1435 let res = underflow.blend(Self::nan_log(), res);
1436 let res = is_zero.blend(Self::infinity(), res);
1437 let res = overflow.blend(self, res);
1438 res
1439 }
1440 }
1441
1442 #[inline]
1443 #[must_use]
1444 pub fn log2(self) -> Self {
1445 Self::ln(self) * Self::LOG2_E
1446 }
1447 #[inline]
1448 #[must_use]
1449 pub fn log10(self) -> Self {
1450 Self::ln(self) * Self::LOG10_E
1451 }
1452
1453 #[inline]
1454 #[must_use]
1455 pub fn pow_f64x4(self, y: Self) -> Self {
1456 const_f64_as_f64x4!(ln2d_hi, 0.693145751953125);
1457 const_f64_as_f64x4!(ln2d_lo, 1.42860682030941723212E-6);
1458 const_f64_as_f64x4!(P0log, 2.0039553499201281259648E1);
1459 const_f64_as_f64x4!(P1log, 5.7112963590585538103336E1);
1460 const_f64_as_f64x4!(P2log, 6.0949667980987787057556E1);
1461 const_f64_as_f64x4!(P3log, 2.9911919328553073277375E1);
1462 const_f64_as_f64x4!(P4log, 6.5787325942061044846969E0);
1463 const_f64_as_f64x4!(P5log, 4.9854102823193375972212E-1);
1464 const_f64_as_f64x4!(P6log, 4.5270000862445199635215E-5);
1465 const_f64_as_f64x4!(Q0log, 6.0118660497603843919306E1);
1466 const_f64_as_f64x4!(Q1log, 2.1642788614495947685003E2);
1467 const_f64_as_f64x4!(Q2log, 3.0909872225312059774938E2);
1468 const_f64_as_f64x4!(Q3log, 2.2176239823732856465394E2);
1469 const_f64_as_f64x4!(Q4log, 8.3047565967967209469434E1);
1470 const_f64_as_f64x4!(Q5log, 1.5062909083469192043167E1);
1471
1472 const_f64_as_f64x4!(p2, 1.0 / 2.0); const_f64_as_f64x4!(p3, 1.0 / 6.0);
1475 const_f64_as_f64x4!(p4, 1.0 / 24.0);
1476 const_f64_as_f64x4!(p5, 1.0 / 120.0);
1477 const_f64_as_f64x4!(p6, 1.0 / 720.0);
1478 const_f64_as_f64x4!(p7, 1.0 / 5040.0);
1479 const_f64_as_f64x4!(p8, 1.0 / 40320.0);
1480 const_f64_as_f64x4!(p9, 1.0 / 362880.0);
1481 const_f64_as_f64x4!(p10, 1.0 / 3628800.0);
1482 const_f64_as_f64x4!(p11, 1.0 / 39916800.0);
1483 const_f64_as_f64x4!(p12, 1.0 / 479001600.0);
1484 const_f64_as_f64x4!(p13, 1.0 / 6227020800.0);
1485
1486 let x1 = self.abs();
1487 let x = x1.fraction_2();
1488 let mask = x.simd_gt(f64x4::SQRT_2 * f64x4::HALF);
1489 let x = (!mask).blend(x + x, x);
1490 let x = x - f64x4::ONE;
1491 let x2 = x * x;
1492 let px = polynomial_6!(x, P0log, P1log, P2log, P3log, P4log, P5log, P6log);
1493 let px = px * x * x2;
1494 let qx = polynomial_6n!(x, Q0log, Q1log, Q2log, Q3log, Q4log, Q5log);
1495 let lg1 = px / qx;
1496
1497 let ef = x1.exponent();
1498 let ef = mask.blend(ef + f64x4::ONE, ef);
1499 let e1 = (ef * y).round();
1500 let yr = ef.mul_sub(y, e1);
1501
1502 let lg = f64x4::HALF.mul_neg_add(x2, x) + lg1;
1503 let x2err = (f64x4::HALF * x).mul_sub(x, f64x4::HALF * x2);
1504 let lg_err = f64x4::HALF.mul_add(x2, lg - x) - lg1;
1505
1506 let e2 = (lg * y * f64x4::LOG2_E).round();
1507 let v = lg.mul_sub(y, e2 * ln2d_hi);
1508 let v = e2.mul_neg_add(ln2d_lo, v);
1509 let v = v - (lg_err + x2err).mul_sub(y, yr * f64x4::LN_2);
1510
1511 let x = v;
1512 let e3 = (x * f64x4::LOG2_E).round();
1513 let x = e3.mul_neg_add(f64x4::LN_2, x);
1514 let z =
1515 polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
1516 + f64x4::ONE;
1517 let ee = e1 + e2 + e3;
1518 let ei = cast::<_, i64x4>(ee.round_int());
1519 let ej = cast::<_, i64x4>(ei + (cast::<_, i64x4>(z) >> 52));
1520
1521 let overflow = cast::<_, f64x4>(!ej.simd_lt(i64x4::splat(0x07FF)))
1522 | ee.simd_gt(f64x4::splat(3000.0));
1523 let underflow = cast::<_, f64x4>(!ej.simd_gt(i64x4::splat(0x000)))
1524 | ee.simd_lt(f64x4::splat(-3000.0));
1525
1526 let z = cast::<_, f64x4>(cast::<_, i64x4>(z) + (ei << 52));
1528
1529 let z = if (overflow | underflow).any() {
1531 let z = underflow.blend(f64x4::ZERO, z);
1532 overflow.blend(Self::infinity(), z)
1533 } else {
1534 z
1535 };
1536
1537 let x_zero = self.is_zero_or_subnormal();
1539 let z = x_zero.blend(
1540 y.simd_lt(f64x4::ZERO).blend(
1541 Self::infinity(),
1542 y.simd_eq(f64x4::ZERO).blend(f64x4::ONE, f64x4::ZERO),
1543 ),
1544 z,
1545 );
1546
1547 let x_sign = self.sign_bit();
1548
1549 let z = if x_sign.any() {
1550 let yi = y.simd_eq(y.round());
1552 let y_odd = cast::<_, i64x4>(y.round_int() << 63).round_float();
1554 let z1 =
1555 yi.blend(z | y_odd, self.simd_eq(Self::ZERO).blend(z, Self::nan_pow()));
1556 x_sign.blend(z1, z)
1557 } else {
1558 z
1559 };
1560
1561 let x_finite = self.is_finite();
1562 let y_finite = y.is_finite();
1563 let e_finite = ee.is_finite();
1564
1565 if (x_finite & y_finite & (e_finite | x_zero)).all() {
1566 return z;
1567 }
1568
1569 (self.is_nan() | y.is_nan()).blend(self + y, z)
1570 }
1571 #[inline]
1572 pub fn powf(self, y: f64) -> Self {
1573 Self::pow_f64x4(self, f64x4::splat(y))
1574 }
1575
1576 #[inline]
1577 pub fn to_array(self) -> [f64; 4] {
1578 cast(self)
1579 }
1580
1581 #[inline]
1582 pub fn as_array(&self) -> &[f64; 4] {
1583 cast_ref(self)
1584 }
1585
1586 #[inline]
1587 pub fn as_mut_array(&mut self) -> &mut [f64; 4] {
1588 cast_mut(self)
1589 }
1590
1591 #[inline]
1592 pub fn from_i32x4(v: i32x4) -> Self {
1593 pick! {
1594 if #[cfg(target_feature="avx")] {
1595 Self { avx: convert_to_m256d_from_i32_m128i(v.sse) }
1596 } else {
1597 Self::new([
1598 v.as_array()[0] as f64,
1599 v.as_array()[1] as f64,
1600 v.as_array()[2] as f64,
1601 v.as_array()[3] as f64,
1602 ])
1603 }
1604 }
1605 }
1606}
1607
1608impl From<i32x4> for f64x4 {
1609 #[inline]
1610 fn from(v: i32x4) -> Self {
1611 Self::from_i32x4(v)
1612 }
1613}
1614
1615impl Not for f64x4 {
1616 type Output = Self;
1617 #[inline]
1618 fn not(self) -> Self {
1619 pick! {
1620 if #[cfg(target_feature="avx")] {
1621 Self { avx: self.avx.not() }
1622 } else {
1623 Self {
1624 a : self.a.not(),
1625 b : self.b.not(),
1626 }
1627 }
1628 }
1629 }
1630}