1use super::*;
2
3pick! {
4 if #[cfg(target_feature="avx512f")] {
5 #[derive(Default, Clone, Copy, PartialEq)]
6 #[repr(C, align(64))]
7 pub struct f64x8 { pub(crate) avx512: m512d }
8 } else {
9 #[derive(Default, Clone, Copy, PartialEq)]
10 #[repr(C, align(64))]
11 pub struct f64x8 { pub(crate) a : f64x4, pub(crate) b : f64x4 }
12 }
13}
14
15macro_rules! const_f64_as_f64x8 {
16 ($i:ident, $f:expr) => {
17 #[allow(non_upper_case_globals)]
18 pub const $i: f64x8 = f64x8::new([$f; 8]);
19 };
20}
21
22unsafe impl Zeroable for f64x8 {}
23unsafe impl Pod for f64x8 {}
24
25impl AlignTo for f64x8 {
26 type Elem = f64;
27}
28
29impl Add for f64x8 {
30 type Output = Self;
31 #[inline]
32 fn add(self, rhs: Self) -> Self::Output {
33 pick! {
34 if #[cfg(target_feature="avx512f")] {
35 Self { avx512: add_m512d(self.avx512, rhs.avx512) }
36 } else {
37 Self {
38 a : self.a.add(rhs.a),
39 b : self.b.add(rhs.b),
40 }
41 }
42 }
43 }
44}
45
46impl Sub for f64x8 {
47 type Output = Self;
48 #[inline]
49 fn sub(self, rhs: Self) -> Self::Output {
50 pick! {
51 if #[cfg(target_feature="avx512f")] {
52 Self { avx512: sub_m512d(self.avx512, rhs.avx512) }
53 } else {
54 Self {
55 a : self.a.sub(rhs.a),
56 b : self.b.sub(rhs.b),
57 }
58 }
59 }
60 }
61}
62
63impl Mul for f64x8 {
64 type Output = Self;
65 #[inline]
66 fn mul(self, rhs: Self) -> Self::Output {
67 pick! {
68 if #[cfg(target_feature="avx512f")] {
69 Self { avx512: mul_m512d(self.avx512, rhs.avx512) }
70 } else {
71 Self { a: self.a.mul(rhs.a), b: self.b.mul(rhs.b) }
72 }
73 }
74 }
75}
76
77impl Div for f64x8 {
78 type Output = Self;
79 #[inline]
80 fn div(self, rhs: Self) -> Self::Output {
81 pick! {
82 if #[cfg(target_feature="avx512f")] {
83 Self { avx512: div_m512d(self.avx512, rhs.avx512) }
84 } else {
85 Self { a: self.a.div(rhs.a), b: self.b.div(rhs.b) }
86 }
87 }
88 }
89}
90
91impl Neg for f64x8 {
92 type Output = Self;
93 #[inline]
94 fn neg(self) -> Self::Output {
95 pick! {
96 if #[cfg(target_feature="avx512f")] {
97 Self { avx512: bitxor_m512d(self.avx512, Self::splat(-0.0).avx512) }
98 } else {
99 Self {
100 a : self.a.neg(),
101 b : self.b.neg(),
102 }
103 }
104 }
105 }
106}
107
108impl Add<f64> for f64x8 {
109 type Output = Self;
110 #[inline]
111 fn add(self, rhs: f64) -> Self::Output {
112 self.add(Self::splat(rhs))
113 }
114}
115
116impl Sub<f64> for f64x8 {
117 type Output = Self;
118 #[inline]
119 fn sub(self, rhs: f64) -> Self::Output {
120 self.sub(Self::splat(rhs))
121 }
122}
123
124impl Mul<f64> for f64x8 {
125 type Output = Self;
126 #[inline]
127 fn mul(self, rhs: f64) -> Self::Output {
128 self.mul(Self::splat(rhs))
129 }
130}
131
132impl Div<f64> for f64x8 {
133 type Output = Self;
134 #[inline]
135 fn div(self, rhs: f64) -> Self::Output {
136 self.div(Self::splat(rhs))
137 }
138}
139
140impl Add<f64x8> for f64 {
141 type Output = f64x8;
142 #[inline]
143 fn add(self, rhs: f64x8) -> Self::Output {
144 f64x8::splat(self).add(rhs)
145 }
146}
147
148impl Sub<f64x8> for f64 {
149 type Output = f64x8;
150 #[inline]
151 fn sub(self, rhs: f64x8) -> Self::Output {
152 f64x8::splat(self).sub(rhs)
153 }
154}
155
156impl Mul<f64x8> for f64 {
157 type Output = f64x8;
158 #[inline]
159 fn mul(self, rhs: f64x8) -> Self::Output {
160 f64x8::splat(self).mul(rhs)
161 }
162}
163
164impl Div<f64x8> for f64 {
165 type Output = f64x8;
166 #[inline]
167 fn div(self, rhs: f64x8) -> Self::Output {
168 f64x8::splat(self).div(rhs)
169 }
170}
171
172impl BitAnd for f64x8 {
173 type Output = Self;
174 #[inline]
175 fn bitand(self, rhs: Self) -> Self::Output {
176 pick! {
177 if #[cfg(target_feature="avx512f")] {
178 Self { avx512: bitand_m512d(self.avx512, rhs.avx512) }
179 } else {
180 Self {
181 a : self.a.bitand(rhs.a),
182 b : self.b.bitand(rhs.b),
183 }
184 }
185 }
186 }
187}
188
189impl BitOr for f64x8 {
190 type Output = Self;
191 #[inline]
192 fn bitor(self, rhs: Self) -> Self::Output {
193 pick! {
194 if #[cfg(target_feature="avx512f")] {
195 Self { avx512: bitor_m512d(self.avx512, rhs.avx512) }
196 } else {
197 Self {
198 a : self.a.bitor(rhs.a),
199 b : self.b.bitor(rhs.b),
200 }
201 }
202 }
203 }
204}
205
206impl BitXor for f64x8 {
207 type Output = Self;
208 #[inline]
209 fn bitxor(self, rhs: Self) -> Self::Output {
210 pick! {
211 if #[cfg(target_feature="avx512f")] {
212 Self { avx512: bitxor_m512d(self.avx512, rhs.avx512) }
213 } else {
214 Self {
215 a : self.a.bitxor(rhs.a),
216 b : self.b.bitxor(rhs.b),
217 }
218 }
219 }
220 }
221}
222
223impl CmpEq for f64x8 {
224 type Output = Self;
225 #[inline]
226 fn simd_eq(self, rhs: Self) -> Self::Output {
227 pick! {
228 if #[cfg(target_feature="avx512f")] {
229 Self { avx512: cmp_op_mask_m512d::<{cmp_op!(EqualOrdered)}>(self.avx512, rhs.avx512) }
230 } else {
231 Self {
232 a : self.a.simd_eq(rhs.a),
233 b : self.b.simd_eq(rhs.b),
234 }
235 }
236 }
237 }
238}
239
240impl CmpGt for f64x8 {
241 type Output = Self;
242 #[inline]
243 fn simd_gt(self, rhs: Self) -> Self::Output {
244 pick! {
245 if #[cfg(target_feature="avx512f")] {
246 Self { avx512: cmp_op_mask_m512d::<{cmp_op!(GreaterThanOrdered)}>(self.avx512, rhs.avx512) }
247 } else {
248 Self {
249 a : self.a.simd_gt(rhs.a),
250 b : self.b.simd_gt(rhs.b),
251 }
252 }
253 }
254 }
255}
256
257impl CmpGe for f64x8 {
258 type Output = Self;
259 #[inline]
260 fn simd_ge(self, rhs: Self) -> Self::Output {
261 pick! {
262 if #[cfg(target_feature="avx512f")] {
263 Self { avx512: cmp_op_mask_m512d::<{cmp_op!(GreaterEqualOrdered)}>(self.avx512, rhs.avx512) }
264 } else {
265 Self {
266 a : self.a.simd_ge(rhs.a),
267 b : self.b.simd_ge(rhs.b),
268 }
269 }
270 }
271 }
272}
273
274impl CmpLt for f64x8 {
275 type Output = Self;
276 #[inline]
277 fn simd_lt(self, rhs: Self) -> Self::Output {
278 pick! {
279 if #[cfg(target_feature="avx512f")] {
280 Self { avx512: cmp_op_mask_m512d::<{cmp_op!(LessThanOrdered)}>(self.avx512, rhs.avx512) }
281 } else {
282 Self {
283 a : self.a.simd_lt(rhs.a),
284 b : self.b.simd_lt(rhs.b),
285 }
286 }
287 }
288 }
289}
290
291impl CmpLe for f64x8 {
292 type Output = Self;
293 #[inline]
294 fn simd_le(self, rhs: Self) -> Self::Output {
295 pick! {
296 if #[cfg(target_feature="avx512f")] {
297 Self { avx512: cmp_op_mask_m512d::<{cmp_op!(LessEqualOrdered)}>(self.avx512, rhs.avx512) }
298 } else {
299 Self {
300 a : self.a.simd_le(rhs.a),
301 b : self.b.simd_le(rhs.b),
302 }
303 }
304 }
305 }
306}
307
308impl CmpNe for f64x8 {
309 type Output = Self;
310 #[inline]
311 fn simd_ne(self, rhs: Self) -> Self::Output {
312 pick! {
313 if #[cfg(target_feature="avx512f")] {
314 Self { avx512: cmp_op_mask_m512d::<{cmp_op!(NotEqualOrdered)}>(self.avx512, rhs.avx512) }
315 } else {
316 Self {
317 a : self.a.simd_ne(rhs.a),
318 b : self.b.simd_ne(rhs.b),
319 }
320 }
321 }
322 }
323}
324
325impl f64x8 {
326 const_f64_as_f64x8!(ONE, 1.0);
327 const_f64_as_f64x8!(ZERO, 0.0);
328 const_f64_as_f64x8!(HALF, 0.5);
329 const_f64_as_f64x8!(E, core::f64::consts::E);
330 const_f64_as_f64x8!(FRAC_1_PI, core::f64::consts::FRAC_1_PI);
331 const_f64_as_f64x8!(FRAC_2_PI, core::f64::consts::FRAC_2_PI);
332 const_f64_as_f64x8!(FRAC_2_SQRT_PI, core::f64::consts::FRAC_2_SQRT_PI);
333 const_f64_as_f64x8!(FRAC_1_SQRT_2, core::f64::consts::FRAC_1_SQRT_2);
334 const_f64_as_f64x8!(FRAC_PI_2, core::f64::consts::FRAC_PI_2);
335 const_f64_as_f64x8!(FRAC_PI_3, core::f64::consts::FRAC_PI_3);
336 const_f64_as_f64x8!(FRAC_PI_4, core::f64::consts::FRAC_PI_4);
337 const_f64_as_f64x8!(FRAC_PI_6, core::f64::consts::FRAC_PI_6);
338 const_f64_as_f64x8!(FRAC_PI_8, core::f64::consts::FRAC_PI_8);
339 const_f64_as_f64x8!(LN_2, core::f64::consts::LN_2);
340 const_f64_as_f64x8!(LN_10, core::f64::consts::LN_10);
341 const_f64_as_f64x8!(LOG2_E, core::f64::consts::LOG2_E);
342 const_f64_as_f64x8!(LOG10_E, core::f64::consts::LOG10_E);
343 const_f64_as_f64x8!(LOG10_2, core::f64::consts::LOG10_2);
344 const_f64_as_f64x8!(LOG2_10, core::f64::consts::LOG2_10);
345 const_f64_as_f64x8!(PI, core::f64::consts::PI);
346 const_f64_as_f64x8!(SQRT_2, core::f64::consts::SQRT_2);
347 const_f64_as_f64x8!(TAU, core::f64::consts::TAU);
348
349 #[inline]
350 #[must_use]
351 pub const fn new(array: [f64; 8]) -> Self {
352 unsafe { core::mem::transmute(array) }
353 }
354
355 #[inline]
356 #[must_use]
357 pub fn blend(self, t: Self, f: Self) -> Self {
358 pick! {
359 if #[cfg(target_feature="avx512f")] {
360 Self { avx512: blend_varying_m512d(f.avx512, t.avx512, movepi64_mask_m512d(self.avx512)) }
361 } else {
362 Self {
363 a : self.a.blend(t.a, f.a),
364 b : self.b.blend(t.b, f.b),
365 }
366 }
367 }
368 }
369
370 #[inline]
371 #[must_use]
372 pub fn abs(self) -> Self {
373 pick! {
374 if #[cfg(target_feature="avx512f")] {
375 let non_sign_bits = f64x8::from(f64::from_bits(i64::MAX as u64));
376 self & non_sign_bits
377 } else {
378 Self {
379 a: self.a.abs(),
380 b: self.b.abs(),
381 }
382 }
383 }
384 }
385
386 #[inline]
387 #[must_use]
388 pub fn floor(self) -> Self {
389 pick! {
390 if #[cfg(target_feature="avx512f")] {
391 Self { avx512: round_m512d::<{round_op!(NegInf)}>(self.avx512) }
392 } else {
393 Self {
394 a : self.a.floor(),
395 b : self.b.floor(),
396 }
397 }
398 }
399 }
400 #[inline]
401 #[must_use]
402 pub fn ceil(self) -> Self {
403 pick! {
404 if #[cfg(target_feature="avx512f")] {
405 Self { avx512: round_m512d::<{round_op!(PosInf)}>(self.avx512) }
406 } else {
407 Self {
408 a : self.a.ceil(),
409 b : self.b.ceil(),
410 }
411 }
412 }
413 }
414
415 #[inline]
416 #[must_use]
417 pub fn fast_max(self, rhs: Self) -> Self {
418 pick! {
419 if #[cfg(target_feature="avx512f")] {
420 Self { avx512: max_m512d(self.avx512, rhs.avx512) }
421 } else {
422 Self {
423 a : self.a.fast_max(rhs.a),
424 b : self.b.fast_max(rhs.b),
425 }
426 }
427 }
428 }
429
430 #[inline]
431 #[must_use]
432 pub fn max(self, rhs: Self) -> Self {
433 pick! {
434 if #[cfg(target_feature="avx512f")] {
435 rhs.is_nan().blend(self, Self { avx512: max_m512d(self.avx512, rhs.avx512) })
436 } else {
437 Self {
438 a: self.a.max(rhs.a),
439 b: self.b.max(rhs.b),
440 }
441 }
442 }
443 }
444
445 #[inline]
446 #[must_use]
447 pub fn fast_min(self, rhs: Self) -> Self {
448 pick! {
449 if #[cfg(target_feature="avx512f")] {
450 Self { avx512: min_m512d(self.avx512, rhs.avx512) }
451 } else {
452 Self {
453 a : self.a.fast_min(rhs.a),
454 b : self.b.fast_min(rhs.b),
455 }
456 }
457 }
458 }
459
460 #[inline]
461 #[must_use]
462 pub fn min(self, rhs: Self) -> Self {
463 pick! {
464 if #[cfg(target_feature="avx512f")] {
465 rhs.is_nan().blend(self, Self { avx512: min_m512d(self.avx512, rhs.avx512) })
466 } else {
467 Self {
468 a: self.a.min(rhs.a),
469 b: self.b.min(rhs.b),
470 }
471 }
472 }
473 }
474
475 #[inline]
476 #[must_use]
477 pub fn is_nan(self) -> Self {
478 pick! {
479 if #[cfg(target_feature="avx512f")] {
480 Self { avx512: cmp_op_mask_m512d::<{cmp_op!(Unordered)}>(self.avx512, self.avx512) }
481 } else {
482 Self {
483 a: self.a.is_nan(),
484 b: self.b.is_nan(),
485 }
486 }
487 }
488 }
489
490 #[inline]
491 #[must_use]
492 pub fn is_finite(self) -> Self {
493 let shifted_exp_mask = u64x8::splat(0xFFE0000000000000);
494 let u: u64x8 = cast(self);
495 let shift_u = u << 1_u64;
496 let out = !(shift_u & shifted_exp_mask).simd_eq(shifted_exp_mask);
497 cast(out)
498 }
499
500 #[inline]
501 #[must_use]
502 pub fn is_inf(self) -> Self {
503 let shifted_inf = u64x8::from(0xFFE0000000000000);
504 let u: u64x8 = cast(self);
505 let shift_u = u << 1_u64;
506 let out = (shift_u).simd_eq(shifted_inf);
507 cast(out)
508 }
509
510 #[inline]
511 #[must_use]
512 pub fn round(self) -> Self {
513 pick! {
514 if #[cfg(target_feature="avx512f")] {
515 Self { avx512: round_m512d::<{round_op!(Nearest)}>(self.avx512) }
516 } else {
517 Self {
518 a: self.a.round(),
519 b: self.b.round(),
520 }
521 }
522 }
523 }
524
525 #[inline]
526 #[must_use]
527 pub fn round_int(self) -> i64x8 {
528 let rounded: [f64; 8] = cast(self.round());
529 cast([
530 rounded[0] as i64,
531 rounded[1] as i64,
532 rounded[2] as i64,
533 rounded[3] as i64,
534 rounded[4] as i64,
535 rounded[5] as i64,
536 rounded[6] as i64,
537 rounded[7] as i64,
538 ])
539 }
540
541 #[inline]
567 #[must_use]
568 pub fn mul_add(self, m: Self, a: Self) -> Self {
569 pick! {
570 if #[cfg(all(target_feature="avx512f",target_feature="fma"))] {
571 Self { avx512: fused_mul_add_m512d(self.avx512, m.avx512, a.avx512) }
572 } else if #[cfg(target_feature="avx512f")] {
573 (self * m) + a
575 } else {
576 Self {
577 a : self.a.mul_add(m.a, a.a),
578 b : self.b.mul_add(m.b, a.b),
579 }
580 }
581 }
582 }
583
584 #[inline]
610 #[must_use]
611 pub fn mul_sub(self, m: Self, s: Self) -> Self {
612 pick! {
613 if #[cfg(all(target_feature="avx512f",target_feature="fma"))] {
614 Self { avx512: fused_mul_sub_m512d(self.avx512, m.avx512, s.avx512) }
615 } else if #[cfg(target_feature="avx512f")] {
616 (self * m) - s
618 } else {
619 Self {
620 a : self.a.mul_sub(m.a, s.a),
621 b : self.b.mul_sub(m.b, s.b),
622 }
623 }
624 }
625 }
626
627 #[inline]
653 #[must_use]
654 pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
655 pick! {
656 if #[cfg(all(target_feature="avx512f",target_feature="fma"))] {
657 Self { avx512: fused_mul_neg_add_m512d(self.avx512, m.avx512, a.avx512) }
658 } else if #[cfg(target_feature="avx512f")] {
659 a - (self * m)
661 } else {
662 Self {
663 a : self.a.mul_neg_add(m.a, a.a),
664 b : self.b.mul_neg_add(m.b, a.b),
665 }
666 }
667 }
668 }
669
670 #[inline]
696 #[must_use]
697 pub fn mul_neg_sub(self, m: Self, s: Self) -> Self {
698 pick! {
699 if #[cfg(all(target_feature="avx512f",target_feature="fma"))] {
700 Self { avx512: fused_mul_neg_sub_m512d(self.avx512, m.avx512, s.avx512) }
701 } else if #[cfg(target_feature="avx512f")] {
702 -(self * m) - s
704 } else {
705 Self {
706 a : self.a.mul_neg_sub(m.a, s.a),
707 b : self.b.mul_neg_sub(m.b, s.b),
708 }
709 }
710 }
711 }
712
713 #[inline]
714 #[must_use]
715 pub fn flip_signs(self, signs: Self) -> Self {
716 self ^ (signs & Self::from(-0.0))
717 }
718
719 #[inline]
720 #[must_use]
721 pub fn copysign(self, sign: Self) -> Self {
722 let magnitude_mask = Self::from(f64::from_bits(u64::MAX >> 1));
723 (self & magnitude_mask) | (sign & Self::from(-0.0))
724 }
725
726 #[inline]
727 pub fn asin_acos(self) -> (Self, Self) {
728 const_f64_as_f64x8!(R4asin, 2.967721961301243206100E-3);
731 const_f64_as_f64x8!(R3asin, -5.634242780008963776856E-1);
732 const_f64_as_f64x8!(R2asin, 6.968710824104713396794E0);
733 const_f64_as_f64x8!(R1asin, -2.556901049652824852289E1);
734 const_f64_as_f64x8!(R0asin, 2.853665548261061424989E1);
735
736 const_f64_as_f64x8!(S3asin, -2.194779531642920639778E1);
737 const_f64_as_f64x8!(S2asin, 1.470656354026814941758E2);
738 const_f64_as_f64x8!(S1asin, -3.838770957603691357202E2);
739 const_f64_as_f64x8!(S0asin, 3.424398657913078477438E2);
740
741 const_f64_as_f64x8!(P5asin, 4.253011369004428248960E-3);
742 const_f64_as_f64x8!(P4asin, -6.019598008014123785661E-1);
743 const_f64_as_f64x8!(P3asin, 5.444622390564711410273E0);
744 const_f64_as_f64x8!(P2asin, -1.626247967210700244449E1);
745 const_f64_as_f64x8!(P1asin, 1.956261983317594739197E1);
746 const_f64_as_f64x8!(P0asin, -8.198089802484824371615E0);
747
748 const_f64_as_f64x8!(Q4asin, -1.474091372988853791896E1);
749 const_f64_as_f64x8!(Q3asin, 7.049610280856842141659E1);
750 const_f64_as_f64x8!(Q2asin, -1.471791292232726029859E2);
751 const_f64_as_f64x8!(Q1asin, 1.395105614657485689735E2);
752 const_f64_as_f64x8!(Q0asin, -4.918853881490881290097E1);
753
754 let xa = self.abs();
755
756 let big = xa.simd_ge(f64x8::splat(0.625));
757
758 let x1 = big.blend(f64x8::splat(1.0) - xa, xa * xa);
759
760 let x2 = x1 * x1;
761 let x3 = x2 * x1;
762 let x4 = x2 * x2;
763 let x5 = x4 * x1;
764
765 let do_big = big.any();
766 let do_small = !big.all();
767
768 let mut rx = f64x8::default();
769 let mut sx = f64x8::default();
770 let mut px = f64x8::default();
771 let mut qx = f64x8::default();
772
773 if do_big {
774 rx = x3.mul_add(R3asin, x2 * R2asin)
775 + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
776 sx =
777 x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
778 }
779
780 if do_small {
781 px = x3.mul_add(P3asin, P0asin)
782 + x4.mul_add(P4asin, x1 * P1asin)
783 + x5.mul_add(P5asin, x2 * P2asin);
784 qx = x4.mul_add(Q4asin, x5)
785 + x3.mul_add(Q3asin, x1 * Q1asin)
786 + x2.mul_add(Q2asin, Q0asin);
787 };
788
789 let vx = big.blend(rx, px);
790 let wx = big.blend(sx, qx);
791
792 let y1 = vx / wx * x1;
793
794 let mut z1 = f64x8::default();
795 let mut z2 = f64x8::default();
796 if do_big {
797 let xb = (x1 + x1).sqrt();
798 z1 = xb.mul_add(y1, xb);
799 }
800
801 if do_small {
802 z2 = xa.mul_add(y1, xa);
803 }
804
805 let z3 = f64x8::FRAC_PI_2 - z1;
807 let asin = big.blend(z3, z2);
808 let asin = asin.flip_signs(self);
809
810 let z3 = self.simd_lt(f64x8::ZERO).blend(f64x8::PI - z1, z1);
812 let z4 = f64x8::FRAC_PI_2 - z2.flip_signs(self);
813 let acos = big.blend(z3, z4);
814
815 (asin, acos)
816 }
817
818 #[inline]
819 pub fn acos(self) -> Self {
820 const_f64_as_f64x8!(R4asin, 2.967721961301243206100E-3);
823 const_f64_as_f64x8!(R3asin, -5.634242780008963776856E-1);
824 const_f64_as_f64x8!(R2asin, 6.968710824104713396794E0);
825 const_f64_as_f64x8!(R1asin, -2.556901049652824852289E1);
826 const_f64_as_f64x8!(R0asin, 2.853665548261061424989E1);
827
828 const_f64_as_f64x8!(S3asin, -2.194779531642920639778E1);
829 const_f64_as_f64x8!(S2asin, 1.470656354026814941758E2);
830 const_f64_as_f64x8!(S1asin, -3.838770957603691357202E2);
831 const_f64_as_f64x8!(S0asin, 3.424398657913078477438E2);
832
833 const_f64_as_f64x8!(P5asin, 4.253011369004428248960E-3);
834 const_f64_as_f64x8!(P4asin, -6.019598008014123785661E-1);
835 const_f64_as_f64x8!(P3asin, 5.444622390564711410273E0);
836 const_f64_as_f64x8!(P2asin, -1.626247967210700244449E1);
837 const_f64_as_f64x8!(P1asin, 1.956261983317594739197E1);
838 const_f64_as_f64x8!(P0asin, -8.198089802484824371615E0);
839
840 const_f64_as_f64x8!(Q4asin, -1.474091372988853791896E1);
841 const_f64_as_f64x8!(Q3asin, 7.049610280856842141659E1);
842 const_f64_as_f64x8!(Q2asin, -1.471791292232726029859E2);
843 const_f64_as_f64x8!(Q1asin, 1.395105614657485689735E2);
844 const_f64_as_f64x8!(Q0asin, -4.918853881490881290097E1);
845
846 let xa = self.abs();
847
848 let big = xa.simd_ge(f64x8::splat(0.625));
849
850 let x1 = big.blend(f64x8::splat(1.0) - xa, xa * xa);
851
852 let x2 = x1 * x1;
853 let x3 = x2 * x1;
854 let x4 = x2 * x2;
855 let x5 = x4 * x1;
856
857 let do_big = big.any();
858 let do_small = !big.all();
859
860 let mut rx = f64x8::default();
861 let mut sx = f64x8::default();
862 let mut px = f64x8::default();
863 let mut qx = f64x8::default();
864
865 if do_big {
866 rx = x3.mul_add(R3asin, x2 * R2asin)
867 + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
868 sx =
869 x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
870 }
871 if do_small {
872 px = x3.mul_add(P3asin, P0asin)
873 + x4.mul_add(P4asin, x1 * P1asin)
874 + x5.mul_add(P5asin, x2 * P2asin);
875 qx = x4.mul_add(Q4asin, x5)
876 + x3.mul_add(Q3asin, x1 * Q1asin)
877 + x2.mul_add(Q2asin, Q0asin);
878 };
879
880 let vx = big.blend(rx, px);
881 let wx = big.blend(sx, qx);
882
883 let y1 = vx / wx * x1;
884
885 let mut z1 = f64x8::default();
886 let mut z2 = f64x8::default();
887 if do_big {
888 let xb = (x1 + x1).sqrt();
889 z1 = xb.mul_add(y1, xb);
890 }
891
892 if do_small {
893 z2 = xa.mul_add(y1, xa);
894 }
895
896 let z3 = self.simd_lt(f64x8::ZERO).blend(f64x8::PI - z1, z1);
898 let z4 = f64x8::FRAC_PI_2 - z2.flip_signs(self);
899 let acos = big.blend(z3, z4);
900
901 acos
902 }
903 #[inline]
904 #[must_use]
905 pub fn asin(self) -> Self {
906 const_f64_as_f64x8!(R4asin, 2.967721961301243206100E-3);
909 const_f64_as_f64x8!(R3asin, -5.634242780008963776856E-1);
910 const_f64_as_f64x8!(R2asin, 6.968710824104713396794E0);
911 const_f64_as_f64x8!(R1asin, -2.556901049652824852289E1);
912 const_f64_as_f64x8!(R0asin, 2.853665548261061424989E1);
913
914 const_f64_as_f64x8!(S3asin, -2.194779531642920639778E1);
915 const_f64_as_f64x8!(S2asin, 1.470656354026814941758E2);
916 const_f64_as_f64x8!(S1asin, -3.838770957603691357202E2);
917 const_f64_as_f64x8!(S0asin, 3.424398657913078477438E2);
918
919 const_f64_as_f64x8!(P5asin, 4.253011369004428248960E-3);
920 const_f64_as_f64x8!(P4asin, -6.019598008014123785661E-1);
921 const_f64_as_f64x8!(P3asin, 5.444622390564711410273E0);
922 const_f64_as_f64x8!(P2asin, -1.626247967210700244449E1);
923 const_f64_as_f64x8!(P1asin, 1.956261983317594739197E1);
924 const_f64_as_f64x8!(P0asin, -8.198089802484824371615E0);
925
926 const_f64_as_f64x8!(Q4asin, -1.474091372988853791896E1);
927 const_f64_as_f64x8!(Q3asin, 7.049610280856842141659E1);
928 const_f64_as_f64x8!(Q2asin, -1.471791292232726029859E2);
929 const_f64_as_f64x8!(Q1asin, 1.395105614657485689735E2);
930 const_f64_as_f64x8!(Q0asin, -4.918853881490881290097E1);
931
932 let xa = self.abs();
933
934 let big = xa.simd_ge(f64x8::splat(0.625));
935
936 let x1 = big.blend(f64x8::splat(1.0) - xa, xa * xa);
937
938 let x2 = x1 * x1;
939 let x3 = x2 * x1;
940 let x4 = x2 * x2;
941 let x5 = x4 * x1;
942
943 let do_big = big.any();
944 let do_small = !big.all();
945
946 let mut rx = f64x8::default();
947 let mut sx = f64x8::default();
948 let mut px = f64x8::default();
949 let mut qx = f64x8::default();
950
951 if do_big {
952 rx = x3.mul_add(R3asin, x2 * R2asin)
953 + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
954 sx =
955 x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
956 }
957 if do_small {
958 px = x3.mul_add(P3asin, P0asin)
959 + x4.mul_add(P4asin, x1 * P1asin)
960 + x5.mul_add(P5asin, x2 * P2asin);
961 qx = x4.mul_add(Q4asin, x5)
962 + x3.mul_add(Q3asin, x1 * Q1asin)
963 + x2.mul_add(Q2asin, Q0asin);
964 };
965
966 let vx = big.blend(rx, px);
967 let wx = big.blend(sx, qx);
968
969 let y1 = vx / wx * x1;
970
971 let mut z1 = f64x8::default();
972 let mut z2 = f64x8::default();
973 if do_big {
974 let xb = (x1 + x1).sqrt();
975 z1 = xb.mul_add(y1, xb);
976 }
977
978 if do_small {
979 z2 = xa.mul_add(y1, xa);
980 }
981
982 let z3 = f64x8::FRAC_PI_2 - z1;
984 let asin = big.blend(z3, z2);
985 let asin = asin.flip_signs(self);
986
987 asin
988 }
989
990 #[inline]
991 pub fn atan(self) -> Self {
992 const_f64_as_f64x8!(MORE_BITS, 6.123233995736765886130E-17);
995 const_f64_as_f64x8!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
996 const_f64_as_f64x8!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
997
998 const_f64_as_f64x8!(P4atan, -8.750608600031904122785E-1);
999 const_f64_as_f64x8!(P3atan, -1.615753718733365076637E1);
1000 const_f64_as_f64x8!(P2atan, -7.500855792314704667340E1);
1001 const_f64_as_f64x8!(P1atan, -1.228866684490136173410E2);
1002 const_f64_as_f64x8!(P0atan, -6.485021904942025371773E1);
1003
1004 const_f64_as_f64x8!(Q4atan, 2.485846490142306297962E1);
1005 const_f64_as_f64x8!(Q3atan, 1.650270098316988542046E2);
1006 const_f64_as_f64x8!(Q2atan, 4.328810604912902668951E2);
1007 const_f64_as_f64x8!(Q1atan, 4.853903996359136964868E2);
1008 const_f64_as_f64x8!(Q0atan, 1.945506571482613964425E2);
1009
1010 let t = self.abs();
1011
1012 let notbig = t.simd_le(T3PO8);
1016 let notsmal = t.simd_ge(Self::splat(0.66));
1017
1018 let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1019 s = notsmal & s;
1020 let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1021 fac = notsmal & fac;
1022
1023 let mut a = notbig & t;
1027 a = notsmal.blend(a - Self::ONE, a);
1028 let mut b = notbig & Self::ONE;
1029 b = notsmal.blend(b + t, b);
1030 let z = a / b;
1031
1032 let zz = z * z;
1033
1034 let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1035 let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1036
1037 let mut re = (px / qx).mul_add(z * zz, z);
1038 re += s + fac;
1039
1040 re = (self.sign_bit()).blend(-re, re);
1042
1043 re
1044 }
1045
1046 #[inline]
1047 pub fn atan2(self, x: Self) -> Self {
1048 const_f64_as_f64x8!(MORE_BITS, 6.123233995736765886130E-17);
1051 const_f64_as_f64x8!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
1052 const_f64_as_f64x8!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
1053
1054 const_f64_as_f64x8!(P4atan, -8.750608600031904122785E-1);
1055 const_f64_as_f64x8!(P3atan, -1.615753718733365076637E1);
1056 const_f64_as_f64x8!(P2atan, -7.500855792314704667340E1);
1057 const_f64_as_f64x8!(P1atan, -1.228866684490136173410E2);
1058 const_f64_as_f64x8!(P0atan, -6.485021904942025371773E1);
1059
1060 const_f64_as_f64x8!(Q4atan, 2.485846490142306297962E1);
1061 const_f64_as_f64x8!(Q3atan, 1.650270098316988542046E2);
1062 const_f64_as_f64x8!(Q2atan, 4.328810604912902668951E2);
1063 const_f64_as_f64x8!(Q1atan, 4.853903996359136964868E2);
1064 const_f64_as_f64x8!(Q0atan, 1.945506571482613964425E2);
1065
1066 let y = self;
1067
1068 let x1 = x.abs();
1070 let y1 = y.abs();
1071 let swapxy = y1.simd_gt(x1);
1072 let mut x2 = swapxy.blend(y1, x1);
1074 let mut y2 = swapxy.blend(x1, y1);
1075
1076 let both_infinite = x.is_inf() & y.is_inf();
1078 if both_infinite.any() {
1079 let minus_one = -Self::ONE;
1080 x2 = both_infinite.blend(x2 & minus_one, x2);
1081 y2 = both_infinite.blend(y2 & minus_one, y2);
1082 }
1083
1084 let t = y2 / x2;
1086
1087 let notbig = t.simd_le(T3PO8);
1091 let notsmal = t.simd_ge(Self::splat(0.66));
1092
1093 let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1094 s = notsmal & s;
1095 let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1096 fac = notsmal & fac;
1097
1098 let mut a = notbig & t;
1102 a = notsmal.blend(a - Self::ONE, a);
1103 let mut b = notbig & Self::ONE;
1104 b = notsmal.blend(b + t, b);
1105 let z = a / b;
1106
1107 let zz = z * z;
1108
1109 let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1110 let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1111
1112 let mut re = (px / qx).mul_add(z * zz, z);
1113 re += s + fac;
1114
1115 re = swapxy.blend(Self::FRAC_PI_2 - re, re);
1117 re = ((x | y).simd_eq(Self::ZERO)).blend(Self::ZERO, re);
1118 re = (x.sign_bit()).blend(Self::PI - re, re);
1119
1120 re = (y.sign_bit()).blend(-re, re);
1122
1123 re
1124 }
1125
1126 #[inline]
1127 #[must_use]
1128 pub fn sin_cos(self) -> (Self, Self) {
1129 const_f64_as_f64x8!(P0sin, -1.66666666666666307295E-1);
1133 const_f64_as_f64x8!(P1sin, 8.33333333332211858878E-3);
1134 const_f64_as_f64x8!(P2sin, -1.98412698295895385996E-4);
1135 const_f64_as_f64x8!(P3sin, 2.75573136213857245213E-6);
1136 const_f64_as_f64x8!(P4sin, -2.50507477628578072866E-8);
1137 const_f64_as_f64x8!(P5sin, 1.58962301576546568060E-10);
1138
1139 const_f64_as_f64x8!(P0cos, 4.16666666666665929218E-2);
1140 const_f64_as_f64x8!(P1cos, -1.38888888888730564116E-3);
1141 const_f64_as_f64x8!(P2cos, 2.48015872888517045348E-5);
1142 const_f64_as_f64x8!(P3cos, -2.75573141792967388112E-7);
1143 const_f64_as_f64x8!(P4cos, 2.08757008419747316778E-9);
1144 const_f64_as_f64x8!(P5cos, -1.13585365213876817300E-11);
1145
1146 const_f64_as_f64x8!(DP1, 7.853981554508209228515625E-1 * 2.);
1147 const_f64_as_f64x8!(DP2, 7.94662735614792836714E-9 * 2.);
1148 const_f64_as_f64x8!(DP3, 3.06161699786838294307E-17 * 2.);
1149
1150 const_f64_as_f64x8!(TWO_OVER_PI, 2.0 / core::f64::consts::PI);
1151
1152 let xa = self.abs();
1153
1154 let y = (xa * TWO_OVER_PI).round();
1155 let q = y.round_int();
1156
1157 let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa)));
1158
1159 let x2 = x * x;
1160 let mut s = polynomial_5!(x2, P0sin, P1sin, P2sin, P3sin, P4sin, P5sin);
1161 let mut c = polynomial_5!(x2, P0cos, P1cos, P2cos, P3cos, P4cos, P5cos);
1162 s = (x * x2).mul_add(s, x);
1163 c =
1164 (x2 * x2).mul_add(c, x2.mul_neg_add(f64x8::from(0.5), f64x8::from(1.0)));
1165
1166 let swap = !((q & i64x8::from(1)).simd_eq(i64x8::from(0)));
1167
1168 let mut overflow: f64x8 = cast(q.simd_gt(i64x8::from(0x80000000000000)));
1169 overflow &= xa.is_finite();
1170 s = overflow.blend(f64x8::from(0.0), s);
1171 c = overflow.blend(f64x8::from(1.0), c);
1172
1173 let mut sin1 = cast::<_, f64x8>(swap).blend(c, s);
1175 let sign_sin: i64x8 = (q << 62) ^ cast::<_, i64x8>(self);
1176 sin1 = sin1.flip_signs(cast(sign_sin));
1177
1178 let mut cos1 = cast::<_, f64x8>(swap).blend(s, c);
1180 let sign_cos: i64x8 = ((q + i64x8::from(1)) & i64x8::from(2)) << 62;
1181 cos1 ^= cast::<_, f64x8>(sign_cos);
1182
1183 (sin1, cos1)
1184 }
1185 #[inline]
1186 #[must_use]
1187 pub fn sin(self) -> Self {
1188 let (s, _) = self.sin_cos();
1189 s
1190 }
1191 #[inline]
1192 #[must_use]
1193 pub fn cos(self) -> Self {
1194 let (_, c) = self.sin_cos();
1195 c
1196 }
1197 #[inline]
1198 #[must_use]
1199 pub fn tan(self) -> Self {
1200 let (s, c) = self.sin_cos();
1201 s / c
1202 }
1203 #[inline]
1204 #[must_use]
1205 pub fn to_degrees(self) -> Self {
1206 const_f64_as_f64x8!(RAD_TO_DEG_RATIO, 180.0_f64 / core::f64::consts::PI);
1207 self * RAD_TO_DEG_RATIO
1208 }
1209 #[inline]
1210 #[must_use]
1211 pub fn to_radians(self) -> Self {
1212 const_f64_as_f64x8!(DEG_TO_RAD_RATIO, core::f64::consts::PI / 180.0_f64);
1213 self * DEG_TO_RAD_RATIO
1214 }
1215 #[inline]
1216 #[must_use]
1217 pub fn sqrt(self) -> Self {
1218 pick! {
1219 if #[cfg(target_feature="avx512f")] {
1220 Self { avx512: sqrt_m512d(self.avx512) }
1221 } else {
1222 Self {
1223 a : self.a.sqrt(),
1224 b : self.b.sqrt(),
1225 }
1226 }
1227 }
1228 }
1229 #[inline]
1230 #[must_use]
1231 #[doc(alias("movemask", "move_mask"))]
1232 pub fn to_bitmask(self) -> u32 {
1233 pick! {
1234 if #[cfg(target_feature="avx512f")] {
1235 movepi64_mask_m512d(self.avx512) as u32
1236 } else {
1237 (self.b.to_bitmask() << 4) | self.a.to_bitmask()
1238 }
1239 }
1240 }
1241 #[inline]
1242 #[must_use]
1243 pub fn any(self) -> bool {
1244 pick! {
1245 if #[cfg(target_feature="avx512f")] {
1246 movepi64_mask_m512d(self.avx512) != 0
1247 } else {
1248 self.a.any() || self.b.any()
1249 }
1250 }
1251 }
1252 #[inline]
1253 #[must_use]
1254 pub fn all(self) -> bool {
1255 pick! {
1256 if #[cfg(target_feature="avx512f")] {
1257 movepi64_mask_m512d(self.avx512) == 0b11111111
1258 } else {
1259 self.a.all() && self.b.all()
1260 }
1261 }
1262 }
1263
1264 #[inline]
1265 #[must_use]
1266 pub fn none(self) -> bool {
1267 !self.any()
1268 }
1269
1270 #[inline]
1271 fn vm_pow2n(self) -> Self {
1272 const_f64_as_f64x8!(pow2_52, 4503599627370496.0);
1273 const_f64_as_f64x8!(bias, 1023.0);
1274 let a = self + (bias + pow2_52);
1275 let c = cast::<_, i64x8>(a) << 52;
1276 cast::<_, f64x8>(c)
1277 }
1278
1279 #[inline]
1281 #[must_use]
1282 pub fn exp(self) -> Self {
1283 const_f64_as_f64x8!(P2, 1.0 / 2.0);
1284 const_f64_as_f64x8!(P3, 1.0 / 6.0);
1285 const_f64_as_f64x8!(P4, 1. / 24.);
1286 const_f64_as_f64x8!(P5, 1. / 120.);
1287 const_f64_as_f64x8!(P6, 1. / 720.);
1288 const_f64_as_f64x8!(P7, 1. / 5040.);
1289 const_f64_as_f64x8!(P8, 1. / 40320.);
1290 const_f64_as_f64x8!(P9, 1. / 362880.);
1291 const_f64_as_f64x8!(P10, 1. / 3628800.);
1292 const_f64_as_f64x8!(P11, 1. / 39916800.);
1293 const_f64_as_f64x8!(P12, 1. / 479001600.);
1294 const_f64_as_f64x8!(P13, 1. / 6227020800.);
1295 const_f64_as_f64x8!(LN2D_HI, 0.693145751953125);
1296 const_f64_as_f64x8!(LN2D_LO, 1.42860682030941723212E-6);
1297 let max_x = f64x8::from(708.39);
1298 let r = (self * Self::LOG2_E).round();
1299 let x = r.mul_neg_add(LN2D_HI, self);
1300 let x = r.mul_neg_add(LN2D_LO, x);
1301 let z =
1302 polynomial_13!(x, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13);
1303 let n2 = Self::vm_pow2n(r);
1304 let z = (z + Self::ONE) * n2;
1305 let in_range = self.abs().simd_lt(max_x);
1307 let in_range = in_range & self.is_finite();
1308 in_range.blend(z, Self::ZERO)
1309 }
1310
1311 #[inline]
1312 fn exponent(self) -> f64x8 {
1313 const_f64_as_f64x8!(pow2_52, 4503599627370496.0);
1314 const_f64_as_f64x8!(bias, 1023.0);
1315 let a = cast::<_, u64x8>(self);
1316 let b = a >> 52;
1317 let c = b | cast::<_, u64x8>(pow2_52);
1318 let d = cast::<_, f64x8>(c);
1319 let e = d - (pow2_52 + bias);
1320 e
1321 }
1322
1323 #[inline]
1324 fn fraction_2(self) -> Self {
1325 let t1 = cast::<_, u64x8>(self);
1326 let t2 = cast::<_, u64x8>(
1327 (t1 & u64x8::from(0x000FFFFFFFFFFFFF)) | u64x8::from(0x3FE0000000000000),
1328 );
1329 cast::<_, f64x8>(t2)
1330 }
1331 #[inline]
1332 fn is_zero_or_subnormal(self) -> Self {
1333 let t = cast::<_, i64x8>(self);
1334 let t = t & i64x8::splat(0x7FF0000000000000);
1335 i64x8::round_float(t.simd_eq(i64x8::splat(0)))
1336 }
1337 #[inline]
1338 fn infinity() -> Self {
1339 cast::<_, f64x8>(i64x8::splat(0x7FF0000000000000))
1340 }
1341 #[inline]
1342 fn nan_log() -> Self {
1343 cast::<_, f64x8>(i64x8::splat(0x7FF8000000000000 | 0x101 << 29))
1344 }
1345 #[inline]
1346 fn nan_pow() -> Self {
1347 cast::<_, f64x8>(i64x8::splat(0x7FF8000000000000 | 0x101 << 29))
1348 }
1349 #[inline]
1350 fn sign_bit(self) -> Self {
1351 let t1 = cast::<_, i64x8>(self);
1352 let t2 = t1 >> 63;
1353 !cast::<_, f64x8>(t2).simd_eq(f64x8::ZERO)
1354 }
1355
1356 #[inline]
1357 pub fn reduce_add(self) -> f64 {
1358 pick! {
1359 if #[cfg(target_feature="avx512f")] {
1360 let lo = cast_to_m256d_from_m512d(self.avx512);
1362 let hi = extract_m256d_from_m512d::<1>(self.avx512);
1363 let v = add_m256d(lo, hi); let t = add_horizontal_m256d(v, v); let lo = cast_to_m128d_from_m256d(t); let hi = extract_m128d_from_m256d::<1>(t); let s = add_m128d(lo, hi); get_f64_from_m128d_s(s)
1369 } else {
1370 self.a.reduce_add() + self.b.reduce_add()
1371 }
1372 }
1373 }
1374
1375 #[inline]
1377 #[must_use]
1378 pub fn ln(self) -> Self {
1379 const_f64_as_f64x8!(HALF, 0.5);
1380 const_f64_as_f64x8!(P0, 7.70838733755885391666E0);
1381 const_f64_as_f64x8!(P1, 1.79368678507819816313E1);
1382 const_f64_as_f64x8!(P2, 1.44989225341610930846E1);
1383 const_f64_as_f64x8!(P3, 4.70579119878881725854E0);
1384 const_f64_as_f64x8!(P4, 4.97494994976747001425E-1);
1385 const_f64_as_f64x8!(P5, 1.01875663804580931796E-4);
1386
1387 const_f64_as_f64x8!(Q0, 2.31251620126765340583E1);
1388 const_f64_as_f64x8!(Q1, 7.11544750618563894466E1);
1389 const_f64_as_f64x8!(Q2, 8.29875266912776603211E1);
1390 const_f64_as_f64x8!(Q3, 4.52279145837532221105E1);
1391 const_f64_as_f64x8!(Q4, 1.12873587189167450590E1);
1392 const_f64_as_f64x8!(LN2F_HI, 0.693359375);
1393 const_f64_as_f64x8!(LN2F_LO, -2.12194440e-4);
1394 const_f64_as_f64x8!(VM_SQRT2, 1.414213562373095048801);
1395 const_f64_as_f64x8!(VM_SMALLEST_NORMAL, 1.17549435E-38);
1396
1397 let x1 = self;
1398 let x = Self::fraction_2(x1);
1399 let e = Self::exponent(x1);
1400 let mask = x.simd_gt(VM_SQRT2 * HALF);
1401 let x = (!mask).blend(x + x, x);
1402 let fe = mask.blend(e + Self::ONE, e);
1403 let x = x - Self::ONE;
1404 let px = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
1405 let x2 = x * x;
1406 let px = x2 * x * px;
1407 let qx = polynomial_5n!(x, Q0, Q1, Q2, Q3, Q4);
1408 let res = px / qx;
1409 let res = fe.mul_add(LN2F_LO, res);
1410 let res = res + x2.mul_neg_add(HALF, x);
1411 let res = fe.mul_add(LN2F_HI, res);
1412 let overflow = !self.is_finite();
1413 let underflow = x1.simd_lt(VM_SMALLEST_NORMAL);
1414 let mask = overflow | underflow;
1415 if !mask.any() {
1416 res
1417 } else {
1418 let is_zero = self.is_zero_or_subnormal();
1419 let res = underflow.blend(Self::nan_log(), res);
1420 let res = is_zero.blend(Self::infinity(), res);
1421 let res = overflow.blend(self, res);
1422 res
1423 }
1424 }
1425
1426 #[inline]
1427 #[must_use]
1428 pub fn log2(self) -> Self {
1429 Self::ln(self) * Self::LOG2_E
1430 }
1431 #[inline]
1432 #[must_use]
1433 pub fn log10(self) -> Self {
1434 Self::ln(self) * Self::LOG10_E
1435 }
1436
1437 #[inline]
1438 #[must_use]
1439 pub fn pow_f64x8(self, y: Self) -> Self {
1440 const_f64_as_f64x8!(ln2d_hi, 0.693145751953125);
1441 const_f64_as_f64x8!(ln2d_lo, 1.42860682030941723212E-6);
1442 const_f64_as_f64x8!(P0log, 2.0039553499201281259648E1);
1443 const_f64_as_f64x8!(P1log, 5.7112963590585538103336E1);
1444 const_f64_as_f64x8!(P2log, 6.0949667980987787057556E1);
1445 const_f64_as_f64x8!(P3log, 2.9911919328553073277375E1);
1446 const_f64_as_f64x8!(P4log, 6.5787325942061044846969E0);
1447 const_f64_as_f64x8!(P5log, 4.9854102823193375972212E-1);
1448 const_f64_as_f64x8!(P6log, 4.5270000862445199635215E-5);
1449 const_f64_as_f64x8!(Q0log, 6.0118660497603843919306E1);
1450 const_f64_as_f64x8!(Q1log, 2.1642788614495947685003E2);
1451 const_f64_as_f64x8!(Q2log, 3.0909872225312059774938E2);
1452 const_f64_as_f64x8!(Q3log, 2.2176239823732856465394E2);
1453 const_f64_as_f64x8!(Q4log, 8.3047565967967209469434E1);
1454 const_f64_as_f64x8!(Q5log, 1.5062909083469192043167E1);
1455
1456 const_f64_as_f64x8!(p2, 1.0 / 2.0); const_f64_as_f64x8!(p3, 1.0 / 6.0);
1459 const_f64_as_f64x8!(p4, 1.0 / 24.0);
1460 const_f64_as_f64x8!(p5, 1.0 / 120.0);
1461 const_f64_as_f64x8!(p6, 1.0 / 720.0);
1462 const_f64_as_f64x8!(p7, 1.0 / 5040.0);
1463 const_f64_as_f64x8!(p8, 1.0 / 40320.0);
1464 const_f64_as_f64x8!(p9, 1.0 / 362880.0);
1465 const_f64_as_f64x8!(p10, 1.0 / 3628800.0);
1466 const_f64_as_f64x8!(p11, 1.0 / 39916800.0);
1467 const_f64_as_f64x8!(p12, 1.0 / 479001600.0);
1468 const_f64_as_f64x8!(p13, 1.0 / 6227020800.0);
1469
1470 let x1 = self.abs();
1471 let x = x1.fraction_2();
1472 let mask = x.simd_gt(f64x8::SQRT_2 * f64x8::HALF);
1473 let x = (!mask).blend(x + x, x);
1474 let x = x - f64x8::ONE;
1475 let x2 = x * x;
1476 let px = polynomial_6!(x, P0log, P1log, P2log, P3log, P4log, P5log, P6log);
1477 let px = px * x * x2;
1478 let qx = polynomial_6n!(x, Q0log, Q1log, Q2log, Q3log, Q4log, Q5log);
1479 let lg1 = px / qx;
1480
1481 let ef = x1.exponent();
1482 let ef = mask.blend(ef + f64x8::ONE, ef);
1483 let e1 = (ef * y).round();
1484 let yr = ef.mul_sub(y, e1);
1485
1486 let lg = f64x8::HALF.mul_neg_add(x2, x) + lg1;
1487 let x2err = (f64x8::HALF * x).mul_sub(x, f64x8::HALF * x2);
1488 let lg_err = f64x8::HALF.mul_add(x2, lg - x) - lg1;
1489
1490 let e2 = (lg * y * f64x8::LOG2_E).round();
1491 let v = lg.mul_sub(y, e2 * ln2d_hi);
1492 let v = e2.mul_neg_add(ln2d_lo, v);
1493 let v = v - (lg_err + x2err).mul_sub(y, yr * f64x8::LN_2);
1494
1495 let x = v;
1496 let e3 = (x * f64x8::LOG2_E).round();
1497 let x = e3.mul_neg_add(f64x8::LN_2, x);
1498 let z =
1499 polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
1500 + f64x8::ONE;
1501 let ee = e1 + e2 + e3;
1502 let ei = cast::<_, i64x8>(ee.round_int());
1503 let ej = cast::<_, i64x8>(ei + (cast::<_, i64x8>(z) >> 52));
1504
1505 let overflow = cast::<_, f64x8>(!ej.simd_lt(i64x8::splat(0x07FF)))
1506 | ee.simd_gt(f64x8::splat(3000.0));
1507 let underflow = cast::<_, f64x8>(!ej.simd_gt(i64x8::splat(0x000)))
1508 | ee.simd_lt(f64x8::splat(-3000.0));
1509
1510 let z = cast::<_, f64x8>(cast::<_, i64x8>(z) + (ei << 52));
1512
1513 let z = if (overflow | underflow).any() {
1515 let z = underflow.blend(f64x8::ZERO, z);
1516 overflow.blend(Self::infinity(), z)
1517 } else {
1518 z
1519 };
1520
1521 let x_zero = self.is_zero_or_subnormal();
1523 let z = x_zero.blend(
1524 y.simd_lt(f64x8::ZERO).blend(
1525 Self::infinity(),
1526 y.simd_eq(f64x8::ZERO).blend(f64x8::ONE, f64x8::ZERO),
1527 ),
1528 z,
1529 );
1530
1531 let x_sign = self.sign_bit();
1532
1533 let z = if x_sign.any() {
1534 let yi = y.simd_eq(y.round());
1536 let y_odd = cast::<_, i64x8>(y.round_int() << 63).round_float();
1538 let z1 =
1539 yi.blend(z | y_odd, self.simd_eq(Self::ZERO).blend(z, Self::nan_pow()));
1540 x_sign.blend(z1, z)
1541 } else {
1542 z
1543 };
1544
1545 let x_finite = self.is_finite();
1546 let y_finite = y.is_finite();
1547 let e_finite = ee.is_finite();
1548
1549 if (x_finite & y_finite & (e_finite | x_zero)).all() {
1550 return z;
1551 }
1552
1553 (self.is_nan() | y.is_nan()).blend(self + y, z)
1554 }
1555
1556 #[inline]
1557 pub fn powf(self, y: f64) -> Self {
1558 Self::pow_f64x8(self, f64x8::splat(y))
1559 }
1560
1561 #[inline]
1562 #[must_use]
1563 pub fn to_array(self) -> [f64; 8] {
1564 cast(self)
1565 }
1566
1567 #[inline]
1568 #[must_use]
1569 pub fn as_array(&self) -> &[f64; 8] {
1570 cast_ref(self)
1571 }
1572
1573 #[inline]
1574 #[must_use]
1575 pub fn as_mut_array(&mut self) -> &mut [f64; 8] {
1576 cast_mut(self)
1577 }
1578
1579 #[inline]
1580 pub fn from_i32x8(v: i32x8) -> Self {
1581 pick! {
1582 if #[cfg(target_feature="avx512f")] {
1583 Self { avx512: convert_to_m512d_from_i32_m256i(v.avx2) }
1584 } else {
1585 Self::new([
1586 v.as_array()[0] as f64,
1587 v.as_array()[1] as f64,
1588 v.as_array()[2] as f64,
1589 v.as_array()[3] as f64,
1590 v.as_array()[4] as f64,
1591 v.as_array()[5] as f64,
1592 v.as_array()[6] as f64,
1593 v.as_array()[7] as f64,
1594 ])
1595 }
1596 }
1597 }
1598}
1599
1600impl From<i32x8> for f64x8 {
1601 #[inline]
1602 fn from(v: i32x8) -> Self {
1603 Self::from_i32x8(v)
1604 }
1605}
1606
1607impl Not for f64x8 {
1608 type Output = Self;
1609 #[inline]
1610 fn not(self) -> Self::Output {
1611 pick! {
1612 if #[cfg(target_feature="avx512f")] {
1613 Self { avx512: bitxor_m512d(self.avx512, set_splat_m512d(f64::from_bits(u64::MAX))) }
1614 } else {
1615 Self {
1616 a : self.a.not(),
1617 b : self.b.not(),
1618 }
1619 }
1620 }
1621 }
1622}