1use super::*;
2
3pick! {
4 if #[cfg(target_feature="sse2")] {
5 #[derive(Default, Clone, Copy, PartialEq)]
6 #[repr(C, align(16))]
7 pub struct f64x2 { pub(crate) sse: m128d }
8 } else if #[cfg(target_feature="simd128")] {
9 use core::arch::wasm32::*;
10
11 #[derive(Clone, Copy)]
12 #[repr(transparent)]
13 pub struct f64x2 { pub(crate) simd: v128 }
14
15 impl Default for f64x2 {
16 fn default() -> Self {
17 Self::splat(0.0)
18 }
19 }
20
21 impl PartialEq for f64x2 {
22 fn eq(&self, other: &Self) -> bool {
23 u64x2_all_true(f64x2_eq(self.simd, other.simd))
24 }
25 }
26 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
27 use core::arch::aarch64::*;
28 #[repr(C)]
29 #[derive(Copy, Clone)]
30 pub struct f64x2 { pub(crate) neon: float64x2_t }
31
32 impl Default for f64x2 {
33 #[inline]
34 fn default() -> Self {
35 unsafe { Self { neon: vdupq_n_f64(0.0)} }
36 }
37 }
38
39 impl PartialEq for f64x2 {
40 #[inline]
41 fn eq(&self, other: &Self) -> bool {
42 unsafe
43 { let e = vceqq_f64(self.neon, other.neon);
44 vgetq_lane_u64(e,0) == u64::MAX && vgetq_lane_u64(e,1) == u64::MAX
45 }
46 }
47
48 }
49 } else {
50 #[derive(Default, Clone, Copy, PartialEq)]
51 #[repr(C, align(16))]
52 pub struct f64x2 { pub(crate) arr: [f64;2] }
53 }
54}
55
/// Declares a `pub const` of type `f64x2` named `$name` whose two lanes both
/// hold the scalar expression `$value`.
macro_rules! const_f64_as_f64x2 {
  ($name:ident, $value:expr) => {
    // Callers in this file pass mixed-case coefficient names (e.g. `R4asin`),
    // so the upper-case lint is silenced for the generated constant.
    #[allow(non_upper_case_globals)]
    pub const $name: f64x2 = f64x2::new([$value; 2]);
  };
}
62
63impl f64x2 {
64 const_f64_as_f64x2!(ONE, 1.0);
65 const_f64_as_f64x2!(ZERO, 0.0);
66 const_f64_as_f64x2!(HALF, 0.5);
67 const_f64_as_f64x2!(E, core::f64::consts::E);
68 const_f64_as_f64x2!(FRAC_1_PI, core::f64::consts::FRAC_1_PI);
69 const_f64_as_f64x2!(FRAC_2_PI, core::f64::consts::FRAC_2_PI);
70 const_f64_as_f64x2!(FRAC_2_SQRT_PI, core::f64::consts::FRAC_2_SQRT_PI);
71 const_f64_as_f64x2!(FRAC_1_SQRT_2, core::f64::consts::FRAC_1_SQRT_2);
72 const_f64_as_f64x2!(FRAC_PI_2, core::f64::consts::FRAC_PI_2);
73 const_f64_as_f64x2!(FRAC_PI_3, core::f64::consts::FRAC_PI_3);
74 const_f64_as_f64x2!(FRAC_PI_4, core::f64::consts::FRAC_PI_4);
75 const_f64_as_f64x2!(FRAC_PI_6, core::f64::consts::FRAC_PI_6);
76 const_f64_as_f64x2!(FRAC_PI_8, core::f64::consts::FRAC_PI_8);
77 const_f64_as_f64x2!(LN_2, core::f64::consts::LN_2);
78 const_f64_as_f64x2!(LN_10, core::f64::consts::LN_10);
79 const_f64_as_f64x2!(LOG2_E, core::f64::consts::LOG2_E);
80 const_f64_as_f64x2!(LOG10_E, core::f64::consts::LOG10_E);
81 const_f64_as_f64x2!(LOG10_2, core::f64::consts::LOG10_2);
82 const_f64_as_f64x2!(LOG2_10, core::f64::consts::LOG2_10);
83 const_f64_as_f64x2!(PI, core::f64::consts::PI);
84 const_f64_as_f64x2!(SQRT_2, core::f64::consts::SQRT_2);
85 const_f64_as_f64x2!(TAU, core::f64::consts::TAU);
86}
87
// SAFETY: the portable representation is `[f64; 2]`, for which all-zero bits
// are the value `0.0` in both lanes.
// NOTE(review): the SIMD-backed representations (`m128d`, `v128`,
// `float64x2_t`) are assumed to accept all-zero bits as well — confirm.
unsafe impl Zeroable for f64x2 {}
// SAFETY: NOTE(review): relies on every representation being a padding-free
// 16-byte value for which any bit pattern is valid — confirm for `m128d`,
// which is declared outside this file.
unsafe impl Pod for f64x2 {}

// Declares `f64` as the element type of this vector for `AlignTo`.
impl AlignTo for f64x2 {
  type Elem = f64;
}
94
95impl Add for f64x2 {
96 type Output = Self;
97 #[inline]
98 fn add(self, rhs: Self) -> Self::Output {
99 pick! {
100 if #[cfg(target_feature="sse2")] {
101 Self { sse: add_m128d(self.sse, rhs.sse) }
102 } else if #[cfg(target_feature="simd128")] {
103 Self { simd: f64x2_add(self.simd, rhs.simd) }
104 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
105 unsafe { Self { neon: vaddq_f64(self.neon, rhs.neon) } }
106 } else {
107 Self { arr: [
108 self.arr[0] + rhs.arr[0],
109 self.arr[1] + rhs.arr[1],
110 ]}
111 }
112 }
113 }
114}
115
116impl Sub for f64x2 {
117 type Output = Self;
118 #[inline]
119 fn sub(self, rhs: Self) -> Self::Output {
120 pick! {
121 if #[cfg(target_feature="sse2")] {
122 Self { sse: sub_m128d(self.sse, rhs.sse) }
123 } else if #[cfg(target_feature="simd128")] {
124 Self { simd: f64x2_sub(self.simd, rhs.simd) }
125 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
126 unsafe { Self { neon: vsubq_f64(self.neon, rhs.neon) } }
127 } else {
128 Self { arr: [
129 self.arr[0] - rhs.arr[0],
130 self.arr[1] - rhs.arr[1],
131 ]}
132 }
133 }
134 }
135}
136
137impl Mul for f64x2 {
138 type Output = Self;
139 #[inline]
140 fn mul(self, rhs: Self) -> Self::Output {
141 pick! {
142 if #[cfg(target_feature="sse2")] {
143 Self { sse: mul_m128d(self.sse, rhs.sse) }
144 } else if #[cfg(target_feature="simd128")] {
145 Self { simd: f64x2_mul(self.simd, rhs.simd) }
146 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
147 unsafe {Self { neon: vmulq_f64(self.neon, rhs.neon) }}
148 } else {
149 Self { arr: [
150 self.arr[0] * rhs.arr[0],
151 self.arr[1] * rhs.arr[1],
152 ]}
153 }
154 }
155 }
156}
157
158impl Div for f64x2 {
159 type Output = Self;
160 #[inline]
161 fn div(self, rhs: Self) -> Self::Output {
162 pick! {
163 if #[cfg(target_feature="sse2")] {
164 Self { sse: div_m128d(self.sse, rhs.sse) }
165 } else if #[cfg(target_feature="simd128")] {
166 Self { simd: f64x2_div(self.simd, rhs.simd) }
167 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
168 unsafe {Self { neon: vdivq_f64(self.neon, rhs.neon) }}
169 } else {
170 Self { arr: [
171 self.arr[0] / rhs.arr[0],
172 self.arr[1] / rhs.arr[1],
173 ]}
174 }
175 }
176 }
177}
178
179impl Neg for f64x2 {
180 type Output = Self;
181 #[inline]
182 fn neg(self) -> Self::Output {
183 pick! {
184 if #[cfg(target_feature="sse")] {
185 Self { sse: bitxor_m128d(self.sse, Self::splat(-0.0).sse) }
186 } else if #[cfg(target_feature="simd128")] {
187 Self { simd: f64x2_neg(self.simd) }
188 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
189 unsafe {Self { neon: vnegq_f64(self.neon) }}
190 } else {
191 Self { arr: [
192 -self.arr[0],
193 -self.arr[1],
194 ]}
195 }
196 }
197 }
198}
199
200impl Add<f64> for f64x2 {
201 type Output = Self;
202 #[inline]
203 fn add(self, rhs: f64) -> Self::Output {
204 self.add(Self::splat(rhs))
205 }
206}
207
208impl Sub<f64> for f64x2 {
209 type Output = Self;
210 #[inline]
211 fn sub(self, rhs: f64) -> Self::Output {
212 self.sub(Self::splat(rhs))
213 }
214}
215
216impl Mul<f64> for f64x2 {
217 type Output = Self;
218 #[inline]
219 fn mul(self, rhs: f64) -> Self::Output {
220 self.mul(Self::splat(rhs))
221 }
222}
223
224impl Div<f64> for f64x2 {
225 type Output = Self;
226 #[inline]
227 fn div(self, rhs: f64) -> Self::Output {
228 self.div(Self::splat(rhs))
229 }
230}
231
232impl Add<f64x2> for f64 {
233 type Output = f64x2;
234 #[inline]
235 fn add(self, rhs: f64x2) -> Self::Output {
236 f64x2::splat(self).add(rhs)
237 }
238}
239
240impl Sub<f64x2> for f64 {
241 type Output = f64x2;
242 #[inline]
243 fn sub(self, rhs: f64x2) -> Self::Output {
244 f64x2::splat(self).sub(rhs)
245 }
246}
247
248impl Mul<f64x2> for f64 {
249 type Output = f64x2;
250 #[inline]
251 fn mul(self, rhs: f64x2) -> Self::Output {
252 f64x2::splat(self).mul(rhs)
253 }
254}
255
256impl Div<f64x2> for f64 {
257 type Output = f64x2;
258 #[inline]
259 fn div(self, rhs: f64x2) -> Self::Output {
260 f64x2::splat(self).div(rhs)
261 }
262}
263
264impl BitAnd for f64x2 {
265 type Output = Self;
266 #[inline]
267 fn bitand(self, rhs: Self) -> Self::Output {
268 pick! {
269 if #[cfg(target_feature="sse2")] {
270 Self { sse: bitand_m128d(self.sse, rhs.sse) }
271 } else if #[cfg(target_feature="simd128")] {
272 Self { simd: v128_and(self.simd, rhs.simd) }
273 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
274 unsafe {Self { neon: vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
275 } else {
276 Self { arr: [
277 f64::from_bits(self.arr[0].to_bits() & rhs.arr[0].to_bits()),
278 f64::from_bits(self.arr[1].to_bits() & rhs.arr[1].to_bits()),
279 ]}
280 }
281 }
282 }
283}
284
285impl BitOr for f64x2 {
286 type Output = Self;
287 #[inline]
288 fn bitor(self, rhs: Self) -> Self::Output {
289 pick! {
290 if #[cfg(target_feature="sse2")] {
291 Self { sse: bitor_m128d(self.sse, rhs.sse) }
292 } else if #[cfg(target_feature="simd128")] {
293 Self { simd: v128_or(self.simd, rhs.simd) }
294 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
295 unsafe {Self { neon: vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
296 } else {
297 Self { arr: [
298 f64::from_bits(self.arr[0].to_bits() | rhs.arr[0].to_bits()),
299 f64::from_bits(self.arr[1].to_bits() | rhs.arr[1].to_bits()),
300 ]}
301 }
302 }
303 }
304}
305
306impl BitXor for f64x2 {
307 type Output = Self;
308 #[inline]
309 fn bitxor(self, rhs: Self) -> Self::Output {
310 pick! {
311 if #[cfg(target_feature="sse2")] {
312 Self { sse: bitxor_m128d(self.sse, rhs.sse) }
313 } else if #[cfg(target_feature="simd128")] {
314 Self { simd: v128_xor(self.simd, rhs.simd) }
315 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
316 unsafe {Self { neon: vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
317 } else {
318 Self { arr: [
319 f64::from_bits(self.arr[0].to_bits() ^ rhs.arr[0].to_bits()),
320 f64::from_bits(self.arr[1].to_bits() ^ rhs.arr[1].to_bits()),
321 ]}
322 }
323 }
324 }
325}
326
327impl CmpEq for f64x2 {
328 type Output = Self;
329 #[inline]
330 fn simd_eq(self, rhs: Self) -> Self::Output {
331 pick! {
332 if #[cfg(target_feature="sse2")] {
333 Self { sse: cmp_eq_mask_m128d(self.sse, rhs.sse) }
334 } else if #[cfg(target_feature="simd128")] {
335 Self { simd: f64x2_eq(self.simd, rhs.simd) }
336 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
337 unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }}
338 } else {
339 Self { arr: [
340 if self.arr[0] == rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
341 if self.arr[1] == rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
342 ]}
343 }
344 }
345 }
346}
347
348impl CmpGe for f64x2 {
349 type Output = Self;
350 #[inline]
351 fn simd_ge(self, rhs: Self) -> Self::Output {
352 pick! {
353 if #[cfg(target_feature="sse2")] {
354 Self { sse: cmp_ge_mask_m128d(self.sse, rhs.sse) }
355 } else if #[cfg(target_feature="simd128")] {
356 Self { simd: f64x2_ge(self.simd, rhs.simd) }
357 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
358 unsafe {Self { neon: vreinterpretq_f64_u64(vcgeq_f64(self.neon, rhs.neon)) }}
359 } else {
360 Self { arr: [
361 if self.arr[0] >= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
362 if self.arr[1] >= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
363 ]}
364 }
365 }
366 }
367}
368
369impl CmpGt for f64x2 {
370 type Output = Self;
371 #[inline]
372 fn simd_gt(self, rhs: Self) -> Self::Output {
373 pick! {
374 if #[cfg(target_feature="avx")] {
375 Self { sse: cmp_op_mask_m128d::<{cmp_op!(GreaterThanOrdered)}>(self.sse, rhs.sse) }
376 } else if #[cfg(target_feature="sse2")] {
377 Self { sse: cmp_gt_mask_m128d(self.sse, rhs.sse) }
378 } else if #[cfg(target_feature="simd128")] {
379 Self { simd: f64x2_gt(self.simd, rhs.simd) }
380 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
381 unsafe {Self { neon: vreinterpretq_f64_u64(vcgtq_f64(self.neon, rhs.neon)) }}
382 } else {
383 Self { arr: [
384 if self.arr[0] > rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
385 if self.arr[1] > rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
386 ]}
387 }
388 }
389 }
390}
391
392impl CmpNe for f64x2 {
393 type Output = Self;
394 #[inline]
395 fn simd_ne(self, rhs: Self) -> Self::Output {
396 pick! {
397 if #[cfg(target_feature="sse2")] {
398 Self { sse: cmp_neq_mask_m128d(self.sse, rhs.sse) }
399 } else if #[cfg(target_feature="simd128")] {
400 Self { simd: f64x2_ne(self.simd, rhs.simd) }
401 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
402 unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }.not() }
403 } else {
404 Self { arr: [
405 if self.arr[0] != rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
406 if self.arr[1] != rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
407 ]}
408 }
409 }
410 }
411}
412
413impl CmpLe for f64x2 {
414 type Output = Self;
415 #[inline]
416 fn simd_le(self, rhs: Self) -> Self::Output {
417 pick! {
418 if #[cfg(target_feature="sse2")] {
419 Self { sse: cmp_le_mask_m128d(self.sse, rhs.sse) }
420 } else if #[cfg(target_feature="simd128")] {
421 Self { simd: f64x2_le(self.simd, rhs.simd) }
422 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
423 unsafe {Self { neon: vreinterpretq_f64_u64(vcleq_f64(self.neon, rhs.neon)) }}
424 } else {
425 Self { arr: [
426 if self.arr[0] <= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
427 if self.arr[1] <= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
428 ]}
429 }
430 }
431 }
432}
433
434impl CmpLt for f64x2 {
435 type Output = Self;
436 #[inline]
437 fn simd_lt(self, rhs: Self) -> Self::Output {
438 pick! {
439 if #[cfg(target_feature="sse2")] {
440 Self { sse: cmp_lt_mask_m128d(self.sse, rhs.sse) }
441 } else if #[cfg(target_feature="simd128")] {
442 Self { simd: f64x2_lt(self.simd, rhs.simd) }
443 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
444 unsafe {Self { neon: vreinterpretq_f64_u64(vcltq_f64(self.neon, rhs.neon)) }}
445 } else {
446 Self { arr: [
447 if self.arr[0] < rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
448 if self.arr[1] < rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
449 ]}
450 }
451 }
452 }
453}
454
455impl f64x2 {
456 #[inline]
457 #[must_use]
458 pub const fn new(array: [f64; 2]) -> Self {
459 unsafe { core::mem::transmute(array) }
460 }
461 #[inline]
462 #[must_use]
463 pub fn blend(self, t: Self, f: Self) -> Self {
464 pick! {
465 if #[cfg(target_feature="sse4.1")] {
466 Self { sse: blend_varying_m128d(f.sse, t.sse, self.sse) }
467 } else if #[cfg(target_feature="simd128")] {
468 Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
469 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
470 unsafe {Self { neon: vbslq_f64(vreinterpretq_u64_f64(self.neon), t.neon, f.neon) }}
471 } else {
472 generic_bit_blend(self, t, f)
473 }
474 }
475 }
476 #[inline]
477 #[must_use]
478 pub fn abs(self) -> Self {
479 pick! {
480 if #[cfg(target_feature="simd128")] {
481 Self { simd: f64x2_abs(self.simd) }
482 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
483 unsafe {Self { neon: vabsq_f64(self.neon) }}
484 } else {
485 let non_sign_bits = f64x2::from(f64::from_bits(i64::MAX as u64));
486 self & non_sign_bits
487 }
488 }
489 }
490 #[inline]
491 #[must_use]
492 pub fn floor(self) -> Self {
493 pick! {
494 if #[cfg(target_feature="simd128")] {
495 Self { simd: f64x2_floor(self.simd) }
496 } else if #[cfg(target_feature="sse4.1")] {
497 Self { sse: floor_m128d(self.sse) }
498 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
499 unsafe {Self { neon: vrndmq_f64(self.neon) }}
500 } else if #[cfg(feature="std")] {
501 let base: [f64; 2] = cast(self);
502 cast(base.map(|val| val.floor()))
503 } else {
504 let base: [f64; 2] = cast(self);
505 let rounded: [f64; 2] = cast(self.round());
506 cast([
507 if base[0] < rounded[0] { rounded[0] - 1.0 } else { rounded[0] },
508 if base[1] < rounded[1] { rounded[1] - 1.0 } else { rounded[1] },
509 ])
510 }
511 }
512 }
513 #[inline]
514 #[must_use]
515 pub fn ceil(self) -> Self {
516 pick! {
517 if #[cfg(target_feature="simd128")] {
518 Self { simd: f64x2_ceil(self.simd) }
519 } else if #[cfg(target_feature="sse4.1")] {
520 Self { sse: ceil_m128d(self.sse) }
521 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
522 unsafe {Self { neon: vrndpq_f64(self.neon) }}
523 } else if #[cfg(feature="std")] {
524 let base: [f64; 2] = cast(self);
525 cast(base.map(|val| val.ceil()))
526 } else {
527 let base: [f64; 2] = cast(self);
528 let rounded: [f64; 2] = cast(self.round());
529 cast([
530 if base[0] > rounded[0] { rounded[0] + 1.0 } else { rounded[0] },
531 if base[1] > rounded[1] { rounded[1] + 1.0 } else { rounded[1] },
532 ])
533 }
534 }
535 }
536
537 #[inline]
541 #[must_use]
542 pub fn fast_max(self, rhs: Self) -> Self {
543 pick! {
544 if #[cfg(target_feature="sse2")] {
545 Self { sse: max_m128d(self.sse, rhs.sse) }
546 } else if #[cfg(target_feature="simd128")] {
547 Self {
548 simd: f64x2_pmax(self.simd, rhs.simd),
549 }
550 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
551 unsafe {Self { neon: vmaxq_f64(self.neon, rhs.neon) }}
552 } else {
553 Self { arr: [
554 if self.arr[0] < rhs.arr[0] { rhs.arr[0] } else { self.arr[0] },
555 if self.arr[1] < rhs.arr[1] { rhs.arr[1] } else { self.arr[1] },
556 ]}
557 }
558 }
559 }
560
561 #[inline]
565 #[must_use]
566 pub fn max(self, rhs: Self) -> Self {
567 pick! {
568 if #[cfg(target_feature="sse2")] {
569 rhs.is_nan().blend(self, Self { sse: max_m128d(self.sse, rhs.sse) })
573 } else if #[cfg(target_feature="simd128")] {
574 Self {
581 simd: v128_bitselect(
582 rhs.simd,
583 f64x2_pmax(self.simd, rhs.simd),
584 f64x2_ne(self.simd, self.simd), )
586 }
587 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
588 unsafe {Self { neon: vmaxnmq_f64(self.neon, rhs.neon) }}
589 } else {
590 Self { arr: [
591 self.arr[0].max(rhs.arr[0]),
592 self.arr[1].max(rhs.arr[1]),
593 ]}
594 }
595 }
596 }
597
598 #[inline]
602 #[must_use]
603 pub fn fast_min(self, rhs: Self) -> Self {
604 pick! {
605 if #[cfg(target_feature="sse2")] {
606 Self { sse: min_m128d(self.sse, rhs.sse) }
607 } else if #[cfg(target_feature="simd128")] {
608 Self {
609 simd: f64x2_pmin(self.simd, rhs.simd),
610 }
611 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
612 unsafe {Self { neon: vminq_f64(self.neon, rhs.neon) }}
613 } else {
614 Self { arr: [
615 if self.arr[0] < rhs.arr[0] { self.arr[0] } else { rhs.arr[0] },
616 if self.arr[1] < rhs.arr[1] { self.arr[1] } else { rhs.arr[1] },
617 ]}
618 }
619 }
620 }
621
622 #[inline]
626 #[must_use]
627 pub fn min(self, rhs: Self) -> Self {
628 pick! {
629 if #[cfg(target_feature="sse2")] {
630 rhs.is_nan().blend(self, Self { sse: min_m128d(self.sse, rhs.sse) })
634 } else if #[cfg(target_feature="simd128")] {
635 Self {
642 simd: v128_bitselect(
643 rhs.simd,
644 f64x2_pmin(self.simd, rhs.simd),
645 f64x2_ne(self.simd, self.simd), )
647 }
648 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
649 unsafe {Self { neon: vminnmq_f64(self.neon, rhs.neon) }}
650 } else {
651 Self { arr: [
652 self.arr[0].min(rhs.arr[0]),
653 self.arr[1].min(rhs.arr[1]),
654 ]}
655 }
656 }
657 }
658
659 #[inline]
660 #[must_use]
661 pub fn is_nan(self) -> Self {
662 pick! {
663 if #[cfg(target_feature="sse2")] {
664 Self { sse: cmp_unord_mask_m128d(self.sse, self.sse) }
665 } else if #[cfg(target_feature="simd128")] {
666 Self { simd: f64x2_ne(self.simd, self.simd) }
667 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
668 unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, self.neon)) }.not() }
669 } else {
670 Self { arr: [
671 if self.arr[0].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
672 if self.arr[1].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
673 ]}
674 }
675 }
676 }
677 #[inline]
678 #[must_use]
679 pub fn is_finite(self) -> Self {
680 let shifted_exp_mask = u64x2::from(0xFFE0000000000000);
681 let u: u64x2 = cast(self);
682 let shift_u = u << 1_u64;
683 let out = !(shift_u & shifted_exp_mask).simd_eq(shifted_exp_mask);
684 cast(out)
685 }
686 #[inline]
687 #[must_use]
688 pub fn is_inf(self) -> Self {
689 let shifted_inf = u64x2::from(0xFFE0000000000000);
690 let u: u64x2 = cast(self);
691 let shift_u = u << 1_u64;
692 let out = (shift_u).simd_eq(shifted_inf);
693 cast(out)
694 }
695
696 #[inline]
697 #[must_use]
698 pub fn round(self) -> Self {
699 pick! {
700 if #[cfg(target_feature="sse4.1")] {
701 Self { sse: round_m128d::<{round_op!(Nearest)}>(self.sse) }
702 } else if #[cfg(target_feature="simd128")] {
703 Self { simd: f64x2_nearest(self.simd) }
704 } else {
705 let sign_mask = f64x2::from(-0.0);
706 let magic = f64x2::from(f64::from_bits(0x43300000_00000000));
707 let sign = self & sign_mask;
708 let signed_magic = magic | sign;
709 self + signed_magic - signed_magic
710 }
711 }
712 }
713 #[inline]
714 #[must_use]
715 pub fn round_int(self) -> i64x2 {
716 let rounded: [f64; 2] = cast(self.round());
717 cast([rounded[0] as i64, rounded[1] as i64])
718 }
719 #[inline]
744 #[must_use]
745 pub fn mul_add(self, m: Self, a: Self) -> Self {
746 pick! {
747 if #[cfg(all(target_feature="fma"))] {
748 Self { sse: fused_mul_add_m128d(self.sse, m.sse, a.sse) }
749 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
750 unsafe { Self { neon: vfmaq_f64(a.neon, self.neon, m.neon) } }
751 } else {
752 (self * m) + a
753 }
754 }
755 }
756
757 #[inline]
783 #[must_use]
784 pub fn mul_sub(self, m: Self, s: Self) -> Self {
785 pick! {
786 if #[cfg(all(target_feature="fma"))] {
787 Self { sse: fused_mul_sub_m128d(self.sse, m.sse, s.sse) }
788 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
789 unsafe { Self { neon: vfmaq_f64(vnegq_f64(s.neon), self.neon, m.neon) } }
790 } else {
791 (self * m) - s
792 }
793 }
794 }
795
796 #[inline]
821 #[must_use]
822 pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
823 pick! {
824 if #[cfg(all(target_feature="fma"))] {
825 Self { sse: fused_mul_neg_add_m128d(self.sse, m.sse, a.sse) }
826 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
827 unsafe { Self { neon: vfmsq_f64(a.neon, self.neon, m.neon) } }
828 } else {
829 a - (self * m)
830 }
831 }
832 }
833
834 #[inline]
860 #[must_use]
861 pub fn mul_neg_sub(self, m: Self, s: Self) -> Self {
862 pick! {
863 if #[cfg(all(target_feature="fma"))] {
864 Self { sse: fused_mul_neg_sub_m128d(self.sse, m.sse, s.sse) }
865 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
866 unsafe { Self { neon: vnegq_f64(vfmaq_f64(s.neon, self.neon, m.neon)) } }
867 } else {
868 -(self * m) - s
869 }
870 }
871 }
872
873 #[inline]
874 #[must_use]
875 pub fn flip_signs(self, signs: Self) -> Self {
876 self ^ (signs & Self::from(-0.0))
877 }
878
879 #[inline]
880 #[must_use]
881 pub fn copysign(self, sign: Self) -> Self {
882 let magnitude_mask = Self::from(f64::from_bits(u64::MAX >> 1));
883 (self & magnitude_mask) | (sign & Self::from(-0.0))
884 }
885
886 #[inline]
887 pub fn asin_acos(self) -> (Self, Self) {
888 const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
891 const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
892 const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
893 const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
894 const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
895
896 const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
897 const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
898 const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
899 const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
900
901 const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
902 const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
903 const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
904 const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
905 const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
906 const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
907
908 const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
909 const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
910 const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
911 const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
912 const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
913
914 let xa = self.abs();
915
916 let big = xa.simd_ge(f64x2::splat(0.625));
917
918 let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
919
920 let x2 = x1 * x1;
921 let x3 = x2 * x1;
922 let x4 = x2 * x2;
923 let x5 = x4 * x1;
924
925 let do_big = big.any();
926 let do_small = !big.all();
927
928 let mut rx = f64x2::default();
929 let mut sx = f64x2::default();
930 let mut px = f64x2::default();
931 let mut qx = f64x2::default();
932
933 if do_big {
934 rx = x3.mul_add(R3asin, x2 * R2asin)
935 + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
936 sx =
937 x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
938 }
939 if do_small {
940 px = x3.mul_add(P3asin, P0asin)
941 + x4.mul_add(P4asin, x1 * P1asin)
942 + x5.mul_add(P5asin, x2 * P2asin);
943 qx = x4.mul_add(Q4asin, x5)
944 + x3.mul_add(Q3asin, x1 * Q1asin)
945 + x2.mul_add(Q2asin, Q0asin);
946 };
947
948 let vx = big.blend(rx, px);
949 let wx = big.blend(sx, qx);
950
951 let y1 = vx / wx * x1;
952
953 let mut z1 = f64x2::default();
954 let mut z2 = f64x2::default();
955 if do_big {
956 let xb = (x1 + x1).sqrt();
957 z1 = xb.mul_add(y1, xb);
958 }
959
960 if do_small {
961 z2 = xa.mul_add(y1, xa);
962 }
963
964 let z3 = f64x2::FRAC_PI_2 - z1;
966 let asin = big.blend(z3, z2);
967 let asin = asin.flip_signs(self);
968
969 let z3 = self.simd_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
971 let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
972 let acos = big.blend(z3, z4);
973
974 (asin, acos)
975 }
976
977 #[inline]
978 pub fn acos(self) -> Self {
979 const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
982 const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
983 const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
984 const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
985 const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
986
987 const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
988 const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
989 const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
990 const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
991
992 const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
993 const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
994 const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
995 const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
996 const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
997 const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
998
999 const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
1000 const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
1001 const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
1002 const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
1003 const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
1004
1005 let xa = self.abs();
1006
1007 let big = xa.simd_ge(f64x2::splat(0.625));
1008
1009 let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
1010
1011 let x2 = x1 * x1;
1012 let x3 = x2 * x1;
1013 let x4 = x2 * x2;
1014 let x5 = x4 * x1;
1015
1016 let do_big = big.any();
1017 let do_small = !big.all();
1018
1019 let mut rx = f64x2::default();
1020 let mut sx = f64x2::default();
1021 let mut px = f64x2::default();
1022 let mut qx = f64x2::default();
1023
1024 if do_big {
1025 rx = x3.mul_add(R3asin, x2 * R2asin)
1026 + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
1027 sx =
1028 x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
1029 }
1030 if do_small {
1031 px = x3.mul_add(P3asin, P0asin)
1032 + x4.mul_add(P4asin, x1 * P1asin)
1033 + x5.mul_add(P5asin, x2 * P2asin);
1034 qx = x4.mul_add(Q4asin, x5)
1035 + x3.mul_add(Q3asin, x1 * Q1asin)
1036 + x2.mul_add(Q2asin, Q0asin);
1037 };
1038
1039 let vx = big.blend(rx, px);
1040 let wx = big.blend(sx, qx);
1041
1042 let y1 = vx / wx * x1;
1043
1044 let mut z1 = f64x2::default();
1045 let mut z2 = f64x2::default();
1046 if do_big {
1047 let xb = (x1 + x1).sqrt();
1048 z1 = xb.mul_add(y1, xb);
1049 }
1050
1051 if do_small {
1052 z2 = xa.mul_add(y1, xa);
1053 }
1054
1055 let z3 = self.simd_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
1057 let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
1058 let acos = big.blend(z3, z4);
1059
1060 acos
1061 }
1062
1063 #[inline]
1064 pub fn asin(self) -> Self {
1065 const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
1068 const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
1069 const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
1070 const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
1071 const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
1072
1073 const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
1074 const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
1075 const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
1076 const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
1077
1078 const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
1079 const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
1080 const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
1081 const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
1082 const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
1083 const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
1084
1085 const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
1086 const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
1087 const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
1088 const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
1089 const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
1090
1091 let xa = self.abs();
1092
1093 let big = xa.simd_ge(f64x2::splat(0.625));
1094
1095 let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
1096
1097 let x2 = x1 * x1;
1098 let x3 = x2 * x1;
1099 let x4 = x2 * x2;
1100 let x5 = x4 * x1;
1101
1102 let do_big = big.any();
1103 let do_small = !big.all();
1104
1105 let mut rx = f64x2::default();
1106 let mut sx = f64x2::default();
1107 let mut px = f64x2::default();
1108 let mut qx = f64x2::default();
1109
1110 if do_big {
1111 rx = x3.mul_add(R3asin, x2 * R2asin)
1112 + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
1113 sx =
1114 x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
1115 }
1116 if do_small {
1117 px = x3.mul_add(P3asin, P0asin)
1118 + x4.mul_add(P4asin, x1 * P1asin)
1119 + x5.mul_add(P5asin, x2 * P2asin);
1120 qx = x4.mul_add(Q4asin, x5)
1121 + x3.mul_add(Q3asin, x1 * Q1asin)
1122 + x2.mul_add(Q2asin, Q0asin);
1123 };
1124
1125 let vx = big.blend(rx, px);
1126 let wx = big.blend(sx, qx);
1127
1128 let y1 = vx / wx * x1;
1129
1130 let mut z1 = f64x2::default();
1131 let mut z2 = f64x2::default();
1132 if do_big {
1133 let xb = (x1 + x1).sqrt();
1134 z1 = xb.mul_add(y1, xb);
1135 }
1136
1137 if do_small {
1138 z2 = xa.mul_add(y1, xa);
1139 }
1140
1141 let z3 = f64x2::FRAC_PI_2 - z1;
1143 let asin = big.blend(z3, z2);
1144 let asin = asin.flip_signs(self);
1145
1146 asin
1147 }
1148
1149 #[inline]
1150 pub fn atan(self) -> Self {
1151 const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
1154 const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
1155 const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
1156
1157 const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
1158 const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
1159 const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
1160 const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
1161 const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);
1162
1163 const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
1164 const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
1165 const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
1166 const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
1167 const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);
1168
1169 let t = self.abs();
1170
1171 let notbig = t.simd_le(T3PO8);
1175 let notsmal = t.simd_ge(Self::splat(0.66));
1176
1177 let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1178 s = notsmal & s;
1179 let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1180 fac = notsmal & fac;
1181
1182 let mut a = notbig & t;
1186 a = notsmal.blend(a - Self::ONE, a);
1187 let mut b = notbig & Self::ONE;
1188 b = notsmal.blend(b + t, b);
1189 let z = a / b;
1190
1191 let zz = z * z;
1192
1193 let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1194 let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1195
1196 let mut re = (px / qx).mul_add(z * zz, z);
1197 re += s + fac;
1198
1199 re = (self.sign_bit()).blend(-re, re);
1201
1202 re
1203 }
1204
1205 #[inline]
1206 pub fn atan2(self, x: Self) -> Self {
1207 const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
1210 const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
1211 const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
1212
1213 const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
1214 const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
1215 const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
1216 const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
1217 const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);
1218
1219 const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
1220 const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
1221 const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
1222 const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
1223 const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);
1224
1225 let y = self;
1226
1227 let x1 = x.abs();
1229 let y1 = y.abs();
1230 let swapxy = y1.simd_gt(x1);
1231 let mut x2 = swapxy.blend(y1, x1);
1233 let mut y2 = swapxy.blend(x1, y1);
1234
1235 let both_infinite = x.is_inf() & y.is_inf();
1237 if both_infinite.any() {
1238 let minus_one = -Self::ONE;
1239 x2 = both_infinite.blend(x2 & minus_one, x2);
1240 y2 = both_infinite.blend(y2 & minus_one, y2);
1241 }
1242
1243 let t = y2 / x2;
1245
1246 let notbig = t.simd_le(T3PO8);
1250 let notsmal = t.simd_ge(Self::splat(0.66));
1251
1252 let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1253 s = notsmal & s;
1254 let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1255 fac = notsmal & fac;
1256
1257 let mut a = notbig & t;
1261 a = notsmal.blend(a - Self::ONE, a);
1262 let mut b = notbig & Self::ONE;
1263 b = notsmal.blend(b + t, b);
1264 let z = a / b;
1265
1266 let zz = z * z;
1267
1268 let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1269 let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1270
1271 let mut re = (px / qx).mul_add(z * zz, z);
1272 re += s + fac;
1273
1274 re = swapxy.blend(Self::FRAC_PI_2 - re, re);
1276 re = ((x | y).simd_eq(Self::ZERO)).blend(Self::ZERO, re);
1277 re = (x.sign_bit()).blend(Self::PI - re, re);
1278
1279 re = (y.sign_bit()).blend(-re, re);
1281
1282 re
1283 }
1284
1285 #[inline]
1286 #[must_use]
1287 pub fn sin_cos(self) -> (Self, Self) {
1288 const_f64_as_f64x2!(P0sin, -1.66666666666666307295E-1);
1292 const_f64_as_f64x2!(P1sin, 8.33333333332211858878E-3);
1293 const_f64_as_f64x2!(P2sin, -1.98412698295895385996E-4);
1294 const_f64_as_f64x2!(P3sin, 2.75573136213857245213E-6);
1295 const_f64_as_f64x2!(P4sin, -2.50507477628578072866E-8);
1296 const_f64_as_f64x2!(P5sin, 1.58962301576546568060E-10);
1297
1298 const_f64_as_f64x2!(P0cos, 4.16666666666665929218E-2);
1299 const_f64_as_f64x2!(P1cos, -1.38888888888730564116E-3);
1300 const_f64_as_f64x2!(P2cos, 2.48015872888517045348E-5);
1301 const_f64_as_f64x2!(P3cos, -2.75573141792967388112E-7);
1302 const_f64_as_f64x2!(P4cos, 2.08757008419747316778E-9);
1303 const_f64_as_f64x2!(P5cos, -1.13585365213876817300E-11);
1304
1305 const_f64_as_f64x2!(DP1, 7.853981554508209228515625E-1 * 2.);
1306 const_f64_as_f64x2!(DP2, 7.94662735614792836714E-9 * 2.);
1307 const_f64_as_f64x2!(DP3, 3.06161699786838294307E-17 * 2.);
1308
1309 const_f64_as_f64x2!(TWO_OVER_PI, 2.0 / core::f64::consts::PI);
1310
1311 let xa = self.abs();
1312
1313 let y = (xa * TWO_OVER_PI).round();
1314 let q = y.round_int();
1315
1316 let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa)));
1317
1318 let x2 = x * x;
1319 let mut s = polynomial_5!(x2, P0sin, P1sin, P2sin, P3sin, P4sin, P5sin);
1320 let mut c = polynomial_5!(x2, P0cos, P1cos, P2cos, P3cos, P4cos, P5cos);
1321 s = (x * x2).mul_add(s, x);
1322 c =
1323 (x2 * x2).mul_add(c, x2.mul_neg_add(f64x2::from(0.5), f64x2::from(1.0)));
1324
1325 let swap = !((q & i64x2::from(1)).simd_eq(i64x2::from(0)));
1326
1327 let mut overflow: f64x2 = cast(q.simd_gt(i64x2::from(0x80000000000000)));
1328 overflow &= xa.is_finite();
1329 s = overflow.blend(f64x2::from(0.0), s);
1330 c = overflow.blend(f64x2::from(1.0), c);
1331
1332 let mut sin1 = cast::<_, f64x2>(swap).blend(c, s);
1334 let sign_sin: i64x2 = (q << 62) ^ cast::<_, i64x2>(self);
1335 sin1 = sin1.flip_signs(cast(sign_sin));
1336
1337 let mut cos1 = cast::<_, f64x2>(swap).blend(s, c);
1339 let sign_cos: i64x2 = ((q + i64x2::from(1)) & i64x2::from(2)) << 62;
1340 cos1 ^= cast::<_, f64x2>(sign_cos);
1341
1342 (sin1, cos1)
1343 }
1344 #[inline]
1345 #[must_use]
1346 pub fn sin(self) -> Self {
1347 let (s, _) = self.sin_cos();
1348 s
1349 }
1350 #[inline]
1351 #[must_use]
1352 pub fn cos(self) -> Self {
1353 let (_, c) = self.sin_cos();
1354 c
1355 }
1356 #[inline]
1357 #[must_use]
1358 pub fn tan(self) -> Self {
1359 let (s, c) = self.sin_cos();
1360 s / c
1361 }
1362 #[inline]
1363 #[must_use]
1364 pub fn to_degrees(self) -> Self {
1365 const_f64_as_f64x2!(RAD_TO_DEG_RATIO, 180.0_f64 / core::f64::consts::PI);
1366 self * RAD_TO_DEG_RATIO
1367 }
1368 #[inline]
1369 #[must_use]
1370 pub fn to_radians(self) -> Self {
1371 const_f64_as_f64x2!(DEG_TO_RAD_RATIO, core::f64::consts::PI / 180.0_f64);
1372 self * DEG_TO_RAD_RATIO
1373 }
1374 #[inline]
1375 #[must_use]
1376 pub fn sqrt(self) -> Self {
1377 pick! {
1378 if #[cfg(target_feature="sse2")] {
1379 Self { sse: sqrt_m128d(self.sse) }
1380 } else if #[cfg(target_feature="simd128")] {
1381 Self { simd: f64x2_sqrt(self.simd) }
1382 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1383 unsafe {Self { neon: vsqrtq_f64(self.neon) }}
1384 } else if #[cfg(feature="std")] {
1385 Self { arr: [
1386 self.arr[0].sqrt(),
1387 self.arr[1].sqrt(),
1388 ]}
1389 } else {
1390 Self { arr: [
1391 software_sqrt(self.arr[0]),
1392 software_sqrt(self.arr[1]),
1393 ]}
1394 }
1395 }
1396 }
1397 #[inline]
1398 #[must_use]
1399 #[doc(alias("movemask", "move_mask"))]
1400 pub fn to_bitmask(self) -> u32 {
1401 pick! {
1402 if #[cfg(target_feature="sse2")] {
1403 move_mask_m128d(self.sse) as u32
1404 } else if #[cfg(target_feature="simd128")] {
1405 u64x2_bitmask(self.simd) as u32
1406 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1407 unsafe
1408 {
1409 let e = vreinterpretq_u64_f64(self.neon);
1410
1411 (vgetq_lane_u64(e,0) >> 63 | ((vgetq_lane_u64(e,1) >> 62) & 0x2)) as u32
1412 }
1413 } else {
1414 (((self.arr[0].to_bits() as i64) < 0) as u32) << 0 |
1415 (((self.arr[1].to_bits() as i64) < 0) as u32) << 1
1416 }
1417 }
1418 }
1419 #[inline]
1420 #[must_use]
1421 pub fn any(self) -> bool {
1422 pick! {
1423 if #[cfg(target_feature="simd128")] {
1424 v128_any_true(self.simd)
1425 } else {
1426 self.to_bitmask() != 0
1427 }
1428 }
1429 }
1430 #[inline]
1431 #[must_use]
1432 pub fn all(self) -> bool {
1433 pick! {
1434 if #[cfg(target_feature="simd128")] {
1435 u64x2_all_true(self.simd)
1436 } else {
1437 self.to_bitmask() == 0b11
1439 }
1440 }
1441 }
1442 #[inline]
1443 #[must_use]
1444 pub fn none(self) -> bool {
1445 !self.any()
1446 }
1447
1448 #[inline]
1449 fn vm_pow2n(self) -> Self {
1450 const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
1451 const_f64_as_f64x2!(bias, 1023.0);
1452 let a = self + (bias + pow2_52);
1453 let c = cast::<_, i64x2>(a) << 52;
1454 cast::<_, f64x2>(c)
1455 }
1456
1457 #[inline]
1459 #[must_use]
1460 pub fn exp(self) -> Self {
1461 const_f64_as_f64x2!(P2, 1.0 / 2.0);
1462 const_f64_as_f64x2!(P3, 1.0 / 6.0);
1463 const_f64_as_f64x2!(P4, 1. / 24.);
1464 const_f64_as_f64x2!(P5, 1. / 120.);
1465 const_f64_as_f64x2!(P6, 1. / 720.);
1466 const_f64_as_f64x2!(P7, 1. / 5040.);
1467 const_f64_as_f64x2!(P8, 1. / 40320.);
1468 const_f64_as_f64x2!(P9, 1. / 362880.);
1469 const_f64_as_f64x2!(P10, 1. / 3628800.);
1470 const_f64_as_f64x2!(P11, 1. / 39916800.);
1471 const_f64_as_f64x2!(P12, 1. / 479001600.);
1472 const_f64_as_f64x2!(P13, 1. / 6227020800.);
1473 const_f64_as_f64x2!(LN2D_HI, 0.693145751953125);
1474 const_f64_as_f64x2!(LN2D_LO, 1.42860682030941723212E-6);
1475 let max_x = f64x2::from(708.39);
1476 let r = (self * Self::LOG2_E).round();
1477 let x = r.mul_neg_add(LN2D_HI, self);
1478 let x = r.mul_neg_add(LN2D_LO, x);
1479 let z =
1480 polynomial_13!(x, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13);
1481 let n2 = Self::vm_pow2n(r);
1482 let z = (z + Self::ONE) * n2;
1483 let in_range = self.abs().simd_lt(max_x);
1485 let in_range = in_range & self.is_finite();
1486 in_range.blend(z, Self::ZERO)
1487 }
1488
1489 #[inline]
1490 fn exponent(self) -> f64x2 {
1491 const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
1492 const_f64_as_f64x2!(bias, 1023.0);
1493 let a = cast::<_, u64x2>(self);
1494 let b = a >> 52;
1495 let c = b | cast::<_, u64x2>(pow2_52);
1496 let d = cast::<_, f64x2>(c);
1497 let e = d - (pow2_52 + bias);
1498 e
1499 }
1500
1501 #[inline]
1502 fn fraction_2(self) -> Self {
1503 let t1 = cast::<_, u64x2>(self);
1504 let t2 = cast::<_, u64x2>(
1505 (t1 & u64x2::from(0x000FFFFFFFFFFFFF)) | u64x2::from(0x3FE0000000000000),
1506 );
1507 cast::<_, f64x2>(t2)
1508 }
1509
1510 #[inline]
1511 fn is_zero_or_subnormal(self) -> Self {
1512 let t = cast::<_, i64x2>(self);
1513 let t = t & i64x2::splat(0x7FF0000000000000);
1514 i64x2::round_float(t.simd_eq(i64x2::splat(0)))
1515 }
1516
1517 #[inline]
1518 fn infinity() -> Self {
1519 cast::<_, f64x2>(i64x2::splat(0x7FF0000000000000))
1520 }
1521
1522 #[inline]
1523 fn nan_log() -> Self {
1524 cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
1525 }
1526
1527 #[inline]
1528 fn nan_pow() -> Self {
1529 cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
1530 }
1531
1532 #[inline]
1533 fn sign_bit(self) -> Self {
1534 let t1 = cast::<_, i64x2>(self);
1535 let t2 = t1 >> 63;
1536 !cast::<_, f64x2>(t2).simd_eq(f64x2::ZERO)
1537 }
1538
1539 #[inline]
1541 #[must_use]
1542 pub fn reduce_add(self) -> f64 {
1543 pick! {
1544 if #[cfg(target_feature="ssse3")] {
1545 let a = add_horizontal_m128d(self.sse, self.sse);
1546 a.to_array()[0]
1547 } else if #[cfg(any(target_feature="sse2", target_feature="simd128"))] {
1548 let a: [f64;2] = cast(self);
1549 a.iter().sum()
1550 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1551 unsafe { vgetq_lane_f64(self.neon,0) + vgetq_lane_f64(self.neon,1) }
1552 } else {
1553 self.arr.iter().sum()
1554 }
1555 }
1556 }
1557
1558 #[inline]
1559 #[must_use]
1560 pub fn ln(self) -> Self {
1561 const_f64_as_f64x2!(P0, 7.70838733755885391666E0);
1562 const_f64_as_f64x2!(P1, 1.79368678507819816313E1);
1563 const_f64_as_f64x2!(P2, 1.44989225341610930846E1);
1564 const_f64_as_f64x2!(P3, 4.70579119878881725854E0);
1565 const_f64_as_f64x2!(P4, 4.97494994976747001425E-1);
1566 const_f64_as_f64x2!(P5, 1.01875663804580931796E-4);
1567
1568 const_f64_as_f64x2!(Q0, 2.31251620126765340583E1);
1569 const_f64_as_f64x2!(Q1, 7.11544750618563894466E1);
1570 const_f64_as_f64x2!(Q2, 8.29875266912776603211E1);
1571 const_f64_as_f64x2!(Q3, 4.52279145837532221105E1);
1572 const_f64_as_f64x2!(Q4, 1.12873587189167450590E1);
1573 const_f64_as_f64x2!(LN2F_HI, 0.693359375);
1574 const_f64_as_f64x2!(LN2F_LO, -2.12194440e-4);
1575 const_f64_as_f64x2!(VM_SQRT2, 1.414213562373095048801);
1576 const_f64_as_f64x2!(VM_SMALLEST_NORMAL, 1.17549435E-38);
1577
1578 let x1 = self;
1579 let x = Self::fraction_2(x1);
1580 let e = Self::exponent(x1);
1581 let mask = x.simd_gt(VM_SQRT2 * f64x2::HALF);
1582 let x = (!mask).blend(x + x, x);
1583 let fe = mask.blend(e + Self::ONE, e);
1584 let x = x - Self::ONE;
1585 let px = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
1586 let x2 = x * x;
1587 let px = x2 * x * px;
1588 let qx = polynomial_5n!(x, Q0, Q1, Q2, Q3, Q4);
1589 let res = px / qx;
1590 let res = fe.mul_add(LN2F_LO, res);
1591 let res = res + x2.mul_neg_add(f64x2::HALF, x);
1592 let res = fe.mul_add(LN2F_HI, res);
1593 let overflow = !self.is_finite();
1594 let underflow = x1.simd_lt(VM_SMALLEST_NORMAL);
1595 let mask = overflow | underflow;
1596 if !mask.any() {
1597 res
1598 } else {
1599 let is_zero = self.is_zero_or_subnormal();
1600 let res = underflow.blend(Self::nan_log(), res);
1601 let res = is_zero.blend(Self::infinity(), res);
1602 let res = overflow.blend(self, res);
1603 res
1604 }
1605 }
1606
1607 #[inline]
1608 #[must_use]
1609 pub fn log2(self) -> Self {
1610 Self::ln(self) * Self::LOG2_E
1611 }
1612 #[inline]
1613 #[must_use]
1614 pub fn log10(self) -> Self {
1615 Self::ln(self) * Self::LOG10_E
1616 }
1617
1618 #[inline]
1619 #[must_use]
1620 pub fn pow_f64x2(self, y: Self) -> Self {
1621 const_f64_as_f64x2!(ln2d_hi, 0.693145751953125);
1622 const_f64_as_f64x2!(ln2d_lo, 1.42860682030941723212E-6);
1623 const_f64_as_f64x2!(P0log, 2.0039553499201281259648E1);
1624 const_f64_as_f64x2!(P1log, 5.7112963590585538103336E1);
1625 const_f64_as_f64x2!(P2log, 6.0949667980987787057556E1);
1626 const_f64_as_f64x2!(P3log, 2.9911919328553073277375E1);
1627 const_f64_as_f64x2!(P4log, 6.5787325942061044846969E0);
1628 const_f64_as_f64x2!(P5log, 4.9854102823193375972212E-1);
1629 const_f64_as_f64x2!(P6log, 4.5270000862445199635215E-5);
1630 const_f64_as_f64x2!(Q0log, 6.0118660497603843919306E1);
1631 const_f64_as_f64x2!(Q1log, 2.1642788614495947685003E2);
1632 const_f64_as_f64x2!(Q2log, 3.0909872225312059774938E2);
1633 const_f64_as_f64x2!(Q3log, 2.2176239823732856465394E2);
1634 const_f64_as_f64x2!(Q4log, 8.3047565967967209469434E1);
1635 const_f64_as_f64x2!(Q5log, 1.5062909083469192043167E1);
1636
1637 const_f64_as_f64x2!(p2, 1.0 / 2.0); const_f64_as_f64x2!(p3, 1.0 / 6.0);
1640 const_f64_as_f64x2!(p4, 1.0 / 24.0);
1641 const_f64_as_f64x2!(p5, 1.0 / 120.0);
1642 const_f64_as_f64x2!(p6, 1.0 / 720.0);
1643 const_f64_as_f64x2!(p7, 1.0 / 5040.0);
1644 const_f64_as_f64x2!(p8, 1.0 / 40320.0);
1645 const_f64_as_f64x2!(p9, 1.0 / 362880.0);
1646 const_f64_as_f64x2!(p10, 1.0 / 3628800.0);
1647 const_f64_as_f64x2!(p11, 1.0 / 39916800.0);
1648 const_f64_as_f64x2!(p12, 1.0 / 479001600.0);
1649 const_f64_as_f64x2!(p13, 1.0 / 6227020800.0);
1650
1651 let x1 = self.abs();
1652 let x = x1.fraction_2();
1653 let mask = x.simd_gt(f64x2::SQRT_2 * f64x2::HALF);
1654 let x = (!mask).blend(x + x, x);
1655 let x = x - f64x2::ONE;
1656 let x2 = x * x;
1657 let px = polynomial_6!(x, P0log, P1log, P2log, P3log, P4log, P5log, P6log);
1658 let px = px * x * x2;
1659 let qx = polynomial_6n!(x, Q0log, Q1log, Q2log, Q3log, Q4log, Q5log);
1660 let lg1 = px / qx;
1661
1662 let ef = x1.exponent();
1663 let ef = mask.blend(ef + f64x2::ONE, ef);
1664 let e1 = (ef * y).round();
1665 let yr = ef.mul_sub(y, e1);
1666
1667 let lg = f64x2::HALF.mul_neg_add(x2, x) + lg1;
1668 let x2err = (f64x2::HALF * x).mul_sub(x, f64x2::HALF * x2);
1669 let lg_err = f64x2::HALF.mul_add(x2, lg - x) - lg1;
1670
1671 let e2 = (lg * y * f64x2::LOG2_E).round();
1672 let v = lg.mul_sub(y, e2 * ln2d_hi);
1673 let v = e2.mul_neg_add(ln2d_lo, v);
1674 let v = v - (lg_err + x2err).mul_sub(y, yr * f64x2::LN_2);
1675
1676 let x = v;
1677 let e3 = (x * f64x2::LOG2_E).round();
1678 let x = e3.mul_neg_add(f64x2::LN_2, x);
1679 let z =
1680 polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
1681 + f64x2::ONE;
1682 let ee = e1 + e2 + e3;
1683 let ei = cast::<_, i64x2>(ee.round_int());
1684 let ej = cast::<_, i64x2>(ei + (cast::<_, i64x2>(z) >> 52));
1685
1686 let overflow = cast::<_, f64x2>(!ej.simd_lt(i64x2::splat(0x07FF)))
1687 | ee.simd_gt(f64x2::splat(3000.0));
1688 let underflow = cast::<_, f64x2>(!ej.simd_gt(i64x2::splat(0x000)))
1689 | ee.simd_lt(f64x2::splat(-3000.0));
1690
1691 let z = cast::<_, f64x2>(cast::<_, i64x2>(z) + (ei << 52));
1693
1694 let z = if (overflow | underflow).any() {
1696 let z = underflow.blend(f64x2::ZERO, z);
1697 overflow.blend(Self::infinity(), z)
1698 } else {
1699 z
1700 };
1701
1702 let x_zero = self.is_zero_or_subnormal();
1704 let z = x_zero.blend(
1705 y.simd_lt(f64x2::ZERO).blend(
1706 Self::infinity(),
1707 y.simd_eq(f64x2::ZERO).blend(f64x2::ONE, f64x2::ZERO),
1708 ),
1709 z,
1710 );
1711
1712 let x_sign = self.sign_bit();
1713 let z = if x_sign.any() {
1714 let yi = y.simd_eq(y.round());
1716 let y_odd = cast::<_, i64x2>(y.round_int() << 63).round_float();
1718
1719 let z1 =
1720 yi.blend(z | y_odd, self.simd_eq(Self::ZERO).blend(z, Self::nan_pow()));
1721 x_sign.blend(z1, z)
1722 } else {
1723 z
1724 };
1725
1726 let x_finite = self.is_finite();
1727 let y_finite = y.is_finite();
1728 let e_finite = ee.is_finite();
1729
1730 if (x_finite & y_finite & (e_finite | x_zero)).all() {
1731 return z;
1732 }
1733
1734 (self.is_nan() | y.is_nan()).blend(self + y, z)
1735 }
1736
1737 #[inline]
1738 pub fn powf(self, y: f64) -> Self {
1739 Self::pow_f64x2(self, f64x2::splat(y))
1740 }
1741
1742 #[inline]
1743 pub fn to_array(self) -> [f64; 2] {
1744 cast(self)
1745 }
1746
1747 #[inline]
1748 pub fn as_array(&self) -> &[f64; 2] {
1749 cast_ref(self)
1750 }
1751
1752 #[inline]
1753 pub fn as_mut_array(&mut self) -> &mut [f64; 2] {
1754 cast_mut(self)
1755 }
1756
1757 #[inline]
1760 pub fn from_i32x4_lower2(v: i32x4) -> Self {
1761 pick! {
1762 if #[cfg(target_feature="sse2")] {
1763 Self { sse: convert_to_m128d_from_lower2_i32_m128i(v.sse) }
1764 } else if #[cfg(target_feature="simd128")] {
1765 Self { simd: f64x2_convert_low_i32x4(v.simd)}
1766 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
1767 Self { neon: unsafe { vcvtq_f64_s64(vmovl_s32(vget_low_s32(v.neon))) }}
1768 } else {
1769 Self { arr: [
1770 v.as_array()[0] as f64,
1771 v.as_array()[1] as f64,
1772 ]}
1773 }
1774 }
1775 }
1776}
1777
1778impl From<i32x4> for f64x2 {
1779 #[inline]
1782 fn from(v: i32x4) -> Self {
1783 Self::from_i32x4_lower2(v)
1784 }
1785}
1786
1787impl Not for f64x2 {
1788 type Output = Self;
1789 #[inline]
1790 fn not(self) -> Self {
1791 pick! {
1792 if #[cfg(target_feature="sse2")] {
1793 Self { sse: self.sse.not() }
1794 } else if #[cfg(target_feature="simd128")] {
1795 Self { simd: v128_not(self.simd) }
1796 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1797 unsafe {Self { neon: vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(self.neon))) }}
1798 } else {
1799 Self { arr: [
1800 f64::from_bits(!self.arr[0].to_bits()),
1801 f64::from_bits(!self.arr[1].to_bits()),
1802 ]}
1803 }
1804 }
1805 }
1806}