1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
//! Contains traits and implementations of multi-data used in SIMD.
//! The actual representation is driven by the feature flag `"simd"`, which, if set,
//! uses [`std::simd`].
use super::{days_ms, f16, i256, months_days_ns};
use super::{BitChunk, BitChunkIter, NativeType};
/// Describes the ability to convert itself from a [`BitChunk`].
pub trait FromMaskChunk<T> {
/// Convert itself from a slice.
fn from_chunk(v: T) -> Self;
}
/// A struct that lends itself well to be compiled leveraging SIMD
/// # Safety
/// The `NativeType` and the `NativeSimd` must have possible a matching alignment.
/// e.g. slicing `&[NativeType]` by `align_of<NativeSimd>()` must be properly aligned/safe.
pub unsafe trait NativeSimd: Sized + Default + Copy {
/// Number of lanes
const LANES: usize;
/// The [`NativeType`] of this struct. E.g. `f32` for a `NativeSimd = f32x16`.
type Native: NativeType;
/// The type holding bits for masks.
type Chunk: BitChunk;
/// Type used for masking.
type Mask: FromMaskChunk<Self::Chunk>;
/// Sets values to `default` based on `mask`.
fn select(self, mask: Self::Mask, default: Self) -> Self;
/// Convert itself from a slice.
/// # Panics
/// * iff `v.len()` != `T::LANES`
fn from_chunk(v: &[Self::Native]) -> Self;
/// creates a new Self from `v` by populating items from `v` up to its length.
/// Items from `v` at positions larger than the number of lanes are ignored;
/// remaining items are populated with `remaining`.
fn from_incomplete_chunk(v: &[Self::Native], remaining: Self::Native) -> Self;
/// Returns a tuple of 3 items whose middle item is itself, and the remaining
/// are the head and tail of the un-aligned parts.
fn align(values: &[Self::Native]) -> (&[Self::Native], &[Self], &[Self::Native]);
}
/// Trait implemented by some [`NativeType`] that have a SIMD representation.
pub trait Simd: NativeType {
/// The SIMD type associated with this trait.
/// This type supports SIMD operations
type Simd: NativeSimd<Native = Self>;
}
#[cfg(not(feature = "simd"))]
mod native;
#[cfg(not(feature = "simd"))]
pub use native::*;
#[cfg(feature = "simd")]
mod packed;
#[cfg(feature = "simd")]
pub use packed::*;
macro_rules! native_simd {
($name:tt, $type:ty, $lanes:expr, $mask:ty) => {
/// Multi-Data correspondence of the native type
#[allow(non_camel_case_types)]
#[derive(Copy, Clone)]
pub struct $name(pub [$type; $lanes]);
unsafe impl NativeSimd for $name {
const LANES: usize = $lanes;
type Native = $type;
type Chunk = $mask;
type Mask = $mask;
#[inline]
fn select(self, mask: $mask, default: Self) -> Self {
let mut reduced = default;
let iter = BitChunkIter::new(mask, Self::LANES);
for (i, b) in (0..Self::LANES).zip(iter) {
reduced[i] = if b { self[i] } else { reduced[i] };
}
reduced
}
#[inline]
fn from_chunk(v: &[$type]) -> Self {
($name)(v.try_into().unwrap())
}
#[inline]
fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self {
let mut a = [remaining; $lanes];
a.iter_mut().zip(v.iter()).for_each(|(a, b)| *a = *b);
Self(a)
}
#[inline]
fn align(values: &[Self::Native]) -> (&[Self::Native], &[Self], &[Self::Native]) {
unsafe { values.align_to::<Self>() }
}
}
impl std::ops::Index<usize> for $name {
type Output = $type;
#[inline]
fn index(&self, index: usize) -> &Self::Output {
&self.0[index]
}
}
impl std::ops::IndexMut<usize> for $name {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
&mut self.0[index]
}
}
impl Default for $name {
#[inline]
fn default() -> Self {
($name)([<$type>::default(); $lanes])
}
}
};
}
pub(super) use native_simd;
// Types do not have specific intrinsics and thus SIMD can't be specialized.
// Therefore, we can declare their MD representation as `[$t; 8]` irrespectively
// of how they are represented in the different channels.
native_simd!(f16x32, f16, 32, u32);
native_simd!(days_msx8, days_ms, 8, u8);
native_simd!(months_days_nsx8, months_days_ns, 8, u8);
native_simd!(i128x8, i128, 8, u8);
native_simd!(i256x8, i256, 8, u8);
// In the native implementation, a mask is 1 bit wide, as per AVX512.
impl<T: BitChunk> FromMaskChunk<T> for T {
#[inline]
fn from_chunk(v: T) -> Self {
v
}
}
macro_rules! native {
($type:ty, $simd:ty) => {
impl Simd for $type {
type Simd = $simd;
}
};
}
native!(u8, u8x64);
native!(u16, u16x32);
native!(u32, u32x16);
native!(u64, u64x8);
native!(i8, i8x64);
native!(i16, i16x32);
native!(i32, i32x16);
native!(i64, i64x8);
native!(f16, f16x32);
native!(f32, f32x16);
native!(f64, f64x8);
native!(i128, i128x8);
native!(i256, i256x8);
native!(days_ms, days_msx8);
native!(months_days_ns, months_days_nsx8);