use std::any::Any;
use std::sync::Arc;
use crate::error::Result;
use crate::{
bitmap::{Bitmap, MutableBitmap},
datatypes::DataType,
};
mod physical_binary;
/// An immutable, dynamically-typed array.
///
/// Implementors are `Send + Sync + 'static` and expose themselves as [`Any`], so a
/// `dyn Array` can be downcast to its concrete type.
pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
    /// Returns `self` as `&dyn Any`, enabling downcasts to the concrete array type.
    fn as_any(&self) -> &dyn Any;

    /// Mutable counterpart of [`Array::as_any`].
    fn as_any_mut(&mut self) -> &mut dyn Any;

    /// Number of slots in this array.
    fn len(&self) -> usize;

    /// Whether this array has zero slots.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// The [`DataType`] of this array.
    fn data_type(&self) -> &DataType;

    /// The optional validity [`Bitmap`]; the default methods below treat `None` as
    /// "every slot is valid".
    fn validity(&self) -> Option<&Bitmap>;

    /// Number of null slots.
    ///
    /// Arrays whose data type is [`DataType::Null`] report `len()`; otherwise this is the
    /// number of unset bits in the validity bitmap, or `0` when there is no bitmap.
    #[inline]
    fn null_count(&self) -> usize {
        if *self.data_type() == DataType::Null {
            return self.len();
        }
        match self.validity() {
            Some(bitmap) => bitmap.unset_bits(),
            None => 0,
        }
    }

    /// Returns whether slot `i` is null.
    ///
    /// # Panics
    /// Panics if `i >= self.len()`.
    #[inline]
    fn is_null(&self, i: usize) -> bool {
        assert!(i < self.len());
        // SAFETY: `i` was checked to be in bounds just above.
        unsafe { self.is_null_unchecked(i) }
    }

    /// Returns whether slot `i` is null, without a bounds check.
    ///
    /// # Safety
    /// The caller must ensure `i < self.len()`.
    #[inline]
    unsafe fn is_null_unchecked(&self, i: usize) -> bool {
        match self.validity() {
            // A cleared validity bit marks a null slot.
            Some(bitmap) => !bitmap.get_bit_unchecked(i),
            None => false,
        }
    }

    /// Returns whether slot `i` is valid (not null).
    ///
    /// # Panics
    /// Panics if `i >= self.len()`.
    #[inline]
    fn is_valid(&self, i: usize) -> bool {
        !self.is_null(i)
    }

    /// Slices this array in place using `offset` and `length`.
    ///
    /// NOTE(review): presumably panics when `offset + length > self.len()` — confirm
    /// against the implementors.
    fn slice(&mut self, offset: usize, length: usize);

    /// Unchecked variant of [`Array::slice`].
    ///
    /// # Safety
    /// NOTE(review): the contract is defined by the implementors; presumably the caller
    /// must ensure `offset + length <= self.len()` — confirm.
    unsafe fn slice_unchecked(&mut self, offset: usize, length: usize);

    /// Returns a boxed copy of this array (via [`Array::to_boxed`]) sliced with
    /// [`Array::slice`].
    #[must_use]
    fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array> {
        let mut boxed = self.to_boxed();
        boxed.slice(offset, length);
        boxed
    }

    /// Returns a boxed copy of this array (via [`Array::to_boxed`]) sliced with
    /// [`Array::slice_unchecked`].
    ///
    /// # Safety
    /// Same requirements as [`Array::slice_unchecked`].
    #[must_use]
    unsafe fn sliced_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
        let mut boxed = self.to_boxed();
        boxed.slice_unchecked(offset, length);
        boxed
    }

    /// Returns a boxed array built from this one and the given `validity`.
    ///
    /// NOTE(review): implementations presumably panic when the bitmap length differs
    /// from `self.len()` — confirm.
    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>;

    /// Returns this array as a `Box<dyn Array>`.
    fn to_boxed(&self) -> Box<dyn Array>;
}
// Implements `Clone` for `Box<dyn Array>` through `dyn_clone` (hence the `DynClone`
// supertrait on `Array`).
dyn_clone::clone_trait_object!(Array);
/// Crate-internal trait for types that can be constructed from a capacity.
pub(crate) trait Container {
/// Creates a new instance from `capacity`.
///
/// NOTE(review): presumably pre-allocates room for `capacity` items — confirm against
/// the implementors.
fn with_capacity(capacity: usize) -> Self
where
Self: Sized;
}
/// A mutable, dynamically-typed array that can be converted into an immutable [`Array`].
pub trait MutableArray: std::fmt::Debug + Send + Sync {
    /// The [`DataType`] of this array.
    fn data_type(&self) -> &DataType;

    /// Number of slots in this array.
    fn len(&self) -> usize;

    /// Whether this array has zero slots.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// The optional validity bitmap; [`MutableArray::is_valid`] treats `None` as
    /// "every slot is valid".
    fn validity(&self) -> Option<&MutableBitmap>;

    /// Converts this array into a boxed immutable [`Array`].
    fn as_box(&mut self) -> Box<dyn Array>;

    /// Converts this array into an `Arc`-shared immutable [`Array`].
    ///
    /// The default implementation moves the result of [`MutableArray::as_box`] into an
    /// `Arc`.
    fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
        std::sync::Arc::from(self.as_box())
    }

    /// Returns `self` as `&dyn Any`, enabling downcasts to the concrete type.
    fn as_any(&self) -> &dyn Any;

    /// Mutable counterpart of [`MutableArray::as_any`].
    fn as_mut_any(&mut self) -> &mut dyn Any;

    /// Appends a null slot.
    fn push_null(&mut self);

    /// Returns whether slot `index` is valid.
    ///
    /// Without a validity bitmap every slot is reported valid; otherwise the answer is
    /// the bitmap's `get(index)`.
    #[inline]
    fn is_valid(&self, index: usize) -> bool {
        match self.validity() {
            Some(bitmap) => bitmap.get(index),
            None => true,
        }
    }

    /// Reserves room for `additional` more slots.
    fn reserve(&mut self, additional: usize);

    /// Shrinks the array's allocation to fit its contents.
    fn shrink_to_fit(&mut self);
}
/// Forwards every [`MutableArray`] method of a `Box<dyn MutableArray>` to the boxed
/// array, so the box itself can be used wherever a `MutableArray` is expected.
impl MutableArray for Box<dyn MutableArray> {
    fn data_type(&self) -> &DataType {
        (**self).data_type()
    }

    fn len(&self) -> usize {
        (**self).len()
    }

    fn validity(&self) -> Option<&MutableBitmap> {
        (**self).validity()
    }

    fn as_box(&mut self) -> Box<dyn Array> {
        (**self).as_box()
    }

    // Forwarded explicitly (instead of relying on the trait default) so that an
    // override on the boxed array is honored.
    fn as_arc(&mut self) -> Arc<dyn Array> {
        (**self).as_arc()
    }

    fn as_any(&self) -> &dyn std::any::Any {
        (**self).as_any()
    }

    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
        (**self).as_mut_any()
    }

    #[inline]
    fn push_null(&mut self) {
        (**self).push_null()
    }

    fn reserve(&mut self, additional: usize) {
        (**self).reserve(additional);
    }

    fn shrink_to_fit(&mut self) {
        (**self).shrink_to_fit();
    }
}
// Downcasts `$array` (anything exposing `as_any()`) to `&$ty` and applies `$f` to it.
// Panics (via `unwrap`) when the value behind `$array` is not a `$ty`.
macro_rules! general_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let concrete: &$ty = $array.as_any().downcast_ref().unwrap();
        ($f)(concrete)
    }};
}
// Downcasts `$array` to `$ty` (through `general_dyn!`) and calls the concrete array's
// `fmt` with the formatter `$f`.
macro_rules! fmt_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        // `mut` because the closure holds the mutable formatter passed as `$f`.
        let mut write_concrete = |concrete: &$ty| concrete.fmt($f);
        general_dyn!($array, $ty, write_concrete)
    }};
}
// Expands `$body` once per `IntegerType` variant, with the caller-chosen type
// metavariable bound to the matching Rust integer type.
//
// Call shape: `match_integer_type!(key_type, |$T| { ... $T ... })`.
// The `$_:tt` fragment captures the literal `$` written by the caller so that the inner
// `__with_ty__` macro can declare its own `$T` metavariable and substitute the concrete
// type into `$body`.
macro_rules! match_integer_type {(
$key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
// Inner helper: takes one concrete type and expands to the caller's body.
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::IntegerType::*;
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
UInt8 => __with_ty__! { u8 },
UInt16 => __with_ty__! { u16 },
UInt32 => __with_ty__! { u32 },
UInt64 => __with_ty__! { u64 },
}
})}
// Expands `$body` once per `PrimitiveType` variant, with the caller-chosen type
// metavariable bound to the matching native type.
//
// Call shape: `with_match_primitive_type!(primitive, |$T| { ... $T ... })`.
// The `$_:tt` fragment captures the literal `$` written by the caller so that the inner
// `__with_ty__` macro can declare its own `$T` metavariable and substitute the concrete
// type into `$body`.
macro_rules! with_match_primitive_type {(
$key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
// Inner helper: takes one concrete type and expands to the caller's body.
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
// Non-std native types used by some of the arms below.
use crate::types::{days_ms, months_days_ns, f16, i256};
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
Int256 => __with_ty__! { i256 },
DaysMs => __with_ty__! { days_ms },
MonthDayNano => __with_ty__! { months_days_ns },
UInt8 => __with_ty__! { u8 },
UInt16 => __with_ty__! { u16 },
UInt32 => __with_ty__! { u32 },
UInt64 => __with_ty__! { u64 },
Float16 => __with_ty__! { f16 },
Float32 => __with_ty__! { f32 },
Float64 => __with_ty__! { f64 },
}
})}
impl std::fmt::Debug for dyn Array + '_ {
    /// Formats the array by downcasting it to the concrete type selected by its physical
    /// type and delegating to that type's `fmt`.
    ///
    /// # Panics
    /// Panics if the concrete type behind `self` does not match its physical type (the
    /// downcast is unwrapped).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use crate::datatypes::PhysicalType::*;

        let physical = self.data_type().to_physical_type();
        match physical {
            // Flat arrays.
            Null => fmt_dyn!(self, NullArray, f),
            Boolean => fmt_dyn!(self, BooleanArray, f),
            Primitive(native) => with_match_primitive_type!(native, |$T| {
                fmt_dyn!(self, PrimitiveArray<$T>, f)
            }),
            FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f),

            // Offset-based binary / string arrays.
            Binary => fmt_dyn!(self, BinaryArray<i32>, f),
            LargeBinary => fmt_dyn!(self, BinaryArray<i64>, f),
            Utf8 => fmt_dyn!(self, Utf8Array<i32>, f),
            LargeUtf8 => fmt_dyn!(self, Utf8Array<i64>, f),

            // Nested arrays.
            List => fmt_dyn!(self, ListArray<i32>, f),
            LargeList => fmt_dyn!(self, ListArray<i64>, f),
            FixedSizeList => fmt_dyn!(self, FixedSizeListArray, f),
            Struct => fmt_dyn!(self, StructArray, f),
            Union => fmt_dyn!(self, UnionArray, f),
            Map => fmt_dyn!(self, MapArray, f),

            // Dictionary arrays, one concrete type per key integer type.
            Dictionary(keys) => match_integer_type!(keys, |$T| {
                fmt_dyn!(self, DictionaryArray<$T>, f)
            }),
        }
    }
}
/// Creates a boxed array of the given `data_type` by calling `new_empty` on the concrete
/// array type selected by the data type's physical type.
pub fn new_empty_array(data_type: DataType) -> Box<dyn Array> {
    use crate::datatypes::PhysicalType::*;

    let physical = data_type.to_physical_type();
    match physical {
        // Flat arrays.
        Null => Box::new(NullArray::new_empty(data_type)),
        Boolean => Box::new(BooleanArray::new_empty(data_type)),
        Primitive(native) => with_match_primitive_type!(native, |$T| {
            Box::new(PrimitiveArray::<$T>::new_empty(data_type))
        }),
        FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(data_type)),

        // Offset-based binary / string arrays.
        Binary => Box::new(BinaryArray::<i32>::new_empty(data_type)),
        LargeBinary => Box::new(BinaryArray::<i64>::new_empty(data_type)),
        Utf8 => Box::new(Utf8Array::<i32>::new_empty(data_type)),
        LargeUtf8 => Box::new(Utf8Array::<i64>::new_empty(data_type)),

        // Nested arrays.
        List => Box::new(ListArray::<i32>::new_empty(data_type)),
        LargeList => Box::new(ListArray::<i64>::new_empty(data_type)),
        FixedSizeList => Box::new(FixedSizeListArray::new_empty(data_type)),
        Struct => Box::new(StructArray::new_empty(data_type)),
        Union => Box::new(UnionArray::new_empty(data_type)),
        Map => Box::new(MapArray::new_empty(data_type)),

        // Dictionary arrays, one concrete type per key integer type.
        Dictionary(keys) => match_integer_type!(keys, |$T| {
            Box::new(DictionaryArray::<$T>::new_empty(data_type))
        }),
    }
}
/// Creates a boxed array of the given `data_type` and `length` by calling `new_null` on
/// the concrete array type selected by the data type's physical type.
pub fn new_null_array(data_type: DataType, length: usize) -> Box<dyn Array> {
    use crate::datatypes::PhysicalType::*;

    let physical = data_type.to_physical_type();
    match physical {
        // Flat arrays.
        Null => Box::new(NullArray::new_null(data_type, length)),
        Boolean => Box::new(BooleanArray::new_null(data_type, length)),
        Primitive(native) => with_match_primitive_type!(native, |$T| {
            Box::new(PrimitiveArray::<$T>::new_null(data_type, length))
        }),
        FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(data_type, length)),

        // Offset-based binary / string arrays.
        Binary => Box::new(BinaryArray::<i32>::new_null(data_type, length)),
        LargeBinary => Box::new(BinaryArray::<i64>::new_null(data_type, length)),
        Utf8 => Box::new(Utf8Array::<i32>::new_null(data_type, length)),
        LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(data_type, length)),

        // Nested arrays.
        List => Box::new(ListArray::<i32>::new_null(data_type, length)),
        LargeList => Box::new(ListArray::<i64>::new_null(data_type, length)),
        FixedSizeList => Box::new(FixedSizeListArray::new_null(data_type, length)),
        Struct => Box::new(StructArray::new_null(data_type, length)),
        Union => Box::new(UnionArray::new_null(data_type, length)),
        Map => Box::new(MapArray::new_null(data_type, length)),

        // Dictionary arrays, one concrete type per key integer type.
        Dictionary(keys) => match_integer_type!(keys, |$T| {
            Box::new(DictionaryArray::<$T>::new_null(data_type, length))
        }),
    }
}
/// Conversion between an [`Array`] and `arrow_data::ArrayData`.
///
/// Only compiled when the `arrow` feature is enabled.
#[cfg(feature = "arrow")]
pub trait Arrow2Arrow: Array {
/// Converts this array into an `arrow_data::ArrayData`.
fn to_data(&self) -> arrow_data::ArrayData;
/// Builds `Self` from an `arrow_data::ArrayData`.
///
/// NOTE(review): the signature is infallible, so implementations presumably panic when
/// `data` does not describe a `Self` — confirm.
fn from_data(data: &arrow_data::ArrayData) -> Self;
}
// Downcasts `$array` to `$ty` (through `general_dyn!`) and calls the concrete array's
// `to_data`.
#[cfg(feature = "arrow")]
macro_rules! to_data_dyn {
    ($array:expr, $ty:ty) => {{
        let convert = |concrete: &$ty| concrete.to_data();
        general_dyn!($array, $ty, convert)
    }};
}
#[cfg(feature = "arrow")]
impl From<Box<dyn Array>> for arrow_array::ArrayRef {
    /// Converts a boxed array by delegating to the `From<&dyn Array>` conversion.
    fn from(value: Box<dyn Array>) -> Self {
        Self::from(&*value)
    }
}
#[cfg(feature = "arrow")]
impl From<&dyn Array> for arrow_array::ArrayRef {
    /// Converts the array to `ArrayData` with [`to_data`] and hands it to
    /// `arrow_array::make_array`.
    fn from(value: &dyn Array) -> Self {
        let data = to_data(value);
        arrow_array::make_array(data)
    }
}
#[cfg(feature = "arrow")]
impl From<arrow_array::ArrayRef> for Box<dyn Array> {
    /// Converts an `ArrayRef` by delegating to the `From<&dyn arrow_array::Array>`
    /// conversion.
    fn from(value: arrow_array::ArrayRef) -> Self {
        Self::from(&*value)
    }
}
#[cfg(feature = "arrow")]
impl From<&dyn arrow_array::Array> for Box<dyn Array> {
    /// Extracts the `ArrayData` of `value` and converts it with [`from_data`].
    fn from(value: &dyn arrow_array::Array) -> Self {
        let data = value.to_data();
        from_data(&data)
    }
}
/// Converts a dynamically-typed [`Array`] into `arrow_data::ArrayData` by downcasting it
/// to the concrete type selected by its physical type and calling that type's `to_data`.
///
/// # Panics
/// Panics if the concrete type behind `array` does not match its physical type (the
/// downcast is unwrapped).
#[cfg(feature = "arrow")]
pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData {
    use crate::datatypes::PhysicalType::*;

    let physical = array.data_type().to_physical_type();
    match physical {
        // Flat arrays.
        Null => to_data_dyn!(array, NullArray),
        Boolean => to_data_dyn!(array, BooleanArray),
        Primitive(native) => with_match_primitive_type!(native, |$T| {
            to_data_dyn!(array, PrimitiveArray<$T>)
        }),
        FixedSizeBinary => to_data_dyn!(array, FixedSizeBinaryArray),

        // Offset-based binary / string arrays.
        Binary => to_data_dyn!(array, BinaryArray<i32>),
        LargeBinary => to_data_dyn!(array, BinaryArray<i64>),
        Utf8 => to_data_dyn!(array, Utf8Array<i32>),
        LargeUtf8 => to_data_dyn!(array, Utf8Array<i64>),

        // Nested arrays.
        List => to_data_dyn!(array, ListArray<i32>),
        LargeList => to_data_dyn!(array, ListArray<i64>),
        FixedSizeList => to_data_dyn!(array, FixedSizeListArray),
        Struct => to_data_dyn!(array, StructArray),
        Union => to_data_dyn!(array, UnionArray),
        Map => to_data_dyn!(array, MapArray),

        // Dictionary arrays, one concrete type per key integer type.
        Dictionary(keys) => match_integer_type!(keys, |$T| {
            to_data_dyn!(array, DictionaryArray<$T>)
        }),
    }
}
/// Builds a boxed [`Array`] from `arrow_data::ArrayData`.
///
/// The arrow data type of `data` is converted into this crate's [`DataType`]; its
/// physical type then selects the concrete array type whose `from_data` is called.
#[cfg(feature = "arrow")]
pub fn from_data(data: &arrow_data::ArrayData) -> Box<dyn Array> {
    use crate::datatypes::PhysicalType::*;

    let data_type: DataType = data.data_type().clone().into();
    let physical = data_type.to_physical_type();
    match physical {
        // Flat arrays.
        Null => Box::new(NullArray::from_data(data)),
        Boolean => Box::new(BooleanArray::from_data(data)),
        Primitive(native) => with_match_primitive_type!(native, |$T| {
            Box::new(PrimitiveArray::<$T>::from_data(data))
        }),
        FixedSizeBinary => Box::new(FixedSizeBinaryArray::from_data(data)),

        // Offset-based binary / string arrays.
        Binary => Box::new(BinaryArray::<i32>::from_data(data)),
        LargeBinary => Box::new(BinaryArray::<i64>::from_data(data)),
        Utf8 => Box::new(Utf8Array::<i32>::from_data(data)),
        LargeUtf8 => Box::new(Utf8Array::<i64>::from_data(data)),

        // Nested arrays.
        List => Box::new(ListArray::<i32>::from_data(data)),
        LargeList => Box::new(ListArray::<i64>::from_data(data)),
        FixedSizeList => Box::new(FixedSizeListArray::from_data(data)),
        Struct => Box::new(StructArray::from_data(data)),
        Union => Box::new(UnionArray::from_data(data)),
        Map => Box::new(MapArray::from_data(data)),

        // Dictionary arrays, one concrete type per key integer type.
        Dictionary(keys) => match_integer_type!(keys, |$T| {
            Box::new(DictionaryArray::<$T>::from_data(data))
        }),
    }
}
// Downcasts `$array` to `$ty` (through `general_dyn!`) and returns a boxed clone of the
// concrete array.
macro_rules! clone_dyn {
    ($array:expr, $ty:ty) => {{
        let duplicate = |concrete: &$ty| Box::new(concrete.clone());
        general_dyn!($array, $ty, duplicate)
    }};
}
// Generates the by-value `sliced` / `sliced_unchecked` methods for a type that provides
// `len()` and an unsafe `slice_unchecked(&mut self, offset, length)`.
macro_rules! impl_sliced {
    () => {
        /// Returns this array sliced with `offset` and `length`.
        ///
        /// # Panics
        /// Panics if `offset + length` overflows `usize` or exceeds `self.len()`.
        #[inline]
        #[must_use]
        pub fn sliced(self, offset: usize, length: usize) -> Self {
            // `checked_add` keeps the bounds check sound: with a plain `+`, a sum that
            // wraps around in release builds could pass the check and reach
            // `sliced_unchecked` with an out-of-bounds range.
            let in_bounds = offset
                .checked_add(length)
                .map_or(false, |end| end <= self.len());
            assert!(
                in_bounds,
                "the offset of the new Buffer cannot exceed the existing length"
            );
            // SAFETY: `offset + length <= self.len()` was verified above without overflow.
            unsafe { self.sliced_unchecked(offset, length) }
        }

        /// Returns this array sliced with `offset` and `length`, without a bounds check.
        ///
        /// # Safety
        /// The caller must uphold the contract of `slice_unchecked`, which `sliced`
        /// guarantees by checking `offset + length <= self.len()`.
        #[inline]
        #[must_use]
        pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
            self.slice_unchecked(offset, length);
            self
        }
    };
}
// Generates `with_validity` / `set_validity` for a type that has a
// `validity: Option<Bitmap>` field and a `len()` method.
macro_rules! impl_mut_validity {
    () => {
        /// Returns this array with its validity replaced by `validity`.
        ///
        /// # Panics
        /// Panics if `validity` is `Some` and its length differs from `self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Replaces the validity of this array with `validity`.
        ///
        /// # Panics
        /// Panics if `validity` is `Some` and its length differs from `self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<Bitmap>) {
            if let Some(bitmap) = validity.as_ref() {
                if bitmap.len() != self.len() {
                    panic!("validity must be equal to the array's length")
                }
            }
            self.validity = validity;
        }
    };
}
// Generates `with_validity` / `set_validity` / `apply_validity` for a type that has a
// `validity: Option<MutableBitmap>` field and a `len()` method.
macro_rules! impl_mutable_array_mut_validity {
    () => {
        /// Returns this array with its validity replaced by `validity`.
        ///
        /// # Panics
        /// Panics if `validity` is `Some` and its length differs from `self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<MutableBitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Replaces the validity of this array with `validity`.
        ///
        /// # Panics
        /// Panics if `validity` is `Some` and its length differs from `self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
            if let Some(bitmap) = validity.as_ref() {
                if bitmap.len() != self.len() {
                    panic!("validity must be equal to the array's length")
                }
            }
            self.validity = validity;
        }

        /// Applies `f` to the current validity, if there is one, and stores the result.
        ///
        /// Does nothing when the array has no validity.
        ///
        /// # Panics
        /// Panics if the bitmap returned by `f` has a length different from `self.len()`.
        #[inline]
        pub fn apply_validity<F: FnOnce(MutableBitmap) -> MutableBitmap>(&mut self, f: F) {
            // The bitmap is moved out (leaving `None`) so that `f` can take it by value.
            if let Some(current) = self.validity.take() {
                let updated = f(current);
                self.set_validity(Some(updated))
            }
        }
    };
}
// Generates the `boxed` / `arced` convenience conversions into a type-erased `Array`.
macro_rules! impl_into_array {
    () => {
        /// Moves this array into a `Box<dyn Array>`.
        pub fn boxed(self) -> Box<dyn Array> {
            let boxed: Box<Self> = Box::new(self);
            boxed
        }

        /// Moves this array into an `Arc<dyn Array>`.
        pub fn arced(self) -> std::sync::Arc<dyn Array> {
            let shared: std::sync::Arc<Self> = std::sync::Arc::new(self);
            shared
        }
    };
}
// Generates the `Array` trait methods that look the same for every concrete array:
// `as_any`, `as_any_mut`, `len`, `data_type`, `slice`, `slice_unchecked` and `to_boxed`.
// Meant to be invoked inside an `impl Array for ...` block of a type that has a
// `data_type` field and implements `Clone`.
//
// NOTE(review): `self.len()`, `self.slice(..)` and `self.slice_unchecked(..)` are
// presumably resolved to the concrete type's inherent methods of the same name; without
// such inherent methods these calls would resolve back to the trait methods being
// defined here — confirm that every user of this macro provides them.
macro_rules! impl_common_array {
() => {
#[inline]
fn as_any(&self) -> &dyn std::any::Any {
self
}
#[inline]
fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
self
}
#[inline]
fn len(&self) -> usize {
self.len()
}
#[inline]
fn data_type(&self) -> &DataType {
&self.data_type
}
#[inline]
fn slice(&mut self, offset: usize, length: usize) {
self.slice(offset, length);
}
#[inline]
unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
self.slice_unchecked(offset, length);
}
// Boxes a clone of the concrete array.
#[inline]
fn to_boxed(&self) -> Box<dyn Array> {
Box::new(self.clone())
}
};
}
/// Clones a dynamically-typed [`Array`] by downcasting it to the concrete type selected
/// by its physical type and boxing a clone of that concrete array.
///
/// # Panics
/// Panics if the concrete type behind `array` does not match its physical type (the
/// downcast is unwrapped).
pub fn clone(array: &dyn Array) -> Box<dyn Array> {
    use crate::datatypes::PhysicalType::*;

    let physical = array.data_type().to_physical_type();
    match physical {
        // Flat arrays.
        Null => clone_dyn!(array, NullArray),
        Boolean => clone_dyn!(array, BooleanArray),
        Primitive(native) => with_match_primitive_type!(native, |$T| {
            clone_dyn!(array, PrimitiveArray<$T>)
        }),
        FixedSizeBinary => clone_dyn!(array, FixedSizeBinaryArray),

        // Offset-based binary / string arrays.
        Binary => clone_dyn!(array, BinaryArray<i32>),
        LargeBinary => clone_dyn!(array, BinaryArray<i64>),
        Utf8 => clone_dyn!(array, Utf8Array<i32>),
        LargeUtf8 => clone_dyn!(array, Utf8Array<i64>),

        // Nested arrays.
        List => clone_dyn!(array, ListArray<i32>),
        LargeList => clone_dyn!(array, ListArray<i64>),
        FixedSizeList => clone_dyn!(array, FixedSizeListArray),
        Struct => clone_dyn!(array, StructArray),
        Union => clone_dyn!(array, UnionArray),
        Map => clone_dyn!(array, MapArray),

        // Dictionary arrays, one concrete type per key integer type.
        Dictionary(keys) => match_integer_type!(keys, |$T| {
            clone_dyn!(array, DictionaryArray<$T>)
        }),
    }
}
// Lets a `dyn Array` be borrowed as a `dyn Array + 'a` for any lifetime `'a`, so
// generic code bounded by `AsRef<dyn Array>` also accepts a plain `dyn Array`.
impl<'a> AsRef<(dyn Array + 'a)> for dyn Array {
fn as_ref(&self) -> &(dyn Array + 'a) {
self
}
}
mod binary;
mod boolean;
mod dictionary;
mod fixed_size_binary;
mod fixed_size_list;
mod list;
mod map;
mod null;
mod primitive;
mod specification;
mod struct_;
mod union;
mod utf8;
mod equal;
mod ffi;
mod fmt;
#[doc(hidden)]
pub mod indexable;
mod iterator;
pub mod growable;
pub mod ord;
pub(crate) use iterator::ArrayAccessor;
pub use iterator::ArrayValuesIter;
pub use equal::equal;
pub use fmt::{get_display, get_value_display};
pub use binary::{BinaryArray, BinaryValueIter, MutableBinaryArray, MutableBinaryValuesArray};
pub use boolean::{BooleanArray, MutableBooleanArray};
pub use dictionary::{DictionaryArray, DictionaryKey, MutableDictionaryArray};
pub use fixed_size_binary::{FixedSizeBinaryArray, MutableFixedSizeBinaryArray};
pub use fixed_size_list::{FixedSizeListArray, MutableFixedSizeListArray};
pub use list::{ListArray, ListValuesIter, MutableListArray};
pub use map::MapArray;
pub use null::{MutableNullArray, NullArray};
pub use primitive::*;
pub use struct_::{MutableStructArray, StructArray};
pub use union::UnionArray;
pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter};
pub(crate) use self::ffi::offset_buffers_children_dictionary;
pub(crate) use self::ffi::FromFfi;
pub(crate) use self::ffi::ToFfi;
/// Fallible counterpart of [`Extend`]: extends `self` with the items of an iterator and
/// reports failure through [`Result`].
pub trait TryExtend<A> {
/// Tries to extend `self` with every item yielded by `iter`.
fn try_extend<I: IntoIterator<Item = A>>(&mut self, iter: I) -> Result<()>;
}
/// Ability to receive a single new item, reporting failure through [`Result`].
pub trait TryPush<A> {
/// Tries to push `item` into `self`.
fn try_push(&mut self, item: A) -> Result<()>;
}
/// Ability to receive a single new item without validation.
pub trait PushUnchecked<A> {
/// Pushes `item` into `self` without checks.
///
/// # Safety
/// NOTE(review): the exact contract is defined by each implementor; presumably `item`
/// must uphold the invariants of the container — confirm.
unsafe fn push_unchecked(&mut self, item: A);
}
/// Ability to extend a value from a reference to another value of the same type,
/// reporting failure through [`Result`].
pub trait TryExtendFromSelf {
/// Tries to extend `self` with the contents of `other`.
fn try_extend_from_self(&mut self, other: &Self) -> Result<()>;
}
/// Common accessor interface for arrays made of a byte buffer plus offsets of type `O`.
///
/// # Safety
/// NOTE(review): this is an `unsafe trait`, but the invariants implementors must uphold
/// are not visible here; presumably `offsets` must be non-empty, non-decreasing and in
/// bounds of `values` — confirm before implementing.
pub unsafe trait GenericBinaryArray<O: crate::offset::Offset>: Array {
/// The bytes of all values.
fn values(&self) -> &[u8];
/// The offsets of type `O`.
fn offsets(&self) -> &[O];
}