pub struct Utf8Array<O: Offset> { /* private fields */ }
Expand description
A Utf8Array
is arrow’s semantic equivalent of an immutable Vec<Option<String>>
.
Cloning and slicing this struct is O(1)
.
§Example
use re_arrow2::bitmap::Bitmap;
use re_arrow2::buffer::Buffer;
use re_arrow2::array::Utf8Array;
let array = Utf8Array::<i32>::from([Some("hi"), None, Some("there")]);
assert_eq!(array.value(0), "hi");
assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some("hi"), None, Some("there")]);
assert_eq!(array.values_iter().collect::<Vec<_>>(), vec!["hi", "", "there"]);
// the underlying representation
assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
assert_eq!(array.values(), &Buffer::from(b"hithere".to_vec()));
assert_eq!(array.offsets().buffer(), &Buffer::from(vec![0, 2, 2, 2 + 5]));
§Generic parameter
The generic parameter Offset
can only be i32
or i64
and trades off maximum array length against
memory usage:
- the sum of lengths of all elements cannot exceed
Offset::MAX
- the total size of the underlying data is
array.len() * size_of::<Offset>() + sum of lengths of all elements
§Safety
The following invariants hold:
- Two consecutive `offsets`, cast (`as`) to `usize`, are valid slices of `values`.
- A slice of `values` taken from two consecutive `offsets` is valid utf8.
- `len` is equal to `validity.len()`, when defined.
Implementations§
source§impl<O: Offset> Utf8Array<O>
impl<O: Offset> Utf8Array<O>
sourcepub fn try_new(
data_type: DataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>
) -> Result<Self>
pub fn try_new( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Result<Self>
Returns a Utf8Array
created from its internal representation.
§Errors
This function returns an error iff:
- The last offset is not equal to the values’ length.
- The validity’s length is not equal to `offsets.len()`.
- The `data_type`’s `crate::datatypes::PhysicalType` is not equal to either `Utf8` or `LargeUtf8`.
- The `values` between two consecutive `offsets` are not valid utf8.
§Implementation
This function is O(N)
- checking utf8 is O(N)
sourcepub fn from_slice<T: AsRef<str>, P: AsRef<[T]>>(slice: P) -> Self
pub fn from_slice<T: AsRef<str>, P: AsRef<[T]>>(slice: P) -> Self
Returns a Utf8Array
from a slice of &str
.
A convenience method that uses Self::from_trusted_len_values_iter
.
sourcepub fn from<T: AsRef<str>, P: AsRef<[Option<T>]>>(slice: P) -> Self
pub fn from<T: AsRef<str>, P: AsRef<[Option<T>]>>(slice: P) -> Self
Returns a new Utf8Array
from a slice of Option<&str>
.
A convenience method that uses Self::from_trusted_len_iter
.
sourcepub fn iter(&self) -> ZipValidity<&str, Utf8ValuesIter<'_, O>, BitmapIter<'_>> ⓘ
pub fn iter(&self) -> ZipValidity<&str, Utf8ValuesIter<'_, O>, BitmapIter<'_>> ⓘ
Returns an iterator of Option<&str>
sourcepub fn values_iter(&self) -> Utf8ValuesIter<'_, O>
pub fn values_iter(&self) -> Utf8ValuesIter<'_, O>
Returns an iterator of &str
sourcepub fn value(&self, i: usize) -> &str
pub fn value(&self, i: usize) -> &str
Returns the value of the element at index i
, ignoring the array’s validity.
§Panic
This function panics iff i >= self.len
.
sourcepub unsafe fn value_unchecked(&self, i: usize) -> &str
pub unsafe fn value_unchecked(&self, i: usize) -> &str
Returns the value of the element at index i
, ignoring the array’s validity.
§Safety
This function is safe iff i < self.len
.
sourcepub fn offsets(&self) -> &OffsetsBuffer<O>
pub fn offsets(&self) -> &OffsetsBuffer<O>
Returns the offsets of this Utf8Array
.
sourcepub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize)
pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize)
sourcepub unsafe fn sliced_unchecked(self, offset: usize, length: usize) -> Self
pub unsafe fn sliced_unchecked(self, offset: usize, length: usize) -> Self
sourcepub fn with_validity(self, validity: Option<Bitmap>) -> Self
pub fn with_validity(self, validity: Option<Bitmap>) -> Self
sourcepub fn set_validity(&mut self, validity: Option<Bitmap>)
pub fn set_validity(&mut self, validity: Option<Bitmap>)
sourcepub fn boxed(self) -> Box<dyn Array>
pub fn boxed(self) -> Box<dyn Array>
Boxes this array into a Box<dyn Array>
.
sourcepub fn arced(self) -> Arc<dyn Array>
pub fn arced(self) -> Arc<dyn Array>
Arcs this array into a std::sync::Arc<dyn Array>
.
sourcepub fn into_inner(
self
) -> (DataType, OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>)
pub fn into_inner( self ) -> (DataType, OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>)
Returns its internal representation
sourcepub fn into_mut(self) -> Either<Self, MutableUtf8Array<O>> ⓘ
pub fn into_mut(self) -> Either<Self, MutableUtf8Array<O>> ⓘ
Try to convert this Utf8Array
to a MutableUtf8Array
sourcepub fn new_empty(data_type: DataType) -> Self
pub fn new_empty(data_type: DataType) -> Self
Returns a new empty Utf8Array
.
The array is guaranteed to have no elements nor validity.
sourcepub fn new_null(data_type: DataType, length: usize) -> Self
pub fn new_null(data_type: DataType, length: usize) -> Self
Returns a new Utf8Array
whose slots are all null / None
.
sourcepub fn default_data_type() -> DataType
pub fn default_data_type() -> DataType
Returns a default DataType
of this array, which depends on the generic parameter O
: DataType::Utf8
or DataType::LargeUtf8
sourcepub unsafe fn try_new_unchecked(
data_type: DataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>
) -> Result<Self>
pub unsafe fn try_new_unchecked( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Result<Self>
Creates a new Utf8Array
without checking for offsets monotonicity nor utf8-validity
§Errors
This function returns an error iff:
- The last offset is not equal to the values’ length.
- The validity’s length is not equal to `offsets.len()`.
- The `data_type`’s `crate::datatypes::PhysicalType` is not equal to either `Utf8` or `LargeUtf8`.
§Safety
This function is unsound iff:
- The `values` between two consecutive `offsets` are not valid utf8.
§Implementation
This function is O(1)
sourcepub fn new(
data_type: DataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>
) -> Self
pub fn new( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Self
Creates a new Utf8Array
.
§Panics
This function panics iff:
- The last offset is not equal to the values’ length.
- The validity’s length is not equal to `offsets.len()`.
- The `data_type`’s `crate::datatypes::PhysicalType` is not equal to either `Utf8` or `LargeUtf8`.
- The `values` between two consecutive `offsets` are not valid utf8.
§Implementation
This function is O(N)
- checking utf8 is O(N)
sourcepub unsafe fn new_unchecked(
data_type: DataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>
) -> Self
pub unsafe fn new_unchecked( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Self
Creates a new Utf8Array
without checking for offsets monotonicity.
§Panics
This function panics iff:
- The last offset is not equal to the values’ length.
- The validity’s length is not equal to `offsets.len()`.
- The `data_type`’s `crate::datatypes::PhysicalType` is not equal to either `Utf8` or `LargeUtf8`.
§Safety
This function is unsound iff:
- the offsets are not monotonically increasing
- The `values` between two consecutive `offsets` are not valid utf8.
§Implementation
This function is O(1)
sourcepub fn from_trusted_len_values_iter<T: AsRef<str>, I: TrustedLen<Item = T>>(
iterator: I
) -> Self
pub fn from_trusted_len_values_iter<T: AsRef<str>, I: TrustedLen<Item = T>>( iterator: I ) -> Self
Returns a (non-null) Utf8Array
created from a TrustedLen
of &str
.
§Implementation
This function is O(N)
sourcepub fn from_iter_values<T: AsRef<str>, I: Iterator<Item = T>>(
iterator: I
) -> Self
pub fn from_iter_values<T: AsRef<str>, I: Iterator<Item = T>>( iterator: I ) -> Self
sourcepub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
Creates a Utf8Array
from an iterator of trusted length.
§Safety
The iterator must be TrustedLen
.
I.e. that size_hint().1
correctly reports its length.
sourcepub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
Creates a Utf8Array
from an iterator of trusted length.
sourcepub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
iterator: I
) -> Result<Self, E>
pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>( iterator: I ) -> Result<Self, E>
Creates a Utf8Array
from a fallible iterator of trusted length.
§Safety
The iterator must be TrustedLen
.
I.e. that size_hint().1
correctly reports its length.
sourcepub fn try_from_trusted_len_iter<E, I, P>(iter: I) -> Result<Self, E>
pub fn try_from_trusted_len_iter<E, I, P>(iter: I) -> Result<Self, E>
Creates a Utf8Array
from a fallible iterator of trusted length.
Trait Implementations§
source§impl<O: Offset> Array for Utf8Array<O>
impl<O: Offset> Array for Utf8Array<O>
source§fn as_any(&self) -> &dyn Any
fn as_any(&self) -> &dyn Any
Any
, which enables downcasting to concrete types.
source§fn as_any_mut(&mut self) -> &mut dyn Any
fn as_any_mut(&mut self) -> &mut dyn Any
Any
, which enables mutable downcasting to concrete types.
source§fn len(&self) -> usize
fn len(&self) -> usize
Array
. Every array has a length corresponding to the number of
elements (slots).
source§fn data_type(&self) -> &DataType
fn data_type(&self) -> &DataType
DataType
of the Array
. In combination with Array::as_any
, this can be
used to downcast trait objects (dyn Array
) to concrete arrays.
source§unsafe fn slice_unchecked(&mut self, offset: usize, length: usize)
unsafe fn slice_unchecked(&mut self, offset: usize, length: usize)
source§unsafe fn is_null_unchecked(&self, i: usize) -> bool
unsafe fn is_null_unchecked(&self, i: usize) -> bool
i
is null. Read more
source§impl<'a, O: Offset> From<GrowableUtf8<'a, O>> for Utf8Array<O>
impl<'a, O: Offset> From<GrowableUtf8<'a, O>> for Utf8Array<O>
source§fn from(val: GrowableUtf8<'a, O>) -> Self
fn from(val: GrowableUtf8<'a, O>) -> Self
source§impl<O: Offset> From<MutableUtf8Array<O>> for Utf8Array<O>
impl<O: Offset> From<MutableUtf8Array<O>> for Utf8Array<O>
source§fn from(other: MutableUtf8Array<O>) -> Self
fn from(other: MutableUtf8Array<O>) -> Self
source§impl<O: Offset> From<MutableUtf8ValuesArray<O>> for Utf8Array<O>
impl<O: Offset> From<MutableUtf8ValuesArray<O>> for Utf8Array<O>
source§fn from(other: MutableUtf8ValuesArray<O>) -> Self
fn from(other: MutableUtf8ValuesArray<O>) -> Self
source§impl<O: Offset> GenericBinaryArray<O> for Utf8Array<O>
impl<O: Offset> GenericBinaryArray<O> for Utf8Array<O>
source§impl<'a, O: Offset> IntoIterator for &'a Utf8Array<O>
impl<'a, O: Offset> IntoIterator for &'a Utf8Array<O>
source§impl<O: Offset> PartialEq<&(dyn Array + 'static)> for Utf8Array<O>
impl<O: Offset> PartialEq<&(dyn Array + 'static)> for Utf8Array<O>
source§impl<O: Offset> PartialEq<Utf8Array<O>> for &dyn Array
impl<O: Offset> PartialEq<Utf8Array<O>> for &dyn Array
Auto Trait Implementations§
impl<O> Freeze for Utf8Array<O>
impl<O> RefUnwindSafe for Utf8Array<O>
impl<O> Send for Utf8Array<O>
impl<O> Sync for Utf8Array<O>
impl<O> Unpin for Utf8Array<O>
impl<O> UnwindSafe for Utf8Array<O>
Blanket Implementations§
source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
source§impl<T> IntoEither for T
impl<T> IntoEither for T
source§fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
self
into a Left
variant of Either<Self, Self>
if into_left
is true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read more
source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
self
into a Left
variant of Either<Self, Self>
if into_left(&self)
returns true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read more