use std::borrow::Borrow;
use std::fmt::{self, Debug};
use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
use crate::array::Array;
use crate::{
array::indexable::{AsIndexed, Indexable},
array::MutableArray,
datatypes::DataType,
error::{Error, Result},
};
use super::DictionaryKey;
#[derive(Copy, Clone, Default)]
pub struct PassthroughHasher(u64);
impl Hasher for PassthroughHasher {
#[inline]
fn write_u64(&mut self, value: u64) {
self.0 = value;
}
fn write(&mut self, _: &[u8]) {
unreachable!();
}
#[inline]
fn finish(&self) -> u64 {
self.0
}
}
#[derive(Clone)]
pub struct Hashed<K> {
hash: u64,
key: K,
}
#[inline]
fn ahash_hash<T: Hash + ?Sized>(value: &T) -> u64 {
let mut hasher = BuildHasherDefault::<ahash::AHasher>::default().build_hasher();
value.hash(&mut hasher);
hasher.finish()
}
impl<K> Hash for Hashed<K> {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
self.hash.hash(state)
}
}
#[derive(Clone)]
pub struct ValueMap<K: DictionaryKey, M: MutableArray> {
pub values: M,
pub map: HashMap<Hashed<K>, (), BuildHasherDefault<PassthroughHasher>>, }
impl<K: DictionaryKey, M: MutableArray> ValueMap<K, M> {
pub fn try_empty(values: M) -> Result<Self> {
if !values.is_empty() {
return Err(Error::InvalidArgumentError(
"initializing value map with non-empty values array".into(),
));
}
Ok(Self {
values,
map: HashMap::default(),
})
}
pub fn from_values(values: M) -> Result<Self>
where
M: Indexable,
M::Type: Eq + Hash,
{
let mut map = HashMap::<Hashed<K>, _, _>::with_capacity_and_hasher(
values.len(),
BuildHasherDefault::<PassthroughHasher>::default(),
);
for index in 0..values.len() {
let key = K::try_from(index).map_err(|_| Error::Overflow)?;
let value = unsafe { values.value_unchecked_at(index) };
let hash = ahash_hash(value.borrow());
match map.raw_entry_mut().from_hash(hash, |item| {
let stored_value = unsafe { values.value_unchecked_at(item.key.as_usize()) };
stored_value.borrow() == value.borrow()
}) {
RawEntryMut::Occupied(_) => {
return Err(Error::InvalidArgumentError(
"duplicate value in dictionary values array".into(),
))
}
RawEntryMut::Vacant(entry) => {
entry.insert_hashed_nocheck(hash, Hashed { hash, key }, ());
}
}
}
Ok(Self { values, map })
}
pub fn data_type(&self) -> &DataType {
self.values.data_type()
}
pub fn into_values(self) -> M {
self.values
}
pub fn take_into(&mut self) -> Box<dyn Array> {
let arr = self.values.as_box();
self.map.clear();
arr
}
#[inline]
pub fn values(&self) -> &M {
&self.values
}
pub fn try_push_valid<V>(
&mut self,
value: V,
mut push: impl FnMut(&mut M, V) -> Result<()>,
) -> Result<K>
where
M: Indexable,
V: AsIndexed<M>,
M::Type: Eq + Hash,
{
let hash = ahash_hash(value.as_indexed());
Ok(
match self.map.raw_entry_mut().from_hash(hash, |item| {
let index = unsafe { item.key.as_usize() };
let stored_value = unsafe { self.values.value_unchecked_at(index) };
stored_value.borrow() == value.as_indexed()
}) {
RawEntryMut::Occupied(entry) => entry.key().key,
RawEntryMut::Vacant(entry) => {
let index = self.values.len();
let key = K::try_from(index).map_err(|_| Error::Overflow)?;
entry.insert_hashed_nocheck(hash, Hashed { hash, key }, ()); push(&mut self.values, value)?;
debug_assert_eq!(self.values.len(), index + 1);
key
}
},
)
}
pub fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
}
}
impl<K: DictionaryKey, M: MutableArray> Debug for ValueMap<K, M> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.values.fmt(f)
}
}