use ahash::AHashMap;
use std::io::{Read, Seek};
use crate::array::Array;
use crate::chunk::Chunk;
use crate::datatypes::Schema;
use crate::error::Result;
use super::common::*;
use super::Dictionaries;
use super::{read_batch, read_file_dictionaries, FileMetadata};
/// An iterator over the record batches (blocks) of an Arrow IPC file,
/// yielding one [`Chunk`] per block listed in the file's [`FileMetadata`].
///
/// Dictionaries are not read at construction time; they are loaded the first
/// time the iterator is advanced.
pub struct FileReader<R: Read + Seek> {
/// The underlying source; handed back by `into_inner`.
reader: R,
/// File-level metadata (schema, blocks, ...) supplied at construction.
metadata: FileMetadata,
/// Dictionaries of the file; `None` until the first successful load.
dictionaries: Option<Dictionaries>,
/// Index into `metadata.blocks` of the next block to read.
current_block: usize,
/// When a projection was requested: the three values returned by
/// `prepare_projection` (the first is forwarded to `read_batch`, the second
/// is used by `apply_projection`), with the returned fields wrapped into
/// the projected [`Schema`].
projection: Option<(Vec<usize>, AHashMap<usize, usize>, Schema)>,
/// Row budget still available; starts at the requested limit, or
/// `usize::MAX` when no limit was given, and shrinks by the length of
/// every chunk read.
remaining: usize,
/// Scratch buffer handed to `read_file_dictionaries` and `read_batch`.
data_scratch: Vec<u8>,
/// Scratch buffer handed to `read_batch` (its message scratch argument).
message_scratch: Vec<u8>,
}
impl<R: Read + Seek> FileReader<R> {
    /// Creates a new [`FileReader`] over `reader`.
    ///
    /// # Arguments
    /// * `reader` - the source the blocks are read from.
    /// * `metadata` - the file metadata describing the schema and blocks.
    /// * `projection` - optional column indices; when given, they are
    ///   processed by `prepare_projection` and the reader exposes a schema
    ///   built from the resulting fields (the schema-level metadata is
    ///   copied over unchanged).
    /// * `limit` - optional row budget; `None` is stored as `usize::MAX`.
    pub fn new(
        reader: R,
        metadata: FileMetadata,
        projection: Option<Vec<usize>>,
        limit: Option<usize>,
    ) -> Self {
        let projection = projection.map(|columns| {
            let (indices, mapping, fields) = prepare_projection(&metadata.schema.fields, columns);
            let projected_schema = Schema {
                fields,
                metadata: metadata.schema.metadata.clone(),
            };
            (indices, mapping, projected_schema)
        });
        let remaining = limit.unwrap_or(usize::MAX);

        Self {
            reader,
            metadata,
            dictionaries: Default::default(),
            projection,
            remaining,
            current_block: 0,
            data_scratch: Default::default(),
            message_scratch: Default::default(),
        }
    }

    /// Returns the schema of the chunks produced by this reader: the
    /// projected schema when a projection is set, the file's schema otherwise.
    pub fn schema(&self) -> &Schema {
        match &self.projection {
            Some((_, _, projected_schema)) => projected_schema,
            None => &self.metadata.schema,
        }
    }

    /// Returns the [`FileMetadata`] this reader was created with.
    pub fn metadata(&self) -> &FileMetadata {
        &self.metadata
    }

    /// Consumes the reader and returns the underlying source.
    pub fn into_inner(self) -> R {
        self.reader
    }

    /// Takes the internal scratch buffers as `(data, message)`, leaving
    /// empty vectors in their place.
    pub fn get_scratches(&mut self) -> (Vec<u8>, Vec<u8>) {
        let data = std::mem::take(&mut self.data_scratch);
        let message = std::mem::take(&mut self.message_scratch);
        (data, message)
    }

    /// Replaces the internal scratch buffers with `scratches`, given in the
    /// same `(data, message)` order that `get_scratches` returns.
    pub fn set_scratches(&mut self, scratches: (Vec<u8>, Vec<u8>)) {
        let (data, message) = scratches;
        self.data_scratch = data;
        self.message_scratch = message;
    }

    /// Loads the file's dictionaries unless they are already loaded.
    ///
    /// # Errors
    /// Propagates any error from `read_file_dictionaries`; in that case the
    /// dictionaries stay unset, so a later call tries again.
    fn read_dictionaries(&mut self) -> Result<()> {
        if self.dictionaries.is_some() {
            return Ok(());
        }

        let dictionaries =
            read_file_dictionaries(&mut self.reader, &self.metadata, &mut self.data_scratch)?;
        self.dictionaries = Some(dictionaries);
        Ok(())
    }
}
impl<R: Read + Seek> Iterator for FileReader<R> {
    type Item = Result<Chunk<Box<dyn Array>>>;

    /// Reads the next block of the file.
    ///
    /// Returns `None` once every block in the metadata has been visited.
    /// A failure to load the dictionaries is yielded as `Some(Err(_))`
    /// without advancing to the next block.
    fn next(&mut self) -> Option<Self::Item> {
        let block = self.current_block;
        if block == self.metadata.blocks.len() {
            return None;
        }

        // Dictionaries are loaded on first use; bail out before consuming the block.
        if let Err(error) = self.read_dictionaries() {
            return Some(Err(error));
        }
        self.current_block = block + 1;

        let batch = read_batch(
            &mut self.reader,
            self.dictionaries.as_ref().unwrap(),
            &self.metadata,
            self.projection.as_ref().map(|x| x.0.as_ref()),
            Some(self.remaining),
            block,
            &mut self.message_scratch,
            &mut self.data_scratch,
        );

        // A failed read consumes nothing from the row budget.
        let rows_read = match &batch {
            Ok(chunk) => chunk.len(),
            Err(_) => 0,
        };
        self.remaining -= rows_read;

        let item = match &self.projection {
            // Pass successfully read chunks through `apply_projection`.
            Some((_, mapping, _)) => batch.map(|chunk| apply_projection(chunk, mapping)),
            None => batch,
        };
        Some(item)
    }
}