1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
//! The interface and implementations of LZ77 compression algorithm.
//!
//! LZ77 is a compression algorithm used in [DEFLATE](https://tools.ietf.org/html/rfc1951).
#![warn(missing_docs)]
#![cfg_attr(no_std, feature = "no_std")]
#[cfg(feature = "no_std")]
extern crate alloc;
pub use self::default::{DefaultLz77Encoder, DefaultLz77EncoderBuilder};
#[cfg(feature = "no_std")]
use alloc::vec::Vec;
#[cfg(feature = "no_std")]
use core2::io;
use rle_decode_fast::rle_decode;
#[cfg(not(feature = "no_std"))]
use std::io;
mod default;
/// Maximum length of sharable bytes in a pointer.
///
/// This is the upper bound for the `length` field of [`Code::Pointer`].
pub const MAX_LENGTH: u16 = 258;
/// Maximum backward distance of a pointer.
///
/// This is the upper bound for the `backward_distance` field of [`Code::Pointer`].
pub const MAX_DISTANCE: u16 = 32_768;
/// Maximum size of a sliding window.
///
/// Defined to be equal to [`MAX_DISTANCE`]. It is also the value returned by the
/// default implementation of [`Lz77Encode::window_size`].
pub const MAX_WINDOW_SIZE: u16 = MAX_DISTANCE;
/// A LZ77 encoded data.
///
/// A code is either a single literal byte or a backward reference to bytes that
/// appeared earlier in the stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Code {
/// Literal byte.
Literal(u8),
/// Backward pointer to shared data.
///
/// [`Lz77Decoder::decode`] rejects a pointer whose `backward_distance` is larger
/// than the number of bytes currently held in the decoder's buffer.
Pointer {
/// Length of the shared data.
/// The values must be limited to [`MAX_LENGTH`].
length: u16,
/// Distance between current position and start position of the shared data.
/// The values must be limited to [`MAX_DISTANCE`].
backward_distance: u16,
},
}
/// Compression level.
///
/// This is the value reported by [`Lz77Encode::compression_level`]; the default
/// implementation of that method returns [`CompressionLevel::Balance`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum CompressionLevel {
/// No compression.
///
/// Reported by [`NoCompressionLz77Encoder`].
None,
/// Best speed.
Fast,
/// Balanced between speed and size.
Balance,
/// Best compression.
Best,
}
/// The [`Sink`] trait represents a consumer of LZ77 encoded data.
///
/// Encoders hand every [`Code`] they produce to a sink. This module implements the
/// trait for `&mut T` where `T: Sink`, and for `Vec<T>` where `T: From<Code>`.
pub trait Sink {
/// Consumes a LZ77 encoded `Code`.
///
/// Called once for each code, with the code passed by value.
fn consume(&mut self, code: Code);
}
/// Lets a mutable reference to a sink be used wherever a sink is expected,
/// so that a caller can keep ownership of the underlying sink.
impl<'a, T: Sink> Sink for &'a mut T {
    fn consume(&mut self, code: Code) {
        // Forward to the referenced sink itself.
        T::consume(&mut **self, code);
    }
}
/// Collects codes into a vector, converting each one into the element type.
impl<T: From<Code>> Sink for Vec<T> {
    fn consume(&mut self, code: Code) {
        let item: T = code.into();
        self.push(item);
    }
}
/// The [`Lz77Encode`] trait defines the interface of LZ77 encoding algorithm.
///
/// Implementors must provide [`encode`](Lz77Encode::encode) and
/// [`flush`](Lz77Encode::flush); the remaining methods have default implementations.
pub trait Lz77Encode {
/// Encodes a buffer and writes result LZ77 codes to `sink`.
///
/// An implementation may keep codes buffered internally; call
/// [`flush`](Lz77Encode::flush) to make sure they reach a sink.
fn encode<S>(&mut self, buf: &[u8], sink: S)
where
S: Sink;
/// Flushes the encoder, ensuring that all intermediately buffered codes are consumed by `sink`.
fn flush<S>(&mut self, sink: S)
where
S: Sink;
/// Returns the compression level of the encoder.
///
/// If the implementation is omitted, [`CompressionLevel::Balance`] will be returned.
fn compression_level(&self) -> CompressionLevel {
CompressionLevel::Balance
}
/// Returns the window size of the encoder.
///
/// If the implementation is omitted, [`MAX_WINDOW_SIZE`] will be returned.
fn window_size(&self) -> u16 {
MAX_WINDOW_SIZE
}
}
/// An implementation of the [`Lz77Encode`] trait that performs no compression.
///
/// The encoder carries no state; every input byte is passed on as a literal.
#[derive(Debug, Default)]
pub struct NoCompressionLz77Encoder;

impl NoCompressionLz77Encoder {
    /// Creates a new encoder instance.
    ///
    /// # Examples
    /// ```
    /// use libflate::deflate;
    /// use libflate::lz77::{Lz77Encode, NoCompressionLz77Encoder, CompressionLevel};
    ///
    /// let lz77 = NoCompressionLz77Encoder::new();
    /// assert_eq!(lz77.compression_level(), CompressionLevel::None);
    ///
    /// let options = deflate::EncodeOptions::with_lz77(lz77);
    /// let _deflate = deflate::Encoder::with_options(Vec::new(), options);
    /// ```
    pub fn new() -> Self {
        Self
    }
}
impl Lz77Encode for NoCompressionLz77Encoder {
    /// Passes every byte of `buf` to `sink` as a [`Code::Literal`], in input order.
    fn encode<S>(&mut self, buf: &[u8], mut sink: S)
    where
        S: Sink,
    {
        for &byte in buf {
            sink.consume(Code::Literal(byte));
        }
    }

    /// Does nothing: `encode` never holds codes back, so there is nothing to flush.
    fn flush<S>(&mut self, _sink: S)
    where
        S: Sink,
    {
    }

    /// Always reports [`CompressionLevel::None`].
    fn compression_level(&self) -> CompressionLevel {
        CompressionLevel::None
    }
}
/// LZ77 decoder.
///
/// Bytes produced by [`Lz77Decoder::decode`] are accumulated in an internal buffer.
/// They can be inspected through [`Lz77Decoder::buffer`] or taken out through the
/// `io::Read` implementation.
#[derive(Debug, Default)]
pub struct Lz77Decoder {
// Decoded bytes, including ones that were already read; `decode` resolves
// backward pointers against this buffer.
buffer: Vec<u8>,
// Index into `buffer` of the first byte that has not been read yet.
offset: usize,
}
impl Lz77Decoder {
    /// Makes a new [`Lz77Decoder`] instance.
    ///
    /// The decoder starts with an empty buffer.
    pub fn new() -> Self {
        Self::default()
    }

    /// Decodes a [`Code`].
    ///
    /// The decoded bytes are appended to the buffer of [`Lz77Decoder`].
    ///
    /// # Errors
    ///
    /// Returns an error of kind `InvalidData`, leaving the buffer untouched, if
    /// `code` is a pointer whose `backward_distance` is zero or exceeds the number
    /// of bytes currently held in the buffer.
    #[inline]
    pub fn decode(&mut self, code: Code) -> io::Result<()> {
        match code {
            Code::Literal(b) => {
                self.buffer.push(b);
            }
            Code::Pointer {
                length,
                backward_distance,
            } => {
                let distance = usize::from(backward_distance);
                // A zero distance refers to no data at all, and `rle_decode` panics
                // on it; report it as malformed input instead of aborting.
                if distance == 0 {
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidData,
                        "Zero backward distance",
                    ));
                }
                if self.buffer.len() < distance {
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidData,
                        #[cfg(not(feature = "no_std"))]
                        format!(
                            "Too long backword reference: buffer.len={}, distance={}",
                            self.buffer.len(),
                            backward_distance
                        ),
                        #[cfg(feature = "no_std")]
                        "Too long backword reference",
                    ));
                }
                rle_decode(&mut self.buffer, distance, usize::from(length));
            }
        }
        Ok(())
    }

    /// Appends the bytes read from `reader` to the buffer of [`Lz77Decoder`].
    ///
    /// The reader is read to its end and the number of bytes appended is returned.
    /// Unlike [`Lz77Decoder::extend_from_slice`], the read position is not moved, so
    /// the appended bytes are visible through [`Lz77Decoder::buffer`].
    ///
    /// # Errors
    ///
    /// Any error reported by `reader` is returned as is.
    pub fn extend_from_reader<R: io::Read>(&mut self, mut reader: R) -> io::Result<usize> {
        reader.read_to_end(&mut self.buffer)
    }

    /// Appends the given bytes to the buffer of [`Lz77Decoder`].
    ///
    /// The read position is advanced by `buf.len()` as well. When nothing is pending
    /// to be read, the appended bytes are therefore not returned by
    /// [`Lz77Decoder::buffer`]; they remain available as targets of backward pointers.
    pub fn extend_from_slice(&mut self, buf: &[u8]) {
        self.buffer.extend_from_slice(buf);
        self.offset += buf.len();
    }

    /// Clears the buffer of [`Lz77Decoder`].
    ///
    /// The read position is reset to the start as well.
    pub fn clear(&mut self) {
        self.buffer.clear();
        self.offset = 0;
    }

    /// Returns the buffer of [`Lz77Decoder`].
    ///
    /// Only the bytes after the current read position are returned.
    #[inline]
    pub fn buffer(&self) -> &[u8] {
        &self.buffer[self.offset..]
    }

    /// Shrinks the buffer once everything in it has been read.
    ///
    /// Nothing happens unless no unread byte is left and the buffer holds more than
    /// four times [`MAX_DISTANCE`] bytes. In that case only the most recent
    /// [`MAX_DISTANCE`] bytes are kept, moved to the front and marked as read.
    fn truncate_old_buffer(&mut self) {
        let window = usize::from(MAX_DISTANCE);
        if self.buffer().is_empty() && self.buffer.len() > window * 4 {
            let old_len = self.buffer.len();
            // Source and destination cannot overlap because `old_len > 4 * window`.
            self.buffer.copy_within(old_len - window.., 0);
            self.buffer.truncate(window);
            self.offset = window;
        }
    }
}
impl io::Read for Lz77Decoder {
    /// Copies as many unread bytes as fit into `buf` and advances the read position.
    ///
    /// Returns the number of bytes copied, which is zero when nothing is pending.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let unread = &self.buffer[self.offset..];
        let copy_size = buf.len().min(unread.len());
        buf[..copy_size].copy_from_slice(&unread[..copy_size]);
        self.offset += copy_size;
        // Give the decoder a chance to drop history once everything has been read.
        self.truncate_old_buffer();
        Ok(copy_size)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "no_std")]
    use alloc::vec::Vec;
    #[cfg(feature = "no_std")]
    use core2::io::Read as _;
    #[cfg(not(feature = "no_std"))]
    use std::io::Read as _;

    /// Round-trips a short input through the default encoder and the decoder.
    #[test]
    fn encoder_and_decoder_works() {
        const INPUT: &[u8] = b"hello world!";

        // Encode, then flush so that no code stays buffered inside the encoder.
        let mut encoded = Vec::new();
        let mut lz77 = DefaultLz77Encoder::new();
        lz77.encode(INPUT, &mut encoded);
        lz77.flush(&mut encoded);
        assert!(!encoded.is_empty());

        // Decoding must reproduce the input in the decoder's buffer.
        let mut decoder = Lz77Decoder::new();
        for code in encoded {
            decoder.decode(code).unwrap();
        }
        assert_eq!(decoder.buffer(), INPUT);

        // Reading drains the buffer completely.
        let mut output = Vec::new();
        decoder.read_to_end(&mut output).unwrap();
        assert_eq!(output, INPUT);
        assert!(decoder.buffer().is_empty());
    }
}