Vendor things

This commit is contained in:
John Doty 2024-03-08 11:03:01 -08:00
parent 5deceec006
commit 977e3c17e5
19434 changed files with 10682014 additions and 0 deletions

View file

@ -0,0 +1,58 @@
//! Buffer wrappers implementing default so we can allocate the buffers with `Box::default()`
//! to avoid stack copies. Box::new() doesn't at the moment, and using a vec means we would lose
//! static length info.
use crate::deflate::core::{LZ_DICT_SIZE, MAX_MATCH_LEN};
/// Size of the buffer of lz77 encoded data.
pub const LZ_CODE_BUF_SIZE: usize = 64 * 1024;
/// Size of the output buffer.
///
/// Computed with integer arithmetic as 13/10 of [`LZ_CODE_BUF_SIZE`].
pub const OUT_BUF_SIZE: usize = (LZ_CODE_BUF_SIZE * 13) / 10;
/// Length of the `dict` array in [`HashBuffers`]: `LZ_DICT_SIZE` plus `MAX_MATCH_LEN` bytes.
///
/// The trailing `- 1 + 1` cancels out; it is kept exactly as written.
// NOTE(review): presumably the extra `MAX_MATCH_LEN` bytes exist so a match can be read past the
// end of the dictionary window without wrapping — confirm against `deflate::core`.
pub const LZ_DICT_FULL_SIZE: usize = LZ_DICT_SIZE + MAX_MATCH_LEN - 1 + 1;
/// Size of hash values in the hash chains.
pub const LZ_HASH_BITS: i32 = 15;
/// How many bits to shift when updating the current hash value.
pub const LZ_HASH_SHIFT: i32 = (LZ_HASH_BITS + 2) / 3;
/// Size of the chained hash tables.
pub const LZ_HASH_SIZE: usize = 1 << LZ_HASH_BITS;

/// Mix one more input byte into a rolling hash value.
///
/// The previous hash is shifted left by [`LZ_HASH_SHIFT`] bits (bits pushed past the top of the
/// `u16` are dropped), XOR-ed with `byte`, and then masked so the result is always smaller than
/// [`LZ_HASH_SIZE`].
#[inline]
pub fn update_hash(current_hash: u16, byte: u8) -> u16 {
    // All ones in the low `LZ_HASH_BITS` bits.
    let mask = LZ_HASH_SIZE as u16 - 1;
    let shifted = current_hash << LZ_HASH_SHIFT;
    (shifted ^ u16::from(byte)) & mask
}
/// The dictionary and hash-table arrays, bundled into one struct.
///
/// The type implements [`Default`] (everything zeroed) so that it can be created with
/// `Box::default()`, as explained in the comment at the top of this module.
pub struct HashBuffers {
    /// Byte buffer of `LZ_DICT_FULL_SIZE` bytes.
    pub dict: [u8; LZ_DICT_FULL_SIZE],
    /// Table of `LZ_DICT_SIZE` `u16` entries.
    // NOTE(review): presumably the "next" links of the hash chains — confirm against the users
    // in `deflate::core`.
    pub next: [u16; LZ_DICT_SIZE],
    /// Table of `LZ_DICT_SIZE` `u16` entries.
    // NOTE(review): presumably the chain heads indexed by hash value — confirm against the users
    // in `deflate::core`.
    pub hash: [u16; LZ_DICT_SIZE],
}

impl HashBuffers {
    /// Put every buffer back to all zeroes by overwriting `self` with a default value.
    #[inline]
    pub fn reset(&mut self) {
        *self = Self::default();
    }
}

impl Default for HashBuffers {
    /// Create the buffers with every element set to zero.
    fn default() -> Self {
        Self {
            dict: [0; LZ_DICT_FULL_SIZE],
            next: [0; LZ_DICT_SIZE],
            hash: [0; LZ_DICT_SIZE],
        }
    }
}
/// Fixed-size byte buffer of `OUT_BUF_SIZE` bytes.
///
/// Like [`HashBuffers`], it implements [`Default`] so that it can be created with
/// `Box::default()`.
pub struct LocalBuf {
    /// The buffer contents.
    pub b: [u8; OUT_BUF_SIZE],
}

impl Default for LocalBuf {
    /// Create a buffer filled with zeroes.
    fn default() -> Self {
        Self {
            b: [0; OUT_BUF_SIZE],
        }
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,226 @@
//! This module contains functionality for compression.
use crate::alloc::vec;
use crate::alloc::vec::Vec;
mod buffer;
pub mod core;
pub mod stream;
use self::core::*;
/// How much processing the compressor should do to compress the data.
///
/// `NoCompression` and `BestSpeed` have special meanings, the other levels determine the number
/// of checks for matches in the hash chains and whether to use lazy or greedy parsing.
///
/// The enum is `#[repr(i32)]` and every variant has an explicit discriminant, so a variant can be
/// converted to its numeric level with `as i32`.
#[repr(i32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum CompressionLevel {
/// Don't do any compression, only output uncompressed blocks.
NoCompression = 0,
/// Fast compression. Uses a special compression routine that is optimized for speed.
BestSpeed = 1,
/// Slow/high compression. Do a lot of checks to try to find good matches.
BestCompression = 9,
/// Even more checks, can be very slow.
UberCompression = 10,
/// Default compromise between speed and compression.
DefaultLevel = 6,
/// Use the default compression level.
///
/// Unlike the other variants this is a negative marker value (`-1`), not a level in `0..=10`.
DefaultCompression = -1,
}
// Missing safe rust analogue (this and mem-to-mem are quite similar)
/*
fn tdefl_compress(
d: Option<&mut CompressorOxide>,
in_buf: *const c_void,
in_size: Option<&mut usize>,
out_buf: *mut c_void,
out_size: Option<&mut usize>,
flush: TDEFLFlush,
) -> TDEFLStatus {
let res = match d {
None => {
in_size.map(|size| *size = 0);
out_size.map(|size| *size = 0);
(TDEFLStatus::BadParam, 0, 0)
},
Some(compressor) => {
let callback_res = CallbackOxide::new(
compressor.callback_func.clone(),
in_buf,
in_size,
out_buf,
out_size,
);
if let Ok(mut callback) = callback_res {
let res = compress(compressor, &mut callback, flush);
callback.update_size(Some(res.1), Some(res.2));
res
} else {
(TDEFLStatus::BadParam, 0, 0)
}
}
};
res.0
}*/
// Missing safe rust analogue
/*
fn tdefl_init(
d: Option<&mut CompressorOxide>,
put_buf_func: PutBufFuncPtr,
put_buf_user: *mut c_void,
flags: c_int,
) -> TDEFLStatus {
if let Some(d) = d {
*d = CompressorOxide::new(
put_buf_func.map(|func|
CallbackFunc { put_buf_func: func, put_buf_user: put_buf_user }
),
flags as u32,
);
TDEFLStatus::Okay
} else {
TDEFLStatus::BadParam
}
}*/
// Missing safe rust analogue (though maybe best served by flate2 front-end instead)
/*
fn tdefl_compress_mem_to_output(
buf: *const c_void,
buf_len: usize,
put_buf_func: PutBufFuncPtr,
put_buf_user: *mut c_void,
flags: c_int,
) -> bool*/
// Missing safe Rust analogue
/*
fn tdefl_compress_mem_to_mem(
out_buf: *mut c_void,
out_buf_len: usize,
src_buf: *const c_void,
src_buf_len: usize,
flags: c_int,
) -> usize*/
/// Compress the input data to a vector, using the specified compression level (0-10).
///
/// This forwards to `compress_to_vec_inner` with both `window_bits` and `strategy` set to `0`,
/// so no zlib wrapper is requested.
///
/// # Panics
///
/// Panics if the underlying compressor reports an unexpected status, which is treated as a bug.
pub fn compress_to_vec(input: &[u8], level: u8) -> Vec<u8> {
compress_to_vec_inner(input, level, 0, 0)
}
/// Compress the input data to a vector, using the specified compression level (0-10), and with a
/// zlib wrapper.
///
/// This forwards to `compress_to_vec_inner` with `window_bits` set to `1` (a positive value is
/// what requests the zlib wrapper) and `strategy` set to `0`.
///
/// # Panics
///
/// Panics if the underlying compressor reports an unexpected status, which is treated as a bug.
pub fn compress_to_vec_zlib(input: &[u8], level: u8) -> Vec<u8> {
compress_to_vec_inner(input, level, 1, 0)
}
/// Shared implementation behind the compress-to-`Vec` helpers.
///
/// * `input` - the data to compress.
/// * `level` - compression level, converted with `into()` before being turned into flags.
/// * `window_bits` - a value greater than `0` makes the flag helper set the zlib flag.
/// * `strategy` - strategy value forwarded unchanged to the flag helper.
///
/// The output vector starts at half the input length (at least two bytes) and is doubled whenever
/// fewer than 30 bytes of free space remain after a compression step.
///
/// # Panics
///
/// Panics if the compressor returns anything other than `Done` or a consistent `Okay`.
fn compress_to_vec_inner(mut input: &[u8], level: u8, window_bits: i32, strategy: i32) -> Vec<u8> {
    // The comp flags function sets the zlib flag if the window_bits parameter is > 0.
    let flags = create_comp_flags_from_zip_params(level.into(), window_bits, strategy);
    let mut compressor = CompressorOxide::new(flags);

    let initial_len = ::core::cmp::max(input.len() / 2, 2);
    let mut output = vec![0; initial_len];
    let mut written = 0;

    loop {
        let (status, consumed, produced) = compress(
            &mut compressor,
            input,
            &mut output[written..],
            TDEFLFlush::Finish,
        );
        written += produced;

        if status == TDEFLStatus::Done {
            // Drop the unused tail of the buffer before handing it back.
            output.truncate(written);
            return output;
        }

        if status != TDEFLStatus::Okay || consumed > input.len() {
            // Not supposed to happen unless there is a bug.
            panic!("Bug! Unexpectedly failed to compress!");
        }

        input = &input[consumed..];

        // We need more space, so resize the vector.
        if output.len().saturating_sub(written) < 30 {
            let doubled = output.len() * 2;
            output.resize(doubled, 0)
        }
    }
}
// Unit tests for the compress-to-`Vec` helpers in this module.
#[cfg(test)]
mod test {
use super::{compress_to_vec, compress_to_vec_inner, CompressionStrategy};
use crate::inflate::decompress_to_vec;
use alloc::vec;
/// Test deflate example.
///
/// Check if the encoder produces the same code as the example given by Mark Adler here:
/// https://stackoverflow.com/questions/17398931/deflate-encoding-with-static-huffman-codes/17415203
///
/// The same expected bytes are required at both level 1 and level 9.
#[test]
fn compress_small() {
let test_data = b"Deflate late";
let check = [
0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00,
];
let res = compress_to_vec(test_data, 1);
assert_eq!(&check[..], res.as_slice());
let res = compress_to_vec(test_data, 9);
assert_eq!(&check[..], res.as_slice());
}
/// Test that data compressed with the `HuffmanOnly` strategy decompresses back to the input.
#[test]
fn compress_huff_only() {
let test_data = b"Deflate late";
let res = compress_to_vec_inner(test_data, 1, 0, CompressionStrategy::HuffmanOnly as i32);
let d = decompress_to_vec(res.as_slice()).expect("Failed to decompress!");
assert_eq!(test_data, d.as_slice());
}
/// Test that a raw block compresses fine.
///
/// The expected output is built by hand: the byte `1`, the length and its bitwise complement
/// (low byte first for each), followed by the unmodified text.
#[test]
fn compress_raw() {
let text = b"Hello, zlib!";
let encoded = {
let len = text.len();
let notlen = !len;
let mut encoded = vec![
1,
len as u8,
(len >> 8) as u8,
notlen as u8,
(notlen >> 8) as u8,
];
encoded.extend_from_slice(&text[..]);
encoded
};
let res = compress_to_vec(text, 0);
assert_eq!(encoded, res.as_slice());
}
/// Test that a very short input round-trips and is not stored as a raw block.
#[test]
fn short() {
let test_data = [10, 10, 10, 10, 10, 55];
let c = compress_to_vec(&test_data, 9);
let d = decompress_to_vec(c.as_slice()).expect("Failed to decompress!");
assert_eq!(&test_data, d.as_slice());
// Check that a static block is used here, rather than a raw block
// , so the data is actually compressed.
// (The optimal compressed length would be 5, but neither miniz nor zlib manages that either
// as neither checks matches against the byte at index 0.)
assert!(c.len() <= 6);
}
}

View file

@ -0,0 +1,121 @@
//! Extra streaming compression functionality.
//!
//! As of now this is mainly intended for use to build a higher-level wrapper.
//!
//! There is no DeflateState as the needed state is contained in the compressor struct itself.
use crate::deflate::core::{compress, CompressorOxide, TDEFLFlush, TDEFLStatus};
use crate::{MZError, MZFlush, MZStatus, StreamResult};
/// Try to compress from input to output with the given [`CompressorOxide`].
///
/// The compressor is called repeatedly until it reports an error or completion, the output slice
/// is full, or (when not finishing) the input has been used up. The returned [`StreamResult`]
/// carries the number of input bytes consumed and output bytes written by this call.
///
/// # Errors
///
/// Returns [`MZError::Buf`] if the `output` slice is empty or no progress was made due
/// to lack of expected input data, or if called without [`MZFlush::Finish`] after the compression
/// was already finished.
///
/// Returns [`MZError::Param`] if the compressor parameters are set wrong.
///
/// Returns [`MZError::Stream`] when the lower-level compressor returns a
/// [`TDEFLStatus::PutBufFailed`]; may not actually be possible.
pub fn deflate(
    compressor: &mut CompressorOxide,
    input: &[u8],
    output: &mut [u8],
    flush: MZFlush,
) -> StreamResult {
    if output.is_empty() {
        return StreamResult::error(MZError::Buf);
    }

    // A finished stream only accepts further `Finish` calls, which are no-ops.
    if compressor.prev_return_status() == TDEFLStatus::Done {
        if flush != MZFlush::Finish {
            return StreamResult::error(MZError::Buf);
        }
        return StreamResult {
            bytes_written: 0,
            bytes_consumed: 0,
            status: Ok(MZStatus::StreamEnd),
        };
    }

    let finishing = flush == MZFlush::Finish;
    let mut bytes_written = 0;
    let mut bytes_consumed = 0;
    let mut remaining_in = input;
    let mut remaining_out = output;

    let status = loop {
        let (defl_status, in_bytes, out_bytes) = compress(
            compressor,
            remaining_in,
            remaining_out,
            TDEFLFlush::from(flush),
        );

        remaining_in = &remaining_in[in_bytes..];
        remaining_out = &mut remaining_out[out_bytes..];
        bytes_consumed += in_bytes;
        bytes_written += out_bytes;

        // Check if we are done, or compression failed.
        match defl_status {
            TDEFLStatus::Done => break Ok(MZStatus::StreamEnd),
            TDEFLStatus::BadParam => break Err(MZError::Param),
            // Don't think this can happen as we're not using a custom callback.
            TDEFLStatus::PutBufFailed => break Err(MZError::Stream),
            _ => {}
        }

        // All the output space was used, so wait for more.
        if remaining_out.is_empty() {
            break Ok(MZStatus::Ok);
        }

        if remaining_in.is_empty() && !finishing {
            let made_progress = bytes_written > 0 || bytes_consumed > 0;
            if flush == MZFlush::None && !made_progress {
                // No more input data, not flushing, and nothing was consumed or written,
                // so couldn't make any progress.
                break Err(MZError::Buf);
            }
            // We wrote or consumed something, and/or did a flush (sync/partial etc.).
            break Ok(MZStatus::Ok);
        }
    };

    StreamResult {
        bytes_consumed,
        bytes_written,
        status,
    }
}
// Unit tests for the streaming `deflate` wrapper.
#[cfg(test)]
mod test {
use super::deflate;
use crate::deflate::CompressorOxide;
use crate::inflate::decompress_to_vec_zlib;
use crate::{MZFlush, MZStatus};
use alloc::boxed::Box;
use alloc::vec;
/// Compress a short message with a single `deflate` call using `MZFlush::Finish`, then check
/// that the stream ended, all input was consumed, and the data decompresses to the original.
#[test]
fn test_state() {
let data = b"Hello zlib!";
let mut compressed = vec![0; 50];
let mut compressor = Box::<CompressorOxide>::default();
let res = deflate(&mut compressor, data, &mut compressed, MZFlush::Finish);
let status = res.status.expect("Failed to compress!");
// Note: the whole 50-byte buffer is passed to the decompressor, not just the written part.
let decomp =
decompress_to_vec_zlib(&compressed).expect("Failed to decompress compressed data");
assert_eq!(status, MZStatus::StreamEnd);
assert_eq!(decomp[..], data[..]);
assert_eq!(res.bytes_consumed, data.len());
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,343 @@
//! This module contains functionality for decompression.
#[cfg(feature = "with-alloc")]
use crate::alloc::{boxed::Box, vec, vec::Vec};
use ::core::usize;
#[cfg(all(feature = "std", feature = "with-alloc"))]
use std::error::Error;
pub mod core;
mod output_buffer;
pub mod stream;
use self::core::*;
// Raw numeric values of the decompressor status codes. Negative values are failures.
const TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS: i32 = -4;
const TINFL_STATUS_BAD_PARAM: i32 = -3;
const TINFL_STATUS_ADLER32_MISMATCH: i32 = -2;
const TINFL_STATUS_FAILED: i32 = -1;
const TINFL_STATUS_DONE: i32 = 0;
const TINFL_STATUS_NEEDS_MORE_INPUT: i32 = 1;
const TINFL_STATUS_HAS_MORE_OUTPUT: i32 = 2;

/// Return status codes.
///
/// The discriminants are the `TINFL_STATUS_*` values stored as `i8`; all failure states are
/// negative.
#[repr(i8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TINFLStatus {
    /// More input data was expected, but the caller indicated that there was no more data, so the
    /// input stream is likely truncated.
    ///
    /// This can't happen if you have provided the
    /// [`TINFL_FLAG_HAS_MORE_INPUT`][core::inflate_flags::TINFL_FLAG_HAS_MORE_INPUT] flag to the
    /// decompression. By setting that flag, you indicate more input exists but is not provided,
    /// and so reaching the end of the input data without finding the end of the compressed stream
    /// would instead return a [`NeedsMoreInput`][Self::NeedsMoreInput] status.
    FailedCannotMakeProgress = TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS as i8,

    /// The output buffer is an invalid size; consider the `flags` parameter.
    BadParam = TINFL_STATUS_BAD_PARAM as i8,

    /// The decompression went fine, but the adler32 checksum did not match the one
    /// provided in the header.
    Adler32Mismatch = TINFL_STATUS_ADLER32_MISMATCH as i8,

    /// Failed to decompress due to invalid data.
    Failed = TINFL_STATUS_FAILED as i8,

    /// Finished decompression without issues.
    ///
    /// This indicates the end of the compressed stream has been reached.
    Done = TINFL_STATUS_DONE as i8,

    /// The decompressor needs more input data to continue decompressing.
    ///
    /// This occurs when there's no more consumable input, but the end of the stream hasn't been
    /// reached, and you have supplied the
    /// [`TINFL_FLAG_HAS_MORE_INPUT`][core::inflate_flags::TINFL_FLAG_HAS_MORE_INPUT] flag to the
    /// decompressor. Had you not supplied that flag (which would mean you were asserting that you
    /// believed all the data was available) you would have gotten a
    /// [`FailedCannotMakeProgress`][Self::FailedCannotMakeProgress] instead.
    NeedsMoreInput = TINFL_STATUS_NEEDS_MORE_INPUT as i8,

    /// There is still pending data that didn't fit in the output buffer.
    HasMoreOutput = TINFL_STATUS_HAS_MORE_OUTPUT as i8,
}

impl TINFLStatus {
    /// Convert a raw status code into a [`TINFLStatus`].
    ///
    /// Returns `None` when `value` is not one of the known `TINFL_STATUS_*` codes.
    pub fn from_i32(value: i32) -> Option<TINFLStatus> {
        let status = match value {
            TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS => TINFLStatus::FailedCannotMakeProgress,
            TINFL_STATUS_BAD_PARAM => TINFLStatus::BadParam,
            TINFL_STATUS_ADLER32_MISMATCH => TINFLStatus::Adler32Mismatch,
            TINFL_STATUS_FAILED => TINFLStatus::Failed,
            TINFL_STATUS_DONE => TINFLStatus::Done,
            TINFL_STATUS_NEEDS_MORE_INPUT => TINFLStatus::NeedsMoreInput,
            TINFL_STATUS_HAS_MORE_OUTPUT => TINFLStatus::HasMoreOutput,
            _ => return None,
        };
        Some(status)
    }
}
/// Struct returned when the `decompress_to_vec` functions fail.
#[cfg(feature = "with-alloc")]
#[derive(Debug)]
pub struct DecompressError {
    /// Decompressor status on failure. See [TINFLStatus] for details.
    pub status: TINFLStatus,
    /// The currently decompressed data if any.
    pub output: Vec<u8>,
}

#[cfg(feature = "with-alloc")]
impl alloc::fmt::Display for DecompressError {
    /// Write a short, fixed message describing `self.status`.
    #[cold]
    fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
        let message = match self.status {
            TINFLStatus::FailedCannotMakeProgress => "Truncated input stream",
            TINFLStatus::BadParam => "Invalid output buffer size",
            TINFLStatus::Adler32Mismatch => "Adler32 checksum mismatch",
            TINFLStatus::Failed => "Invalid input data",
            TINFLStatus::Done => "", // Unreachable
            TINFLStatus::NeedsMoreInput => "Truncated input stream",
            TINFLStatus::HasMoreOutput => "Output size exceeded the specified limit",
        };
        f.write_str(message)
    }
}

/// Implement Error trait only if std feature is requested as it requires std.
#[cfg(all(feature = "std", feature = "with-alloc"))]
impl Error for DecompressError {}
/// Build the `Err` value returned by the to-[`Vec`] decompression helpers.
///
/// Wraps `status` and the partially filled `output` vector in a [`DecompressError`].
#[cfg(feature = "with-alloc")]
fn decompress_error(status: TINFLStatus, output: Vec<u8>) -> Result<Vec<u8>, DecompressError> {
    let failure = DecompressError { status, output };
    Err(failure)
}
/// Decompress the deflate-encoded data in `input` to a vector.
///
/// NOTE: This function will not bound the output, so if the output is large enough it can result in an out of memory error.
/// It is therefore suggested to not use this for anything other than test programs, use the functions with a specified limit, or
/// ideally streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead.
///
/// Internally this calls the shared backend with no extra flags and a size limit of `usize::max_value()`.
///
/// # Errors
///
/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] containing the status and so far decompressed data if any on failure.
#[inline]
#[cfg(feature = "with-alloc")]
pub fn decompress_to_vec(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
decompress_to_vec_inner(input, 0, usize::max_value())
}
/// Decompress the deflate-encoded data (with a zlib wrapper) in `input` to a vector.
///
/// NOTE: This function will not bound the output, so if the output is large enough it can result in an out of memory error.
/// It is therefore suggested to not use this for anything other than test programs, use the functions with a specified limit, or
/// ideally streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead.
///
/// Internally this calls the shared backend with the `TINFL_FLAG_PARSE_ZLIB_HEADER` flag and a size limit of `usize::max_value()`.
///
/// # Errors
///
/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] containing the status and so far decompressed data if any on failure.
#[inline]
#[cfg(feature = "with-alloc")]
pub fn decompress_to_vec_zlib(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
decompress_to_vec_inner(
input,
inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER,
usize::max_value(),
)
}
/// Decompress the deflate-encoded data in `input` to a vector.
///
/// The vector is grown to at most `max_size` bytes; if the data does not fit in that size,
/// the error [struct][DecompressError] will contain the status [`TINFLStatus::HasMoreOutput`] and the data that was decompressed on failure.
///
/// As this function tries to decompress everything in one go, it's not ideal for general use outside of tests or where the output size is expected to be small.
/// It is suggested to use streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead.
///
/// Internally this calls the shared backend with no extra flags and `max_size` as the size limit.
///
/// # Errors
///
/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] on failure.
#[inline]
#[cfg(feature = "with-alloc")]
pub fn decompress_to_vec_with_limit(
input: &[u8],
max_size: usize,
) -> Result<Vec<u8>, DecompressError> {
decompress_to_vec_inner(input, 0, max_size)
}
/// Decompress the deflate-encoded data (with a zlib wrapper) in `input` to a vector.
///
/// The vector is grown to at most `max_size` bytes; if the data does not fit in that size,
/// the error [struct][DecompressError] will contain the status [`TINFLStatus::HasMoreOutput`] and the data that was decompressed on failure.
///
/// As this function tries to decompress everything in one go, it's not ideal for general use outside of tests or where the output size is expected to be small.
/// It is suggested to use streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead.
///
/// Internally this calls the shared backend with the `TINFL_FLAG_PARSE_ZLIB_HEADER` flag and `max_size` as the size limit.
///
/// # Errors
///
/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] on failure.
#[inline]
#[cfg(feature = "with-alloc")]
pub fn decompress_to_vec_zlib_with_limit(
input: &[u8],
max_size: usize,
) -> Result<Vec<u8>, DecompressError> {
decompress_to_vec_inner(input, inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER, max_size)
}
/// Backend of various to-[`Vec`] decompressions.
///
/// * `input` - the compressed data.
/// * `flags` - decompression flags; `TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF` is always added.
/// * `max_output_size` - upper bound for the length of the output vector.
///
/// The output vector starts at twice the input length (capped at `max_output_size`) and is
/// doubled, up to the cap, each time the decompressor reports
/// [`TINFLStatus::HasMoreOutput`].
///
/// Returns [`Vec`] of decompressed data on success and the [error struct][DecompressError] with details on failure.
/// On failure the vector stored in the error is truncated to the bytes that were actually
/// decompressed, so it never contains the zero padding of the working buffer.
#[cfg(feature = "with-alloc")]
fn decompress_to_vec_inner(
    mut input: &[u8],
    flags: u32,
    max_output_size: usize,
) -> Result<Vec<u8>, DecompressError> {
    let flags = flags | inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
    let mut ret: Vec<u8> = vec![0; input.len().saturating_mul(2).min(max_output_size)];

    let mut decomp = Box::<DecompressorOxide>::default();

    let mut out_pos = 0;
    loop {
        // Wrap the whole output slice so we know we have enough of the
        // decompressed data for matches.
        let (status, in_consumed, out_consumed) =
            decompress(&mut decomp, input, &mut ret, out_pos, flags);
        out_pos += out_consumed;

        match status {
            TINFLStatus::Done => {
                ret.truncate(out_pos);
                return Ok(ret);
            }

            // Keep going only if the consumed count is sane (not expected to be out of bounds,
            // but the check eliminates a panicking code path) and the buffer may still grow.
            // Otherwise fall through to the error arm with the `HasMoreOutput` status.
            TINFLStatus::HasMoreOutput
                if in_consumed <= input.len() && ret.len() < max_output_size =>
            {
                input = &input[in_consumed..];
                // calculate the new length, capped at `max_output_size`
                let new_len = ret.len().saturating_mul(2).min(max_output_size);
                ret.resize(new_len, 0);
            }

            _ => {
                // Only the first `out_pos` bytes hold decompressed data; the rest is the
                // zero-initialised remainder of the working buffer and must not be exposed
                // as if it were output.
                ret.truncate(out_pos);
                return decompress_error(status, ret);
            }
        }
    }
}
/// Decompress one or more source slices from an iterator into the output slice.
///
/// * On success, returns the number of bytes that were written.
/// * On failure, returns the failure status code.
///
/// This will fail if the output buffer is not large enough, but in that case
/// the output buffer will still contain the partial decompression.
///
/// * `out` the output buffer.
/// * `it` the iterator of input slices.
/// * `zlib_header` if the first slice out of the iterator is expected to have a
///   Zlib header. Otherwise the slices are assumed to be the deflate data only.
/// * `ignore_adler32` if `true`, the `TINFL_FLAG_IGNORE_ADLER32` flag is passed to the
///   decompressor.
///
/// Every slice except the last one is decompressed with `TINFL_FLAG_HAS_MORE_INPUT` set.
pub fn decompress_slice_iter_to_slice<'out, 'inp>(
    out: &'out mut [u8],
    it: impl Iterator<Item = &'inp [u8]>,
    zlib_header: bool,
    ignore_adler32: bool,
) -> Result<usize, TINFLStatus> {
    use self::core::inflate_flags::*;

    // Flag bits that are identical for every input slice.
    let mut base_flags = TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
    if zlib_header {
        base_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER;
    }
    if ignore_adler32 {
        base_flags |= TINFL_FLAG_IGNORE_ADLER32;
    }

    let mut slices = it.peekable();
    let mut decomp = DecompressorOxide::new();
    let mut out_pos = 0;

    while let Some(in_buf) = slices.next() {
        // Tell the decompressor whether another slice follows this one.
        let flags = if slices.peek().is_some() {
            base_flags | TINFL_FLAG_HAS_MORE_INPUT
        } else {
            base_flags
        };

        let (status, _input_read, bytes_written) =
            decompress(&mut decomp, in_buf, out, out_pos, flags);
        out_pos += bytes_written;

        match status {
            TINFLStatus::NeedsMoreInput => {}
            TINFLStatus::Done => return Ok(out_pos),
            failure => return Err(failure),
        }
    }

    // If we ran out of source slices without getting a `Done` from the
    // decompression we can call it a failure.
    Err(TINFLStatus::FailedCannotMakeProgress)
}
// Unit tests for the one-shot decompression helpers.
#[cfg(all(test, feature = "with-alloc"))]
mod test {
use super::{
decompress_slice_iter_to_slice, decompress_to_vec_zlib, decompress_to_vec_zlib_with_limit,
DecompressError, TINFLStatus,
};
/// Zlib-wrapped encoding of `"Hello, zlib!"`, as asserted by the tests below.
const ENCODED: [u8; 20] = [
120, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, 19,
];
/// Decompressing without a limit yields the original text.
#[test]
fn decompress_vec() {
let res = decompress_to_vec_zlib(&ENCODED[..]).unwrap();
assert_eq!(res.as_slice(), &b"Hello, zlib!"[..]);
}
/// A limit far above the output size does not affect the result.
#[test]
fn decompress_vec_with_high_limit() {
let res = decompress_to_vec_zlib_with_limit(&ENCODED[..], 100_000).unwrap();
assert_eq!(res.as_slice(), &b"Hello, zlib!"[..]);
}
/// A limit below the output size must produce a `HasMoreOutput` error.
#[test]
fn fail_to_decompress_with_limit() {
let res = decompress_to_vec_zlib_with_limit(&ENCODED[..], 8);
match res {
Err(DecompressError {
status: TINFLStatus::HasMoreOutput,
..
}) => (), // expected result
_ => panic!("Decompression output size limit was not enforced"),
}
}
/// Exercise `decompress_slice_iter_to_slice` with one slice, with chunked input, and with an
/// output buffer that is too small.
#[test]
fn test_decompress_slice_iter_to_slice() {
// one slice
let mut out = [0_u8; 12_usize];
let r =
decompress_slice_iter_to_slice(&mut out, Some(&ENCODED[..]).into_iter(), true, false);
assert_eq!(r, Ok(12));
assert_eq!(&out[..12], &b"Hello, zlib!"[..]);
// some chunks at a time
for chunk_size in 1..13 {
// Note: because of https://github.com/Frommi/miniz_oxide/issues/110 our
// out buffer needs to have +1 byte available when the chunk size cuts
// the adler32 data off from the last actual data.
let mut out = [0_u8; 12_usize + 1];
let r =
decompress_slice_iter_to_slice(&mut out, ENCODED.chunks(chunk_size), true, false);
assert_eq!(r, Ok(12));
assert_eq!(&out[..12], &b"Hello, zlib!"[..]);
}
// output buffer too small
let mut out = [0_u8; 3_usize];
let r = decompress_slice_iter_to_slice(&mut out, ENCODED.chunks(7), true, false);
assert!(r.is_err());
}
}

View file

@ -0,0 +1,60 @@
/// A wrapper for the output slice used when decompressing.
///
/// Using this rather than `Cursor` lets us implement the writing methods directly on
/// the buffer and lets us use a usize rather than u64 for the position which helps with
/// performance on 32-bit systems.
pub struct OutputBuffer<'a> {
    /// The wrapped output slice.
    slice: &'a mut [u8],
    /// Index in `slice` at which the next write will happen.
    position: usize,
}

impl<'a> OutputBuffer<'a> {
    /// Wrap `slice`, starting with the write position at `position`.
    #[inline]
    pub fn from_slice_and_pos(slice: &'a mut [u8], position: usize) -> OutputBuffer<'a> {
        Self { slice, position }
    }

    /// The current write position.
    #[inline]
    pub const fn position(&self) -> usize {
        self.position
    }

    /// Move the write position to `position`. No bounds check is performed here.
    #[inline]
    pub fn set_position(&mut self, position: usize) {
        self.position = position;
    }

    /// Write a byte to the current position and increment
    ///
    /// Assumes that there is space; panics on an out-of-bounds position.
    #[inline]
    pub fn write_byte(&mut self, byte: u8) {
        let at = self.position;
        self.slice[at] = byte;
        self.position = at + 1;
    }

    /// Write a slice to the current position and increment
    ///
    /// Assumes that there is space; panics if `data` does not fit.
    #[inline]
    pub fn write_slice(&mut self, data: &[u8]) {
        let start = self.position;
        let end = start + data.len();
        self.slice[start..end].copy_from_slice(data);
        self.position = end;
    }

    /// Number of bytes between the current position and the end of the slice.
    #[inline]
    pub const fn bytes_left(&self) -> usize {
        self.slice.len() - self.position
    }

    /// Shared view of the whole underlying slice (not only the written part).
    #[inline]
    pub const fn get_ref(&self) -> &[u8] {
        self.slice
    }

    /// Mutable view of the whole underlying slice (not only the written part).
    #[inline]
    pub fn get_mut(&mut self) -> &mut [u8] {
        self.slice
    }
}

View file

@ -0,0 +1,423 @@
//! Extra streaming decompression functionality.
//!
//! As of now this is mainly intended for use to build a higher-level wrapper.
#[cfg(feature = "with-alloc")]
use crate::alloc::boxed::Box;
use core::{cmp, mem};
use crate::inflate::core::{decompress, inflate_flags, DecompressorOxide, TINFL_LZ_DICT_SIZE};
use crate::inflate::TINFLStatus;
use crate::{DataFormat, MZError, MZFlush, MZResult, MZStatus, StreamResult};
/// Tag that determines reset policy of [InflateState](struct.InflateState.html)
///
/// Implementors decide how much of the state is cleared; a policy value is passed to
/// `InflateState::reset_as`, which calls [`ResetPolicy::reset`] on the state.
pub trait ResetPolicy {
/// Performs reset
///
/// `state` is the inflate state to reset in place.
fn reset(&self, state: &mut InflateState);
}
/// Resets state, without performing expensive ops (e.g. zeroing buffer)
///
/// Note that not zeroing buffer can lead to security issues when dealing with untrusted input.
pub struct MinReset;

impl ResetPolicy for MinReset {
    /// Re-initialise the inner decompressor and put the bookkeeping fields back to their
    /// starting values. The dictionary buffer and the data format are left untouched.
    fn reset(&self, state: &mut InflateState) {
        state.decompressor().init();
        state.last_status = TINFLStatus::NeedsMoreInput;
        state.has_flushed = false;
        state.first_call = true;
        state.dict_avail = 0;
        state.dict_ofs = 0;
    }
}
/// Resets state and zero memory, continuing to use the same data format.
pub struct ZeroReset;

impl ResetPolicy for ZeroReset {
    /// Do everything [`MinReset`] does, then overwrite the whole dictionary buffer with zeroes.
    #[inline]
    fn reset(&self, state: &mut InflateState) {
        MinReset.reset(state);
        for byte in state.dict.iter_mut() {
            *byte = 0;
        }
    }
}
/// Full reset of the state, including zeroing memory.
///
/// Requires to provide new data format.
///
/// The wrapped [`DataFormat`] is the format the state will use after the reset.
pub struct FullReset(pub DataFormat);
impl ResetPolicy for FullReset {
/// Perform a [`ZeroReset`] and then store the new data format in the state.
#[inline]
fn reset(&self, state: &mut InflateState) {
ZeroReset.reset(state);
state.data_format = self.0;
}
}
/// A struct that combines a decompressor with extra data for streaming decompression.
pub struct InflateState {
    /// Inner decompressor struct
    decomp: DecompressorOxide,

    /// Buffer of input bytes for matches.
    /// TODO: Could probably do this a bit cleaner with some
    /// Cursor-like class.
    /// We may also look into whether we need to keep a buffer here, or just one in the
    /// decompressor struct.
    dict: [u8; TINFL_LZ_DICT_SIZE],
    /// Where in the buffer are we currently at?
    dict_ofs: usize,
    /// How many bytes of data to be flushed is there currently in the buffer?
    dict_avail: usize,

    /// `true` until `inflate` has been called once with this state.
    first_call: bool,
    /// Set once `inflate` has been called with [`MZFlush::Finish`].
    has_flushed: bool,

    /// Whether the input data is wrapped in a zlib header and checksum.
    /// TODO: This should be stored in the decompressor.
    data_format: DataFormat,
    /// Status returned by the most recent decompression step.
    last_status: TINFLStatus,
}

impl Default for InflateState {
    /// A fresh state: zeroed dictionary, raw (header-less) data format, no call made yet.
    fn default() -> Self {
        Self {
            decomp: DecompressorOxide::default(),
            dict: [0; TINFL_LZ_DICT_SIZE],
            dict_ofs: 0,
            dict_avail: 0,
            first_call: true,
            has_flushed: false,
            data_format: DataFormat::Raw,
            last_status: TINFLStatus::NeedsMoreInput,
        }
    }
}

impl InflateState {
    /// Create a new state.
    ///
    /// Note that this struct is quite large due to internal buffers, and as such storing it on
    /// the stack is not recommended.
    ///
    /// # Parameters
    /// `data_format`: Determines whether the compressed data is assumed to be wrapped with zlib
    /// metadata.
    pub fn new(data_format: DataFormat) -> InflateState {
        let mut state = InflateState::default();
        state.data_format = data_format;
        state
    }

    /// Create a new state on the heap.
    ///
    /// # Parameters
    /// `data_format`: Determines whether the compressed data is assumed to be wrapped with zlib
    /// metadata.
    #[cfg(feature = "with-alloc")]
    pub fn new_boxed(data_format: DataFormat) -> Box<InflateState> {
        let mut boxed = Box::<InflateState>::default();
        boxed.data_format = data_format;
        boxed
    }

    /// Access the inner decompressor.
    pub fn decompressor(&mut self) -> &mut DecompressorOxide {
        &mut self.decomp
    }

    /// Return the status of the last call to `inflate` with this `InflateState`.
    pub const fn last_status(&self) -> TINFLStatus {
        self.last_status
    }

    /// Create a new state using miniz/zlib style window bits parameter.
    ///
    /// The decompressor does not support different window sizes. As such,
    /// any positive (>0) value will set the zlib header flag, while a negative one
    /// will not.
    #[cfg(feature = "with-alloc")]
    pub fn new_boxed_with_window_bits(window_bits: i32) -> Box<InflateState> {
        Self::new_boxed(DataFormat::from_window_bits(window_bits))
    }

    /// Reset the decompressor without re-allocating memory, using the given
    /// data format.
    #[inline]
    pub fn reset(&mut self, data_format: DataFormat) {
        self.reset_as(FullReset(data_format));
    }

    /// Resets the state according to specified policy.
    #[inline]
    pub fn reset_as<T: ResetPolicy>(&mut self, policy: T) {
        policy.reset(self)
    }
}
/// Try to decompress from `input` to `output` with the given [`InflateState`]
///
/// # `flush`
///
/// Generally, the various [`MZFlush`] flags have meaning only on the compression side. They can be
/// supplied here, but the only one that has any semantic meaning is [`MZFlush::Finish`], which is a
/// signal that the stream is expected to finish, and failing to do so is an error. It isn't
/// necessary to specify it when the stream ends; you'll still get returned a
/// [`MZStatus::StreamEnd`] anyway. Other values either have no effect or cause errors. It's
/// likely that you'll almost always just want to use [`MZFlush::None`].
///
/// # Errors
///
/// Returns [`MZError::Buf`] if the size of the `output` slice is empty or no progress was made due
/// to lack of expected input data, or if called with [`MZFlush::Finish`] and input wasn't all
/// consumed.
///
/// Returns [`MZError::Data`] if this or a a previous call failed with an error return from
/// [`TINFLStatus`]; probably indicates corrupted data.
///
/// Returns [`MZError::Stream`] when called with [`MZFlush::Full`] (meaningless on
/// decompression), or when called without [`MZFlush::Finish`] after an earlier call with
/// [`MZFlush::Finish`] has been made.
pub fn inflate(
state: &mut InflateState,
input: &[u8],
output: &mut [u8],
flush: MZFlush,
) -> StreamResult {
let mut bytes_consumed = 0;
let mut bytes_written = 0;
let mut next_in = input;
let mut next_out = output;
if flush == MZFlush::Full {
return StreamResult::error(MZError::Stream);
}
let mut decomp_flags = if state.data_format == DataFormat::Zlib {
inflate_flags::TINFL_FLAG_COMPUTE_ADLER32
} else {
inflate_flags::TINFL_FLAG_IGNORE_ADLER32
};
if (state.data_format == DataFormat::Zlib)
| (state.data_format == DataFormat::ZLibIgnoreChecksum)
{
decomp_flags |= inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER;
}
let first_call = state.first_call;
state.first_call = false;
if state.last_status == TINFLStatus::FailedCannotMakeProgress {
return StreamResult::error(MZError::Buf);
}
if (state.last_status as i32) < 0 {
return StreamResult::error(MZError::Data);
}
if state.has_flushed && (flush != MZFlush::Finish) {
return StreamResult::error(MZError::Stream);
}
state.has_flushed |= flush == MZFlush::Finish;
if (flush == MZFlush::Finish) && first_call {
decomp_flags |= inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
let status = decompress(&mut state.decomp, next_in, next_out, 0, decomp_flags);
let in_bytes = status.1;
let out_bytes = status.2;
let status = status.0;
state.last_status = status;
bytes_consumed += in_bytes;
bytes_written += out_bytes;
let ret_status = {
if status == TINFLStatus::FailedCannotMakeProgress {
Err(MZError::Buf)
} else if (status as i32) < 0 {
Err(MZError::Data)
} else if status != TINFLStatus::Done {
state.last_status = TINFLStatus::Failed;
Err(MZError::Buf)
} else {
Ok(MZStatus::StreamEnd)
}
};
return StreamResult {
bytes_consumed,
bytes_written,
status: ret_status,
};
}
if flush != MZFlush::Finish {
decomp_flags |= inflate_flags::TINFL_FLAG_HAS_MORE_INPUT;
}
if state.dict_avail != 0 {
bytes_written += push_dict_out(state, &mut next_out);
return StreamResult {
bytes_consumed,
bytes_written,
status: Ok(
if (state.last_status == TINFLStatus::Done) && (state.dict_avail == 0) {
MZStatus::StreamEnd
} else {
MZStatus::Ok
},
),
};
}
let status = inflate_loop(
state,
&mut next_in,
&mut next_out,
&mut bytes_consumed,
&mut bytes_written,
decomp_flags,
flush,
);
StreamResult {
bytes_consumed,
bytes_written,
status,
}
}
/// Repeatedly runs the decompressor into `state.dict` and copies the result to `next_out`
/// until an error occurs, the stream ends, or no further progress can be made in this call.
///
/// `next_in` and `next_out` are advanced past the bytes consumed and written, and the same
/// amounts are added to `total_in` and `total_out`.
fn inflate_loop(
    state: &mut InflateState,
    next_in: &mut &[u8],
    next_out: &mut &mut [u8],
    total_in: &mut usize,
    total_out: &mut usize,
    decomp_flags: u32,
    flush: MZFlush,
) -> MZResult {
    // Input length at entry; used below to recognise "asked for more input, but none given".
    let orig_in_len = next_in.len();
    loop {
        let (status, in_bytes, out_bytes) = decompress(
            &mut state.decomp,
            next_in,
            &mut state.dict,
            state.dict_ofs,
            decomp_flags,
        );
        state.last_status = status;

        // Account for the consumed input.
        *next_in = &next_in[in_bytes..];
        *total_in += in_bytes;

        // Move as much of the fresh output as fits into the caller's buffer.
        state.dict_avail = out_bytes;
        *total_out += push_dict_out(state, next_out);

        // A negative status means decompression failed.
        if (status as i32) < 0 {
            return Err(MZError::Data);
        }

        // The decompressor wants more input, but the caller supplied none at all.
        if (status == TINFLStatus::NeedsMoreInput) && orig_in_len == 0 {
            return Err(MZError::Buf);
        }

        let done = status == TINFLStatus::Done;
        let output_pending = state.dict_avail != 0;

        if flush == MZFlush::Finish {
            if done {
                // The stream is complete; it is an error if the output buffer was too small
                // to take everything that was decoded.
                return if output_pending {
                    Err(MZError::Buf)
                } else {
                    Ok(MZStatus::StreamEnd)
                };
            }
            // Not done yet and nowhere left to write.
            if next_out.is_empty() {
                return Err(MZError::Buf);
            }
        } else if done || output_pending || next_in.is_empty() || next_out.is_empty() {
            // Not asked to finish, so stopping early is fine: either the stream has fully
            // ended, or the caller has to come back with more input or more output space.
            return Ok(if done && !output_pending {
                MZStatus::StreamEnd
            } else {
                MZStatus::Ok
            });
        }
    }
}
fn push_dict_out(state: &mut InflateState, next_out: &mut &mut [u8]) -> usize {
let n = cmp::min(state.dict_avail, next_out.len());
(next_out[..n]).copy_from_slice(&state.dict[state.dict_ofs..state.dict_ofs + n]);
*next_out = &mut mem::take(next_out)[n..];
state.dict_avail -= n;
state.dict_ofs = (state.dict_ofs + (n)) & (TINFL_LZ_DICT_SIZE - 1);
n
}
#[cfg(all(test, feature = "with-alloc"))]
mod test {
    use super::{inflate, InflateState};
    use crate::{DataFormat, MZFlush, MZStatus};
    use alloc::vec;

    /// Clears `out`, decodes `encoded` in a single `Finish` call and checks that the whole
    /// input was consumed and that the output is exactly `b"Hello, zlib!"`.
    fn assert_decodes_hello(state: &mut InflateState, encoded: &[u8], out: &mut [u8]) {
        // Wipe whatever a previous round left behind so stale bytes cannot satisfy the check.
        for byte in out.iter_mut() {
            *byte = 0;
        }
        let res = inflate(state, encoded, out, MZFlush::Finish);
        let status = res.status.expect("Failed to decompress!");
        assert_eq!(status, MZStatus::StreamEnd);
        assert_eq!(out[..res.bytes_written], b"Hello, zlib!"[..]);
        assert_eq!(res.bytes_consumed, encoded.len());
    }

    /// The same stream must decode identically on a fresh state, after each kind of reset,
    /// and when the checksum is ignored.
    #[test]
    fn test_state() {
        let encoded = [
            120u8, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4,
            19,
        ];
        let mut out = vec![0u8; 50];

        // Freshly created state.
        let mut state = InflateState::new_boxed(DataFormat::Zlib);
        assert_decodes_hello(&mut state, &encoded, &mut out);

        // State reused after a `ZeroReset`.
        state.reset_as(super::ZeroReset);
        assert_decodes_hello(&mut state, &encoded, &mut out);

        // State reused after a `MinReset`.
        state.reset_as(super::MinReset);
        assert_decodes_hello(&mut state, &encoded, &mut out);
        assert_eq!(state.decompressor().adler32(), Some(459605011));

        // Test state when not computing adler.
        state = InflateState::new_boxed(DataFormat::ZLibIgnoreChecksum);
        assert_decodes_hello(&mut state, &encoded, &mut out);
        // Not computed, so should be Some(1)
        assert_eq!(state.decompressor().adler32(), Some(1));
        // Should still have the checksum read from the stream.
        assert_eq!(state.decompressor().adler32_header(), Some(459605011));
    }
}

View file

@ -0,0 +1,211 @@
//! A pure rust replacement for the [miniz](https://github.com/richgel999/miniz)
//! DEFLATE/zlib encoder/decoder.
//! Used as a rust back-end for the
//! [flate2](https://github.com/alexcrichton/flate2-rs) crate.
//!
#![cfg_attr(
feature = "with-alloc",
doc = r##"
# Usage
## Simple compression/decompression:
``` rust
use miniz_oxide::inflate::decompress_to_vec;
use miniz_oxide::deflate::compress_to_vec;
fn roundtrip(data: &[u8]) {
let compressed = compress_to_vec(data, 6);
let decompressed = decompress_to_vec(compressed.as_slice()).expect("Failed to decompress!");
# let _ = decompressed;
}
# roundtrip(b"Test_data test data lalalal blabla");
"##
)]
#![forbid(unsafe_code)]
#![cfg_attr(not(feature = "std"), no_std)]
#[cfg(feature = "with-alloc")]
extern crate alloc;
#[cfg(feature = "with-alloc")]
pub mod deflate;
pub mod inflate;
mod shared;
pub use crate::shared::update_adler32 as mz_adler32_oxide;
pub use crate::shared::{MZ_ADLER32_INIT, MZ_DEFAULT_WINDOW_BITS};
/// A list of flush types.
///
/// See <http://www.bolet.org/~pornin/deflate-flush.html> for more in-depth info.
///
/// The enum is `#[repr(i32)]` with an explicit discriminant on every variant;
/// [`MZFlush::new`] converts a raw integer back into a variant.
#[repr(i32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum MZFlush {
/// Don't force any flushing.
/// Used when more input data is expected.
None = 0,
/// Zlib partial flush.
/// Currently treated as [`MZFlush::Sync`].
Partial = 1,
/// Finish compressing the currently buffered data, and output an empty raw block.
/// Has no use in decompression.
Sync = 2,
/// Same as [`MZFlush::Sync`], but resets the compression dictionary so that further
/// compressed data does not depend on data compressed before the flush.
///
/// Has no use in decompression, and is an error to supply in that case.
Full = 3,
/// Attempt to flush the remaining data and end the stream.
Finish = 4,
/// Not implemented.
Block = 5,
}
impl MZFlush {
    /// Converts a raw integer flush code into an [`MZFlush`].
    ///
    /// Both `1` and `2` yield [`MZFlush::Sync`]; `0`, `3` and `4` yield `None`, `Full` and
    /// `Finish` respectively.
    ///
    /// # Errors
    ///
    /// Returns `MZError::Param` for every other value.
    pub fn new(flush: i32) -> Result<Self, MZError> {
        let kind = match flush {
            0 => MZFlush::None,
            // A partial flush (1) is folded into a sync flush (2).
            1 | 2 => MZFlush::Sync,
            3 => MZFlush::Full,
            4 => MZFlush::Finish,
            _ => return Err(MZError::Param),
        };
        Ok(kind)
    }
}
/// A list of miniz successful status codes.
///
/// These are emitted as the [`Ok`] side of a [`MZResult`] in the [`StreamResult`] returned from
/// [`deflate::stream::deflate()`] or [`inflate::stream::inflate()`].
///
/// The enum is `#[repr(i32)]`; all successful codes are non-negative.
#[repr(i32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum MZStatus {
/// Operation succeeded.
///
/// Some data was decompressed or compressed; see the byte counters in the [`StreamResult`] for
/// details.
Ok = 0,
/// Operation succeeded and end of deflate stream was found.
///
/// X-ref [`TINFLStatus::Done`][inflate::TINFLStatus::Done] or
/// [`TDEFLStatus::Done`][deflate::core::TDEFLStatus::Done] for `inflate` or `deflate`
/// respectively.
StreamEnd = 1,
/// Unused.
NeedDict = 2,
}
/// A list of miniz failed status codes.
///
/// These are emitted as the [`Err`] side of a [`MZResult`] in the [`StreamResult`] returned from
/// [`deflate::stream::deflate()`] or [`inflate::stream::inflate()`].
///
/// The enum is `#[repr(i32)]`; all error codes are negative.
#[repr(i32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum MZError {
/// Unused.
ErrNo = -1,
/// General stream error.
///
/// See [`inflate::stream::inflate()`] docs for details of how it can occur there.
///
/// See [`deflate::stream::deflate()`] docs for how it can in principle occur there, though it's
/// believed impossible in practice.
Stream = -2,
/// Error in inflation; see [`inflate::stream::inflate()`] for details.
///
/// Not returned from [`deflate::stream::deflate()`].
Data = -3,
/// Unused.
Mem = -4,
/// Buffer-related error.
///
/// See the docs of [`deflate::stream::deflate()`] or [`inflate::stream::inflate()`] for details
/// of when it would trigger in the one you're using.
Buf = -5,
/// Unused.
Version = -6,
/// Bad parameters.
///
/// This can be returned from [`deflate::stream::deflate()`] in the case of bad parameters. See
/// [`TDEFLStatus::BadParam`][deflate::core::TDEFLStatus::BadParam].
///
/// Also returned by [`MZFlush::new`] for an unrecognised flush value.
Param = -10_000,
}
/// How compressed data is wrapped.
///
/// Marked `#[non_exhaustive]`, so code outside this crate needs a wildcard arm when matching
/// on it.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum DataFormat {
/// Wrapped using the [zlib](http://www.zlib.org/rfc-zlib.html) format.
Zlib,
/// Zlib wrapped, but the adler32 checksum is neither computed nor checked.
/// Currently only used for inflate, behaves the same as Zlib for compression.
ZLibIgnoreChecksum,
/// Raw DEFLATE.
Raw,
}
impl DataFormat {
    /// Picks a format from a window-bits value.
    ///
    /// A strictly positive value selects [`DataFormat::Zlib`]; zero or a negative value
    /// selects [`DataFormat::Raw`]. [`DataFormat::ZLibIgnoreChecksum`] is never produced.
    pub fn from_window_bits(window_bits: i32) -> DataFormat {
        match window_bits {
            bits if bits > 0 => DataFormat::Zlib,
            _ => DataFormat::Raw,
        }
    }

    /// Returns the window-bits value for this format: the default window bits for the two
    /// zlib-wrapped variants, and the negated default for raw DEFLATE.
    pub fn to_window_bits(self) -> i32 {
        let default_bits = shared::MZ_DEFAULT_WINDOW_BITS;
        match self {
            DataFormat::Raw => -default_bits,
            DataFormat::Zlib | DataFormat::ZLibIgnoreChecksum => default_bits,
        }
    }
}
/// `Result` alias for all miniz status codes both successful and failed.
///
/// The `Ok` side carries an [`MZStatus`], the `Err` side an [`MZError`].
pub type MZResult = Result<MZStatus, MZError>;
/// A structure containing the result of a call to the inflate or deflate streaming functions.
///
/// The byte counters are plain public fields alongside the status, so they can be read
/// whether `status` is `Ok` or `Err`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct StreamResult {
/// The number of bytes consumed from the input slice.
pub bytes_consumed: usize,
/// The number of bytes written to the output slice.
pub bytes_written: usize,
/// The return status of the call.
pub status: MZResult,
}
impl StreamResult {
    /// Builds a result that reports `error` with both byte counters set to zero.
    #[inline]
    pub const fn error(error: MZError) -> StreamResult {
        let status = Err(error);
        StreamResult {
            bytes_consumed: 0,
            bytes_written: 0,
            status,
        }
    }
}
impl core::convert::From<StreamResult> for MZResult {
fn from(res: StreamResult) -> Self {
res.status
}
}
impl core::convert::From<&StreamResult> for MZResult {
fn from(res: &StreamResult) -> Self {
res.status
}
}

View file

@ -0,0 +1,25 @@
// Starting value for an Adler-32 checksum (RFC 1950 defines the checksum of empty input as 1).
#[doc(hidden)]
pub const MZ_ADLER32_INIT: u32 = 1;
// Default window-bits value; `DataFormat::to_window_bits` returns it for zlib-wrapped data
// and its negation for raw DEFLATE.
// NOTE(review): presumably log2 of the 32 KiB DEFLATE window — confirm.
#[doc(hidden)]
pub const MZ_DEFAULT_WINDOW_BITS: i32 = 15;
// The values match the order in which code-length code lengths are stored in a dynamic
// Huffman block header (RFC 1951, section 3.2.7).
// NOTE(review): the users of this table are not visible here — confirm that is its purpose.
pub const HUFFMAN_LENGTH_ORDER: [u8; 19] = [
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
];
/// Continues the Adler-32 checksum `adler` over `data` and returns the updated checksum.
///
/// Built when the `simd` feature is disabled; uses the `adler` crate.
#[doc(hidden)]
#[cfg(not(feature = "simd"))]
pub fn update_adler32(adler: u32, data: &[u8]) -> u32 {
    let mut running = adler::Adler32::from_checksum(adler);
    running.write_slice(data);
    running.checksum()
}
/// Continues the Adler-32 checksum `adler` over `data` and returns the updated checksum.
///
/// Built when the `simd` feature is enabled; uses the `simd_adler32` crate.
#[doc(hidden)]
#[cfg(feature = "simd")]
pub fn update_adler32(adler: u32, data: &[u8]) -> u32 {
    let mut running = simd_adler32::Adler32::from_checksum(adler);
    running.write(data);
    running.finish()
}