Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions libbz2-rs-sys/src/decompress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1009,9 +1009,40 @@
s.selector[i] = pos[0];
}

t = 0;
current_block = Block35;
break;
// try to read the coding tables in one go if there is sufficient input
let bits_needed =
usize::from(nGroups) * (5 + (usize::from(alphaSize) * 2 * 20));
let bytes_needed = bits_needed.div_ceil(8);

if strm.avail_in as usize >= bytes_needed {
for t in 0..usize::from(nGroups) {
let mut curr = GET_BITS!(strm, s, 5);
for i in 0..usize::from(alphaSize) {
loop {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it should be possible to bound this loop (I think it's 20 bits of code at most, so 40bits total?). But it's hard to be confident

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I fuzzed this for many hours using this size of 20. So it holds at least for files encoded using the current version (which uses 17 bits at most, while earlier bzip2 versions used up to 20 bits).

if !(1..=20).contains(&curr) {
error!(BZ_DATA_ERROR);

Check warning on line 1023 in libbz2-rs-sys/src/decompress.rs

View check run for this annotation

Codecov / codecov/patch

libbz2-rs-sys/src/decompress.rs#L1023

Added line #L1023 was not covered by tests
}
if !GET_BIT!(strm, s) {
break;
};
match GET_BIT!(strm, s) {
false => curr += 1,
true => curr -= 1,
}
}

s.len[t][i] = curr as u8;
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What code is this block based on?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this bit of C code

libbzip2-rs/decompress.c

Lines 322 to 335 in d5d181b

/*--- Now the coding tables ---*/
for (t = 0; t < nGroups; t++) {
GET_BITS(BZ_X_CODING_1, curr, 5);
for (i = 0; i < alphaSize; i++) {
while (True) {
if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
GET_BIT(BZ_X_CODING_2, uc);
if (uc == 0) break;
GET_BIT(BZ_X_CODING_3, uc);
if (uc == 0) curr++; else curr--;
}
s->len[t][i] = curr;
}
}

it's all scattered around in our implementation here, but if we assume sufficient input, we can basically copy the C code.


t = nGroups;
current_block = Block35;
break;
} else {
t = 0;
current_block = Block35;
break;
}
}
}
Block18 => {
Expand Down
46 changes: 38 additions & 8 deletions test-libbz2-rs-sys/src/chunked.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use crate::{compress_c, decompress_c, SAMPLE1_BZ2, SAMPLE1_REF};

fn decompress_rs_chunked_input<'a>(dest: &'a mut [u8], source: &[u8]) -> Result<&'a mut [u8], i32> {
fn decompress_rs_chunked_input<'a>(
dest: &'a mut [u8],
source: &[u8],
chunk_size: usize,
) -> Result<&'a mut [u8], i32> {
use libbz2_rs_sys::*;

let mut dest_len = dest.len() as _;
Expand All @@ -16,7 +20,7 @@ fn decompress_rs_chunked_input<'a>(dest: &'a mut [u8], source: &[u8]) -> Result<
strm.next_out = dest.as_mut_ptr().cast::<core::ffi::c_char>();
strm.avail_out = dest_len;

for chunk in source.chunks(1) {
for chunk in source.chunks(chunk_size) {
strm.next_in = chunk.as_ptr() as *mut core::ffi::c_char;
strm.avail_in = chunk.len() as _;

Expand Down Expand Up @@ -48,7 +52,7 @@ fn decompress_rs_chunked_input<'a>(dest: &'a mut [u8], source: &[u8]) -> Result<
#[test]
fn decompress_chunked_input() {
let mut dest_chunked = vec![0; 1 << 18];
let chunked = decompress_rs_chunked_input(&mut dest_chunked, SAMPLE1_BZ2).unwrap();
let chunked = decompress_rs_chunked_input(&mut dest_chunked, SAMPLE1_BZ2, 1).unwrap();

if !cfg!(miri) {
let mut dest = vec![0; 1 << 18];
Expand All @@ -69,7 +73,11 @@ fn decompress_chunked_input() {
}
}

fn compress_rs_chunked_input<'a>(dest: &'a mut [u8], source: &[u8]) -> Result<&'a mut [u8], i32> {
fn compress_rs_chunked_input<'a>(
dest: &'a mut [u8],
source: &[u8],
chunk_size: usize,
) -> Result<&'a mut [u8], i32> {
use libbz2_rs_sys::*;

let mut dest_len = dest.len() as _;
Expand All @@ -89,7 +97,7 @@ fn compress_rs_chunked_input<'a>(dest: &'a mut [u8], source: &[u8]) -> Result<&'
strm.next_out = dest.as_mut_ptr().cast::<core::ffi::c_char>();
strm.avail_out = dest_len;

for chunk in source.chunks(256) {
for chunk in source.chunks(chunk_size) {
strm.next_in = chunk.as_ptr() as *mut core::ffi::c_char;
strm.avail_in = chunk.len() as _;

Expand Down Expand Up @@ -130,7 +138,7 @@ fn compress_rs_chunked_input<'a>(dest: &'a mut [u8], source: &[u8]) -> Result<&'
#[test]
fn compress_chunked_input() {
let mut dest_chunked = vec![0; 1 << 18];
let chunked = compress_rs_chunked_input(&mut dest_chunked, SAMPLE1_REF).unwrap();
let chunked = compress_rs_chunked_input(&mut dest_chunked, SAMPLE1_REF, 256).unwrap();

if !cfg!(miri) {
let mut dest = vec![0; 1 << 18];
Expand Down Expand Up @@ -218,7 +226,7 @@ fn decompress_chunked_output() {
dest.truncate(dest_len as usize);

let mut dest_chunked = vec![0; 1 << 18];
let chunked = decompress_rs_chunked_input(&mut dest_chunked, SAMPLE1_BZ2).unwrap();
let chunked = decompress_rs_chunked_input(&mut dest_chunked, SAMPLE1_BZ2, 1).unwrap();

assert_eq!(chunked.len(), dest.len());
assert_eq!(chunked, dest);
Expand Down Expand Up @@ -283,7 +291,7 @@ fn compress_rs_chunked_output<'a>(dest: &'a mut [u8], source: &[u8]) -> Result<&
#[test]
fn compress_chunked_output() {
let mut dest_chunked = vec![0; 1 << 18];
let chunked = compress_rs_chunked_input(&mut dest_chunked, SAMPLE1_REF).unwrap();
let chunked = compress_rs_chunked_input(&mut dest_chunked, SAMPLE1_REF, 256).unwrap();

if !cfg!(miri) {
let mut dest = vec![0; 1 << 18];
Expand All @@ -304,3 +312,25 @@ fn compress_chunked_output() {
assert_eq!(chunked, dest);
}
}

#[test]
fn fuzzer_short() {
const INPUT: &[u8] = &[0, 0, 67, 0, 67, 0, 0, 5, 0, 0];

let mut dest = [0u8; 256];
let mut len = dest.len() as u32;
unsafe {
compress_c(
dest.as_mut_ptr(),
&mut len,
INPUT.as_ptr(),
INPUT.len() as u32,
1,
);
}

let input = &dest[..len as usize];

let mut dest_chunked = vec![0; 1 << 18];
let _ = decompress_rs_chunked_input(&mut dest_chunked, input, 6).unwrap();
}
Loading