Skip to content

Commit 3c4311c

Browse files
authored
Prevent buffer builder length overflow in MutableBuffer::extend_zeros (#9820)
# Which issue does this PR close? - None. # Rationale for this change BufferBuilder reserve paths relied on unchecked usize arithmetic when calculating the required byte length. In optimized builds, very large requested lengths could wrap before capacity growth. # What changes are included in this PR? This adds checked arithmetic for MutableBuffer byte length calculations used by reserve and zero-extension paths. # Are these changes tested? Yes. This adds regression coverage for overflowing BufferBuilder length calculations through reserve, append_n_zeroed, and advance. # Are there any user-facing changes? Invalid requested lengths whose byte size cannot be represented without overflow now panic consistently. There are no API changes.
1 parent 11f13a5 commit 3c4311c

2 files changed

Lines changed: 125 additions & 6 deletions

File tree

arrow-buffer/src/buffer/mutable.rs

Lines changed: 101 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ impl MutableBuffer {
112112
/// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
113113
///
114114
/// See [`MutableBuffer::with_capacity`].
115+
///
116+
/// # Panics
117+
///
118+
/// See [`MutableBuffer::with_capacity`].
115119
#[inline]
116120
pub fn new(capacity: usize) -> Self {
117121
Self::with_capacity(capacity)
@@ -156,6 +160,10 @@ impl MutableBuffer {
156160
/// let data = buffer.as_slice_mut();
157161
/// assert_eq!(data[126], 0u8);
158162
/// ```
163+
///
164+
/// # Panics
165+
///
166+
/// Panics if `len` is too large to construct a valid allocation [`Layout`]
159167
pub fn from_len_zeroed(len: usize) -> Self {
160168
let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
161169
let data = match layout.size() {
@@ -199,6 +207,10 @@ impl MutableBuffer {
199207

200208
/// creates a new [MutableBuffer] with capacity and length capable of holding `len` bits.
201209
/// This is useful to create a buffer for packed bitmaps.
210+
///
211+
/// # Panics
212+
///
213+
/// See [`MutableBuffer::from_len_zeroed`].
202214
pub fn new_null(len: usize) -> Self {
203215
let num_bytes = bit_util::ceil(len, 8);
204216
MutableBuffer::from_len_zeroed(num_bytes)
@@ -210,6 +222,10 @@ impl MutableBuffer {
210222
/// This is useful when one wants to clear (or set) the bits and then manipulate
211223
/// the buffer directly (e.g., modifying the buffer by holding a mutable reference
212224
/// from `data_mut()`).
225+
///
226+
/// # Panics
227+
///
228+
/// Panics if `end` exceeds the buffer capacity.
213229
pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
214230
assert!(end <= self.layout.size());
215231
let v = if val { 255 } else { 0 };
@@ -225,6 +241,10 @@ impl MutableBuffer {
225241
/// This is used to initialize the bits in a buffer, however, it has no impact on the
226242
/// `len` of the buffer and so can be used to initialize the memory region from
227243
/// `len` to `capacity`.
244+
///
245+
/// # Panics
246+
///
247+
/// Panics if the byte range `start..start + count` exceeds the buffer capacity.
228248
pub fn set_null_bits(&mut self, start: usize, count: usize) {
229249
assert!(
230250
start.saturating_add(count) <= self.layout.size(),
@@ -250,11 +270,19 @@ impl MutableBuffer {
250270
/// let buffer: Buffer = buffer.into();
251271
/// assert_eq!(buffer.len(), 253);
252272
/// ```
273+
///
274+
/// # Panics
275+
///
276+
/// Panics if `self.len + additional` overflows `usize`, or if the required capacity is too
277+
/// large to round up to the next 64-byte boundary and construct a valid allocation layout.
253278
// For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
254279
// exits.
255280
#[inline(always)]
256281
pub fn reserve(&mut self, additional: usize) {
257-
let required_cap = self.len + additional;
282+
let required_cap = self
283+
.len
284+
.checked_add(additional)
285+
.expect("buffer length overflow");
258286
if required_cap > self.layout.size() {
259287
let new_capacity = bit_util::round_upto_multiple_of_64(required_cap);
260288
let new_capacity = std::cmp::max(new_capacity, self.layout.size() * 2);
@@ -274,6 +302,12 @@ impl MutableBuffer {
274302
/// buffer.repeat_slice_n_times(bytes_to_repeat, 3);
275303
/// assert_eq!(buffer.as_slice(), b"ababab");
276304
/// ```
305+
///
306+
/// # Panics
307+
///
308+
/// Panics if the repeated slice byte length overflows `usize`, if the resulting buffer
309+
/// length overflows `usize`, or if reserving the required capacity fails for the same
310+
/// reasons as [`MutableBuffer::reserve`].
277311
pub fn repeat_slice_n_times<T: ArrowNativeType>(
278312
&mut self,
279313
slice_to_repeat: &[T],
@@ -391,6 +425,11 @@ impl MutableBuffer {
391425
/// buffer.resize(253, 2); // allocates for the first time
392426
/// assert_eq!(buffer.as_slice()[252], 2u8);
393427
/// ```
428+
///
429+
/// # Panics
430+
///
431+
/// Panics if growing the buffer requires reserving a capacity that fails for the same
432+
/// reasons as [`MutableBuffer::reserve`].
394433
// For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
395434
// exits.
396435
#[inline(always)]
@@ -426,6 +465,11 @@ impl MutableBuffer {
426465
/// buffer.shrink_to_fit();
427466
/// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
428467
/// ```
468+
///
469+
/// # Panics
470+
///
471+
/// Panics if the current length is too large to round up to the next 64-byte boundary and
472+
/// construct a valid allocation layout.
429473
pub fn shrink_to_fit(&mut self) {
430474
let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
431475
if new_capacity < self.layout.size() {
@@ -505,8 +549,8 @@ impl MutableBuffer {
505549
///
506550
/// # Panics
507551
///
508-
/// This function panics if the underlying buffer is not aligned
509-
/// correctly for type `T`.
552+
/// This function panics if the underlying buffer is not aligned correctly for type `T`, or
553+
/// if its length is not a multiple of `size_of::<T>()`.
510554
pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
511555
// SAFETY
512556
// ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
@@ -520,8 +564,8 @@ impl MutableBuffer {
520564
///
521565
/// # Panics
522566
///
523-
/// This function panics if the underlying buffer is not aligned
524-
/// correctly for type `T`.
567+
/// This function panics if the underlying buffer is not aligned correctly for type `T`, or
568+
/// if its length is not a multiple of `size_of::<T>()`.
525569
pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
526570
// SAFETY
527571
// ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
@@ -539,6 +583,11 @@ impl MutableBuffer {
539583
/// buffer.extend_from_slice(&[2u32, 0]);
540584
/// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
541585
/// ```
586+
///
587+
/// # Panics
588+
///
589+
/// Panics if extending the buffer requires reserving a capacity that fails for the same
590+
/// reasons as [`MutableBuffer::reserve`].
542591
#[inline]
543592
pub fn extend_from_slice<T: ArrowNativeType>(&mut self, items: &[T]) {
544593
let additional = mem::size_of_val(items);
@@ -562,6 +611,11 @@ impl MutableBuffer {
562611
/// buffer.push(256u32);
563612
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
564613
/// ```
614+
///
615+
/// # Panics
616+
///
617+
/// Panics if extending the buffer requires reserving a capacity that fails for the same
618+
/// reasons as [`MutableBuffer::reserve`].
565619
#[inline]
566620
pub fn push<T: ToByteSlice>(&mut self, item: T) {
567621
let additional = std::mem::size_of::<T>();
@@ -587,13 +641,26 @@ impl MutableBuffer {
587641
}
588642

589643
/// Extends the buffer by `additional` bytes equal to `0u8`, incrementing its capacity if needed.
644+
///
645+
/// # Panics
646+
///
647+
/// Panics if `self.len + additional` overflows `usize`, or if growing the buffer requires
648+
/// reserving a capacity that fails for the same reasons as [`MutableBuffer::reserve`].
590649
#[inline]
591650
pub fn extend_zeros(&mut self, additional: usize) {
592-
self.resize(self.len + additional, 0);
651+
let new_len = self
652+
.len
653+
.checked_add(additional)
654+
.expect("buffer length overflow");
655+
self.resize(new_len, 0);
593656
}
594657

595658
/// # Safety
596659
/// The caller must ensure that the buffer was properly initialized up to `len`.
660+
///
661+
/// # Panics
662+
///
663+
/// Panics if `len` exceeds the buffer capacity.
597664
#[inline]
598665
pub unsafe fn set_len(&mut self, len: usize) {
599666
assert!(len <= self.capacity());
@@ -641,6 +708,13 @@ impl MutableBuffer {
641708
/// `offset` indicates the starting offset in bits in this buffer to begin writing to
642709
/// and must be less than or equal to the current length of this buffer.
643710
/// All bits not written to (but readable due to byte alignment) will be zeroed out.
711+
///
712+
/// # Panics
713+
///
714+
/// Panics if `iter` does not report an exact size via `size_hint`, or if it yields fewer
715+
/// items than reported, or if extending the buffer requires reserving a capacity that fails
716+
/// for the same reasons as [`MutableBuffer::reserve`].
717+
///
644718
/// # Safety
645719
/// Callers must ensure that `iter` reports an exact size via `size_hint`.
646720
#[inline]
@@ -866,6 +940,13 @@ impl MutableBuffer {
866940
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
867941
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
868942
/// ```
943+
///
944+
/// # Panics
945+
///
946+
/// Panics if the iterator does not report an upper bound via `size_hint`, or if the
947+
/// reported length does not match the number of items produced, or if allocating the
948+
/// required buffer fails for the same reasons as [`MutableBuffer::new`].
949+
///
869950
/// # Safety
870951
/// This method assumes that the iterator's size is correct and is undefined behavior
871952
/// to use it on an iterator that reports an incorrect length.
@@ -910,6 +991,12 @@ impl MutableBuffer {
910991
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) };
911992
/// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
912993
/// ```
994+
///
995+
/// # Panics
996+
///
997+
/// Panics if the iterator does not report an upper bound via `size_hint`, or if it yields
998+
/// fewer items than reported.
999+
///
9131000
/// # Safety
9141001
/// This method assumes that the iterator's size is correct and is undefined behavior
9151002
/// to use it on an iterator that reports an incorrect length.
@@ -928,6 +1015,14 @@ impl MutableBuffer {
9281015
/// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors
9291016
/// if any of the items of the iterator is an error.
9301017
/// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
1018+
///
1019+
/// # Panics
1020+
///
1021+
/// Panics if the iterator does not report an upper bound via `size_hint`, or if the
1022+
/// reported length does not match the number of items produced before an error-free finish,
1023+
/// or if allocating the required buffer fails for the same reasons as
1024+
/// [`MutableBuffer::new`].
1025+
///
9311026
/// # Safety
9321027
/// This method assumes that the iterator's size is correct and is undefined behavior
9331028
/// to use it on an iterator that reports an incorrect length.

arrow-buffer/src/builder/mod.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,4 +439,28 @@ mod tests {
439439
let slice = builder.as_slice_mut();
440440
assert_eq!(slice.len(), 222);
441441
}
442+
443+
#[test]
444+
#[should_panic(expected = "buffer length overflow")]
445+
fn reserve_length_overflow() {
446+
let mut builder = BufferBuilder::<u8>::new(1);
447+
builder.append(0);
448+
builder.reserve(usize::MAX);
449+
}
450+
451+
#[test]
452+
#[should_panic(expected = "buffer length overflow")]
453+
fn append_n_zeroed_length_overflow() {
454+
let mut builder = BufferBuilder::<u64>::new(1);
455+
builder.append_n_zeroed(1);
456+
builder.append_n_zeroed(usize::MAX / mem::size_of::<u64>());
457+
}
458+
459+
#[test]
460+
#[should_panic(expected = "buffer length overflow")]
461+
fn advance_length_overflow() {
462+
let mut builder = BufferBuilder::<u64>::new(1);
463+
builder.advance(1);
464+
builder.advance(usize::MAX / mem::size_of::<u64>());
465+
}
442466
}

0 commit comments

Comments
 (0)