remove unsafe

a10y · a10y · commit 034256c209f5 · 2025-03-17T12:26:49.000-04:00
diff --git a/benches/micro.rs b/benches/micro.rs
@@ -21,7 +21,7 @@ fn bench_compress(c: &mut Criterion) {
         let compressor = compressor.build();
 
         let word = u64::from_le_bytes([b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h']);
-        b.iter(|| unsafe { compressor.compress_word(word, output_buf.as_mut_ptr()) });
+        b.iter(|| unsafe { compressor.compress_word(word, output_buf.spare_capacity_mut()) });
     });
 
     // We create a symbol table that is able to short-circuit the decompression
@@ -31,7 +31,7 @@ fn bench_compress(c: &mut Criterion) {
         let compressor = compressor.build();
 
         let word = u64::from_le_bytes([b'a', b'b', 0, 0, 0, 0, 0, 0]);
-        b.iter(|| unsafe { compressor.compress_word(word, output_buf.as_mut_ptr()) });
+        b.iter(|| unsafe { compressor.compress_word(word, output_buf.spare_capacity_mut()) });
     });
     group.finish();
 
diff --git a/src/lib.rs b/src/lib.rs
@@ -1,4 +1,4 @@
-#![allow(unsafe_op_in_unsafe_fn)]
+// #![allow(unsafe_op_in_unsafe_fn)]
 #![doc = include_str!("../README.md")]
 #![cfg(target_endian = "little")]
 
@@ -558,25 +558,30 @@ impl Compressor {
     ///
     /// `advance_in` is the number of bytes to advance the input pointer before the next call.
     ///
-    /// `advance_out` is the number of bytes to advance `out_ptr` before the next call.
+    /// `advance_out` is the number of bytes to advance `out_ptr` before the next call. Will
+    /// be either 1 (if a code is emitted) or 2 (if an escape plus a literal byte is emitted).
     ///
     /// # Safety
     ///
     /// `out_ptr` must never be NULL or otherwise point to invalid memory.
-    pub unsafe fn compress_word(&self, word: u64, out_ptr: *mut u8) -> (usize, usize) {
+    pub unsafe fn compress_word(
+        &self,
+        word: u64,
+        out_ptr: &mut [MaybeUninit<u8>],
+    ) -> (usize, usize) {
         // Speculatively write the first byte of `word` at offset 1. This is necessary if it is an escape, and
         // if it isn't, it will be overwritten anyway.
         //
         // SAFETY: caller ensures out_ptr is not null
         let first_byte = word as u8;
-        out_ptr.byte_add(1).write_unaligned(first_byte);
+        out_ptr[1].write(first_byte);
 
         // First, check the two_bytes table
         let code_twobyte = self.codes_two_byte[word as u16 as usize];
 
         if code_twobyte.code() < self.has_suffix_code {
             // 2 byte code without having to worry about longer matches.
-            std::ptr::write(out_ptr, code_twobyte.code());
+            out_ptr[0].write(code_twobyte.code());
 
             // Advance input by symbol length (2) and output by a single code byte
             (2, 1)
@@ -590,10 +595,10 @@ impl Compressor {
                 && compare_masked(word, entry.symbol.as_u64(), ignored_bits)
             {
                 // Advance the input by the symbol length (variable) and the output by one code byte
-                std::ptr::write(out_ptr, entry.code.code());
+                out_ptr[0].write(entry.code.code());
                 (entry.code.len() as usize, 1)
             } else {
-                std::ptr::write(out_ptr, code_twobyte.code());
+                out_ptr[0].write(code_twobyte.code());
 
                 // Advance the input by the symbol length (variable) and the output by either 1
                 // byte (if was one-byte code) or two bytes (escape).
@@ -655,47 +660,47 @@ impl Compressor {
     /// all encoded data.
     pub unsafe fn compress_into(&self, plaintext: &[u8], values: &mut Vec<u8>) {
         let mut in_ptr = plaintext.as_ptr();
-        let mut out_ptr = values.as_mut_ptr();
+        // let mut out_ptr = values.as_mut_ptr();
+        let out_values = values.spare_capacity_mut();
+        let mut out_ptr = 0;
 
         // SAFETY: `end` will point just after the end of the `plaintext` slice.
         let in_end = unsafe { in_ptr.byte_add(plaintext.len()) };
         let in_end_sub8 = in_end as usize - 8;
-        // SAFETY: `end` will point just after the end of the `values` allocation.
-        let out_end = unsafe { out_ptr.byte_add(values.capacity()) };
 
-        while (in_ptr as usize) <= in_end_sub8 && out_ptr < out_end {
+        while (in_ptr as usize) <= in_end_sub8 {
             // SAFETY: pointer ranges are checked in the loop condition
             unsafe {
                 // Load a full 8-byte word of data from in_ptr.
-                // SAFETY: caller asserts in_ptr is not null. we may read past end of pointer though.
+                // SAFETY: we check above that in_ptr points to at least 8 bytes of valid allocation
                 let word: u64 = std::ptr::read_unaligned(in_ptr as *const u64);
-                let (advance_in, advance_out) = self.compress_word(word, out_ptr);
+                let (advance_in, advance_out) =
+                    self.compress_word(word, &mut out_values[out_ptr..]);
                 in_ptr = in_ptr.byte_add(advance_in);
-                out_ptr = out_ptr.byte_add(advance_out);
+                out_ptr += advance_out;
             };
         }
 
         let remaining_bytes = unsafe { in_end.byte_offset_from(in_ptr) };
-        assert!(
-            out_ptr < out_end || remaining_bytes == 0,
-            "output buffer sized too small"
-        );
 
         let remaining_bytes = remaining_bytes as usize;
 
         // Load the last `remaining_byte`s of data into a final world. We then replicate the loop above,
         // but shift data out of this word rather than advancing an input pointer and potentially reading
         // unowned memory.
         let mut bytes = [0u8; 8];
-        std::ptr::copy_nonoverlapping(in_ptr, bytes.as_mut_ptr(), remaining_bytes);
+        // SAFETY: we know that `remaining_bytes` <= 8.
+        unsafe { std::ptr::copy_nonoverlapping(in_ptr, bytes.as_mut_ptr(), remaining_bytes) };
         let mut last_word = u64::from_le_bytes(bytes);
 
-        while in_ptr < in_end && out_ptr < out_end {
+        while in_ptr < in_end {
             // Load a full 8-byte word of data from in_ptr.
-            // SAFETY: caller asserts in_ptr is not null. we may read past end of pointer though.
-            let (advance_in, advance_out) = self.compress_word(last_word, out_ptr);
-            in_ptr = in_ptr.byte_add(advance_in);
-            out_ptr = out_ptr.byte_add(advance_out);
+            // SAFETY: `out_values[out_ptr..]` is a valid, bounds-checked slice; `compress_word` indexes into it, so running out of spare capacity panics rather than writing out of bounds.
+            let (advance_in, advance_out) =
+                unsafe { self.compress_word(last_word, &mut out_values[out_ptr..]) };
+            in_ptr = unsafe { in_ptr.byte_add(advance_in) };
+            // out_values = out_values.byte_add(advance_out);
+            out_ptr += advance_out;
 
             last_word = advance_8byte_word(last_word, advance_in);
         }
@@ -708,13 +713,9 @@ impl Compressor {
 
         // Count the number of bytes written
         // SAFETY: assertion
-        let bytes_written = out_ptr.offset_from(values.as_ptr());
-        assert!(
-            bytes_written >= 0,
-            "out_ptr ended before it started, not possible"
-        );
+        let bytes_written = out_ptr;
 
-        values.set_len(bytes_written as usize);
+        unsafe { values.set_len(bytes_written) };
     }
 
     /// Use the symbol table to compress the plaintext into a sequence of codes and escapes.