Skip to content

Add BufRead::skip_until #98943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions library/std/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1923,6 +1923,28 @@ fn read_until<R: BufRead + ?Sized>(r: &mut R, delim: u8, buf: &mut Vec<u8>) -> R
}
}

/// Discards bytes from `r` until `delim` has been consumed or the reader
/// yields an empty buffer.
///
/// Returns the total number of bytes consumed, counting the delimiter itself
/// when it was found. `ErrorKind::Interrupted` errors from `fill_buf` are
/// retried; any other error is returned immediately.
fn skip_until<R: BufRead + ?Sized>(r: &mut R, delim: u8) -> Result<usize> {
    let mut total = 0;
    loop {
        let chunk = match r.fill_buf() {
            Ok(chunk) => chunk,
            Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        // Position of the delimiter inside the currently buffered data, if any.
        let hit = memchr::memchr(delim, chunk);
        // Consume through the delimiter when present, otherwise the whole chunk.
        let consumed = hit.map_or(chunk.len(), |idx| idx + 1);

        r.consume(consumed);
        total += consumed;

        // Stop once the delimiter was consumed, or when nothing was available.
        if hit.is_some() || consumed == 0 {
            return Ok(total);
        }
    }
}

/// A `BufRead` is a type of `Read`er which has an internal buffer, allowing it
/// to perform extra ways of reading.
///
Expand Down Expand Up @@ -2126,6 +2148,68 @@ pub trait BufRead: Read {
read_until(self, byte, buf)
}

/// Skip all bytes until the delimiter `byte` or EOF is reached.
///
/// This function will read (and discard) bytes from the underlying stream until the
/// delimiter or EOF is found.
///
/// If successful, this function will return the total number of bytes read,
/// including the delimiter byte. If the reader is already at EOF, `Ok(0)` is
/// returned.
///
/// This is useful for efficiently skipping data such as NUL-terminated strings
/// in binary file formats without buffering.
///
/// This function is blocking and should be used carefully: it is possible for
/// an attacker to continuously send bytes without ever sending the delimiter
/// or EOF.
///
/// # Errors
///
/// This function will ignore all instances of [`ErrorKind::Interrupted`] and
/// will otherwise return any errors returned by [`fill_buf`].
///
/// If an I/O error is encountered then the error is returned and the number
/// of bytes skipped so far is not reported; those bytes have nevertheless
/// already been consumed from the reader.
///
/// [`fill_buf`]: BufRead::fill_buf
///
/// # Examples
///
/// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
/// this example, we use [`Cursor`] to read some NUL-terminated information
/// about Ferris from a binary string, skipping the fun fact:
///
/// ```
/// #![feature(bufread_skip_until)]
///
/// use std::io::{self, BufRead};
///
/// let mut cursor = io::Cursor::new(b"Ferris\0Likes long walks on the beach\0Crustacean\0");
///
/// // read name
/// let mut name = Vec::new();
/// let num_bytes = cursor.read_until(b'\0', &mut name)
/// .expect("reading from cursor won't fail");
/// assert_eq!(num_bytes, 7);
/// assert_eq!(name, b"Ferris\0");
///
/// // skip fun fact
/// let num_bytes = cursor.skip_until(b'\0')
/// .expect("reading from cursor won't fail");
/// assert_eq!(num_bytes, 30);
///
/// // read animal type
/// let mut animal = Vec::new();
/// let num_bytes = cursor.read_until(b'\0', &mut animal)
/// .expect("reading from cursor won't fail");
/// assert_eq!(num_bytes, 11);
/// assert_eq!(animal, b"Crustacean\0");
/// ```
#[unstable(feature = "bufread_skip_until", issue = "111735")]
fn skip_until(&mut self, byte: u8) -> Result<usize> {
skip_until(self, byte)
}

/// Read all bytes until a newline (the `0xA` byte) is reached, and append
/// them to the provided buffer. You do not need to clear the buffer before
/// appending.
Expand Down
30 changes: 30 additions & 0 deletions library/std/src/io/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,36 @@ fn read_until() {
assert_eq!(v, []);
}

/// Alternates `read_until` and `skip_until` over a NUL-delimited byte string,
/// checking that every other record is skipped and that `skip_until` reports
/// zero bytes once the input is exhausted.
#[test]
fn skip_until() {
    const KEPT: &[u8] = b"read\0";
    const SKIPPED: &[u8] = b"ignore\0";

    let input: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore\0";
    let mut reader = BufReader::new(input);

    // Each pass reads one `read\0` record and then skips one `ignore\0` record.
    let mut record = Vec::new();
    loop {
        record.clear();
        let len = reader.read_until(0, &mut record).unwrap();
        if len == 0 {
            // Nothing left to read: end of input.
            break;
        }
        assert_eq!(record, KEPT);
        assert_eq!(len, KEPT.len());

        // The record that follows must be skipped in full, delimiter included.
        assert_eq!(reader.skip_until(0).unwrap(), SKIPPED.len());
    }

    // At end of input there is nothing more to skip, mirroring how
    // `read_until` reports zero bytes at EOF.
    assert_eq!(reader.skip_until(0).unwrap(), 0);
}

#[test]
fn split() {
let buf = Cursor::new(&b"12"[..]);
Expand Down