Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
285 changes: 278 additions & 7 deletions library/std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1089,6 +1089,22 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm
Iterator::cmp(left, right)
}

/// Classifies how a path starts, so that `Ancestors<'_>` knows what to yield
/// as its outermost (shortest) ancestor.
///
/// In previous stable versions of `Ancestors<'_>`, the last item produced for
/// a relative path was an empty path (`""`), so we must preserve that behavior.
#[derive(Copy, Clone, Debug)]
enum FirstComponent {
/// For paths with no prefix that are not relative, i.e. paths starting with `/`
AbsolutePath,
/// For paths with no prefix and no root, like `.`, `..`, `a/`
RelativePath,
/// For Windows-specific paths that start with a prefix (`C:`,
/// `\\?\UNC\server\share`, `\\.\COM42`, etc.)
Prefix,
}

/// An iterator over [`Path`] and its ancestors.
///
/// This `struct` is created by the [`ancestors`] method on [`Path`].
Expand All @@ -1111,7 +1127,139 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[stable(feature = "path_ancestors", since = "1.28.0")]
pub struct Ancestors<'a> {
    /// Encoded bytes of the path being iterated. Ancestors are produced as
    /// slices `path[0..n]` of this buffer (the root `/` and the empty path
    /// `""` are produced from literals instead).
    path: &'a [u8],
    /// Cursor used by `next_back()`; starts at the length of the path's
    /// prefix (0 when there is no prefix) and only moves towards `back`.
    front: usize,
    /// Cursor used by `next()`; starts at the path length minus
    /// `trailing_seps` and only moves towards `front`.
    back: usize,
    /// Number of trailing separator bytes at the end of the original path.
    /// Used to recognise when a cursor has reached the end of the path, in
    /// which case the original path is yielded unchanged.
    trailing_seps: usize,
    /// How the path starts. Set to `None` once the outermost ancestor has
    /// been yielded from either end, so it is never produced twice.
    first_comp: Option<FirstComponent>,
}

impl<'a> Ancestors<'a> {
    /// Yields the outermost (shortest) ancestor, at most once.
    ///
    /// This is shared by `.next()` (which passes `dir_front == false`) and
    /// `.next_back()` (which passes `dir_front == true`). What is produced
    /// depends on how the path starts:
    /// - Absolute path: `/`, or the whole original path (trailing separators
    ///   included) when the relevant cursor already sits at the end of it.
    /// - Relative path: `""`. Previous implementations of `Ancestors<'_>`
    ///   ended with `""`, and the reverse direction starts with it for
    ///   symmetry.
    /// - Prefix (Windows specific, e.g. `C:`): the prefix itself, or the whole
    ///   original path when the relevant cursor already sits at the end of it.
    ///
    /// Returns `None` when the first component has already been consumed
    /// (the frequent case).
    ///
    /// When `dir_front` is true, `self.front` is additionally advanced past
    /// any separators that follow the first component.
    #[inline]
    fn consume_first_component(&mut self, dir_front: bool) -> Option<&'a Path> {
        // `take()` both reads the marker and records that it has been consumed.
        let first_comp = self.first_comp.take()?;
        let path_len = self.path.len();

        let sliced_path = match first_comp {
            FirstComponent::AbsolutePath => {
                let cursor = if dir_front {
                    self.advance_through_trailing_sep_front();
                    // This won't overflow because advance_through_trailing_sep_front()
                    // stops where `self.back` is at (and we know `self.back` max value is
                    // `path_len` - `self.trailing_seps`)
                    self.front
                } else {
                    self.back
                };
                if cursor + self.trailing_seps == path_len {
                    // SAFETY: This contains the whole original path
                    unsafe { Path::from_u8_slice(&self.path[0..path_len]) }
                } else {
                    Path::new("/")
                }
            }
            FirstComponent::RelativePath => Path::new(""),
            FirstComponent::Prefix => {
                // `prefix_end` is where the slice stops when we are not
                // returning the whole original path.
                let (cursor, prefix_end) = if dir_front {
                    let prefix_end = self.front;
                    self.advance_through_trailing_sep_front();
                    (self.front, prefix_end)
                } else {
                    (self.back, self.back)
                };
                let end = if cursor + self.trailing_seps == path_len { path_len } else { prefix_end };
                // SAFETY: We either get the original path
                // or slice at an ascii separator byte
                unsafe { Path::from_u8_slice(&self.path[0..end]) }
            }
        };

        Some(sliced_path)
    }

    /// Moves `self.front` forward over consecutive separator bytes, never
    /// going past `self.back`.
    #[inline]
    fn advance_through_trailing_sep_front(&mut self) {
        while self.front != self.back && is_sep_byte(self.path[self.front]) {
            self.front += 1;
        }
    }

    /// Moves `self.back` backward over consecutive separator bytes, never
    /// going past `self.front`.
    #[inline]
    fn advance_through_trailing_sep_back(&mut self) {
        while self.back != self.front && is_sep_byte(self.path[self.back - 1]) {
            self.back -= 1;
        }
    }

    /// Moves `self.front` forward until it sits just past the next separator
    /// byte, or until it reaches `self.back` if no separator is found first.
    #[inline]
    fn find_next_separator_front(&mut self) {
        while self.front < self.back {
            let at_sep = is_sep_byte(self.path[self.front]);
            self.front += 1;
            if at_sep {
                break;
            }
        }
    }

    /// Moves `self.back` backward until it sits on the previous separator
    /// byte, or until it reaches `self.front` if no separator is found first.
    #[inline]
    fn find_next_separator_back(&mut self) {
        while self.back > self.front {
            self.back -= 1;
            if is_sep_byte(self.path[self.back]) {
                break;
            }
        }
    }
}

#[stable(feature = "path_ancestors", since = "1.28.0")]
Expand All @@ -1120,9 +1268,74 @@ impl<'a> Iterator for Ancestors<'a> {

#[inline]
fn next(&mut self) -> Option<Self::Item> {
    // Once the cursors have met there are no more regular ancestors. What is
    // left is the first component (`/`, `""` for relative paths, or a prefix)
    // if it has not been yielded yet, and `None` afterwards.
    if self.back <= self.front {
        return self.consume_first_component(false);
    }

    let path_len = self.path.len();
    // `self.back` currently marks the end of the path to yield on this call.
    let curr_back = self.back;

    // Walk `self.back` up the path to the previous separator and past any
    // run of separators, which prepares the path yielded by the next call.
    self.find_next_separator_back();
    self.advance_through_trailing_sep_back();

    // The first path this direction yields is the original path, including
    // any trailing separators it had.
    if curr_back + self.trailing_seps == path_len {
        // SAFETY: This contains the whole original path
        return Some(unsafe { Path::from_u8_slice(&self.path[0..path_len]) });
    }

    // SAFETY: Our curr_back index is always stationed at an ascii separator byte
    // so our u8 slice will always contain a valid path
    //
    // No separator trimming is needed because `0..curr_back` excludes it.
    Some(unsafe { Path::from_u8_slice(&self.path[0..curr_back]) })
}
}

#[stable(feature = "reverse_ancestors", since = "CURRENT_RUSTC_VERSION")]
impl<'a> DoubleEndedIterator for Ancestors<'a> {
    /// Yields the ancestors starting from the shortest one and ending with
    /// the original path.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        // Record this before consuming the first component, because doing so
        // may advance `self.front`.
        let cursors_met = self.front >= self.back;

        // The first component is yielded before anything else. If it is
        // already gone and the cursors had met (nothing left, or the path was
        // only `C:`, `.`, `/`, ...), the iterator is exhausted.
        let first = self.consume_first_component(true);
        if first.is_some() || cursors_met {
            return first;
        }

        // Walk `self.front` down the path to just past the next separator,
        // then past any further separators.
        self.find_next_separator_front();
        self.advance_through_trailing_sep_front();

        let total_len = self.path.len();
        if self.front + self.trailing_seps == total_len {
            // The last path this direction yields is the original path.
            // SAFETY: This contains the whole original path
            Some(unsafe { Path::from_u8_slice(&self.path[0..total_len]) })
        } else {
            // SAFETY: Our front index always stops at an ascii separator byte
            // so our u8 slice will always contain a valid path
            let ancestor = unsafe { Path::from_u8_slice(&self.path[0..self.front]) };
            Some(ancestor.trim_trailing_sep())
        }
    }
}

Expand Down Expand Up @@ -2616,11 +2829,13 @@ impl Path {

/// Produces an iterator over `Path` and its ancestors.
///
/// The iterator will yield the `Path` that is returned if the [`parent`] method is used zero
/// or more times. If the [`parent`] method returns [`None`], the iterator will do likewise.
/// The iterator will always yield at least one value, namely `Some(&self)`. Next it will yield
/// `&self.parent()`, `&self.parent().and_then(Path::parent)` and so on.
///
/// The iterator is double-ended: `.next_back()` (or `.rev().next()`) yields the same
/// ancestors in the opposite order, starting from the shortest ancestor and ending with
/// `&self`.
///
/// # Examples
///
/// ```
Expand All @@ -2640,11 +2855,67 @@ impl Path {
/// assert_eq!(ancestors.next(), None);
/// ```
///
/// ```
/// use std::path::Path;
///
/// let mut ancestors = Path::new("/foo/bar").ancestors();
/// assert_eq!(ancestors.next_back(), Some(Path::new("/")));
/// assert_eq!(ancestors.next_back(), Some(Path::new("/foo")));
/// assert_eq!(ancestors.next_back(), Some(Path::new("/foo/bar")));
/// assert_eq!(ancestors.next_back(), None);
///
/// let mut ancestors = Path::new("../foo/bar").ancestors();
/// assert_eq!(ancestors.next_back(), Some(Path::new("")));
/// assert_eq!(ancestors.next_back(), Some(Path::new("..")));
/// assert_eq!(ancestors.next_back(), Some(Path::new("../foo")));
/// assert_eq!(ancestors.next_back(), Some(Path::new("../foo/bar")));
/// assert_eq!(ancestors.next_back(), None);
/// ```
///
/// [`parent`]: Path::parent
#[stable(feature = "path_ancestors", since = "1.28.0")]
#[inline]
pub fn ancestors(&self) -> Ancestors<'_> {
Ancestors { next: Some(&self) }
let os_str_path = self.as_os_str();
let path_bytes = os_str_path.as_encoded_bytes();
let path_len = path_bytes.len();
let trailing_seps = if self.has_trailing_sep() {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a partial implementation of trim_trailing_sep, except without the handling for !self.has_root()...

I added an unresolved question to the tracking issue (#142503) on whether that's the right behavior for trim_trailing_sep/has_trailing_sep, but I think this is probably not what we want here...

// this won't panic because "" does not have
// a trailing separator
let mut idx = path_len;
while idx > 0 {
if !is_sep_byte(path_bytes[idx - 1]) {
break;
}
idx -= 1;
}
path_len - idx
} else {
0
};

// Windows specific component
let prefix = parse_prefix(os_str_path);
let prefix_exist = prefix.map(|_| true).unwrap_or(false);

// Parse what our start component, which is needed in cases where
// `self.front` == `self.back`, or we're dealing with symmetry with
// relative path on returning `""` at the start/end of an iterator
let first_comp = if prefix_exist {
Some(FirstComponent::Prefix)
} else if self.is_relative() {
Some(FirstComponent::RelativePath)
} else {
Some(FirstComponent::AbsolutePath)
};

// If we have a prefix, we encode that index into front
let front = prefix.map(|prefix| prefix.len()).unwrap_or(0);
// Set our back pointer to the last separator byte (without trailing)
// or last byte
let back = path_len - trailing_seps;

Ancestors { path: path_bytes, front, back, trailing_seps, first_comp }
}

/// Returns the final component of the `Path`, if there is one.
Expand Down
Loading
Loading