Skip to content

Commit ad2dc06

Browse files
committed
Add normalization for current directory components (trailing and non-trailing) in Ancestors<'_>
1 parent ef86174 commit ad2dc06

2 files changed

Lines changed: 224 additions & 19 deletions

File tree

library/std/src/path.rs

Lines changed: 148 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,31 +1211,115 @@ impl<'a> Ancestors<'a> {
12111211
}
12121212
}
12131213

1214-
/// Skip any trailing separators in the forward direction
1214+
/// Normalizes away trailing separators and current directory ('.') components
1215+
/// in the forward direction.
12151216
#[inline]
12161217
fn advance_through_trailing_sep_front(&mut self) {
1218+
// `Some(false)` is used to denote that
1219+
// we haven't seen a '.' component *yet*,
1220+
// `Some(true)` means we have seen a '.' component,
1221+
// and `None` means that the component is not '.'
1222+
let mut curr_dir = Some(false);
1223+
// We rebound to the original index for path components
1224+
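// like '..' or 'abc.'
// NOTE(review): unlike the backward variant, nothing in this function ever
// stores `Some(..)` in `rebound_ind`, so the rebound never fires here — confirm.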
1225+
let mut rebound_ind: Option<usize> = None;
12171226
loop {
1218-
if self.front == self.back || !is_sep_byte(self.path[self.front]) {
1227+
if self.front == self.back {
1228+
if let Some(front_ind) = rebound_ind {
1229+
self.front = front_ind;
1230+
}
12191231
break;
12201232
}
1233+
1234+
if is_sep_byte(self.path[self.front]) {
1235+
if let Some(curr_dir_present) = curr_dir
1236+
&& curr_dir_present
1237+
{
1238+
curr_dir = Some(false);
1239+
rebound_ind = None;
1240+
}
1241+
} else {
1242+
if self.path[self.front] == b'.' {
1243+
if let Some(curr_dir_present) = curr_dir {
1244+
if !curr_dir_present {
1245+
curr_dir = Some(true);
1246+
} else {
1247+
curr_dir = None;
1248+
}
1249+
} else {
1250+
if let Some(front_ind) = rebound_ind {
1251+
self.front = front_ind;
1252+
}
1253+
break;
1254+
}
1255+
} else {
1256+
if let Some(front_ind) = rebound_ind {
1257+
self.front = front_ind;
1258+
}
1259+
break;
1260+
}
1261+
}
1262+
12211263
self.front += 1;
12221264
}
12231265
}
12241266

1225-
/// Skip any trailing separators in the backward direction
1267+
/// Normalizes away trailing separators and current directory ('.') components
1268+
/// in the backward direction.
12261269
#[inline]
12271270
fn advance_through_trailing_sep_back(&mut self) {
1271+
// `Some(false)` is used to denote that
1272+
// we haven't seen a '.' component *yet*,
1273+
// `Some(true)` means we have seen a '.' component,
1274+
// and `None` means that the component is not '.'
1275+
let mut curr_dir = Some(false);
1276+
// We rebound to the original index for path components
1277+
// like '..' or 'abc.'
1278+
let mut rebound_ind: Option<usize> = None;
12281279
loop {
1229-
if self.back == self.front || !is_sep_byte(self.path[self.back - 1]) {
1280+
if self.back == self.front {
1281+
if let Some(back_ind) = rebound_ind {
1282+
self.back = back_ind;
1283+
}
12301284
break;
12311285
}
1286+
1287+
if is_sep_byte(self.path[self.back - 1]) {
1288+
if let Some(curr_dir_present) = curr_dir
1289+
&& curr_dir_present
1290+
{
1291+
curr_dir = Some(false);
1292+
rebound_ind = None;
1293+
}
1294+
} else {
1295+
if self.path[self.back - 1] == b'.' {
1296+
if let Some(curr_dir_present) = curr_dir {
1297+
if !curr_dir_present {
1298+
curr_dir = Some(true);
1299+
rebound_ind = Some(self.back);
1300+
} else {
1301+
curr_dir = None;
1302+
}
1303+
} else {
1304+
if let Some(back_ind) = rebound_ind {
1305+
self.back = back_ind;
1306+
}
1307+
break;
1308+
}
1309+
} else {
1310+
if let Some(back_ind) = rebound_ind {
1311+
self.back = back_ind;
1312+
}
1313+
break;
1314+
}
1315+
}
12321316
self.back -= 1;
12331317
}
12341318
}
12351319

12361320
/// Increments our front pointer until we find the
12371321
/// next separator byte or have reached the component
1238-
/// that back index is pointing at
1322+
/// that back index is pointing at.
12391323
#[inline]
12401324
fn find_next_separator_front(&mut self) {
12411325
while self.front < self.back {
@@ -1249,7 +1333,7 @@ impl<'a> Ancestors<'a> {
12491333

12501334
/// Decrements our back pointer until we find the
12511335
/// next separator byte or have reached the component
1252-
/// that front index is pointing to
1336+
/// that front index is pointing to.
12531337
#[inline]
12541338
fn find_next_separator_back(&mut self) {
12551339
while self.back > self.front {
@@ -1284,7 +1368,8 @@ impl<'a> Iterator for Ancestors<'a> {
12841368
// separator byte. This prepares the path we return on the next
12851369
// call to this function.
12861370
self.find_next_separator_back();
1287-
// Skip trailing seps
1371+
// Normalizes trailing seps and curr dirs in preparation for
1372+
// next back component
12881373
self.advance_through_trailing_sep_back();
12891374

12901375
// The first path our back pointer must return is the original path
@@ -1322,7 +1407,11 @@ impl<'a> DoubleEndedIterator for Ancestors<'a> {
13221407
// We trace our `self.front` idx down the path until
13231408
// we hit a separator.
13241409
self.find_next_separator_front();
1325-
// Skip trailing seps
1410+
// For paths like "././././a", we just want the first
1411+
// '.' path and normalize the rest away
1412+
let curr_front = self.front;
1413+
// Normalizes trailing seps and curr dirs in preparation for
1414+
// next front component
13261415
self.advance_through_trailing_sep_front();
13271416

13281417
// The last path front must return is the original path
@@ -1334,7 +1423,7 @@ impl<'a> DoubleEndedIterator for Ancestors<'a> {
13341423

13351424
// SAFETY: Our front index always stops at an ascii separator byte
13361425
// so our u8 slice will always contain a valid path
1337-
let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..self.front]) };
1426+
let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..curr_front]) };
13381427
Some(Path::new(sliced_path).trim_trailing_sep())
13391428
}
13401429
}
@@ -2876,23 +2965,63 @@ impl Path {
28762965
#[stable(feature = "path_ancestors", since = "1.28.0")]
28772966
#[inline]
28782967
pub fn ancestors(&self) -> Ancestors<'_> {
2879-
let os_str_path = self.as_os_str();
2880-
let path_bytes = os_str_path.as_encoded_bytes();
2881-
let path_len = path_bytes.len();
2882-
let trailing_seps = if self.has_trailing_sep() {
2968+
/// Measures the trailing portion of the given path (separators and
2969+
/// '.' components) and returns the number of bytes that it occupies
2970+
#[inline]
2971+
fn trailing_path_length(path_bytes: &[u8]) -> usize {
2972+
let path_len = path_bytes.len();
28832973
// `idx - 1` below won't underflow because the loop
28842974
// only runs while `idx > 0`
28852975
let mut idx = path_len;
2976+
2977+
// `curr_dir` is a plain flag here: `false` means
2978+
// we haven't seen a '.' in the current component *yet*,
2979+
// and `true` means we have seen exactly one '.' since
2980+
// the last separator (a candidate '.' component)
2981+
let mut curr_dir = false;
2982+
// We rebound to the original index for path components
2983+
// like '..' or 'abc.'
2984+
let mut rebound_idx: Option<usize> = None;
28862985
while idx > 0 {
2887-
if !is_sep_byte(path_bytes[idx - 1]) {
2888-
break;
2986+
if is_sep_byte(path_bytes[idx - 1]) {
2987+
if curr_dir {
2988+
rebound_idx = None;
2989+
curr_dir = false;
2990+
}
2991+
} else {
2992+
if path_bytes[idx - 1] == b'.' {
2993+
if !curr_dir {
2994+
rebound_idx = Some(idx);
2995+
curr_dir = true;
2996+
} else {
2997+
if let Some(r_idx) = rebound_idx {
2998+
curr_dir = false;
2999+
idx = r_idx;
3000+
}
3001+
break;
3002+
}
3003+
} else {
3004+
if let Some(r_idx) = rebound_idx {
3005+
curr_dir = false;
3006+
idx = r_idx;
3007+
}
3008+
break;
3009+
}
28893010
}
28903011
idx -= 1;
28913012
}
3013+
3014+
// If the scan reached the start of the path on a leading `.` (e.g. `.`, `./`
3015+
// or `./.`), that `.` is not normalized away because it's treated as its own component
3016+
if curr_dir {
3017+
idx += 1;
3018+
}
28923019
path_len - idx
2893-
} else {
2894-
0
2895-
};
3020+
}
3021+
3022+
let os_str_path = self.as_os_str();
3023+
let path_bytes = os_str_path.as_encoded_bytes();
3024+
let trailing_seps = trailing_path_length(path_bytes);
28963025

28973026
// Windows specific component
28983027
let prefix = parse_prefix(os_str_path);
@@ -2913,7 +3042,7 @@ impl Path {
29133042
let front = prefix.map(|prefix| prefix.len()).unwrap_or(0);
29143043
// Set our back pointer to the last separator byte (without trailing)
29153044
// or last byte
2916-
let back = path_len - trailing_seps;
3045+
let back = path_bytes.len() - trailing_seps;
29173046

29183047
Ancestors { path: path_bytes, front, back, trailing_seps, first_comp }
29193048
}

library/std/tests/path_ancestors.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,26 @@ fn curr_dir_relative_path_ancestors() {
124124
assert_eq!(rev_ancestors.next_back(), None);
125125
}
126126

127+
#[test]
128+
fn multiple_curr_dir_relative_path_ancestors() {
129+
let path = Path::new("././././baz/beam/boo");
130+
let mut ancestors = path.ancestors();
131+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("././././baz/beam/boo"));
132+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("././././baz/beam"));
133+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("././././baz"));
134+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("."));
135+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(""));
136+
assert_eq!(ancestors.next(), None);
137+
138+
let mut rev_ancestors = path.ancestors();
139+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new(""));
140+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("."));
141+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("././././baz"));
142+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("././././baz/beam"));
143+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("././././baz/beam/boo"));
144+
assert_eq!(rev_ancestors.next_back(), None);
145+
}
146+
127147
#[test]
128148
fn parent_dir_only_path_ancestors() {
129149
let path = Path::new("..");
@@ -228,6 +248,30 @@ fn absolute_path_ancestors() {
228248
assert_eq!(rev_ancestors.next_back(), None);
229249
}
230250

251+
#[test]
252+
fn absolute_path_with_curr_dir_path_ancestors() {
253+
let path = Path::new("/.");
254+
let mut ancestors = path.ancestors();
255+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/."));
256+
assert_eq!(ancestors.next(), None);
257+
258+
let mut rev_ancestors = path.ancestors();
259+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/."));
260+
assert_eq!(rev_ancestors.next_back(), None);
261+
}
262+
263+
#[test]
264+
fn absolute_path_with_trailing_curr_dir_path_ancestors() {
265+
let path = Path::new("/./././././.");
266+
let mut ancestors = path.ancestors();
267+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/./././././."));
268+
assert_eq!(ancestors.next(), None);
269+
270+
let mut rev_ancestors = path.ancestors();
271+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/./././././."));
272+
assert_eq!(rev_ancestors.next_back(), None);
273+
}
274+
231275
#[test]
232276
fn absolute_with_in_between_trailing_seps_path_ancestors() {
233277
let path = Path::new("/foo/////bar/");
@@ -244,6 +288,38 @@ fn absolute_with_in_between_trailing_seps_path_ancestors() {
244288
assert_eq!(rev_ancestors.next_back(), None);
245289
}
246290

291+
#[test]
292+
fn absolute_curr_dir_and_trailing_seps_path_ancestors() {
293+
let path = Path::new("/foo/bar/./././.");
294+
let mut ancestors = path.ancestors();
295+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo/bar/./././."));
296+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo"));
297+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/"));
298+
assert_eq!(ancestors.next(), None);
299+
300+
let mut rev_ancestors = path.ancestors();
301+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/"));
302+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo"));
303+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo/bar/./././."));
304+
assert_eq!(rev_ancestors.next_back(), None);
305+
}
306+
307+
#[test]
308+
fn absolute_curr_dir_and_in_between_trailing_seps_path_ancestors() {
309+
let path = Path::new("/foo////.////bar");
310+
let mut ancestors = path.ancestors();
311+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo////.////bar"));
312+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo"));
313+
assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/"));
314+
assert_eq!(ancestors.next(), None);
315+
316+
let mut rev_ancestors = path.ancestors();
317+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/"));
318+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo"));
319+
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo////.////bar"));
320+
assert_eq!(rev_ancestors.next_back(), None);
321+
}
322+
247323
#[test]
248324
fn absolute_rev_path_ancestors() {
249325
let path = Path::new("/foo/bar/baz/");

0 commit comments

Comments
 (0)