From eea18ba56da028363c947f6c4d2ae09178baa43a Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Sat, 27 Apr 2024 13:50:26 +0200 Subject: [PATCH 1/4] Cache file names in fill_todo Background: While working with cargo, I've noticed that it takes ~30s to cargo clean -p with large enough target directory (~200GB). With a profiler, it turned out that most of the time was spent retrieving paths for removal in https://github.com/rust-lang/cargo/blob/eee4ea2f5a5fa1ae184a44675315548ec932a15c/src/cargo/ops/cargo_clean.rs#L319 (and not actually removing the files). Change description: In call to .sort_by, we repetitively parse the paths to obtain file names for comparison. This commit caches file names in PathWrapper object, akin to #135 that did so for dir info. For my use case, a cargo build using that branch takes ~14s to clean files instead of previous 30s (I've measured against main branch of this directory, to account for changes made since 0.3.1). Still not ideal, but hey, we're shaving 50% of time off for a bit heavier memory use. --- src/lib.rs | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8428a9a..a21e7c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -72,6 +72,7 @@ doctest!("../README.md"); use std::cmp; use std::cmp::Ordering; use std::error::Error; +use std::ffi::OsString; use std::fmt; use std::fs; use std::fs::DirEntry; @@ -329,10 +330,11 @@ impl fmt::Display for GlobError { struct PathWrapper { path: PathBuf, is_directory: bool, + file_name: Option, } impl PathWrapper { - fn from_dir_entry(path: PathBuf, e: DirEntry) -> Self { + fn from_dir_entry(path: PathBuf, file_name: OsString, e: DirEntry) -> Self { let is_directory = e .file_type() .ok() @@ -347,11 +349,20 @@ impl PathWrapper { }) .or_else(|| fs::metadata(&path).map(|m| m.is_dir()).ok()) .unwrap_or(false); - Self { path, is_directory } + Self { + path, + is_directory, + file_name: Some(file_name), + } } fn from_path(path: PathBuf) -> Self { let is_directory = fs::metadata(&path).map(|m| m.is_dir()).unwrap_or(false); - Self { path, is_directory } + let file_name = path.file_name().map(ToOwned::to_owned); + Self { + path, + is_directory, + file_name, + } } fn into_path(self) -> PathBuf { @@ -930,12 +941,16 @@ fn fill_todo( let dirs = fs::read_dir(path).and_then(|d| { d.map(|e| { e.map(|e| { - let path = if curdir { - PathBuf::from(e.path().file_name().unwrap()) + let (path, file_name) = if curdir { + let path = e.path(); + let file_name = path.file_name().unwrap(); + (PathBuf::from(file_name), file_name.to_owned()) } else { - e.path() + let path = e.path(); + let file_name = path.file_name().unwrap().to_owned(); + (path, file_name) }; - PathWrapper::from_dir_entry(path, e) + PathWrapper::from_dir_entry(path, file_name, e) }) }) .collect::, _>>() @@ -946,7 +961,7 @@ fn fill_todo( children .retain(|x| !x.file_name().unwrap().to_str().unwrap().starts_with('.')); } - children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name())); + children.sort_by(|p1, p2| p2.file_name.cmp(&p1.file_name)); todo.extend(children.into_iter().map(|x| Ok((x, idx)))); // Matching the special directory entries . and .. that From b7dbb48ee893ec685449d117f79c50bae970bbe4 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Mon, 23 Dec 2024 12:59:50 +0100 Subject: [PATCH 2/4] review: Remove extraenous .unwrap call --- src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a21e7c6..8415694 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -334,7 +334,7 @@ struct PathWrapper { } impl PathWrapper { - fn from_dir_entry(path: PathBuf, file_name: OsString, e: DirEntry) -> Self { + fn from_dir_entry(path: PathBuf, file_name: Option, e: DirEntry) -> Self { let is_directory = e .file_type() .ok() @@ -352,7 +352,7 @@ impl PathWrapper { Self { path, is_directory, - file_name: Some(file_name), + file_name, } } fn from_path(path: PathBuf) -> Self { @@ -944,10 +944,10 @@ fn fill_todo( let (path, file_name) = if curdir { let path = e.path(); let file_name = path.file_name().unwrap(); - (PathBuf::from(file_name), file_name.to_owned()) + (PathBuf::from(file_name), Some(file_name.to_owned())) } else { let path = e.path(); - let file_name = path.file_name().unwrap().to_owned(); + let file_name = path.file_name().map(ToOwned::to_owned); (path, file_name) }; PathWrapper::from_dir_entry(path, file_name, e) From 778365def42fe56d0d6b5afa8c9d382132d23a2d Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:11:28 +0100 Subject: [PATCH 3/4] Use Box instead of OsString for filename storage --- src/lib.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8415694..02f7321 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -72,7 +72,7 @@ doctest!("../README.md"); use std::cmp; use std::cmp::Ordering; use std::error::Error; -use std::ffi::OsString; +use std::ffi::OsStr; use std::fmt; use std::fs; use std::fs::DirEntry; @@ -330,11 +330,11 @@ impl fmt::Display for GlobError { struct PathWrapper { path: PathBuf, is_directory: bool, - file_name: Option, + file_name: Option>, } impl PathWrapper { - fn from_dir_entry(path: PathBuf, file_name: Option, e: DirEntry) -> Self { + fn from_dir_entry(path: PathBuf, file_name: Option>, e: DirEntry) -> Self { let is_directory = e .file_type() .ok() @@ -357,7 +357,7 @@ impl PathWrapper { } fn from_path(path: PathBuf) -> Self { let is_directory = fs::metadata(&path).map(|m| m.is_dir()).unwrap_or(false); - let file_name = path.file_name().map(ToOwned::to_owned); + let file_name = path.file_name().map(Box::from); Self { path, is_directory, @@ -944,10 +944,10 @@ fn fill_todo( let (path, file_name) = if curdir { let path = e.path(); let file_name = path.file_name().unwrap(); - (PathBuf::from(file_name), Some(file_name.to_owned())) + (PathBuf::from(file_name), Some(Box::from(file_name))) } else { let path = e.path(); - let file_name = path.file_name().map(ToOwned::to_owned); + let file_name = path.file_name().map(Box::from); (path, file_name) }; PathWrapper::from_dir_entry(path, file_name, e) From 3b0c35a4a6d74a76645000add089f341abff7ed4 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Sat, 4 Jan 2025 16:02:52 +0100 Subject: [PATCH 4/4] Use Box instead of PathBuf --- src/lib.rs | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 02f7321..9222daa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -200,13 +200,13 @@ pub fn glob_with(pattern: &str, options: MatchOptions) -> Result PathBuf { + fn to_scope(p: &Path) -> Box { // FIXME handle volume relative paths here - p.to_path_buf() + p.into() } #[cfg(not(windows))] - fn to_scope(p: &Path) -> PathBuf { - p.to_path_buf() + fn to_scope(p: &Path) -> Box { + p.into() } // make sure that the pattern is valid first, else early return with error @@ -243,7 +243,7 @@ pub fn glob_with(pattern: &str, options: MatchOptions) -> Result, is_directory: bool, file_name: Option>, } impl PathWrapper { - fn from_dir_entry(path: PathBuf, file_name: Option>, e: DirEntry) -> Self { + fn from_dir_entry(path: Box, file_name: Option>, e: DirEntry) -> Self { let is_directory = e .file_type() .ok() @@ -355,7 +355,7 @@ impl PathWrapper { file_name, } } - fn from_path(path: PathBuf) -> Self { + fn from_path(path: Box) -> Self { let is_directory = fs::metadata(&path).map(|m| m.is_dir()).unwrap_or(false); let file_name = path.file_name().map(Box::from); Self { @@ -366,7 +366,7 @@ impl PathWrapper { } fn into_path(self) -> PathBuf { - self.path + self.path.into_path_buf() } } @@ -928,7 +928,7 @@ fn fill_todo( } else { path.join(&s) }; - let next_path = PathWrapper::from_path(next_path); + let next_path = PathWrapper::from_path(next_path.into_boxed_path()); if (special && is_dir) || (!special && (fs::metadata(&next_path).is_ok() @@ -944,9 +944,9 @@ fn fill_todo( let (path, file_name) = if curdir { let path = e.path(); let file_name = path.file_name().unwrap(); - (PathBuf::from(file_name), Some(Box::from(file_name))) + (Box::from(file_name.as_ref()), Some(Box::from(file_name))) } else { - let path = e.path(); + let path = e.path().into_boxed_path(); let file_name = path.file_name().map(Box::from); (path, file_name) }; @@ -972,7 +972,10 @@ fn fill_todo( if !pattern.tokens.is_empty() && pattern.tokens[0] == Char('.') { for &special in &[".", ".."] { if pattern.matches_with(special, options) { - add(todo, PathWrapper::from_path(path.join(special))); + add( + todo, + PathWrapper::from_path(path.join(special).into_boxed_path()), + ); } } }