From 42e71bb8ea2bf59fb33cf3f83a3dcf34e314f520 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Mon, 11 Nov 2024 18:34:46 +0100 Subject: [PATCH] rustc_metadata: Preprocess search paths for better performance Over in Zed we've noticed that loading crates for a large-ish workspace can take almost 200ms. We've pinned it down to how rustc searches for paths, as it performs a linear search over the list of candidate paths. In our case the candidate list had about 20k entries which we had to iterate over for each dependency being loaded. This commit introduces a simple FilesIndex that's just a sorted Vec under the hood. Since crates are looked up by both prefix and suffix, we perform a range search on said Vec (which constrains the search space based on prefix) and follow up with a linear scan of entries with matching suffixes. FilesIndex is also pre-filtered before any queries are performed using available target information; query prefixes/suffixes are based on the target we are compiling for, so we can remove entries that can never match up front. Overall, this commit brings down build time for us in dev scenarios by about 6%. 100ms might not seem like much, but this is a constant cost that each of our workspace crates has to pay, even when said crate is minuscule. 
--- compiler/rustc_codegen_ssa/src/back/link.rs | 2 +- compiler/rustc_interface/src/passes.rs | 2 +- compiler/rustc_metadata/src/creader.rs | 3 +- compiler/rustc_metadata/src/locator.rs | 84 +++++++++++---------- compiler/rustc_metadata/src/native_libs.rs | 4 +- compiler/rustc_session/src/filesearch.rs | 39 ++++++---- compiler/rustc_session/src/search_paths.rs | 59 ++++++++++++--- compiler/rustc_session/src/session.rs | 20 +++-- 8 files changed, 137 insertions(+), 76 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index fc1f96481cf79..fd1126e852848 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -1647,7 +1647,7 @@ fn get_object_file_path(sess: &Session, name: &str, self_contained: bool) -> Pat return file_path; } } - for search_path in sess.target_filesearch(PathKind::Native).search_paths() { + for search_path in sess.target_filesearch().search_paths(PathKind::Native) { let file_path = search_path.dir.join(name); if file_path.exists() { return file_path; diff --git a/compiler/rustc_interface/src/passes.rs b/compiler/rustc_interface/src/passes.rs index fd850d2f39a5f..7e629c1d18ff2 100644 --- a/compiler/rustc_interface/src/passes.rs +++ b/compiler/rustc_interface/src/passes.rs @@ -175,7 +175,7 @@ fn configure_and_expand( if cfg!(windows) { old_path = env::var_os("PATH").unwrap_or(old_path); let mut new_path = Vec::from_iter( - sess.host_filesearch(PathKind::All).search_paths().map(|p| p.dir.clone()), + sess.host_filesearch().search_paths(PathKind::All).map(|p| p.dir.clone()), ); for path in env::split_paths(&old_path) { if !new_path.contains(&path) { diff --git a/compiler/rustc_metadata/src/creader.rs b/compiler/rustc_metadata/src/creader.rs index e525d94a0c1e4..ca16a66763ac9 100644 --- a/compiler/rustc_metadata/src/creader.rs +++ b/compiler/rustc_metadata/src/creader.rs @@ -507,7 +507,8 @@ impl<'a, 'tcx> CrateLoader<'a, 'tcx> { 
locator.is_proc_macro = true; locator.target = &self.sess.host; locator.tuple = TargetTuple::from_tuple(config::host_tuple()); - locator.filesearch = self.sess.host_filesearch(path_kind); + locator.filesearch = self.sess.host_filesearch(); + locator.path_kind = path_kind; let Some(host_result) = self.load(locator)? else { return Ok(None); diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index ddd97fc66f66e..0b53e5eeaa83a 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -253,9 +253,10 @@ pub(crate) struct CrateLocator<'a> { extra_filename: Option<&'a str>, pub target: &'a Target, pub tuple: TargetTuple, - pub filesearch: FileSearch<'a>, + pub filesearch: &'a FileSearch, pub is_proc_macro: bool, + pub path_kind: PathKind, // Mutable in-progress state or output. crate_rejections: CrateRejections, } @@ -339,7 +340,8 @@ impl<'a> CrateLocator<'a> { extra_filename, target: &sess.target, tuple: sess.opts.target_triple.clone(), - filesearch: sess.target_filesearch(path_kind), + filesearch: sess.target_filesearch(), + path_kind, is_proc_macro: false, crate_rejections: CrateRejections::default(), } @@ -407,47 +409,49 @@ impl<'a> CrateLocator<'a> { // given that `extra_filename` comes from the `-C extra-filename` // option and thus can be anything, and the incorrect match will be // handled safely in `extract_one`. 
- for search_path in self.filesearch.search_paths() { + for search_path in self.filesearch.search_paths(self.path_kind) { debug!("searching {}", search_path.dir.display()); - for spf in search_path.files.iter() { - debug!("testing {}", spf.path.display()); + let spf = &search_path.files; - let f = &spf.file_name_str; - let (hash, kind) = if let Some(f) = f.strip_prefix(rlib_prefix) - && let Some(f) = f.strip_suffix(rlib_suffix) - { - (f, CrateFlavor::Rlib) - } else if let Some(f) = f.strip_prefix(rmeta_prefix) - && let Some(f) = f.strip_suffix(rmeta_suffix) - { - (f, CrateFlavor::Rmeta) - } else if let Some(f) = f.strip_prefix(dylib_prefix) - && let Some(f) = f.strip_suffix(dylib_suffix.as_ref()) - { - (f, CrateFlavor::Dylib) - } else { - if f.starts_with(staticlib_prefix) && f.ends_with(staticlib_suffix.as_ref()) { - self.crate_rejections.via_kind.push(CrateMismatch { - path: spf.path.clone(), - got: "static".to_string(), - }); - } - continue; - }; - - info!("lib candidate: {}", spf.path.display()); + let mut should_check_staticlibs = true; + for (prefix, suffix, kind) in [ + (rlib_prefix.as_str(), rlib_suffix, CrateFlavor::Rlib), + (rmeta_prefix.as_str(), rmeta_suffix, CrateFlavor::Rmeta), + (dylib_prefix, dylib_suffix, CrateFlavor::Dylib), + ] { + if prefix == staticlib_prefix && suffix == staticlib_suffix { + should_check_staticlibs = false; + } + if let Some(matches) = spf.query(prefix, suffix) { + for (hash, spf) in matches { + info!("lib candidate: {}", spf.path.display()); - let (rlibs, rmetas, dylibs) = candidates.entry(hash.to_string()).or_default(); - let path = try_canonicalize(&spf.path).unwrap_or_else(|_| spf.path.clone()); - if seen_paths.contains(&path) { - continue; - }; - seen_paths.insert(path.clone()); - match kind { - CrateFlavor::Rlib => rlibs.insert(path, search_path.kind), - CrateFlavor::Rmeta => rmetas.insert(path, search_path.kind), - CrateFlavor::Dylib => dylibs.insert(path, search_path.kind), - }; + let (rlibs, rmetas, dylibs) = + 
candidates.entry(hash.to_string()).or_default(); + let path = + try_canonicalize(&spf.path).unwrap_or_else(|_| spf.path.to_path_buf()); + if seen_paths.contains(&path) { + continue; + }; + seen_paths.insert(path.clone()); + match kind { + CrateFlavor::Rlib => rlibs.insert(path, search_path.kind), + CrateFlavor::Rmeta => rmetas.insert(path, search_path.kind), + CrateFlavor::Dylib => dylibs.insert(path, search_path.kind), + }; + } + } + } + if let Some(static_matches) = should_check_staticlibs + .then(|| spf.query(staticlib_prefix, staticlib_suffix)) + .flatten() + { + for (_, spf) in static_matches { + self.crate_rejections.via_kind.push(CrateMismatch { + path: spf.path.to_path_buf(), + got: "static".to_string(), + }); + } } } diff --git a/compiler/rustc_metadata/src/native_libs.rs b/compiler/rustc_metadata/src/native_libs.rs index b7695216f3cea..493db498b7c23 100644 --- a/compiler/rustc_metadata/src/native_libs.rs +++ b/compiler/rustc_metadata/src/native_libs.rs @@ -28,10 +28,10 @@ pub fn walk_native_lib_search_dirs( mut f: impl FnMut(&Path, bool /*is_framework*/) -> ControlFlow, ) -> ControlFlow { // Library search paths explicitly supplied by user (`-L` on the command line). - for search_path in sess.target_filesearch(PathKind::Native).cli_search_paths() { + for search_path in sess.target_filesearch().cli_search_paths(PathKind::Native) { f(&search_path.dir, false)?; } - for search_path in sess.target_filesearch(PathKind::Framework).cli_search_paths() { + for search_path in sess.target_filesearch().cli_search_paths(PathKind::Framework) { // Frameworks are looked up strictly in framework-specific paths. 
if search_path.kind != PathKind::All { f(&search_path.dir, true)?; diff --git a/compiler/rustc_session/src/filesearch.rs b/compiler/rustc_session/src/filesearch.rs index 213a94ab88091..4be013fd6fd9c 100644 --- a/compiler/rustc_session/src/filesearch.rs +++ b/compiler/rustc_session/src/filesearch.rs @@ -4,37 +4,44 @@ use std::path::{Path, PathBuf}; use std::{env, fs}; use rustc_fs_util::{fix_windows_verbatim_for_gcc, try_canonicalize}; +use rustc_target::spec::Target; use smallvec::{SmallVec, smallvec}; use crate::search_paths::{PathKind, SearchPath}; #[derive(Clone)] -pub struct FileSearch<'a> { - cli_search_paths: &'a [SearchPath], - tlib_path: &'a SearchPath, - kind: PathKind, +pub struct FileSearch { + cli_search_paths: Vec, + tlib_path: SearchPath, } -impl<'a> FileSearch<'a> { - pub fn cli_search_paths(&self) -> impl Iterator { - let kind = self.kind; +impl FileSearch { + pub fn cli_search_paths<'b>(&'b self, kind: PathKind) -> impl Iterator { self.cli_search_paths.iter().filter(move |sp| sp.kind.matches(kind)) } - pub fn search_paths(&self) -> impl Iterator { - let kind = self.kind; + pub fn search_paths<'b>(&'b self, kind: PathKind) -> impl Iterator { self.cli_search_paths .iter() .filter(move |sp| sp.kind.matches(kind)) - .chain(std::iter::once(self.tlib_path)) + .chain(std::iter::once(&self.tlib_path)) } - pub fn new( - cli_search_paths: &'a [SearchPath], - tlib_path: &'a SearchPath, - kind: PathKind, - ) -> FileSearch<'a> { - FileSearch { cli_search_paths, tlib_path, kind } + pub fn new(cli_search_paths: &[SearchPath], tlib_path: &SearchPath, target: &Target) -> Self { + let this = FileSearch { + cli_search_paths: cli_search_paths.to_owned(), + tlib_path: tlib_path.clone(), + }; + this.refine(&["lib", &target.staticlib_prefix, &target.dll_prefix]) + } + // Produce a new file search from this search that has a smaller set of candidates. 
+ fn refine(mut self, allowed_prefixes: &[&str]) -> FileSearch { + self.cli_search_paths + .iter_mut() + .for_each(|search_paths| search_paths.files.retain(allowed_prefixes)); + self.tlib_path.files.retain(allowed_prefixes); + + self } } diff --git a/compiler/rustc_session/src/search_paths.rs b/compiler/rustc_session/src/search_paths.rs index c148b09c718a0..78473fccd2de0 100644 --- a/compiler/rustc_session/src/search_paths.rs +++ b/compiler/rustc_session/src/search_paths.rs @@ -1,4 +1,5 @@ use std::path::{Path, PathBuf}; +use std::sync::Arc; use rustc_macros::{Decodable, Encodable, HashStable_Generic}; use rustc_target::spec::TargetTuple; @@ -10,9 +11,44 @@ use crate::filesearch::make_target_lib_path; pub struct SearchPath { pub kind: PathKind, pub dir: PathBuf, - pub files: Vec, + pub files: FilesIndex, } +/// [FilesIndex] contains paths that can be efficiently looked up with (prefix, suffix) pairs. +#[derive(Clone, Debug)] +pub struct FilesIndex(Vec<(Arc, SearchPathFile)>); + +impl FilesIndex { + /// Look up [SearchPathFile] by (prefix, suffix) pair. + pub fn query<'this, 'prefix, 'suffix>( + &'this self, + prefix: &'prefix str, + suffix: &'suffix str, + ) -> Option + use<'this, 'prefix, 'suffix>> + { + let start = self.0.partition_point(|(k, _)| **k < *prefix); + if start == self.0.len() { + return None; + } + let end = self.0[start..].partition_point(|(k, _)| k.starts_with(prefix)); + let prefixed_items = &self.0[start..][..end]; + + let ret = prefixed_items.into_iter().filter_map(move |(k, v)| { + k.ends_with(suffix).then(|| { + ( + String::from( + &v.file_name_str[prefix.len()..v.file_name_str.len() - suffix.len()], + ), + v, + ) + }) + }); + Some(ret) + } + pub fn retain(&mut self, prefixes: &[&str]) { + self.0.retain(|(k, _)| prefixes.iter().any(|prefix| k.starts_with(prefix))); + } +} /// The obvious implementation of `SearchPath::files` is a `Vec`. 
But /// it is searched repeatedly by `find_library_crate`, and the searches involve /// checking the prefix and suffix of the filename of each `PathBuf`. This is @@ -26,8 +62,8 @@ pub struct SearchPath { /// UTF-8, and so a non-UTF-8 filename couldn't be one we're looking for.) #[derive(Clone, Debug)] pub struct SearchPathFile { - pub path: PathBuf, - pub file_name_str: String, + pub path: Arc, + pub file_name_str: Arc, } #[derive(PartialEq, Clone, Copy, Debug, Hash, Eq, Encodable, Decodable, HashStable_Generic)] @@ -98,20 +134,25 @@ impl SearchPath { pub fn new(kind: PathKind, dir: PathBuf) -> Self { // Get the files within the directory. - let files = match std::fs::read_dir(&dir) { + let mut files = match std::fs::read_dir(&dir) { Ok(files) => files .filter_map(|e| { e.ok().and_then(|e| { - e.file_name().to_str().map(|s| SearchPathFile { - path: e.path(), - file_name_str: s.to_string(), + e.file_name().to_str().map(|s| { + let file_name_str: Arc = s.into(); + (Arc::clone(&file_name_str), SearchPathFile { + path: e.path().into(), + file_name_str, + }) }) }) }) .collect::>(), - Err(..) => vec![], - }; + Err(..) 
=> Default::default(), + }; + files.sort_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs)); + let files = FilesIndex(files); SearchPath { kind, dir, files } } } diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index d8d6b79974fe1..29fabdd1deb8d 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -44,8 +44,9 @@ use crate::config::{ InstrumentCoverage, OptLevel, OutFileName, OutputType, RemapPathScopeComponents, SwitchWithOptPath, }; +use crate::filesearch::FileSearch; use crate::parse::{ParseSess, add_feature_diagnostics}; -use crate::search_paths::{PathKind, SearchPath}; +use crate::search_paths::SearchPath; use crate::{errors, filesearch, lint}; struct OptimizationFuel { @@ -218,6 +219,9 @@ pub struct Session { /// This is mainly useful for other tools that reads that debuginfo to figure out /// how to call the compiler with the same arguments. pub expanded_args: Vec, + + target_filesearch: FileSearch, + host_filesearch: FileSearch, } #[derive(PartialEq, Eq, PartialOrd, Ord)] @@ -443,11 +447,11 @@ impl Session { format!("__rustc_proc_macro_decls_{:08x}__", stable_crate_id.as_u64()) } - pub fn target_filesearch(&self, kind: PathKind) -> filesearch::FileSearch<'_> { - filesearch::FileSearch::new(&self.opts.search_paths, &self.target_tlib_path, kind) + pub fn target_filesearch(&self) -> &filesearch::FileSearch { + &self.target_filesearch } - pub fn host_filesearch(&self, kind: PathKind) -> filesearch::FileSearch<'_> { - filesearch::FileSearch::new(&self.opts.search_paths, &self.host_tlib_path, kind) + pub fn host_filesearch(&self) -> &filesearch::FileSearch { + &self.host_filesearch } /// Returns a list of directories where target-specific tool binaries are located. 
Some fallback @@ -1111,7 +1115,9 @@ pub fn build_session( }); let asm_arch = if target.allow_asm { InlineAsmArch::from_str(&target.arch).ok() } else { None }; - + let target_filesearch = + filesearch::FileSearch::new(&sopts.search_paths, &target_tlib_path, &target); + let host_filesearch = filesearch::FileSearch::new(&sopts.search_paths, &host_tlib_path, &host); let sess = Session { target, host, @@ -1138,6 +1144,8 @@ pub fn build_session( cfg_version, using_internal_features, expanded_args, + target_filesearch, + host_filesearch, }; validate_commandline_args_with_session_available(&sess);