Skip to content

Optimize workspace search for .venv and .conda #81

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions crates/pet-core/src/python_environment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ pub struct PythonEnvironment {
// Some of the known symlinks for the environment.
// E.g. in the case of Homebrew there are a number of symlinks that are created.
pub symlinks: Option<Vec<PathBuf>>,
/// The folder/path that was searched to find this environment.
/// Generally applies to workspace folder, and means that the environment was found in this folder or is related to this folder.
/// Similar in meaning to `project` but is more of a search path.
pub search_path: Option<PathBuf>,
}

impl Ord for PythonEnvironment {
Expand Down Expand Up @@ -109,6 +113,7 @@ impl Default for PythonEnvironment {
project: None,
arch: None,
symlinks: None,
search_path: None,
}
}
}
Expand Down Expand Up @@ -159,6 +164,9 @@ impl std::fmt::Display for PythonEnvironment {
if let Some(project) = &self.project {
writeln!(f, " Project : {}", project.to_str().unwrap()).unwrap_or_default();
}
if let Some(search_path) = &self.search_path {
writeln!(f, " Search Path : {}", search_path.to_str().unwrap()).unwrap_or_default();
}
if let Some(arch) = &self.arch {
writeln!(f, " Architecture: {}", arch).unwrap_or_default();
}
Expand Down Expand Up @@ -208,6 +216,7 @@ pub struct PythonEnvironmentBuilder {
project: Option<PathBuf>,
arch: Option<Architecture>,
symlinks: Option<Vec<PathBuf>>,
search_path: Option<PathBuf>,
}

impl PythonEnvironmentBuilder {
Expand All @@ -223,6 +232,7 @@ impl PythonEnvironmentBuilder {
project: None,
arch: None,
symlinks: None,
search_path: None,
}
}

Expand Down Expand Up @@ -285,6 +295,11 @@ impl PythonEnvironmentBuilder {
self
}

pub fn search_path(mut self, search_path: Option<PathBuf>) -> Self {
self.search_path = search_path;
self
}

fn update_symlinks_and_exe(&mut self, symlinks: Option<Vec<PathBuf>>) {
let mut all = vec![];
if let Some(ref exe) = self.executable {
Expand Down Expand Up @@ -337,6 +352,7 @@ impl PythonEnvironmentBuilder {
manager: self.manager,
project: self.project,
arch: self.arch,
search_path: self.search_path,
symlinks,
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/pet-poetry/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ impl Locator for Poetry {
}
fn configure(&self, config: &Configuration) {
if let Some(search_paths) = &config.search_paths {
self.project_dirs.lock().unwrap().clear();
if !search_paths.is_empty() {
self.project_dirs.lock().unwrap().clear();
self.project_dirs
.lock()
.unwrap()
Expand Down
160 changes: 79 additions & 81 deletions crates/pet/src/find.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ pub fn find_and_report_envs(
locators,
false,
&global_env_search_paths,
None,
);
summary.lock().unwrap().find_path_time = start.elapsed();
});
Expand Down Expand Up @@ -138,6 +139,7 @@ pub fn find_and_report_envs(
locators,
false,
&global_env_search_paths,
None,
);
summary.lock().unwrap().find_global_virtual_envs_time = start.elapsed();
});
Expand All @@ -161,8 +163,6 @@ pub fn find_and_report_envs(
search_paths,
reporter,
locators,
0,
1,
);
summary.lock().unwrap().find_search_paths_time = start.elapsed();
});
Expand All @@ -173,61 +173,50 @@ pub fn find_and_report_envs(
}

fn find_python_environments_in_workspace_folders_recursive(
paths: Vec<PathBuf>,
workspace_folders: Vec<PathBuf>,
reporter: &dyn Reporter,
locators: &Arc<Vec<Arc<dyn Locator>>>,
depth: u32,
max_depth: u32,
) {
thread::scope(|s| {
// Find in cwd
let paths1 = paths.clone();
s.spawn(|| {
find_python_environments(paths1, reporter, locators, true, &[]);

if depth >= max_depth {
return;
}

let bin = if cfg!(windows) { "Scripts" } else { "bin" };
// If the folder has a bin or scripts, then ignore it, it's most likely an env.
// I.e. no point looking for python environments in a Python environment.
let paths = paths
.into_iter()
.filter(|p| !p.join(bin).exists())
.collect::<Vec<PathBuf>>();
for workspace_folder in workspace_folders {
find_python_environments_in_paths_with_locators(
vec![
// Possible this is a virtual env
workspace_folder.clone(),
// Optimize for finding these first.
workspace_folder.join(".venv"),
// Optimize for finding these first.
workspace_folder.join(".conda"),
],
locators,
reporter,
true,
&[],
Some(workspace_folder.clone()),
);

if workspace_folder.join(bin).exists() {
// If the folder has a bin or scripts, then ignore it, it's most likely an env.
// I.e. no point looking for python environments in a Python environment.
continue;
}

for path in paths {
if let Ok(reader) = fs::read_dir(&path) {
let reader = reader
if let Ok(reader) = fs::read_dir(&workspace_folder) {
for folder in reader
.filter_map(Result::ok)
.filter(|d| d.file_type().is_ok_and(|f| f.is_dir()))
.map(|p| p.path())
.filter(should_search_for_environments_in_path);

// Take a batch of 20 items at a time.
let reader = reader.fold(vec![], |f, a| {
let mut f = f;
if f.is_empty() {
f.push(vec![a]);
return f;
}
let last_item = f.last_mut().unwrap();
if last_item.is_empty() || last_item.len() < 20 {
last_item.push(a);
return f;
}
f.push(vec![a]);
f
});

for entry in reader {
find_python_environments_in_workspace_folders_recursive(
entry,
.filter(should_search_for_environments_in_path)
{
find_python_environments(
vec![folder],
reporter,
locators,
depth + 1,
max_depth,
true,
&[],
Some(workspace_folder.clone()),
);
}
}
Expand All @@ -242,22 +231,23 @@ fn find_python_environments(
locators: &Arc<Vec<Arc<dyn Locator>>>,
is_workspace_folder: bool,
global_env_search_paths: &[PathBuf],
search_path: Option<PathBuf>,
) {
if paths.is_empty() {
return;
}
thread::scope(|s| {
let chunks = if is_workspace_folder { paths.len() } else { 1 };
for item in paths.chunks(chunks) {
let lst = item.to_vec().clone();
for item in paths {
let locators = locators.clone();
let search_path = search_path.clone();
s.spawn(move || {
find_python_environments_in_paths_with_locators(
lst,
vec![item],
&locators,
reporter,
is_workspace_folder,
global_env_search_paths,
search_path,
);
});
}
Expand All @@ -270,55 +260,63 @@ fn find_python_environments_in_paths_with_locators(
reporter: &dyn Reporter,
is_workspace_folder: bool,
global_env_search_paths: &[PathBuf],
search_path: Option<PathBuf>,
) {
let executables = if is_workspace_folder {
// If we're in a workspace folder, then we only need to look for bin/python or bin/python.exe
// As workspace folders generally have either virtual env or conda env or the like.
// They will not have environments that will ONLY have a file like `bin/python3`.
// I.e. bin/python will almost always exist.
paths
.iter()
for path in paths {
let executables = if is_workspace_folder {
// If we're in a workspace folder, then we only need to look for bin/python or bin/python.exe
// As workspace folders generally have either virtual env or conda env or the like.
// They will not have environments that will ONLY have a file like `bin/python3`.
// I.e. bin/python will almost always exist.

// Paths like /Library/Frameworks/Python.framework/Versions/3.10/bin can end up in the current PATH variable.
// Hence do not just look for files in a bin directory of the path.
.flat_map(|p| find_executable(p))
.filter_map(Option::Some)
.collect::<Vec<PathBuf>>()
} else {
paths
.iter()
if let Some(executable) = find_executable(&path) {
vec![executable]
} else {
vec![]
}
} else {
// Paths like /Library/Frameworks/Python.framework/Versions/3.10/bin can end up in the current PATH variable.
// Hence do not just look for files in a bin directory of the path.
.flat_map(find_executables)
.filter(|p| {
// Exclude python2 on macOS
if std::env::consts::OS == "macos" {
return p.to_str().unwrap_or_default() != "/usr/bin/python2";
}
true
})
.collect::<Vec<PathBuf>>()
};
find_executables(path)
.into_iter()
.filter(|p| {
// Exclude python2 on macOS
if std::env::consts::OS == "macos" {
return p.to_str().unwrap_or_default() != "/usr/bin/python2";
}
true
})
.collect::<Vec<PathBuf>>()
};

identify_python_executables_using_locators(
executables,
locators,
reporter,
global_env_search_paths,
);
identify_python_executables_using_locators(
executables,
locators,
reporter,
global_env_search_paths,
search_path.clone(),
);
}
}

fn identify_python_executables_using_locators(
executables: Vec<PathBuf>,
locators: &Arc<Vec<Arc<dyn Locator>>>,
reporter: &dyn Reporter,
global_env_search_paths: &[PathBuf],
search_path: Option<PathBuf>,
) {
for exe in executables.into_iter() {
let executable = exe.clone();
let env = PythonEnv::new(exe.to_owned(), None, None);
if let Some(env) =
identify_python_environment_using_locators(&env, locators, global_env_search_paths)
{
if let Some(env) = identify_python_environment_using_locators(
&env,
locators,
global_env_search_paths,
search_path.clone(),
) {
reporter.report_environment(&env);
if let Some(manager) = env.manager {
reporter.report_manager(&manager);
Expand Down
6 changes: 5 additions & 1 deletion crates/pet/src/jsonrpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,15 @@ pub fn handle_resolve(context: Arc<Context>, id: u32, params: Value) {
match serde_json::from_value::<ResolveOptions>(params.clone()) {
Ok(request_options) => {
let executable = request_options.executable.clone();
let search_paths = context.configuration.read().unwrap().clone().search_paths;
let search_paths = search_paths.unwrap_or_default();
// Start in a new thread, we can have multiple resolve requests.
thread::spawn(move || {
let now = SystemTime::now();
trace!("Resolving env {:?}", executable);
if let Some(result) = resolve_environment(&executable, &context.locators) {
if let Some(result) =
resolve_environment(&executable, &context.locators, search_paths)
{
if let Some(resolved) = result.resolved {
// Gather telemetry of this resolved env and see what we got wrong.
let _ = report_inaccuracies_identified_after_resolving(
Expand Down
Loading
Loading