From e28c1f6a694623ef1be6a7b47facb07839170159 Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Wed, 3 Jul 2024 13:09:10 +1000 Subject: [PATCH 1/7] Optimize workspace serach for .venv and .conda --- crates/pet-core/src/python_environment.rs | 16 +++ crates/pet-poetry/src/lib.rs | 2 +- crates/pet/src/find.rs | 160 +++++++++++----------- crates/pet/src/locators.rs | 25 +++- crates/pet/src/resolve.rs | 2 +- 5 files changed, 116 insertions(+), 89 deletions(-) diff --git a/crates/pet-core/src/python_environment.rs b/crates/pet-core/src/python_environment.rs index bcd1de16..cba89949 100644 --- a/crates/pet-core/src/python_environment.rs +++ b/crates/pet-core/src/python_environment.rs @@ -71,6 +71,10 @@ pub struct PythonEnvironment { // Some of the known symlinks for the environment. // E.g. in the case of Homebrew there are a number of symlinks that are created. pub symlinks: Option>, + /// The folder/path that was searched to find this environment. + /// Generally applies to workspace folder, and means that the environment was found in this folder or is related to this folder. + /// Similar in meaqning to `project` but is more of a search path. + pub search_path: Option, } impl Ord for PythonEnvironment { @@ -109,6 +113,7 @@ impl Default for PythonEnvironment { project: None, arch: None, symlinks: None, + search_path: None, } } } @@ -159,6 +164,9 @@ impl std::fmt::Display for PythonEnvironment { if let Some(project) = &self.project { writeln!(f, " Project : {}", project.to_str().unwrap()).unwrap_or_default(); } + if let Some(search_path) = &self.search_path { + writeln!(f, " Search Path : {}", search_path.to_str().unwrap()).unwrap_or_default(); + } if let Some(arch) = &self.arch { writeln!(f, " Architecture: {}", arch).unwrap_or_default(); } @@ -208,6 +216,7 @@ pub struct PythonEnvironmentBuilder { project: Option, arch: Option, symlinks: Option>, + search_path: Option, } impl PythonEnvironmentBuilder { @@ -223,6 +232,7 @@ impl PythonEnvironmentBuilder { project: None, arch: None, symlinks: None, + search_path: None, } } @@ -285,6 +295,11 @@ impl PythonEnvironmentBuilder { self } + pub fn search_path(mut self, search_path: Option) -> Self { + self.search_path = search_path; + self + } + fn update_symlinks_and_exe(&mut self, symlinks: Option>) { let mut all = vec![]; if let Some(ref exe) = self.executable { @@ -337,6 +352,7 @@ impl PythonEnvironmentBuilder { manager: self.manager, project: self.project, arch: self.arch, + search_path: self.search_path, symlinks, } } diff --git a/crates/pet-poetry/src/lib.rs b/crates/pet-poetry/src/lib.rs index 58bfaa93..8a9d9ac2 100644 --- a/crates/pet-poetry/src/lib.rs +++ b/crates/pet-poetry/src/lib.rs @@ -140,8 +140,8 @@ impl Locator for Poetry { } fn configure(&self, config: &Configuration) { if let Some(search_paths) = &config.search_paths { + self.project_dirs.lock().unwrap().clear(); if !search_paths.is_empty() { - self.project_dirs.lock().unwrap().clear(); self.project_dirs .lock() .unwrap() diff --git a/crates/pet/src/find.rs b/crates/pet/src/find.rs index c1903297..b35c33c0 100644 --- a/crates/pet/src/find.rs +++ b/crates/pet/src/find.rs @@ -109,6 +109,7 @@ pub fn find_and_report_envs( locators, false, &global_env_search_paths, + None, ); summary.lock().unwrap().find_path_time = start.elapsed(); }); @@ -138,6 +139,7 @@ pub fn find_and_report_envs( locators, false, &global_env_search_paths, + None, ); summary.lock().unwrap().find_global_virtual_envs_time = start.elapsed(); }); @@ -161,8 +163,6 @@ pub fn find_and_report_envs( search_paths, reporter, locators, - 0, - 1, ); summary.lock().unwrap().find_search_paths_time = start.elapsed(); }); @@ -173,61 +173,50 @@ pub fn find_and_report_envs( } fn find_python_environments_in_workspace_folders_recursive( - paths: Vec, + workspace_folders: Vec, reporter: &dyn Reporter, locators: &Arc>>, - depth: u32, - max_depth: u32, ) { thread::scope(|s| { - // Find in cwd - let paths1 = paths.clone(); s.spawn(|| { - find_python_environments(paths1, reporter, locators, true, &[]); - - if depth >= max_depth { - return; - } - let bin = if cfg!(windows) { "Scripts" } else { "bin" }; - // If the folder has a bin or scripts, then ignore it, its most likely an env. - // I.e. no point looking for python environments in a Python environment. - let paths = paths - .into_iter() - .filter(|p| !p.join(bin).exists()) - .collect::>(); + for workspace_folder in workspace_folders { + find_python_environments_in_paths_with_locators( + vec![ + // Possible this is a virtual env + workspace_folder.clone(), + // Optimize for finding these first. + workspace_folder.join(".venv"), + // Optimize for finding these first. + workspace_folder.join(".conda"), + ], + locators, + reporter, + true, + &[], + Some(workspace_folder.clone()), + ); + + if workspace_folder.join(bin).exists() { + // If the folder has a bin or scripts, then ignore it, its most likely an env. + // I.e. no point looking for python environments in a Python environment. + continue; + } - for path in paths { - if let Ok(reader) = fs::read_dir(&path) { - let reader = reader + if let Ok(reader) = fs::read_dir(&workspace_folder) { + for folder in reader .filter_map(Result::ok) .filter(|d| d.file_type().is_ok_and(|f| f.is_dir())) .map(|p| p.path()) - .filter(should_search_for_environments_in_path); - - // Take a batch of 20 items at a time. - let reader = reader.fold(vec![], |f, a| { - let mut f = f; - if f.is_empty() { - f.push(vec![a]); - return f; - } - let last_item = f.last_mut().unwrap(); - if last_item.is_empty() || last_item.len() < 20 { - last_item.push(a); - return f; - } - f.push(vec![a]); - f - }); - - for entry in reader { - find_python_environments_in_workspace_folders_recursive( - entry, + .filter(should_search_for_environments_in_path) + { + find_python_environments( + vec![folder], reporter, locators, - depth + 1, - max_depth, + true, + &[], + Some(workspace_folder.clone()), ); } } @@ -242,22 +231,23 @@ fn find_python_environments( locators: &Arc>>, is_workspace_folder: bool, global_env_search_paths: &[PathBuf], + search_path: Option, ) { if paths.is_empty() { return; } thread::scope(|s| { - let chunks = if is_workspace_folder { paths.len() } else { 1 }; - for item in paths.chunks(chunks) { - let lst = item.to_vec().clone(); + for item in paths { let locators = locators.clone(); + let search_path = search_path.clone(); s.spawn(move || { find_python_environments_in_paths_with_locators( - lst, + vec![item], &locators, reporter, is_workspace_folder, global_env_search_paths, + search_path, ); }); } @@ -270,41 +260,45 @@ fn find_python_environments_in_paths_with_locators( reporter: &dyn Reporter, is_workspace_folder: bool, global_env_search_paths: &[PathBuf], + search_path: Option, ) { - let executables = if is_workspace_folder { - // If we're in a workspace folder, then we only need to look for bin/python or bin/python.exe - // As workspace folders generally have either virtual env or conda env or the like. - // They will not have environments that will ONLY have a file like `bin/python3`. - // I.e. bin/python will almost always exist. - paths - .iter() + for path in paths { + let executables = if is_workspace_folder { + // If we're in a workspace folder, then we only need to look for bin/python or bin/python.exe + // As workspace folders generally have either virtual env or conda env or the like. + // They will not have environments that will ONLY have a file like `bin/python3`. + // I.e. bin/python will almost always exist. + // Paths like /Library/Frameworks/Python.framework/Versions/3.10/bin can end up in the current PATH variable. // Hence do not just look for files in a bin directory of the path. - .flat_map(|p| find_executable(p)) - .filter_map(Option::Some) - .collect::>() - } else { - paths - .iter() + if let Some(executable) = find_executable(&path) { + vec![executable] + } else { + vec![] + } + } else { // Paths like /Library/Frameworks/Python.framework/Versions/3.10/bin can end up in the current PATH variable. // Hence do not just look for files in a bin directory of the path. - .flat_map(find_executables) - .filter(|p| { - // Exclude python2 on macOS - if std::env::consts::OS == "macos" { - return p.to_str().unwrap_or_default() != "/usr/bin/python2"; - } - true - }) - .collect::>() - }; + find_executables(path) + .into_iter() + .filter(|p| { + // Exclude python2 on macOS + if std::env::consts::OS == "macos" { + return p.to_str().unwrap_or_default() != "/usr/bin/python2"; + } + true + }) + .collect::>() + }; - identify_python_executables_using_locators( - executables, - locators, - reporter, - global_env_search_paths, - ); + identify_python_executables_using_locators( + executables, + locators, + reporter, + global_env_search_paths, + search_path.clone(), + ); + } } fn identify_python_executables_using_locators( @@ -312,13 +306,17 @@ fn identify_python_executables_using_locators( locators: &Arc>>, reporter: &dyn Reporter, global_env_search_paths: &[PathBuf], + search_path: Option, ) { for exe in executables.into_iter() { let executable = exe.clone(); let env = PythonEnv::new(exe.to_owned(), None, None); - if let Some(env) = - identify_python_environment_using_locators(&env, locators, global_env_search_paths) - { + if let Some(env) = identify_python_environment_using_locators( + &env, + locators, + global_env_search_paths, + search_path.clone(), + ) { reporter.report_environment(&env); if let Some(manager) = env.manager { reporter.report_manager(&manager); diff --git a/crates/pet/src/locators.rs b/crates/pet/src/locators.rs index 6bb3ae97..5f355e77 100644 --- a/crates/pet/src/locators.rs +++ b/crates/pet/src/locators.rs @@ -80,16 +80,24 @@ pub fn create_locators(conda_locator: Arc) -> Arc>> Arc::new(locators) } +/// Identify the Python environment using the locators. +/// search_path : Generally refers to original folder that was being searched when the env was found. pub fn identify_python_environment_using_locators( env: &PythonEnv, locators: &[Arc], global_env_search_paths: &[PathBuf], + search_path: Option, ) -> Option { let executable = env.executable.clone(); - if let Some(env) = locators.iter().fold( - None, - |e, loc| if e.is_some() { e } else { loc.try_from(env) }, - ) { + if let Some(mut env) = + locators.iter().fold( + None, + |e, loc| if e.is_some() { e } else { loc.try_from(env) }, + ) + { + if env.project.is_none() { + env.search_path = search_path; + } return Some(env); } @@ -98,7 +106,7 @@ pub fn identify_python_environment_using_locators( // We try to get the interpreter info, hoping that the real exe returned might be identifiable. if let Some(resolved_env) = ResolvedPythonEnv::from(&executable) { let env = resolved_env.to_python_env(); - if let Some(env) = + if let Some(mut env) = locators.iter().fold( None, |e, loc| if e.is_some() { e } else { loc.try_from(&env) }, @@ -109,6 +117,9 @@ pub fn identify_python_environment_using_locators( executable, env.category ); + if env.project.is_none() { + env.search_path = search_path; + } // TODO: Telemetry point. // As we had to spawn earlier. return Some(env); @@ -136,7 +147,9 @@ pub fn identify_python_environment_using_locators( "Unknown Env ({:?}) in Path resolved as {:?} and reported as {:?}", executable, resolved_env, fallback_category ); - return Some(create_unknown_env(resolved_env, fallback_category)); + let mut env = create_unknown_env(resolved_env, fallback_category); + env.search_path = search_path; + return Some(env); } } None diff --git a/crates/pet/src/resolve.rs b/crates/pet/src/resolve.rs index 59e665f8..12a8c806 100644 --- a/crates/pet/src/resolve.rs +++ b/crates/pet/src/resolve.rs @@ -29,7 +29,7 @@ pub fn resolve_environment( let global_env_search_paths: Vec = get_search_paths_from_env_variables(&environment); if let Some(env) = - identify_python_environment_using_locators(&env, locators, &global_env_search_paths) + identify_python_environment_using_locators(&env, locators, &global_env_search_paths, None) { // Ok we got the environment. // Now try to resolve this fully, by spawning python. From eaff0baf0b4df7b294134534151b17ba74e6bcd5 Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Wed, 3 Jul 2024 13:18:06 +1000 Subject: [PATCH 2/7] wip --- crates/pet/tests/ci_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pet/tests/ci_test.rs b/crates/pet/tests/ci_test.rs index ef75a40d..70e0c6e5 100644 --- a/crates/pet/tests/ci_test.rs +++ b/crates/pet/tests/ci_test.rs @@ -313,7 +313,7 @@ fn verify_we_can_get_same_env_info_using_from_with_exe( let env = PythonEnv::new(executable.clone(), None, None); let resolved = - identify_python_environment_using_locators(&env, &locators, &global_env_search_paths) + identify_python_environment_using_locators(&env, &locators, &global_env_search_paths, None) .expect( format!( "Failed to resolve environment using `resolve` for {:?}", From fd0f4f2bd18ccea92c0abd10bc20cabac8a8af71 Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Wed, 3 Jul 2024 13:31:48 +1000 Subject: [PATCH 3/7] wip --- crates/pet/src/jsonrpc.rs | 10 +++++++++- crates/pet/src/locators.rs | 37 ++++++++++++++++++++++++++++++------- crates/pet/src/resolve.rs | 6 ++++-- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/crates/pet/src/jsonrpc.rs b/crates/pet/src/jsonrpc.rs index fc3cb724..0cb50148 100644 --- a/crates/pet/src/jsonrpc.rs +++ b/crates/pet/src/jsonrpc.rs @@ -149,11 +149,19 @@ pub fn handle_resolve(context: Arc, id: u32, params: Value) { match serde_json::from_value::(params.clone()) { Ok(request_options) => { let executable = request_options.executable.clone(); + let search_paths = context.configuration.read().unwrap().clone().search_paths; + let search_paths = if let Some(search_paths) = search_paths { + search_paths + } else { + vec![] + }; // Start in a new thread, we can have multiple resolve requests. thread::spawn(move || { let now = SystemTime::now(); trace!("Resolving env {:?}", executable); - if let Some(result) = resolve_environment(&executable, &context.locators) { + if let Some(result) = + resolve_environment(&executable, &context.locators, search_paths) + { if let Some(resolved) = result.resolved { // Gather telemetry of this resolved env and see what we got wrong. let _ = report_inaccuracies_identified_after_resolving( diff --git a/crates/pet/src/locators.rs b/crates/pet/src/locators.rs index 5f355e77..5c565eae 100644 --- a/crates/pet/src/locators.rs +++ b/crates/pet/src/locators.rs @@ -89,15 +89,18 @@ pub fn identify_python_environment_using_locators( search_path: Option, ) -> Option { let executable = env.executable.clone(); + let search_paths = if let Some(search_path) = search_path { + vec![search_path] + } else { + vec![] + }; if let Some(mut env) = locators.iter().fold( None, |e, loc| if e.is_some() { e } else { loc.try_from(env) }, ) { - if env.project.is_none() { - env.search_path = search_path; - } + identify_and_set_search_path(&mut env, &search_paths); return Some(env); } @@ -117,9 +120,7 @@ pub fn identify_python_environment_using_locators( executable, env.category ); - if env.project.is_none() { - env.search_path = search_path; - } + identify_and_set_search_path(&mut env, &search_paths); // TODO: Telemetry point. // As we had to spawn earlier. return Some(env); @@ -148,13 +149,35 @@ pub fn identify_python_environment_using_locators( executable, resolved_env, fallback_category ); let mut env = create_unknown_env(resolved_env, fallback_category); - env.search_path = search_path; + identify_and_set_search_path(&mut env, &search_paths); return Some(env); } } None } +/// Assume we found a .venv environment, generally these are specific to a workspace folder, i.e. they belong in a worksapce folder. +/// If thats the case then verify this by checking if the workspace folder is a parent of the prefix (.venv folder). +/// If it is, and there is not project set, then set the search_path to the workspace folder. +pub fn identify_and_set_search_path(env: &mut PythonEnvironment, search_path: &Vec) { + if search_path.is_empty() || env.project.is_some() { + return; + } + if env.category == PythonEnvironmentCategory::Conda + || env.category == PythonEnvironmentCategory::Venv + || env.category == PythonEnvironmentCategory::VirtualEnv + { + if let Some(prefix) = &env.prefix { + for path in search_path { + if path.starts_with(prefix) { + env.search_path = Some(path.clone()); + break; + } + } + } + } +} + fn create_unknown_env( resolved_env: ResolvedPythonEnv, fallback_category: Option, diff --git a/crates/pet/src/resolve.rs b/crates/pet/src/resolve.rs index 12a8c806..cb134a88 100644 --- a/crates/pet/src/resolve.rs +++ b/crates/pet/src/resolve.rs @@ -11,7 +11,7 @@ use pet_core::{ use pet_env_var_path::get_search_paths_from_env_variables; use pet_python_utils::env::{PythonEnv, ResolvedPythonEnv}; -use crate::locators::identify_python_environment_using_locators; +use crate::locators::{identify_and_set_search_path, identify_python_environment_using_locators}; #[derive(Debug)] pub struct ResolvedEnvironment { @@ -22,15 +22,17 @@ pub struct ResolvedEnvironment { pub fn resolve_environment( executable: &PathBuf, locators: &Arc>>, + search_paths: Vec, ) -> Option { // First check if this is a known environment let env = PythonEnv::new(executable.to_owned(), None, None); let environment = EnvironmentApi::new(); let global_env_search_paths: Vec = get_search_paths_from_env_variables(&environment); - if let Some(env) = + if let Some(mut env) = identify_python_environment_using_locators(&env, locators, &global_env_search_paths, None) { + identify_and_set_search_path(&mut env, &search_paths); // Ok we got the environment. // Now try to resolve this fully, by spawning python. if let Some(ref executable) = env.executable { From aece9e343eeee6a37408869a7b7e874b733cbec5 Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Wed, 3 Jul 2024 13:33:11 +1000 Subject: [PATCH 4/7] wip --- crates/pet/tests/ci_test.rs | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/crates/pet/tests/ci_test.rs b/crates/pet/tests/ci_test.rs index 70e0c6e5..f9568851 100644 --- a/crates/pet/tests/ci_test.rs +++ b/crates/pet/tests/ci_test.rs @@ -303,7 +303,8 @@ fn verify_we_can_get_same_env_info_using_from_with_exe( let os_environment = EnvironmentApi::new(); let conda_locator = Arc::new(Conda::from(&os_environment)); let mut config = Configuration::default(); - config.search_paths = Some(vec![project_dir.clone()]); + let search_paths = vec![project_dir.clone()]; + config.search_paths = Some(search_paths.clone()); let locators = create_locators(conda_locator.clone()); for locator in locators.iter() { locator.configure(&config); @@ -312,15 +313,19 @@ fn verify_we_can_get_same_env_info_using_from_with_exe( get_search_paths_from_env_variables(&os_environment); let env = PythonEnv::new(executable.clone(), None, None); - let resolved = - identify_python_environment_using_locators(&env, &locators, &global_env_search_paths, None) - .expect( - format!( - "Failed to resolve environment using `resolve` for {:?}", - environment - ) - .as_str(), - ); + let resolved = identify_python_environment_using_locators( + &env, + &locators, + &global_env_search_paths, + Some(project_dir.clone()), + ) + .expect( + format!( + "Failed to resolve environment using `resolve` for {:?}", + environment + ) + .as_str(), + ); trace!( "For exe {:?} we got Environment = {:?}, To compare against {:?}", executable, From a4b0cb38b163e13beb81cada5ba570e9fb49e462 Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Wed, 3 Jul 2024 13:34:59 +1000 Subject: [PATCH 5/7] wip --- crates/pet/src/locators.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/pet/src/locators.rs b/crates/pet/src/locators.rs index 5c565eae..71248c02 100644 --- a/crates/pet/src/locators.rs +++ b/crates/pet/src/locators.rs @@ -163,6 +163,12 @@ pub fn identify_and_set_search_path(env: &mut PythonEnvironment, search_path: &V if search_path.is_empty() || env.project.is_some() { return; } + + // All other environments generally need to be found globally, + // If we end up with some env thats not found globally, but only found in a special folder for some reason, + // then thats a weird situation, either way, when we cache the result it will re-appear (however for all other workspaces as well) + // Thats fine for now (if users complain then we'll find out that there's a problem and we can fix it then). + // Else no need to try and identify/fix edge cases that may not exist. if env.category == PythonEnvironmentCategory::Conda || env.category == PythonEnvironmentCategory::Venv || env.category == PythonEnvironmentCategory::VirtualEnv From 7d212dc6b9cfc25be75f732c2447e9f80610e94a Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Wed, 3 Jul 2024 13:35:54 +1000 Subject: [PATCH 6/7] fixes --- crates/pet/tests/ci_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pet/tests/ci_test.rs b/crates/pet/tests/ci_test.rs index f9568851..3b9087ad 100644 --- a/crates/pet/tests/ci_test.rs +++ b/crates/pet/tests/ci_test.rs @@ -506,7 +506,7 @@ fn verify_we_can_get_same_env_info_using_resolve_with_exe( locator.configure(&config); } - let env = resolve_environment(&executable, &locators).expect( + let env = resolve_environment(&executable, &locators, vec![project_dir.clone()]).expect( format!( "Failed to resolve environment using `resolve` for {:?}", environment From 9e3b698195341f585c0872e62d40fd76ecd0c097 Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Wed, 3 Jul 2024 13:38:14 +1000 Subject: [PATCH 7/7] fixes --- crates/pet/src/jsonrpc.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/pet/src/jsonrpc.rs b/crates/pet/src/jsonrpc.rs index 0cb50148..81f6ddc5 100644 --- a/crates/pet/src/jsonrpc.rs +++ b/crates/pet/src/jsonrpc.rs @@ -150,11 +150,7 @@ pub fn handle_resolve(context: Arc, id: u32, params: Value) { Ok(request_options) => { let executable = request_options.executable.clone(); let search_paths = context.configuration.read().unwrap().clone().search_paths; - let search_paths = if let Some(search_paths) = search_paths { - search_paths - } else { - vec![] - }; + let search_paths = search_paths.unwrap_or_default(); // Start in a new thread, we can have multiple resolve requests. thread::spawn(move || { let now = SystemTime::now();