From a2a4d525f91a85b537ce02b8b77b0e2558602bf2 Mon Sep 17 00:00:00 2001 From: Rebecca Turner Date: Wed, 12 Jun 2024 12:17:39 -0700 Subject: [PATCH 1/6] ModuleSet: store a set rather than a map Previously, a `ModuleSet` wrapped a `HashMap`. This had a number of undesirable consequences: * The data about a module's name (as its path) and how it was loaded (as its `TargetKind`) were split from each other and difficult to reference. * The module's import name wasn't explicitly stored anywhere, so we needed to convert between paths and dotted names when those were needed, which required hitting the disk. * There wasn't a type for the module's import name, so when we (e.g.) `:unadd`ed modules we needed to format them as strings. Now, a `ModuleSet` wraps a `HashSet`. * A `LoadedModule` wraps a path but optionally contains the module's dotted name, if the module is loaded by name (and needs to be referred to by name to avoid the "module defined in multiple files" error). * The `LoadedModule` `Display` instance formats the module's import name correctly (with a dotted name if needed) and avoids hitting the disk or any string processing. --- src/ghci/loaded_module.rs | 122 ++++++++++++++++++++++++++ src/ghci/mod.rs | 106 +++++++++++++++++----- src/ghci/module_set.rs | 94 ++++++++++++++++++++ src/ghci/parse/mod.rs | 4 - src/ghci/parse/module_set.rs | 156 --------------------------------- src/ghci/parse/show_paths.rs | 11 ++- src/ghci/parse/show_targets.rs | 26 +++--- src/ghci/parse/target_kind.rs | 12 --- src/ghci/stdin.rs | 16 ++-- src/ghci/stdout.rs | 6 +- 10 files changed, 335 insertions(+), 218 deletions(-) create mode 100644 src/ghci/loaded_module.rs create mode 100644 src/ghci/module_set.rs delete mode 100644 src/ghci/parse/module_set.rs delete mode 100644 src/ghci/parse/target_kind.rs diff --git a/src/ghci/loaded_module.rs b/src/ghci/loaded_module.rs new file mode 100644 index 00000000..778aa1c4 --- /dev/null +++ b/src/ghci/loaded_module.rs @@ -0,0 +1,122 @@ +use std::borrow::Borrow; +use std::fmt::Display; +use std::hash::Hash; +use std::hash::Hasher; + +use camino::Utf8Path; + +use crate::normal_path::NormalPath; + +/// Information about a module loaded into a `ghci` session. +/// +/// Hashing and equality are determined by the module's path alone. +#[derive(Debug, Clone, Eq)] +pub struct LoadedModule { + /// The module's source file. + path: NormalPath, + + /// The module's name. + /// + /// This is present if and only if the module is loaded by name. + /// + /// Entries in `:show targets` can be one of two types: module paths or module names (with `.` in + /// place of path separators). Due to a `ghci` bug, the module can only be referred to as whichever + /// form it was originally added as (see below), so we use this to track how we refer to modules. + /// + /// See: + name: Option, +} + +impl LoadedModule { + /// Create a new module, loaded by path. + pub fn new(path: NormalPath) -> Self { + Self { path, name: None } + } + + /// Create a new module, loaded by name. + pub fn with_name(path: NormalPath, name: String) -> Self { + Self { + path, + name: Some(name), + } + } + + /// Get the name to use to refer to this module. + pub fn name(&self) -> LoadedModuleName { + match self.name.as_deref() { + Some(name) => LoadedModuleName::Name(name), + None => LoadedModuleName::Path(&self.path), + } + } + + /// Get the module's source path. + pub fn path(&self) -> &NormalPath { + &self.path + } +} + +impl Display for LoadedModule { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.name()) + } +} + +impl Hash for LoadedModule { + fn hash(&self, state: &mut H) { + self.path.hash(state) + } +} + +impl PartialEq for LoadedModule { + fn eq(&self, other: &Self) -> bool { + self.path.eq(&other.path) + } +} + +impl PartialOrd for LoadedModule { + fn partial_cmp(&self, other: &Self) -> Option { + self.path.partial_cmp(&other.path) + } +} + +impl Ord for LoadedModule { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.path.cmp(&other.path) + } +} + +impl Borrow for LoadedModule { + fn borrow(&self) -> &NormalPath { + &self.path + } +} + +impl Borrow for LoadedModule { + fn borrow(&self) -> &Utf8Path { + &self.path + } +} + +/// The name to use to refer to a module loaded into a GHCi session. +/// +/// Entries in `:show targets` can be one of two types: module paths or module names (with `.` in +/// place of path separators). Due to a `ghci` bug, the module can only be referred to as whichever +/// form it was originally added as (see below), so we use this to track how we refer to modules. +/// +/// See: +#[derive(Debug)] +pub enum LoadedModuleName<'a> { + /// A path to a Haskell source file, like `src/My/Cool/Module.hs`. + Path(&'a Utf8Path), + /// A dotted module name, like `My.Cool.Module`. + Name(&'a str), +} + +impl<'a> Display for LoadedModuleName<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LoadedModuleName::Path(path) => write!(f, "{path}"), + LoadedModuleName::Name(name) => write!(f, "{name}"), + } + } +} diff --git a/src/ghci/mod.rs b/src/ghci/mod.rs index 5a1a6b44..98b10f71 100644 --- a/src/ghci/mod.rs +++ b/src/ghci/mod.rs @@ -6,6 +6,7 @@ use nix::sys::signal::Signal; use owo_colors::OwoColorize; use owo_colors::Stream::Stdout; use std::borrow::Borrow; +use std::borrow::Cow; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::fmt::Debug; @@ -50,7 +51,6 @@ pub mod parse; use parse::parse_eval_commands; use parse::CompilationResult; use parse::EvalCommand; -use parse::ModuleSet; use parse::ShowPaths; mod ghci_command; @@ -63,6 +63,12 @@ mod writer; use crate::buffers::GHCI_BUFFER_CAPACITY; pub use crate::ghci::writer::GhciWriter; +mod module_set; +pub use module_set::ModuleSet; + +mod loaded_module; +use loaded_module::LoadedModule; + use crate::aho_corasick::AhoCorasickExt; use crate::buffers::LINE_BUFFER_CAPACITY; use crate::cli::Opts; @@ -80,8 +86,6 @@ use crate::shutdown::ShutdownHandle; use crate::CommandExt; use crate::StringCase; -use self::parse::TargetKind; - /// The `ghci` prompt we use. Should be unique enough, but maybe we can make it better with Unicode /// private-use-area codepoints or something in the future. pub const PROMPT: &str = "###~GHCIWATCH-PROMPT~###"; @@ -613,10 +617,10 @@ impl Ghci { let mut eval_commands = BTreeMap::new(); - for path in self.targets.iter() { - let commands = Self::parse_eval_commands(path).await?; + for target in self.targets.iter() { + let commands = Self::parse_eval_commands(target.path()).await?; if !commands.is_empty() { - eval_commands.insert(path.clone(), commands); + eval_commands.insert(target.path().clone(), commands); } } @@ -670,23 +674,79 @@ impl Ghci { Ok(commands) } - /// `:add` a module or modules to the `ghci` session by path. + /// `:add` a module or modules to the GHCi session. #[instrument(skip(self), level = "debug")] async fn add_modules( &mut self, paths: &[NormalPath], log: &mut CompilationLog, ) -> miette::Result<()> { - let modules = self.targets.format_modules(&self.search_paths, paths)?; + let mut modules = Vec::with_capacity(paths.len()); + for path in paths { + if self.targets.contains_source_path(path) { + return Err(miette!( + "Attempting to add already-loaded module: {path}\n\ + This is a ghciwatch bug; please report it upstream" + )); + } else { + modules.push(LoadedModule::new(path.clone())); + } + } self.stdin .add_modules(&mut self.stdout, &modules, log) .await?; - for path in paths { - self.targets - .insert_source_path(path.clone(), TargetKind::Path); - } + // TODO: This could lead to the module set getting out of sync with the underlying GHCi + // session. + // + // If there's a TOATOU bug here (e.g. we're attempting to add a module but the file no + // longer exists), then we can get into a situation like this: + // + // ghci> :add src/DoesntExist.hs src/MyLib.hs + // File src/DoesntExist.hs not found + // [4 of 4] Compiling MyLib ( src/MyLib.hs, interpreted ) + // Ok, four modules loaded. + // + // ghci> :show targets + // src/MyLib.hs + // ... + // + // We've requested to load two modules, only one has been loaded, but GHCi has reported + // that compilation was successful and hasn't added the failing module to the target set. + // Note that if the file is found but compilation fails, the file _is_ added to the target + // set: + // + // ghci> :add src/MyCoolLib.hs + // [4 of 4] Compiling MyCoolLib ( src/MyCoolLib.hs, interpreted ) + // + // src/MyCoolLib.hs:4:12: error: + // • Couldn't match expected type ‘IO ()’ with actual type ‘()’ + // • In the expression: () + // In an equation for ‘someFunc’: someFunc = () + // | + // 4 | someFunc = () + // | ^^ + // Failed, three modules loaded. + // + // ghci> :show targets + // src/MyCoolLib.hs + // ... + // + // I think this is OK, because the only reason we need to know which modules are loaded is + // to avoid the "module defined in multiple files" bug [1], so the potential outcomes of + // making this mistake are: + // + // 1. The next time the file is modified, we attempt to `:add` it instead of `:reload`ing + // it. This is harmless, though it changes the order that `:show modules` prints output + // in (maybe local binding order as well or something). + // 2. The next time the file is modified, we attempt to `:add` it by path instead of by + // module name, but this function is only used when the modules aren't already in the + // target set, so we know the module doesn't need to be referred to by its module name. + // + // [1]: https://gitlab.haskell.org/ghc/ghc/-/issues/13254#note_525037 + + self.targets.extend(modules); self.refresh_eval_commands_for_paths(paths).await?; @@ -702,14 +762,15 @@ impl Ghci { path: &NormalPath, log: &mut CompilationLog, ) -> miette::Result<()> { - let module = self.targets.module_import_name(&self.search_paths, path)?; + let module = self.targets.get_import_name(path); self.stdin - .interpret_module(&mut self.stdout, &module.name, log) + .interpret_module(&mut self.stdout, &module, log) .await?; - if !module.loaded { - self.targets.insert_source_path(path.clone(), module.kind); + // Note: A borrowed path is only returned if the path is already present in the module set. + if let Cow::Owned(module) = module { + self.targets.insert_module(module); } self.refresh_eval_commands_for_paths(std::iter::once(path)) @@ -725,21 +786,24 @@ impl Ghci { paths: &[NormalPath], log: &mut CompilationLog, ) -> miette::Result<()> { + let modules = paths + .iter() + .map(|path| self.targets.get_import_name(path).into_owned()) + .collect::>(); + // Each `:unadd` implicitly reloads as well, so we have to `:unadd` all the modules in a // single command so that GHCi doesn't try to load a bunch of removed modules after each // one. - let modules = self.targets.format_modules(&self.search_paths, paths)?; - self.stdin - .remove_modules(&mut self.stdout, &modules, log) + .remove_modules(&mut self.stdout, modules.iter().map(Borrow::borrow), log) .await?; for path in paths { self.targets.remove_source_path(path); - self.clear_eval_commands_for_paths(std::iter::once(path)) - .await; } + self.clear_eval_commands_for_paths(paths).await; + Ok(()) } diff --git a/src/ghci/module_set.rs b/src/ghci/module_set.rs new file mode 100644 index 00000000..6a57a14d --- /dev/null +++ b/src/ghci/module_set.rs @@ -0,0 +1,94 @@ +use std::borrow::Borrow; +use std::borrow::Cow; +use std::cmp::Eq; +use std::collections::HashSet; +use std::hash::Hash; + +use crate::normal_path::NormalPath; + +use super::loaded_module::LoadedModule; + +/// A collection of source paths, retaining information about loaded modules in a `ghci` +/// session. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct ModuleSet { + modules: HashSet, +} + +impl ModuleSet { + /// Iterate over the modules in this set. + pub fn iter(&self) -> std::collections::hash_set::Iter<'_, LoadedModule> { + self.modules.iter() + } + + /// Iterate over the modules in this set. + #[cfg(test)] + pub fn into_iter(self) -> std::collections::hash_set::IntoIter { + self.modules.into_iter() + } + + /// Get the number of modules in this set. + pub fn len(&self) -> usize { + self.modules.len() + } + + /// Determine if a module with the given source path is contained in this module set. + pub fn contains_source_path

(&self, path: &P) -> bool + where + LoadedModule: Borrow

, + P: Hash + Eq + ?Sized, + { + self.modules.contains(path) + } + + /// Add a module to this set. + /// + /// Returns whether the module was newly inserted. + pub fn insert_module(&mut self, module: LoadedModule) -> bool { + self.modules.insert(module) + } + + /// Remove a source path from this module set. + /// + /// Returns whether the path was present in the set. + pub fn remove_source_path

(&mut self, path: &P) -> bool + where + LoadedModule: Borrow

, + P: Hash + Eq + ?Sized, + { + self.modules.remove(path) + } + + /// Get a module in this set. + pub fn get_module

(&self, path: &P) -> Option<&LoadedModule> + where + LoadedModule: Borrow

, + P: Hash + Eq + ?Sized, + { + self.modules.get(path) + } + + /// Get the import name for a module. + /// + /// The path parameter should be relative to the GHCi session's working directory. + pub fn get_import_name(&self, path: &NormalPath) -> Cow<'_, LoadedModule> { + match self.get_module(path) { + Some(module) => Cow::Borrowed(module), + None => Cow::Owned(LoadedModule::new(path.clone())), + } + } +} + +impl FromIterator for ModuleSet { + fn from_iter>(iter: T) -> Self { + Self { + modules: iter.into_iter().collect(), + } + } +} + +impl Extend for ModuleSet { + fn extend>(&mut self, iter: T) { + self.modules.extend(iter) + } +} diff --git a/src/ghci/parse/mod.rs b/src/ghci/parse/mod.rs index ae4a31ab..be016b09 100644 --- a/src/ghci/parse/mod.rs +++ b/src/ghci/parse/mod.rs @@ -5,10 +5,8 @@ mod ghc_message; mod haskell_grammar; mod lines; mod module_and_files; -mod module_set; mod show_paths; mod show_targets; -mod target_kind; use haskell_grammar::module_name; use lines::rest_of_line; @@ -23,8 +21,6 @@ pub use ghc_message::GhcDiagnostic; pub use ghc_message::GhcMessage; pub use ghc_message::Severity; pub use module_and_files::CompilingModule; -pub use module_set::ModuleSet; pub use show_paths::parse_show_paths; pub use show_paths::ShowPaths; pub use show_targets::parse_show_targets; -pub use target_kind::TargetKind; diff --git a/src/ghci/parse/module_set.rs b/src/ghci/parse/module_set.rs deleted file mode 100644 index cc25ff1f..00000000 --- a/src/ghci/parse/module_set.rs +++ /dev/null @@ -1,156 +0,0 @@ -use std::borrow::Borrow; -use std::cmp::Eq; -use std::collections::hash_map::Keys; -use std::collections::HashMap; -use std::fmt::Display; -use std::hash::Hash; -use std::path::Path; - -use crate::normal_path::NormalPath; - -use super::ShowPaths; -use super::TargetKind; - -/// A collection of source paths, retaining information about loaded modules in a `ghci` -/// session. -#[derive(Debug, Clone, Default, PartialEq, Eq)] -pub struct ModuleSet { - modules: HashMap, -} - -impl ModuleSet { - /// Construct a `ModuleSet` from an iterator of module source paths. - pub fn from_paths( - paths: impl IntoIterator, TargetKind)>, - current_dir: impl AsRef, - ) -> miette::Result { - let current_dir = current_dir.as_ref(); - Ok(Self { - modules: paths - .into_iter() - .map(|(path, kind)| { - NormalPath::new(path.as_ref(), current_dir).map(|path| (path, kind)) - }) - .collect::>()?, - }) - } - - /// Get the number of modules in this set. - pub fn len(&self) -> usize { - self.modules.len() - } - - /// Determine if a module with the given source path is contained in this module set. - pub fn contains_source_path

(&self, path: &P) -> bool - where - NormalPath: Borrow

, - P: Hash + Eq + ?Sized, - { - self.modules.contains_key(path) - } - - /// Add a source path to this module set. - /// - /// Returns whether the value was newly inserted. - pub fn insert_source_path(&mut self, path: NormalPath, kind: TargetKind) -> bool { - match self.modules.insert(path, kind) { - Some(old_kind) => { - assert!(kind == old_kind, "`ghciwatch` failed to track how modules were imported in `ghci`; please report this as a bug"); - true - } - None => false, - } - } - - /// Remove a source path from this module set. - /// - /// Returns the target's kind, if it was present in the set. - pub fn remove_source_path

(&mut self, path: &P) -> Option - where - NormalPath: Borrow

, - P: Hash + Eq + ?Sized, - { - self.modules.remove(path) - } - - /// Get the name used to refer to the given module path when importing it. - /// - /// If the module isn't imported, a path will be returned. - /// - /// Otherwise, the form used to import the module originally will be used. Generally this is a - /// path if `ghciwatch` imported the module, and a module name if `ghci` imported the module on - /// startup. - /// - /// See: - pub fn module_import_name( - &self, - show_paths: &ShowPaths, - path: &NormalPath, - ) -> miette::Result { - match self.modules.get(path) { - Some(&kind) => match kind { - TargetKind::Path => Ok(ImportInfo { - name: path.relative().to_string(), - kind, - loaded: true, - }), - TargetKind::Module => Ok(ImportInfo { - name: show_paths.path_to_module(path)?, - kind, - loaded: true, - }), - }, - None => { - let path = show_paths.make_relative(path)?; - Ok(ImportInfo { - name: path.into_relative().into_string(), - kind: TargetKind::Path, - loaded: false, - }) - } - } - } - - /// Format modules for adding or removing from a GHCi session. - /// - /// See [`ModuleSet::module_import_name`]. - pub fn format_modules( - &self, - show_paths: &ShowPaths, - modules: &[NormalPath], - ) -> miette::Result { - modules - .iter() - .map(|path| { - self.module_import_name(show_paths, path) - .map(|module| module.name) - }) - .collect::, _>>() - .map(|modules| modules.join(" ")) - } - - /// Iterate over the source paths in this module set. - pub fn iter(&self) -> Keys<'_, NormalPath, TargetKind> { - self.modules.keys() - } -} - -/// Information about a module to be imported into a `ghci` session. -#[derive(Debug, Clone)] -pub struct ImportInfo { - /// The name to refer to the module by. - /// - /// This may either be a dotted module name like `My.Cool.Module` or a path like - /// `src/My/Cool/Module.hs`. - pub name: String, - /// Whether the `name` is a name or path. - pub kind: TargetKind, - /// Whether the module is already loaded in the `ghci` session. - pub loaded: bool, -} - -impl Display for ImportInfo { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.name) - } -} diff --git a/src/ghci/parse/show_paths.rs b/src/ghci/parse/show_paths.rs index 895e00b2..bfd2c982 100644 --- a/src/ghci/parse/show_paths.rs +++ b/src/ghci/parse/show_paths.rs @@ -14,12 +14,12 @@ use winnow::combinator::repeat; use winnow::PResult; use winnow::Parser; +use crate::ghci::loaded_module::LoadedModule; use crate::haskell_source_file::is_haskell_source_file; use crate::haskell_source_file::HASKELL_SOURCE_EXTENSIONS; use crate::normal_path::NormalPath; use super::lines::until_newline; -use super::TargetKind; /// Parsed `:show paths` output. #[derive(Debug, Clone, PartialEq, Eq)] @@ -37,13 +37,13 @@ impl ShowPaths { } /// Convert a target (from `:show targets` output) to a module source path. - pub fn target_to_path(&self, target: &str) -> miette::Result<(NormalPath, TargetKind)> { + pub fn target_to_path(&self, target: &str) -> miette::Result { let target_path = Utf8Path::new(target); if is_haskell_source_file(target_path) { // The target is already a path. let path = self.cwd.join(target_path); tracing::trace!(%path, %target, "Target is path"); - return Ok((NormalPath::new(path, &self.cwd)?, TargetKind::Path)); + return Ok(LoadedModule::new(NormalPath::new(path, &self.cwd)?)); } else { // Else, split by `.` to get path components. let mut path = target.split('.').collect::(); @@ -56,7 +56,10 @@ impl ShowPaths { let path = search_path.join(&path); if path.exists() { tracing::trace!(%path, %target, "Found path for target"); - return Ok((NormalPath::new(path, &self.cwd)?, TargetKind::Module)); + return Ok(LoadedModule::with_name( + NormalPath::new(path, &self.cwd)?, + target.to_owned(), + )); } } } diff --git a/src/ghci/parse/show_targets.rs b/src/ghci/parse/show_targets.rs index 2b56f631..34fb548b 100644 --- a/src/ghci/parse/show_targets.rs +++ b/src/ghci/parse/show_targets.rs @@ -2,16 +2,13 @@ use miette::miette; use winnow::combinator::repeat; use winnow::Parser; +use crate::ghci::ModuleSet; + use super::lines::until_newline; use super::show_paths::ShowPaths; -use super::TargetKind; -use crate::normal_path::NormalPath; /// Parse `:show targets` output into a set of module source paths. -pub fn parse_show_targets( - search_paths: &ShowPaths, - input: &str, -) -> miette::Result> { +pub fn parse_show_targets(search_paths: &ShowPaths, input: &str) -> miette::Result { let targets: Vec<_> = repeat(0.., until_newline) .parse(input) .map_err(|err| miette!("{err}"))?; @@ -24,6 +21,9 @@ pub fn parse_show_targets( #[cfg(test)] mod tests { + use std::collections::HashSet; + + use crate::ghci::loaded_module::LoadedModule; use crate::normal_path::NormalPath; use super::*; @@ -55,13 +55,17 @@ mod tests { " ) ) - .unwrap(), + .unwrap() + .into_iter() + .collect::>(), vec![ - (normal_path("src/MyLib.hs"), TargetKind::Path), - (normal_path("test/TestMain.hs"), TargetKind::Module), - (normal_path("src/MyLib.hs"), TargetKind::Module), - (normal_path("src/MyModule.hs"), TargetKind::Module), + LoadedModule::new(normal_path("src/MyLib.hs")), + LoadedModule::with_name(normal_path("test/TestMain.hs"), "TestMain".to_owned()), + LoadedModule::with_name(normal_path("src/MyLib.hs"), "MyLib".to_owned()), + LoadedModule::with_name(normal_path("src/MyModule.hs"), "MyModule".to_owned()), ] + .into_iter() + .collect() ); } } diff --git a/src/ghci/parse/target_kind.rs b/src/ghci/parse/target_kind.rs deleted file mode 100644 index 118edd9e..00000000 --- a/src/ghci/parse/target_kind.rs +++ /dev/null @@ -1,12 +0,0 @@ -/// Entries in `:show targets` can be one of two types: module paths or module names (with `.` in -/// place of path separators). Due to a `ghci` bug, the module can only be referred to as whichever -/// form it was originally added as (see below), so we use this to track how we refer to modules. -/// -/// See: -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum TargetKind { - /// A target named by its source path. - Path, - /// A target named by its module name. - Module, -} diff --git a/src/ghci/stdin.rs b/src/ghci/stdin.rs index ce8d84e3..53bba627 100644 --- a/src/ghci/stdin.rs +++ b/src/ghci/stdin.rs @@ -1,3 +1,4 @@ +use itertools::Itertools; use miette::Context; use miette::IntoDiagnostic; use tokio::io::AsyncWriteExt; @@ -7,11 +8,12 @@ use tracing::instrument; use crate::incremental_reader::FindAt; -use super::parse::ModuleSet; +use super::loaded_module::LoadedModule; use super::parse::ShowPaths; use super::stderr::StderrEvent; use super::CompilationLog; use super::GhciCommand; +use super::ModuleSet; use super::PROMPT; use crate::ghci::GhciStdout; @@ -102,13 +104,14 @@ impl GhciStdin { self.write_line(stdout, ":reload\n", log).await } - #[instrument(skip(self, stdout), level = "debug")] + #[instrument(skip_all, level = "debug")] pub async fn add_modules( &mut self, stdout: &mut GhciStdout, - modules: &str, + modules: impl IntoIterator, log: &mut CompilationLog, ) -> miette::Result<()> { + let modules = modules.into_iter().format(" "); // We use `:add` because `:load` unloads all previously loaded modules: // // > All previously loaded modules, except package modules, are forgotten. The new set of @@ -120,13 +123,14 @@ impl GhciStdin { .await } - #[instrument(skip(self, stdout), level = "debug")] + #[instrument(skip_all, level = "debug")] pub async fn remove_modules( &mut self, stdout: &mut GhciStdout, - modules: &str, + modules: impl IntoIterator, log: &mut CompilationLog, ) -> miette::Result<()> { + let modules = modules.into_iter().format(" "); self.write_line(stdout, &format!(":unadd {modules}\n"), log) .await } @@ -135,7 +139,7 @@ impl GhciStdin { pub async fn interpret_module( &mut self, stdout: &mut GhciStdout, - module: &str, + module: &LoadedModule, log: &mut CompilationLog, ) -> miette::Result<()> { // `:add *` forces the module to be interpreted, even if it was already loaded from diff --git a/src/ghci/stdout.rs b/src/ghci/stdout.rs index 44fadaf9..5980b123 100644 --- a/src/ghci/stdout.rs +++ b/src/ghci/stdout.rs @@ -15,11 +15,11 @@ use crate::incremental_reader::WriteBehavior; use super::parse::parse_ghc_messages; use super::parse::parse_show_paths; use super::parse::parse_show_targets; -use super::parse::ModuleSet; use super::parse::ShowPaths; use super::stderr::StderrEvent; use super::writer::GhciWriter; use super::CompilationLog; +use super::ModuleSet; pub struct GhciStdout { /// Reader for parsing and forwarding the underlying stdout stream. @@ -123,9 +123,7 @@ impl GhciStdout { buffer: &mut self.buffer, }) .await?; - let paths = parse_show_targets(search_paths, &lines) - .wrap_err("Failed to parse `:show targets` output")?; - ModuleSet::from_paths(paths, &search_paths.cwd) + parse_show_targets(search_paths, &lines).wrap_err("Failed to parse `:show targets` output") } #[instrument(skip_all, level = "debug")] From ffde01730e6e2bb362fae7fcfa5723fc94bb62bd Mon Sep 17 00:00:00 2001 From: Rebecca Turner Date: Wed, 12 Jun 2024 13:00:29 -0700 Subject: [PATCH 2/6] Remove `LoadedModuleName` --- src/ghci/loaded_module.rs | 44 ++++++++------------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/src/ghci/loaded_module.rs b/src/ghci/loaded_module.rs index 778aa1c4..ed6fb84a 100644 --- a/src/ghci/loaded_module.rs +++ b/src/ghci/loaded_module.rs @@ -12,10 +12,10 @@ use crate::normal_path::NormalPath; /// Hashing and equality are determined by the module's path alone. #[derive(Debug, Clone, Eq)] pub struct LoadedModule { - /// The module's source file. + /// The module's source file, like `src/My/Cool/Module.hs`. path: NormalPath, - /// The module's name. + /// The module's dotted name, like `My.Cool.Module`. /// /// This is present if and only if the module is loaded by name. /// @@ -41,14 +41,6 @@ impl LoadedModule { } } - /// Get the name to use to refer to this module. - pub fn name(&self) -> LoadedModuleName { - match self.name.as_deref() { - Some(name) => LoadedModuleName::Name(name), - None => LoadedModuleName::Path(&self.path), - } - } - /// Get the module's source path. pub fn path(&self) -> &NormalPath { &self.path @@ -57,7 +49,13 @@ impl LoadedModule { impl Display for LoadedModule { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.name()) + write!( + f, + "{}", + self.name + .as_deref() + .unwrap_or_else(|| self.path.relative().as_str()) + ) } } @@ -96,27 +94,3 @@ impl Borrow for LoadedModule { &self.path } } - -/// The name to use to refer to a module loaded into a GHCi session. -/// -/// Entries in `:show targets` can be one of two types: module paths or module names (with `.` in -/// place of path separators). Due to a `ghci` bug, the module can only be referred to as whichever -/// form it was originally added as (see below), so we use this to track how we refer to modules. -/// -/// See: -#[derive(Debug)] -pub enum LoadedModuleName<'a> { - /// A path to a Haskell source file, like `src/My/Cool/Module.hs`. - Path(&'a Utf8Path), - /// A dotted module name, like `My.Cool.Module`. - Name(&'a str), -} - -impl<'a> Display for LoadedModuleName<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - LoadedModuleName::Path(path) => write!(f, "{path}"), - LoadedModuleName::Name(name) => write!(f, "{name}"), - } - } -} From dfc545b09a1add593ec007256cf9fa3965476811 Mon Sep 17 00:00:00 2001 From: Rebecca Turner Date: Fri, 14 Jun 2024 17:01:12 -0700 Subject: [PATCH 3/6] Add `transform_till` and `recognize_till` parsers Upstream: https://github.com/winnow-rs/winnow/pull/541 --- src/ghci/parse/mod.rs | 3 ++ src/ghci/parse/transform_till.rs | 81 ++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 src/ghci/parse/transform_till.rs diff --git a/src/ghci/parse/mod.rs b/src/ghci/parse/mod.rs index be016b09..eaf84e4c 100644 --- a/src/ghci/parse/mod.rs +++ b/src/ghci/parse/mod.rs @@ -7,6 +7,7 @@ mod lines; mod module_and_files; mod show_paths; mod show_targets; +mod transform_till; use haskell_grammar::module_name; use lines::rest_of_line; @@ -24,3 +25,5 @@ pub use module_and_files::CompilingModule; pub use show_paths::parse_show_paths; pub use show_paths::ShowPaths; pub use show_targets::parse_show_targets; +pub use transform_till::recognize_till; +pub use transform_till::transform_till; diff --git a/src/ghci/parse/transform_till.rs b/src/ghci/parse/transform_till.rs new file mode 100644 index 00000000..f7e6e329 --- /dev/null +++ b/src/ghci/parse/transform_till.rs @@ -0,0 +1,81 @@ +use winnow::combinator::eof; +use winnow::combinator::terminated; +use winnow::error::ErrMode; +use winnow::error::ErrorKind; +use winnow::error::ParserError; +use winnow::stream::Offset; +use winnow::stream::Stream; +use winnow::stream::StreamIsPartial; +use winnow::Parser; + +/// Call the `repeat` parser until the `end` parser produces a result. +/// +/// Then, return the input consumed until the `end` parser was called, and the result of the `end` +/// parser. +/// +/// See: +pub fn recognize_till( + mut repeat: impl Parser, + mut end: impl Parser, +) -> impl Parser::Slice, O), E> +where + I: Stream, + E: ParserError, +{ + move |input: &mut I| { + let start = input.checkpoint(); + + loop { + let before_end = input.checkpoint(); + match end.parse_next(input) { + Ok(end_parsed) => { + let after_end = input.checkpoint(); + + let offset_to_before_end = before_end.offset_from(&start); + input.reset(start); + let input_until_end = input.next_slice(offset_to_before_end); + input.reset(after_end); + + return Ok((input_until_end, end_parsed)); + } + Err(ErrMode::Backtrack(_)) => { + input.reset(before_end); + match repeat.parse_next(input) { + Ok(_) => {} + Err(e) => return Err(e.append(input, ErrorKind::Many)), + } + } + Err(e) => return Err(e), + } + } + } +} + +/// Like [`recognize_till`], but it also applies a `transform` parser to the recognized input. +pub fn transform_till( + mut repeat: impl Parser, + mut transform: impl Parser<::Slice, O1, E>, + mut end: impl Parser, +) -> impl Parser +where + I: Stream, + E: ParserError, + E: ParserError<::Slice>, + ::Slice: Stream + StreamIsPartial, +{ + move |input: &mut I| { + let (mut until_end, end_parsed) = + recognize_till(repeat.by_ref(), end.by_ref()).parse_next(input)?; + + let inner_parsed = terminated(transform.by_ref(), eof) + .parse_next(&mut until_end) + .map_err(|err_mode| match err_mode { + ErrMode::Incomplete(_) => { + panic!("complete parsers should not report `ErrMode::Incomplete(_)`") + } + ErrMode::Backtrack(inner) | ErrMode::Cut(inner) => ErrMode::Cut(inner), + })?; + + Ok((inner_parsed, end_parsed)) + } +} From 1778a0cfa3529893710cead251ab3b4cf52bfdab Mon Sep 17 00:00:00 2001 From: Rebecca Turner Date: Fri, 14 Jun 2024 17:01:12 -0700 Subject: [PATCH 4/6] Parse single-quoted GHC output more reliably MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GHC output contains quoted fragments: Module graph contains a cycle: module ‘C’ (./C.hs) imports module ‘A’ (A.hs) which imports module ‘B’ (./B.hs) which imports module ‘C’ (./C.hs) When Unicode output is not available, the Unicode quotes are substituted for GNU-style ASCII quotes: module `C' (./C.hs) However, when the quoted text starts or ends with a single quote, ASCII quotes are omitted. This leads to ambiguous output: A → `A' A' → A' `A' → `A' 'A → 'A 'A' → 'A' Correctly parsing this is challenging. This probably increases the amount of backtracking and lookahead required for these parsers. Not sure if that's significant or relevant. --- src/ghci/parse/ghc_message/mod.rs | 2 +- .../module_import_cycle_diagnostic.rs | 7 +- src/ghci/parse/ghc_message/single_quote.rs | 39 ---- src/ghci/parse/ghc_message/single_quoted.rs | 175 ++++++++++++++++++ 4 files changed, 178 insertions(+), 45 deletions(-) delete mode 100644 src/ghci/parse/ghc_message/single_quote.rs create mode 100644 src/ghci/parse/ghc_message/single_quoted.rs diff --git a/src/ghci/parse/ghc_message/mod.rs b/src/ghci/parse/ghc_message/mod.rs index 6eb9574f..78332bca 100644 --- a/src/ghci/parse/ghc_message/mod.rs +++ b/src/ghci/parse/ghc_message/mod.rs @@ -14,7 +14,7 @@ pub use position::PositionRange; mod severity; pub use severity::Severity; -mod single_quote; +mod single_quoted; mod path_colon; use path_colon::path_colon; diff --git a/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs b/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs index 33caf88b..561aaa04 100644 --- a/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs +++ b/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs @@ -13,7 +13,7 @@ use crate::ghci::parse::lines::line_ending_or_eof; use crate::ghci::parse::lines::rest_of_line; use crate::ghci::parse::Severity; -use super::single_quote::single_quote; +use super::single_quoted::single_quoted; use super::GhcDiagnostic; use super::GhcMessage; @@ -39,10 +39,7 @@ pub fn module_import_cycle_diagnostic(input: &mut &str) -> PResult PResult { - one_of(['`', '\'', '‘', '’']).parse_next(input) -} - -#[cfg(test)] -mod tests { - use super::*; - - use pretty_assertions::assert_eq; - - #[test] - fn test_parse_single_quote() { - assert_eq!(single_quote.parse("\'").unwrap(), '\''); - assert_eq!(single_quote.parse("`").unwrap(), '`'); - assert_eq!(single_quote.parse("‘").unwrap(), '‘'); - assert_eq!(single_quote.parse("’").unwrap(), '’'); - - assert!(single_quote.parse("''").is_err()); - assert!(single_quote.parse(" '").is_err()); - assert!(single_quote.parse("' ").is_err()); - assert!(single_quote.parse("`foo'").is_err()); - } -} diff --git a/src/ghci/parse/ghc_message/single_quoted.rs b/src/ghci/parse/ghc_message/single_quoted.rs new file mode 100644 index 00000000..b591c02a --- /dev/null +++ b/src/ghci/parse/ghc_message/single_quoted.rs @@ -0,0 +1,175 @@ +use winnow::combinator::alt; +use winnow::combinator::preceded; +use winnow::error::ParserError; +use winnow::stream::AsChar; +use winnow::stream::Stream; +use winnow::token::any; +use winnow::token::take_till; +use winnow::Parser; + +use crate::ghci::parse::transform_till; + +/// Parse a single-quoted portion of GHC output. +/// +/// If Unicode is supported and `GHC_NO_UNICODE` is unset, the output will be surrounded with +/// Unicode single quotes: +/// +/// ```text +/// ‘puppy’ +/// ``` +/// +/// Otherwise, the output will be surrounded with "GNU-style" quotes: +/// +/// ```text +/// `puppy' +/// ``` +/// +/// However, if the quoted string starts or ends with an ASCII single quote (`'`) and Unicode +/// output is disabled, the quotes will be omitted entirely: +/// +/// ```text +/// puppy -> `puppy' +/// puppy' -> puppy' +/// 'puppy -> 'puppy +/// 'puppy' -> 'puppy' +/// `puppy' -> `puppy' +/// ``` +/// +/// Note that the quoted output for the first and last examples is the same, so the output is +/// ambiguous in this case. +/// +/// See: +/// +/// See: +pub fn single_quoted<'i, O1, O2, E>( + mut inner: impl Parser<&'i str, O1, E>, + mut end: impl Parser<&'i str, O2, E>, +) -> impl Parser<&'i str, (O1, O2), E> +where + E: ParserError<&'i str>, +{ + move |input: &mut &'i str| { + let start = input.checkpoint(); + + let initial = any.parse_next(input)?.as_char(); + match initial { + '‘' => transform_till( + alt((preceded('’', take_till(0.., '’')), take_till(1.., '’'))), + inner.by_ref(), + preceded('’', end.by_ref()), + ) + .parse_next(input), + '`' => { + // If the output starts with a backtick, it must end with a single quote. + // * Either the output is quoted normally (in which case it ends with a single quote), or + // the quotes are skipped. + // * If the quotes are skipped, then the output either starts or ends with a single quote. + // * The output starts with a backtick, so we know it doesn't start with a single quote. + // * Therefore, it must end with a single quote. + transform_till( + alt((preceded('\'', take_till(0.., '\'')), take_till(1.., '\''))), + inner.by_ref(), + preceded('\'', end.by_ref()), + ) + .parse_next(input) + } + // If the output starts with anything else, the quoting must be skipped. + _ => { + input.reset(start); + // Potentially this will have to consume the entire input before backtracking. Sad! + transform_till(any, inner.by_ref(), end.by_ref()).parse_next(input) + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::ghci::parse::haskell_grammar::module_name; + + use super::*; + + use pretty_assertions::assert_eq; + + #[test] + fn test_parse_single_quoted() { + // Unicode. + assert_eq!( + single_quoted(module_name, ' ').parse("‘Puppy’ ").unwrap(), + ("Puppy", ' ') + ); + + assert_eq!( + single_quoted(module_name, ' ').parse("‘Puppy'’ ").unwrap(), + ("Puppy'", ' ') + ); + + assert_eq!( + single_quoted(module_name, ' ').parse("‘Puppy''’ ").unwrap(), + ("Puppy''", ' ') + ); + + // ASCII. + assert_eq!( + single_quoted(module_name, ' ').parse("`Puppy' ").unwrap(), + ("Puppy", ' ') + ); + + // Internal quotes. + assert_eq!( + single_quoted(module_name, ' ').parse("`Pupp'y' ").unwrap(), + ("Pupp'y", ' ') + ); + assert_eq!( + single_quoted(module_name, ' ').parse("`Pupp''y' ").unwrap(), + ("Pupp''y", ' ') + ); + assert_eq!( + single_quoted(module_name, ' ') + .parse("`Pupp'''y' ") + .unwrap(), + ("Pupp'''y", ' ') + ); + assert_eq!( + single_quoted(module_name, ' ') + .parse("`Pupp''''y' ") + .unwrap(), + ("Pupp''''y", ' ') + ); + + // Starts/ends with single quote. + assert_eq!( + single_quoted(module_name, ' ').parse("Puppy' ").unwrap(), + ("Puppy'", ' ') + ); + assert_eq!( + single_quoted(module_name, ' ').parse("Puppy'' ").unwrap(), + ("Puppy''", ' ') + ); + assert_eq!( + single_quoted(preceded('\'', module_name), ' ') + .parse("'Puppy ") + .unwrap(), + ("Puppy", ' ') + ); + assert_eq!( + single_quoted(preceded('\'', module_name), ' ') + .parse("'Puppy' ") + .unwrap(), + ("Puppy'", ' ') + ); + + // Negative cases. + + // No valid ending. + assert!(single_quoted(module_name, ' ').parse("‘Puppy’x").is_err()); + + // Modules can't start with numbers. + assert!(single_quoted(module_name, ' ').parse("`0' ").is_err()); + assert!(single_quoted(module_name, ' ').parse("0 ").is_err()); + + // Delimiters have to match. + assert!(single_quoted(module_name, ' ').parse("‘Puppy' ").is_err()); + assert!(single_quoted(module_name, ' ').parse("`Puppy’ ").is_err()); + } +} From f75c43cade715bad961e4c2189c1f82438d1c671 Mon Sep 17 00:00:00 2001 From: Rebecca Turner Date: Fri, 14 Jun 2024 17:01:12 -0700 Subject: [PATCH 5/6] Parse Haskell source paths more reliably MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Haskell source paths, as GHC understands them, are remarkably permissive: they must end with one of the source extensions (now more accurately listed here, with references to the upstream GHC code), but can otherwise contain quirks up to and including multiple extensions, whitespace, and newlines. GHCi is actually even more lenient than this in what it accepts; it'll automatically append `.hs` and `.lhs` to paths you give it and check if those exist, but fortunately they get printed out in `:show targets` and diagnostics as the resolved source paths: ```text ghci> :add src/MyLib [1 of 1] Compiling MyLib ( src/MyLib.hs, interpreted ) ghci> :show targets src/MyLib.hs ghci> :add src/Foo target ‘src/Foo’ is not a module name or a source file ghci> :add src/MyLib.lhs File src/MyLib.lhs not found ghci> :add "src/ Foo.hs" File src/ Foo.hs not found ghci> :add "src\n/Foo.hs" File src /Foo.hs not found ``` --- .../module_import_cycle_diagnostic.rs | 7 +- src/ghci/parse/haskell_source_file.rs | 156 ++++++++++++++++++ src/ghci/parse/mod.rs | 2 + src/haskell_source_file.rs | 19 ++- 4 files changed, 172 insertions(+), 12 deletions(-) create mode 100644 src/ghci/parse/haskell_source_file.rs diff --git a/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs b/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs index 561aaa04..80f682e8 100644 --- a/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs +++ b/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs @@ -4,11 +4,11 @@ use winnow::ascii::space1; use winnow::combinator::alt; use winnow::combinator::opt; use winnow::combinator::repeat; -use winnow::token::take_until; use winnow::PResult; use winnow::Parser; use crate::ghci::parse::haskell_grammar::module_name; +use crate::ghci::parse::haskell_source_file; use crate::ghci::parse::lines::line_ending_or_eof; use crate::ghci::parse::lines::rest_of_line; use crate::ghci::parse::Severity; @@ -41,11 +41,10 @@ pub fn module_import_cycle_diagnostic(input: &mut &str) -> PResult PResult> { diff --git a/src/ghci/parse/haskell_source_file.rs b/src/ghci/parse/haskell_source_file.rs new file mode 100644 index 00000000..6c1dcb8d --- /dev/null +++ b/src/ghci/parse/haskell_source_file.rs @@ -0,0 +1,156 @@ +use camino::Utf8PathBuf; +use winnow::combinator::alt; +use winnow::combinator::repeat_till; +use winnow::error::ParserError; +use winnow::stream::Accumulate; +use winnow::stream::AsChar; +use winnow::stream::Compare; +use winnow::stream::Stream; +use winnow::stream::StreamIsPartial; +use winnow::token::take_till; +use winnow::Parser; + +use crate::haskell_source_file::HASKELL_SOURCE_EXTENSIONS; + +/// Parse a Haskell source file name and an ending delimiter. +/// +/// The returned path will end with a dot and one of the [`HASKELL_SOURCE_EXTENSIONS`], but may +/// otherwise contain quirks up to and including multiple extensions, whitespace, and newlines. +/// +/// GHCi is actually even more lenient than this in what it accepts; it'll automatically append +/// `.hs` and `.lhs` to paths you give it and check if those exist, but fortunately they get +/// printed out in `:show targets` and diagnostics as the resolved source paths: +/// +/// ```text +/// ghci> :add src/MyLib +/// [1 of 1] Compiling MyLib ( src/MyLib.hs, interpreted ) +/// +/// ghci> :show targets +/// src/MyLib.hs +/// +/// ghci> :add src/Foo +/// target ‘src/Foo’ is not a module name or a source file +/// +/// ghci> :add src/MyLib.lhs +/// File src/MyLib.lhs not found +/// +/// ghci> :add "src/ Foo.hs" +/// File src/ Foo.hs not found +/// +/// ghci> :add "src\n/Foo.hs" +/// File src +/// /Foo.hs not found +/// ``` +pub fn haskell_source_file( + end: impl Parser, +) -> impl Parser +where + I: Stream + StreamIsPartial + for<'a> Compare<&'a str>, + E: ParserError, + ::Token: AsChar, + char: Parser::Token, E>, + String: Accumulate<::Slice>, +{ + repeat_till(1.., path_chunk(), end) + .map(|(path, end): (String, O)| (Utf8PathBuf::from(path), end)) +} + +fn path_chunk() -> impl Parser::Slice, E> +where + I: Stream + StreamIsPartial + for<'a> Compare<&'a str>, + E: ParserError, + ::Token: AsChar, + char: Parser::Token, E>, +{ + repeat_till::<_, _, (), _, _, _, _>( + 1.., + (take_till(0.., '.'), '.'), + alt(HASKELL_SOURCE_EXTENSIONS), + ) + .recognize() +} + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + use winnow::error::ContextError; + use winnow::error::ParseError; + + use super::*; + + fn parse_haskell_source_file<'a, O>( + input: &'a str, + end: impl Parser<&'a str, O, ContextError>, + ) -> Result<(Utf8PathBuf, O), ParseError<&'a str, ContextError>> { + haskell_source_file::<&str, _, ContextError>(end).parse(input) + } + + #[test] + fn test_parse_haskell_source_file() { + // No end delimiter. + assert!(parse_haskell_source_file("src/Puppy.hs", ' ').is_err()); + + // No source file. + assert!(parse_haskell_source_file(" ", ' ').is_err()); + + // Simple source file. + assert_eq!( + parse_haskell_source_file("src/Puppy.hs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hs"), ' ') + ); + + // Weirder path, non-standard extension. + assert_eq!( + parse_haskell_source_file("src/../Puppy/Doggy.lhs ", ' ').unwrap(), + (Utf8PathBuf::from("src/../Puppy/Doggy.lhs"), ' ') + ); + + // Multiple extensions! + assert_eq!( + parse_haskell_source_file("src/Puppy.hs.lhs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hs.lhs"), ' ') + ); + + // More filename after extension. + assert_eq!( + parse_haskell_source_file("src/Puppy.hs.Doggy.lhs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hs.Doggy.lhs"), ' ') + ); + + // More filename after extension, no dot after extension. + assert_eq!( + parse_haskell_source_file("src/Puppy.hsDoggy.lhs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hsDoggy.lhs"), ' ') + ); + + // Space in middle. + assert_eq!( + parse_haskell_source_file("src/Pu ppy.hs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Pu ppy.hs"), ' ') + ); + + // Space and extension in middle. + assert_eq!( + parse_haskell_source_file("src/Puppy.hsD oggy.hs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hsD oggy.hs"), ' ') + ); + + // Do you know that GHCi will happily read paths that contain newlines?? + assert_eq!( + parse_haskell_source_file("src/\nPuppy.hs ", ' ').unwrap(), + (Utf8PathBuf::from("src/\nPuppy.hs"), ' ') + ); + + // If you do this and it breaks it's your own fault: + assert_eq!( + parse_haskell_source_file("src/Puppy.hs.hs", ".hs").unwrap(), + (Utf8PathBuf::from("src/Puppy.hs"), ".hs") + ); + + // This is dubious for the same reason: + assert_eq!( + parse_haskell_source_file("src/Puppy.hs.", '.').unwrap(), + (Utf8PathBuf::from("src/Puppy.hs"), '.') + ); + } +} diff --git a/src/ghci/parse/mod.rs b/src/ghci/parse/mod.rs index eaf84e4c..d982932e 100644 --- a/src/ghci/parse/mod.rs +++ b/src/ghci/parse/mod.rs @@ -3,6 +3,7 @@ mod eval; mod ghc_message; mod haskell_grammar; +mod haskell_source_file; mod lines; mod module_and_files; mod show_paths; @@ -10,6 +11,7 @@ mod show_targets; mod transform_till; use haskell_grammar::module_name; +use haskell_source_file::haskell_source_file; use lines::rest_of_line; use module_and_files::module_and_files; diff --git a/src/haskell_source_file.rs b/src/haskell_source_file.rs index 53e1dd4f..2ed3538e 100644 --- a/src/haskell_source_file.rs +++ b/src/haskell_source_file.rs @@ -3,17 +3,20 @@ use camino::Utf8Path; /// File extensions for Haskell source code. -pub const HASKELL_SOURCE_EXTENSIONS: [&str; 9] = [ +/// +/// See: +/// +/// See: +pub const HASKELL_SOURCE_EXTENSIONS: [&str; 8] = [ // NOTE: This should start with `hs` so that iterators try the most common extension first. - "hs", // Haskell - "lhs", // Literate Haskell + "hs", // Haskell + "lhs", // Literate Haskell "hs-boot", // See: https://downloads.haskell.org/ghc/latest/docs/users_guide/separate_compilation.html#how-to-compile-mutually-recursive-modules + "lhs-boot", // Literate `hs-boot`. "hsig", // Backpack module signatures: https://ghc.gitlab.haskell.org/ghc/doc/users_guide/separate_compilation.html#module-signatures - "hsc", // `hsc2hs` C bindings: https://downloads.haskell.org/ghc/latest/docs/users_guide/utils.html?highlight=interfaces#writing-haskell-interfaces-to-c-code-hsc2hs - "x", // `alex` (lexer generator): https://hackage.haskell.org/package/alex - "y", // `happy` (parser generator): https://hackage.haskell.org/package/happy - "c2hs", // `c2hs` C bindings: https://hackage.haskell.org/package/c2hs - "gc", // `greencard` C bindings: https://hackage.haskell.org/package/greencard + "lhsig", // Literate backpack module signatures. + "hspp", // "A file created by the preprocessor". + "hscpp", // Haskell C-preprocessor files. ]; /// Determine if a given path represents a Haskell source file. From 492bfa156e1592c7adf2cae8c9e5ea7afe6bd08d Mon Sep 17 00:00:00 2001 From: Rebecca Turner Date: Fri, 14 Jun 2024 17:01:12 -0700 Subject: [PATCH 6/6] Parse `File ... not found` messages This will help us keep the module set in sync more reliably. --- src/ghci/parse/ghc_message/mod.rs | 19 ++ src/ghci/parse/ghc_message/not_found.rs | 308 ++++++++++++++++++++++++ 2 files changed, 327 insertions(+) create mode 100644 src/ghci/parse/ghc_message/not_found.rs diff --git a/src/ghci/parse/ghc_message/mod.rs b/src/ghci/parse/ghc_message/mod.rs index 78332bca..a20eb71c 100644 --- a/src/ghci/parse/ghc_message/mod.rs +++ b/src/ghci/parse/ghc_message/mod.rs @@ -43,6 +43,10 @@ use module_import_cycle_diagnostic::module_import_cycle_diagnostic; mod no_location_info_diagnostic; use no_location_info_diagnostic::no_location_info_diagnostic; +mod not_found; +use not_found::not_found; +pub use not_found::NotFound; + use super::rest_of_line; use super::CompilingModule; @@ -51,6 +55,13 @@ use super::CompilingModule; /// These include progress updates on compilation, errors and warnings, or GHCi messages. #[derive(Debug, Clone, PartialEq, Eq)] pub enum GhcMessage { + /// A module or file was not found. + /// + /// ```text + /// File src/Foo.hs not found + /// Module Foo not found + /// ``` + NotFound(NotFound), /// A module being compiled. /// /// ```text @@ -172,6 +183,7 @@ fn parse_messages_inner(input: &mut &str) -> PResult> { .map(GhcMessage::Diagnostic) .map(Item::One), compilation_summary.map(GhcMessage::Summary).map(Item::One), + not_found.map(GhcMessage::NotFound).map(Item::One), cant_find_file_diagnostic .map(GhcMessage::Diagnostic) .map(Item::One), @@ -219,6 +231,10 @@ mod tests { Preprocessing library 'test-dev' for my-simple-package-0.1.0.0.. GHCi, version 9.0.2: https://www.haskell.org/ghc/ :? for help Loaded GHCi configuration from /Users/wiggles/.ghci + File src/Puppy.hs not found + File src/ + Puppy.hs not found + Module Puppy.Doggy not found [1 of 4] Compiling MyLib ( src/MyLib.hs, interpreted ) [2 of 4] Compiling MyModule ( src/MyModule.hs, interpreted ) @@ -239,6 +255,9 @@ mod tests { GhcMessage::LoadConfig { path: "/Users/wiggles/.ghci".into() }, + GhcMessage::NotFound(NotFound::File("src/Puppy.hs".into())), + GhcMessage::NotFound(NotFound::File("src/\nPuppy.hs".into())), + GhcMessage::NotFound(NotFound::Module("Puppy.Doggy".into())), GhcMessage::Compiling(CompilingModule { name: "MyLib".into(), path: "src/MyLib.hs".into(), diff --git a/src/ghci/parse/ghc_message/not_found.rs b/src/ghci/parse/ghc_message/not_found.rs new file mode 100644 index 00000000..b9c63eee --- /dev/null +++ b/src/ghci/parse/ghc_message/not_found.rs @@ -0,0 +1,308 @@ +use std::str::FromStr; + +use camino::Utf8PathBuf; +use miette::miette; +use winnow::combinator::alt; +use winnow::combinator::rest; +use winnow::PResult; +use winnow::Parser; + +use crate::ghci::parse::haskell_grammar::module_name; +use crate::ghci::parse::haskell_source_file; +use crate::ghci::parse::lines::line_ending_or_eof; + +use super::single_quoted::single_quoted; + +/// A module or file was not found. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NotFound { + /// A file for import was not found. + /// + /// ```text + /// File src/Foo.hs not found + /// ``` + File(Utf8PathBuf), + + /// A module for import was not found. + /// + /// ```text + /// Module Foo not found + /// ``` + Module(String), + + /// A target was not found as a source file or recognized as a module name. + /// + /// ```text + /// target ‘src/Puppy’ is not a module name or a source file + /// ``` + Unrecognized(String), +} + +impl FromStr for NotFound { + type Err = miette::Report; + + fn from_str(s: &str) -> Result { + not_found.parse(s).map_err(|err| miette!("{err}")) + } +} + +/// Parse a `File src/Foo.hs not found` message. +fn file_not_found(input: &mut &str) -> PResult { + let _ = "File ".parse_next(input)?; + let (path, _) = haskell_source_file(" not found").parse_next(input)?; + let _ = line_ending_or_eof.parse_next(input)?; + Ok(path) +} + +/// Parse a `Module Foo not found` message. +fn module_not_found(input: &mut &str) -> PResult { + let _ = "Module ".parse_next(input)?; + let module = module_name(input)?; + let _ = " not found".parse_next(input)?; + let _ = line_ending_or_eof.parse_next(input)?; + Ok(module.to_owned()) +} + +/// Parse a `target 'Foo' is not a module name or a source file` message. +fn unrecognized(input: &mut &str) -> PResult { + let _ = "target ".parse_next(input)?; + let (name, _) = single_quoted( + rest, + (" is not a module name or a source file", line_ending_or_eof), + ) + .parse_next(input)?; + Ok(name.to_owned()) +} + +pub fn not_found(input: &mut &str) -> PResult { + alt(( + file_not_found.map(NotFound::File), + module_not_found.map(NotFound::Module), + unrecognized.map(NotFound::Unrecognized), + )) + .parse_next(input) +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn test_parse_module_not_found() { + assert_eq!( + "Module Puppy not found".parse::().unwrap(), + NotFound::Module("Puppy".into()) + ); + + assert_eq!( + "Module Puppy not found\n".parse::().unwrap(), + NotFound::Module("Puppy".into()) + ); + + assert_eq!( + "Module Puppy.Doggy' not found\n" + .parse::() + .unwrap(), + NotFound::Module("Puppy.Doggy'".into()) + ); + + // Negative cases. + assert!("Module Puppy Doggy not found\n" + .parse::() + .is_err()); + assert!("Module Puppy\\ Doggy not found\n" + .parse::() + .is_err()); + assert!("Module Puppy*.Doggy not found\n" + .parse::() + .is_err()); + assert!("Module Puppy.Doggy not\n".parse::().is_err()); + assert!("Module Puppy\n.Doggy not found\n" + .parse::() + .is_err()); + } + + #[test] + fn test_parse_file_not_found() { + assert_eq!( + "File src/Puppy.hs not found".parse::().unwrap(), + NotFound::File("src/Puppy.hs".into()) + ); + + assert_eq!( + "File src/Puppy.hs not found\n".parse::().unwrap(), + NotFound::File("src/Puppy.hs".into()) + ); + + assert_eq!( + "File src/ Puppy.hs not found\n" + .parse::() + .unwrap(), + NotFound::File("src/ Puppy.hs".into()) + ); + + assert_eq!( + "File src/\nPuppy.hs not found\n" + .parse::() + .unwrap(), + NotFound::File("src/\nPuppy.hs".into()) + ); + + assert_eq!( + "File src/Puppy.hs.lhs not found\n" + .parse::() + .unwrap(), + NotFound::File("src/Puppy.hs.lhs".into()) + ); + + assert_eq!( + "File src/Puppy.hs not foun.lhs not found\n" + .parse::() + .unwrap(), + NotFound::File("src/Puppy.hs not foun.lhs".into()) + ); + + // Negative cases. + + // No extension. + assert!("File src/Puppy not found\n".parse::().is_err()); + + // Non-Haskell extension. + assert!("File src/Puppy.x not found\n".parse::().is_err()); + assert!("File src/Puppy.hs.bak not found\n" + .parse::() + .is_err()); + + // Extra punctuation. + assert!("File src/Puppy.hs not found!\n" + .parse::() + .is_err()); + + // Case sensitivity. + assert!("file src/Puppy.hs not found!\n" + .parse::() + .is_err()); + } + + #[test] + fn test_parse_unrecognized_not_found() { + // The input and quoting here is maddeningly open-ended so there's a ton of these cases. + + assert_eq!( + "target ‘src/Puppy’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("src/Puppy".into()), + ); + + // Newline at end. + assert_eq!( + "target ‘src/Puppy’ is not a module name or a source file\n" + .parse::() + .unwrap(), + NotFound::Unrecognized("src/Puppy".into()), + ); + + // Empty string. + assert_eq!( + "target ‘’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("".into()), + ); + + // Whitespace. + assert_eq!( + "target ‘src/ Puppy’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("src/ Puppy".into()), + ); + assert_eq!( + "target ‘ src/Puppy’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized(" src/Puppy".into()), + ); + assert_eq!( + "target ‘src/Puppy ’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("src/Puppy ".into()), + ); + + // Internal quotes! + assert_eq!( + "target ‘src/Pupp'y’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("src/Pupp'y".into()), + ); + assert_eq!( + "target ‘src/Pupp'''y’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("src/Pupp'''y".into()), + ); + assert_eq!( + "target ‘'src/Puppy'’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("'src/Puppy'".into()), + ); + assert_eq!( + "target ‘‘src/Puppy’’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("‘src/Puppy’".into()), + ); + + // Newlines oh my! + assert_eq!( + "target ‘src\n/Puppy\n’ is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("src\n/Puppy\n".into()), + ); + + // ASCII quotes. + assert_eq!( + "target `src/Puppy' is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("src/Puppy".into()), + ); + assert_eq!( + "target ``src/Puppy' is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("`src/Puppy".into()), + ); + assert_eq!( + "target `src/Pupp'y`' is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("src/Pupp'y`".into()), + ); + + assert_eq!( + "target 'src/Puppy' is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("'src/Puppy'".into()), + ); + assert_eq!( + "target 'src/Pupp'y' is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("'src/Pupp'y'".into()), + ); + assert_eq!( + "target src/Puppy' is not a module name or a source file" + .parse::() + .unwrap(), + NotFound::Unrecognized("src/Puppy'".into()), + ); + } +}