Skip to content

Commit 9f73cf3

Browse files
committed
ModuleSet: store a set rather than a map
Previously, a `ModuleSet` wrapped a `HashMap<NormalPath, TargetKind>`. This had a number of undesirable consequences: * The data about a module's name (as its path) and how it was loaded (as its `TargetKind`) were split from each other and difficult to reference. * The module's import name wasn't explicitly stored anywhere, so we needed to convert between paths and dotted names when those were needed, which required hitting the disk. * There wasn't a type for the module's import name, so when we (e.g.) `:unadd`ed modules we needed to format them as strings. Now, a `ModuleSet` wraps a `HashSet<LoadedModule>`. * A `LoadedModule` wraps a path but optionally contains the module's dotted name, if the module is loaded by name (and needs to be referred to by name to avoid the "module defined in multiple files" error). * The `LoadedModule` `Display` instance formats the module's import name correctly (with a dotted name if needed) and avoids hitting the disk or any string processing.
1 parent 910c4c3 commit 9f73cf3

File tree

10 files changed

+335
-218
lines changed

10 files changed

+335
-218
lines changed

src/ghci/loaded_module.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
use std::borrow::Borrow;
2+
use std::fmt::Display;
3+
use std::hash::Hash;
4+
use std::hash::Hasher;
5+
6+
use camino::Utf8Path;
7+
8+
use crate::normal_path::NormalPath;
9+
10+
/// Information about a module loaded into a `ghci` session.
11+
///
12+
/// Hashing and equality are determined by the module's path alone.
13+
#[derive(Debug, Clone, Eq)]
14+
pub struct LoadedModule {
15+
/// The module's source file.
16+
path: NormalPath,
17+
18+
/// The module's name.
19+
///
20+
/// This is present if and only if the module is loaded by name.
21+
///
22+
/// Entries in `:show targets` can be one of two types: module paths or module names (with `.` in
23+
/// place of path separators). Due to a `ghci` bug, the module can only be referred to as whichever
24+
/// form it was originally added as (see below), so we use this to track how we refer to modules.
25+
///
26+
/// See: <https://gitlab.haskell.org/ghc/ghc/-/issues/13254#note_525037>
27+
name: Option<String>,
28+
}
29+
30+
impl LoadedModule {
31+
/// Create a new module, loaded by path.
32+
pub fn new(path: NormalPath) -> Self {
33+
Self { path, name: None }
34+
}
35+
36+
/// Create a new module, loaded by name.
37+
pub fn with_name(path: NormalPath, name: String) -> Self {
38+
Self {
39+
path,
40+
name: Some(name),
41+
}
42+
}
43+
44+
/// Get the name to use to refer to this module.
45+
pub fn name(&self) -> LoadedModuleName {
46+
match self.name.as_deref() {
47+
Some(name) => LoadedModuleName::Name(name),
48+
None => LoadedModuleName::Path(&self.path),
49+
}
50+
}
51+
52+
/// Get the module's source path.
53+
pub fn path(&self) -> &NormalPath {
54+
&self.path
55+
}
56+
}
57+
58+
impl Display for LoadedModule {
59+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60+
write!(f, "{}", self.name())
61+
}
62+
}
63+
64+
impl Hash for LoadedModule {
65+
fn hash<H: Hasher>(&self, state: &mut H) {
66+
self.path.hash(state)
67+
}
68+
}
69+
70+
impl PartialEq for LoadedModule {
71+
fn eq(&self, other: &Self) -> bool {
72+
self.path.eq(&other.path)
73+
}
74+
}
75+
76+
impl PartialOrd for LoadedModule {
77+
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
78+
self.path.partial_cmp(&other.path)
79+
}
80+
}
81+
82+
impl Ord for LoadedModule {
83+
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
84+
self.path.cmp(&other.path)
85+
}
86+
}
87+
88+
impl Borrow<NormalPath> for LoadedModule {
89+
fn borrow(&self) -> &NormalPath {
90+
&self.path
91+
}
92+
}
93+
94+
impl Borrow<Utf8Path> for LoadedModule {
95+
fn borrow(&self) -> &Utf8Path {
96+
&self.path
97+
}
98+
}
99+
100+
/// The name to use to refer to a module loaded into a GHCi session.
101+
///
102+
/// Entries in `:show targets` can be one of two types: module paths or module names (with `.` in
103+
/// place of path separators). Due to a `ghci` bug, the module can only be referred to as whichever
104+
/// form it was originally added as (see below), so we use this to track how we refer to modules.
105+
///
106+
/// See: <https://gitlab.haskell.org/ghc/ghc/-/issues/13254#note_525037>
107+
#[derive(Debug)]
108+
pub enum LoadedModuleName<'a> {
109+
/// A path to a Haskell source file, like `src/My/Cool/Module.hs`.
110+
Path(&'a Utf8Path),
111+
/// A dotted module name, like `My.Cool.Module`.
112+
Name(&'a str),
113+
}
114+
115+
impl<'a> Display for LoadedModuleName<'a> {
116+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117+
match self {
118+
LoadedModuleName::Path(path) => write!(f, "{path}"),
119+
LoadedModuleName::Name(name) => write!(f, "{name}"),
120+
}
121+
}
122+
}

src/ghci/mod.rs

Lines changed: 85 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use nix::sys::signal::Signal;
66
use owo_colors::OwoColorize;
77
use owo_colors::Stream::Stdout;
88
use std::borrow::Borrow;
9+
use std::borrow::Cow;
910
use std::collections::BTreeMap;
1011
use std::collections::BTreeSet;
1112
use std::fmt::Debug;
@@ -50,7 +51,6 @@ pub mod parse;
5051
use parse::parse_eval_commands;
5152
use parse::CompilationResult;
5253
use parse::EvalCommand;
53-
use parse::ModuleSet;
5454
use parse::ShowPaths;
5555

5656
mod ghci_command;
@@ -63,6 +63,12 @@ mod writer;
6363
use crate::buffers::GHCI_BUFFER_CAPACITY;
6464
pub use crate::ghci::writer::GhciWriter;
6565

66+
mod module_set;
67+
pub use module_set::ModuleSet;
68+
69+
mod loaded_module;
70+
use loaded_module::LoadedModule;
71+
6672
use crate::aho_corasick::AhoCorasickExt;
6773
use crate::buffers::LINE_BUFFER_CAPACITY;
6874
use crate::cli::Opts;
@@ -80,8 +86,6 @@ use crate::shutdown::ShutdownHandle;
8086
use crate::CommandExt;
8187
use crate::StringCase;
8288

83-
use self::parse::TargetKind;
84-
8589
/// The `ghci` prompt we use. Should be unique enough, but maybe we can make it better with Unicode
8690
/// private-use-area codepoints or something in the future.
8791
pub const PROMPT: &str = "###~GHCIWATCH-PROMPT~###";
@@ -613,10 +617,10 @@ impl Ghci {
613617

614618
let mut eval_commands = BTreeMap::new();
615619

616-
for path in self.targets.iter() {
617-
let commands = Self::parse_eval_commands(path).await?;
620+
for target in self.targets.iter() {
621+
let commands = Self::parse_eval_commands(target.path()).await?;
618622
if !commands.is_empty() {
619-
eval_commands.insert(path.clone(), commands);
623+
eval_commands.insert(target.path().clone(), commands);
620624
}
621625
}
622626

@@ -670,23 +674,79 @@ impl Ghci {
670674
Ok(commands)
671675
}
672676

673-
/// `:add` a module or modules to the `ghci` session by path.
677+
/// `:add` a module or modules to the GHCi session.
674678
#[instrument(skip(self), level = "debug")]
675679
async fn add_modules(
676680
&mut self,
677681
paths: &[NormalPath],
678682
log: &mut CompilationLog,
679683
) -> miette::Result<()> {
680-
let modules = self.targets.format_modules(&self.search_paths, paths)?;
684+
let mut modules = Vec::with_capacity(paths.len());
685+
for path in paths {
686+
if self.targets.contains_source_path(path) {
687+
return Err(miette!(
688+
"Attempting to add already-loaded module: {path}\n\
689+
This is a ghciwatch bug; please report it upstream"
690+
));
691+
} else {
692+
modules.push(LoadedModule::new(path.clone()));
693+
}
694+
}
681695

682696
self.stdin
683697
.add_modules(&mut self.stdout, &modules, log)
684698
.await?;
685699

686-
for path in paths {
687-
self.targets
688-
.insert_source_path(path.clone(), TargetKind::Path);
689-
}
700+
// TODO: This could lead to the module set getting out of sync with the underlying GHCi
701+
// session.
702+
//
703+
// If there's a TOATOU bug here (e.g. we're attempting to add a module but the file no
704+
// longer exists), then we can get into a situation like this:
705+
//
706+
// ghci> :add src/DoesntExist.hs src/MyLib.hs
707+
// File src/DoesntExist.hs not found
708+
// [4 of 4] Compiling MyLib ( src/MyLib.hs, interpreted )
709+
// Ok, four modules loaded.
710+
//
711+
// ghci> :show targets
712+
// src/MyLib.hs
713+
// ...
714+
//
715+
// We've requested to load two modules, only one has been loaded, but GHCi has reported
716+
// that compilation was successful and hasn't added the failing module to the target set.
717+
// Note that if the file is found but compilation fails, the file _is_ added to the target
718+
// set:
719+
//
720+
// ghci> :add src/MyCoolLib.hs
721+
// [4 of 4] Compiling MyCoolLib ( src/MyCoolLib.hs, interpreted )
722+
//
723+
// src/MyCoolLib.hs:4:12: error:
724+
// • Couldn't match expected type ‘IO ()’ with actual type ‘()’
725+
// • In the expression: ()
726+
// In an equation for ‘someFunc’: someFunc = ()
727+
// |
728+
// 4 | someFunc = ()
729+
// | ^^
730+
// Failed, three modules loaded.
731+
//
732+
// ghci> :show targets
733+
// src/MyCoolLib.hs
734+
// ...
735+
//
736+
// I think this is OK, because the only reason we need to know which modules are loaded is
737+
// to avoid the "module defined in multiple files" bug [1], so the potential outcomes of
738+
// making this mistake are:
739+
//
740+
// 1. The next time the file is modified, we attempt to `:add` it instead of `:reload`ing
741+
// it. This is harmless, though it changes the order that `:show modules` prints output
742+
// in (maybe local binding order as well or something).
743+
// 2. The next time the file is modified, we attempt to `:add` it by path instead of by
744+
// module name, but this function is only used when the modules aren't already in the
745+
// target set, so we know the module doesn't need to be referred to by its module name.
746+
//
747+
// [1]: https://gitlab.haskell.org/ghc/ghc/-/issues/13254#note_525037
748+
749+
self.targets.extend(modules);
690750

691751
self.refresh_eval_commands_for_paths(paths).await?;
692752

@@ -702,14 +762,15 @@ impl Ghci {
702762
path: &NormalPath,
703763
log: &mut CompilationLog,
704764
) -> miette::Result<()> {
705-
let module = self.targets.module_import_name(&self.search_paths, path)?;
765+
let module = self.targets.get_import_name(path);
706766

707767
self.stdin
708-
.interpret_module(&mut self.stdout, &module.name, log)
768+
.interpret_module(&mut self.stdout, &module, log)
709769
.await?;
710770

711-
if !module.loaded {
712-
self.targets.insert_source_path(path.clone(), module.kind);
771+
// Note: A borrowed path is only returned if the path is already present in the module set.
772+
if let Cow::Owned(module) = module {
773+
self.targets.insert_module(module);
713774
}
714775

715776
self.refresh_eval_commands_for_paths(std::iter::once(path))
@@ -725,21 +786,24 @@ impl Ghci {
725786
paths: &[NormalPath],
726787
log: &mut CompilationLog,
727788
) -> miette::Result<()> {
789+
let modules = paths
790+
.iter()
791+
.map(|path| self.targets.get_import_name(path).into_owned())
792+
.collect::<Vec<_>>();
793+
728794
// Each `:unadd` implicitly reloads as well, so we have to `:unadd` all the modules in a
729795
// single command so that GHCi doesn't try to load a bunch of removed modules after each
730796
// one.
731-
let modules = self.targets.format_modules(&self.search_paths, paths)?;
732-
733797
self.stdin
734-
.remove_modules(&mut self.stdout, &modules, log)
798+
.remove_modules(&mut self.stdout, modules.iter().map(Borrow::borrow), log)
735799
.await?;
736800

737801
for path in paths {
738802
self.targets.remove_source_path(path);
739-
self.clear_eval_commands_for_paths(std::iter::once(path))
740-
.await;
741803
}
742804

805+
self.clear_eval_commands_for_paths(paths).await;
806+
743807
Ok(())
744808
}
745809

0 commit comments

Comments
 (0)