diff --git a/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs b/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs index 33caf88b..9d471af6 100644 --- a/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs +++ b/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs @@ -4,11 +4,11 @@ use winnow::ascii::space1; use winnow::combinator::alt; use winnow::combinator::opt; use winnow::combinator::repeat; -use winnow::token::take_until; use winnow::PResult; use winnow::Parser; use crate::ghci::parse::haskell_grammar::module_name; +use crate::ghci::parse::haskell_source_file; use crate::ghci::parse::lines::line_ending_or_eof; use crate::ghci::parse::lines::rest_of_line; use crate::ghci::parse::Severity; @@ -44,11 +44,10 @@ pub fn module_import_cycle_diagnostic(input: &mut &str) -> PResult PResult> { diff --git a/src/ghci/parse/haskell_source_file.rs b/src/ghci/parse/haskell_source_file.rs new file mode 100644 index 00000000..6c1dcb8d --- /dev/null +++ b/src/ghci/parse/haskell_source_file.rs @@ -0,0 +1,156 @@ +use camino::Utf8PathBuf; +use winnow::combinator::alt; +use winnow::combinator::repeat_till; +use winnow::error::ParserError; +use winnow::stream::Accumulate; +use winnow::stream::AsChar; +use winnow::stream::Compare; +use winnow::stream::Stream; +use winnow::stream::StreamIsPartial; +use winnow::token::take_till; +use winnow::Parser; + +use crate::haskell_source_file::HASKELL_SOURCE_EXTENSIONS; + +/// Parse a Haskell source file name and an ending delimiter. +/// +/// The returned path will end with a dot and one of the [`HASKELL_SOURCE_EXTENSIONS`], but may +/// otherwise contain quirks up to and including multiple extensions, whitespace, and newlines. 
+///
+/// GHCi is actually even more lenient than this in what it accepts; it'll automatically append
+/// `.hs` and `.lhs` to paths you give it and check if those exist, but fortunately they get
+/// printed out in `:show targets` and diagnostics as the resolved source paths:
+///
+/// ```text
+/// ghci> :add src/MyLib
+/// [1 of 1] Compiling MyLib ( src/MyLib.hs, interpreted )
+///
+/// ghci> :show targets
+/// src/MyLib.hs
+///
+/// ghci> :add src/Foo
+/// target ‘src/Foo’ is not a module name or a source file
+///
+/// ghci> :add src/MyLib.lhs
+/// File src/MyLib.lhs not found
+///
+/// ghci> :add "src/ Foo.hs"
+/// File src/ Foo.hs not found
+///
+/// ghci> :add "src\n/Foo.hs"
+/// File src
+/// /Foo.hs not found
+/// ```
+///
+/// The path is assembled from one or more chunks, each of which runs up to and including a
+/// recognized extension. After every chunk the `end` parser is tried; the first time it
+/// succeeds, parsing stops. If `end` does not match, another chunk is consumed, which is how
+/// paths like `src/Puppy.hs.Doggy.lhs` are accepted.
+///
+/// Returns the parsed path together with whatever `end` produced.
+pub fn haskell_source_file<I, O, E>(
+    end: impl Parser<I, O, E>,
+) -> impl Parser<I, (Utf8PathBuf, O), E>
+where
+    I: Stream + StreamIsPartial + for<'a> Compare<&'a str>,
+    E: ParserError<I>,
+    <I as Stream>::Token: AsChar,
+    char: Parser<I, <I as Stream>::Token, E>,
+    String: Accumulate<<I as Stream>::Slice>,
+{
+    repeat_till(1.., path_chunk(), end)
+        .map(|(path, end): (String, O)| (Utf8PathBuf::from(path), end))
+}
+
+/// Parse the shortest non-empty run of input that ends with a dot followed by one of the
+/// [`HASKELL_SOURCE_EXTENSIONS`], returning the recognized slice.
+fn path_chunk<I, E>() -> impl Parser<I, <I as Stream>::Slice, E>
+where
+    I: Stream + StreamIsPartial + for<'a> Compare<&'a str>,
+    E: ParserError<I>,
+    <I as Stream>::Token: AsChar,
+    char: Parser<I, <I as Stream>::Token, E>,
+{
+    // `alt` commits to the first alternative that matches, and `HASKELL_SOURCE_EXTENSIONS`
+    // deliberately lists `hs` first. Used as-is, `hs` would match the start of `hs-boot`,
+    // `hsig`, `hspp`, and `hscpp` (and `lhs` the start of `lhs-boot` and `lhsig`), cutting the
+    // path short. Try the longest extensions first so that the whole extension is consumed.
+    // The sort is stable, so extensions of equal length keep their original relative order.
+    let mut extensions = HASKELL_SOURCE_EXTENSIONS;
+    extensions.sort_by_key(|extension| std::cmp::Reverse(extension.len()));
+
+    repeat_till::<_, _, (), _, _, _, _>(
+        1..,
+        (take_till(0.., '.'), '.'),
+        alt(extensions),
+    )
+    .recognize()
+}
+
+#[cfg(test)]
+mod tests {
+    use pretty_assertions::assert_eq;
+    use winnow::error::ContextError;
+    use winnow::error::ParseError;
+
+    use super::*;
+
+    fn parse_haskell_source_file<'a, O>(
+        input: &'a str,
+        end: impl Parser<&'a str, O, ContextError>,
+    ) -> Result<(Utf8PathBuf, O), ParseError<&'a str, ContextError>> {
+        haskell_source_file::<&str, _, ContextError>(end).parse(input)
+    }
+
+    #[test]
+    fn test_parse_haskell_source_file_prefixed_extensions() {
+        // Extensions that start with a shorter extension must be matched in full.
+        assert_eq!(
+            parse_haskell_source_file("src/Puppy.hs-boot ", ' ').unwrap(),
+            (Utf8PathBuf::from("src/Puppy.hs-boot"), ' ')
+        );
+        assert_eq!(
+            parse_haskell_source_file("src/Puppy.lhs-boot ", ' ').unwrap(),
+            (Utf8PathBuf::from("src/Puppy.lhs-boot"), ' ')
+        );
+        assert_eq!(
+            parse_haskell_source_file("src/Puppy.hsig ", ' ').unwrap(),
+            (Utf8PathBuf::from("src/Puppy.hsig"), ' ')
+        );
+        assert_eq!(
+            parse_haskell_source_file("src/Puppy.lhsig ", ' ').unwrap(),
+            (Utf8PathBuf::from("src/Puppy.lhsig"), ' ')
+        );
+        assert_eq!(
+            parse_haskell_source_file("src/Puppy.hspp ", ' ').unwrap(),
+            (Utf8PathBuf::from("src/Puppy.hspp"), ' ')
+        );
+        assert_eq!(
+            parse_haskell_source_file("src/Puppy.hscpp ", ' ').unwrap(),
+            (Utf8PathBuf::from("src/Puppy.hscpp"), ' ')
+        );
+    }
+
+    #[test]
+    fn test_parse_haskell_source_file() {
+        // No end delimiter.
+        assert!(parse_haskell_source_file("src/Puppy.hs", ' ').is_err());
+
+        // No source file.
+ assert!(parse_haskell_source_file(" ", ' ').is_err()); + + // Simple source file. + assert_eq!( + parse_haskell_source_file("src/Puppy.hs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hs"), ' ') + ); + + // Weirder path, non-standard extension. + assert_eq!( + parse_haskell_source_file("src/../Puppy/Doggy.lhs ", ' ').unwrap(), + (Utf8PathBuf::from("src/../Puppy/Doggy.lhs"), ' ') + ); + + // Multiple extensions! + assert_eq!( + parse_haskell_source_file("src/Puppy.hs.lhs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hs.lhs"), ' ') + ); + + // More filename after extension. + assert_eq!( + parse_haskell_source_file("src/Puppy.hs.Doggy.lhs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hs.Doggy.lhs"), ' ') + ); + + // More filename after extension, no dot after extension. + assert_eq!( + parse_haskell_source_file("src/Puppy.hsDoggy.lhs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hsDoggy.lhs"), ' ') + ); + + // Space in middle. + assert_eq!( + parse_haskell_source_file("src/Pu ppy.hs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Pu ppy.hs"), ' ') + ); + + // Space and extension in middle. + assert_eq!( + parse_haskell_source_file("src/Puppy.hsD oggy.hs ", ' ').unwrap(), + (Utf8PathBuf::from("src/Puppy.hsD oggy.hs"), ' ') + ); + + // Do you know that GHCi will happily read paths that contain newlines?? 
+ assert_eq!( + parse_haskell_source_file("src/\nPuppy.hs ", ' ').unwrap(), + (Utf8PathBuf::from("src/\nPuppy.hs"), ' ') + ); + + // If you do this and it breaks it's your own fault: + assert_eq!( + parse_haskell_source_file("src/Puppy.hs.hs", ".hs").unwrap(), + (Utf8PathBuf::from("src/Puppy.hs"), ".hs") + ); + + // This is dubious for the same reason: + assert_eq!( + parse_haskell_source_file("src/Puppy.hs.", '.').unwrap(), + (Utf8PathBuf::from("src/Puppy.hs"), '.') + ); + } +} diff --git a/src/ghci/parse/mod.rs b/src/ghci/parse/mod.rs index ae4a31ab..245568bf 100644 --- a/src/ghci/parse/mod.rs +++ b/src/ghci/parse/mod.rs @@ -3,6 +3,7 @@ mod eval; mod ghc_message; mod haskell_grammar; +mod haskell_source_file; mod lines; mod module_and_files; mod module_set; @@ -11,6 +12,7 @@ mod show_targets; mod target_kind; use haskell_grammar::module_name; +use haskell_source_file::haskell_source_file; use lines::rest_of_line; use module_and_files::module_and_files; diff --git a/src/haskell_source_file.rs b/src/haskell_source_file.rs index 53e1dd4f..2ed3538e 100644 --- a/src/haskell_source_file.rs +++ b/src/haskell_source_file.rs @@ -3,17 +3,20 @@ use camino::Utf8Path; /// File extensions for Haskell source code. -pub const HASKELL_SOURCE_EXTENSIONS: [&str; 9] = [ +/// +/// See: +/// +/// See: +pub const HASKELL_SOURCE_EXTENSIONS: [&str; 8] = [ // NOTE: This should start with `hs` so that iterators try the most common extension first. - "hs", // Haskell - "lhs", // Literate Haskell + "hs", // Haskell + "lhs", // Literate Haskell "hs-boot", // See: https://downloads.haskell.org/ghc/latest/docs/users_guide/separate_compilation.html#how-to-compile-mutually-recursive-modules + "lhs-boot", // Literate `hs-boot`. 
"hsig", // Backpack module signatures: https://ghc.gitlab.haskell.org/ghc/doc/users_guide/separate_compilation.html#module-signatures - "hsc", // `hsc2hs` C bindings: https://downloads.haskell.org/ghc/latest/docs/users_guide/utils.html?highlight=interfaces#writing-haskell-interfaces-to-c-code-hsc2hs - "x", // `alex` (lexer generator): https://hackage.haskell.org/package/alex - "y", // `happy` (parser generator): https://hackage.haskell.org/package/happy - "c2hs", // `c2hs` C bindings: https://hackage.haskell.org/package/c2hs - "gc", // `greencard` C bindings: https://hackage.haskell.org/package/greencard + "lhsig", // Literate backpack module signatures. + "hspp", // "A file created by the preprocessor". + "hscpp", // Haskell C-preprocessor files. ]; /// Determine if a given path represents a Haskell source file.