Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ use winnow::ascii::space1;
use winnow::combinator::alt;
use winnow::combinator::opt;
use winnow::combinator::repeat;
use winnow::token::take_until;
use winnow::PResult;
use winnow::Parser;

use crate::ghci::parse::haskell_grammar::module_name;
use crate::ghci::parse::haskell_source_file;
use crate::ghci::parse::lines::line_ending_or_eof;
use crate::ghci::parse::lines::rest_of_line;
use crate::ghci::parse::Severity;
Expand Down Expand Up @@ -44,11 +44,10 @@ pub fn module_import_cycle_diagnostic(input: &mut &str) -> PResult<Vec<GhcMessag
let _ = single_quote.parse_next(input)?;
let _ = space1.parse_next(input)?;
let _ = "(".parse_next(input)?;
let path = take_until(1.., ")").parse_next(input)?;
let _ = ")".parse_next(input)?;
let (path, _) = haskell_source_file(')').parse_next(input)?;
let _ = rest_of_line.parse_next(input)?;

Ok(Utf8PathBuf::from(path))
Ok(path)
}

fn inner(input: &mut &str) -> PResult<Vec<Utf8PathBuf>> {
Expand Down
156 changes: 156 additions & 0 deletions src/ghci/parse/haskell_source_file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
use camino::Utf8PathBuf;
use winnow::combinator::alt;
use winnow::combinator::repeat_till;
use winnow::error::ParserError;
use winnow::stream::Accumulate;
use winnow::stream::AsChar;
use winnow::stream::Compare;
use winnow::stream::Stream;
use winnow::stream::StreamIsPartial;
use winnow::token::take_till;
use winnow::Parser;

use crate::haskell_source_file::HASKELL_SOURCE_EXTENSIONS;

/// Parse a Haskell source file name and an ending delimiter.
///
/// The returned path will end with a dot and one of the [`HASKELL_SOURCE_EXTENSIONS`], but may
/// otherwise contain quirks up to and including multiple extensions, whitespace, and newlines.
///
/// GHCi is actually even more lenient than this in what it accepts; it'll automatically append
/// `.hs` and `.lhs` to paths you give it and check if those exist, but fortunately they get
/// printed out in `:show targets` and diagnostics as the resolved source paths:
///
/// ```text
/// ghci> :add src/MyLib
/// [1 of 1] Compiling MyLib ( src/MyLib.hs, interpreted )
///
/// ghci> :show targets
/// src/MyLib.hs
///
/// ghci> :add src/Foo
/// target ‘src/Foo’ is not a module name or a source file
///
/// ghci> :add src/MyLib.lhs
/// File src/MyLib.lhs not found
///
/// ghci> :add "src/ Foo.hs"
/// File src/ Foo.hs not found
///
/// ghci> :add "src\n/Foo.hs"
/// File src
/// /Foo.hs not found
/// ```
pub fn haskell_source_file<I, O, E>(
end: impl Parser<I, O, E>,
) -> impl Parser<I, (Utf8PathBuf, O), E>
where
I: Stream + StreamIsPartial + for<'a> Compare<&'a str>,
E: ParserError<I>,
<I as Stream>::Token: AsChar,
char: Parser<I, <I as Stream>::Token, E>,
String: Accumulate<<I as Stream>::Slice>,
{
repeat_till(1.., path_chunk(), end)
.map(|(path, end): (String, O)| (Utf8PathBuf::from(path), end))
}

fn path_chunk<I, E>() -> impl Parser<I, <I as Stream>::Slice, E>
where
I: Stream + StreamIsPartial + for<'a> Compare<&'a str>,
E: ParserError<I>,
<I as Stream>::Token: AsChar,
char: Parser<I, <I as Stream>::Token, E>,
{
repeat_till::<_, _, (), _, _, _, _>(
1..,
(take_till(0.., '.'), '.'),
alt(HASKELL_SOURCE_EXTENSIONS),
)
.recognize()
}

#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use winnow::error::ContextError;
use winnow::error::ParseError;

use super::*;

fn parse_haskell_source_file<'a, O>(
input: &'a str,
end: impl Parser<&'a str, O, ContextError>,
) -> Result<(Utf8PathBuf, O), ParseError<&'a str, ContextError>> {
haskell_source_file::<&str, _, ContextError>(end).parse(input)
}

#[test]
fn test_parse_haskell_source_file() {
// No end delimiter.
assert!(parse_haskell_source_file("src/Puppy.hs", ' ').is_err());

// No source file.
assert!(parse_haskell_source_file(" ", ' ').is_err());

// Simple source file.
assert_eq!(
parse_haskell_source_file("src/Puppy.hs ", ' ').unwrap(),
(Utf8PathBuf::from("src/Puppy.hs"), ' ')
);

// Weirder path, non-standard extension.
assert_eq!(
parse_haskell_source_file("src/../Puppy/Doggy.lhs ", ' ').unwrap(),
(Utf8PathBuf::from("src/../Puppy/Doggy.lhs"), ' ')
);

// Multiple extensions!
assert_eq!(
parse_haskell_source_file("src/Puppy.hs.lhs ", ' ').unwrap(),
(Utf8PathBuf::from("src/Puppy.hs.lhs"), ' ')
);

// More filename after extension.
assert_eq!(
parse_haskell_source_file("src/Puppy.hs.Doggy.lhs ", ' ').unwrap(),
(Utf8PathBuf::from("src/Puppy.hs.Doggy.lhs"), ' ')
);

// More filename after extension, no dot after extension.
assert_eq!(
parse_haskell_source_file("src/Puppy.hsDoggy.lhs ", ' ').unwrap(),
(Utf8PathBuf::from("src/Puppy.hsDoggy.lhs"), ' ')
);

// Space in middle.
assert_eq!(
parse_haskell_source_file("src/Pu ppy.hs ", ' ').unwrap(),
(Utf8PathBuf::from("src/Pu ppy.hs"), ' ')
);

// Space and extension in middle.
assert_eq!(
parse_haskell_source_file("src/Puppy.hsD oggy.hs ", ' ').unwrap(),
(Utf8PathBuf::from("src/Puppy.hsD oggy.hs"), ' ')
);

// Do you know that GHCi will happily read paths that contain newlines??
assert_eq!(
parse_haskell_source_file("src/\nPuppy.hs ", ' ').unwrap(),
(Utf8PathBuf::from("src/\nPuppy.hs"), ' ')
);

// If you do this and it breaks it's your own fault:
assert_eq!(
parse_haskell_source_file("src/Puppy.hs.hs", ".hs").unwrap(),
(Utf8PathBuf::from("src/Puppy.hs"), ".hs")
);

// This is dubious for the same reason:
assert_eq!(
parse_haskell_source_file("src/Puppy.hs.", '.').unwrap(),
(Utf8PathBuf::from("src/Puppy.hs"), '.')
);
}
}
2 changes: 2 additions & 0 deletions src/ghci/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
mod eval;
mod ghc_message;
mod haskell_grammar;
mod haskell_source_file;
mod lines;
mod module_and_files;
mod module_set;
Expand All @@ -11,6 +12,7 @@ mod show_targets;
mod target_kind;

use haskell_grammar::module_name;
use haskell_source_file::haskell_source_file;
use lines::rest_of_line;
use module_and_files::module_and_files;

Expand Down
19 changes: 11 additions & 8 deletions src/haskell_source_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,20 @@
use camino::Utf8Path;

/// File extensions for Haskell source code.
pub const HASKELL_SOURCE_EXTENSIONS: [&str; 9] = [
///
/// See: <https://downloads.haskell.org/ghc/latest/docs/users_guide/using.html#meaningful-file-suffixes>
///
/// See: <https://gitlab.haskell.org/ghc/ghc/-/blob/077cb2e11fa81076e8c9c5f8dd3bdfa99c8aaf8d/compiler/GHC/Driver/Phases.hs#L236-L252>
pub const HASKELL_SOURCE_EXTENSIONS: [&str; 8] = [
// NOTE: This should start with `hs` so that iterators try the most common extension first.
"hs", // Haskell
"lhs", // Literate Haskell
"hs", // Haskell
"lhs", // Literate Haskell
"hs-boot", // See: https://downloads.haskell.org/ghc/latest/docs/users_guide/separate_compilation.html#how-to-compile-mutually-recursive-modules
"lhs-boot", // Literate `hs-boot`.
"hsig", // Backpack module signatures: https://ghc.gitlab.haskell.org/ghc/doc/users_guide/separate_compilation.html#module-signatures
"hsc", // `hsc2hs` C bindings: https://downloads.haskell.org/ghc/latest/docs/users_guide/utils.html?highlight=interfaces#writing-haskell-interfaces-to-c-code-hsc2hs
"x", // `alex` (lexer generator): https://hackage.haskell.org/package/alex
"y", // `happy` (parser generator): https://hackage.haskell.org/package/happy
"c2hs", // `c2hs` C bindings: https://hackage.haskell.org/package/c2hs
"gc", // `greencard` C bindings: https://hackage.haskell.org/package/greencard
"lhsig", // Literate backpack module signatures.
"hspp", // "A file created by the preprocessor".
"hscpp", // Haskell C-preprocessor files.
];

/// Determine if a given path represents a Haskell source file.
Expand Down