diff --git a/src/ghci/parse/ghc_message/mod.rs b/src/ghci/parse/ghc_message/mod.rs index 6eb9574f..78332bca 100644 --- a/src/ghci/parse/ghc_message/mod.rs +++ b/src/ghci/parse/ghc_message/mod.rs @@ -14,7 +14,7 @@ pub use position::PositionRange; mod severity; pub use severity::Severity; -mod single_quote; +mod single_quoted; mod path_colon; use path_colon::path_colon; diff --git a/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs b/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs index 33caf88b..561aaa04 100644 --- a/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs +++ b/src/ghci/parse/ghc_message/module_import_cycle_diagnostic.rs @@ -13,7 +13,7 @@ use crate::ghci::parse::lines::line_ending_or_eof; use crate::ghci::parse::lines::rest_of_line; use crate::ghci::parse::Severity; -use super::single_quote::single_quote; +use super::single_quoted::single_quoted; use super::GhcDiagnostic; use super::GhcMessage; @@ -39,10 +39,7 @@ pub fn module_import_cycle_diagnostic(input: &mut &str) -> PResult PResult { - one_of(['`', '\'', '‘', '’']).parse_next(input) -} - -#[cfg(test)] -mod tests { - use super::*; - - use pretty_assertions::assert_eq; - - #[test] - fn test_parse_single_quote() { - assert_eq!(single_quote.parse("\'").unwrap(), '\''); - assert_eq!(single_quote.parse("`").unwrap(), '`'); - assert_eq!(single_quote.parse("‘").unwrap(), '‘'); - assert_eq!(single_quote.parse("’").unwrap(), '’'); - - assert!(single_quote.parse("''").is_err()); - assert!(single_quote.parse(" '").is_err()); - assert!(single_quote.parse("' ").is_err()); - assert!(single_quote.parse("`foo'").is_err()); - } -} diff --git a/src/ghci/parse/ghc_message/single_quoted.rs b/src/ghci/parse/ghc_message/single_quoted.rs new file mode 100644 index 00000000..b591c02a --- /dev/null +++ b/src/ghci/parse/ghc_message/single_quoted.rs @@ -0,0 +1,175 @@ +use winnow::combinator::alt; +use winnow::combinator::preceded; +use winnow::error::ParserError; +use winnow::stream::AsChar; +use winnow::stream::Stream; +use winnow::token::any; +use winnow::token::take_till; +use winnow::Parser; + +use crate::ghci::parse::transform_till; + +/// Parse a single-quoted portion of GHC output. +/// +/// If Unicode is supported and `GHC_NO_UNICODE` is unset, the output will be surrounded with +/// Unicode single quotes: +/// +/// ```text +/// ‘puppy’ +/// ``` +/// +/// Otherwise, the output will be surrounded with "GNU-style" quotes: +/// +/// ```text +/// `puppy' +/// ``` +/// +/// However, if the quoted string starts or ends with an ASCII single quote (`'`) and Unicode +/// output is disabled, the quotes will be omitted entirely: +/// +/// ```text +/// puppy -> `puppy' +/// puppy' -> puppy' +/// 'puppy -> 'puppy +/// 'puppy' -> 'puppy' +/// `puppy' -> `puppy' +/// ``` +/// +/// Note that the quoted output for the first and last examples is the same, so the output is +/// ambiguous in this case. +/// +/// See: +/// +/// See: +pub fn single_quoted<'i, O1, O2, E>( + mut inner: impl Parser<&'i str, O1, E>, + mut end: impl Parser<&'i str, O2, E>, +) -> impl Parser<&'i str, (O1, O2), E> +where + E: ParserError<&'i str>, +{ + move |input: &mut &'i str| { + let start = input.checkpoint(); + + let initial = any.parse_next(input)?.as_char(); + match initial { + '‘' => transform_till( + alt((preceded('’', take_till(0.., '’')), take_till(1.., '’'))), + inner.by_ref(), + preceded('’', end.by_ref()), + ) + .parse_next(input), + '`' => { + // If the output starts with a backtick, it must end with a single quote. + // * Either the output is quoted normally (in which case it ends with a single quote), or + // the quotes are skipped. + // * If the quotes are skipped, then the output either starts or ends with a single quote. + // * The output starts with a backtick, so we know it doesn't start with a single quote. + // * Therefore, it must end with a single quote. + transform_till( + alt((preceded('\'', take_till(0.., '\'')), take_till(1.., '\''))), + inner.by_ref(), + preceded('\'', end.by_ref()), + ) + .parse_next(input) + } + // If the output starts with anything else, the quoting must be skipped. + _ => { + input.reset(start); + // Potentially this will have to consume the entire input before backtracking. Sad! + transform_till(any, inner.by_ref(), end.by_ref()).parse_next(input) + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::ghci::parse::haskell_grammar::module_name; + + use super::*; + + use pretty_assertions::assert_eq; + + #[test] + fn test_parse_single_quoted() { + // Unicode. + assert_eq!( + single_quoted(module_name, ' ').parse("‘Puppy’ ").unwrap(), + ("Puppy", ' ') + ); + + assert_eq!( + single_quoted(module_name, ' ').parse("‘Puppy'’ ").unwrap(), + ("Puppy'", ' ') + ); + + assert_eq!( + single_quoted(module_name, ' ').parse("‘Puppy''’ ").unwrap(), + ("Puppy''", ' ') + ); + + // ASCII. + assert_eq!( + single_quoted(module_name, ' ').parse("`Puppy' ").unwrap(), + ("Puppy", ' ') + ); + + // Internal quotes. + assert_eq!( + single_quoted(module_name, ' ').parse("`Pupp'y' ").unwrap(), + ("Pupp'y", ' ') + ); + assert_eq!( + single_quoted(module_name, ' ').parse("`Pupp''y' ").unwrap(), + ("Pupp''y", ' ') + ); + assert_eq!( + single_quoted(module_name, ' ') + .parse("`Pupp'''y' ") + .unwrap(), + ("Pupp'''y", ' ') + ); + assert_eq!( + single_quoted(module_name, ' ') + .parse("`Pupp''''y' ") + .unwrap(), + ("Pupp''''y", ' ') + ); + + // Starts/ends with single quote. + assert_eq!( + single_quoted(module_name, ' ').parse("Puppy' ").unwrap(), + ("Puppy'", ' ') + ); + assert_eq!( + single_quoted(module_name, ' ').parse("Puppy'' ").unwrap(), + ("Puppy''", ' ') + ); + assert_eq!( + single_quoted(preceded('\'', module_name), ' ') + .parse("'Puppy ") + .unwrap(), + ("Puppy", ' ') + ); + assert_eq!( + single_quoted(preceded('\'', module_name), ' ') + .parse("'Puppy' ") + .unwrap(), + ("Puppy'", ' ') + ); + + // Negative cases. + + // No valid ending. + assert!(single_quoted(module_name, ' ').parse("‘Puppy’x").is_err()); + + // Modules can't start with numbers. + assert!(single_quoted(module_name, ' ').parse("`0' ").is_err()); + assert!(single_quoted(module_name, ' ').parse("0 ").is_err()); + + // Delimiters have to match. + assert!(single_quoted(module_name, ' ').parse("‘Puppy' ").is_err()); + assert!(single_quoted(module_name, ' ').parse("`Puppy’ ").is_err()); + } +} diff --git a/src/ghci/parse/mod.rs b/src/ghci/parse/mod.rs index be016b09..eaf84e4c 100644 --- a/src/ghci/parse/mod.rs +++ b/src/ghci/parse/mod.rs @@ -7,6 +7,7 @@ mod lines; mod module_and_files; mod show_paths; mod show_targets; +mod transform_till; use haskell_grammar::module_name; use lines::rest_of_line; @@ -24,3 +25,5 @@ pub use module_and_files::CompilingModule; pub use show_paths::parse_show_paths; pub use show_paths::ShowPaths; pub use show_targets::parse_show_targets; +pub use transform_till::recognize_till; +pub use transform_till::transform_till; diff --git a/src/ghci/parse/transform_till.rs b/src/ghci/parse/transform_till.rs new file mode 100644 index 00000000..f7e6e329 --- /dev/null +++ b/src/ghci/parse/transform_till.rs @@ -0,0 +1,81 @@ +use winnow::combinator::eof; +use winnow::combinator::terminated; +use winnow::error::ErrMode; +use winnow::error::ErrorKind; +use winnow::error::ParserError; +use winnow::stream::Offset; +use winnow::stream::Stream; +use winnow::stream::StreamIsPartial; +use winnow::Parser; + +/// Call the `repeat` parser until the `end` parser produces a result. +/// +/// Then, return the input consumed until the `end` parser was called, and the result of the `end` +/// parser. +/// +/// See: +pub fn recognize_till( + mut repeat: impl Parser, + mut end: impl Parser, +) -> impl Parser::Slice, O), E> +where + I: Stream, + E: ParserError, +{ + move |input: &mut I| { + let start = input.checkpoint(); + + loop { + let before_end = input.checkpoint(); + match end.parse_next(input) { + Ok(end_parsed) => { + let after_end = input.checkpoint(); + + let offset_to_before_end = before_end.offset_from(&start); + input.reset(start); + let input_until_end = input.next_slice(offset_to_before_end); + input.reset(after_end); + + return Ok((input_until_end, end_parsed)); + } + Err(ErrMode::Backtrack(_)) => { + input.reset(before_end); + match repeat.parse_next(input) { + Ok(_) => {} + Err(e) => return Err(e.append(input, ErrorKind::Many)), + } + } + Err(e) => return Err(e), + } + } + } +} + +/// Like [`recognize_till`], but it also applies a `transform` parser to the recognized input. +pub fn transform_till( + mut repeat: impl Parser, + mut transform: impl Parser<::Slice, O1, E>, + mut end: impl Parser, +) -> impl Parser +where + I: Stream, + E: ParserError, + E: ParserError<::Slice>, + ::Slice: Stream + StreamIsPartial, +{ + move |input: &mut I| { + let (mut until_end, end_parsed) = + recognize_till(repeat.by_ref(), end.by_ref()).parse_next(input)?; + + let inner_parsed = terminated(transform.by_ref(), eof) + .parse_next(&mut until_end) + .map_err(|err_mode| match err_mode { + ErrMode::Incomplete(_) => { + panic!("complete parsers should not report `ErrMode::Incomplete(_)`") + } + ErrMode::Backtrack(inner) | ErrMode::Cut(inner) => ErrMode::Cut(inner), + })?; + + Ok((inner_parsed, end_parsed)) + } +}