|
| 1 | +use winnow::combinator::alt; |
| 2 | +use winnow::combinator::preceded; |
| 3 | +use winnow::error::ParserError; |
| 4 | +use winnow::stream::AsChar; |
| 5 | +use winnow::stream::Stream; |
| 6 | +use winnow::token::any; |
| 7 | +use winnow::token::take_till; |
| 8 | +use winnow::Parser; |
| 9 | + |
| 10 | +use crate::ghci::parse::transform_till; |
| 11 | + |
| 12 | +/// Parse a single-quoted portion of GHC output. |
| 13 | +/// |
| 14 | +/// If Unicode is supported and `GHC_NO_UNICODE` is unset, the output will be surrounded with |
| 15 | +/// Unicode single quotes: |
| 16 | +/// |
| 17 | +/// ```text |
| 18 | +/// ‘puppy’ |
| 19 | +/// ``` |
| 20 | +/// |
| 21 | +/// Otherwise, the output will be surrounded with "GNU-style" quotes: |
| 22 | +/// |
| 23 | +/// ```text |
| 24 | +/// `puppy' |
| 25 | +/// ``` |
| 26 | +/// |
| 27 | +/// However, if the quoted string starts or ends with an ASCII single quote (`'`) and Unicode |
| 28 | +/// output is disabled, the quotes will be omitted entirely: |
| 29 | +/// |
| 30 | +/// ```text |
| 31 | +/// puppy -> `puppy' |
| 32 | +/// puppy' -> puppy' |
| 33 | +/// 'puppy -> 'puppy |
| 34 | +/// 'puppy' -> 'puppy' |
| 35 | +/// `puppy' -> `puppy' |
| 36 | +/// ``` |
| 37 | +/// |
| 38 | +/// Note that the quoted output for the first and last examples is the same, so the output is |
| 39 | +/// ambiguous in this case. |
| 40 | +/// |
| 41 | +/// See: <https://gitlab.haskell.org/ghc/ghc/-/blob/077cb2e11fa81076e8c9c5f8dd3bdfa99c8aaf8d/compiler/GHC/Utils/Outputable.hs#L744-L756> |
| 42 | +/// |
| 43 | +/// See: <https://gitlab.haskell.org/ghc/ghc/-/blob/077cb2e11fa81076e8c9c5f8dd3bdfa99c8aaf8d/compiler/GHC/Utils/Ppr.hs#L468> |
| 44 | +pub fn single_quoted<'i, O1, O2, E>( |
| 45 | + mut inner: impl Parser<&'i str, O1, E>, |
| 46 | + mut end: impl Parser<&'i str, O2, E>, |
| 47 | +) -> impl Parser<&'i str, (O1, O2), E> |
| 48 | +where |
| 49 | + E: ParserError<&'i str>, |
| 50 | +{ |
| 51 | + move |input: &mut &'i str| { |
| 52 | + let start = input.checkpoint(); |
| 53 | + |
| 54 | + let initial = any.parse_next(input)?.as_char(); |
| 55 | + match initial { |
| 56 | + '‘' => transform_till( |
| 57 | + alt((preceded('’', take_till(0.., '’')), take_till(1.., '’'))), |
| 58 | + inner.by_ref(), |
| 59 | + preceded('’', end.by_ref()), |
| 60 | + ) |
| 61 | + .parse_next(input), |
| 62 | + '`' => { |
| 63 | + // If the output starts with a backtick, it must end with a single quote. |
| 64 | + // * Either the output is quoted normally (in which case it ends with a single quote), or |
| 65 | + // the quotes are skipped. |
| 66 | + // * If the quotes are skipped, then the output either starts or ends with a single quote. |
| 67 | + // * The output starts with a backtick, so we know it doesn't start with a single quote. |
| 68 | + // * Therefore, it must end with a single quote. |
| 69 | + transform_till( |
| 70 | + alt((preceded('\'', take_till(0.., '\'')), take_till(1.., '\''))), |
| 71 | + inner.by_ref(), |
| 72 | + preceded('\'', end.by_ref()), |
| 73 | + ) |
| 74 | + .parse_next(input) |
| 75 | + } |
| 76 | + // If the output starts with anything else, the quoting must be skipped. |
| 77 | + _ => { |
| 78 | + input.reset(start); |
| 79 | + // Potentially this will have to consume the entire input before backtracking. Sad! |
| 80 | + transform_till(any, inner.by_ref(), end.by_ref()).parse_next(input) |
| 81 | + } |
| 82 | + } |
| 83 | + } |
| 84 | +} |
| 85 | + |
#[cfg(test)]
mod tests {
    use crate::ghci::parse::haskell_grammar::module_name;

    use super::*;

    use pretty_assertions::assert_eq;

    /// Exercises `single_quoted` against every quoting style plus a set of malformed inputs.
    #[test]
    fn test_parse_single_quoted() {
        // `(input, expected inner output)` pairs parsed with `module_name` as the inner parser
        // and a single space as the terminator.
        let plain_cases = [
            // Unicode.
            ("‘Puppy’ ", "Puppy"),
            ("‘Puppy'’ ", "Puppy'"),
            ("‘Puppy''’ ", "Puppy''"),
            // ASCII.
            ("`Puppy' ", "Puppy"),
            // Internal quotes.
            ("`Pupp'y' ", "Pupp'y"),
            ("`Pupp''y' ", "Pupp''y"),
            ("`Pupp'''y' ", "Pupp'''y"),
            ("`Pupp''''y' ", "Pupp''''y"),
            // Ends with single quote, so the surrounding quotes are omitted.
            ("Puppy' ", "Puppy'"),
            ("Puppy'' ", "Puppy''"),
        ];
        for (input, expected) in plain_cases {
            assert_eq!(
                single_quoted(module_name, ' ').parse(input).unwrap(),
                (expected, ' ')
            );
        }

        // Starts with a single quote: the inner parser has to consume the leading quote itself.
        let leading_quote_cases = [("'Puppy ", "Puppy"), ("'Puppy' ", "Puppy'")];
        for (input, expected) in leading_quote_cases {
            assert_eq!(
                single_quoted(preceded('\'', module_name), ' ')
                    .parse(input)
                    .unwrap(),
                (expected, ' ')
            );
        }

        // Negative cases.
        let rejected = [
            // No valid ending.
            "‘Puppy’x",
            // Modules can't start with numbers.
            "`0' ",
            "0 ",
            // Delimiters have to match.
            "‘Puppy' ",
            "`Puppy’ ",
        ];
        for input in rejected {
            assert!(single_quoted(module_name, ' ').parse(input).is_err());
        }
    }
}
0 commit comments