From f02dcb46fae6693653f7895c90b90a3bc1b5b9a1 Mon Sep 17 00:00:00 2001 From: James Brock Date: Fri, 1 Apr 2022 21:26:39 +0900 Subject: [PATCH] Add index field to Position --- CHANGELOG.md | 1 + src/Parsing/Indent.purs | 40 +++++++++++--------------------- src/Parsing/Pos.purs | 25 ++++++++++++-------- src/Parsing/String.purs | 26 ++++++++++++++++----- test/Main.purs | 51 +++++++++++++++++++---------------------- 5 files changed, 73 insertions(+), 70 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 982b7d4..cd2820f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ Breaking changes: - Rename module prefix from `Text.Parsing.Parser` to `Parsing` (#169 by @jamesdbrock) - Replace the `regex` parser. (#170 by @jamesdbrock) - Reorganize Combinators for #154 (#182 by @jamesdbrock) +- Add the `index` field to `Position`. (#171 by @jamesdbrock) New features: diff --git a/src/Parsing/Indent.purs b/src/Parsing/Indent.purs index 4afe1ad..a09f53c 100644 --- a/src/Parsing/Indent.purs +++ b/src/Parsing/Indent.purs @@ -85,18 +85,6 @@ get' = do put' :: forall s. Position -> IndentParser s Unit put' p = lift (put p) -sourceColumn :: Position -> Int -sourceColumn (Position { line: _, column: c }) = c - -sourceLine :: Position -> Int -sourceLine (Position { line: l, column: _ }) = l - -setSourceLine :: Position -> Int -> Position -setSourceLine (Position { line: _, column: c }) l = Position { line: l, column: c } - -biAp :: forall a b c. (a -> b) -> (b -> b -> c) -> a -> a -> c -biAp f c v1 v2 = c (f v1) (f v2) - many1 :: forall s m a. ParserT s m a -> ParserT s m (List a) many1 p = lift2 Cons p (many p) @@ -121,19 +109,17 @@ withBlock' = withBlock (flip const) -- | Parses only when indented past the level of the reference indented :: forall s. IndentParser s Unit indented = do - pos <- position - s <- get' - if biAp sourceColumn (<=) pos s then fail "not indented" - else do - put' $ setSourceLine s (sourceLine pos) - pure unit + Position p <- position + Position s <- get' + if p.column <= s.column then fail "not indented" + else put' $ Position { index: 0, line: p.line, column: s.column } -- | Same as `indented`, but does not change internal state indented' :: forall s. IndentParser s Unit indented' = do - pos <- position - s <- get' - if biAp sourceColumn (<=) pos s then fail "not indented" else pure unit + Position p <- position + Position s <- get' + if p.column <= s.column then fail "not indented" else pure unit -- | Parses only when indented past the level of the reference or on the same line sameOrIndented :: forall s. IndentParser s Unit @@ -142,9 +128,9 @@ sameOrIndented = sameLine <|> indented -- | Parses only on the same line as the reference sameLine :: forall s. IndentParser s Unit sameLine = do - pos <- position - s <- get' - if biAp sourceLine (==) pos s then pure unit else fail "over one line" + Position p <- position + Position s <- get' + if p.line == s.line then pure unit else fail "over one line" -- | Parses a block of lines at the same indentation level block1 :: forall s a. IndentParser s a -> IndentParser s (List a) @@ -169,9 +155,9 @@ withPos x = do -- | Ensures the current indentation level matches that of the reference checkIndent :: forall s. IndentParser s Unit checkIndent = do - s <- get' - p <- position - if biAp sourceColumn (==) p s then pure unit else fail "indentation doesn't match" + Position p <- position + Position s <- get' + if p.column == s.column then pure unit else fail "indentation doesn't match" -- | Run the result of an indentation sensitive parse runIndent :: forall a. State Position a -> a diff --git a/src/Parsing/Pos.purs b/src/Parsing/Pos.purs index ab7f28e..63b7936 100644 --- a/src/Parsing/Pos.purs +++ b/src/Parsing/Pos.purs @@ -3,25 +3,30 @@ module Parsing.Pos where import Prelude import Data.Generic.Rep (class Generic) +import Data.Show.Generic (genericShow) -- | `Position` represents the position of the parser in the input. -- | --- | - `line` is the current line in the input --- | - `column` is the column of the next character in the current line that will be parsed +-- | - `index` is the position since the start of the input. Starts at 0. +-- | - `line` is the current line in the input. Starts at 1. +-- | - `column` is the column of the next character in the current line that +-- | will be parsed. Starts at 1. newtype Position = Position - { line :: Int + { index :: Int + , line :: Int , column :: Int } -derive instance genericPosition :: Generic Position _ +derive instance Generic Position _ +instance Show Position where + show x = genericShow x -instance showPosition :: Show Position where - show (Position { line: line, column: column }) = - "(Position { line: " <> show line <> ", column: " <> show column <> " })" +instance Eq Position where + eq (Position l) (Position r) = l.index == r.index -derive instance eqPosition :: Eq Position -derive instance ordPosition :: Ord Position +instance Ord Position where + compare (Position l) (Position r) = compare l.index r.index -- | The `Position` before any input has been parsed. initialPos :: Position -initialPos = Position { line: 1, column: 1 } +initialPos = Position { index: 0, line: 1, column: 1 } diff --git a/src/Parsing/String.purs b/src/Parsing/String.purs index 0302dd7..15e801a 100644 --- a/src/Parsing/String.purs +++ b/src/Parsing/String.purs @@ -19,6 +19,20 @@ -- | The other primitive parsers, which return `CodePoint` and `String` types, -- | can parse the full Unicode character set. All of the primitive parsers -- | in this module can be used together. +-- | +-- | ### Position +-- | +-- | In a `String` parser, the `Position {index}` counts the number of +-- | unicode `CodePoint`s since the beginning of the input string. +-- | +-- | Each tab character (`0x09`) encountered in a `String` parser will advance +-- | the `Position {column}` by 8. +-- | +-- | These patterns will advance the `Position {line}` by 1 and reset +-- | the `Position {column}` to 1: +-- | - newline (`0x0A`) +-- | - carriage-return (`0x0D`) +-- | - carriage-return-newline (`0x0D 0x0A`) module Parsing.String ( string , eof @@ -187,14 +201,14 @@ updatePosString pos before after = case uncons before of -- | Updates a `Position` by adding the columns and lines in a -- | single `CodePoint`. updatePosSingle :: Position -> CodePoint -> String -> Position -updatePosSingle (Position { line, column }) cp after = case fromEnum cp of - 10 -> Position { line: line + 1, column: 1 } -- "\n" +updatePosSingle (Position { index, line, column }) cp after = case fromEnum cp of + 10 -> Position { index: index + 1, line: line + 1, column: 1 } -- "\n" 13 -> case codePointAt 0 after of - Just nextCp | fromEnum nextCp == 10 -> Position { line, column } -- "\r\n" lookahead - _ -> Position { line: line + 1, column: 1 } -- "\r" - 9 -> Position { line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns? - _ -> Position { line, column: column + 1 } + Just nextCp | fromEnum nextCp == 10 -> Position { index: index + 1, line, column } -- "\r\n" lookahead + _ -> Position { index: index + 1, line: line + 1, column: 1 } -- "\r" + 9 -> Position { index: index + 1, line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns? + _ -> Position { index: index + 1, line, column: column + 1 } -- | Combinator which returns both the result of a parse and the slice of -- | the input that was consumed while it was being parsed. diff --git a/test/Main.purs b/test/Main.purs index f8a5ac2..42880fe 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -126,7 +126,7 @@ stackSafeLoopsTest = do parseErrorTestPosition (many1Till (string "a") (string "b")) "baa" - (Position { line: 1, column: 1 }) + (Position { index: 0, line: 1, column: 1 }) parseTest "a,a,a,b,a,a" (toUnfoldable [ "a", "a", "a" ]) $ sepEndBy (string "a") (string ",") @@ -142,7 +142,7 @@ stackSafeLoopsTest = do parseErrorTestPosition (sepEndBy1 (string "a") (string ",")) "b,a,a" - (Position { line: 1, column: 1 }) + (Position { index: 0, line: 1, column: 1 }) -- 8 `div` (8 `div` 2) == 2 parseTest "8x8x2" 2 $ @@ -154,7 +154,7 @@ stackSafeLoopsTest = do parseErrorTestPosition (chainr1 digit (string "x" $> div)) "" - (Position { line: 1, column: 1 }) + (Position { index: 0, line: 1, column: 1 }) -- (8 `div` 2) `div` 2 == 2 parseTest "8x2x2" 2 $ @@ -166,7 +166,7 @@ stackSafeLoopsTest = do parseErrorTestPosition (chainl1 digit (string "x" $> div)) "" - (Position { line: 1, column: 1 }) + (Position { index: 0, line: 1, column: 1 }) parseTest "aaaabcd" "b" $ skipMany1 (string "a") @@ -174,7 +174,7 @@ stackSafeLoopsTest = do parseErrorTestPosition (skipMany1 (string "a")) "bcd" - (Position { line: 1, column: 1 }) + (Position { index: 0, line: 1, column: 1 }) parseTest "aaaabcd" "b" $ skipMany (string "a") @@ -188,7 +188,7 @@ stackSafeLoopsTest = do parseErrorTestPosition (many1 (string "a")) "" - (Position { line: 1, column: 1 }) + (Position { index: 0, line: 1, column: 1 }) parseTest "a,a,ab" (toUnfoldable [ "a", "a", "a" ]) $ sepBy (string "a") (string ",") @@ -202,11 +202,11 @@ stackSafeLoopsTest = do parseErrorTestPosition (sepBy1 (string "a") (string ",")) "" - (Position { line: 1, column: 1 }) + (Position { index: 0, line: 1, column: 1 }) parseErrorTestPosition (sepBy1 (string "a") (string ",")) "a," - (Position { line: 1, column: 3 }) + (Position { index: 2, line: 1, column: 3 }) parseTest "a,a,a,b" (toUnfoldable [ "a", "a", "a" ]) $ endBy (string "a") (string ",") @@ -220,11 +220,11 @@ stackSafeLoopsTest = do parseErrorTestPosition (endBy1 (string "a") (string ",")) "" - (Position { line: 1, column: 1 }) + (Position { index: 0, line: 1, column: 1 }) parseErrorTestPosition (endBy1 (string "a") (string ",")) "a,a" - (Position { line: 1, column: 4 }) + (Position { index: 3, line: 1, column: 4 }) data TestToken = A | B @@ -245,10 +245,7 @@ testTokenParser :: TokenParser testTokenParser = makeTokenParser haskellDef mkPos :: Int -> Position -mkPos n = mkPos' n 1 - -mkPos' :: Int -> Int -> Position -mkPos' column line = Position { column: column, line: line } +mkPos n = Position { index: n - 1, line: 1, column: n } type TestM = Effect Unit @@ -575,12 +572,12 @@ main = do parseErrorTestPosition (many $ char 'f' *> char '?') "foo" - (Position { column: 2, line: 1 }) + (Position { index: 1, column: 2, line: 1 }) parseErrorTestPosition (satisfy (_ == '?')) "foo" - (Position { column: 1, line: 1 }) + (Position { index: 0, column: 1, line: 1 }) parseTest "foo" @@ -605,17 +602,17 @@ main = do parseTest "rest" "rest" rest parseTest "rest" unit (rest *> eof) - parseTest "rest\nrest" (Position { line: 2, column: 5 }) (rest *> position) + parseTest "rest\nrest" (Position { index: 9, line: 2, column: 5 }) (rest *> position) parseErrorTestPosition (rest *> notFollowedBy eof) "aa\naa" - (Position { column: 3, line: 2 }) + (Position { index: 5, column: 3, line: 2 }) parseErrorTestPosition - anyChar - "𝅘𝅥𝅯" - (Position { column: 1, line: 1 }) + (string "𝅘𝅥𝅘𝅥𝅮" *> string "𝅘𝅥𝅘𝅥𝅮") + "𝅘𝅥𝅘𝅥𝅮x𝅘𝅥𝅯" + (Position { index: 2, column: 3, line: 1 }) parseTest "𝅘𝅥𝅘𝅥𝅮x𝅘𝅥𝅯" [ "𝅘𝅥", "𝅘𝅥𝅮", "x", "𝅘𝅥𝅯" ] do quarter <- anyCodePoint @@ -631,8 +628,8 @@ main = do parseTest "abcd" "ab" $ takeN 2 parseTest "abcd" "" $ takeN 0 - parseErrorTestPosition (takeN 10) "abcd" (Position { column: 1, line: 1 }) - parseErrorTestPosition (takeN (-1)) "abcd" (Position { column: 1, line: 1 }) + parseErrorTestPosition (takeN 10) "abcd" (Position { index: 0, column: 1, line: 1 }) + parseErrorTestPosition (takeN (-1)) "abcd" (Position { index: 0, column: 1, line: 1 }) parseErrorTestMessage (noneOfCodePoints $ SCP.toCodePointArray "❓✅") @@ -673,10 +670,10 @@ main = do parseTest "ababab" [ 'b', 'b', 'b' ] $ Array.many (char 'a' *> char 'b') parseTest "abaXab" [ 'b' ] $ Array.many (try (char 'a' *> char 'b')) - parseErrorTestPosition (string "abc") "bcd" (Position { column: 1, line: 1 }) - parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { column: 4, line: 1 }) - parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 }) - parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { column: 10, line: 1 }) + parseErrorTestPosition (string "abc") "bcd" (Position { index: 0, column: 1, line: 1 }) + parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { index: 3, column: 4, line: 1 }) + parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { index: 6, column: 1, line: 4 }) + parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { index: 2, column: 10, line: 1 }) log "\nTESTS number\n"