Revert #59 (#67)

JordanMartinez · web-flow · commit 0eaf1867603d · 2020-12-28T10:23:59.000-08:00
* Revert #59 * Update code in light of ParseError change * Update CI to v0.14.0-rc5
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -16,7 +16,7 @@ jobs:
       - name: Set up a PureScript toolchain
         uses: purescript-contrib/setup-purescript@main
         with:
-          purescript: "0.14.0-rc3"
+          purescript: "0.14.0-rc5"
 
       - name: Cache PureScript dependencies
         uses: actions/cache@v2
diff --git a/src/Text/Parsing/StringParser/CodePoints.purs b/src/Text/Parsing/StringParser/CodePoints.purs
@@ -0,0 +1,155 @@
+-- | Primitive parsers for strings, parsing based on code points.
+-- |
+-- | These functions will be much slower than the `CodeUnits` alternatives, but
+-- | will behave correctly in the presence of Unicode characters made up of
+-- | multiple code units.
+module Text.Parsing.StringParser.CodePoints
+  ( eof
+  , anyChar
+  , anyDigit
+  , string
+  , satisfy
+  , char
+  , whiteSpace
+  , skipSpaces
+  , oneOf
+  , noneOf
+  , lowerCaseChar
+  , upperCaseChar
+  , anyLetter
+  , alphaNum
+  , regex
+  ) where
+
+import Prelude
+
+import Control.Alt ((<|>))
+import Data.Array ((..))
+import Data.Array.NonEmpty as NEA
+import Data.Char (fromCharCode, toCharCode)
+import Data.Either (Either(..))
+import Data.Enum (fromEnum)
+import Data.Foldable (class Foldable, foldMap, elem, notElem)
+import Data.Maybe (Maybe(..))
+import Data.String.CodePoints (codePointAt, drop, indexOf', length, stripPrefix)
+import Data.String.CodeUnits (singleton)
+import Data.String.Pattern (Pattern(..))
+import Data.String.Regex as Regex
+import Data.String.Regex.Flags (noFlags)
+import Text.Parsing.StringParser (Parser(..), try, fail)
+import Text.Parsing.StringParser.Combinators (many, (<?>))
+
+-- | Match the end of the file.
+eof :: Parser Unit
+eof = Parser \s ->
+  case s of
+    { str, pos } | pos < length str -> Left { pos, error: "Expected EOF" }
+    _ -> Right { result: unit, suffix: s }
+
+-- | Match any character.
+anyChar :: Parser Char
+anyChar = Parser \{ str, pos } ->
+  case codePointAt pos str of
+    Just cp -> case toChar cp of
+      Just chr -> Right { result: chr, suffix: { str, pos: pos + 1 } }
+      Nothing -> Left { pos, error: "CodePoint " <> show cp <> " is not a character" }
+    Nothing -> Left { pos, error: "Unexpected EOF" }
+  where
+    toChar = fromCharCode <<< fromEnum
+
+-- | Match any digit.
+anyDigit :: Parser Char
+anyDigit = try do
+  c <- anyChar
+  if c >= '0' && c <= '9'
+     then pure c
+     else fail $ "Character " <> show c <> " is not a digit"
+
+-- | Match the specified string.
+string :: String -> Parser String
+string nt = Parser \s ->
+  case s of
+    { str, pos } | indexOf' (Pattern nt) pos str == Just pos -> Right { result: nt, suffix: { str, pos: pos + length nt } }
+    { pos } -> Left { pos, error: "Expected '" <> nt <> "'." }
+
+-- | Match a character satisfying the given predicate.
+satisfy :: (Char -> Boolean) -> Parser Char
+satisfy f = try do
+  c <- anyChar
+  if f c
+     then pure c
+     else fail $ "Character " <> show c <> " did not satisfy predicate"
+
+-- | Match the specified character.
+char :: Char -> Parser Char
+char c = satisfy (_ == c) <?> "Could not match character " <> show c
+
+-- | Match many whitespace characters.
+whiteSpace :: Parser String
+whiteSpace = do
+  cs <- many (satisfy \ c -> c == '\n' || c == '\r' || c == ' ' || c == '\t')
+  pure (foldMap singleton cs)
+
+-- | Skip many whitespace characters.
+skipSpaces :: Parser Unit
+skipSpaces = void whiteSpace
+
+-- | Match one of the characters in the foldable structure.
+oneOf :: forall f. Foldable f => f Char -> Parser Char
+oneOf = satisfy <<< flip elem
+
+-- | Match any character not in the foldable structure.
+noneOf :: forall f. Foldable f => f Char -> Parser Char
+noneOf = satisfy <<< flip notElem
+
+-- | Match any lower case character.
+lowerCaseChar :: Parser Char
+lowerCaseChar = try do
+  c <- anyChar
+  if toCharCode c `elem` (97 .. 122)
+     then pure c
+     else fail $ "Expected a lower case character but found " <> show c
+
+-- | Match any upper case character.
+upperCaseChar :: Parser Char
+upperCaseChar = try do
+  c <- anyChar
+  if toCharCode c `elem` (65 .. 90)
+     then pure c
+     else fail $ "Expected an upper case character but found " <> show c
+
+-- | Match any letter.
+anyLetter :: Parser Char
+anyLetter = lowerCaseChar <|> upperCaseChar <?> "Expected a letter"
+
+-- | Match a letter or a number.
+alphaNum :: Parser Char
+alphaNum = anyLetter <|> anyDigit <?> "Expected a letter or a number"
+
+-- | match the regular expression
+regex :: String -> Parser String
+regex pat =
+  case Regex.regex pattern noFlags of
+    Left _ ->
+      fail $ "Text.Parsing.StringParser.String.regex': illegal regex " <> pat
+    Right r ->
+      matchRegex r
+  where
+    -- ensure the pattern only matches the current position in the parse
+    pattern =
+      case stripPrefix (Pattern "^") pat of
+        Nothing ->
+          "^" <> pat
+        _ ->
+          pat
+    matchRegex :: Regex.Regex -> Parser String
+    matchRegex r =
+      Parser \{ str, pos } ->
+        let
+          remainder = drop pos str
+        in
+          case NEA.head <$> Regex.match r remainder of
+            Just (Just matched)  ->
+              Right { result: matched, suffix: { str, pos: pos + length matched } }
+            _ ->
+              Left { pos, error: "no match" }
diff --git a/src/Text/Parsing/StringParser/CodeUnits.purs b/src/Text/Parsing/StringParser/CodeUnits.purs
@@ -6,7 +6,6 @@
 module Text.Parsing.StringParser.CodeUnits
   ( eof
   , anyChar
-  , anyCodePoint
   , anyDigit
   , string
   , satisfy
@@ -31,7 +30,6 @@ import Data.Char (toCharCode)
 import Data.Either (Either(..))
 import Data.Foldable (class Foldable, foldMap, elem, notElem)
 import Data.Maybe (Maybe(..))
-import Data.String as SCP
 import Data.String.CodeUnits (charAt, singleton)
 import Data.String.CodeUnits as SCU
 import Data.String.Pattern (Pattern(..))
@@ -47,22 +45,13 @@ eof = Parser \s ->
     { str, pos } | pos < SCU.length str -> Left { pos, error: "Expected EOF" }
     _ -> Right { result: unit, suffix: s }
 
--- | Match any character. This is limited by `Char` to any code points
--- | that are below `0xFFFF`. If you need to use higher code points
--- | (e.g. emoji), see `anyCodePoint` and `string`.
+-- | Match any character.
 anyChar :: Parser Char
 anyChar = Parser \{ str, pos } ->
   case charAt pos str of
     Just chr -> Right { result: chr, suffix: { str, pos: pos + 1 } }
     Nothing -> Left { pos, error: "Unexpected EOF" }
 
--- | Match any code point, including those above `0xFFFF`
-anyCodePoint :: Parser SCP.CodePoint
-anyCodePoint = Parser \rec@{ str, pos } ->
-   case SCP.codePointAt 0 (SCU.drop pos str) of
-     Just cp -> Right { result: cp, suffix: { str, pos: pos + SCU.length (SCP.singleton cp) } }
-     Nothing -> Left { pos, error: "Unexpected EOF" }
-
 -- | Match any digit.
 anyDigit :: Parser Char
 anyDigit = try do
diff --git a/test/CodePoints.purs b/test/CodePoints.purs
@@ -0,0 +1,102 @@
+module Test.CodePoints where
+
+import Prelude hiding (between)
+
+import Control.Alt ((<|>))
+import Data.Either (isLeft, isRight, Either(..))
+import Data.Foldable (fold)
+import Data.List (List(Nil), (:))
+import Data.List.Lazy (take, repeat)
+import Data.List.NonEmpty (NonEmptyList(..))
+import Data.NonEmpty ((:|))
+import Data.String.CodeUnits (singleton)
+import Data.String.Common as SC
+import Data.Unfoldable (replicate)
+import Effect (Effect)
+import Test.Assert (assert', assert)
+import Text.Parsing.StringParser (Parser, runParser, try)
+import Text.Parsing.StringParser.Combinators (many1, endBy1, sepBy1, optionMaybe, many, manyTill, many1Till, chainl, fix, between)
+import Text.Parsing.StringParser.Expr (Assoc(..), Operator(..), buildExprParser)
+import Text.Parsing.StringParser.CodePoints (anyDigit, char, eof, string, anyChar, regex)
+
+parens :: forall a. Parser a -> Parser a
+parens = between (string "(") (string ")")
+
+nested :: Parser Int
+nested = fix $ \p -> (do
+  _ <- string "a"
+  pure 0) <|> ((+) 1) <$> parens p
+
+opTest :: Parser String
+opTest = chainl (singleton <$> anyChar) (string "+" $> append) ""
+
+digit :: Parser Int
+digit = string "0" $> 0
+    <|> string "1" $> 1
+    <|> string "2" $> 2
+    <|> string "3" $> 3
+    <|> string "4" $> 4
+    <|> string "5" $> 5
+    <|> string "6" $> 6
+    <|> string "7" $> 7
+    <|> string "8" $> 8
+    <|> string "9" $> 9
+
+exprTest :: Parser Int
+exprTest = buildExprParser [ [Infix (string "/" >>= \_ -> pure div) AssocRight]
+                           , [Infix (string "*" >>= \_ -> pure mul) AssocRight]
+                           , [Infix (string "-" >>= \_ -> pure sub) AssocRight]
+                           , [Infix (string "+" >>= \_ -> pure add) AssocRight]
+                           ] digit
+
+tryTest :: Parser String
+            -- reduce the possible array of matches to 0 or 1 elements to aid Array pattern matching
+tryTest =
+  try (string "aa" <> string "bb") <|>
+      (string "aa" <> string "cc")
+
+canParse :: forall a. Parser a -> String -> Boolean
+canParse p input = isRight $ runParser p input
+
+parseFail :: forall a. Parser a -> String -> Boolean
+parseFail p input = isLeft $ runParser p input
+
+expectResult :: forall a. Eq a => a -> Parser a -> String -> Boolean
+expectResult res p input = runParser p input == Right res
+
+testCodePoints :: Effect Unit
+testCodePoints = do
+  assert' "many should not blow the stack" $ canParse (many (string "a")) (SC.joinWith "" $ replicate 100000 "a")
+  assert' "many failing after" $ parseFail (do
+    as <- many (string "a")
+    eof
+    pure as) (SC.joinWith "" (replicate 100000 "a") <> "b" )
+
+  assert $ expectResult 3 nested "(((a)))"
+  assert $ expectResult ("a":"a":"a":Nil)  (many (string "a")) "aaa"
+  assert $ parseFail (many1 (string "a")) ""
+  assert $ canParse (parens (do
+    _ <- string "a"
+    optionMaybe $ string "b")) "(ab)"
+  assert $ expectResult (NonEmptyList ("a" :| "a":"a":Nil)) (string "a" `sepBy1` string ",") "a,a,a"
+  assert $ canParse (do
+    as <- string "a" `endBy1` string ","
+    eof
+    pure as) "a,a,a,"
+  assert' "opTest" $ expectResult "abc" opTest "a+b+c"
+  assert' "exprTest" $ expectResult (-3) exprTest "1*2+3/4-5"
+  assert' "tryTest "$ canParse tryTest "aacc"
+  assert $ expectResult (NonEmptyList ('0' :| '1':'2':'3':'4':Nil)) (many1 anyDigit) "01234/"
+  assert $ expectResult (NonEmptyList ('5' :| '6':'7':'8':'9':Nil)) (many1 anyDigit) "56789:"
+  assert $ expectResult "aaaa" (regex "a+") "aaaab"
+  assert $ expectResult ("a":"a":"a":Nil)  (manyTill (string "a") (string "b")) "aaab"
+  assert $ expectResult Nil (manyTill (string "a") (string "b")) "b"
+  assert $ expectResult (NonEmptyList ("a" :| "a":"a":Nil)) (many1Till (string "a") (string "b")) "aaab"
+  assert $ parseFail (many1Till (string "a") (string "b")) "b"
+  -- check against overflow
+  assert $ canParse (many1Till (string "a") (string "and")) $ (fold <<< take 10000 $ repeat "a") <> "and"
+  -- check correct order
+  assert $ expectResult (NonEmptyList ('a' :| 'b':'c':Nil)) (many1Till anyChar (string "d")) "abcd"
+  assert $ expectResult "\x458CA" (string "\x458CA" <* char ']' <* eof ) "\x458CA]"
+  assert $ expectResult "\x458CA" (string "\x458CA" <* string ")" <* eof ) "\x458CA)"
+  assert $ expectResult '\xEEE2' (char '\xEEE2' <* eof ) "\xEEE2"
diff --git a/test/CodeUnits.purs b/test/CodeUnits.purs
@@ -9,16 +9,15 @@ import Data.List (List(Nil), (:))
 import Data.List.Lazy (take, repeat)
 import Data.List.NonEmpty (NonEmptyList(..))
 import Data.NonEmpty ((:|))
-import Data.String.CodePoints as SCP
 import Data.String.CodeUnits (singleton)
 import Data.String.Common as SC
 import Data.Unfoldable (replicate)
 import Effect (Effect)
 import Test.Assert (assert', assert)
 import Text.Parsing.StringParser (Parser, runParser, try)
-import Text.Parsing.StringParser.CodeUnits (anyChar, anyCodePoint, anyDigit, eof, regex, string)
 import Text.Parsing.StringParser.Combinators (many1, endBy1, sepBy1, optionMaybe, many, manyTill, many1Till, chainl, fix, between)
 import Text.Parsing.StringParser.Expr (Assoc(..), Operator(..), buildExprParser)
+import Text.Parsing.StringParser.CodeUnits (anyDigit, eof, string, anyChar, regex)
 
 parens :: forall a. Parser a -> Parser a
 parens = between (string "(") (string ")")
@@ -98,22 +97,3 @@ testCodeUnits = do
   assert $ canParse (many1Till (string "a") (string "and")) $ (fold <<< take 10000 $ repeat "a") <> "and"
   -- check correct order
   assert $ expectResult (NonEmptyList ('a' :| 'b':'c':Nil)) (many1Till anyChar (string "d")) "abcd"
-  -- check anyCodePoint
-  let anyCodePointStr = map SCP.singleton anyCodePoint
-  let anyCharStr = map singleton anyChar
-  assert $ expectResult (NonEmptyList ("🍔" :| "🍺":Nil)) (many1 $ anyCodePointStr) "🍔🍺"
-  assert $ expectResult "🍔" (anyChar *> anyCodePointStr <* anyChar) "a🍔a"
-  assert $ expectResult ({a: "🍔", b: "🍺"}) ({a:_, b:_} <$> (anyCodePointStr <* void anyChar) <*> anyCodePointStr) "🍔a🍺"
-  assert $ expectResult ({a: "a", b: "b", c:"c"}) ({a:_, b:_, c:_} <$> anyCodePointStr <*> anyCodePointStr <*> anyCodePointStr) "abc"
-  -- check string
-  assert $ expectResult "🍔🍺" (string "🍔🍺") "🍔🍺"
-  assert $ expectResult (NonEmptyList ("🍔🍺" :| "🍔🍺":"🍔🍺":Nil)) (many1 $ string "🍔🍺") "🍔🍺🍔🍺🍔🍺"
-  assert $ expectResult (NonEmptyList ("a🍔🍺":|"a🍔🍺":"a🍔🍺":Nil)) (many1 $ string "a🍔🍺") "a🍔🍺a🍔🍺a🍔🍺"
-  assert $ expectResult (NonEmptyList ("🍔a🍺":|"🍔a🍺":"🍔a🍺":Nil)) (many1 $ string "🍔a🍺") "🍔a🍺🍔a🍺🍔a🍺"
-  assert $ expectResult (NonEmptyList ("🍔🍺a" :| "🍔🍺a":"🍔🍺a":Nil)) (many1 $ string "🍔🍺a") "🍔🍺a🍔🍺a🍔🍺a"
-  assert $ expectResult (NonEmptyList ("a" :| "a":"a":Nil)) (many1 $ string "a") "aaa"
-  assert $ expectResult (NonEmptyList ("abc" :| "abc":"abc":Nil)) (many1 $ string "abc") "abcabcabc"
-  assert $ expectResult (NonEmptyList ("abc" :| "abc":"abc":Nil)) (many1 $ string "abc") "abcabcabc"
-  assert $ expectResult (NonEmptyList ("abc�def" :| Nil)) (many1 $ string "abc�def") "abc�def"
-
-  assert $ expectResult "🍔\xd83c" (string "🍔\xd83c") "🍔🍺"
diff --git a/test/Examples.purs b/test/Examples.purs
@@ -9,7 +9,7 @@ import Data.List.Types (NonEmptyList)
 import Effect (Effect)
 import Effect.Console (log, logShow)
 import Text.Parsing.StringParser (Parser, fail, runParser, unParser)
-import Text.Parsing.StringParser.CodeUnits (anyChar, char, eof, regex, skipSpaces, string)
+import Text.Parsing.StringParser.CodePoints (anyChar, char, eof, regex, skipSpaces, string)
 import Text.Parsing.StringParser.Combinators (between, endBy1, lookAhead, many, many1, sepBy1, (<?>))
 
 -- Serves only to make this file runnable
diff --git a/test/Main.purs b/test/Main.purs
@@ -4,9 +4,13 @@ import Prelude
 
 import Effect (Effect)
 import Effect.Console (log)
+import Test.CodePoints (testCodePoints)
 import Test.CodeUnits (testCodeUnits)
 
 main :: Effect Unit
 main = do
+  log "Testing CodePoint parsing\n"
+  testCodePoints
+
   log "\n\nTesting CodeUnit parsing\n"
   testCodeUnits