Skip to content

Commit 0eaf186

Browse files
Revert #59 (#67)
* Revert #59 * Update code in light of ParseError change * Update CI to v0.14.0-rc5
1 parent 20d6665 commit 0eaf186

File tree

7 files changed

+265
-35
lines changed

7 files changed

+265
-35
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
- name: Set up a PureScript toolchain
1717
uses: purescript-contrib/setup-purescript@main
1818
with:
19-
purescript: "0.14.0-rc3"
19+
purescript: "0.14.0-rc5"
2020

2121
- name: Cache PureScript dependencies
2222
uses: actions/cache@v2
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
-- | Primitive parsers for strings, parsing based on code points.
2+
-- |
3+
-- | These functions will be much slower than the `CodeUnits` alternatives, but
4+
-- | will behave correctly in the presence of Unicode characters made up of
5+
-- | multiple code units.
6+
module Text.Parsing.StringParser.CodePoints
7+
( eof
8+
, anyChar
9+
, anyDigit
10+
, string
11+
, satisfy
12+
, char
13+
, whiteSpace
14+
, skipSpaces
15+
, oneOf
16+
, noneOf
17+
, lowerCaseChar
18+
, upperCaseChar
19+
, anyLetter
20+
, alphaNum
21+
, regex
22+
) where
23+
24+
import Prelude
25+
26+
import Control.Alt ((<|>))
27+
import Data.Array ((..))
28+
import Data.Array.NonEmpty as NEA
29+
import Data.Char (fromCharCode, toCharCode)
30+
import Data.Either (Either(..))
31+
import Data.Enum (fromEnum)
32+
import Data.Foldable (class Foldable, foldMap, elem, notElem)
33+
import Data.Maybe (Maybe(..))
34+
import Data.String.CodePoints (codePointAt, drop, indexOf', length, stripPrefix)
35+
import Data.String.CodeUnits (singleton)
36+
import Data.String.Pattern (Pattern(..))
37+
import Data.String.Regex as Regex
38+
import Data.String.Regex.Flags (noFlags)
39+
import Text.Parsing.StringParser (Parser(..), try, fail)
40+
import Text.Parsing.StringParser.Combinators (many, (<?>))
41+
42+
-- | Match the end of the file.
43+
eof :: Parser Unit
44+
eof = Parser \s ->
45+
case s of
46+
{ str, pos } | pos < length str -> Left { pos, error: "Expected EOF" }
47+
_ -> Right { result: unit, suffix: s }
48+
49+
-- | Match any character.
50+
anyChar :: Parser Char
51+
anyChar = Parser \{ str, pos } ->
52+
case codePointAt pos str of
53+
Just cp -> case toChar cp of
54+
Just chr -> Right { result: chr, suffix: { str, pos: pos + 1 } }
55+
Nothing -> Left { pos, error: "CodePoint " <> show cp <> " is not a character" }
56+
Nothing -> Left { pos, error: "Unexpected EOF" }
57+
where
58+
toChar = fromCharCode <<< fromEnum
59+
60+
-- | Match any digit.
61+
anyDigit :: Parser Char
62+
anyDigit = try do
63+
c <- anyChar
64+
if c >= '0' && c <= '9'
65+
then pure c
66+
else fail $ "Character " <> show c <> " is not a digit"
67+
68+
-- | Match the specified string.
69+
string :: String -> Parser String
70+
string nt = Parser \s ->
71+
case s of
72+
{ str, pos } | indexOf' (Pattern nt) pos str == Just pos -> Right { result: nt, suffix: { str, pos: pos + length nt } }
73+
{ pos } -> Left { pos, error: "Expected '" <> nt <> "'." }
74+
75+
-- | Match a character satisfying the given predicate.
76+
satisfy :: (Char -> Boolean) -> Parser Char
77+
satisfy f = try do
78+
c <- anyChar
79+
if f c
80+
then pure c
81+
else fail $ "Character " <> show c <> " did not satisfy predicate"
82+
83+
-- | Match the specified character.
84+
char :: Char -> Parser Char
85+
char c = satisfy (_ == c) <?> "Could not match character " <> show c
86+
87+
-- | Match many whitespace characters.
88+
whiteSpace :: Parser String
89+
whiteSpace = do
90+
cs <- many (satisfy \ c -> c == '\n' || c == '\r' || c == ' ' || c == '\t')
91+
pure (foldMap singleton cs)
92+
93+
-- | Skip many whitespace characters.
94+
skipSpaces :: Parser Unit
95+
skipSpaces = void whiteSpace
96+
97+
-- | Match one of the characters in the foldable structure.
98+
oneOf :: forall f. Foldable f => f Char -> Parser Char
99+
oneOf = satisfy <<< flip elem
100+
101+
-- | Match any character not in the foldable structure.
102+
noneOf :: forall f. Foldable f => f Char -> Parser Char
103+
noneOf = satisfy <<< flip notElem
104+
105+
-- | Match any lower case character.
106+
lowerCaseChar :: Parser Char
107+
lowerCaseChar = try do
108+
c <- anyChar
109+
if toCharCode c `elem` (97 .. 122)
110+
then pure c
111+
else fail $ "Expected a lower case character but found " <> show c
112+
113+
-- | Match any upper case character.
114+
upperCaseChar :: Parser Char
115+
upperCaseChar = try do
116+
c <- anyChar
117+
if toCharCode c `elem` (65 .. 90)
118+
then pure c
119+
else fail $ "Expected an upper case character but found " <> show c
120+
121+
-- | Match any letter.
122+
anyLetter :: Parser Char
123+
anyLetter = lowerCaseChar <|> upperCaseChar <?> "Expected a letter"
124+
125+
-- | Match a letter or a number.
126+
alphaNum :: Parser Char
127+
alphaNum = anyLetter <|> anyDigit <?> "Expected a letter or a number"
128+
129+
-- | match the regular expression
130+
regex :: String -> Parser String
131+
regex pat =
132+
case Regex.regex pattern noFlags of
133+
Left _ ->
134+
fail $ "Text.Parsing.StringParser.String.regex': illegal regex " <> pat
135+
Right r ->
136+
matchRegex r
137+
where
138+
-- ensure the pattern only matches the current position in the parse
139+
pattern =
140+
case stripPrefix (Pattern "^") pat of
141+
Nothing ->
142+
"^" <> pat
143+
_ ->
144+
pat
145+
matchRegex :: Regex.Regex -> Parser String
146+
matchRegex r =
147+
Parser \{ str, pos } ->
148+
let
149+
remainder = drop pos str
150+
in
151+
case NEA.head <$> Regex.match r remainder of
152+
Just (Just matched) ->
153+
Right { result: matched, suffix: { str, pos: pos + length matched } }
154+
_ ->
155+
Left { pos, error: "no match" }

src/Text/Parsing/StringParser/CodeUnits.purs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
module Text.Parsing.StringParser.CodeUnits
77
( eof
88
, anyChar
9-
, anyCodePoint
109
, anyDigit
1110
, string
1211
, satisfy
@@ -31,7 +30,6 @@ import Data.Char (toCharCode)
3130
import Data.Either (Either(..))
3231
import Data.Foldable (class Foldable, foldMap, elem, notElem)
3332
import Data.Maybe (Maybe(..))
34-
import Data.String as SCP
3533
import Data.String.CodeUnits (charAt, singleton)
3634
import Data.String.CodeUnits as SCU
3735
import Data.String.Pattern (Pattern(..))
@@ -47,22 +45,13 @@ eof = Parser \s ->
4745
{ str, pos } | pos < SCU.length str -> Left { pos, error: "Expected EOF" }
4846
_ -> Right { result: unit, suffix: s }
4947

50-
-- | Match any character. This is limited by `Char` to any code points
51-
-- | that are below `0xFFFF`. If you need to use higher code points
52-
-- | (e.g. emoji), see `anyCodePoint` and `string`.
48+
-- | Match any character.
5349
anyChar :: Parser Char
5450
anyChar = Parser \{ str, pos } ->
5551
case charAt pos str of
5652
Just chr -> Right { result: chr, suffix: { str, pos: pos + 1 } }
5753
Nothing -> Left { pos, error: "Unexpected EOF" }
5854

59-
-- | Match any code point, including those above `0xFFFF`
60-
anyCodePoint :: Parser SCP.CodePoint
61-
anyCodePoint = Parser \rec@{ str, pos } ->
62-
case SCP.codePointAt 0 (SCU.drop pos str) of
63-
Just cp -> Right { result: cp, suffix: { str, pos: pos + SCU.length (SCP.singleton cp) } }
64-
Nothing -> Left { pos, error: "Unexpected EOF" }
65-
6655
-- | Match any digit.
6756
anyDigit :: Parser Char
6857
anyDigit = try do

test/CodePoints.purs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
module Test.CodePoints where
2+
3+
import Prelude hiding (between)
4+
5+
import Control.Alt ((<|>))
6+
import Data.Either (isLeft, isRight, Either(..))
7+
import Data.Foldable (fold)
8+
import Data.List (List(Nil), (:))
9+
import Data.List.Lazy (take, repeat)
10+
import Data.List.NonEmpty (NonEmptyList(..))
11+
import Data.NonEmpty ((:|))
12+
import Data.String.CodeUnits (singleton)
13+
import Data.String.Common as SC
14+
import Data.Unfoldable (replicate)
15+
import Effect (Effect)
16+
import Test.Assert (assert', assert)
17+
import Text.Parsing.StringParser (Parser, runParser, try)
18+
import Text.Parsing.StringParser.Combinators (many1, endBy1, sepBy1, optionMaybe, many, manyTill, many1Till, chainl, fix, between)
19+
import Text.Parsing.StringParser.Expr (Assoc(..), Operator(..), buildExprParser)
20+
import Text.Parsing.StringParser.CodePoints (anyDigit, char, eof, string, anyChar, regex)
21+
22+
parens :: forall a. Parser a -> Parser a
23+
parens = between (string "(") (string ")")
24+
25+
nested :: Parser Int
26+
nested = fix $ \p -> (do
27+
_ <- string "a"
28+
pure 0) <|> ((+) 1) <$> parens p
29+
30+
opTest :: Parser String
31+
opTest = chainl (singleton <$> anyChar) (string "+" $> append) ""
32+
33+
digit :: Parser Int
34+
digit = string "0" $> 0
35+
<|> string "1" $> 1
36+
<|> string "2" $> 2
37+
<|> string "3" $> 3
38+
<|> string "4" $> 4
39+
<|> string "5" $> 5
40+
<|> string "6" $> 6
41+
<|> string "7" $> 7
42+
<|> string "8" $> 8
43+
<|> string "9" $> 9
44+
45+
exprTest :: Parser Int
46+
exprTest = buildExprParser [ [Infix (string "/" >>= \_ -> pure div) AssocRight]
47+
, [Infix (string "*" >>= \_ -> pure mul) AssocRight]
48+
, [Infix (string "-" >>= \_ -> pure sub) AssocRight]
49+
, [Infix (string "+" >>= \_ -> pure add) AssocRight]
50+
] digit
51+
52+
tryTest :: Parser String
53+
-- reduce the possible array of matches to 0 or 1 elements to aid Array pattern matching
54+
tryTest =
55+
try (string "aa" <> string "bb") <|>
56+
(string "aa" <> string "cc")
57+
58+
canParse :: forall a. Parser a -> String -> Boolean
59+
canParse p input = isRight $ runParser p input
60+
61+
parseFail :: forall a. Parser a -> String -> Boolean
62+
parseFail p input = isLeft $ runParser p input
63+
64+
expectResult :: forall a. Eq a => a -> Parser a -> String -> Boolean
65+
expectResult res p input = runParser p input == Right res
66+
67+
testCodePoints :: Effect Unit
68+
testCodePoints = do
69+
assert' "many should not blow the stack" $ canParse (many (string "a")) (SC.joinWith "" $ replicate 100000 "a")
70+
assert' "many failing after" $ parseFail (do
71+
as <- many (string "a")
72+
eof
73+
pure as) (SC.joinWith "" (replicate 100000 "a") <> "b" )
74+
75+
assert $ expectResult 3 nested "(((a)))"
76+
assert $ expectResult ("a":"a":"a":Nil) (many (string "a")) "aaa"
77+
assert $ parseFail (many1 (string "a")) ""
78+
assert $ canParse (parens (do
79+
_ <- string "a"
80+
optionMaybe $ string "b")) "(ab)"
81+
assert $ expectResult (NonEmptyList ("a" :| "a":"a":Nil)) (string "a" `sepBy1` string ",") "a,a,a"
82+
assert $ canParse (do
83+
as <- string "a" `endBy1` string ","
84+
eof
85+
pure as) "a,a,a,"
86+
assert' "opTest" $ expectResult "abc" opTest "a+b+c"
87+
assert' "exprTest" $ expectResult (-3) exprTest "1*2+3/4-5"
88+
assert' "tryTest "$ canParse tryTest "aacc"
89+
assert $ expectResult (NonEmptyList ('0' :| '1':'2':'3':'4':Nil)) (many1 anyDigit) "01234/"
90+
assert $ expectResult (NonEmptyList ('5' :| '6':'7':'8':'9':Nil)) (many1 anyDigit) "56789:"
91+
assert $ expectResult "aaaa" (regex "a+") "aaaab"
92+
assert $ expectResult ("a":"a":"a":Nil) (manyTill (string "a") (string "b")) "aaab"
93+
assert $ expectResult Nil (manyTill (string "a") (string "b")) "b"
94+
assert $ expectResult (NonEmptyList ("a" :| "a":"a":Nil)) (many1Till (string "a") (string "b")) "aaab"
95+
assert $ parseFail (many1Till (string "a") (string "b")) "b"
96+
-- check against overflow
97+
assert $ canParse (many1Till (string "a") (string "and")) $ (fold <<< take 10000 $ repeat "a") <> "and"
98+
-- check correct order
99+
assert $ expectResult (NonEmptyList ('a' :| 'b':'c':Nil)) (many1Till anyChar (string "d")) "abcd"
100+
assert $ expectResult "\x458CA" (string "\x458CA" <* char ']' <* eof ) "\x458CA]"
101+
assert $ expectResult "\x458CA" (string "\x458CA" <* string ")" <* eof ) "\x458CA)"
102+
assert $ expectResult '\xEEE2' (char '\xEEE2' <* eof ) "\xEEE2"

test/CodeUnits.purs

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,15 @@ import Data.List (List(Nil), (:))
99
import Data.List.Lazy (take, repeat)
1010
import Data.List.NonEmpty (NonEmptyList(..))
1111
import Data.NonEmpty ((:|))
12-
import Data.String.CodePoints as SCP
1312
import Data.String.CodeUnits (singleton)
1413
import Data.String.Common as SC
1514
import Data.Unfoldable (replicate)
1615
import Effect (Effect)
1716
import Test.Assert (assert', assert)
1817
import Text.Parsing.StringParser (Parser, runParser, try)
19-
import Text.Parsing.StringParser.CodeUnits (anyChar, anyCodePoint, anyDigit, eof, regex, string)
2018
import Text.Parsing.StringParser.Combinators (many1, endBy1, sepBy1, optionMaybe, many, manyTill, many1Till, chainl, fix, between)
2119
import Text.Parsing.StringParser.Expr (Assoc(..), Operator(..), buildExprParser)
20+
import Text.Parsing.StringParser.CodeUnits (anyDigit, eof, string, anyChar, regex)
2221

2322
parens :: forall a. Parser a -> Parser a
2423
parens = between (string "(") (string ")")
@@ -98,22 +97,3 @@ testCodeUnits = do
9897
assert $ canParse (many1Till (string "a") (string "and")) $ (fold <<< take 10000 $ repeat "a") <> "and"
9998
-- check correct order
10099
assert $ expectResult (NonEmptyList ('a' :| 'b':'c':Nil)) (many1Till anyChar (string "d")) "abcd"
101-
-- check anyCodePoint
102-
let anyCodePointStr = map SCP.singleton anyCodePoint
103-
let anyCharStr = map singleton anyChar
104-
assert $ expectResult (NonEmptyList ("🍔" :| "🍺":Nil)) (many1 $ anyCodePointStr) "🍔🍺"
105-
assert $ expectResult "🍔" (anyChar *> anyCodePointStr <* anyChar) "a🍔a"
106-
assert $ expectResult ({a: "🍔", b: "🍺"}) ({a:_, b:_} <$> (anyCodePointStr <* void anyChar) <*> anyCodePointStr) "🍔a🍺"
107-
assert $ expectResult ({a: "a", b: "b", c:"c"}) ({a:_, b:_, c:_} <$> anyCodePointStr <*> anyCodePointStr <*> anyCodePointStr) "abc"
108-
-- check string
109-
assert $ expectResult "🍔🍺" (string "🍔🍺") "🍔🍺"
110-
assert $ expectResult (NonEmptyList ("🍔🍺" :| "🍔🍺":"🍔🍺":Nil)) (many1 $ string "🍔🍺") "🍔🍺🍔🍺🍔🍺"
111-
assert $ expectResult (NonEmptyList ("a🍔🍺":|"a🍔🍺":"a🍔🍺":Nil)) (many1 $ string "a🍔🍺") "a🍔🍺a🍔🍺a🍔🍺"
112-
assert $ expectResult (NonEmptyList ("🍔a🍺":|"🍔a🍺":"🍔a🍺":Nil)) (many1 $ string "🍔a🍺") "🍔a🍺🍔a🍺🍔a🍺"
113-
assert $ expectResult (NonEmptyList ("🍔🍺a" :| "🍔🍺a":"🍔🍺a":Nil)) (many1 $ string "🍔🍺a") "🍔🍺a🍔🍺a🍔🍺a"
114-
assert $ expectResult (NonEmptyList ("a" :| "a":"a":Nil)) (many1 $ string "a") "aaa"
115-
assert $ expectResult (NonEmptyList ("abc" :| "abc":"abc":Nil)) (many1 $ string "abc") "abcabcabc"
116-
assert $ expectResult (NonEmptyList ("abc" :| "abc":"abc":Nil)) (many1 $ string "abc") "abcabcabc"
117-
assert $ expectResult (NonEmptyList ("abc�def" :| Nil)) (many1 $ string "abc�def") "abc�def"
118-
119-
assert $ expectResult "🍔\xd83c" (string "🍔\xd83c") "🍔🍺"

test/Examples.purs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import Data.List.Types (NonEmptyList)
99
import Effect (Effect)
1010
import Effect.Console (log, logShow)
1111
import Text.Parsing.StringParser (Parser, fail, runParser, unParser)
12-
import Text.Parsing.StringParser.CodeUnits (anyChar, char, eof, regex, skipSpaces, string)
12+
import Text.Parsing.StringParser.CodePoints (anyChar, char, eof, regex, skipSpaces, string)
1313
import Text.Parsing.StringParser.Combinators (between, endBy1, lookAhead, many, many1, sepBy1, (<?>))
1414

1515
-- Serves only to make this file runnable

test/Main.purs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,13 @@ import Prelude
44

55
import Effect (Effect)
66
import Effect.Console (log)
7+
import Test.CodePoints (testCodePoints)
78
import Test.CodeUnits (testCodeUnits)
89

910
main :: Effect Unit
1011
main = do
12+
log "Testing CodePoint parsing\n"
13+
testCodePoints
14+
1115
log "\n\nTesting CodeUnit parsing\n"
1216
testCodeUnits

0 commit comments

Comments
 (0)