diff --git a/.travis.yml b/.travis.yml index 968390b..2bc26a4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ install: - bower install script: - npm run -s build + - npm run -s test after_success: - >- test $TRAVIS_TAG && diff --git a/bower.json b/bower.json index 4284b1b..3d23f13 100644 --- a/bower.json +++ b/bower.json @@ -25,7 +25,7 @@ "purescript-foldable-traversable": "^3.0.0", "purescript-identity": "^3.0.0", "purescript-integers": "^3.0.0", - "purescript-lists": "^4.0.0", + "purescript-lists": "^4.6.0", "purescript-maybe": "^3.0.0", "purescript-strings": "^3.0.0", "purescript-transformers": "^3.0.0", diff --git a/package.json b/package.json index e6343ef..52fb9bd 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,8 @@ "private": true, "scripts": { "clean": "rimraf output && rimraf .pulp-cache", - "build": "pulp build && pulp test" + "build": "pulp build", + "test": "pulp test" }, "devDependencies": { "pulp": "^11.0.0", diff --git a/src/Text/Parsing/Parser.purs b/src/Text/Parsing/Parser.purs index 77a132c..258ce70 100644 --- a/src/Text/Parsing/Parser.purs +++ b/src/Text/Parsing/Parser.purs @@ -7,6 +7,8 @@ module Text.Parsing.Parser , Parser , runParser , runParserT + , unParserT + , inParserT , hoistParserT , mapParserT , consume @@ -22,14 +24,14 @@ import Control.Lazy (defer, class Lazy) import Control.Monad.Error.Class (class MonadThrow, throwError) import Control.Monad.Except (class MonadError, ExceptT(..), runExceptT, mapExceptT) import Control.Monad.Rec.Class (class MonadRec) -import Control.Monad.State (runStateT, class MonadState, StateT(..), gets, evalStateT, mapStateT, modify) +import Control.Monad.State (runStateT, class MonadState, StateT(..), gets, mapStateT, modify) import Control.Monad.Trans.Class (class MonadTrans, lift) import Control.MonadPlus (class Alternative, class MonadZero, class MonadPlus, class Plus) import Data.Either (Either(..)) import Data.Identity (Identity) import Data.Monoid (class Monoid, mempty) import Data.Newtype (class Newtype, unwrap, over) -import Data.Tuple (Tuple(..)) +import Data.Tuple (Tuple(..), fst) import Text.Parsing.Parser.Pos (Position, initialPos) -- | A parsing error, consisting of a message and position information. @@ -49,7 +51,9 @@ derive instance eqParseError :: Eq ParseError derive instance ordParseError :: Ord ParseError -- | Contains the remaining input and current position. -data ParseState s = ParseState s Position Boolean +-- data ParseState s = ParseState s Position Boolean +newtype ParseState s = ParseState + { input :: s, pos :: Position, consumed :: Boolean } -- | The Parser monad transformer. -- | @@ -61,8 +65,23 @@ derive instance newtypeParserT :: Newtype (ParserT s m a) _ -- | Apply a parser, keeping only the parsed result. runParserT :: forall m s a. Monad m => s -> ParserT s m a -> m (Either ParseError a) -runParserT s p = evalStateT (runExceptT (unwrap p)) initialState where - initialState = ParseState s initialPos false +runParserT input p = fst <$> unParserT p initialState + where + initialState = ParseState { input, pos: initialPos, consumed: false } + +-- Reveals inner function of parser +unParserT :: forall m s a + . Monad m + => ParserT s m a + -> (ParseState s -> m (Tuple (Either ParseError a) (ParseState s))) +unParserT (ParserT p) = runStateT $ runExceptT p + +-- Takes inner function of Parser and constructs one +inParserT :: forall m s a + . Monad m + => (ParseState s -> m (Tuple (Either ParseError a) (ParseState s))) + -> ParserT s m a +inParserT = ParserT <<< ExceptT <<< StateT -- | The `Parser` monad is a synonym for the parser monad transformer applied to the `Identity` monad. type Parser s = ParserT s Identity @@ -101,12 +120,12 @@ derive newtype instance monadThrowParserT :: Monad m => MonadThrow ParseError (P derive newtype instance monadErrorParserT :: Monad m => MonadError ParseError (ParserT s m) instance altParserT :: Monad m => Alt (ParserT s m) where - alt p1 p2 = (ParserT <<< ExceptT <<< StateT) \(s@(ParseState i p _)) -> do - Tuple e s'@(ParseState i' p' c') <- runStateT (runExceptT (unwrap p1)) (ParseState i p false) - case e of - Left err - | not c' -> runStateT (runExceptT (unwrap p2)) s - _ -> pure (Tuple e s') + alt p1 p2 = inParserT \(ParseState state) -> + unParserT p1 (ParseState (state{consumed = false})) >>= \(Tuple e (ParseState nextState)) -> + case e of + Left err + | not nextState.consumed -> unParserT p2 (ParseState state) + _ -> pure (Tuple e (ParseState nextState)) instance plusParserT :: Monad m => Plus (ParserT s m) where empty = fail "No alternative" @@ -122,12 +141,12 @@ instance monadTransParserT :: MonadTrans (ParserT s) where -- | Set the consumed flag. consume :: forall s m. Monad m => ParserT s m Unit -consume = modify \(ParseState input pos _) -> - ParseState input pos true +consume = modify \(ParseState state) -> + ParseState state{consumed = true} -- | Returns the current position in the stream. position :: forall s m. Monad m => ParserT s m Position -position = gets \(ParseState _ pos _) -> pos +position = gets \(ParseState state) -> state.pos -- | Fail with a message. fail :: forall m s a. Monad m => String -> ParserT s m a diff --git a/src/Text/Parsing/Parser/Combinators.purs b/src/Text/Parsing/Parser/Combinators.purs index d346258..74d515d 100644 --- a/src/Text/Parsing/Parser/Combinators.purs +++ b/src/Text/Parsing/Parser/Combinators.purs @@ -15,22 +15,19 @@ -- | be used in conjunction with `Data.String.fromCharArray` to achieve "Parsec-like" results. -- | -- | ```purescript --- | Text.Parsec.many (char 'x') <=> fromCharArray <$> Data.Array.many (char 'x') +-- | Text.Parsec.many (match 'x') <=> fromCharArray <$> Data.Array.many (match 'x') -- | ``` module Text.Parsing.Parser.Combinators where import Prelude -import Control.Monad.Except (runExceptT, ExceptT(..)) -import Control.Monad.State (StateT(..), runStateT) import Control.Plus (empty, (<|>)) import Data.Either (Either(..)) import Data.Foldable (class Foldable, foldl) import Data.List (List(..), (:), many, some, singleton) import Data.Maybe (Maybe(..)) -import Data.Newtype (unwrap) import Data.Tuple (Tuple(..)) -import Text.Parsing.Parser (ParseState(..), ParserT(..), ParseError(..), fail) +import Text.Parsing.Parser (ParseState(..), ParserT, ParseError(..), unParserT, inParserT, fail) -- | Provide an error message in the case of failure. withErrorMessage :: forall m s a. Monad m => ParserT s m a -> String -> ParserT s m a @@ -49,7 +46,7 @@ infix 3 asErrorMessage as -- | For example: -- | -- | ```purescript --- | parens = between (string "(") (string ")") +-- | parens = between (prefix "(") (prefix ")") -- | ``` between :: forall m s a open close. Monad m => ParserT s m open -> ParserT s m close -> ParserT s m a -> ParserT s m a between open close p = open *> p <* close @@ -68,24 +65,28 @@ optionMaybe p = option Nothing (Just <$> p) -- | In case of failure, reset the stream to the unconsumed state. try :: forall m s a. Monad m => ParserT s m a -> ParserT s m a -try p = (ParserT <<< ExceptT <<< StateT) \(s@(ParseState _ _ consumed)) -> do - Tuple e s'@(ParseState input position _) <- runStateT (runExceptT (unwrap p)) s - case e of - Left _ -> pure (Tuple e (ParseState input position consumed)) - _ -> pure (Tuple e s') +try p = inParserT \(ParseState state) -> + unParserT p (ParseState state) <#> \(Tuple e (ParseState nextState)) -> + case e of + Left _ -> Tuple e (ParseState nextState{consumed = state.consumed}) + Right _ -> Tuple e (ParseState nextState) -- | Like `try`, but will reannotate the error location to the `try` point. tryRethrow :: forall m s a. Monad m => ParserT s m a -> ParserT s m a -tryRethrow p = (ParserT <<< ExceptT <<< StateT) \(s@(ParseState _ position consumed)) -> do - Tuple e s'@(ParseState input' position' _) <- runStateT (runExceptT (unwrap p)) s - case e of - Left (ParseError err _) -> pure (Tuple (Left (ParseError err position)) (ParseState input' position' consumed)) - _ -> pure (Tuple e s') +tryRethrow p = inParserT \(ParseState state) -> + unParserT p (ParseState state) <#> \(Tuple e (ParseState nextState)) -> + case e of + Left (ParseError err _) -> + Tuple + (Left (ParseError err state.pos)) + (ParseState nextState{consumed = state.consumed}) + Right _ -> + Tuple e (ParseState nextState) -- | Parse a phrase, without modifying the consumed state or stream position. lookAhead :: forall s a m. Monad m => ParserT s m a -> ParserT s m a -lookAhead p = (ParserT <<< ExceptT <<< StateT) \s -> do - Tuple e _ <- runStateT (runExceptT (unwrap p)) s +lookAhead p = inParserT \s -> do + Tuple e _ <- unParserT p s pure (Tuple e s) -- | Parse phrases delimited by a separator. @@ -93,7 +94,7 @@ lookAhead p = (ParserT <<< ExceptT <<< StateT) \s -> do -- | For example: -- | -- | ```purescript --- | digit `sepBy` string "," +-- | digit `sepBy` prefix "," -- | ``` sepBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) sepBy p sep = sepBy1 p sep <|> pure Nil @@ -130,7 +131,7 @@ endBy p sep = many $ p <* sep -- | For example: -- | -- | ```purescript --- | chainr digit (string "+" *> add) 0 +-- | chainr digit (prefix "+" *> add) 0 -- | ``` chainr :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a chainr p f a = chainr1 p f <|> pure a diff --git a/src/Text/Parsing/Parser/Expr.purs b/src/Text/Parsing/Parser/Expr.purs index adfa62e..7e9286e 100644 --- a/src/Text/Parsing/Parser/Expr.purs +++ b/src/Text/Parsing/Parser/Expr.purs @@ -32,10 +32,10 @@ type SplitAccum m s a = { rassoc :: List (ParserT s m (a -> a -> a)) -- | For example: -- | -- | ```purescript --- | buildExprParser [ [ Infix (string "/" $> div) AssocRight ] --- | , [ Infix (string "*" $> mul) AssocRight ] --- | , [ Infix (string "-" $> sub) AssocRight ] --- | , [ Infix (string "+" $> add) AssocRight ] +-- | buildExprParser [ [ Infix (prefix "/" $> div) AssocRight ] +-- | , [ Infix (prefix "*" $> mul) AssocRight ] +-- | , [ Infix (prefix "-" $> sub) AssocRight ] +-- | , [ Infix (prefix "+" $> add) AssocRight ] -- | ] digit -- | ``` buildExprParser :: forall m s a. Monad m => OperatorTable m s a -> ParserT s m a -> ParserT s m a diff --git a/src/Text/Parsing/Parser/Indent.purs b/src/Text/Parsing/Parser/Indent.purs index e0b9619..f83c06d 100644 --- a/src/Text/Parsing/Parser/Indent.purs +++ b/src/Text/Parsing/Parser/Indent.purs @@ -59,10 +59,10 @@ import Control.Monad.State.Trans (get, put) import Control.Monad.Trans.Class (lift) import Data.List (List(..), many) import Data.Maybe (Maybe(..)) -import Text.Parsing.Parser (ParserT, ParseState(ParseState), fail) +import Text.Parsing.Parser (ParserT, ParseState(..), fail) import Text.Parsing.Parser.Combinators (option, optionMaybe) import Text.Parsing.Parser.Pos (Position(..), initialPos) -import Text.Parsing.Parser.String (string, oneOf) +import Text.Parsing.Parser.Stream (prefix, oneOf) -- | Indentation sensitive parser type. Usually @ m @ will -- | be @ Identity @ as with any @ ParserT @ @@ -71,7 +71,7 @@ type IndentParser s a = ParserT s (State Position) a -- | @ getPosition @ returns current position -- | should probably be added to Text.Parsing.Parser.Pos getPosition :: forall m s. (Monad m) => ParserT s m Position -getPosition = gets \(ParseState _ pos _) -> pos +getPosition = gets \(ParseState state) -> state.pos -- | simple helper function to avoid typ-problems with MonadState instance get' :: forall s. IndentParser s Position @@ -100,7 +100,7 @@ many1 :: forall s m a. (Monad m) => ParserT s m a -> ParserT s m (List a) many1 p = lift2 Cons p (many p) symbol :: forall m. (Monad m) => String -> ParserT String m String -symbol name = (many $ oneOf [' ','\t']) *> (string name) +symbol name = (many $ oneOf [' ','\t']) *> (prefix name) -- | `withBlock f a p` parses `a` -- | followed by an indented block of `p` diff --git a/src/Text/Parsing/Parser/Language.purs b/src/Text/Parsing/Parser/Language.purs index c1f5e25..837010a 100644 --- a/src/Text/Parsing/Parser/Language.purs +++ b/src/Text/Parsing/Parser/Language.purs @@ -12,8 +12,9 @@ import Prelude import Control.Alt ((<|>)) import Text.Parsing.Parser (ParserT) -import Text.Parsing.Parser.String (char, oneOf) -import Text.Parsing.Parser.Token (LanguageDef, TokenParser, GenLanguageDef(..), unGenLanguageDef, makeTokenParser, alphaNum, letter) +import Text.Parsing.Parser.Stream (match, oneOf) +import Text.Parsing.Parser.String (alphaNum, letter) +import Text.Parsing.Parser.Token (LanguageDef, TokenParser, GenLanguageDef(..), unGenLanguageDef, makeTokenParser) ----------------------------------------------------------- -- Styles: haskellStyle, javaStyle @@ -70,7 +71,7 @@ emptyDef = LanguageDef , commentEnd: "" , commentLine: "" , nestedComments: true - , identStart: letter <|> char '_' + , identStart: letter <|> match '_' , identLetter: alphaNum <|> oneOf ['_', '\''] , opStart: op' , opLetter: op' @@ -95,7 +96,7 @@ haskellDef :: LanguageDef haskellDef = case haskell98Def of (LanguageDef def) -> LanguageDef def - { identLetter = def.identLetter <|> char '#' + { identLetter = def.identLetter <|> match '#' , reservedNames = def.reservedNames <> ["foreign","import","export","primitive" ,"_ccall_","_casm_" diff --git a/src/Text/Parsing/Parser/Pos.purs b/src/Text/Parsing/Parser/Pos.purs index e65c6f2..0640d57 100644 --- a/src/Text/Parsing/Parser/Pos.purs +++ b/src/Text/Parsing/Parser/Pos.purs @@ -2,8 +2,7 @@ module Text.Parsing.Parser.Pos where import Prelude import Data.Foldable (foldl) -import Data.Newtype (wrap) -import Data.String (split) +import Data.String (toCharArray) -- | `Position` represents the position of the parser in the input. -- | @@ -27,10 +26,11 @@ initialPos = Position { line: 1, column: 1 } -- | Updates a `Position` by adding the columns and lines in `String`. updatePosString :: Position -> String -> Position -updatePosString pos' str = foldl updatePosChar pos' (split (wrap "") str) - where - updatePosChar (Position pos) c = case c of - "\n" -> Position { line: pos.line + 1, column: 1 } - "\r" -> Position { line: pos.line + 1, column: 1 } - "\t" -> Position { line: pos.line, column: pos.column + 8 - ((pos.column - 1) `mod` 8) } - _ -> Position { line: pos.line, column: pos.column + 1 } +updatePosString pos' str = foldl updatePosChar pos' (toCharArray str) + +updatePosChar :: Position -> Char -> Position +updatePosChar (Position pos) c = case c of + '\n' -> Position { line: pos.line + 1, column: 1 } + '\r' -> Position { line: pos.line + 1, column: 1 } + '\t' -> Position { line: pos.line, column: pos.column + 8 - ((pos.column - 1) `mod` 8) } + _ -> Position { line: pos.line, column: pos.column + 1 } diff --git a/src/Text/Parsing/Parser/Stream.purs b/src/Text/Parsing/Parser/Stream.purs new file mode 100644 index 0000000..7ccce0f --- /dev/null +++ b/src/Text/Parsing/Parser/Stream.purs @@ -0,0 +1,116 @@ +-- | Primitive parsers for working with an `Stream` input. + +module Text.Parsing.Parser.Stream where + +import Control.Monad.State (put, get) +import Control.Monad.Trans.Class (lift) +import Data.Foldable (foldl, elem, notElem) +import Data.List as L +import Data.List.Lazy as LazyL +import Data.Maybe (Maybe(..)) +import Data.Newtype (class Newtype) +import Data.String as S +import Data.Tuple (Tuple(..)) +import Prelude hiding (between) +import Text.Parsing.Parser (ParseState(..), ParserT, fail) +import Text.Parsing.Parser.Combinators (tryRethrow, ()) +import Text.Parsing.Parser.Pos (Position, updatePosString, updatePosChar) + +-- | A newtype used to identify a prefix of a stream +newtype Prefix a = Prefix a + +derive instance eqPrefix :: Eq a => Eq (Prefix a) +derive instance ordPrefix :: Ord a => Ord (Prefix a) +derive instance newtypePrefix :: Newtype (Prefix a) _ + +instance showPrefix :: Show a => Show (Prefix a) where + show (Prefix s) = "(Prefix " <> show s <> ")" + +class HasUpdatePosition a where + updatePos :: Position -> a -> Position + +instance stringHasUpdatePosition :: HasUpdatePosition String where + updatePos = updatePosString + +instance charHasUpdatePosition :: HasUpdatePosition Char where + updatePos = updatePosChar + +-- | This class exists to abstract over streams which support the string-like +-- | operations with position tracking, which this modules needs. +-- | +-- | Instances must satisfy the following laws: +-- | - `stripPrefix (Prefix input) {input, position} >>= uncons = Nothing` + +class Stream s m t | s -> t where + uncons :: forall r. ParserCursor s r -> m (Maybe (Tuple t (ParserCursor s r))) + stripPrefix :: forall r. Prefix s -> ParserCursor s r -> m (Maybe (ParserCursor s r)) + +-- Part or ParseState which is exposed to Stream instances +type ParserCursor s r = { input :: s, pos :: Position | r} + + +instance stringStream :: (Applicative m) => Stream String m Char where + uncons state = pure $ S.uncons state.input <#> \({ head, tail}) -> + Tuple head state{input = tail, pos = updatePos state.pos head } + stripPrefix (Prefix p) state = pure $ S.stripPrefix (S.Pattern p) state.input <#> \rest -> + state{input = rest, pos = updatePos state.pos p} + +instance listStream :: (Applicative m, Eq a, HasUpdatePosition a) => Stream (L.List a) m a where + uncons state = pure $ L.uncons state.input <#> \({ head, tail}) -> + Tuple head state{input = tail, pos = updatePos state.pos head } + stripPrefix (Prefix p) state = pure $ L.stripPrefix (L.Pattern p) state.input <#> \rest -> + state{input = rest, pos = foldl updatePos state.pos p} + +instance lazyListStream :: (Applicative m, Eq a, HasUpdatePosition a) => Stream (LazyL.List a) m a where + uncons state = pure $ LazyL.uncons state.input <#> \({ head, tail}) -> + Tuple head state{input = tail, pos = updatePos state.pos head } + stripPrefix (Prefix p) state = pure $ LazyL.stripPrefix (LazyL.Pattern p) state.input <#> \rest -> + state{input = rest, pos = foldl updatePos state.pos p} + +-- | Match end of stream. +eof :: forall s t m. Stream s m t => Monad m => ParserT s m Unit +eof = do + ParseState state <- get + (lift $ uncons state) >>= case _ of + Nothing -> pure unit + _ -> fail "Expected EOF" + +-- | Match the specified prefix. +prefix :: forall f c m. Stream f m c => Show f => Monad m => f -> ParserT f m f +prefix p = do + ParseState state <- get + (lift $ stripPrefix (Prefix p) state) >>= case _ of + Nothing -> fail $ "Expected " <> show p + Just nextState -> do + put $ ParseState nextState{consumed = true} + pure p + +-- | Match any token. +token :: forall s t m. Stream s m t => Monad m => ParserT s m t +token = do + ParseState state <- get + (lift $ uncons state) >>= case _ of + Nothing -> fail "Unexpected EOF" + Just (Tuple head nextState) -> do + put $ ParseState nextState{consumed = true} + pure head + +-- | Match a token satisfying the specified predicate. +satisfy :: forall s t m. Stream s m t => Show t => Monad m => (t -> Boolean) -> ParserT s m t +satisfy f = tryRethrow do + c <- token + if f c then pure c + else fail $ "Token " <> show c <> " did not satisfy predicate" + +-- | Match the specified token +match :: forall s t m. Stream s m t => Eq t => Show t => Monad m => t -> ParserT s m t +match c = satisfy (_ == c) show c + + +-- | Match one of the tokens in the array. +oneOf :: forall s t m. Stream s m t => Show t => Eq t => Monad m => Array t -> ParserT s m t +oneOf ss = satisfy (flip elem ss) ("one of " <> show ss) + +-- | Match any token not in the array. +noneOf :: forall s t m. Stream s m t => Show t => Eq t => Monad m => Array t -> ParserT s m t +noneOf ss = satisfy (flip notElem ss) ("none of " <> show ss) diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index 335bab2..a5f7d38 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -2,89 +2,46 @@ module Text.Parsing.Parser.String where -import Data.String as S -import Control.Monad.State (modify, gets) import Data.Array (many) -import Data.Foldable (elem, notElem) -import Data.Maybe (Maybe(..)) -import Data.Newtype (wrap) -import Data.String (Pattern, fromCharArray, length, singleton) -import Text.Parsing.Parser (ParseState(..), ParserT, fail) -import Text.Parsing.Parser.Combinators (tryRethrow, ()) -import Text.Parsing.Parser.Pos (updatePosString) +import Data.Char.Unicode (isAlpha, isAlphaNum, isDigit, isHexDigit, isOctDigit, isSpace, isUpper) import Prelude hiding (between) +import Text.Parsing.Parser (ParserT) +import Text.Parsing.Parser.Combinators (()) +import Text.Parsing.Parser.Stream (class Stream, satisfy) --- | This class exists to abstract over streams which support the string-like --- | operations which this modules needs. -class StringLike s where - drop :: Int -> s -> s - indexOf :: Pattern -> s -> Maybe Int - null :: s -> Boolean - uncons :: s -> Maybe { head :: Char, tail :: s } +-- | Match a whitespace characters but returns them using Array. +whiteSpace :: forall s m. Stream s m Char => Monad m => ParserT s m (Array Char) +whiteSpace = many space -instance stringLikeString :: StringLike String where - uncons = S.uncons - drop = S.drop - indexOf = S.indexOf - null = S.null - --- | Match end-of-file. -eof :: forall s m. StringLike s => Monad m => ParserT s m Unit -eof = do - input <- gets \(ParseState input _ _) -> input - unless (null input) (fail "Expected EOF") - --- | Match the specified string. -string :: forall s m. StringLike s => Monad m => String -> ParserT s m String -string str = do - input <- gets \(ParseState input _ _) -> input - case indexOf (wrap str) input of - Just 0 -> do - modify \(ParseState _ position _) -> - ParseState (drop (length str) input) - (updatePosString position str) - true - pure str - _ -> fail ("Expected " <> show str) +-- | Skip whitespace characters. +skipSpaces :: forall s m. Stream s m Char => Monad m => ParserT s m Unit +skipSpaces = void whiteSpace --- | Match any character. -anyChar :: forall s m. StringLike s => Monad m => ParserT s m Char -anyChar = do - input <- gets \(ParseState input _ _) -> input - case uncons input of - Nothing -> fail "Unexpected EOF" - Just { head, tail } -> do - modify \(ParseState _ position _) -> - ParseState tail - (updatePosString position (singleton head)) - true - pure head +-- | Parse a digit. Matches any char that satisfies `Data.Char.Unicode.isDigit`. +digit :: forall s m . Stream s m Char => Monad m => ParserT s m Char +digit = satisfy isDigit "digit" --- | Match a character satisfying the specified predicate. -satisfy :: forall s m. StringLike s => Monad m => (Char -> Boolean) -> ParserT s m Char -satisfy f = tryRethrow do - c <- anyChar - if f c then pure c - else fail $ "Character '" <> singleton c <> "' did not satisfy predicate" +-- | Parse a hex digit. Matches any char that satisfies `Data.Char.Unicode.isHexDigit`. +hexDigit :: forall s m . Stream s m Char => Monad m => ParserT s m Char +hexDigit = satisfy isHexDigit "hex digit" --- | Match the specified character -char :: forall s m. StringLike s => Monad m => Char -> ParserT s m Char -char c = satisfy (_ == c) show c +-- | Parse an octal digit. Matches any char that satisfies `Data.Char.Unicode.isOctDigit`. +octDigit :: forall s m . Stream s m Char => Monad m => ParserT s m Char +octDigit = satisfy isOctDigit "oct digit" --- | Match a whitespace character. -whiteSpace :: forall s m. StringLike s => Monad m => ParserT s m String -whiteSpace = do - cs <- many $ satisfy \c -> c == '\n' || c == '\r' || c == ' ' || c == '\t' - pure $ fromCharArray cs +-- | Parse an uppercase letter. Matches any char that satisfies `Data.Char.Unicode.isUpper`. +upper :: forall s m . Stream s m Char => Monad m => ParserT s m Char +upper = satisfy isUpper "uppercase letter" --- | Skip whitespace characters. -skipSpaces :: forall s m. StringLike s => Monad m => ParserT s m Unit -skipSpaces = void whiteSpace +-- | Parse a space character. Matches any char that satisfies `Data.Char.Unicode.isSpace`. +space :: forall s m . Stream s m Char => Monad m => ParserT s m Char +space = satisfy isSpace "space" --- | Match one of the characters in the array. -oneOf :: forall s m. StringLike s => Monad m => Array Char -> ParserT s m Char -oneOf ss = satisfy (flip elem ss) ("one of " <> show ss) +-- | Parse an alphabetical character. Matches any char that satisfies `Data.Char.Unicode.isAlpha`. +letter :: forall s m . Stream s m Char => Monad m => ParserT s m Char +letter = satisfy isAlpha "letter" --- | Match any character not in the array. -noneOf :: forall s m. StringLike s => Monad m => Array Char -> ParserT s m Char -noneOf ss = satisfy (flip notElem ss) ("none of " <> show ss) +-- | Parse an alphabetical or numerical character. +-- | Matches any char that satisfies `Data.Char.Unicode.isAlphaNum`. +alphaNum :: forall s m . Stream s m Char => Monad m => ParserT s m Char +alphaNum = satisfy isAlphaNum "letter or digit" diff --git a/src/Text/Parsing/Parser/Token.purs b/src/Text/Parsing/Parser/Token.purs index 0761e5f..a41ced8 100644 --- a/src/Text/Parsing/Parser/Token.purs +++ b/src/Text/Parsing/Parser/Token.purs @@ -1,23 +1,12 @@ -- | Functions for working with streams of tokens. module Text.Parsing.Parser.Token - ( token - , when - , match - , LanguageDef + ( LanguageDef , GenLanguageDef(LanguageDef) , unGenLanguageDef , TokenParser , GenTokenParser , makeTokenParser - -- should these be exported? Maybe they should go in a different module? - , digit - , hexDigit - , octDigit - , upper - , space - , letter - , alphaNum ) where @@ -25,10 +14,9 @@ import Data.Array as Array import Data.Char.Unicode as Unicode import Data.List as List import Control.Lazy (fix) -import Control.Monad.State (modify, gets) -import Control.MonadPlus (guard, (<|>)) +import Control.MonadPlus ((<|>)) import Data.Char (fromCharCode, toCharCode) -import Data.Char.Unicode (digitToInt, isAlpha, isAlphaNum, isDigit, isHexDigit, isOctDigit, isSpace, isUpper) +import Data.Char.Unicode (digitToInt, isAlpha) import Data.Either (Either(..)) import Data.Foldable (foldl, foldr) import Data.Identity (Identity) @@ -38,34 +26,12 @@ import Data.Maybe (Maybe(..), maybe) import Data.String (toCharArray, null, toLower, fromCharArray, singleton, uncons) import Data.Tuple (Tuple(..)) import Math (pow) -import Text.Parsing.Parser (ParseState(..), ParserT, fail) -import Text.Parsing.Parser.Combinators (skipMany1, try, tryRethrow, skipMany, notFollowedBy, option, choice, between, sepBy1, sepBy, (), ()) -import Text.Parsing.Parser.Pos (Position) -import Text.Parsing.Parser.String (satisfy, oneOf, noneOf, string, char) +import Text.Parsing.Parser (ParserT, fail) +import Text.Parsing.Parser.Combinators (skipMany1, try, skipMany, notFollowedBy, option, choice, between, sepBy1, sepBy, (), ()) +import Text.Parsing.Parser.Stream (satisfy, oneOf, noneOf, prefix, match) +import Text.Parsing.Parser.String (digit, hexDigit, octDigit, space, upper) import Prelude hiding (when,between) --- | Create a parser which Returns the first token in the stream. -token :: forall m a. Monad m => (a -> Position) -> ParserT (List a) m a -token tokpos = do - input <- gets \(ParseState input _ _) -> input - case List.uncons input of - Nothing -> fail "Unexpected EOF" - Just { head, tail } -> do - modify \(ParseState _ position _) -> - ParseState tail (tokpos head) true - pure head - --- | Create a parser which matches any token satisfying the predicate. -when :: forall m a. Monad m => (a -> Position) -> (a -> Boolean) -> ParserT (List a) m a -when tokpos f = tryRethrow do - a <- token tokpos - guard $ f a - pure a - --- | Match the specified token at the head of the stream. -match :: forall a m. Monad m => Eq a => (a -> Position) -> a -> ParserT (List a) m a -match tokpos tok = when tokpos (_ == tok) - type LanguageDef = GenLanguageDef String Identity -- | The `GenLanguageDef` type is a record that contains all parameterizable @@ -85,10 +51,10 @@ newtype GenLanguageDef s m -- | Set to `true` if the language supports nested block comments. nestedComments :: Boolean, -- | This parser should accept any start characters of identifiers. For - -- | example `letter <|> char '_'`. + -- | example `letter <|> match '_'`. identStart :: ParserT s m Char, -- | This parser should accept any legal tail characters of identifiers. - -- | For example `alphaNum <|> char '_'`. + -- | For example `alphaNum <|> match '_'`. identLetter :: ParserT s m Char, -- | This parser should accept any start characters of operators. For -- | example `oneOf [':', '+', '=']`. @@ -380,13 +346,13 @@ makeTokenParser (LanguageDef languageDef) charLiteral = lexeme go "character" where go :: ParserT String m Char - go = between (char '\'') (char '\'' "end of character") characterChar + go = between (match '\'') (match '\'' "end of character") characterChar characterChar :: ParserT String m Char characterChar = charLetter <|> charEscape "literal character" charEscape :: ParserT String m Char - charEscape = char '\\' *> escapeCode + charEscape = match '\\' *> escapeCode charLetter :: ParserT String m Char charLetter = satisfy \c -> (c /= '\'') && (c /= '\\') && (c > '\026') @@ -396,7 +362,7 @@ makeTokenParser (LanguageDef languageDef) where go :: ParserT String m String go = do - maybeChars <- between (char '"') (char '"' "end of string") (List.many stringChar) + maybeChars <- between (match '"') (match '"' "end of string") (List.many stringChar) pure $ fromCharArray $ List.toUnfoldable $ foldr folder Nil maybeChars folder :: Maybe Char -> List Char -> List Char @@ -414,14 +380,14 @@ makeTokenParser (LanguageDef languageDef) stringEscape :: ParserT String m (Maybe Char) stringEscape = do - _ <- char '\\' + _ <- match '\\' (escapeGap $> Nothing) <|> (escapeEmpty $> Nothing) <|> (Just <$> escapeCode) escapeEmpty :: ParserT String m Char - escapeEmpty = char '&' + escapeEmpty = match '&' escapeGap :: ParserT String m Char - escapeGap = Array.some space *> char '\\' "end of string gap" + escapeGap = Array.some space *> match '\\' "end of string gap" -- -- escape codes escapeCode :: ParserT String m Char @@ -430,15 +396,15 @@ makeTokenParser (LanguageDef languageDef) charControl :: ParserT String m Char charControl = do - _ <- char '^' + _ <- match '^' code <- upper pure <<< fromCharCode $ toCharCode code - toCharCode 'A' + 1 charNum :: ParserT String m Char charNum = do code <- decimal - <|> ( char 'o' *> number 8 octDigit ) - <|> ( char 'x' *> number 16 hexDigit ) + <|> ( match 'o' *> number 8 octDigit ) + <|> ( match 'x' *> number 16 hexDigit ) if code > 0x10FFFF then fail "invalid escape sequence" else pure $ fromCharCode code @@ -447,13 +413,13 @@ makeTokenParser (LanguageDef languageDef) charEsc = choice (map parseEsc escMap) where parseEsc :: Tuple Char Char -> ParserT String m Char - parseEsc (Tuple c code) = char c $> code + parseEsc (Tuple c code) = match c $> code charAscii :: ParserT String m Char charAscii = choice (map parseAscii asciiMap) where parseAscii :: Tuple String Char -> ParserT String m Char - parseAscii (Tuple asc code) = try $ string asc $> code + parseAscii (Tuple asc code) = try $ prefix asc $> code -- escape code tables escMap :: Array (Tuple Char Char) @@ -504,7 +470,7 @@ makeTokenParser (LanguageDef languageDef) floating = decimal >>= fractExponent natFloat :: ParserT String m (Either Int Number) - natFloat = char '0' *> zeroNumFloat + natFloat = match '0' *> zeroNumFloat <|> decimalFloat zeroNumFloat :: ParserT String m (Either Int Number) @@ -537,7 +503,7 @@ makeTokenParser (LanguageDef languageDef) fraction :: ParserT String m Number fraction = "fraction" do - _ <- char '.' + _ <- match '.' digits <- Array.some digit "fraction" maybe (fail "not digit") pure $ foldr op (Just 0.0) digits where @@ -566,15 +532,15 @@ makeTokenParser (LanguageDef languageDef) pure $ f n sign :: forall a . (Ring a) => ParserT String m (a -> a) - sign = (char '-' $> negate) - <|> (char '+' $> id) + sign = (match '-' $> negate) + <|> (match '+' $> id) <|> pure id nat :: ParserT String m Int nat = zeroNumber <|> decimal zeroNumber :: ParserT String m Int - zeroNumber = char '0' *> + zeroNumber = match '0' *> ( hexadecimal <|> octal <|> decimal <|> pure 0 ) "" decimal :: ParserT String m Int @@ -604,7 +570,7 @@ makeTokenParser (LanguageDef languageDef) where go :: ParserT String m Unit go = do - _ <- string name + _ <- prefix name notFollowedBy languageDef.opLetter "end of " <> name operator :: ParserT String m String @@ -641,7 +607,7 @@ makeTokenParser (LanguageDef languageDef) go = caseString name *> (notFollowedBy languageDef.identLetter "end of " <> name) caseString :: String -> ParserT String m String - caseString name | languageDef.caseSensitive = string name $> name + caseString name | languageDef.caseSensitive = prefix name $> name | otherwise = walk name $> name where walk :: String -> ParserT String m Unit @@ -650,8 +616,8 @@ makeTokenParser (LanguageDef languageDef) Just { head: c, tail: cs } -> (caseChar c msg) *> walk cs caseChar :: Char -> ParserT String m Char - caseChar c | isAlpha c = char (Unicode.toLower c) <|> char (Unicode.toUpper c) - | otherwise = char c + caseChar c | isAlpha c = match (Unicode.toLower c) <|> match (Unicode.toUpper c) + | otherwise = match c msg :: String msg = show name @@ -682,7 +648,7 @@ makeTokenParser (LanguageDef languageDef) -- White space & symbols ----------------------------------------------------------- symbol :: String -> ParserT String m String - symbol name = lexeme (string name) $> name + symbol name = lexeme (prefix name) $> name lexeme :: forall a . ParserT String m a -> ParserT String m a lexeme p = p <* whiteSpace' (LanguageDef languageDef) @@ -734,15 +700,15 @@ whiteSpace' langDef@(LanguageDef languageDef) skipMany (simpleSpace <|> oneLineComment langDef <|> multiLineComment langDef "") simpleSpace :: forall m . Monad m => ParserT String m Unit -simpleSpace = skipMany1 (satisfy isSpace) +simpleSpace = skipMany1 (space) oneLineComment :: forall m . Monad m => GenLanguageDef String m -> ParserT String m Unit oneLineComment (LanguageDef languageDef) = - try (string languageDef.commentLine) *> skipMany (satisfy (_ /= '\n')) + try (prefix languageDef.commentLine) *> skipMany (satisfy (_ /= '\n')) multiLineComment :: forall m . Monad m => GenLanguageDef String m -> ParserT String m Unit multiLineComment langDef@(LanguageDef languageDef) = - try (string languageDef.commentStart) *> inComment langDef + try (prefix languageDef.commentStart) *> inComment langDef inComment :: forall m . Monad m => GenLanguageDef String m -> ParserT String m Unit inComment langDef@(LanguageDef languageDef) = @@ -750,7 +716,7 @@ inComment langDef@(LanguageDef languageDef) = inCommentMulti :: forall m . Monad m => GenLanguageDef String m -> ParserT String m Unit inCommentMulti langDef@(LanguageDef languageDef) = - fix \p -> ( void $ try (string languageDef.commentEnd) ) + fix \p -> ( void $ try (prefix languageDef.commentEnd) ) <|> ( multiLineComment langDef *> p ) <|> ( skipMany1 (noneOf startEnd) *> p ) <|> ( oneOf startEnd *> p ) @@ -761,7 +727,7 @@ inCommentMulti langDef@(LanguageDef languageDef) = inCommentSingle :: forall m . Monad m => GenLanguageDef String m -> ParserT String m Unit inCommentSingle (LanguageDef languageDef) = - fix \p -> ( void $ try (string languageDef.commentEnd) ) + fix \p -> ( void $ try (prefix languageDef.commentEnd) ) <|> ( skipMany1 (noneOf startEnd) *> p ) <|> ( oneOf startEnd *> p ) "end of comment" @@ -772,32 +738,3 @@ inCommentSingle (LanguageDef languageDef) = ------------------------------------------------------------------------- -- Helper functions that should maybe go in Text.Parsing.Parser.String -- ------------------------------------------------------------------------- - --- | Parse a digit. Matches any char that satisfies `Data.Char.Unicode.isDigit`. -digit :: forall m . Monad m => ParserT String m Char -digit = satisfy isDigit "digit" - --- | Parse a hex digit. Matches any char that satisfies `Data.Char.Unicode.isHexDigit`. -hexDigit :: forall m . Monad m => ParserT String m Char -hexDigit = satisfy isHexDigit "hex digit" - --- | Parse an octal digit. Matches any char that satisfies `Data.Char.Unicode.isOctDigit`. -octDigit :: forall m . Monad m => ParserT String m Char -octDigit = satisfy isOctDigit "oct digit" - --- | Parse an uppercase letter. Matches any char that satisfies `Data.Char.Unicode.isUpper`. -upper :: forall m . Monad m => ParserT String m Char -upper = satisfy isUpper "uppercase letter" - --- | Parse a space character. Matches any char that satisfies `Data.Char.Unicode.isSpace`. -space :: forall m . Monad m => ParserT String m Char -space = satisfy isSpace "space" - --- | Parse an alphabetical character. Matches any char that satisfies `Data.Char.Unicode.isAlpha`. -letter :: forall m . Monad m => ParserT String m Char -letter = satisfy isAlpha "letter" - --- | Parse an alphabetical or numerical character. --- | Matches any char that satisfies `Data.Char.Unicode.isAlphaNum`. -alphaNum :: forall m . Monad m => ParserT String m Char -alphaNum = satisfy isAlphaNum "letter or digit" diff --git a/test/Main.purs b/test/Main.purs index a401378..b68050c 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -15,17 +15,17 @@ import Text.Parsing.Parser (Parser, ParserT, runParser, parseErrorPosition) import Text.Parsing.Parser.Combinators (endBy1, sepBy1, optionMaybe, try, chainl, between) import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser) import Text.Parsing.Parser.Language (javaStyle, haskellStyle, haskellDef) -import Text.Parsing.Parser.Pos (Position(..), initialPos) -import Text.Parsing.Parser.String (eof, string, char, satisfy, anyChar) -import Text.Parsing.Parser.Token (TokenParser, match, when, token, makeTokenParser) +import Text.Parsing.Parser.Pos (Position(..)) +import Text.Parsing.Parser.Stream (eof, prefix, match, satisfy, token, class HasUpdatePosition) +import Text.Parsing.Parser.Token (TokenParser, makeTokenParser) import Prelude hiding (between,when) parens :: forall m a. Monad m => ParserT String m a -> ParserT String m a -parens = between (string "(") (string ")") +parens = between (prefix "(") (prefix ")") nested :: forall m. Monad m => ParserT String m Int nested = fix \p -> (do - _ <- string "a" + _ <- prefix "a" pure 0) <|> ((+) 1) <$> parens p parseTest :: forall s a eff. Show a => Eq a => s -> a -> Parser s a -> Eff (console :: CONSOLE, assert :: ASSERT | eff) Unit @@ -44,32 +44,32 @@ parseErrorTestPosition p input expected = case runParser input p of logShow expected opTest :: Parser String String -opTest = chainl (singleton <$> anyChar) (char '+' $> append) "" +opTest = chainl (singleton <$> token) (match '+' $> append) "" digit :: Parser String Int -digit = (string "0" >>= \_ -> pure 0) - <|> (string "1" >>= \_ -> pure 1) - <|> (string "2" >>= \_ -> pure 2) - <|> (string "3" >>= \_ -> pure 3) - <|> (string "4" >>= \_ -> pure 4) - <|> (string "5" >>= \_ -> pure 5) - <|> (string "6" >>= \_ -> pure 6) - <|> (string "7" >>= \_ -> pure 7) - <|> (string "8" >>= \_ -> pure 8) - <|> (string "9" >>= \_ -> pure 9) +digit = (prefix "0" >>= \_ -> pure 0) + <|> (prefix "1" >>= \_ -> pure 1) + <|> (prefix "2" >>= \_ -> pure 2) + <|> (prefix "3" >>= \_ -> pure 3) + <|> (prefix "4" >>= \_ -> pure 4) + <|> (prefix "5" >>= \_ -> pure 5) + <|> (prefix "6" >>= \_ -> pure 6) + <|> (prefix "7" >>= \_ -> pure 7) + <|> (prefix "8" >>= \_ -> pure 8) + <|> (prefix "9" >>= \_ -> pure 9) exprTest :: Parser String Int -exprTest = buildExprParser [ [ Infix (string "/" >>= \_ -> pure (/)) AssocRight ] - , [ Infix (string "*" >>= \_ -> pure (*)) AssocRight ] - , [ Infix (string "-" >>= \_ -> pure (-)) AssocRight ] - , [ Infix (string "+" >>= \_ -> pure (+)) AssocRight ] +exprTest = buildExprParser [ [ Infix (prefix "/" >>= \_ -> pure (/)) AssocRight ] + , [ Infix (prefix "*" >>= \_ -> pure (*)) AssocRight ] + , [ Infix (prefix "-" >>= \_ -> pure (-)) AssocRight ] + , [ Infix (prefix "+" >>= \_ -> pure (+)) AssocRight ] ] digit manySatisfyTest :: Parser String String manySatisfyTest = do r <- some $ satisfy (\s -> s /= '?') - _ <- char '?' + _ <- match '?' pure (fromCharArray r) data TestToken = A | B @@ -83,6 +83,9 @@ instance testTokensEq :: Eq TestToken where eq B B = true eq _ _ = false +instance stringHasUpdatePosition :: HasUpdatePosition TestToken where + updatePos (Position { column, line }) tok = Position { column: column + 1, line} + isA :: TestToken -> Boolean isA A = true isA _ = false @@ -280,34 +283,34 @@ tokenParserWhiteSpaceTest = do tokenParserParensTest :: TestM tokenParserParensTest = do -- parse parens - parseTest "(hello)" "hello" $ testTokenParser.parens $ string "hello" + parseTest "(hello)" "hello" $ testTokenParser.parens $ prefix "hello" -- fail on non-closed parens - parseErrorTestPosition (testTokenParser.parens $ string "hello") "(hello" $ mkPos 7 + parseErrorTestPosition (testTokenParser.parens $ prefix "hello") "(hello" $ mkPos 7 tokenParserBracesTest :: TestM tokenParserBracesTest = do -- parse braces - parseTest "{hello}" "hello" $ testTokenParser.braces $ string "hello" + parseTest "{hello}" "hello" $ testTokenParser.braces $ prefix "hello" -- fail on non-closed braces - parseErrorTestPosition (testTokenParser.braces $ string "hello") "{hello" $ mkPos 7 + parseErrorTestPosition (testTokenParser.braces $ prefix "hello") "{hello" $ mkPos 7 tokenParserAnglesTest :: TestM tokenParserAnglesTest = do -- parse angles - parseTest "" "hello" $ testTokenParser.angles $ string "hello" + parseTest "" "hello" $ testTokenParser.angles $ prefix "hello" -- fail on non-closed angles - parseErrorTestPosition (testTokenParser.angles $ string "hello") " char '?') + (many $ match 'f' *> match '?') "foo" (Position { column: 2, line: 1 }) @@ -427,36 +430,35 @@ main = do parseTest "foo" Nil - (many $ try $ char 'f' *> char '?') + (many $ try $ match 'f' *> match '?') parseTest "(((a)))" 3 nested - parseTest "aaa" (Cons "a" (Cons "a" (Cons "a" Nil))) $ many (string "a") + parseTest "aaa" (Cons "a" (Cons "a" (Cons "a" Nil))) $ many (prefix "a") parseTest "(ab)" (Just "b") $ parens do - _ <- string "a" - optionMaybe $ string "b" - parseTest "a,a,a" (Cons "a" (Cons "a" (Cons "a" Nil))) $ string "a" `sepBy1` string "," + _ <- prefix "a" + optionMaybe $ prefix "b" + parseTest "a,a,a" (Cons "a" (Cons "a" (Cons "a" Nil))) $ prefix "a" `sepBy1` prefix "," parseTest "a,a,a," (Cons "a" (Cons "a" (Cons "a" Nil))) $ do - as <- string "a" `endBy1` string "," + as <- prefix "a" `endBy1` prefix "," eof pure as parseTest "a+b+c" "abc" opTest parseTest "1*2+3/4-5" (-3) exprTest parseTest "ab?" "ab" manySatisfyTest - let tokpos = const initialPos - parseTest (fromFoldable [A, B]) A (token tokpos) - parseTest (fromFoldable [B, A]) B (token tokpos) + parseTest (fromFoldable [A, B]) A (token) + parseTest (fromFoldable [B, A]) B (token) - parseTest (fromFoldable [A, B]) A (when tokpos isA) + parseTest (fromFoldable [A, B]) A (satisfy isA) - parseTest (fromFoldable [A]) A (match tokpos A) - parseTest (fromFoldable [B]) B (match tokpos B) - parseTest (fromFoldable [A, B]) A (match tokpos A) + parseTest (fromFoldable [A]) A (match A) + parseTest (fromFoldable [B]) B (match B) + parseTest (fromFoldable [A, B]) A (match A) - parseErrorTestPosition (string "abc") "bcd" (Position { column: 1, line: 1 }) - parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { column: 4, line: 1 }) - parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 }) - parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { column: 10, line: 1 }) + parseErrorTestPosition (prefix "abc") "bcd" (Position { column: 1, line: 1 }) + parseErrorTestPosition (prefix "abc" *> eof) "abcdefg" (Position { column: 4, line: 1 }) + parseErrorTestPosition (prefix "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 }) + parseErrorTestPosition (prefix "\ta" *> eof) "\tab" (Position { column: 10, line: 1 }) tokenParserIdentifierTest tokenParserReservedTest