@@ -14,13 +14,19 @@ module Data.ProtoLens.TextFormat.Parser
14
14
, parser
15
15
) where
16
16
17
- import Data.List (intercalate )
17
+ import Data.ByteString (ByteString , pack )
18
+ import Data.Char (ord )
18
19
import Data.Functor.Identity (Identity )
20
+ import Data.List (intercalate )
21
+ import Data.Maybe (catMaybes )
19
22
import Data.Text.Lazy (Text )
20
- import Text.Parsec.Char (alphaNum , char , letter , oneOf )
23
+ import Data.Word (Word8 )
24
+ import Numeric (readOct , readHex )
25
+ import Text.Parsec.Char
26
+ (alphaNum , char , hexDigit , letter , octDigit , oneOf , satisfy )
21
27
import Text.Parsec.Text.Lazy (Parser )
22
- import Text.Parsec.Combinator (eof , sepBy1 , many1 , choice )
23
- import Text.Parsec.Token
28
+ import Text.Parsec.Combinator (choice , eof , many1 , optionMaybe , sepBy1 )
29
+ import Text.Parsec.Token hiding ( octal )
24
30
import Control.Applicative ((<*) , (<|>) , (*>) , many )
25
31
import Control.Monad (liftM , liftM2 , mzero )
26
32
@@ -60,7 +66,7 @@ data Key = Key String -- ^ A standard key that is just a string.
60
66
61
67
data Value = IntValue Integer -- ^ An integer
62
68
| DoubleValue Double -- ^ Any floating point number
63
- | StringValue String -- ^ A string literal
69
+ | ByteStringValue ByteString -- ^ A string or bytes literal
64
70
| MessageValue Message -- ^ A sub message
65
71
| EnumValue String -- ^ Any undelimited string (including false & true)
66
72
deriving (Show ,Ord ,Eq )
@@ -91,7 +97,8 @@ parser = whiteSpace ptp *> parseMessage <* eof
91
97
negative <- (symbol ptp " -" >> return True ) <|> return False
92
98
value <- naturalOrFloat ptp
93
99
return $ makeNumberValue negative value
94
- parseString = liftM (StringValue . concat ) . many1 $ stringLiteral ptp
100
+ parseString = liftM (ByteStringValue . mconcat )
101
+ $ many1 $ lexeme ptp $ protoStringLiteral
95
102
parseEnumValue = liftM EnumValue (identifier ptp)
96
103
parseMessageValue = liftM MessageValue
97
104
(braces ptp parseMessage <|> angles ptp parseMessage)
@@ -101,3 +108,50 @@ parser = whiteSpace ptp *> parseMessage <* eof
101
108
makeNumberValue False (Left intValue) = IntValue intValue
102
109
makeNumberValue True (Right doubleValue) = DoubleValue (negate doubleValue)
103
110
makeNumberValue False (Right doubleValue) = DoubleValue doubleValue
111
+
112
+ -- | Reads a literal string the way the Protocol Buffer distribution's
113
+ -- tokenizer.cc does. This differs from Haskell string literals in treating,
114
+ -- e.g. "\11" as octal instead of decimal, so reading as 9 instead of 11. Also,
115
+ -- like tokenizer.cc we assume octal and hex escapes can have at most three and
116
+ -- two digits, respectively.
117
+ --
118
+ -- TODO: implement reading of Unicode escapes.
119
+ protoStringLiteral :: Parser ByteString
120
+ protoStringLiteral = do
121
+ initialQuoteChar <- char ' \' ' <|> char ' \" '
122
+ word8s <- many stringChar
123
+ _ <- char initialQuoteChar
124
+ return $ pack word8s
125
+ where
126
+ stringChar :: Parser Word8
127
+ stringChar = nonEscape <|> stringEscape
128
+ nonEscape = fmap (fromIntegral . ord)
129
+ $ satisfy (\ c -> c `notElem` " \\\'\" " && ord c < 256 )
130
+ stringEscape = char ' \\ ' >> (octal <|> hex <|> unicode <|> simple)
131
+ octal = do d0 <- octDigit
132
+ d1 <- optionMaybe octDigit
133
+ d2 <- optionMaybe octDigit
134
+ readMaybeDigits readOct [Just d0, d1, d2]
135
+ readMaybeDigits :: ReadS Word8 -> [Maybe Char ] -> Parser Word8
136
+ readMaybeDigits reader
137
+ = return . (\ str -> let [(v, " " )] = reader str in v) . catMaybes
138
+ hex = do _ <- oneOf " xX"
139
+ d0 <- hexDigit
140
+ d1 <- optionMaybe hexDigit
141
+ readMaybeDigits readHex [Just d0, d1]
142
+ unicode = oneOf " uU" >> fail " Unicode in string literals not yet supported"
143
+ simple = choice $ map charRet [ (' a' , ' \a ' )
144
+ , (' b' , ' \b ' )
145
+ , (' f' , ' \f ' )
146
+ , (' n' , ' \n ' )
147
+ , (' r' , ' \r ' )
148
+ , (' t' , ' \t ' )
149
+ , (' v' , ' \v ' )
150
+ , (' \\ ' , ' \\ ' )
151
+ , (' \' ' , ' \' ' )
152
+ , (' \" ' , ' \" ' )
153
+ ]
154
+ where
155
+ charRet :: (Char , Char ) -> Parser Word8
156
+ charRet (escapeCh, ch) = do _ <- char escapeCh
157
+ return $ fromIntegral $ ord ch
0 commit comments