Skip to content

Commit c80ffe9

Browse files
committed
UTF8: Better error message for invalid UTF8.
Read bytestring and use Text's decodeUtf8 instead of using System.IO's hGetContents. This way you get a message saying "invalid UTF-8 stream" instead of "invalid byte sequence." You are also told which byte caused the problem.
1 parent 885ef20 commit c80ffe9

1 file changed

Lines changed: 6 additions & 4 deletions

File tree

src/Text/Pandoc/UTF8.hs

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,8 @@ import Codec.Binary.UTF8.String (encodeString, decodeString)
5252

5353
import System.IO hiding (readFile, writeFile, getContents,
5454
putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
55-
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
55+
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn,
56+
catch)
5657
import qualified System.IO as IO
5758
import qualified Data.ByteString.Char8 as B
5859
import qualified Data.ByteString.Lazy as BL
@@ -86,9 +87,10 @@ hPutStrLn :: Handle -> String -> IO ()
8687
hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
8788

8889
hGetContents :: Handle -> IO String
89-
hGetContents h = hSetEncoding h utf8_bom
90-
>> hSetNewlineMode h universalNewlineMode
91-
>> IO.hGetContents h
90+
hGetContents h = fmap (TL.unpack . TL.decodeUtf8) $ BL.hGetContents h
91+
-- hGetContents h = hSetEncoding h utf8_bom
92+
-- >> hSetNewlineMode h universalNewlineMode
93+
-- >> IO.hGetContents h
9294

9395
toString :: B.ByteString -> String
9496
toString = T.unpack . T.decodeUtf8With lenientDecode

0 commit comments

Comments (0)