Skip to content

Commit 550ea8f

Browse files
committed
Replace manual UTF-8 decoding in Distribution.Parsec.FieldLineStream
This currently requires CPP for text<2, required for Cabal bootstrap plan on GHC 9.2. Hopefully we will be able to get rid of it in a not so distant future: `text-2.0` itself is buildable with GHC >= 8.0 and shipped as a boot package starting from GHC 9.4.
1 parent 406568f commit 550ea8f

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

Cabal-syntax/src/Distribution/Parsec/FieldLineStream.hs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{-# LANGUAGE CPP #-}
12
{-# LANGUAGE FlexibleInstances #-}
23
{-# LANGUAGE MultiParamTypeClasses #-}
34
{-# LANGUAGE OverloadedStrings #-}
@@ -11,7 +12,6 @@ module Distribution.Parsec.FieldLineStream
1112
, fieldLineStreamEnd
1213
) where
1314

14-
import Data.Bits
1515
import Data.ByteString (ByteString)
1616
import Distribution.Compat.Prelude
1717
import Distribution.Utils.Generic (toUTF8BS)
@@ -20,6 +20,12 @@ import Prelude ()
2020
import qualified Data.ByteString as BS
2121
import qualified Text.Parsec as Parsec
2222

23+
#if MIN_VERSION_text(2,0,0)
24+
import Data.Text.Internal.Encoding.Utf8
25+
#else
26+
import Data.Bits
27+
#endif
28+
2329
-- | This is essentially a lazy bytestring, but chunks are glued with newline @\'\\n\'@.
2430
data FieldLineStream
2531
= FLSLast !ByteString
@@ -47,6 +53,17 @@ instance Monad m => Parsec.Stream FieldLineStream m Char where
4753
Nothing -> Just ('\n', s)
4854
Just (c, bs') -> Just (unconsChar c bs' (\bs'' -> FLSCons bs'' s) s)
4955

56+
#if MIN_VERSION_text(2,0,0)
57+
unconsChar :: forall a. Word8 -> ByteString -> (ByteString -> a) -> a -> (Char, a)
58+
unconsChar c0 bs0 f next = go (utf8DecodeStart c0) bs0
59+
where
60+
go decoderResult bs = case decoderResult of
61+
Accept ch -> (ch, f bs)
62+
Reject -> (replacementChar, f bs)
63+
Incomplete state codePoint -> case BS.uncons bs of
64+
Nothing -> (replacementChar, next)
65+
Just (w, bs') -> go (utf8DecodeContinue w state codePoint) bs'
66+
#else
5067
-- Based on implementation 'decodeStringUtf8'
5168
unconsChar :: forall a. Word8 -> ByteString -> (ByteString -> a) -> a -> (Char, a)
5269
unconsChar c0 bs0 f next
@@ -88,6 +105,7 @@ unconsChar c0 bs0 f next
88105
bs1
89106
((acc `shiftL` 6) .|. fromIntegral cn .&. 0x3F)
90107
| otherwise -> (replacementChar, f bs1)
108+
#endif
91109

92110
replacementChar :: Char
93111
replacementChar = '\xfffd'

0 commit comments

Comments
 (0)