diff --git a/Cabal/src/Distribution/Simple/Program/Run.hs b/Cabal/src/Distribution/Simple/Program/Run.hs index 268cb794505..e9a1298559b 100644 --- a/Cabal/src/Distribution/Simple/Program/Run.hs +++ b/Cabal/src/Distribution/Simple/Program/Run.hs @@ -24,6 +24,7 @@ module Distribution.Simple.Program.Run , getProgramInvocationOutput , getProgramInvocationLBS , getProgramInvocationOutputAndErrors + , getProgramInvocationLBSAndErrors , getEffectiveEnvironment ) where @@ -181,6 +182,13 @@ getProgramInvocationOutputAndErrors verbosity inv = case progInvokeOutputEncodin (output', errors, exitCode) <- getProgramInvocationIODataAndErrors verbosity inv IODataModeBinary return (normaliseLineEndings (fromUTF8LBS output'), errors, exitCode) +getProgramInvocationLBSAndErrors + :: Verbosity + -> ProgramInvocation + -> IO (LBS.ByteString, String, ExitCode) +getProgramInvocationLBSAndErrors verbosity inv = + getProgramInvocationIODataAndErrors verbosity inv IODataModeBinary + getProgramInvocationIODataAndErrors :: KnownIODataMode mode => Verbosity diff --git a/cabal-install-solver/cabal-install-solver.cabal b/cabal-install-solver/cabal-install-solver.cabal index fe440a78963..2f1e8d6bdf1 100644 --- a/cabal-install-solver/cabal-install-solver.cabal +++ b/cabal-install-solver/cabal-install-solver.cabal @@ -110,6 +110,7 @@ library , mtl >=2.0 && <2.4 , pretty ^>=1.1 , transformers >=0.4.2.0 && <0.7 + , text (>= 1.2.3.0 && < 1.3) || (>= 2.0 && < 2.2) if flag(debug-expensive-assertions) cpp-options: -DDEBUG_EXPENSIVE_ASSERTIONS diff --git a/cabal-install-solver/src/Distribution/Solver/Types/PkgConfigDb.hs b/cabal-install-solver/src/Distribution/Solver/Types/PkgConfigDb.hs index ee2f22032ca..21845eafdec 100644 --- a/cabal-install-solver/src/Distribution/Solver/Types/PkgConfigDb.hs +++ b/cabal-install-solver/src/Distribution/Solver/Types/PkgConfigDb.hs @@ -1,5 +1,6 @@ {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE LambdaCase #-} ----------------------------------------------------------------------------- -- | -- Module : Distribution.Solver.Types.PkgConfigDb @@ -23,17 +24,23 @@ module Distribution.Solver.Types.PkgConfigDb import Distribution.Solver.Compat.Prelude import Prelude () -import Control.Exception (handle) -import Control.Monad (mapM) -import qualified Data.Map as M -import System.FilePath (splitSearchPath) +import Control.Exception (handle) +import Control.Monad (mapM) +import Data.ByteString (ByteString) +import qualified Data.ByteString.Lazy as LBS +import qualified Data.Map as M +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Text.Encoding.Error as T +import System.FilePath (splitSearchPath) import Distribution.Compat.Environment (lookupEnv) import Distribution.Package (PkgconfigName, mkPkgconfigName) import Distribution.Parsec import Distribution.Simple.Program (ProgramDb, getProgramOutput, pkgConfigProgram, needProgram, ConfiguredProgram) -import Distribution.Simple.Program.Run (getProgramInvocationOutputAndErrors, programInvocation) +import Distribution.Simple.Program.Run + (getProgramInvocationOutputAndErrors, programInvocation, getProgramInvocationLBSAndErrors) import Distribution.Simple.Utils (info) import Distribution.Types.PkgconfigVersion import Distribution.Types.PkgconfigVersionRange @@ -63,10 +70,37 @@ readPkgConfigDb verbosity progdb = handle ioErrorHandler $ do case mpkgConfig of Nothing -> noPkgConfig "Cannot find pkg-config program" Just (pkgConfig, _) -> do - pkgList <- lines <$> getProgramOutput verbosity pkgConfig ["--list-all"] - -- The output of @pkg-config --list-all@ also includes a description - -- for each package, which we do not need. - let pkgNames = map (takeWhile (not . isSpace)) pkgList + -- To prevent malformed Unicode in the descriptions from crashing cabal, + -- read without interpreting any encoding first. (#9608) + (listAllOutput, listAllErrs, listAllExitcode) <- + getProgramInvocationLBSAndErrors verbosity (programInvocation pkgConfig ["--list-all"]) + when (listAllExitcode /= ExitSuccess) $ + ioError (userError ("pkg-config --list-all failed: " ++ listAllErrs)) + let pkgList = LBS.split (fromIntegral (ord '\n')) listAllOutput + -- Now decode the package *names* to a String. The ones where decoding + -- failed end up in 'failedPkgNames'. + let (failedPkgNames, pkgNames) = + partitionEithers + -- Drop empty package names. This will handle empty lines + -- in pkg-config's output, including the spurious one + -- after the last newline (because of LBS.split). + . filter (either (const True) (not . null)) + -- Try decoding strictly; if it fails, put the lenient + -- decoding in a Left for later reporting. + . map (\bsname -> + let sbsname = LBS.toStrict bsname + in case T.decodeUtf8' sbsname of + Left _ -> Left (T.unpack (decodeUtf8LenientCompat sbsname)) + Right name -> Right (T.unpack name)) + -- The output of @pkg-config --list-all@ also includes a + -- description for each package, which we do not need. + -- We don't use Data.Char.isSpace because that would also + -- include 0xA0, the non-breaking space, which can occur + -- in multi-byte UTF-8 sequences. + . map (LBS.takeWhile (not . isAsciiSpace)) + $ pkgList + when (not (null failedPkgNames)) $ + info verbosity ("Some pkg-config packages have names containing invalid unicode: " ++ intercalate ", " failedPkgNames) (outs, _errs, exitCode) <- getProgramInvocationOutputAndErrors verbosity (programInvocation pkgConfig ("--modversion" : pkgNames)) @@ -104,6 +138,15 @@ readPkgConfigDb verbosity progdb = handle ioErrorHandler $ do ExitSuccess -> Just (pkg, pkgVersion) _ -> Nothing + isAsciiSpace :: Word8 -> Bool + isAsciiSpace c = c `elem` map (fromIntegral . ord) " \t" + + -- The decodeUtf8Lenient function is defined starting with text-2.0.1; this + -- function simply reimplements it. When the minimum supported GHC version + -- is >= 9.4, switch to decodeUtf8Lenient. + decodeUtf8LenientCompat :: ByteString -> T.Text + decodeUtf8LenientCompat = T.decodeUtf8With T.lenientDecode + -- | Create a `PkgConfigDb` from a list of @(packageName, version)@ pairs. pkgConfigDbFromList :: [(String, String)] -> PkgConfigDb pkgConfigDbFromList pairs = (PkgConfigDb . M.fromList . map convert) pairs diff --git a/cabal-testsuite/PackageTests/ExtraProgPath/pkg-config b/cabal-testsuite/PackageTests/ExtraProgPath/pkg-config index 7c5fafbf0ca..195df7c2c41 100755 --- a/cabal-testsuite/PackageTests/ExtraProgPath/pkg-config +++ b/cabal-testsuite/PackageTests/ExtraProgPath/pkg-config @@ -1,3 +1,3 @@ -#!/usr/bin/sh +#!/bin/sh exit 1; diff --git a/cabal-testsuite/PackageTests/PkgConfigParse/MyLibrary.hs b/cabal-testsuite/PackageTests/PkgConfigParse/MyLibrary.hs new file mode 100644 index 00000000000..a51c414bcd2 --- /dev/null +++ b/cabal-testsuite/PackageTests/PkgConfigParse/MyLibrary.hs @@ -0,0 +1 @@ +module MyLibrary () where diff --git a/cabal-testsuite/PackageTests/PkgConfigParse/cabal.project b/cabal-testsuite/PackageTests/PkgConfigParse/cabal.project new file mode 100644 index 00000000000..5a93e28e878 --- /dev/null +++ b/cabal-testsuite/PackageTests/PkgConfigParse/cabal.project @@ -0,0 +1 @@ +packages: *.cabal \ No newline at end of file diff --git a/cabal-testsuite/PackageTests/PkgConfigParse/my.cabal b/cabal-testsuite/PackageTests/PkgConfigParse/my.cabal new file mode 100644 index 00000000000..38b7020b8a1 --- /dev/null +++ b/cabal-testsuite/PackageTests/PkgConfigParse/my.cabal @@ -0,0 +1,19 @@ +name: PkgConfigParse +version: 0.1 +license: BSD3 +author: Tom Smeding +maintainer: Tom Smeding +synopsis: Pkg Config Parse +category: PackageTests +build-type: Simple +cabal-version: 2.0 + +description: + Check that Cabal does not crash when pkg-config outputs invalid Unicode. + +Library + pkgconfig-depends: vpl + default-language: Haskell2010 + build-depends: base <5.0 + exposed-modules: + MyLibrary diff --git a/cabal-testsuite/PackageTests/PkgConfigParse/pkg-config b/cabal-testsuite/PackageTests/PkgConfigParse/pkg-config new file mode 100755 index 00000000000..183d08e0a00 --- /dev/null +++ b/cabal-testsuite/PackageTests/PkgConfigParse/pkg-config @@ -0,0 +1,49 @@ +#!/bin/sh + +set -eu + +# ugly, but "good enough" for this test +# This will need to be updated whenever cabal invokes pkg-config +# in new ways +case "$*" in + '--version') + echo 2.1.0 # whatever + ;; + + '--variable pc_path pkg-config') + echo '.' + ;; + + '--list-all') + printf 'zlib zlib - zlib compression library\n' + # \256 = \xAE is the iso-8859-1 (latin-1) encoded version of U+00AE, + # i.e. the "registered sign": ® + # This resulted in problems, see #9608 + printf 'vpl Intel\256 Video Processing Library - Accelerated video decode, encode, and frame processing capabilities on Intel\256 GPUs\n' + # \360 = \xF0 is latin-1 for ð; this is orð, Icelandic for "word"/"words". + printf 'or\360 Icelandic characters\n' + ;; + + '--modversion '*) + shift # drop the --modversion + for arg; do + case "$arg" in + zlib) echo 1.3; ;; # whatever + vpl) echo 2.10; ;; # whatever + # No entry for orð here; let's not even try to match on that + *) + echo >&2 "Package $arg was not found in the pkg-config search path." + exit 1 + esac + done + ;; + + # Ignore some stuff we're not implementing + '--cflags '*) ;; + '--libs '*) ;; + + *) + echo >&2 "pkg-config: unrecognised arguments $* (this is an incomplete shim)" + exit 1 + ;; +esac diff --git a/cabal-testsuite/PackageTests/PkgConfigParse/setup.out b/cabal-testsuite/PackageTests/PkgConfigParse/setup.out new file mode 100644 index 00000000000..92fd8204a40 --- /dev/null +++ b/cabal-testsuite/PackageTests/PkgConfigParse/setup.out @@ -0,0 +1 @@ +# cabal v2-build diff --git a/cabal-testsuite/PackageTests/PkgConfigParse/setup.test.hs b/cabal-testsuite/PackageTests/PkgConfigParse/setup.test.hs new file mode 100644 index 00000000000..0f860ab637a --- /dev/null +++ b/cabal-testsuite/PackageTests/PkgConfigParse/setup.test.hs @@ -0,0 +1,9 @@ +import Test.Cabal.Prelude + +-- Test that invalid unicode in pkg-config output doesn't trip up cabal very much +main = cabalTest $ do + -- skipped on windows because using a script to dummy up an executable doesn't work the same. + skipIfWindows + cdir <- testCurrentDir `fmap` getTestEnv + res <- cabal' "v2-build" ["--extra-prog-path="++cdir, "-v2"] + assertOutputContains "Some pkg-config packages have names containing invalid unicode: or" res diff --git a/changelog.d/pr-9609 b/changelog.d/pr-9609 new file mode 100644 index 00000000000..c156706ef03 --- /dev/null +++ b/changelog.d/pr-9609 @@ -0,0 +1,12 @@ +synopsis: Ignore invalid Unicode in pkg-config descriptions +packages: cabal-install-solver +prs: #9609 +issues: #9608 + +description: { + +Previously, cabal-install would crash when `pkg-config --list-all` contained +invalid Unicode. With this change, invalid unicode in package descriptions is +ignored, and unparseable package names are considered nonexistent. + +}