Skip to content

Commit d4dff63

Browse files
tomsmeding and mergify[bot]
authored and committed
Ignore invalid Unicode in pkg-config descriptions (#9609)
* Ignore invalid Unicode in pkg-config descriptions

  Previously, if any of the pkg-config packages on the system had invalid Unicode in their description fields (like the Intel vpl package has at the time of writing, 2024-01-11, see #9608), cabal would crash because it tried to interpret the entire `pkg-config --list-all` output as Unicode.

  This change, as suggested by gbaz in #9608 (comment), switches to using a lazy ByteString for reading in the output, splitting on the first space in byte land, and then parsing only the package _name_ to a String.

  For further future-proofing, package names that don't parse as valid Unicode don't crash Cabal, but are instead ignored.

* Add changelog entry
* cabal-install-solver: Add bounds on 'text'
* No literal ASCII values, use 'ord'
* Address review comments re invalid unicode from pkg-config
* Add test for invalid unicode from pkg-config
* Compatibility with text-1.2.5.0
* Align imports
* Handle different exception type
* Use only POSIX shell syntax
* Add invalid-input handler in pkg-config shim

  This is to appease shellcheck

* Actually implement all required stuff in the pkg-config shim
* Less exception dance
* Fix shebang lines

  MacOS doesn't have /usr/bin/sh, and /bin/sh is the standard (for a POSIX shell) anyway

* Don't expect a particular representation of invalid characters

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
(cherry picked from commit 0b34b4e)
1 parent e5a204d commit d4dff63

File tree

11 files changed

+166
-24
lines changed

11 files changed

+166
-24
lines changed

Cabal/src/Distribution/Simple/Program/Run.hs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,19 @@
1313
-- This module provides a data type for program invocations and functions to
1414
-- run them.
1515

16-
module Distribution.Simple.Program.Run (
17-
ProgramInvocation(..),
18-
IOEncoding(..),
19-
emptyProgramInvocation,
20-
simpleProgramInvocation,
21-
programInvocation,
22-
multiStageProgramInvocation,
23-
24-
runProgramInvocation,
25-
getProgramInvocationOutput,
26-
getProgramInvocationLBS,
27-
getProgramInvocationOutputAndErrors,
28-
29-
getEffectiveEnvironment,
16+
module Distribution.Simple.Program.Run
17+
( ProgramInvocation (..)
18+
, IOEncoding (..)
19+
, emptyProgramInvocation
20+
, simpleProgramInvocation
21+
, programInvocation
22+
, multiStageProgramInvocation
23+
, runProgramInvocation
24+
, getProgramInvocationOutput
25+
, getProgramInvocationLBS
26+
, getProgramInvocationOutputAndErrors
27+
, getProgramInvocationLBSAndErrors
28+
, getEffectiveEnvironment
3029
) where
3130

3231
import Distribution.Compat.Prelude
@@ -164,6 +163,13 @@ getProgramInvocationOutputAndErrors verbosity inv = case progInvokeOutputEncodin
164163
(output', errors, exitCode) <- getProgramInvocationIODataAndErrors verbosity inv IODataModeBinary
165164
return (normaliseLineEndings (fromUTF8LBS output'), errors, exitCode)
166165

166+
-- | Run a program invocation in binary mode and return a triple of its
-- output as an undecoded lazy 'LBS.ByteString', the accompanying error text,
-- and the exit code.
--
-- Unlike 'getProgramInvocationOutputAndErrors', no text decoding is applied
-- to the output here, so the caller decides how to treat bytes that are not
-- valid in the encoding it expects.
getProgramInvocationLBSAndErrors
  :: Verbosity
  -> ProgramInvocation
  -> IO (LBS.ByteString, String, ExitCode)
getProgramInvocationLBSAndErrors verbosity inv =
  getProgramInvocationIODataAndErrors verbosity inv binaryMode
  where
    -- Request the raw-bytes flavour of the shared implementation.
    binaryMode = IODataModeBinary
172+
167173
getProgramInvocationIODataAndErrors
168174
:: KnownIODataMode mode => Verbosity -> ProgramInvocation -> IODataMode mode
169175
-> IO (mode, String, ExitCode)

cabal-install-solver/cabal-install-solver.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ library
115115
, mtl >=2.0 && <2.4
116116
, pretty ^>=1.1
117117
, transformers >=0.4.2.0 && <0.7
118+
, text (>= 1.2.3.0 && < 1.3) || (>= 2.0 && < 2.2)
118119

119120
if flag(debug-expensive-assertions)
120121
cpp-options: -DDEBUG_EXPENSIVE_ASSERTIONS

cabal-install-solver/src/Distribution/Solver/Types/PkgConfigDb.hs

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{-# LANGUAGE DeriveDataTypeable #-}
22
{-# LANGUAGE DeriveGeneric #-}
3+
{-# LANGUAGE LambdaCase #-}
34
-----------------------------------------------------------------------------
45
-- |
56
-- Module : Distribution.Solver.Types.PkgConfigDb
@@ -23,17 +24,23 @@ module Distribution.Solver.Types.PkgConfigDb
2324
import Distribution.Solver.Compat.Prelude
2425
import Prelude ()
2526

26-
import Control.Exception (handle)
27-
import Control.Monad (mapM)
28-
import qualified Data.Map as M
29-
import System.FilePath (splitSearchPath)
27+
import Control.Exception (handle)
28+
import Control.Monad (mapM)
29+
import Data.ByteString (ByteString)
30+
import qualified Data.ByteString.Lazy as LBS
31+
import qualified Data.Map as M
32+
import qualified Data.Text as T
33+
import qualified Data.Text.Encoding as T
34+
import qualified Data.Text.Encoding.Error as T
35+
import System.FilePath (splitSearchPath)
3036

3137
import Distribution.Compat.Environment (lookupEnv)
3238
import Distribution.Package (PkgconfigName, mkPkgconfigName)
3339
import Distribution.Parsec
3440
import Distribution.Simple.Program
3541
(ProgramDb, getProgramOutput, pkgConfigProgram, needProgram, ConfiguredProgram)
36-
import Distribution.Simple.Program.Run (getProgramInvocationOutputAndErrors, programInvocation)
42+
import Distribution.Simple.Program.Run
43+
(getProgramInvocationOutputAndErrors, programInvocation, getProgramInvocationLBSAndErrors)
3744
import Distribution.Simple.Utils (info)
3845
import Distribution.Types.PkgconfigVersion
3946
import Distribution.Types.PkgconfigVersionRange
@@ -63,10 +70,37 @@ readPkgConfigDb verbosity progdb = handle ioErrorHandler $ do
6370
case mpkgConfig of
6471
Nothing -> noPkgConfig "Cannot find pkg-config program"
6572
Just (pkgConfig, _) -> do
66-
pkgList <- lines <$> getProgramOutput verbosity pkgConfig ["--list-all"]
67-
-- The output of @pkg-config --list-all@ also includes a description
68-
-- for each package, which we do not need.
69-
let pkgNames = map (takeWhile (not . isSpace)) pkgList
73+
-- To prevent malformed Unicode in the descriptions from crashing cabal,
74+
-- read without interpreting any encoding first. (#9608)
75+
(listAllOutput, listAllErrs, listAllExitcode) <-
76+
getProgramInvocationLBSAndErrors verbosity (programInvocation pkgConfig ["--list-all"])
77+
when (listAllExitcode /= ExitSuccess) $
78+
ioError (userError ("pkg-config --list-all failed: " ++ listAllErrs))
79+
let pkgList = LBS.split (fromIntegral (ord '\n')) listAllOutput
80+
-- Now decode the package *names* to a String. The ones where decoding
81+
-- failed end up in 'failedPkgNames'.
82+
let (failedPkgNames, pkgNames) =
83+
partitionEithers
84+
-- Drop empty package names. This will handle empty lines
85+
-- in pkg-config's output, including the spurious one
86+
-- after the last newline (because of LBS.split).
87+
. filter (either (const True) (not . null))
88+
-- Try decoding strictly; if it fails, put the lenient
89+
-- decoding in a Left for later reporting.
90+
. map (\bsname ->
91+
let sbsname = LBS.toStrict bsname
92+
in case T.decodeUtf8' sbsname of
93+
Left _ -> Left (T.unpack (decodeUtf8LenientCompat sbsname))
94+
Right name -> Right (T.unpack name))
95+
-- The output of @pkg-config --list-all@ also includes a
96+
-- description for each package, which we do not need.
97+
-- We don't use Data.Char.isSpace because that would also
98+
-- include 0xA0, the non-breaking space, which can occur
99+
-- in multi-byte UTF-8 sequences.
100+
. map (LBS.takeWhile (not . isAsciiSpace))
101+
$ pkgList
102+
when (not (null failedPkgNames)) $
103+
info verbosity ("Some pkg-config packages have names containing invalid unicode: " ++ intercalate ", " failedPkgNames)
70104
(outs, _errs, exitCode) <-
71105
getProgramInvocationOutputAndErrors verbosity
72106
(programInvocation pkgConfig ("--modversion" : pkgNames))
@@ -104,6 +138,15 @@ readPkgConfigDb verbosity progdb = handle ioErrorHandler $ do
104138
ExitSuccess -> Just (pkg, pkgVersion)
105139
_ -> Nothing
106140

141+
-- | Whether a byte is an ASCII whitespace character, i.e. one that ends the
-- package name at the start of a line of @pkg-config --list-all@ output.
--
-- This works on raw bytes rather than via 'Data.Char.isSpace', because the
-- latter also accepts 0xA0 (the non-breaking space), a byte value that can
-- occur inside multi-byte UTF-8 sequences.
--
-- Besides space and tab, the remaining ASCII whitespace bytes are recognised
-- too. In particular carriage return matters: the output is read in binary
-- mode without line-ending normalisation, so with CRLF line endings a line
-- consisting of only a package name would otherwise keep a trailing @\\r@,
-- and a blank line would not be recognised as empty.
isAsciiSpace :: Word8 -> Bool
isAsciiSpace c = c `elem` map (fromIntegral . ord) " \t\n\v\f\r"
143+
144+
-- | Decode UTF-8 bytes to 'T.Text' with the lenient error handler
-- ('T.lenientDecode') instead of failing on invalid input.
--
-- This stands in for @decodeUtf8Lenient@, which the @text@ package only
-- provides from version 2.0.1 onwards. Once the minimum supported GHC
-- version is >= 9.4, this helper can be replaced by @decodeUtf8Lenient@.
decodeUtf8LenientCompat :: ByteString -> T.Text
decodeUtf8LenientCompat bytes = T.decodeUtf8With T.lenientDecode bytes
149+
107150
-- | Create a `PkgConfigDb` from a list of @(packageName, version)@ pairs.
108151
pkgConfigDbFromList :: [(String, String)] -> PkgConfigDb
109152
pkgConfigDbFromList pairs = (PkgConfigDb . M.fromList . map convert) pairs
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
#!/usr/bin/sh
1+
#!/bin/sh
22

33
exit 1;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
module MyLibrary () where
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
packages: *.cabal
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: PkgConfigParse
2+
version: 0.1
3+
license: BSD3
4+
author: Tom Smeding
5+
maintainer: Tom Smeding
6+
synopsis: Pkg Config Parse
7+
category: PackageTests
8+
build-type: Simple
9+
cabal-version: 2.0
10+
11+
description:
12+
Check that Cabal does not crash when pkg-config outputs invalid Unicode.
13+
14+
Library
15+
pkgconfig-depends: vpl
16+
default-language: Haskell2010
17+
build-depends: base <5.0
18+
exposed-modules:
19+
MyLibrary
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/bin/sh

set -eu

# Minimal stand-in for pkg-config: ugly, but "good enough" for this test.
# Only the invocations cabal currently makes are answered, so this script
# needs updating whenever cabal starts invoking pkg-config in new ways.

# Print the fake package listing, one "name description" line per package.
list_all() {
  printf 'zlib zlib - zlib compression library\n'
  # \256 = \xAE is the iso-8859-1 (latin-1) encoding of U+00AE, the
  # "registered sign": ®. A description like this caused the problems
  # reported in #9608.
  printf 'vpl Intel\256 Video Processing Library - Accelerated video decode, encode, and frame processing capabilities on Intel\256 GPUs\n'
  # \360 = \xF0 is latin-1 for ð; the name is orð, Icelandic for
  # "word"/"words".
  printf 'or\360 Icelandic characters\n'
}

# Dispatch on the complete argument string.
case "$*" in
  '--version')
    echo 2.1.0 # arbitrary
    ;;

  '--variable pc_path pkg-config')
    echo '.'
    ;;

  '--list-all')
    list_all
    ;;

  '--modversion '*)
    # Discard the --modversion flag; what remains are package names.
    shift
    for pkg; do
      case "$pkg" in
        zlib) echo 1.3 ;;  # arbitrary
        vpl) echo 2.10 ;;  # arbitrary
        # Deliberately no entry for orð: do not even try to match on it.
        *)
          echo >&2 "Package $pkg was not found in the pkg-config search path."
          exit 1
          ;;
      esac
    done
    ;;

  # Accepted but not implemented: these succeed without printing anything.
  '--cflags '*) ;;
  '--libs '*) ;;

  *)
    echo >&2 "pkg-config: unrecognised arguments $* (this is an incomplete shim)"
    exit 1
    ;;
esac
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# cabal v2-build
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import Test.Cabal.Prelude
2+
3+
-- Check that invalid Unicode in pkg-config output does not trip up cabal:
-- the build is run with the test directory added to the program search path,
-- and the output must mention the package name that could not be decoded.
main = cabalTest $ do
  -- Not run on Windows, because using a script to dummy up an executable
  -- doesn't work the same there.
  skipIfWindows
  testDir <- fmap testCurrentDir getTestEnv
  let buildArgs = ["--extra-prog-path=" ++ testDir, "-v2"]
  buildResult <- cabal' "v2-build" buildArgs
  assertOutputContains "Some pkg-config packages have names containing invalid unicode: or" buildResult

changelog.d/pr-9609

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
synopsis: Ignore invalid Unicode in pkg-config descriptions
2+
packages: cabal-install-solver
3+
prs: #9609
4+
issues: #9608
5+
6+
description: {
7+
8+
Previously, cabal-install would crash when the output of `pkg-config --list-all` contained
9+
invalid Unicode. With this change, invalid Unicode in package descriptions is
10+
ignored, and unparseable package names are considered nonexistent.
11+
12+
}

0 commit comments

Comments
 (0)