Skip to content

Commit 267efc8

Browse files
authored
Merge pull request #4674 from grayjay/solver-hackage-benchmark
Start adding a benchmark that solves for all packages on Hackage.
2 parents 5377e10 + 384bb92 commit 267efc8

10 files changed

+529
-2
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ matrix:
4747
- env: GHCVER=7.10.3 SCRIPT=script USE_GOLD=YES
4848
os: linux
4949
sudo: required
50-
- env: GHCVER=8.0.2 SCRIPT=script DEPLOY_DOCS=YES USE_GOLD=YES
50+
- env: GHCVER=8.0.2 SCRIPT=script DEPLOY_DOCS=YES USE_GOLD=YES TEST_SOLVER_BENCHMARKS=YES
5151
sudo: required
5252
os: linux
5353

cabal.project

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
packages: Cabal/ cabal-testsuite/ cabal-install/
1+
packages: Cabal/ cabal-testsuite/ cabal-install/ solver-benchmarks/
22
constraints: unix >= 2.7.1.0,
33
cabal-install +lib +monolithic
44

solver-benchmarks/HackageBenchmark.hs

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
{-# OPTIONS_GHC -fno-warn-orphans #-}
2+
{-# LANGUAGE RecordWildCards #-}
3+
{-# LANGUAGE TupleSections #-}
4+
5+
module HackageBenchmark (
6+
hackageBenchmarkMain
7+
8+
-- Exposed for testing:
9+
, CabalResult(..)
10+
, isSignificantTimeDifference
11+
, combineTrialResults
12+
, isSignificantResult
13+
, shouldContinueAfterFirstTrial
14+
) where
15+
16+
import Control.Monad (forM_, replicateM, unless, when)
17+
import qualified Data.ByteString as B
18+
import Data.List (nub, unzip4)
19+
import Data.Maybe (isJust)
20+
import Data.Monoid ((<>))
21+
import Data.String (fromString)
22+
import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
23+
import qualified Data.Vector.Unboxed as V
24+
import Options.Applicative
25+
import Statistics.Sample (mean, stdDev)
26+
import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
27+
, mannWhitneyUCriticalValue
28+
, mannWhitneyUtest)
29+
import Statistics.Types (PValue, mkPValue)
30+
import System.Exit (ExitCode(..), exitFailure)
31+
import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
32+
, stdout)
33+
import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
34+
, createProcess, readProcess, shell, waitForProcess )
35+
import Text.Printf (printf)
36+
37+
import Distribution.Package (PackageName, mkPackageName, unPackageName)
38+
39+
data Args = Args {
40+
argCabal1 :: FilePath
41+
, argCabal2 :: FilePath
42+
, argCabal1Flags :: [String]
43+
, argCabal2Flags :: [String]
44+
, argPackages :: [PackageName]
45+
, argMinRunTimeDifferenceToRerun :: Double
46+
, argPValue :: PValue Double
47+
, argTrials :: Int
48+
, argPrintTrials :: Bool
49+
, argPrintSkippedPackages :: Bool
50+
, argTimeoutSeconds :: Int
51+
}
52+
53+
data CabalTrial = CabalTrial NominalDiffTime CabalResult
54+
55+
data CabalResult
56+
= Solution
57+
| NoInstallPlan
58+
| BackjumpLimit
59+
| PkgNotFound
60+
| Timeout
61+
| Unknown
62+
deriving (Eq, Show)
63+
64+
hackageBenchmarkMain :: IO ()
65+
hackageBenchmarkMain = do
66+
hSetBuffering stdout LineBuffering
67+
args@Args {..} <- execParser parserInfo
68+
checkArgs args
69+
printConfig args
70+
pkgs <- getPackages args
71+
putStrLn ""
72+
73+
let -- The maximum length of the heading and package names.
74+
nameColumnWidth :: Int
75+
nameColumnWidth =
76+
maximum $ map length $ "package" : map unPackageName pkgs
77+
runCabal1 = runCabal argTimeoutSeconds argCabal1 argCabal1Flags
78+
runCabal2 = runCabal argTimeoutSeconds argCabal2 argCabal2Flags
79+
80+
-- When the output contains both trails and summaries, label each row as
81+
-- "trial" or "summary".
82+
when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
83+
putStrLn $
84+
printf "%-*s %-13s %-13s %11s %11s %11s %11s %11s"
85+
nameColumnWidth "package" "result1" "result2"
86+
"mean1" "mean2" "stddev1" "stddev2" "speedup"
87+
88+
forM_ pkgs $ \pkg -> do
89+
let printTrial msgType result1 result2 time1 time2 =
90+
putStrLn $
91+
printf "%-16s %-*s %-13s %-13s %10.3fs %10.3fs"
92+
msgType nameColumnWidth (unPackageName pkg)
93+
(show result1) (show result2)
94+
(diffTimeToDouble time1) (diffTimeToDouble time2)
95+
96+
CabalTrial t1 r1 <- runCabal1 pkg
97+
CabalTrial t2 r2 <- runCabal2 pkg
98+
if not $
99+
shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
100+
then when argPrintSkippedPackages $
101+
if argPrintTrials
102+
then printTrial "trial (skipping)" r1 r2 t1 t2
103+
else putStrLn $ printf "%-*s (first run times were too similar)"
104+
nameColumnWidth (unPackageName pkg)
105+
else do
106+
when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
107+
(ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
108+
. replicateM (argTrials - 1) $ do
109+
CabalTrial t1' r1' <- runCabal1 pkg
110+
CabalTrial t2' r2' <- runCabal2 pkg
111+
when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
112+
return (t1', t2', r1', r2')
113+
114+
let result1 = combineTrialResults rs1
115+
result2 = combineTrialResults rs2
116+
times1 = V.fromList (map diffTimeToDouble ts1)
117+
times2 = V.fromList (map diffTimeToDouble ts2)
118+
mean1 = mean times1
119+
mean2 = mean times2
120+
stddev1 = stdDev times1
121+
stddev2 = stdDev times2
122+
speedup = mean1 / mean2
123+
124+
when argPrintTrials $ putStr $ printf "%-16s " "summary"
125+
if isSignificantResult result1 result2
126+
|| isSignificantTimeDifference argPValue ts1 ts2
127+
then putStrLn $
128+
printf "%-*s %-13s %-13s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
129+
nameColumnWidth (unPackageName pkg)
130+
(show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
131+
else when (argPrintTrials || argPrintSkippedPackages) $
132+
putStrLn $
133+
printf "%-*s (not significant)" nameColumnWidth (unPackageName pkg)
134+
where
135+
checkArgs :: Args -> IO ()
136+
checkArgs Args {..} = do
137+
let die msg = hPutStrLn stderr msg >> exitFailure
138+
unless (argTrials > 0) $ die "--trials must be greater than 0."
139+
unless (argMinRunTimeDifferenceToRerun >= 0) $
140+
die "--min-run-time-percentage-difference-to-rerun must be non-negative."
141+
unless (isSampleLargeEnough argPValue argTrials) $
142+
die "p-value is too small for the number of trials."
143+
144+
printConfig :: Args -> IO ()
145+
printConfig Args {..} = do
146+
putStrLn "Comparing:"
147+
putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
148+
callProcess argCabal1 ["--version"]
149+
putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
150+
callProcess argCabal2 ["--version"]
151+
-- TODO: Print index state.
152+
putStrLn "Base package database:"
153+
callProcess "ghc-pkg" ["list"]
154+
155+
getPackages :: Args -> IO [PackageName]
156+
getPackages Args {..} = do
157+
pkgs <-
158+
if null argPackages
159+
then do
160+
putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
161+
list <- readProcess argCabal1 ["list", "--simple-output"] ""
162+
return $ nub [mkPackageName $ head (words line) | line <- lines list]
163+
else do
164+
putStrLn "Using given package list ..."
165+
return argPackages
166+
putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
167+
return pkgs
168+
169+
runCabal :: Int -> FilePath -> [String] -> PackageName -> IO CabalTrial
170+
runCabal timeoutSeconds cabal flags pkg = do
171+
((exitCode, err), time) <- timeEvent $ do
172+
let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
173+
cabalCmd =
174+
unwords $
175+
[cabal, "install", unPackageName pkg, "--dry-run", "-v0"] ++ flags
176+
cmd = (shell (timeout ++ " " ++ cabalCmd)) { std_err = CreatePipe }
177+
178+
-- TODO: Read stdout and compare the install plans.
179+
(_, _, Just errh, ph) <- createProcess cmd
180+
err <- B.hGetContents errh
181+
(, err) <$> waitForProcess ph
182+
let exhaustiveMsg =
183+
"After searching the rest of the dependency tree exhaustively"
184+
result
185+
| exitCode == ExitSuccess = Solution
186+
| exitCode == ExitFailure 124 = Timeout
187+
| fromString exhaustiveMsg `B.isInfixOf` err = NoInstallPlan
188+
| fromString "Backjump limit reached" `B.isInfixOf` err = BackjumpLimit
189+
| fromString "There is no package named" `B.isInfixOf` err = PkgNotFound
190+
| otherwise = Unknown
191+
return (CabalTrial time result)
192+
193+
isSampleLargeEnough :: PValue Double -> Int -> Bool
194+
isSampleLargeEnough pvalue trials =
195+
-- mannWhitneyUCriticalValue, which can fail with too few samples, is only
196+
-- used when both sample sizes are less than or equal to 20.
197+
trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
198+
199+
isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
200+
isSignificantTimeDifference pvalue xs ys =
201+
let toVector = V.fromList . map diffTimeToDouble
202+
in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
203+
Nothing -> error "not enough data for mannWhitneyUtest"
204+
Just Significant -> True
205+
Just NotSignificant -> False
206+
207+
-- Should we stop after the first trial of this package to save time? This
208+
-- function skips the package if the results are uninteresting and the times are
209+
-- within --min-run-time-percentage-difference-to-rerun.
210+
shouldContinueAfterFirstTrial :: Double
211+
-> NominalDiffTime
212+
-> NominalDiffTime
213+
-> CabalResult
214+
-> CabalResult
215+
-> Bool
216+
shouldContinueAfterFirstTrial 0 _ _ _ _ = True
217+
shouldContinueAfterFirstTrial _ _ _ Timeout Timeout = False
218+
shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2 =
219+
isSignificantResult r1 r2
220+
|| abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
221+
222+
isSignificantResult :: CabalResult -> CabalResult -> Bool
223+
isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
224+
225+
-- Is this result expected in a benchmark run on all of Hackage?
226+
isExpectedResult :: CabalResult -> Bool
227+
isExpectedResult Solution = True
228+
isExpectedResult NoInstallPlan = True
229+
isExpectedResult BackjumpLimit = True
230+
isExpectedResult Timeout = True
231+
isExpectedResult PkgNotFound = False
232+
isExpectedResult Unknown = False
233+
234+
-- Combine CabalResults from multiple trials. Ignoring timeouts, all results
235+
-- should be the same. If they aren't the same, we returns Unknown.
236+
combineTrialResults :: [CabalResult] -> CabalResult
237+
combineTrialResults rs
238+
| allEqual rs = head rs
239+
| allEqual [r | r <- rs, r /= Timeout] = Timeout
240+
| otherwise = Unknown
241+
where
242+
allEqual :: Eq a => [a] -> Bool
243+
allEqual xs = length (nub xs) == 1
244+
245+
timeEvent :: IO a -> IO (a, NominalDiffTime)
246+
timeEvent task = do
247+
start <- getCurrentTime
248+
r <- task
249+
end <- getCurrentTime
250+
return (r, diffUTCTime end start)
251+
252+
diffTimeToDouble :: NominalDiffTime -> Double
253+
diffTimeToDouble = fromRational . toRational
254+
255+
parserInfo :: ParserInfo Args
256+
parserInfo = info (argParser <**> helper)
257+
( fullDesc
258+
<> progDesc ("Find differences between two cabal commands when solving"
259+
++ " for all packages on Hackage.")
260+
<> header "hackage-benchmark" )
261+
262+
argParser :: Parser Args
263+
argParser = Args
264+
<$> strOption
265+
( long "cabal1"
266+
<> metavar "PATH"
267+
<> help "First cabal executable")
268+
<*> strOption
269+
( long "cabal2"
270+
<> metavar "PATH"
271+
<> help "Second cabal executable")
272+
<*> option (words <$> str)
273+
( long "cabal1-flags"
274+
<> value []
275+
<> metavar "FLAGS"
276+
<> help "Extra flags for the first cabal executable")
277+
<*> option (words <$> str)
278+
( long "cabal2-flags"
279+
<> value []
280+
<> metavar "FLAGS"
281+
<> help "Extra flags for the second cabal executable")
282+
<*> option (map mkPackageName . words <$> str)
283+
( long "packages"
284+
<> value []
285+
<> metavar "PACKAGES"
286+
<> help ("Space separated list of packages to test, or all of Hackage"
287+
++ " if unspecified"))
288+
<*> option auto
289+
( long "min-run-time-percentage-difference-to-rerun"
290+
<> showDefault
291+
<> value 0.0
292+
<> metavar "PERCENTAGE"
293+
<> help ("Stop testing a package when the difference in run times in"
294+
++ " the first trial are within this percentage, in order to"
295+
++ " save time"))
296+
<*> option (mkPValue <$> auto)
297+
( long "pvalue"
298+
<> showDefault
299+
<> value (mkPValue 0.05)
300+
<> metavar "DOUBLE"
301+
<> help ("p-value used to determine whether to print the results for"
302+
++ " each package"))
303+
<*> option auto
304+
( long "trials"
305+
<> showDefault
306+
<> value 10
307+
<> metavar "N"
308+
<> help "Number of trials for each package")
309+
<*> switch
310+
( long "print-trials"
311+
<> help "Whether to include the results from individual trials in the output")
312+
<*> switch
313+
( long "print-skipped-packages"
314+
<> help "Whether to include skipped packages in the output")
315+
<*> option auto
316+
( long "timeout"
317+
<> showDefault
318+
<> value 90
319+
<> metavar "SECONDS"
320+
<> help "Maximum time to run a cabal command, in seconds")

solver-benchmarks/LICENSE

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
Copyright (c) 2003-2017, Cabal Development Team.
2+
See the AUTHORS file for the full list of copyright holders.
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are
7+
met:
8+
9+
* Redistributions of source code must retain the above copyright
10+
notice, this list of conditions and the following disclaimer.
11+
12+
* Redistributions in binary form must reproduce the above
13+
copyright notice, this list of conditions and the following
14+
disclaimer in the documentation and/or other materials provided
15+
with the distribution.
16+
17+
* Neither the name of Isaac Jones nor the names of other
18+
contributors may be used to endorse or promote products derived
19+
from this software without specific prior written permission.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

solver-benchmarks/README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Dependency Solver Benchmarks
2+
============================
3+
4+
hackage-benchmark
5+
-----------------
6+
7+
The goal of this benchmark is to find examples of packages that show a
8+
difference in behavior between two versions of cabal. It doesn't try
9+
to determine which version of cabal performs better.
10+
11+
`hackage-benchmark` compares two `cabal` commands by running each one
12+
on each package in a list. The list is either the package index or a
13+
list of packages provided on the command line. In order to save time,
14+
the benchmark initially only runs one trial for each package. If the
15+
results (solution, no solution, timeout, etc.) are the same and the
16+
times are too similar, it skips the package. Otherwise, it runs more
17+
trials and prints the results if they are significant.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import HackageBenchmark
2+
3+
main :: IO ()
4+
main = hackageBenchmarkMain

0 commit comments

Comments
 (0)