|
| 1 | +{-# OPTIONS_GHC -fno-warn-orphans #-} |
| 2 | +{-# LANGUAGE RecordWildCards #-} |
| 3 | +{-# LANGUAGE TupleSections #-} |
| 4 | + |
| 5 | +module HackageBenchmark ( |
| 6 | + hackageBenchmarkMain |
| 7 | + |
| 8 | +-- Exposed for testing: |
| 9 | + , CabalResult(..) |
| 10 | + , isSignificantTimeDifference |
| 11 | + , combineTrialResults |
| 12 | + , isSignificantResult |
| 13 | + , shouldContinueAfterFirstTrial |
| 14 | + ) where |
| 15 | + |
| 16 | +import Control.Monad (forM_, replicateM, unless, when) |
| 17 | +import qualified Data.ByteString as B |
| 18 | +import Data.List (nub, unzip4) |
| 19 | +import Data.Maybe (isJust) |
| 20 | +import Data.Monoid ((<>)) |
| 21 | +import Data.String (fromString) |
| 22 | +import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime) |
| 23 | +import qualified Data.Vector.Unboxed as V |
| 24 | +import Options.Applicative |
| 25 | +import Statistics.Sample (mean, stdDev) |
| 26 | +import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..) |
| 27 | + , mannWhitneyUCriticalValue |
| 28 | + , mannWhitneyUtest) |
| 29 | +import Statistics.Types (PValue, mkPValue) |
| 30 | +import System.Exit (ExitCode(..), exitFailure) |
| 31 | +import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr |
| 32 | + , stdout) |
| 33 | +import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess |
| 34 | + , createProcess, readProcess, shell, waitForProcess ) |
| 35 | +import Text.Printf (printf) |
| 36 | + |
| 37 | +import Distribution.Package (PackageName, mkPackageName, unPackageName) |
| 38 | + |
| 39 | +data Args = Args { |
| 40 | + argCabal1 :: FilePath |
| 41 | + , argCabal2 :: FilePath |
| 42 | + , argCabal1Flags :: [String] |
| 43 | + , argCabal2Flags :: [String] |
| 44 | + , argPackages :: [PackageName] |
| 45 | + , argMinRunTimeDifferenceToRerun :: Double |
| 46 | + , argPValue :: PValue Double |
| 47 | + , argTrials :: Int |
| 48 | + , argPrintTrials :: Bool |
| 49 | + , argPrintSkippedPackages :: Bool |
| 50 | + , argTimeoutSeconds :: Int |
| 51 | + } |
| 52 | + |
| 53 | +data CabalTrial = CabalTrial NominalDiffTime CabalResult |
| 54 | + |
| 55 | +data CabalResult |
| 56 | + = Solution |
| 57 | + | NoInstallPlan |
| 58 | + | BackjumpLimit |
| 59 | + | PkgNotFound |
| 60 | + | Timeout |
| 61 | + | Unknown |
| 62 | + deriving (Eq, Show) |
| 63 | + |
| 64 | +hackageBenchmarkMain :: IO () |
| 65 | +hackageBenchmarkMain = do |
| 66 | + hSetBuffering stdout LineBuffering |
| 67 | + args@Args {..} <- execParser parserInfo |
| 68 | + checkArgs args |
| 69 | + printConfig args |
| 70 | + pkgs <- getPackages args |
| 71 | + putStrLn "" |
| 72 | + |
| 73 | + let -- The maximum length of the heading and package names. |
| 74 | + nameColumnWidth :: Int |
| 75 | + nameColumnWidth = |
| 76 | + maximum $ map length $ "package" : map unPackageName pkgs |
| 77 | + runCabal1 = runCabal argTimeoutSeconds argCabal1 argCabal1Flags |
| 78 | + runCabal2 = runCabal argTimeoutSeconds argCabal2 argCabal2Flags |
| 79 | + |
| 80 | + -- When the output contains both trails and summaries, label each row as |
| 81 | + -- "trial" or "summary". |
| 82 | + when argPrintTrials $ putStr $ printf "%-16s " "trial/summary" |
| 83 | + putStrLn $ |
| 84 | + printf "%-*s %-13s %-13s %11s %11s %11s %11s %11s" |
| 85 | + nameColumnWidth "package" "result1" "result2" |
| 86 | + "mean1" "mean2" "stddev1" "stddev2" "speedup" |
| 87 | + |
| 88 | + forM_ pkgs $ \pkg -> do |
| 89 | + let printTrial msgType result1 result2 time1 time2 = |
| 90 | + putStrLn $ |
| 91 | + printf "%-16s %-*s %-13s %-13s %10.3fs %10.3fs" |
| 92 | + msgType nameColumnWidth (unPackageName pkg) |
| 93 | + (show result1) (show result2) |
| 94 | + (diffTimeToDouble time1) (diffTimeToDouble time2) |
| 95 | + |
| 96 | + CabalTrial t1 r1 <- runCabal1 pkg |
| 97 | + CabalTrial t2 r2 <- runCabal2 pkg |
| 98 | + if not $ |
| 99 | + shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2 |
| 100 | + then when argPrintSkippedPackages $ |
| 101 | + if argPrintTrials |
| 102 | + then printTrial "trial (skipping)" r1 r2 t1 t2 |
| 103 | + else putStrLn $ printf "%-*s (first run times were too similar)" |
| 104 | + nameColumnWidth (unPackageName pkg) |
| 105 | + else do |
| 106 | + when argPrintTrials $ printTrial "trial" r1 r2 t1 t2 |
| 107 | + (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>) |
| 108 | + . replicateM (argTrials - 1) $ do |
| 109 | + CabalTrial t1' r1' <- runCabal1 pkg |
| 110 | + CabalTrial t2' r2' <- runCabal2 pkg |
| 111 | + when argPrintTrials $ printTrial "trial" r1' r2' t1' t2' |
| 112 | + return (t1', t2', r1', r2') |
| 113 | + |
| 114 | + let result1 = combineTrialResults rs1 |
| 115 | + result2 = combineTrialResults rs2 |
| 116 | + times1 = V.fromList (map diffTimeToDouble ts1) |
| 117 | + times2 = V.fromList (map diffTimeToDouble ts2) |
| 118 | + mean1 = mean times1 |
| 119 | + mean2 = mean times2 |
| 120 | + stddev1 = stdDev times1 |
| 121 | + stddev2 = stdDev times2 |
| 122 | + speedup = mean1 / mean2 |
| 123 | + |
| 124 | + when argPrintTrials $ putStr $ printf "%-16s " "summary" |
| 125 | + if isSignificantResult result1 result2 |
| 126 | + || isSignificantTimeDifference argPValue ts1 ts2 |
| 127 | + then putStrLn $ |
| 128 | + printf "%-*s %-13s %-13s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f" |
| 129 | + nameColumnWidth (unPackageName pkg) |
| 130 | + (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup |
| 131 | + else when (argPrintTrials || argPrintSkippedPackages) $ |
| 132 | + putStrLn $ |
| 133 | + printf "%-*s (not significant)" nameColumnWidth (unPackageName pkg) |
| 134 | + where |
| 135 | + checkArgs :: Args -> IO () |
| 136 | + checkArgs Args {..} = do |
| 137 | + let die msg = hPutStrLn stderr msg >> exitFailure |
| 138 | + unless (argTrials > 0) $ die "--trials must be greater than 0." |
| 139 | + unless (argMinRunTimeDifferenceToRerun >= 0) $ |
| 140 | + die "--min-run-time-percentage-difference-to-rerun must be non-negative." |
| 141 | + unless (isSampleLargeEnough argPValue argTrials) $ |
| 142 | + die "p-value is too small for the number of trials." |
| 143 | + |
| 144 | + printConfig :: Args -> IO () |
| 145 | + printConfig Args {..} = do |
| 146 | + putStrLn "Comparing:" |
| 147 | + putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags |
| 148 | + callProcess argCabal1 ["--version"] |
| 149 | + putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags |
| 150 | + callProcess argCabal2 ["--version"] |
| 151 | + -- TODO: Print index state. |
| 152 | + putStrLn "Base package database:" |
| 153 | + callProcess "ghc-pkg" ["list"] |
| 154 | + |
| 155 | + getPackages :: Args -> IO [PackageName] |
| 156 | + getPackages Args {..} = do |
| 157 | + pkgs <- |
| 158 | + if null argPackages |
| 159 | + then do |
| 160 | + putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..." |
| 161 | + list <- readProcess argCabal1 ["list", "--simple-output"] "" |
| 162 | + return $ nub [mkPackageName $ head (words line) | line <- lines list] |
| 163 | + else do |
| 164 | + putStrLn "Using given package list ..." |
| 165 | + return argPackages |
| 166 | + putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages." |
| 167 | + return pkgs |
| 168 | + |
| 169 | +runCabal :: Int -> FilePath -> [String] -> PackageName -> IO CabalTrial |
| 170 | +runCabal timeoutSeconds cabal flags pkg = do |
| 171 | + ((exitCode, err), time) <- timeEvent $ do |
| 172 | + let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds |
| 173 | + cabalCmd = |
| 174 | + unwords $ |
| 175 | + [cabal, "install", unPackageName pkg, "--dry-run", "-v0"] ++ flags |
| 176 | + cmd = (shell (timeout ++ " " ++ cabalCmd)) { std_err = CreatePipe } |
| 177 | + |
| 178 | + -- TODO: Read stdout and compare the install plans. |
| 179 | + (_, _, Just errh, ph) <- createProcess cmd |
| 180 | + err <- B.hGetContents errh |
| 181 | + (, err) <$> waitForProcess ph |
| 182 | + let exhaustiveMsg = |
| 183 | + "After searching the rest of the dependency tree exhaustively" |
| 184 | + result |
| 185 | + | exitCode == ExitSuccess = Solution |
| 186 | + | exitCode == ExitFailure 124 = Timeout |
| 187 | + | fromString exhaustiveMsg `B.isInfixOf` err = NoInstallPlan |
| 188 | + | fromString "Backjump limit reached" `B.isInfixOf` err = BackjumpLimit |
| 189 | + | fromString "There is no package named" `B.isInfixOf` err = PkgNotFound |
| 190 | + | otherwise = Unknown |
| 191 | + return (CabalTrial time result) |
| 192 | + |
| 193 | +isSampleLargeEnough :: PValue Double -> Int -> Bool |
| 194 | +isSampleLargeEnough pvalue trials = |
| 195 | + -- mannWhitneyUCriticalValue, which can fail with too few samples, is only |
| 196 | + -- used when both sample sizes are less than or equal to 20. |
| 197 | + trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue) |
| 198 | + |
| 199 | +isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool |
| 200 | +isSignificantTimeDifference pvalue xs ys = |
| 201 | + let toVector = V.fromList . map diffTimeToDouble |
| 202 | + in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of |
| 203 | + Nothing -> error "not enough data for mannWhitneyUtest" |
| 204 | + Just Significant -> True |
| 205 | + Just NotSignificant -> False |
| 206 | + |
| 207 | +-- Should we stop after the first trial of this package to save time? This |
| 208 | +-- function skips the package if the results are uninteresting and the times are |
| 209 | +-- within --min-run-time-percentage-difference-to-rerun. |
| 210 | +shouldContinueAfterFirstTrial :: Double |
| 211 | + -> NominalDiffTime |
| 212 | + -> NominalDiffTime |
| 213 | + -> CabalResult |
| 214 | + -> CabalResult |
| 215 | + -> Bool |
| 216 | +shouldContinueAfterFirstTrial 0 _ _ _ _ = True |
| 217 | +shouldContinueAfterFirstTrial _ _ _ Timeout Timeout = False |
| 218 | +shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2 = |
| 219 | + isSignificantResult r1 r2 |
| 220 | + || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100) |
| 221 | + |
| 222 | +isSignificantResult :: CabalResult -> CabalResult -> Bool |
| 223 | +isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1) |
| 224 | + |
| 225 | +-- Is this result expected in a benchmark run on all of Hackage? |
| 226 | +isExpectedResult :: CabalResult -> Bool |
| 227 | +isExpectedResult Solution = True |
| 228 | +isExpectedResult NoInstallPlan = True |
| 229 | +isExpectedResult BackjumpLimit = True |
| 230 | +isExpectedResult Timeout = True |
| 231 | +isExpectedResult PkgNotFound = False |
| 232 | +isExpectedResult Unknown = False |
| 233 | + |
| 234 | +-- Combine CabalResults from multiple trials. Ignoring timeouts, all results |
| 235 | +-- should be the same. If they aren't the same, we returns Unknown. |
| 236 | +combineTrialResults :: [CabalResult] -> CabalResult |
| 237 | +combineTrialResults rs |
| 238 | + | allEqual rs = head rs |
| 239 | + | allEqual [r | r <- rs, r /= Timeout] = Timeout |
| 240 | + | otherwise = Unknown |
| 241 | + where |
| 242 | + allEqual :: Eq a => [a] -> Bool |
| 243 | + allEqual xs = length (nub xs) == 1 |
| 244 | + |
| 245 | +timeEvent :: IO a -> IO (a, NominalDiffTime) |
| 246 | +timeEvent task = do |
| 247 | + start <- getCurrentTime |
| 248 | + r <- task |
| 249 | + end <- getCurrentTime |
| 250 | + return (r, diffUTCTime end start) |
| 251 | + |
| 252 | +diffTimeToDouble :: NominalDiffTime -> Double |
| 253 | +diffTimeToDouble = fromRational . toRational |
| 254 | + |
| 255 | +parserInfo :: ParserInfo Args |
| 256 | +parserInfo = info (argParser <**> helper) |
| 257 | + ( fullDesc |
| 258 | + <> progDesc ("Find differences between two cabal commands when solving" |
| 259 | + ++ " for all packages on Hackage.") |
| 260 | + <> header "hackage-benchmark" ) |
| 261 | + |
| 262 | +argParser :: Parser Args |
| 263 | +argParser = Args |
| 264 | + <$> strOption |
| 265 | + ( long "cabal1" |
| 266 | + <> metavar "PATH" |
| 267 | + <> help "First cabal executable") |
| 268 | + <*> strOption |
| 269 | + ( long "cabal2" |
| 270 | + <> metavar "PATH" |
| 271 | + <> help "Second cabal executable") |
| 272 | + <*> option (words <$> str) |
| 273 | + ( long "cabal1-flags" |
| 274 | + <> value [] |
| 275 | + <> metavar "FLAGS" |
| 276 | + <> help "Extra flags for the first cabal executable") |
| 277 | + <*> option (words <$> str) |
| 278 | + ( long "cabal2-flags" |
| 279 | + <> value [] |
| 280 | + <> metavar "FLAGS" |
| 281 | + <> help "Extra flags for the second cabal executable") |
| 282 | + <*> option (map mkPackageName . words <$> str) |
| 283 | + ( long "packages" |
| 284 | + <> value [] |
| 285 | + <> metavar "PACKAGES" |
| 286 | + <> help ("Space separated list of packages to test, or all of Hackage" |
| 287 | + ++ " if unspecified")) |
| 288 | + <*> option auto |
| 289 | + ( long "min-run-time-percentage-difference-to-rerun" |
| 290 | + <> showDefault |
| 291 | + <> value 0.0 |
| 292 | + <> metavar "PERCENTAGE" |
| 293 | + <> help ("Stop testing a package when the difference in run times in" |
| 294 | + ++ " the first trial are within this percentage, in order to" |
| 295 | + ++ " save time")) |
| 296 | + <*> option (mkPValue <$> auto) |
| 297 | + ( long "pvalue" |
| 298 | + <> showDefault |
| 299 | + <> value (mkPValue 0.05) |
| 300 | + <> metavar "DOUBLE" |
| 301 | + <> help ("p-value used to determine whether to print the results for" |
| 302 | + ++ " each package")) |
| 303 | + <*> option auto |
| 304 | + ( long "trials" |
| 305 | + <> showDefault |
| 306 | + <> value 10 |
| 307 | + <> metavar "N" |
| 308 | + <> help "Number of trials for each package") |
| 309 | + <*> switch |
| 310 | + ( long "print-trials" |
| 311 | + <> help "Whether to include the results from individual trials in the output") |
| 312 | + <*> switch |
| 313 | + ( long "print-skipped-packages" |
| 314 | + <> help "Whether to include skipped packages in the output") |
| 315 | + <*> option auto |
| 316 | + ( long "timeout" |
| 317 | + <> showDefault |
| 318 | + <> value 90 |
| 319 | + <> metavar "SECONDS" |
| 320 | + <> help "Maximum time to run a cabal command, in seconds") |
0 commit comments