haskell
diff --git a/‎.travis.yml
Lines changed: 1 addition & 1 deletion b/‎.travis.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎cabal.project
Lines changed: 1 addition & 1 deletion b/‎cabal.project
Lines changed: 1 addition & 1 deletion
diff --git a/‎solver-benchmarks/HackageBenchmark.hs
Lines changed: 320 additions & 0 deletions b/‎solver-benchmarks/HackageBenchmark.hs
Lines changed: 320 additions & 0 deletions
diff --git a/‎solver-benchmarks/LICENSE
Lines changed: 31 additions & 0 deletions b/‎solver-benchmarks/LICENSE
Lines changed: 31 additions & 0 deletions
diff --git a/‎solver-benchmarks/README.md
Lines changed: 17 additions & 0 deletions b/‎solver-benchmarks/README.md
Lines changed: 17 additions & 0 deletions
diff --git a/‎solver-benchmarks/main/hackage-benchmark.hs
Lines changed: 4 additions & 0 deletions b/‎solver-benchmarks/main/hackage-benchmark.hs
Lines changed: 4 additions & 0 deletions
@@ -47,7 +47,7 @@ matrix:
    - env: GHCVER=7.10.3 SCRIPT=script USE_GOLD=YES
      os: linux
      sudo: required
-   - env: GHCVER=8.0.2 SCRIPT=script DEPLOY_DOCS=YES USE_GOLD=YES
+   - env: GHCVER=8.0.2 SCRIPT=script DEPLOY_DOCS=YES USE_GOLD=YES TEST_SOLVER_BENCHMARKS=YES
      sudo: required
      os: linux
 
 
@@ -1,4 +1,4 @@
-packages: Cabal/ cabal-testsuite/ cabal-install/
+packages: Cabal/ cabal-testsuite/ cabal-install/ solver-benchmarks/
 constraints: unix >= 2.7.1.0,
              cabal-install +lib +monolithic
 
 
@@ -0,0 +1,320 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+
+module HackageBenchmark (
+    hackageBenchmarkMain
+
+-- Exposed for testing:
+  , CabalResult(..)
+  , isSignificantTimeDifference
+  , combineTrialResults
+  , isSignificantResult
+  , shouldContinueAfterFirstTrial
+  ) where
+
+import Control.Monad (forM_, replicateM, unless, when)
+import qualified Data.ByteString as B
+import Data.List (nub, unzip4)
+import Data.Maybe (isJust)
+import Data.Monoid ((<>))
+import Data.String (fromString)
+import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
+import qualified Data.Vector.Unboxed as V
+import Options.Applicative
+import Statistics.Sample (mean, stdDev)
+import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
+                                    , mannWhitneyUCriticalValue
+                                    , mannWhitneyUtest)
+import Statistics.Types (PValue, mkPValue)
+import System.Exit (ExitCode(..), exitFailure)
+import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
+                 , stdout)
+import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
+                      , createProcess, readProcess, shell, waitForProcess )
+import Text.Printf (printf)
+
+import Distribution.Package (PackageName, mkPackageName, unPackageName)
+
+data Args = Args {
+    argCabal1                      :: FilePath
+  , argCabal2                      :: FilePath
+  , argCabal1Flags                 :: [String]
+  , argCabal2Flags                 :: [String]
+  , argPackages                    :: [PackageName]
+  , argMinRunTimeDifferenceToRerun :: Double
+  , argPValue                      :: PValue Double
+  , argTrials                      :: Int
+  , argPrintTrials                 :: Bool
+  , argPrintSkippedPackages        :: Bool
+  , argTimeoutSeconds              :: Int
+  }
+
+data CabalTrial = CabalTrial NominalDiffTime CabalResult
+
+data CabalResult
+  = Solution
+  | NoInstallPlan
+  | BackjumpLimit
+  | PkgNotFound
+  | Timeout
+  | Unknown
+  deriving (Eq, Show)
+
+hackageBenchmarkMain :: IO ()
+hackageBenchmarkMain = do
+  hSetBuffering stdout LineBuffering
+  args@Args {..} <- execParser parserInfo
+  checkArgs args
+  printConfig args
+  pkgs <- getPackages args
+  putStrLn ""
+
+  let -- The maximum length of the heading and package names.
+      nameColumnWidth :: Int
+      nameColumnWidth =
+          maximum $ map length $ "package" : map unPackageName pkgs
+      runCabal1 = runCabal argTimeoutSeconds argCabal1 argCabal1Flags
+      runCabal2 = runCabal argTimeoutSeconds argCabal2 argCabal2Flags
+
+  -- When the output contains both trails and summaries, label each row as
+  -- "trial" or "summary".
+  when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
+  putStrLn $
+      printf "%-*s %-13s %-13s %11s %11s %11s %11s %11s"
+             nameColumnWidth "package" "result1" "result2"
+             "mean1" "mean2" "stddev1" "stddev2" "speedup"
+
+  forM_ pkgs $ \pkg -> do
+    let printTrial msgType result1 result2 time1 time2 =
+            putStrLn $
+            printf "%-16s %-*s %-13s %-13s %10.3fs %10.3fs"
+                   msgType nameColumnWidth (unPackageName pkg)
+                   (show result1) (show result2)
+                   (diffTimeToDouble time1) (diffTimeToDouble time2)
+
+    CabalTrial t1 r1 <- runCabal1 pkg
+    CabalTrial t2 r2 <- runCabal2 pkg
+    if not $
+       shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
+    then when argPrintSkippedPackages $
+         if argPrintTrials
+         then printTrial "trial (skipping)" r1 r2 t1 t2
+         else putStrLn $ printf "%-*s (first run times were too similar)"
+                                nameColumnWidth (unPackageName pkg)
+    else do
+      when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
+      (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
+                            . replicateM (argTrials - 1) $ do
+        CabalTrial t1' r1' <- runCabal1 pkg
+        CabalTrial t2' r2' <- runCabal2 pkg
+        when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
+        return (t1', t2', r1', r2')
+
+      let result1 = combineTrialResults rs1
+          result2 = combineTrialResults rs2
+          times1 = V.fromList (map diffTimeToDouble ts1)
+          times2 = V.fromList (map diffTimeToDouble ts2)
+          mean1 = mean times1
+          mean2 = mean times2
+          stddev1 = stdDev times1
+          stddev2 = stdDev times2
+          speedup = mean1 / mean2
+
+      when argPrintTrials $ putStr $ printf "%-16s " "summary"
+      if isSignificantResult result1 result2
+          || isSignificantTimeDifference argPValue ts1 ts2
+      then putStrLn $
+           printf "%-*s %-13s %-13s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
+                  nameColumnWidth (unPackageName pkg)
+                  (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
+      else when (argPrintTrials || argPrintSkippedPackages) $
+           putStrLn $
+           printf "%-*s (not significant)" nameColumnWidth (unPackageName pkg)
+  where
+    checkArgs :: Args -> IO ()
+    checkArgs Args {..} = do
+      let die msg = hPutStrLn stderr msg >> exitFailure
+      unless (argTrials > 0) $ die "--trials must be greater than 0."
+      unless (argMinRunTimeDifferenceToRerun >= 0) $
+          die "--min-run-time-percentage-difference-to-rerun must be non-negative."
+      unless (isSampleLargeEnough argPValue argTrials) $
+          die "p-value is too small for the number of trials."
+
+    printConfig :: Args -> IO ()
+    printConfig Args {..} = do
+      putStrLn "Comparing:"
+      putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
+      callProcess argCabal1 ["--version"]
+      putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
+      callProcess argCabal2 ["--version"]
+      -- TODO: Print index state.
+      putStrLn "Base package database:"
+      callProcess "ghc-pkg" ["list"]
+
+    getPackages :: Args -> IO [PackageName]
+    getPackages Args {..} = do
+      pkgs <-
+          if null argPackages
+          then do
+            putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
+            list <- readProcess argCabal1 ["list", "--simple-output"] ""
+            return $ nub [mkPackageName $ head (words line) | line <- lines list]
+          else do
+            putStrLn "Using given package list ..."
+            return argPackages
+      putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
+      return pkgs
+
+runCabal :: Int -> FilePath -> [String] -> PackageName -> IO CabalTrial
+runCabal timeoutSeconds cabal flags pkg = do
+  ((exitCode, err), time) <- timeEvent $ do
+    let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
+        cabalCmd =
+            unwords $
+            [cabal, "install", unPackageName pkg, "--dry-run", "-v0"] ++ flags
+        cmd = (shell (timeout ++ " " ++ cabalCmd)) { std_err = CreatePipe }
+
+    -- TODO: Read stdout and compare the install plans.
+    (_, _, Just errh, ph) <- createProcess cmd
+    err <- B.hGetContents errh
+    (, err) <$> waitForProcess ph
+  let exhaustiveMsg =
+          "After searching the rest of the dependency tree exhaustively"
+      result
+        | exitCode == ExitSuccess                                  = Solution
+        | exitCode == ExitFailure 124                              = Timeout
+        | fromString exhaustiveMsg `B.isInfixOf` err               = NoInstallPlan
+        | fromString "Backjump limit reached" `B.isInfixOf` err    = BackjumpLimit
+        | fromString "There is no package named" `B.isInfixOf` err = PkgNotFound
+        | otherwise                                                = Unknown
+  return (CabalTrial time result)
+
+isSampleLargeEnough :: PValue Double -> Int -> Bool
+isSampleLargeEnough pvalue trials =
+    -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
+    -- used when both sample sizes are less than or equal to 20.
+    trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
+
+isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
+isSignificantTimeDifference pvalue xs ys =
+  let toVector = V.fromList . map diffTimeToDouble
+  in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
+       Nothing             -> error "not enough data for mannWhitneyUtest"
+       Just Significant    -> True
+       Just NotSignificant -> False
+
+-- Should we stop after the first trial of this package to save time? This
+-- function skips the package if the results are uninteresting and the times are
+-- within --min-run-time-percentage-difference-to-rerun.
+shouldContinueAfterFirstTrial :: Double
+                              -> NominalDiffTime
+                              -> NominalDiffTime
+                              -> CabalResult
+                              -> CabalResult
+                              -> Bool
+shouldContinueAfterFirstTrial 0                            _  _  _       _       = True
+shouldContinueAfterFirstTrial _                            _  _  Timeout Timeout = False
+shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1      r2      =
+    isSignificantResult r1 r2
+ || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
+
+isSignificantResult :: CabalResult -> CabalResult -> Bool
+isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
+
+-- Is this result expected in a benchmark run on all of Hackage?
+isExpectedResult :: CabalResult -> Bool
+isExpectedResult Solution      = True
+isExpectedResult NoInstallPlan = True
+isExpectedResult BackjumpLimit = True
+isExpectedResult Timeout       = True
+isExpectedResult PkgNotFound   = False
+isExpectedResult Unknown       = False
+
+-- Combine CabalResults from multiple trials. Ignoring timeouts, all results
+-- should be the same. If they aren't the same, we returns Unknown.
+combineTrialResults :: [CabalResult] -> CabalResult
+combineTrialResults rs
+  | allEqual rs                          = head rs
+  | allEqual [r | r <- rs, r /= Timeout] = Timeout
+  | otherwise                            = Unknown
+  where
+    allEqual :: Eq a => [a] -> Bool
+    allEqual xs = length (nub xs) == 1
+
+timeEvent :: IO a -> IO (a, NominalDiffTime)
+timeEvent task = do
+  start <- getCurrentTime
+  r <- task
+  end <- getCurrentTime
+  return (r, diffUTCTime end start)
+
+diffTimeToDouble :: NominalDiffTime -> Double
+diffTimeToDouble = fromRational . toRational
+
+parserInfo :: ParserInfo Args
+parserInfo = info (argParser <**> helper)
+     ( fullDesc
+    <> progDesc ("Find differences between two cabal commands when solving"
+                   ++ " for all packages on Hackage.")
+    <> header "hackage-benchmark" )
+
+argParser :: Parser Args
+argParser = Args
+    <$> strOption
+         ( long "cabal1"
+        <> metavar "PATH"
+        <> help "First cabal executable")
+    <*> strOption
+         ( long "cabal2"
+        <> metavar "PATH"
+        <> help "Second cabal executable")
+    <*> option (words <$> str)
+         ( long "cabal1-flags"
+        <> value []
+        <> metavar "FLAGS"
+        <> help "Extra flags for the first cabal executable")
+    <*> option (words <$> str)
+         ( long "cabal2-flags"
+        <> value []
+        <> metavar "FLAGS"
+        <> help "Extra flags for the second cabal executable")
+    <*> option (map mkPackageName . words <$> str)
+         ( long "packages"
+        <> value []
+        <> metavar "PACKAGES"
+        <> help ("Space separated list of packages to test, or all of Hackage"
+                   ++ " if unspecified"))
+    <*> option auto
+         ( long "min-run-time-percentage-difference-to-rerun"
+        <> showDefault
+        <> value 0.0
+        <> metavar "PERCENTAGE"
+        <> help ("Stop testing a package when the difference in run times in"
+                   ++ " the first trial are within this percentage, in order to"
+                   ++ " save time"))
+    <*> option (mkPValue <$> auto)
+         ( long "pvalue"
+        <> showDefault
+        <> value (mkPValue 0.05)
+        <> metavar "DOUBLE"
+        <> help ("p-value used to determine whether to print the results for"
+                   ++ " each package"))
+    <*> option auto
+         ( long "trials"
+        <> showDefault
+        <> value 10
+        <> metavar "N"
+        <> help "Number of trials for each package")
+    <*> switch
+         ( long "print-trials"
+        <> help "Whether to include the results from individual trials in the output")
+    <*> switch
+         ( long "print-skipped-packages"
+        <> help "Whether to include skipped packages in the output")
+    <*> option auto
+         ( long "timeout"
+        <> showDefault
+        <> value 90
+        <> metavar "SECONDS"
+        <> help "Maximum time to run a cabal command, in seconds")
@@ -0,0 +1,31 @@
+Copyright (c) 2003-2017, Cabal Development Team.
+See the AUTHORS file for the full list of copyright holders.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of Isaac Jones nor the names of other
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,17 @@
+Dependency Solver Benchmarks
+============================
+
+hackage-benchmark
+-----------------
+
+The goal of this benchmark is to find examples of packages that show a
+difference in behavior between two versions of cabal.  It doesn't try
+to determine which version of cabal performs better.
+
+`hackage-benchmark` compares two `cabal` commands by running each one
+on each package in a list.  The list is either the package index or a
+list of packages provided on the command line.  In order to save time,
+the benchmark initially only runs one trial for each package.  If the
+results (solution, no solution, timeout, etc.) are the same and the
+times are too similar, it skips the package.  Otherwise, it runs more
+trials and prints the results if they are significant.
@@ -0,0 +1,4 @@
+import HackageBenchmark
+
+main :: IO ()
+main = hackageBenchmarkMain
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-packages: Cabal/ cabal-testsuite/ cabal-install/`
	`1`	`+packages: Cabal/ cabal-testsuite/ cabal-install/ solver-benchmarks/`
`2`	`2`	`constraints: unix >= 2.7.1.0,`
`3`	`3`	`cabal-install +lib +monolithic`
`4`	`4`