Skip to content

Commit 971edb9

Browse files
committed
use environments to lazily initialize benchmarks
if we only need to run a few benchmarks, e.g. `text-benchmarks Pure/drop/Text+tiny` it's wasteful to initialize all of them (and read all the test data in memory - all the test data is a few hundreds of MB, but we read it in several tests, as different types, and most seriously, as String - which turns this into GBs of memory)
1 parent 44ec2ce commit 971edb9

21 files changed

+208
-122
lines changed

benchmarks/haskell/Benchmarks.hs

Lines changed: 30 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ module Main
55
( main
66
) where
77

8-
import Criterion.Main (Benchmark, defaultMain, bgroup)
8+
import Criterion.Main (defaultMain, bgroup, env)
99
import System.FilePath ((</>))
1010
import System.IO (IOMode (WriteMode), openFile, hSetEncoding, utf8)
1111

@@ -32,50 +32,42 @@ import qualified Benchmarks.Programs.StripTags as Programs.StripTags
3232
import qualified Benchmarks.Programs.Throughput as Programs.Throughput
3333

3434
main :: IO ()
35-
main = benchmarks >>= defaultMain
36-
37-
benchmarks :: IO [Benchmark]
38-
benchmarks = do
35+
main = do
3936
sink <- openFile "/dev/null" WriteMode
4037
hSetEncoding sink utf8
41-
42-
-- Traditional benchmarks
43-
bs <- sequence
38+
defaultMain
4439
[ Builder.benchmark
4540
, Concat.benchmark
46-
, DecodeUtf8.benchmark "html" (tf "libya-chinese.html")
47-
, DecodeUtf8.benchmark "xml" (tf "yiwiki.xml")
48-
, DecodeUtf8.benchmark "ascii" (tf "ascii.txt")
49-
, DecodeUtf8.benchmark "russian" (tf "russian.txt")
50-
, DecodeUtf8.benchmark "japanese" (tf "japanese.txt")
41+
, env (DecodeUtf8.initEnv (tf "libya-chinese.html")) (DecodeUtf8.benchmark "html")
42+
, env (DecodeUtf8.initEnv (tf "yiwiki.xml")) (DecodeUtf8.benchmark "xml")
43+
, env (DecodeUtf8.initEnv (tf "ascii.txt")) (DecodeUtf8.benchmark "ascii")
44+
, env (DecodeUtf8.initEnv (tf "russian.txt")) (DecodeUtf8.benchmark "russian")
45+
, env (DecodeUtf8.initEnv (tf "japanese.txt")) (DecodeUtf8.benchmark "japanese")
5146
, EncodeUtf8.benchmark "επανάληψη 竺法蘭共譯"
52-
, Equality.benchmark (tf "japanese.txt")
47+
, env (Equality.initEnv (tf "japanese.txt")) Equality.benchmark
5348
, FileRead.benchmark (tf "russian.txt")
5449
, FoldLines.benchmark (tf "russian.txt")
55-
, Mul.benchmark
56-
, Pure.benchmark "tiny" (tf "tiny.txt")
57-
, Pure.benchmark "ascii" (tf "ascii-small.txt")
58-
-- , Pure.benchmark "france" (tf "france.html")
59-
, Pure.benchmark "russian" (tf "russian-small.txt")
60-
, Pure.benchmark "japanese" (tf "japanese.txt")
61-
, ReadNumbers.benchmark (tf "numbers.txt")
62-
, Replace.benchmark (tf "russian.txt") "принимая" "своем"
63-
, Search.benchmark (tf "russian.txt") "принимая"
64-
, Stream.benchmark (tf "russian.txt")
65-
, WordFrequencies.benchmark (tf "russian.txt")
50+
, env Mul.initEnv Mul.benchmark
51+
, env (Pure.initEnv (tf "tiny.txt")) (Pure.benchmark "tiny")
52+
, env (Pure.initEnv (tf "ascii-small.txt")) (Pure.benchmark "ascii-small")
53+
, env (Pure.initEnv (tf "ascii.txt")) (Pure.benchmark "ascii")
54+
, env (Pure.initEnv (tf "english.txt")) (Pure.benchmark "english")
55+
, env (Pure.initEnv (tf "russian-small.txt")) (Pure.benchmark "russian")
56+
, env (Pure.initEnv (tf "japanese.txt")) (Pure.benchmark "japanese")
57+
, env (ReadNumbers.initEnv (tf "numbers.txt")) ReadNumbers.benchmark
58+
, env (Replace.initEnv (tf "russian.txt")) (Replace.benchmark "принимая" "своем")
59+
, env (Search.initEnv (tf "russian.txt")) (Search.benchmark "принимая")
60+
, env (Stream.initEnv (tf "russian.txt")) Stream.benchmark
61+
, env (WordFrequencies.initEnv (tf "russian.txt")) WordFrequencies.benchmark
62+
, bgroup "Programs"
63+
[ Programs.BigTable.benchmark sink
64+
, Programs.Cut.benchmark (tf "russian.txt") sink 20 40
65+
, Programs.Fold.benchmark (tf "russian.txt") sink
66+
, Programs.Sort.benchmark (tf "russian.txt") sink
67+
, Programs.StripTags.benchmark (tf "yiwiki.xml") sink
68+
, Programs.Throughput.benchmark (tf "russian.txt") sink
69+
]
6670
]
67-
68-
-- Program-like benchmarks
69-
ps <- bgroup "Programs" `fmap` sequence
70-
[ Programs.BigTable.benchmark sink
71-
, Programs.Cut.benchmark (tf "russian.txt") sink 20 40
72-
, Programs.Fold.benchmark (tf "russian.txt") sink
73-
, Programs.Sort.benchmark (tf "russian.txt") sink
74-
, Programs.StripTags.benchmark (tf "yiwiki.xml") sink
75-
, Programs.Throughput.benchmark (tf "russian.txt") sink
76-
]
77-
78-
return $ bs ++ [ps]
79-
where
71+
where
8072
-- Location of a test file
8173
tf = ("../tests/text-test-data" </>)

benchmarks/haskell/Benchmarks/Builder.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ import qualified Data.Text.Lazy.Builder as LTB
2323
import qualified Data.Text.Lazy.Builder.Int as Int
2424
import Data.Int (Int64)
2525

26-
benchmark :: IO Benchmark
27-
benchmark = return $ bgroup "Builder"
26+
benchmark :: Benchmark
27+
benchmark = bgroup "Builder"
2828
[ bgroup "Comparison"
2929
[ bench "LazyText" $ nf
3030
(LT.length . LTB.toLazyText . mconcat . map LTB.fromText) texts

benchmarks/haskell/Benchmarks/Concat.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ import Control.Monad.Trans.Writer
66
import Criterion (Benchmark, bgroup, bench, whnf)
77
import Data.Text as T
88

9-
benchmark :: IO Benchmark
10-
benchmark = return $ bgroup "Concat"
9+
benchmark :: Benchmark
10+
benchmark = bgroup "Concat"
1111
[ bench "append" $ whnf (append4 "Text 1" "Text 2" "Text 3") "Text 4"
1212
, bench "concat" $ whnf (concat4 "Text 1" "Text 2" "Text 3") "Text 4"
1313
, bench "write" $ whnf (write4 "Text 1" "Text 2" "Text 3") "Text 4"

benchmarks/haskell/Benchmarks/DecodeUtf8.hs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
-- The latter are used for testing stream fusion.
1616
--
1717
module Benchmarks.DecodeUtf8
18-
( benchmark
18+
( initEnv
19+
, benchmark
1920
) where
2021

2122
import Foreign.C.Types
@@ -34,18 +35,24 @@ import qualified Data.Text.Encoding as T
3435
import qualified Data.Text.Lazy as TL
3536
import qualified Data.Text.Lazy.Encoding as TL
3637

37-
benchmark :: String -> FilePath -> IO Benchmark
38-
benchmark kind fp = do
38+
type Env = (B.ByteString, BL.ByteString)
39+
40+
initEnv :: FilePath -> IO Env
41+
initEnv fp = do
3942
bs <- B.readFile fp
4043
lbs <- BL.readFile fp
44+
return (bs, lbs)
45+
46+
benchmark :: String -> Env -> Benchmark
47+
benchmark kind ~(bs, lbs) =
4148
let bench name = C.bench (name ++ "+" ++ kind)
4249
decodeStream (Chunk b0 bs0) = case T.streamDecodeUtf8 b0 of
4350
T.Some t0 _ f0 -> t0 : go f0 bs0
4451
where go f (Chunk b bs1) = case f b of
4552
T.Some t1 _ f1 -> t1 : go f1 bs1
4653
go _ _ = []
4754
decodeStream _ = []
48-
return $ bgroup "DecodeUtf8"
55+
in bgroup "DecodeUtf8"
4956
[ bench "Strict" $ nf T.decodeUtf8 bs
5057
, bench "Stream" $ nf decodeStream lbs
5158
, bench "IConv" $ whnfIO $ iconv bs

benchmarks/haskell/Benchmarks/EncodeUtf8.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ import qualified Data.Text.Encoding as T
1818
import qualified Data.Text.Lazy as TL
1919
import qualified Data.Text.Lazy.Encoding as TL
2020

21-
benchmark :: String -> IO Benchmark
22-
benchmark string = do
23-
return $ bgroup "EncodeUtf8"
21+
benchmark :: String -> Benchmark
22+
benchmark string =
23+
bgroup "EncodeUtf8"
2424
[ bench "Text" $ whnf (B.length . T.encodeUtf8) text
2525
, bench "LazyText" $ whnf (BL.length . TL.encodeUtf8) lazyText
2626
]

benchmarks/haskell/Benchmarks/Equality.hs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
-- * Comparison of strings (Eq instance)
77
--
88
module Benchmarks.Equality
9-
( benchmark
9+
( initEnv
10+
, benchmark
1011
) where
1112

1213
import Criterion (Benchmark, bgroup, bench, whnf)
@@ -17,8 +18,10 @@ import qualified Data.Text.Encoding as T
1718
import qualified Data.Text.Lazy as TL
1819
import qualified Data.Text.Lazy.Encoding as TL
1920

20-
benchmark :: FilePath -> IO Benchmark
21-
benchmark fp = do
21+
type Env = (T.Text, TL.Text, B.ByteString, BL.ByteString, BL.ByteString, String)
22+
23+
initEnv :: FilePath -> IO Env
24+
initEnv fp = do
2225
b <- B.readFile fp
2326
bl1 <- BL.readFile fp
2427
-- A lazy bytestring is a list of chunks. When we do not explicitly create two
@@ -27,9 +30,11 @@ benchmark fp = do
2730
-- we read the lazy bytestring twice here.
2831
bl2 <- BL.readFile fp
2932
l <- readFile fp
30-
let t = T.decodeUtf8 b
31-
tl = TL.decodeUtf8 bl1
32-
return $ bgroup "Equality"
33+
return (T.decodeUtf8 b, TL.decodeUtf8 bl1, b, bl1, bl2, l)
34+
35+
benchmark :: Env -> Benchmark
36+
benchmark ~(t, tl, b, bl1, bl2, l) =
37+
bgroup "Equality"
3338
[ bench "Text" $ whnf (== T.init t `T.snoc` '\xfffd') t
3439
, bench "LazyText" $ whnf (== TL.init tl `TL.snoc` '\xfffd') tl
3540
, bench "ByteString" $ whnf (== B.init b `B.snoc` '\xfffd') b

benchmarks/haskell/Benchmarks/FileRead.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ import qualified Data.Text.Lazy as LT
1919
import qualified Data.Text.Lazy.Encoding as LT
2020
import qualified Data.Text.Lazy.IO as LT
2121

22-
benchmark :: FilePath -> IO Benchmark
23-
benchmark p = return $ bgroup "FileRead"
22+
benchmark :: FilePath -> Benchmark
23+
benchmark p = bgroup "FileRead"
2424
[ bench "String" $ whnfIO $ length <$> readFile p
2525
, bench "ByteString" $ whnfIO $ SB.length <$> SB.readFile p
2626
, bench "LazyByteString" $ whnfIO $ LB.length <$> LB.readFile p

benchmarks/haskell/Benchmarks/FoldLines.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ import qualified Data.ByteString as B
1616
import qualified Data.Text as T
1717
import qualified Data.Text.IO as T
1818

19-
benchmark :: FilePath -> IO Benchmark
20-
benchmark fp = return $ bgroup "ReadLines"
19+
benchmark :: FilePath -> Benchmark
20+
benchmark fp = bgroup "ReadLines"
2121
[ bench "Text" $ withHandle $ foldLinesT (\n _ -> n + 1) (0 :: Int)
2222
, bench "ByteString" $ withHandle $ foldLinesB (\n _ -> n + 1) (0 :: Int)
2323
]

benchmarks/haskell/Benchmarks/Mul.hs

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
module Benchmarks.Mul (benchmark) where
1+
module Benchmarks.Mul
2+
( initEnv
3+
, benchmark
4+
) where
25

36
import Control.Exception (evaluate)
47
import Criterion.Main
@@ -12,16 +15,21 @@ oldMul m n
1215
| m <= maxBound `quot` n = m * n
1316
| otherwise = error "overflow"
1417

15-
benchmark :: IO Benchmark
16-
benchmark = do
17-
_ <- evaluate testVector32
18-
_ <- evaluate testVector64
19-
return $ bgroup "Mul" [
20-
bench "oldMul" $ whnf (U.map (uncurry oldMul)) testVector64
21-
, bench "mul64" $ whnf (U.map (uncurry mul64)) testVector64
22-
, bench "*64" $ whnf (U.map (uncurry (*))) testVector64
23-
, bench "mul32" $ whnf (U.map (uncurry mul32)) testVector32
24-
, bench "*32" $ whnf (U.map (uncurry (*))) testVector32
18+
type Env = (U.Vector (Int32,Int32), U.Vector (Int64,Int64))
19+
20+
initEnv :: IO Env
21+
initEnv = do
22+
x <- evaluate testVector32
23+
y <- evaluate testVector64
24+
return (x, y)
25+
26+
benchmark :: Env -> Benchmark
27+
benchmark ~(tv32, tv64) = bgroup "Mul"
28+
[ bench "oldMul" $ whnf (U.map (uncurry oldMul)) tv64
29+
, bench "mul64" $ whnf (U.map (uncurry mul64)) tv64
30+
, bench "*64" $ whnf (U.map (uncurry (*))) tv64
31+
, bench "mul32" $ whnf (U.map (uncurry mul32)) tv32
32+
, bench "*32" $ whnf (U.map (uncurry (*))) tv32
2533
]
2634

2735
testVector64 :: U.Vector (Int64,Int64)

benchmarks/haskell/Benchmarks/Programs/BigTable.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ import Data.Text.Lazy.IO (hPutStr)
1818
import System.IO (Handle)
1919
import qualified Data.Text as T
2020

21-
benchmark :: Handle -> IO Benchmark
22-
benchmark sink = return $ bench "BigTable" $ whnfIO $ do
21+
benchmark :: Handle -> Benchmark
22+
benchmark sink = bench "BigTable" $ whnfIO $ do
2323
hPutStr sink "Content-Type: text/html\n\n<table>"
2424
hPutStr sink . toLazyText . makeTable =<< rows
2525
hPutStr sink "</table>"

benchmarks/haskell/Benchmarks/Programs/Cut.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ import qualified Data.Text.Lazy as TL
2929
import qualified Data.Text.Lazy.Encoding as TL
3030
import qualified Data.Text.Lazy.IO as TL
3131

32-
benchmark :: FilePath -> Handle -> Int -> Int -> IO Benchmark
33-
benchmark p sink from to = return $ bgroup "Cut"
32+
benchmark :: FilePath -> Handle -> Int -> Int -> Benchmark
33+
benchmark p sink from to = bgroup "Cut"
3434
[ bench' "String" string
3535
, bench' "ByteString" byteString
3636
, bench' "LazyByteString" lazyByteString

benchmarks/haskell/Benchmarks/Programs/Fold.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ import qualified Data.Text.Lazy.Builder as TLB
2828
import qualified Data.Text.Lazy as TL
2929
import qualified Data.Text.Lazy.IO as TL
3030

31-
benchmark :: FilePath -> Handle -> IO Benchmark
32-
benchmark i o = return $
31+
benchmark :: FilePath -> Handle -> Benchmark
32+
benchmark i o =
3333
bench "Fold" $ whnfIO $ T.readFile i >>= TL.hPutStr o . fold 80
3434

3535
-- | We represent a paragraph by a word list

benchmarks/haskell/Benchmarks/Programs/Sort.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ import qualified Data.Text.Lazy.Builder as TLB
3333
import qualified Data.Text.Lazy.Encoding as TL
3434
import qualified Data.Text.Lazy.IO as TL
3535

36-
benchmark :: FilePath -> Handle -> IO Benchmark
37-
benchmark i o = return $ bgroup "Sort"
36+
benchmark :: FilePath -> Handle -> Benchmark
37+
benchmark i o = bgroup "Sort"
3838
[ bench "String" $ whnfIO $ readFile i >>= hPutStr o . string
3939
, bench "ByteString" $ whnfIO $ B.readFile i >>= B.hPutStr o . byteString
4040
, bench "LazyByteString" $ whnfIO $

benchmarks/haskell/Benchmarks/Programs/StripTags.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import qualified Data.Text as T
2424
import qualified Data.Text.Encoding as T
2525
import qualified Data.Text.IO as T
2626

27-
benchmark :: FilePath -> Handle -> IO Benchmark
28-
benchmark i o = return $ bgroup "StripTags"
27+
benchmark :: FilePath -> Handle -> Benchmark
28+
benchmark i o = bgroup "StripTags"
2929
[ bench "String" $ whnfIO $ readFile i >>= hPutStr o . string
3030
, bench "ByteString" $ whnfIO $ B.readFile i >>= B.hPutStr o . byteString
3131
, bench "Text" $ whnfIO $ T.readFile i >>= T.hPutStr o . text

benchmarks/haskell/Benchmarks/Programs/Throughput.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ import qualified Data.Text.IO as T
2727
import qualified Data.Text.Lazy.Encoding as TL
2828
import qualified Data.Text.Lazy.IO as TL
2929

30-
benchmark :: FilePath -> Handle -> IO Benchmark
31-
benchmark fp sink = return $ bgroup "Throughput"
30+
benchmark :: FilePath -> Handle -> Benchmark
31+
benchmark fp sink = bgroup "Throughput"
3232
[ bench "String" $ whnfIO $ readFile fp >>= hPutStr sink
3333
, bench "ByteString" $ whnfIO $ B.readFile fp >>= B.hPutStr sink
3434
, bench "LazyByteString" $ whnfIO $ BL.readFile fp >>= BL.hPutStr sink

benchmarks/haskell/Benchmarks/Pure.hs

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
-- * Most pure functions defined the string types
66
--
77
{-# LANGUAGE BangPatterns, CPP, GADTs, MagicHash #-}
8+
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, RecordWildCards #-}
89
{-# OPTIONS_GHC -fno-warn-orphans #-}
910
module Benchmarks.Pure
10-
( benchmark
11+
( initEnv
12+
, benchmark
1113
) where
1214

1315
import Control.DeepSeq (NFData (..))
@@ -16,6 +18,8 @@ import Criterion (Benchmark, bgroup, bench, nf)
1618
import Data.Char (toLower, toUpper)
1719
import Data.Monoid (mappend, mempty)
1820
import GHC.Base (Char (..), Int (..), chr#, ord#, (+#))
21+
import GHC.Generics (Generic)
22+
import GHC.Int (Int64)
1923
import qualified Data.ByteString.Char8 as BS
2024
import qualified Data.ByteString.Lazy.Char8 as BL
2125
import qualified Data.ByteString.UTF8 as UTF8
@@ -26,8 +30,32 @@ import qualified Data.Text.Lazy as TL
2630
import qualified Data.Text.Lazy.Builder as TB
2731
import qualified Data.Text.Lazy.Encoding as TL
2832

29-
benchmark :: String -> FilePath -> IO Benchmark
30-
benchmark kind fp = do
33+
data Env = Env
34+
{ bsa :: !BS.ByteString
35+
, ta :: !T.Text
36+
, tb :: !T.Text
37+
, tla :: !TL.Text
38+
, tlb :: !TL.Text
39+
, bsb :: !BS.ByteString
40+
, bla :: !BL.ByteString
41+
, blb :: !BL.ByteString
42+
, sa :: !String
43+
, sb :: !String
44+
, bsa_len :: !Int
45+
, ta_len :: !Int
46+
, bla_len :: !Int64
47+
, tla_len :: !Int64
48+
, sa_len :: !Int
49+
, bsl :: [BS.ByteString]
50+
, bll :: [BL.ByteString]
51+
, tl :: [T.Text]
52+
, tll :: [TL.Text]
53+
, sl :: [String]
54+
} deriving (Generic, NFData)
55+
56+
57+
initEnv :: FilePath -> IO Env
58+
initEnv fp = do
3159
-- Evaluate stuff before actually running the benchmark, we don't want to
3260
-- count it here.
3361

@@ -63,7 +91,11 @@ benchmark kind fp = do
6391
tll <- evaluate $ TL.lines tla
6492
sl <- evaluate $ L.lines sa
6593

66-
return $ bgroup "Pure"
94+
return Env{..}
95+
96+
benchmark :: String -> Env -> Benchmark
97+
benchmark kind ~Env{..} =
98+
bgroup "Pure"
6799
[ bgroup "append"
68100
[ benchT $ nf (T.append tb) ta
69101
, benchTL $ nf (TL.append tlb) tla

0 commit comments

Comments
 (0)