Skip to content

Commit 4101260

Browse files
Updating expanding union benchmark's measurement scope
1 parent 2ea2351 commit 4101260

File tree

1 file changed

+129
-84
lines changed

1 file changed

+129
-84
lines changed

bench/macro/lsm-tree-bench-unions.hs

Lines changed: 129 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,6 @@ import qualified Text.Read.Lex as Lex
8181
import Database.LSMTree.Extras (groupsOfN)
8282
import Database.LSMTree.Internal.ByteString (byteArrayToSBS)
8383

84-
import System.Environment
85-
8684
-- We should be able to write this benchmark
8785
-- using only use public lsm-tree interface
8886
import qualified Database.LSMTree.Simple as LSM
@@ -144,14 +142,14 @@ data GlobalOpts = GlobalOpts
144142
{ rootDir :: !FilePath -- ^ session directory.
145143
, tableCount :: !Int -- ^ Number of tables in the benchmark
146144
, initialSize :: !Int
145+
, seed :: !Word64
147146
}
148147
deriving stock Show
149148

150149
data RunOpts = RunOpts
151150
{ batchCount :: !Int
152151
, batchSize :: !Int
153152
, check :: !Bool
154-
, seed :: !Word64
155153
, pipelined :: !Bool
156154
, payRate :: !PaymentRate
157155
}
@@ -196,6 +194,7 @@ globalOptsP = pure GlobalOpts
196194
<*> O.option O.str (O.long "bench-dir" <> O.value (Fold.fold ["_", benchPerformanceOf, "_", benchWorkProductNo]) <> O.showDefault <> O.help "Benchmark directory to put files in")
197195
<*> O.option O.auto (O.long "table-count" <> O.value 10 <> O.showDefault <> O.help "Number of tables to benchmark")
198196
<*> O.option O.auto (O.long "initial-size" <> O.value 1_000_000 <> O.showDefault <> O.help "Initial LSM tree size")
197+
<*> O.option O.auto (O.long "seed" <> O.value 1337 <> O.showDefault <> O.help "Random seed")
199198

200199
cmdP :: O.Parser Cmd
201200
cmdP = O.subparser $ mconcat
@@ -216,7 +215,6 @@ runOptsP = pure RunOpts
216215
<*> O.option O.auto (O.long "batch-count" <> O.value 200 <> O.showDefault <> O.help "Batch count")
217216
<*> O.option O.auto (O.long "batch-size" <> O.value 256 <> O.showDefault <> O.help "Batch size")
218217
<*> O.switch (O.long "check" <> O.help "Check generated key distribution")
219-
<*> O.option O.auto (O.long "seed" <> O.value 1337 <> O.showDefault <> O.help "Random seed")
220218
<*> O.switch (O.long "pipelined" <> O.help "Use pipelined mode")
221219
<*> O.option O.auto (O.long "payment-rate" <> O.value 1 <> O.showDefault <> O.help "Debt repayment rate")
222220

@@ -385,17 +383,31 @@ doSetup' gopts = do
385383
-- Ensure that our mount point exists on the real file system
386384
createDirectoryIfMissing True rooting
387385

386+
-- Define some constants
387+
let populationBatchSize = 256
388+
keyMax = 2 * initialSize gopts
389+
keyMin = 1
390+
391+
-- Create an RNG for randomized deletions
392+
refRNG <- newIORef $ MCG.make
393+
(toEnum populationBatchSize)
394+
(seed gopts)
395+
396+
-- Populate the specified number of tables
388397
forM_ (tableRange gopts) $ \tID -> do
389398
let name = makeTableName tID
390399
LSM.withSession (rootDir gopts) $ \session -> do
400+
-- Create a new table
391401
tbl <- LSM.newTable @K @V session
392-
393-
forM_ (groupsOfN 256 [ 1 .. 2 * initialSize gopts ]) $ \batch -> do
394-
let (valuesDeletes, _valuesUpserts) = NE.splitAt 128 batch
402+
-- Populate the tablke in batches
403+
forM_ (groupsOfN populationBatchSize [ keyMin .. keyMax ]) $ \batch -> do
404+
-- Insert all values in the batch
395405
LSM.inserts tbl $ V.fromList [
396406
(makeKey (fromIntegral k), theValue)
397407
| k <- NE.toList batch
398408
]
409+
-- Randomly delete half the values of the batch
410+
let (valuesDeletes,_) = NE.splitAt 128 batch
399411
LSM.deletes tbl $ V.fromList [
400412
makeKey (fromIntegral k)
401413
| k <- valuesDeletes
@@ -422,7 +434,7 @@ doDryRun' gopts opts = do
422434
-- calculated some expected statistics for generated batches
423435
-- using nested do block to limit scope of intermediate bindings n, d, p, and q
424436
do
425-
let d = toInteger (maxBound :: Word64)
437+
let d = toInteger $ 2 * initialSize gopts
426438
-- we generate n random numbers in range of [ 1 .. d ]
427439
-- what is the chance they are all distinct
428440
-- In this case each key in a table is could possibly share a key in another table.
@@ -480,7 +492,7 @@ renderRational len rat = sign <> shows prefix ("." ++ suffix)
480492
suffix = case next of
481493
0 -> "0"
482494
n -> take len $ go n
483-
495+
484496
num = numerator rat
485497
den = denominator rat
486498
go 0 = ""
@@ -553,21 +565,28 @@ toOperations lookups = batch1
553565

554566
doRun :: GlobalOpts -> RunOpts -> IO ()
555567
doRun gopts opts = do
556-
-- 100 ticks for all tables
557-
let PaymentRate paymentRate = payRate opts
558-
steps = 100
568+
-- Perform 3 measurement phases
569+
-- * Phase 1: Measure performance before supplying any credits.
570+
-- * Phase 2: Measure performance as credits are incrementally supplied and debt is repaid.
571+
-- * Phase 3: Measure performance when debt is 0.
572+
let tickCountPrefix = 20
573+
tickCountMiddle = 100
574+
tickCountSuffix = 20
575+
tickCountEnding = maximum indicesPhase3
576+
indicesPhase1 = negate <$> reverse [ 0 .. tickCountPrefix ]
577+
indicesPhase2 = [ 1 .. tickCountMiddle ]
578+
indicesPhase3 = [ tickCountMiddle + 1 .. tickCountMiddle + tickCountSuffix ]
579+
PaymentRate paymentRate = payRate opts
559580
benchmarkIterations h
560581
| pipelined opts = pipelinedIterations h
561582
| otherwise = sequentialIterations h
562583

563-
print $ deriveFileNameForPlot gopts opts
564-
565-
refRNG <- newIORef $ initGen
584+
refRNG <- newIORef $ initGen
566585
(initialSize gopts)
567586
(batchSize opts)
568587
(batchCount opts)
569-
(seed opts)
570-
588+
(seed gopts)
589+
571590
putStrLn "Operations per second:"
572591
measurements <- LSM.withSession (rootDir gopts) $ \session ->
573592
withLatencyHandle $ \h -> do
@@ -579,78 +598,114 @@ doRun gopts opts = do
579598

580599
LSM.withIncrementalUnions tables $ \table -> do
581600
LSM.UnionDebt totalDebt <- LSM.remainingUnionDebt table
582-
-- Determine the number of credits to supply per tick
583-
-- in order to have all debt repaid when 90% complete.
601+
-- Determine the number of credits to supply per tick in order to
602+
-- all debt repaid at the time specified by the rpayment rate.
603+
-- Each tick should supply credits equal to:
604+
-- paymentRate * totalDebt / tickCountMiddle
584605
let paymentPerTick = ceiling $ product
585-
[ paymentRate, toInteger totalDebt % 1, 1 % steps ]
586-
let tickCredit = LSM.UnionCredits paymentPerTick
587-
forM [1 .. steps] $ \step -> do
606+
[ paymentRate, toInteger totalDebt % 1, 1 % tickCountMiddle ]
607+
608+
let measurePerformance :: Integer -> IO (Int, Int, Double)
609+
measurePerformance tickIndex = do
610+
-- Note this tick's debt for subsequent measurement purposes.
611+
LSM.UnionDebt debtCurr <- LSM.remainingUnionDebt table
612+
-- Note the cumulative credits supplied through this tick.
613+
let paidCurr = max 0 $ totalDebt - fromInteger (max 0 tickIndex) * paymentPerTick
614+
currRNG <- readIORef refRNG
615+
(nextRNG,time, _, _) <- benchmarkIterations
616+
h
617+
(\_ _ -> pure ())
618+
(initialSize gopts)
619+
(batchSize opts)
620+
(batchCount opts)
621+
currRNG
622+
table
623+
-- Update the RNG state
624+
writeIORef refRNG nextRNG
625+
-- Perform measurement of batched lookups
626+
-- Save the result for later to be included in the performance plot
627+
let ops = batchCount opts * batchSize opts
628+
rate = fromIntegral ops / time
629+
-- Print a status report while running the benchmark
630+
printf
631+
(Fold.fold [
632+
" [%",
633+
show . length $ show tickCountEnding,
634+
"d/",
635+
show tickCountEnding,
636+
"]: %7.01f ops/sec",
637+
" with debt = %8d\n"
638+
])
639+
tickIndex
640+
rate
641+
debtCurr
642+
pure (debtCurr, paidCurr, rate)
643+
644+
-- Phase 1 measurements: Debt = 100%
645+
resultsPhase1 <- forM indicesPhase1 $ \step -> do
646+
measurePerformance step
647+
648+
-- Phase 2 measurements: Debt ∈ [0%, 99%]
649+
resultsPhase2 <- forM indicesPhase2 $ \step -> do
588650
LSM.UnionDebt debtPrev <- LSM.remainingUnionDebt table
589-
when (debtPrev > 0) . void $
590-
LSM.supplyUnionCredits table tickCredit
591-
LSM.UnionDebt debtCurr <- LSM.remainingUnionDebt table
592-
currRNG <- readIORef refRNG
593-
(nextRNG,time, _, _) <- benchmarkIterations
594-
h
595-
(\_ _ -> pure ())
596-
(initialSize gopts)
597-
(batchSize opts)
598-
(batchCount opts)
599-
currRNG
600-
table
601-
writeIORef refRNG nextRNG
602-
603-
-- Perform measurement of batched lookups
604-
-- Save the result for later to be included in the performance plot
605-
let ops = batchCount opts * batchSize opts
606-
rate = fromIntegral ops / time
607-
-- Print a status report while running the benchmark
608-
printf
609-
(Fold.fold [
610-
" [%",
611-
show . length $ show steps,
612-
"d/",
613-
show steps,
614-
"]: %7.01f ops/sec",
615-
" with debt = %8d\n"
616-
])
617-
step
618-
rate
619-
debtCurr
620-
pure (debtCurr, rate)
621-
622-
let (balances', operations) = unzip measurements
623-
maxDebit = toInteger $ head balances'
651+
-- When there is debt remaining, supply the fixed credits-per-tick.
652+
when (debtPrev > 0) . void $
653+
LSM.supplyUnionCredits table (LSM.UnionCredits paymentPerTick)
654+
measurePerformance step
655+
656+
-- Phase 3 measurements: Debt = 0%
657+
resultsPhase3 <- forM indicesPhase3 $ \step -> do
658+
measurePerformance step
659+
660+
pure $ mconcat [ resultsPhase1, resultsPhase2, resultsPhase3 ]
661+
662+
let (balances', payments', operations) = unzip3 measurements
624663
maxValue = ceiling $ maximum operations
625-
balances = (\b -> fromRational $ (fromIntegral b * maxValue) % maxDebit) <$> balances'
664+
standardize xs =
665+
let maxInput = toInteger $ maximum xs
666+
scale x = fromRational $ (fromIntegral x * maxValue) % maxInput
667+
in scale <$> xs
668+
balances = standardize balances'
669+
payments = standardize payments'
626670

627671
-- Generate a performance plot based on the benchmark results.
628-
Plot.toFile Plot.def (rootDir gopts <> "/benchmark.png") $ do
672+
Plot.toFile Plot.def (rootDir gopts <> "/" <> deriveFileNameForPlot gopts opts) $ do
629673
Plot.layout_title .= "Incremental Unions Performance"
630674
Plot.layout_x_axis . Plot.laxis_override .= Plot.axisGridHide
631675
Plot.layout_x_axis . Plot.laxis_title .= "Credits supplied over time"
632676
Plot.layout_y_axis . Plot.laxis_title .= "Lookup access time"
633-
Plot.plot $ fillBetween "Debt balance" [ (d,(0,v)) | (d, v) <- zip [1 :: Word .. ] balances ]
677+
let colorD = Color.sRGB 0.875 1.0 0.125 `Plot.withOpacity` 0.5
678+
let colorE = Color.sRGB 0.625 1.0 0.875 `Plot.withOpacity` 0.5
679+
Plot.plot $ fillBetween colorD "Debt balance"
680+
[ (d,(0,v)) | (d, v) <- zip [1 :: Word .. ] balances ]
681+
Plot.plot $ fillBetween colorE "Extra credits"
682+
[ (d,(v,w)) | (d, v, w) <- zip3 [1 :: Word .. ] balances payments ]
634683
Plot.plot $ Plot.line "operations per second" [ zip [1 :: Word .. ] operations ]
635684

636-
fillBetween :: String -> [(x1, (y1, y1))] -> Plot.EC l20 (Plot.PlotFillBetween x1 y1)
637-
fillBetween title vs = Plot.liftEC $ do
685+
fillBetween :: Plot.AlphaColour Double -> String -> [(x, (y, y))] -> Plot.EC l20 (Plot.PlotFillBetween x y)
686+
fillBetween color title vs = Plot.liftEC $ do
638687
Plot.plot_fillbetween_title .= title
639-
let color = Color.sRGB 0.875 1.0 0.125 `Plot.withOpacity` 0.5
640688
Plot.plot_fillbetween_style .= Plot.solidFillStyle color
641689
Plot.plot_fillbetween_values .= vs
642690

643691
deriveFileNameForPlot :: GlobalOpts -> RunOpts -> FilePath
644692
deriveFileNameForPlot gOpts rOpts =
645-
let partTable = show $ tableCount gOpts
646-
partWidth = List.intercalate "_" . fmap Fold.toList . groupsOfN 3 . reverse . show $ initialSize gOpts
647-
in Fold.fold
693+
let sep1000th = reverse . List.intercalate "_" . fmap Fold.toList . groupsOfN 3 . reverse . show
694+
partTable = show $ tableCount gOpts
695+
partWidth = sep1000th $ initialSize gOpts
696+
partSeed0 = printf "SEED_%016x" (seed gOpts)
697+
partRatio =
698+
let PaymentRate r = payRate rOpts
699+
n = numerator r
700+
d = denominator r
701+
sep = "x"
702+
in show n <> sep <> show d
703+
in List.intercalate "-"
648704
[ "benchmark"
649-
, partTable
650-
, "×"
651-
, partWidth
652-
, ".png"
653-
]
705+
, partTable <> "x" <> partWidth
706+
, partSeed0
707+
, partRatio
708+
] <> ".png"
654709

655710
{-
656711
data GlobalOpts = GlobalOpts
@@ -703,15 +758,6 @@ sequentialIterations h output !initialSize !batchSize !batchCount !currRNG !tbl
703758
(x,y,z) <- timed_ $ forM_ (zip [0 ..] allBatches) $ sequentialIteration h output tbl
704759
pure (nextRNG,x,y,z)
705760

706-
{-
707-
generateBatch ::
708-
Int -- ^ initial size of the collection
709-
-> Int -- ^ batch size
710-
-> MCG.MCG -- ^ generator
711-
-> Int -- ^ batch number
712-
-> (MCG.MCG, V.Vector K)
713-
714-
-}
715761
-------------------------------------------------------------------------------
716762
-- pipelined
717763
-------------------------------------------------------------------------------
@@ -764,7 +810,7 @@ pipelinedIteration :: LatencyHandle
764810
-> MVar (LSM.Table K V)
765811
-> MVar (V.Vector K)
766812
-> MVar (V.Vector K)
767-
-> MVar [V.Vector K]
813+
-> MVar [V.Vector K]
768814
-> LSM.Table K V
769815
-> Int
770816
-> IO (LSM.Table K V)
@@ -788,7 +834,7 @@ pipelinedIteration h output
788834
-- using tbl_n. They used it to generate tbl_n+1 (which they gave us).
789835
LSM.closeTable tbl_n
790836
pure tbl_n1
791-
837+
792838
ls_next <- dequeue queue
793839
putMVar syncTblOut tbl_n1
794840
putMVar syncVecOut ls_next
@@ -853,7 +899,7 @@ pipelinedIterations h output !initialSize !batchSize !batchCount !currRNG tbl_0
853899

854900
dequeue :: Monoid a => MVar [a] -> IO a
855901
dequeue q = modifyMVar q $ pure . swap . fromMaybe (mempty, []) . List.uncons
856-
902+
857903
-------------------------------------------------------------------------------
858904
-- measure batch latency
859905
-------------------------------------------------------------------------------
@@ -965,7 +1011,6 @@ main = do
9651011
putStrLn " To benchmark in release mode, pass:"
9661012
putStrLn " --project-file=cabal.project.release"
9671013
#endif
968-
getArgs >>= Fold.traverse_ print
9691014
(gopts, cmd) <- O.customExecParser prefs cliP
9701015
print gopts
9711016
print cmd

0 commit comments

Comments
 (0)