@@ -81,8 +81,6 @@ import qualified Text.Read.Lex as Lex
81
81
import Database.LSMTree.Extras (groupsOfN )
82
82
import Database.LSMTree.Internal.ByteString (byteArrayToSBS )
83
83
84
- import System.Environment
85
-
86
84
-- We should be able to write this benchmark
87
85
-- using only use public lsm-tree interface
88
86
import qualified Database.LSMTree.Simple as LSM
@@ -144,14 +142,14 @@ data GlobalOpts = GlobalOpts
144
142
{ rootDir :: ! FilePath -- ^ session directory.
145
143
, tableCount :: ! Int -- ^ Number of tables in the benchmark
146
144
, initialSize :: ! Int
145
+ , seed :: ! Word64
147
146
}
148
147
deriving stock Show
149
148
150
149
data RunOpts = RunOpts
151
150
{ batchCount :: ! Int
152
151
, batchSize :: ! Int
153
152
, check :: ! Bool
154
- , seed :: ! Word64
155
153
, pipelined :: ! Bool
156
154
, payRate :: ! PaymentRate
157
155
}
@@ -196,6 +194,7 @@ globalOptsP = pure GlobalOpts
196
194
<*> O. option O. str (O. long " bench-dir" <> O. value (Fold. fold [" _" , benchPerformanceOf, " _" , benchWorkProductNo]) <> O. showDefault <> O. help " Benchmark directory to put files in" )
197
195
<*> O. option O. auto (O. long " table-count" <> O. value 10 <> O. showDefault <> O. help " Number of tables to benchmark" )
198
196
<*> O. option O. auto (O. long " initial-size" <> O. value 1_000_000 <> O. showDefault <> O. help " Initial LSM tree size" )
197
+ <*> O. option O. auto (O. long " seed" <> O. value 1337 <> O. showDefault <> O. help " Random seed" )
199
198
200
199
cmdP :: O. Parser Cmd
201
200
cmdP = O. subparser $ mconcat
@@ -216,7 +215,6 @@ runOptsP = pure RunOpts
216
215
<*> O. option O. auto (O. long " batch-count" <> O. value 200 <> O. showDefault <> O. help " Batch count" )
217
216
<*> O. option O. auto (O. long " batch-size" <> O. value 256 <> O. showDefault <> O. help " Batch size" )
218
217
<*> O. switch (O. long " check" <> O. help " Check generated key distribution" )
219
- <*> O. option O. auto (O. long " seed" <> O. value 1337 <> O. showDefault <> O. help " Random seed" )
220
218
<*> O. switch (O. long " pipelined" <> O. help " Use pipelined mode" )
221
219
<*> O. option O. auto (O. long " payment-rate" <> O. value 1 <> O. showDefault <> O. help " Debt repayment rate" )
222
220
@@ -385,17 +383,31 @@ doSetup' gopts = do
385
383
-- Ensure that our mount point exists on the real file system
386
384
createDirectoryIfMissing True rooting
387
385
386
+ -- Define some constants
387
+ let populationBatchSize = 256
388
+ keyMax = 2 * initialSize gopts
389
+ keyMin = 1
390
+
391
+ -- Create an RNG for randomized deletions
392
+ refRNG <- newIORef $ MCG. make
393
+ (toEnum populationBatchSize)
394
+ (seed gopts)
395
+
396
+ -- Populate the specified number of tables
388
397
forM_ (tableRange gopts) $ \ tID -> do
389
398
let name = makeTableName tID
390
399
LSM. withSession (rootDir gopts) $ \ session -> do
400
+ -- Create a new table
391
401
tbl <- LSM. newTable @ K @ V session
392
-
393
- forM_ (groupsOfN 256 [ 1 .. 2 * initialSize gopts ]) $ \ batch -> do
394
- let (valuesDeletes, _valuesUpserts) = NE. splitAt 128 batch
402
+ -- Populate the tablke in batches
403
+ forM_ (groupsOfN populationBatchSize [ keyMin .. keyMax ]) $ \ batch -> do
404
+ -- Insert all values in the batch
395
405
LSM. inserts tbl $ V. fromList [
396
406
(makeKey (fromIntegral k), theValue)
397
407
| k <- NE. toList batch
398
408
]
409
+ -- Randomly delete half the values of the batch
410
+ let (valuesDeletes,_) = NE. splitAt 128 batch
399
411
LSM. deletes tbl $ V. fromList [
400
412
makeKey (fromIntegral k)
401
413
| k <- valuesDeletes
@@ -422,7 +434,7 @@ doDryRun' gopts opts = do
422
434
-- calculated some expected statistics for generated batches
423
435
-- using nested do block to limit scope of intermediate bindings n, d, p, and q
424
436
do
425
- let d = toInteger ( maxBound :: Word64 )
437
+ let d = toInteger $ 2 * initialSize gopts
426
438
-- we generate n random numbers in range of [ 1 .. d ]
427
439
-- what is the chance they are all distinct
428
440
-- In this case each key in a table is could possibly share a key in another table.
@@ -480,7 +492,7 @@ renderRational len rat = sign <> shows prefix ("." ++ suffix)
480
492
suffix = case next of
481
493
0 -> " 0"
482
494
n -> take len $ go n
483
-
495
+
484
496
num = numerator rat
485
497
den = denominator rat
486
498
go 0 = " "
@@ -553,21 +565,28 @@ toOperations lookups = batch1
553
565
554
566
doRun :: GlobalOpts -> RunOpts -> IO ()
555
567
doRun gopts opts = do
556
- -- 100 ticks for all tables
557
- let PaymentRate paymentRate = payRate opts
558
- steps = 100
568
+ -- Perform 3 measurement phases
569
+ -- * Phase 1: Measure performance before supplying any credits.
570
+ -- * Phase 2: Measure performance as credits are incrementally supplied and debt is repaid.
571
+ -- * Phase 3: Measure performance when debt is 0.
572
+ let tickCountPrefix = 20
573
+ tickCountMiddle = 100
574
+ tickCountSuffix = 20
575
+ tickCountEnding = maximum indicesPhase3
576
+ indicesPhase1 = negate <$> reverse [ 0 .. tickCountPrefix ]
577
+ indicesPhase2 = [ 1 .. tickCountMiddle ]
578
+ indicesPhase3 = [ tickCountMiddle + 1 .. tickCountMiddle + tickCountSuffix ]
579
+ PaymentRate paymentRate = payRate opts
559
580
benchmarkIterations h
560
581
| pipelined opts = pipelinedIterations h
561
582
| otherwise = sequentialIterations h
562
583
563
- print $ deriveFileNameForPlot gopts opts
564
-
565
- refRNG <- newIORef $ initGen
584
+ refRNG <- newIORef $ initGen
566
585
(initialSize gopts)
567
586
(batchSize opts)
568
587
(batchCount opts)
569
- (seed opts )
570
-
588
+ (seed gopts )
589
+
571
590
putStrLn " Operations per second:"
572
591
measurements <- LSM. withSession (rootDir gopts) $ \ session ->
573
592
withLatencyHandle $ \ h -> do
@@ -579,78 +598,114 @@ doRun gopts opts = do
579
598
580
599
LSM. withIncrementalUnions tables $ \ table -> do
581
600
LSM. UnionDebt totalDebt <- LSM. remainingUnionDebt table
582
- -- Determine the number of credits to supply per tick
583
- -- in order to have all debt repaid when 90% complete.
601
+ -- Determine the number of credits to supply per tick in order to
602
+ -- all debt repaid at the time specified by the rpayment rate.
603
+ -- Each tick should supply credits equal to:
604
+ -- paymentRate * totalDebt / tickCountMiddle
584
605
let paymentPerTick = ceiling $ product
585
- [ paymentRate, toInteger totalDebt % 1 , 1 % steps ]
586
- let tickCredit = LSM. UnionCredits paymentPerTick
587
- forM [1 .. steps] $ \ step -> do
606
+ [ paymentRate, toInteger totalDebt % 1 , 1 % tickCountMiddle ]
607
+
608
+ let measurePerformance :: Integer -> IO (Int , Int , Double )
609
+ measurePerformance tickIndex = do
610
+ -- Note this tick's debt for subsequent measurement purposes.
611
+ LSM. UnionDebt debtCurr <- LSM. remainingUnionDebt table
612
+ -- Note the cumulative credits supplied through this tick.
613
+ let paidCurr = max 0 $ totalDebt - fromInteger (max 0 tickIndex) * paymentPerTick
614
+ currRNG <- readIORef refRNG
615
+ (nextRNG,time, _, _) <- benchmarkIterations
616
+ h
617
+ (\ _ _ -> pure () )
618
+ (initialSize gopts)
619
+ (batchSize opts)
620
+ (batchCount opts)
621
+ currRNG
622
+ table
623
+ -- Update the RNG state
624
+ writeIORef refRNG nextRNG
625
+ -- Perform measurement of batched lookups
626
+ -- Save the result for later to be included in the performance plot
627
+ let ops = batchCount opts * batchSize opts
628
+ rate = fromIntegral ops / time
629
+ -- Print a status report while running the benchmark
630
+ printf
631
+ (Fold. fold [
632
+ " [%" ,
633
+ show . length $ show tickCountEnding,
634
+ " d/" ,
635
+ show tickCountEnding,
636
+ " ]: %7.01f ops/sec" ,
637
+ " with debt = %8d\n "
638
+ ])
639
+ tickIndex
640
+ rate
641
+ debtCurr
642
+ pure (debtCurr, paidCurr, rate)
643
+
644
+ -- Phase 1 measurements: Debt = 100%
645
+ resultsPhase1 <- forM indicesPhase1 $ \ step -> do
646
+ measurePerformance step
647
+
648
+ -- Phase 2 measurements: Debt ∈ [0%, 99%]
649
+ resultsPhase2 <- forM indicesPhase2 $ \ step -> do
588
650
LSM. UnionDebt debtPrev <- LSM. remainingUnionDebt table
589
- when (debtPrev > 0 ) . void $
590
- LSM. supplyUnionCredits table tickCredit
591
- LSM. UnionDebt debtCurr <- LSM. remainingUnionDebt table
592
- currRNG <- readIORef refRNG
593
- (nextRNG,time, _, _) <- benchmarkIterations
594
- h
595
- (\ _ _ -> pure () )
596
- (initialSize gopts)
597
- (batchSize opts)
598
- (batchCount opts)
599
- currRNG
600
- table
601
- writeIORef refRNG nextRNG
602
-
603
- -- Perform measurement of batched lookups
604
- -- Save the result for later to be included in the performance plot
605
- let ops = batchCount opts * batchSize opts
606
- rate = fromIntegral ops / time
607
- -- Print a status report while running the benchmark
608
- printf
609
- (Fold. fold [
610
- " [%" ,
611
- show . length $ show steps,
612
- " d/" ,
613
- show steps,
614
- " ]: %7.01f ops/sec" ,
615
- " with debt = %8d\n "
616
- ])
617
- step
618
- rate
619
- debtCurr
620
- pure (debtCurr, rate)
621
-
622
- let (balances', operations) = unzip measurements
623
- maxDebit = toInteger $ head balances'
651
+ -- When there is debt remaining, supply the fixed credits-per-tick.
652
+ when (debtPrev > 0 ) . void $
653
+ LSM. supplyUnionCredits table (LSM. UnionCredits paymentPerTick)
654
+ measurePerformance step
655
+
656
+ -- Phase 3 measurements: Debt = 0%
657
+ resultsPhase3 <- forM indicesPhase3 $ \ step -> do
658
+ measurePerformance step
659
+
660
+ pure $ mconcat [ resultsPhase1, resultsPhase2, resultsPhase3 ]
661
+
662
+ let (balances', payments', operations) = unzip3 measurements
624
663
maxValue = ceiling $ maximum operations
625
- balances = (\ b -> fromRational $ (fromIntegral b * maxValue) % maxDebit) <$> balances'
664
+ standardize xs =
665
+ let maxInput = toInteger $ maximum xs
666
+ scale x = fromRational $ (fromIntegral x * maxValue) % maxInput
667
+ in scale <$> xs
668
+ balances = standardize balances'
669
+ payments = standardize payments'
626
670
627
671
-- Generate a performance plot based on the benchmark results.
628
- Plot. toFile Plot. def (rootDir gopts <> " /benchmark.png " ) $ do
672
+ Plot. toFile Plot. def (rootDir gopts <> " /" <> deriveFileNameForPlot gopts opts ) $ do
629
673
Plot. layout_title .= " Incremental Unions Performance"
630
674
Plot. layout_x_axis . Plot. laxis_override .= Plot. axisGridHide
631
675
Plot. layout_x_axis . Plot. laxis_title .= " Credits supplied over time"
632
676
Plot. layout_y_axis . Plot. laxis_title .= " Lookup access time"
633
- Plot. plot $ fillBetween " Debt balance" [ (d,(0 ,v)) | (d, v) <- zip [1 :: Word .. ] balances ]
677
+ let colorD = Color. sRGB 0.875 1.0 0.125 `Plot.withOpacity` 0.5
678
+ let colorE = Color. sRGB 0.625 1.0 0.875 `Plot.withOpacity` 0.5
679
+ Plot. plot $ fillBetween colorD " Debt balance"
680
+ [ (d,(0 ,v)) | (d, v) <- zip [1 :: Word .. ] balances ]
681
+ Plot. plot $ fillBetween colorE " Extra credits"
682
+ [ (d,(v,w)) | (d, v, w) <- zip3 [1 :: Word .. ] balances payments ]
634
683
Plot. plot $ Plot. line " operations per second" [ zip [1 :: Word .. ] operations ]
635
684
636
- fillBetween :: String -> [(x1 , (y1 , y1 ))] -> Plot. EC l20 (Plot. PlotFillBetween x1 y1 )
637
- fillBetween title vs = Plot. liftEC $ do
685
+ fillBetween :: Plot. AlphaColour Double -> String -> [(x , (y , y ))] -> Plot. EC l20 (Plot. PlotFillBetween x y )
686
+ fillBetween color title vs = Plot. liftEC $ do
638
687
Plot. plot_fillbetween_title .= title
639
- let color = Color. sRGB 0.875 1.0 0.125 `Plot.withOpacity` 0.5
640
688
Plot. plot_fillbetween_style .= Plot. solidFillStyle color
641
689
Plot. plot_fillbetween_values .= vs
642
690
643
691
deriveFileNameForPlot :: GlobalOpts -> RunOpts -> FilePath
644
692
deriveFileNameForPlot gOpts rOpts =
645
- let partTable = show $ tableCount gOpts
646
- partWidth = List. intercalate " _" . fmap Fold. toList . groupsOfN 3 . reverse . show $ initialSize gOpts
647
- in Fold. fold
693
+ let sep1000th = reverse . List. intercalate " _" . fmap Fold. toList . groupsOfN 3 . reverse . show
694
+ partTable = show $ tableCount gOpts
695
+ partWidth = sep1000th $ initialSize gOpts
696
+ partSeed0 = printf " SEED_%016x" (seed gOpts)
697
+ partRatio =
698
+ let PaymentRate r = payRate rOpts
699
+ n = numerator r
700
+ d = denominator r
701
+ sep = " x"
702
+ in show n <> sep <> show d
703
+ in List. intercalate " -"
648
704
[ " benchmark"
649
- , partTable
650
- , " ×"
651
- , partWidth
652
- , " .png"
653
- ]
705
+ , partTable <> " x" <> partWidth
706
+ , partSeed0
707
+ , partRatio
708
+ ] <> " .png"
654
709
655
710
{-
656
711
data GlobalOpts = GlobalOpts
@@ -703,15 +758,6 @@ sequentialIterations h output !initialSize !batchSize !batchCount !currRNG !tbl
703
758
(x,y,z) <- timed_ $ forM_ (zip [0 .. ] allBatches) $ sequentialIteration h output tbl
704
759
pure (nextRNG,x,y,z)
705
760
706
- {-
707
- generateBatch ::
708
- Int -- ^ initial size of the collection
709
- -> Int -- ^ batch size
710
- -> MCG.MCG -- ^ generator
711
- -> Int -- ^ batch number
712
- -> (MCG.MCG, V.Vector K)
713
-
714
- -}
715
761
-------------------------------------------------------------------------------
716
762
-- pipelined
717
763
-------------------------------------------------------------------------------
@@ -764,7 +810,7 @@ pipelinedIteration :: LatencyHandle
764
810
-> MVar (LSM. Table K V )
765
811
-> MVar (V. Vector K )
766
812
-> MVar (V. Vector K )
767
- -> MVar [V. Vector K ]
813
+ -> MVar [V. Vector K ]
768
814
-> LSM. Table K V
769
815
-> Int
770
816
-> IO (LSM. Table K V )
@@ -788,7 +834,7 @@ pipelinedIteration h output
788
834
-- using tbl_n. They used it to generate tbl_n+1 (which they gave us).
789
835
LSM. closeTable tbl_n
790
836
pure tbl_n1
791
-
837
+
792
838
ls_next <- dequeue queue
793
839
putMVar syncTblOut tbl_n1
794
840
putMVar syncVecOut ls_next
@@ -853,7 +899,7 @@ pipelinedIterations h output !initialSize !batchSize !batchCount !currRNG tbl_0
853
899
854
900
dequeue :: Monoid a => MVar [a ] -> IO a
855
901
dequeue q = modifyMVar q $ pure . swap . fromMaybe (mempty , [] ) . List. uncons
856
-
902
+
857
903
-------------------------------------------------------------------------------
858
904
-- measure batch latency
859
905
-------------------------------------------------------------------------------
@@ -965,7 +1011,6 @@ main = do
965
1011
putStrLn " To benchmark in release mode, pass:"
966
1012
putStrLn " --project-file=cabal.project.release"
967
1013
#endif
968
- getArgs >>= Fold. traverse_ print
969
1014
(gopts, cmd) <- O. customExecParser prefs cliP
970
1015
print gopts
971
1016
print cmd
0 commit comments