Skip to content

Commit 32c80d8

Browse files
authored
Build Set and Map more efficiently and consistently (#1042)
Use "Builder"s to implement some Set and Map construction functions. As a result, some have become good consumers in terms of list fusion, and all are now O(n) for non-decreasing input. Fusible Fusible O(n) for O(n) for before after before after Set.fromList No Yes Strict incr Non-decr Set.map - - Strict incr Non-decr Map.fromList No Yes Strict incr Non-decr Map.fromListWith Yes Yes Never Non-decr Map.fromListWithKey Yes Yes Never Non-decr Map.mapKeys - - Strict incr Non-decr Map.mapKeysWith - - Never Non-decr
1 parent dcafd78 commit 32c80d8

File tree

7 files changed

+229
-162
lines changed

7 files changed

+229
-162
lines changed

containers-tests/benchmarks/Map.hs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,24 @@ main = do
8888
, bench "intersection" $ whnf (M.intersection m) m_even
8989
, bench "split" $ whnf (M.split (bound `div` 2)) m
9090
, bench "fromList" $ whnf M.fromList elems
91-
, bench "fromList-desc" $ whnf M.fromList elems_desc
91+
, bench "fromList-distinctAsc" $ whnf M.fromList elems_distinct_asc
92+
, bench "fromList-distinctAsc:fusion" $
93+
whnf (\n -> M.fromList [(i,i) | i <- [1..n]]) bound
94+
, bench "fromList-distinctDesc" $ whnf M.fromList elems_distinct_desc
95+
, bench "fromList-distinctDesc:fusion" $
96+
whnf (\n -> M.fromList [(i,i) | i <- [n,n-1..1]]) bound
97+
, bench "fromListWith-asc" $ whnf (M.fromListWith (+)) elems_asc
98+
, bench "fromListWith-asc:fusion" $
99+
whnf (\n -> M.fromListWith (+) [(i `div` 2, i) | i <- [1..n]]) bound
100+
, bench "fromListWith-desc" $ whnf (M.fromListWith (+)) elems_desc
101+
, bench "fromListWith-desc:fusion" $
102+
whnf (\n -> M.fromListWith (+) [(i `div` 2, i) | i <- [n,n-1..1]]) bound
103+
, bench "fromListWithKey-asc" $ whnf (M.fromListWithKey sumkv) elems_asc
104+
, bench "fromListWithKey-asc:fusion" $
105+
whnf (\n -> M.fromListWithKey sumkv [(i `div` 2, i) | i <- [1..n]]) bound
106+
, bench "fromListWithKey-desc" $ whnf (M.fromListWithKey sumkv) elems_desc
107+
, bench "fromListWithKey-desc:fusion" $
108+
whnf (\n -> M.fromListWithKey sumkv [(i `div` 2, i) | i <- [n,n-1..1]]) bound
92109
, bench "fromAscList" $ whnf M.fromAscList elems_asc
93110
, bench "fromAscList:fusion" $
94111
whnf (\n -> M.fromAscList [(i `div` 2, i) | i <- [1..n]]) bound
@@ -113,6 +130,10 @@ main = do
113130
, bgroup "folds" $ foldBenchmarks M.foldr M.foldl M.foldr' M.foldl' foldMap m
114131
, bgroup "folds with key" $
115132
foldWithKeyBenchmarks M.foldrWithKey M.foldlWithKey M.foldrWithKey' M.foldlWithKey' M.foldMapWithKey m
133+
, bench "mapKeys:asc" $ whnf (M.mapKeys (+1)) m
134+
, bench "mapKeys:desc" $ whnf (M.mapKeys (negate . (+1))) m
135+
, bench "mapKeysWith:asc" $ whnf (M.mapKeysWith (+) (`div` 2)) m
136+
, bench "mapKeysWith:desc" $ whnf (M.mapKeysWith (+) (negate . (`div` 2))) m
116137
]
117138
where
118139
bound = 2^12

containers-tests/benchmarks/Set.hs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ main = do
2222
defaultMain
2323
[ bench "member" $ whnf (member elems) s
2424
, bench "insert" $ whnf (ins elems) S.empty
25-
, bench "map" $ whnf (S.map (+ 1)) s
25+
, bench "map:asc" $ whnf (S.map (+ 1)) s
26+
, bench "map:desc" $ whnf (S.map (negate . (+ 1))) s
2627
, bench "filter" $ whnf (S.filter ((== 0) . (`mod` 2))) s
2728
, bench "partition" $ whnf (S.partition ((== 0) . (`mod` 2))) s
2829
, bench "delete" $ whnf (del elems) s
@@ -35,7 +36,10 @@ main = do
3536
, bench "difference" $ whnf (S.difference s) s_even
3637
, bench "intersection" $ whnf (S.intersection s) s_even
3738
, bench "fromList" $ whnf S.fromList elems
38-
, bench "fromList-desc" $ whnf S.fromList elems_desc
39+
, bench "fromList-distinctAsc" $ whnf S.fromList elems_distinct_asc
40+
, bench "fromList-distinctAsc:fusion" $ whnf (\n -> S.fromList [1..n]) bound
41+
, bench "fromList-distinctDesc" $ whnf S.fromList elems_distinct_desc
42+
, bench "fromList-distinctDesc:fusion" $ whnf (\n -> S.fromList [n,n-1..1]) bound
3943
, bench "fromAscList" $ whnf S.fromAscList elems_asc
4044
, bench "fromAscList:fusion" $
4145
whnf (\n -> S.fromAscList [i `div` 2 | i <- [1..n]]) bound

containers-tests/tests/map-properties.hs

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ main = defaultMain $ testGroup "map-properties"
201201
, testProperty "toDescList" prop_descList
202202
, testProperty "toAscList+toDescList" prop_ascDescList
203203
, testProperty "fromList" prop_fromList
204+
, testProperty "fromListWith" prop_fromListWith
205+
, testProperty "fromListWithKey" prop_fromListWithKey
204206
, testProperty "alter" prop_alter
205207
, testProperty "alterF/alter" prop_alterF_alter
206208
, testProperty "alterF/alter/noRULES" prop_alterF_alter_noRULES
@@ -229,7 +231,8 @@ main = defaultMain $ testGroup "map-properties"
229231
, testProperty "partition" prop_partition
230232
, testProperty "map" prop_map
231233
, testProperty "fmap" prop_fmap
232-
, testProperty "mapkeys" prop_mapkeys
234+
, testProperty "mapKeys" prop_mapKeys
235+
, testProperty "mapKeysWith" prop_mapKeysWith
233236
, testProperty "split" prop_splitModel
234237
, testProperty "fold" prop_fold
235238
, testProperty "foldMap" prop_foldMap
@@ -1338,6 +1341,16 @@ prop_fromDistinctAscList kxs =
13381341
List.sortBy (comparing fst) kxs
13391342
t = fromDistinctAscList nubSortedKxs
13401343

1344+
prop_fromListWith :: Fun (A, A) A -> [(Int, A)] -> Property
1345+
prop_fromListWith f kxs =
1346+
fromListWith (applyFun2 f) kxs ===
1347+
List.foldl' (\m (kx, x) -> insertWith (applyFun2 f) kx x m) empty kxs
1348+
1349+
prop_fromListWithKey :: Fun (Int, A, A) A -> [(Int, A)] -> Property
1350+
prop_fromListWithKey f kxs =
1351+
fromListWithKey (applyFun3 f) kxs ===
1352+
List.foldl' (\m (kx, x) -> insertWithKey (applyFun3 f) kx x m) empty kxs
1353+
13411354
----------------------------------------------------------------
13421355

13431356
prop_alter :: UMap -> Int -> Bool
@@ -1543,11 +1556,15 @@ prop_fmap f ys = length ys > 0 ==>
15431556
m = fromList xs
15441557
in fmap (apply f) m == fromList [ (a, (apply f) b) | (a,b) <- xs ]
15451558

1546-
prop_mapkeys :: Fun Int Int -> [(Int, Int)] -> Property
1547-
prop_mapkeys f ys = length ys > 0 ==>
1548-
let xs = List.nubBy ((==) `on` fst) ys
1549-
m = fromList xs
1550-
in mapKeys (apply f) m == (fromList $ List.nubBy ((==) `on` fst) $ reverse [ (apply f a, b) | (a,b) <- sort xs])
1559+
prop_mapKeys :: Fun Int Int -> Map Int A -> Property
1560+
prop_mapKeys f m =
1561+
mapKeys (applyFun f) m ===
1562+
fromList (fmap (\(kx,x) -> (applyFun f kx, x)) (toList m))
1563+
1564+
prop_mapKeysWith :: Fun (A, A) A -> Fun Int Int -> Map Int A -> Property
1565+
prop_mapKeysWith f g m =
1566+
mapKeysWith (applyFun2 f) (applyFun g) m ===
1567+
fromListWith (applyFun2 f) (fmap (\(kx,x) -> (applyFun g kx, x)) (toList m))
15511568

15521569
prop_splitModel :: Int -> [(Int, Int)] -> Property
15531570
prop_splitModel n ys = length ys > 0 ==>

containers/changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@
6363
* Improved performance for `Data.Set` and `Data.Map`'s `fromAscList*` and
6464
`fromDescList*` functions.
6565

66+
* Improved performance for `Data.Set`'s `fromList`, `map` and `Data.Map`'s
67+
`fromList`, `fromListWith`, `fromListWithKey`, `mapKeys`, `mapKeysWith`.
68+
6669
## Unreleased with `@since` annotation for 0.7.1:
6770

6871
### Additions

containers/src/Data/Map/Internal.hs

Lines changed: 75 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,10 @@ module Data.Map.Internal (
367367
, Identity(..)
368368
, Stack(..)
369369
, foldl'Stack
370+
, MapBuilder(..)
371+
, emptyB
372+
, insertB
373+
, finishB
370374

371375
-- Used by Map.Merge.Lazy
372376
, mapWhenMissing
@@ -388,7 +392,6 @@ import Data.Semigroup (Semigroup((<>)))
388392
#endif
389393
import Control.Applicative (Const (..))
390394
import Control.DeepSeq (NFData(rnf),NFData1(liftRnf),NFData2(liftRnf2))
391-
import Data.Bits (shiftL, shiftR)
392395
import qualified Data.Foldable as Foldable
393396
import Data.Bifoldable
394397
import Utils.Containers.Internal.Prelude hiding
@@ -3247,6 +3250,8 @@ mapAccumRWithKey f a (Bin sx kx x l r) =
32473250
-- | \(O(n \log n)\).
32483251
-- @'mapKeys' f s@ is the map obtained by applying @f@ to each key of @s@.
32493252
--
3253+
-- If `f` is monotonically non-decreasing, this function takes \(O(n)\) time.
3254+
--
32503255
-- The size of the result may be smaller if @f@ maps two or more distinct
32513256
-- keys to the same new key. In this case the value at the greatest of the
32523257
-- original keys is retained.
@@ -3256,14 +3261,16 @@ mapAccumRWithKey f a (Bin sx kx x l r) =
32563261
-- > mapKeys (\ _ -> 3) (fromList [(1,"b"), (2,"a"), (3,"d"), (4,"c")]) == singleton 3 "c"
32573262

32583263
mapKeys :: Ord k2 => (k1->k2) -> Map k1 a -> Map k2 a
3259-
mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []
3264+
mapKeys f m = finishB (foldlWithKey' (\b kx x -> insertB (f kx) x b) emptyB m)
32603265
#if __GLASGOW_HASKELL__
32613266
{-# INLINABLE mapKeys #-}
32623267
#endif
32633268

32643269
-- | \(O(n \log n)\).
32653270
-- @'mapKeysWith' c f s@ is the map obtained by applying @f@ to each key of @s@.
32663271
--
3272+
-- If `f` is monotonically non-decreasing, this function takes \(O(n)\) time.
3273+
--
32673274
-- The size of the result may be smaller if @f@ maps two or more distinct
32683275
-- keys to the same new key. In this case the associated values will be
32693276
-- combined using @c@. The value at the greater of the two original keys
@@ -3275,7 +3282,8 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []
32753282
-- Also see the performance note on 'fromListWith'.
32763283

32773284
mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a
3278-
mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) []
3285+
mapKeysWith c f m =
3286+
finishB (foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB m)
32793287
#if __GLASGOW_HASKELL__
32803288
{-# INLINABLE mapKeysWith #-}
32813289
#endif
@@ -3520,55 +3528,20 @@ instance (Ord k) => GHCExts.IsList (Map k v) where
35203528
-- If the list contains more than one value for the same key, the last value
35213529
-- for the key is retained.
35223530
--
3523-
-- If the keys of the list are ordered, a linear-time implementation is used.
3531+
-- If the keys are in non-decreasing order, this function takes \(O(n)\) time.
35243532
--
35253533
-- > fromList [] == empty
35263534
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
35273535
-- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")]
35283536

3529-
-- For some reason, when 'singleton' is used in fromList or in
3530-
-- create, it is not inlined, so we inline it manually.
35313537
fromList :: Ord k => [(k,a)] -> Map k a
3532-
fromList [] = Tip
3533-
fromList [(kx, x)] = Bin 1 kx x Tip Tip
3534-
fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip Tip) xs0
3535-
| otherwise = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
3536-
where
3537-
not_ordered _ [] = False
3538-
not_ordered kx ((ky,_) : _) = kx >= ky
3539-
{-# INLINE not_ordered #-}
3540-
3541-
fromList' t0 xs = Foldable.foldl' ins t0 xs
3542-
where ins t (k,x) = insert k x t
3543-
3544-
go !_ t [] = t
3545-
go _ t [(kx, x)] = insertMax kx x t
3546-
go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs
3547-
| otherwise = case create s xss of
3548-
(r, ys, []) -> go (s `shiftL` 1) (link kx x l r) ys
3549-
(r, _, ys) -> fromList' (link kx x l r) ys
3550-
3551-
-- The create is returning a triple (tree, xs, ys). Both xs and ys
3552-
-- represent not yet processed elements and only one of them can be nonempty.
3553-
-- If ys is nonempty, the keys in ys are not ordered with respect to tree
3554-
-- and must be inserted using fromList'. Otherwise the keys have been
3555-
-- ordered so far.
3556-
create !_ [] = (Tip, [], [])
3557-
create s xs@(xp : xss)
3558-
| s == 1 = case xp of (kx, x) | not_ordered kx xss -> (Bin 1 kx x Tip Tip, [], xss)
3559-
| otherwise -> (Bin 1 kx x Tip Tip, xss, [])
3560-
| otherwise = case create (s `shiftR` 1) xs of
3561-
res@(_, [], _) -> res
3562-
(l, [(ky, y)], zs) -> (insertMax ky y l, [], zs)
3563-
(l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys)
3564-
| otherwise -> case create (s `shiftR` 1) yss of
3565-
(r, zs, ws) -> (link ky y l r, zs, ws)
3566-
#if __GLASGOW_HASKELL__
3567-
{-# INLINABLE fromList #-}
3568-
#endif
3538+
fromList xs = finishB (Foldable.foldl' (\b (kx, x) -> insertB kx x b) emptyB xs)
3539+
{-# INLINE fromList #-} -- INLINE for fusion
35693540

35703541
-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'.
35713542
--
3543+
-- If the keys are in non-decreasing order, this function takes \(O(n)\) time.
3544+
--
35723545
-- > fromListWith (++) [(5,"a"), (5,"b"), (3,"x"), (5,"c")] == fromList [(3, "x"), (5, "cba")]
35733546
-- > fromListWith (++) [] == empty
35743547
--
@@ -3604,28 +3577,24 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip T
36043577
-- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples
36053578

36063579
fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a
3607-
fromListWith f xs
3608-
= fromListWithKey (\_ x y -> f x y) xs
3609-
#if __GLASGOW_HASKELL__
3610-
{-# INLINABLE fromListWith #-}
3611-
#endif
3580+
fromListWith f xs =
3581+
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs)
3582+
{-# INLINE fromListWith #-} -- INLINE for fusion
36123583

36133584
-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'.
36143585
--
3586+
-- If the keys are in non-decreasing order, this function takes \(O(n)\) time.
3587+
--
36153588
-- > let f key new_value old_value = show key ++ ":" ++ new_value ++ "|" ++ old_value
36163589
-- > fromListWithKey f [(5,"a"), (5,"b"), (3,"b"), (3,"a"), (5,"c")] == fromList [(3, "3:a|b"), (5, "5:c|5:b|a")]
36173590
-- > fromListWithKey f [] == empty
36183591
--
36193592
-- Also see the performance note on 'fromListWith'.
36203593

36213594
fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a
3622-
fromListWithKey f xs
3623-
= Foldable.foldl' ins empty xs
3624-
where
3625-
ins t (k,x) = insertWithKey f k x t
3626-
#if __GLASGOW_HASKELL__
3627-
{-# INLINABLE fromListWithKey #-}
3628-
#endif
3595+
fromListWithKey f xs =
3596+
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs)
3597+
{-# INLINE fromListWithKey #-} -- INLINE for fusion
36293598

36303599
-- | \(O(n)\). Convert the map to a list of key\/value pairs. Subject to list fusion.
36313600
--
@@ -3971,6 +3940,57 @@ splitMember k0 m = case go k0 m of
39713940

39723941
data StrictTriple a b c = StrictTriple !a !b !c
39733942

3943+
{--------------------------------------------------------------------
3944+
MapBuilder
3945+
--------------------------------------------------------------------}
3946+
3947+
-- See Note [SetBuilder] in Data.Set.Internal
3948+
3949+
data MapBuilder k a
3950+
= BAsc !(Stack k a)
3951+
| BMap !(Map k a)
3952+
3953+
-- Empty builder.
3954+
emptyB :: MapBuilder k a
3955+
emptyB = BAsc Nada
3956+
3957+
-- Insert a key and value. Replaces the old value if one already exists for
3958+
-- the key.
3959+
insertB :: Ord k => k -> a -> MapBuilder k a -> MapBuilder k a
3960+
insertB !ky y b = case b of
3961+
BAsc stk -> case stk of
3962+
Push kx x l stk' -> case compare ky kx of
3963+
LT -> BMap (insert ky y (ascLinkAll stk))
3964+
EQ -> BAsc (Push ky y l stk')
3965+
GT -> case l of
3966+
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
3967+
Bin{} -> BAsc (Push ky y Tip stk)
3968+
Nada -> BAsc (Push ky y Tip Nada)
3969+
BMap m -> BMap (insert ky y m)
3970+
{-# INLINE insertB #-}
3971+
3972+
-- Insert a key and value. The new value is combined with the old value if one
3973+
-- already exists for the key.
3974+
insertWithB
3975+
:: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a
3976+
insertWithB f !ky y b = case b of
3977+
BAsc stk -> case stk of
3978+
Push kx x l stk' -> case compare ky kx of
3979+
LT -> BMap (insertWith f ky y (ascLinkAll stk))
3980+
EQ -> BAsc (Push ky (f y x) l stk')
3981+
GT -> case l of
3982+
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
3983+
Bin{} -> BAsc (Push ky y Tip stk)
3984+
Nada -> BAsc (Push ky y Tip Nada)
3985+
BMap m -> BMap (insertWith f ky y m)
3986+
{-# INLINE insertWithB #-}
3987+
3988+
-- Finalize the builder into a Map.
3989+
finishB :: MapBuilder k a -> Map k a
3990+
finishB (BAsc stk) = ascLinkAll stk
3991+
finishB (BMap m) = m
3992+
{-# INLINABLE finishB #-}
3993+
39743994
{--------------------------------------------------------------------
39753995
Utility functions that maintain the balance properties of the tree.
39763996
All constructors assume that all values in [l] < [k] and all values

0 commit comments

Comments
 (0)