Skip to content

Commit 77c8e5f

Browse files
authored
Merge pull request #658 from int-e/intmap-fromasclist
Improve `fromAscList` and friends for `IntMap` and `IntSet`, making them somewhat faster and much easier to understand.
2 parents 14c4611 + 48c1ca0 commit 77c8e5f

File tree

3 files changed

+218
-106
lines changed

3 files changed

+218
-106
lines changed

containers/src/Data/IntMap/Internal.hs

Lines changed: 77 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ module Data.IntMap.Internal (
266266
, natFromInt
267267
, intFromNat
268268
, link
269+
, linkWithMask
269270
, bin
270271
, binCheckLeft
271272
, binCheckRight
@@ -3111,8 +3112,8 @@ fromListWithKey f xs
31113112
-- > fromAscList [(3,"b"), (5,"a"), (5,"b")] == fromList [(3, "b"), (5, "b")]
31123113

31133114
fromAscList :: [(Key,a)] -> IntMap a
3114-
fromAscList xs
3115-
= fromAscListWithKey (\_ x _ -> x) xs
3115+
fromAscList = fromMonoListWithKey Nondistinct (\_ x _ -> x)
3116+
{-# NOINLINE fromAscList #-}
31163117

31173118
-- | /O(n)/. Build a map from a list of key\/value pairs where
31183119
-- the keys are in ascending order, with a combining function on equal keys.
@@ -3121,8 +3122,8 @@ fromAscList xs
31213122
-- > fromAscListWith (++) [(3,"b"), (5,"a"), (5,"b")] == fromList [(3, "b"), (5, "ba")]
31223123

31233124
fromAscListWith :: (a -> a -> a) -> [(Key,a)] -> IntMap a
3124-
fromAscListWith f xs
3125-
= fromAscListWithKey (\_ x y -> f x y) xs
3125+
fromAscListWith f = fromMonoListWithKey Nondistinct (\_ x y -> f x y)
3126+
{-# NOINLINE fromAscListWith #-}
31263127

31273128
-- | /O(n)/. Build a map from a list of key\/value pairs where
31283129
-- the keys are in ascending order, with a combining function on equal keys.
@@ -3132,50 +3133,80 @@ fromAscListWith f xs
31323133
-- > fromAscListWithKey f [(3,"b"), (5,"a"), (5,"b")] == fromList [(3, "b"), (5, "5:b|a")]
31333134

31343135
fromAscListWithKey :: (Key -> a -> a -> a) -> [(Key,a)] -> IntMap a
3135-
fromAscListWithKey _ [] = Nil
3136-
fromAscListWithKey f (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
3137-
where
3138-
-- [combineEq f xs] combines equal elements with function [f] in an ordered list [xs]
3139-
combineEq z [] = [z]
3140-
combineEq z@(kz,zz) (x@(kx,xx):xs)
3141-
| kx==kz = let yy = f kx xx zz in combineEq (kx,yy) xs
3142-
| otherwise = z:combineEq x xs
3136+
fromAscListWithKey f = fromMonoListWithKey Nondistinct f
3137+
{-# NOINLINE fromAscListWithKey #-}
31433138

31443139
-- | /O(n)/. Build a map from a list of key\/value pairs where
31453140
-- the keys are in ascending order and all distinct.
31463141
-- /The precondition (input list is strictly ascending) is not checked./
31473142
--
31483143
-- > fromDistinctAscList [(3,"b"), (5,"a")] == fromList [(3, "b"), (5, "a")]
31493144

3150-
#if __GLASGOW_HASKELL__
3151-
fromDistinctAscList :: forall a. [(Key,a)] -> IntMap a
3152-
#else
3153-
fromDistinctAscList :: [(Key,a)] -> IntMap a
3154-
#endif
3155-
fromDistinctAscList [] = Nil
3156-
fromDistinctAscList (z0 : zs0) = work z0 zs0 Nada
3157-
where
3158-
work (kx,vx) [] stk = finish kx (Tip kx vx) stk
3159-
work (kx,vx) (z@(kz,_):zs) stk = reduce z zs (branchMask kx kz) kx (Tip kx vx) stk
3160-
3161-
#if __GLASGOW_HASKELL__
3162-
reduce :: (Key,a) -> [(Key,a)] -> Mask -> Prefix -> IntMap a -> Stack a -> IntMap a
3163-
#endif
3164-
reduce z zs _ px tx Nada = work z zs (Push px tx Nada)
3165-
reduce z zs m px tx stk@(Push py ty stk') =
3166-
let mxy = branchMask px py
3167-
pxy = mask px mxy
3168-
in if shorter m mxy
3169-
then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
3170-
else work z zs (Push px tx stk)
3171-
3172-
finish _ t Nada = t
3173-
finish px tx (Push py ty stk) = finish p (link py ty px tx) stk
3174-
where m = branchMask px py
3175-
p = mask px m
3145+
fromDistinctAscList :: [(Key,a)] -> IntMap a
3146+
fromDistinctAscList = fromMonoListWithKey Distinct (\_ x _ -> x)
3147+
{-# NOINLINE fromDistinctAscList #-}
31763148

3177-
data Stack a = Push {-# UNPACK #-} !Prefix !(IntMap a) !(Stack a) | Nada
3149+
-- | /O(n)/. Build a map from a list of key\/value pairs with monotonic keys
3150+
-- and a combining function.
3151+
--
3152+
-- The precise conditions under which this function works are subtle:
3153+
-- For any branch mask, keys with the same prefix w.r.t. the branch
3154+
-- mask must occur consecutively in the list.
31783155

3156+
fromMonoListWithKey :: Distinct -> (Key -> a -> a -> a) -> [(Key,a)] -> IntMap a
3157+
fromMonoListWithKey distinct f = go
3158+
where
3159+
go [] = Nil
3160+
go ((kx,vx) : zs1) = addAll' kx vx zs1
3161+
3162+
-- `addAll'` collects all keys equal to `kx` into a single value,
3163+
-- and then proceeds with `addAll`.
3164+
addAll' !kx vx []
3165+
= Tip kx vx
3166+
addAll' !kx vx ((ky,vy) : zs)
3167+
| Nondistinct <- distinct, kx == ky
3168+
= let v = f kx vy vx in addAll' ky v zs
3169+
-- inlined: | otherwise = addAll kx (Tip kx vx) (ky : zs)
3170+
| m <- branchMask kx ky
3171+
, Inserted ty zs' <- addMany' m ky vy zs
3172+
= addAll kx (linkWithMask m ky ty {-kx-} (Tip kx vx)) zs'
3173+
3174+
-- for `addAll` and `addMany`, kx is /a/ key inside the tree `tx`
3175+
-- `addAll` consumes the rest of the list, adding to the tree `tx`
3176+
addAll !kx !tx []
3177+
= tx
3178+
addAll !kx !tx ((ky,vy) : zs)
3179+
| m <- branchMask kx ky
3180+
, Inserted ty zs' <- addMany' m ky vy zs
3181+
= addAll kx (linkWithMask m ky ty {-kx-} tx) zs'
3182+
3183+
-- `addMany'` is similar to `addAll'`, but proceeds with `addMany'`.
3184+
addMany' !m !kx vx []
3185+
= Inserted (Tip kx vx) []
3186+
addMany' !m !kx vx zs0@((ky,vy) : zs)
3187+
| Nondistinct <- distinct, kx == ky
3188+
= let v = f kx vy vx in addMany' m ky v zs
3189+
-- inlined: | otherwise = addMany m kx (Tip kx vx) (ky : zs)
3190+
| mask kx m /= mask ky m
3191+
= Inserted (Tip kx vx) zs0
3192+
| mxy <- branchMask kx ky
3193+
, Inserted ty zs' <- addMany' mxy ky vy zs
3194+
= addMany m kx (linkWithMask mxy ky ty {-kx-} (Tip kx vx)) zs'
3195+
3196+
-- `addAll` adds to `tx` all keys whose prefix w.r.t. `m` agrees with `kx`.
3197+
addMany !m !kx tx []
3198+
= Inserted tx []
3199+
addMany !m !kx tx zs0@((ky,vy) : zs)
3200+
| mask kx m /= mask ky m
3201+
= Inserted tx zs0
3202+
| mxy <- branchMask kx ky
3203+
, Inserted ty zs' <- addMany' mxy ky vy zs
3204+
= addMany m kx (linkWithMask mxy ky ty {-kx-} tx) zs'
3205+
{-# INLINE fromMonoListWithKey #-}
3206+
3207+
data Inserted a = Inserted !(IntMap a) ![(Key,a)]
3208+
3209+
data Distinct = Distinct | Nondistinct
31793210

31803211
{--------------------------------------------------------------------
31813212
Eq
@@ -3297,13 +3328,17 @@ INSTANCE_TYPEABLE1(IntMap)
32973328
Link
32983329
--------------------------------------------------------------------}
32993330
link :: Prefix -> IntMap a -> Prefix -> IntMap a -> IntMap a
3300-
link p1 t1 p2 t2
3331+
link p1 t1 p2 t2 = linkWithMask (branchMask p1 p2) p1 t1 {-p2-} t2
3332+
{-# INLINE link #-}
3333+
3334+
-- `linkWithMask` is useful when the `branchMask` has already been computed
3335+
linkWithMask :: Mask -> Prefix -> IntMap a -> IntMap a -> IntMap a
3336+
linkWithMask m p1 t1 {-p2-} t2
33013337
| zero p1 m = Bin p m t1 t2
33023338
| otherwise = Bin p m t2 t1
33033339
where
3304-
m = branchMask p1 p2
33053340
p = mask p1 m
3306-
{-# INLINE link #-}
3341+
{-# INLINE linkWithMask #-}
33073342

33083343
{--------------------------------------------------------------------
33093344
@bin@ assures that we never have empty trees within a tree.

containers/src/Data/IntMap/Strict/Internal.hs

Lines changed: 71 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ import Data.IntMap.Internal
270270
, binCheckLeft
271271
, binCheckRight
272272
, link
273+
, linkWithMask
273274

274275
, (\\)
275276
, (!)
@@ -1098,8 +1099,8 @@ fromListWithKey f xs
10981099
-- > fromAscList [(3,"b"), (5,"a"), (5,"b")] == fromList [(3, "b"), (5, "b")]
10991100

11001101
fromAscList :: [(Key,a)] -> IntMap a
1101-
fromAscList xs
1102-
= fromAscListWithKey (\_ x _ -> x) xs
1102+
fromAscList = fromMonoListWithKey Nondistinct (\_ x _ -> x)
1103+
{-# NOINLINE fromAscList #-}
11031104

11041105
-- | /O(n)/. Build a map from a list of key\/value pairs where
11051106
-- the keys are in ascending order, with a combining function on equal keys.
@@ -1108,8 +1109,8 @@ fromAscList xs
11081109
-- > fromAscListWith (++) [(3,"b"), (5,"a"), (5,"b")] == fromList [(3, "b"), (5, "ba")]
11091110

11101111
fromAscListWith :: (a -> a -> a) -> [(Key,a)] -> IntMap a
1111-
fromAscListWith f xs
1112-
= fromAscListWithKey (\_ x y -> f x y) xs
1112+
fromAscListWith f = fromMonoListWithKey Nondistinct (\_ x y -> f x y)
1113+
{-# NOINLINE fromAscListWith #-}
11131114

11141115
-- | /O(n)/. Build a map from a list of key\/value pairs where
11151116
-- the keys are in ascending order, with a combining function on equal keys.
@@ -1118,14 +1119,8 @@ fromAscListWith f xs
11181119
-- > fromAscListWith (++) [(3,"b"), (5,"a"), (5,"b")] == fromList [(3, "b"), (5, "ba")]
11191120

11201121
fromAscListWithKey :: (Key -> a -> a -> a) -> [(Key,a)] -> IntMap a
1121-
fromAscListWithKey _ [] = Nil
1122-
fromAscListWithKey f (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
1123-
where
1124-
-- [combineEq f xs] combines equal elements with function [f] in an ordered list [xs]
1125-
combineEq z [] = [z]
1126-
combineEq z@(kz,zz) (x@(kx,xx):xs)
1127-
| kx==kz = let !yy = f kx xx zz in combineEq (kx,yy) xs
1128-
| otherwise = z:combineEq x xs
1122+
fromAscListWithKey f = fromMonoListWithKey Nondistinct f
1123+
{-# NOINLINE fromAscListWithKey #-}
11291124

11301125
-- | /O(n)/. Build a map from a list of key\/value pairs where
11311126
-- the keys are in ascending order and all distinct.
@@ -1134,24 +1129,69 @@ fromAscListWithKey f (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
11341129
-- > fromDistinctAscList [(3,"b"), (5,"a")] == fromList [(3, "b"), (5, "a")]
11351130

11361131
fromDistinctAscList :: [(Key,a)] -> IntMap a
1137-
fromDistinctAscList [] = Nil
1138-
fromDistinctAscList (z0 : zs0) = work z0 zs0 Nada
1139-
where
1140-
work (kx,!vx) [] stk = finish kx (Tip kx vx) stk
1141-
work (kx,!vx) (z@(kz,_):zs) stk = reduce z zs (branchMask kx kz) kx (Tip kx vx) stk
1142-
1143-
reduce :: (Key,a) -> [(Key,a)] -> Mask -> Prefix -> IntMap a -> Stack a -> IntMap a
1144-
reduce z zs _ px tx Nada = work z zs (Push px tx Nada)
1145-
reduce z zs m px tx stk@(Push py ty stk') =
1146-
let mxy = branchMask px py
1147-
pxy = mask px mxy
1148-
in if shorter m mxy
1149-
then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
1150-
else work z zs (Push px tx stk)
1151-
1152-
finish _ t Nada = t
1153-
finish px tx (Push py ty stk) = finish p (link py ty px tx) stk
1154-
where m = branchMask px py
1155-
p = mask px m
1132+
fromDistinctAscList = fromMonoListWithKey Distinct (\_ x _ -> x)
1133+
{-# NOINLINE fromDistinctAscList #-}
11561134

11571135
data Stack a = Push {-# UNPACK #-} !Prefix !(IntMap a) !(Stack a) | Nada
1136+
1137+
-- | /O(n)/. Build a map from a list of key\/value pairs with monotonic keys
1138+
-- and a combining function.
1139+
--
1140+
-- The precise conditions under which this function works are subtle:
1141+
-- For any branch mask, keys with the same prefix w.r.t. the branch
1142+
-- mask must occur consecutively in the list.
1143+
1144+
fromMonoListWithKey :: Distinct -> (Key -> a -> a -> a) -> [(Key,a)] -> IntMap a
1145+
fromMonoListWithKey distinct f = go
1146+
where
1147+
go [] = Nil
1148+
go ((kx,vx) : zs1) = addAll' kx vx zs1
1149+
1150+
-- `addAll'` collects all keys equal to `kx` into a single value,
1151+
-- and then proceeds with `addAll`.
1152+
addAll' !kx vx []
1153+
= Tip kx $! vx
1154+
addAll' !kx vx ((ky,vy) : zs)
1155+
| Nondistinct <- distinct, kx == ky
1156+
= let !v = f kx vy vx in addAll' ky v zs
1157+
-- inlined: | otherwise = addAll kx (Tip kx $! vx) (ky : zs)
1158+
| m <- branchMask kx ky
1159+
, Inserted ty zs' <- addMany' m ky vy zs
1160+
= addAll kx (linkWithMask m ky ty {-kx-} (Tip kx $! vx)) zs'
1161+
1162+
-- for `addAll` and `addMany`, kx is /a/ key inside the tree `tx`
1163+
-- `addAll` consumes the rest of the list, adding to the tree `tx`
1164+
addAll !kx !tx []
1165+
= tx
1166+
addAll !kx !tx ((ky,vy) : zs)
1167+
| m <- branchMask kx ky
1168+
, Inserted ty zs' <- addMany' m ky vy zs
1169+
= addAll kx (linkWithMask m ky ty {-kx-} tx) zs'
1170+
1171+
-- `addMany'` is similar to `addAll'`, but proceeds with `addMany'`.
1172+
addMany' !m !kx vx []
1173+
= Inserted (Tip kx $! vx) []
1174+
addMany' !m !kx vx zs0@((ky,vy) : zs)
1175+
| Nondistinct <- distinct, kx == ky
1176+
= let !v = f kx vy vx in addMany' m ky v zs
1177+
-- inlined: | otherwise = addMany m kx (Tip kx $! vx) (ky : zs)
1178+
| mask kx m /= mask ky m
1179+
= Inserted (Tip kx $! vx) zs0
1180+
| mxy <- branchMask kx ky
1181+
, Inserted ty zs' <- addMany' mxy ky vy zs
1182+
= addMany m kx (linkWithMask mxy ky ty {-kx-} (Tip kx $! vx)) zs'
1183+
1184+
-- `addAll` adds to `tx` all keys whose prefix w.r.t. `m` agrees with `kx`.
1185+
addMany !m !kx tx []
1186+
= Inserted tx []
1187+
addMany !m !kx tx zs0@((ky,vy) : zs)
1188+
| mask kx m /= mask ky m
1189+
= Inserted tx zs0
1190+
| mxy <- branchMask kx ky
1191+
, Inserted ty zs' <- addMany' mxy ky vy zs
1192+
= addMany m kx (linkWithMask mxy ky ty {-kx-} tx) zs'
1193+
{-# INLINE fromMonoListWithKey #-}
1194+
1195+
data Inserted a = Inserted !(IntMap a) ![(Key,a)]
1196+
1197+
data Distinct = Distinct | Nondistinct

0 commit comments

Comments
 (0)