@@ -290,6 +290,10 @@ module Data.IntMap.Internal (
290
290
, bin
291
291
, binCheckLeft
292
292
, binCheckRight
293
+ , MonoState (.. )
294
+ , Stack (.. )
295
+ , ascLinkTop
296
+ , ascLinkAll
293
297
294
298
-- * Used by "IntMap.Merge.Lazy" and "IntMap.Merge.Strict"
295
299
, mapWhenMissing
@@ -3385,8 +3389,8 @@ fromListWithKey f xs
3385
3389
-- > fromAscList [(3,"b"), (5,"a"), (5,"b")] == fromList [(3, "b"), (5, "b")]
3386
3390
3387
3391
fromAscList :: [(Key ,a )] -> IntMap a
3388
- fromAscList = fromMonoListWithKey Nondistinct (\ _ x _ -> x)
3389
- {-# NOINLINE fromAscList #-}
3392
+ fromAscList xs = fromAscListWithKey (\ _ x _ -> x) xs
3393
+ {-# INLINE fromAscList #-} -- Inline for list fusion
3390
3394
3391
3395
-- | \(O(n)\). Build a map from a list of key\/value pairs where
3392
3396
-- the keys are in ascending order, with a combining function on equal keys.
@@ -3400,8 +3404,8 @@ fromAscList = fromMonoListWithKey Nondistinct (\_ x _ -> x)
3400
3404
-- Also see the performance note on 'fromListWith'.
3401
3405
3402
3406
fromAscListWith :: (a -> a -> a ) -> [(Key ,a )] -> IntMap a
3403
- fromAscListWith f = fromMonoListWithKey Nondistinct (\ _ x y -> f x y)
3404
- {-# NOINLINE fromAscListWith #-}
3407
+ fromAscListWith f xs = fromAscListWithKey (\ _ x y -> f x y) xs
3408
+ {-# INLINE fromAscListWith #-} -- Inline for list fusion
3405
3409
3406
3410
-- | \(O(n)\). Build a map from a list of key\/value pairs where
3407
3411
-- the keys are in ascending order, with a combining function on equal keys.
@@ -3415,86 +3419,66 @@ fromAscListWith f = fromMonoListWithKey Nondistinct (\_ x y -> f x y)
3415
3419
--
3416
3420
-- Also see the performance note on 'fromListWith'.
3417
3421
3422
+ -- See Note [fromAscList implementation]
3418
3423
fromAscListWithKey :: (Key -> a -> a -> a ) -> [(Key ,a )] -> IntMap a
3419
- fromAscListWithKey f = fromMonoListWithKey Nondistinct f
3420
- {-# NOINLINE fromAscListWithKey #-}
3424
+ fromAscListWithKey f xs = ascLinkAll (Foldable. foldl' next MSNada xs)
3425
+ where
3426
+ next s (! ky, y) = case s of
3427
+ MSNada -> MSPush ky y Nada
3428
+ MSPush kx x stk
3429
+ | kx == ky -> MSPush ky (f ky y x) stk
3430
+ | otherwise -> let m = branchMask kx ky
3431
+ in MSPush ky y (ascLinkTop stk kx (Tip kx x) m)
3432
+ {-# INLINE fromAscListWithKey #-} -- Inline for list fusion
3421
3433
3422
3434
-- | \(O(n)\). Build a map from a list of key\/value pairs where
3423
3435
-- the keys are in ascending order and all distinct.
3424
3436
--
3425
- -- __Warning__: This function should be used only if the keys are in
3426
- -- strictly increasing order. This precondition is not checked. Use 'fromList'
3427
- -- if the precondition may not hold.
3428
- --
3429
- -- > fromDistinctAscList [(3,"b"), (5,"a")] == fromList [(3, "b"), (5, "a")]
3430
-
3431
- fromDistinctAscList :: [(Key ,a )] -> IntMap a
3432
- fromDistinctAscList = fromMonoListWithKey Distinct (\ _ x _ -> x)
3433
- {-# NOINLINE fromDistinctAscList #-}
3434
-
3435
- -- | \(O(n)\). Build a map from a list of key\/value pairs with monotonic keys
3436
- -- and a combining function.
3437
+ -- @fromDistinctAscList = 'fromAscList'@
3437
3438
--
3438
- -- The precise conditions under which this function works are subtle:
3439
- -- For any branch mask, keys with the same prefix w.r.t. the branch
3440
- -- mask must occur consecutively in the list.
3439
+ -- See warning on 'fromAscList'.
3441
3440
--
3442
- -- Also see the performance note on 'fromListWith'.
3443
-
3444
- fromMonoListWithKey :: Distinct -> (Key -> a -> a -> a ) -> [(Key ,a )] -> IntMap a
3445
- fromMonoListWithKey distinct f = go
3446
- where
3447
- go [] = Nil
3448
- go ((kx,vx) : zs1) = addAll' kx vx zs1
3449
-
3450
- -- `addAll'` collects all keys equal to `kx` into a single value,
3451
- -- and then proceeds with `addAll`.
3452
- addAll' ! kx vx []
3453
- = Tip kx vx
3454
- addAll' ! kx vx ((ky,vy) : zs)
3455
- | Nondistinct <- distinct, kx == ky
3456
- = let v = f kx vy vx in addAll' ky v zs
3457
- -- inlined: | otherwise = addAll kx (Tip kx vx) (ky : zs)
3458
- | m <- branchMask kx ky
3459
- , Inserted ty zs' <- addMany' m ky vy zs
3460
- = addAll kx (linkWithMask m ky ty kx (Tip kx vx)) zs'
3461
-
3462
- -- for `addAll` and `addMany`, kx is /a/ key inside the tree `tx`
3463
- -- `addAll` consumes the rest of the list, adding to the tree `tx`
3464
- addAll ! _kx ! tx []
3465
- = tx
3466
- addAll ! kx ! tx ((ky,vy) : zs)
3467
- | m <- branchMask kx ky
3468
- , Inserted ty zs' <- addMany' m ky vy zs
3469
- = addAll kx (linkWithMask m ky ty kx tx) zs'
3470
-
3471
- -- `addMany'` is similar to `addAll'`, but proceeds with `addMany'`.
3472
- addMany' ! _m ! kx vx []
3473
- = Inserted (Tip kx vx) []
3474
- addMany' ! m ! kx vx zs0@ ((ky,vy) : zs)
3475
- | Nondistinct <- distinct, kx == ky
3476
- = let v = f kx vy vx in addMany' m ky v zs
3477
- -- inlined: | otherwise = addMany m kx (Tip kx vx) (ky : zs)
3478
- | mask kx m /= mask ky m
3479
- = Inserted (Tip kx vx) zs0
3480
- | mxy <- branchMask kx ky
3481
- , Inserted ty zs' <- addMany' mxy ky vy zs
3482
- = addMany m kx (linkWithMask mxy ky ty kx (Tip kx vx)) zs'
3483
-
3484
- -- `addAll` adds to `tx` all keys whose prefix w.r.t. `m` agrees with `kx`.
3485
- addMany ! _m ! _kx tx []
3486
- = Inserted tx []
3487
- addMany ! m ! kx tx zs0@ ((ky,vy) : zs)
3488
- | mask kx m /= mask ky m
3489
- = Inserted tx zs0
3490
- | mxy <- branchMask kx ky
3491
- , Inserted ty zs' <- addMany' mxy ky vy zs
3492
- = addMany m kx (linkWithMask mxy ky ty kx tx) zs'
3493
- {-# INLINE fromMonoListWithKey #-}
3494
-
3495
- data Inserted a = Inserted ! (IntMap a ) ! [(Key ,a )]
3496
-
3497
- data Distinct = Distinct | Nondistinct
3441
+ -- This definition exists for backwards compatibility. It offers no advantage
3442
+ -- over @fromAscList@.
3443
+ fromDistinctAscList :: [(Key ,a )] -> IntMap a
3444
+ -- Note: There is nothing we can optimize compared to fromAscList.
3445
+ -- The adjacent key equals check (kx == ky) might seem unnecessary for
3446
+ -- fromDistinctAscList, but it guards branchMask which has undefined behavior
3447
+ -- under that case. We could error on kx == ky instead, but that isn't any
3448
+ -- better.
3449
+ fromDistinctAscList = fromAscList
3450
+ {-# INLINE fromDistinctAscList #-} -- Inline for list fusion
3451
+
3452
+ data Stack a
3453
+ = Nada
3454
+ | Push {- # UNPACK #-} !Int ! (IntMap a ) ! (Stack a )
3455
+
3456
+ data MonoState a
3457
+ = MSNada
3458
+ | MSPush {- # UNPACK #-} !Key a ! (Stack a )
3459
+
3460
+ ascLinkTop :: Stack a -> Int -> IntMap a -> Int -> Stack a
3461
+ ascLinkTop stk ! rk r ! rm = case stk of
3462
+ Nada -> Push rm r stk
3463
+ Push m l stk'
3464
+ | i2w m < i2w rm -> let p = Prefix (mask rk m .|. m)
3465
+ in ascLinkTop stk' rk (Bin p l r) rm
3466
+ | otherwise -> Push rm r stk
3467
+
3468
+ ascLinkAll :: MonoState a -> IntMap a
3469
+ ascLinkAll s = case s of
3470
+ MSNada -> Nil
3471
+ MSPush kx x stk -> ascLinkStack stk kx (Tip kx x)
3472
+ {-# INLINABLE ascLinkAll #-}
3473
+
3474
+ ascLinkStack :: Stack a -> Int -> IntMap a -> IntMap a
3475
+ ascLinkStack stk ! rk r = case stk of
3476
+ Nada -> r
3477
+ Push m l stk'
3478
+ | signBranch p -> Bin p r l
3479
+ | otherwise -> ascLinkStack stk' rk (Bin p l r)
3480
+ where
3481
+ p = Prefix (mask rk m .|. m)
3498
3482
3499
3483
{- -------------------------------------------------------------------
3500
3484
Eq
@@ -3884,3 +3868,33 @@ withEmpty bars = " ":bars
3884
3868
-- * This is similar to the Map merge complexity, which is O(m log (n/m)).
3885
3869
-- * When m is a small constant the term simplifies to O(min(n, W)), which is
3886
3870
-- just the complexity we expect for single operations like insert and delete.
3871
+
3872
+ -- Note [fromAscList implementation]
3873
+ -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3874
+ -- fromAscList is an implementation that builds up the result bottom-up
3875
+ -- in linear time. It maintains a state (MonoState) that gets updated with
3876
+ -- key-value pairs from the input list one at a time. The state contains the
3877
+ -- last key-value pair, and a stack of pending trees.
3878
+ --
3879
+ -- For a new key-value pair, the branchMask with the previous key is computed.
3880
+ -- This represents the depth of the lowest common ancestor that the tree with
3881
+ -- the previous key, say tl, and the tree with the new key, tr, must have in
3882
+ -- the final result. Since the keys are in ascending order we expect no more
3883
+ -- keys in tl, and we can build it by moving up the stack and linking trees. We
3884
+ -- know when to stop by the branchMask value. We must not link higher than that
3885
+ -- depth, otherwise instead of tl we will build the parent of tl prematurely
3886
+ -- before tr is ready. Once the linking is done, tl will be at the top of the
3887
+ -- stack.
3888
+ --
3889
+ -- We also store the branchMask of a tree with its future right sibling in the
3890
+ -- stack. This is an optimization, benchmarks show that this is faster than
3891
+ -- recomputing the branchMask values when linking trees.
3892
+ --
3893
+ -- In the end, we link all the trees remaining in the stack. There is a small
3894
+ -- catch: negative keys appear in the input before non-negative keys (if they
3895
+ -- both appear), but the tree with negative keys and the tree with non-negative
3896
+ -- keys must be the right and left child of the root respectively. So we check
3897
+ -- for this and link them accordingly.
3898
+ --
3899
+ -- The implementation is defined as a foldl' over the input list, which makes
3900
+ -- it a good consumer in list fusion.
0 commit comments