Skip to content

Commit 7aff529

Browse files
jwaldmanntreeowl
authored andcommitted
test that instances for Eq and Ord agree with going via toAscList (#670)
* Test that instances for Eq and Ord agree with going via toAscList * Add benchmark for "instance Ord IntSet", using "Set IntSet" * Improve implementation of "instance Ord IntSet" that avoids toAscList and walks the tree directly. See #470
1 parent eb55a75 commit 7aff529

File tree

3 files changed

+264
-35
lines changed

3 files changed

+264
-35
lines changed

containers-tests/benchmarks/IntSet.hs

Lines changed: 102 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,123 @@
1-
{-# LANGUAGE BangPatterns #-}
1+
{-# LANGUAGE CPP #-}
2+
{-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving #-}
23

34
module Main where
45

56
import Control.DeepSeq (rnf)
67
import Control.Exception (evaluate)
78
import Gauge (bench, defaultMain, whnf)
89
import Data.List (foldl')
9-
import qualified Data.IntSet as S
10+
import Data.Monoid (Sum(..))
11+
#if !MIN_VERSION_base(4,8,0)
12+
import Data.Foldable (foldMap)
13+
#endif
14+
import qualified Data.IntSet as IS
15+
-- benchmarks for "instance Ord IntSet"
16+
-- uses IntSet as keys of maps, and elements of sets
17+
import qualified Data.Set as S
18+
import qualified Data.IntMap as IM
19+
import qualified Data.Map.Strict as M
1020

1121
main = do
12-
let s = S.fromAscList elems :: S.IntSet
13-
s_even = S.fromAscList elems_even :: S.IntSet
14-
s_odd = S.fromAscList elems_odd :: S.IntSet
22+
let s = IS.fromAscList elems :: IS.IntSet
23+
s_even = IS.fromAscList elems_even :: IS.IntSet
24+
s_odd = IS.fromAscList elems_odd :: IS.IntSet
1525
evaluate $ rnf [s, s_even, s_odd]
1626
defaultMain
1727
[ bench "member" $ whnf (member elems) s
18-
, bench "insert" $ whnf (ins elems) S.empty
19-
, bench "map" $ whnf (S.map (+ 1)) s
20-
, bench "filter" $ whnf (S.filter ((== 0) . (`mod` 2))) s
21-
, bench "partition" $ whnf (S.partition ((== 0) . (`mod` 2))) s
22-
, bench "fold" $ whnf (S.fold (:) []) s
28+
, bench "insert" $ whnf (ins elems) IS.empty
29+
, bench "map" $ whnf (IS.map (+ 1)) s
30+
, bench "filter" $ whnf (IS.filter ((== 0) . (`mod` 2))) s
31+
, bench "partition" $ whnf (IS.partition ((== 0) . (`mod` 2))) s
32+
, bench "fold" $ whnf (IS.fold (:) []) s
2333
, bench "delete" $ whnf (del elems) s
24-
, bench "findMin" $ whnf S.findMin s
25-
, bench "findMax" $ whnf S.findMax s
26-
, bench "deleteMin" $ whnf S.deleteMin s
27-
, bench "deleteMax" $ whnf S.deleteMax s
28-
, bench "unions" $ whnf S.unions [s_even, s_odd]
29-
, bench "union" $ whnf (S.union s_even) s_odd
30-
, bench "difference" $ whnf (S.difference s) s_even
31-
, bench "intersection" $ whnf (S.intersection s) s_even
32-
, bench "fromList" $ whnf S.fromList elems
33-
, bench "fromAscList" $ whnf S.fromAscList elems
34-
, bench "fromDistinctAscList" $ whnf S.fromDistinctAscList elems
35-
, bench "disjoint:false" $ whnf (S.disjoint s) s_even
36-
, bench "disjoint:true" $ whnf (S.disjoint s_odd) s_even
37-
, bench "null.intersection:false" $ whnf (S.null. S.intersection s) s_even
38-
, bench "null.intersection:true" $ whnf (S.null. S.intersection s_odd) s_even
34+
, bench "findMin" $ whnf IS.findMin s
35+
, bench "findMax" $ whnf IS.findMax s
36+
, bench "deleteMin" $ whnf IS.deleteMin s
37+
, bench "deleteMax" $ whnf IS.deleteMax s
38+
, bench "unions" $ whnf IS.unions [s_even, s_odd]
39+
, bench "union" $ whnf (IS.union s_even) s_odd
40+
, bench "difference" $ whnf (IS.difference s) s_even
41+
, bench "intersection" $ whnf (IS.intersection s) s_even
42+
, bench "fromList" $ whnf IS.fromList elems
43+
, bench "fromAscList" $ whnf IS.fromAscList elems
44+
, bench "fromDistinctAscList" $ whnf IS.fromDistinctAscList elems
45+
, bench "disjoint:false" $ whnf (IS.disjoint s) s_even
46+
, bench "disjoint:true" $ whnf (IS.disjoint s_odd) s_even
47+
, bench "null.intersection:false" $ whnf (IS.null. IS.intersection s) s_even
48+
, bench "null.intersection:true" $ whnf (IS.null. IS.intersection s_odd) s_even
49+
, bench "instanceOrd:dense" -- the IntSet will just use one Tip
50+
$ whnf (num_transitions . det 2 0) $ hard_nfa 1 16
51+
, bench "instanceOrd:sparse" -- many Bin, each Tip is singleton
52+
$ whnf (num_transitions . det 2 0) $ hard_nfa 1111 16
3953
]
4054
where
4155
elems = [1..2^12]
4256
elems_even = [2,4..2^12]
4357
elems_odd = [1,3..2^12]
4458

45-
member :: [Int] -> S.IntSet -> Int
46-
member xs s = foldl' (\n x -> if S.member x s then n + 1 else n) 0 xs
59+
member :: [Int] -> IS.IntSet -> Int
60+
member xs s = foldl' (\n x -> if IS.member x s then n + 1 else n) 0 xs
4761

48-
ins :: [Int] -> S.IntSet -> S.IntSet
49-
ins xs s0 = foldl' (\s a -> S.insert a s) s0 xs
62+
ins :: [Int] -> IS.IntSet -> IS.IntSet
63+
ins xs s0 = foldl' (\s a -> IS.insert a s) s0 xs
5064

51-
del :: [Int] -> S.IntSet -> S.IntSet
52-
del xs s0 = foldl' (\s k -> S.delete k s) s0 xs
65+
del :: [Int] -> IS.IntSet -> IS.IntSet
66+
del xs s0 = foldl' (\s k -> IS.delete k s) s0 xs
67+
68+
69+
70+
-- | Automata contain just the transitions
71+
type NFA = IM.IntMap (IM.IntMap IS.IntSet)
72+
type DFA = IM.IntMap (M.Map IS.IntSet IS.IntSet)
73+
74+
newtype State = State Int deriving (Num, Enum)
75+
instance Show State where show (State s) = show s
76+
newtype Sigma = Sigma Int deriving (Num, Enum, Eq)
77+
78+
num_transitions :: DFA -> Int
79+
num_transitions = getSum . foldMap (Sum . M.size)
80+
81+
det :: Sigma -> State -> NFA -> DFA
82+
det sigma (State initial) aut =
83+
let get :: State -> Sigma -> IS.IntSet
84+
get (State p) (Sigma s) = IM.findWithDefault IS.empty p
85+
$ IM.findWithDefault IM.empty s aut
86+
go :: DFA -> S.Set IS.IntSet -> S.Set IS.IntSet -> DFA
87+
go !accu !done !todo = case S.minView todo of
88+
Nothing -> accu
89+
Just (t, odo) ->
90+
if S.member t done
91+
then go accu done odo
92+
else let ts = do
93+
s <- [0 .. sigma-1]
94+
let next :: IS.IntSet
95+
next = foldMap (\p -> get (State p) s) $ IS.toList t
96+
return (t, s, next)
97+
in go (union_dfa (dfa ts) accu)
98+
(S.insert t done)
99+
(Data.List.foldl' (\ o (_,_,q) -> S.insert q o) odo ts)
100+
in go IM.empty S.empty $ S.singleton $ IS.singleton initial
101+
102+
nfa :: [(State,Sigma,State)] -> NFA
103+
nfa ts = IM.fromListWith ( IM.unionWith IS.union )
104+
$ Prelude.map (\(State p,Sigma s,State q) ->
105+
(s, IM.singleton p (IS.singleton q))) ts
106+
107+
dfa :: [(IS.IntSet, Sigma, IS.IntSet)] -> DFA
108+
dfa ts = IM.fromListWith ( M.unionWith ( error "WAT") )
109+
$ Prelude.map (\( p, Sigma s, q) ->
110+
(s, M.singleton p q)) ts
111+
112+
union_dfa a b = IM.unionWith (M.unionWith (error "WAT")) a b
113+
114+
-- | for the language Sigma^* 1 Sigma^{n-2} where Sigma={0,1}.
115+
-- this NFA has n states. DFA has 2^(n-1) states
116+
-- since it needs to remember the last n characters.
117+
-- Extra parameter delta: the automaton will use states [0, delta .. ]
118+
-- for IntSet, larger deltas should be harder,
119+
-- since for delta=1, all the states do fit in one Tip
120+
hard_nfa :: State -> Int -> NFA
121+
hard_nfa delta n = nfa
122+
$ [ (0, 0, 0), (0,1,0), (0, 1, delta) ]
123+
++ do k <- [1 .. State n - 2] ; c <- [0,1] ; return (delta * k,c,delta *(k+1))

containers-tests/tests/intset-properties.hs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ main = defaultMain [ testCase "lookupLT" test_lookupLT
2424
, testProperty "prop_EmptyValid" prop_EmptyValid
2525
, testProperty "prop_SingletonValid" prop_SingletonValid
2626
, testProperty "prop_InsertIntoEmptyValid" prop_InsertIntoEmptyValid
27+
, testProperty "prop_instanceEqIntSet" prop_instanceEqIntSet
28+
, testProperty "prop_instanceOrdIntSet" prop_instanceOrdIntSet
2729
, testProperty "prop_Single" prop_Single
2830
, testProperty "prop_Member" prop_Member
2931
, testProperty "prop_NotMember" prop_NotMember
@@ -141,6 +143,16 @@ prop_InsertIntoEmptyValid :: Int -> Property
141143
prop_InsertIntoEmptyValid x =
142144
valid (insert x empty)
143145

146+
{--------------------------------------------------------------------
147+
Instances for Eq and Ord
148+
--------------------------------------------------------------------}
149+
150+
prop_instanceEqIntSet :: IntSet -> IntSet -> Bool
151+
prop_instanceEqIntSet x y = (x == y) == (toAscList x == toAscList y)
152+
153+
prop_instanceOrdIntSet :: IntSet -> IntSet -> Bool
154+
prop_instanceOrdIntSet x y = (compare x y) == (compare (toAscList x) (toAscList y))
155+
144156
{--------------------------------------------------------------------
145157
Single, Member, Insert, Delete, Member, FromList
146158
--------------------------------------------------------------------}

containers/src/Data/IntSet/Internal.hs

Lines changed: 150 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,8 @@ import Utils.Containers.Internal.BitUtil
211211
import Utils.Containers.Internal.StrictPair
212212

213213
#if __GLASGOW_HASKELL__
214-
import Data.Data (Data(..), Constr, mkConstr, constrIndex, Fixity(Prefix), DataType, mkDataType)
214+
import Data.Data (Data(..), Constr, mkConstr, constrIndex, DataType, mkDataType)
215+
import qualified Data.Data
215216
import Text.Read
216217
#endif
217218

@@ -311,7 +312,7 @@ instance Data IntSet where
311312
dataTypeOf _ = intSetDataType
312313

313314
fromListConstr :: Constr
314-
fromListConstr = mkConstr intSetDataType "fromList" [] Prefix
315+
fromListConstr = mkConstr intSetDataType "fromList" [] Data.Data.Prefix
315316

316317
intSetDataType :: DataType
317318
intSetDataType = mkDataType "Data.IntSet.Internal.IntSet" [fromListConstr]
@@ -1173,8 +1174,153 @@ nequal _ _ = True
11731174
--------------------------------------------------------------------}
11741175

11751176
instance Ord IntSet where
1176-
compare s1 s2 = compare (toAscList s1) (toAscList s2)
1177-
-- tentative implementation. See if more efficient exists.
1177+
compare Nil Nil = EQ
1178+
compare Nil _ = LT
1179+
compare _ Nil = GT
1180+
compare t1@(Tip _ _) t2@(Tip _ _)
1181+
= orderingOf $ relateTipTip t1 t2
1182+
compare xs ys
1183+
| (xsNeg, xsNonNeg) <- splitSign xs
1184+
, (ysNeg, ysNonNeg) <- splitSign ys
1185+
= case relate xsNeg ysNeg of
1186+
Less -> LT
1187+
Prefix -> if null xsNonNeg then LT else GT
1188+
Equals -> orderingOf (relate xsNonNeg ysNonNeg)
1189+
FlipPrefix -> if null ysNonNeg then GT else LT
1190+
Greater -> GT
1191+
1192+
-- | detailed outcome of lexicographic comparison of lists.
1193+
-- w.r.t. Ordering, there are two extra cases,
1194+
-- since (++) is not monotonic w.r.t. lex. order on lists
1195+
-- (which is used by definition):
1196+
-- consider comparison of (Bin [0,3,4] [ 6] ) to (Bin [0,3] [7] )
1197+
-- where [0,3,4] > [0,3] but [0,3,4,6] < [0,3,7].
1198+
1199+
data Relation
1200+
= Less -- ^ holds for [0,3,4] [0,3,5,1]
1201+
| Prefix -- ^ holds for [0,3,4] [0,3,4,5]
1202+
| Equals -- ^ holds for [0,3,4] [0,3,4]
1203+
| FlipPrefix -- ^ holds for [0,3,4] [0,3]
1204+
| Greater -- ^ holds for [0,3,4] [0,2,5]
1205+
deriving (Show, Eq)
1206+
1207+
orderingOf :: Relation -> Ordering
1208+
{-# INLINE orderingOf #-}
1209+
orderingOf r = case r of
1210+
Less -> LT
1211+
Prefix -> LT
1212+
Equals -> EQ
1213+
FlipPrefix -> GT
1214+
Greater -> GT
1215+
1216+
-- | precondition: each argument is non-mixed
1217+
relate :: IntSet -> IntSet -> Relation
1218+
relate Nil Nil = Equals
1219+
relate Nil t2 = Prefix
1220+
relate t1 Nil = FlipPrefix
1221+
relate t1@(Tip p1 bm1) t2@(Tip p2 bm2) = relateTipTip t1 t2
1222+
relate t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
1223+
| succUpperbound t1 <= lowerbound t2 = Less
1224+
| lowerbound t1 >= succUpperbound t2 = Greater
1225+
| otherwise = case compare (natFromInt m1) (natFromInt m2) of
1226+
GT -> combine_left (relate l1 t2)
1227+
EQ -> combine (relate l1 l2) (relate r1 r2)
1228+
LT -> combine_right (relate t1 l2)
1229+
relate t1@(Bin p1 m1 l1 r1) t2@(Tip p2 _)
1230+
| succUpperbound t1 <= lowerbound t2 = Less
1231+
| lowerbound t1 >= succUpperbound t2 = Greater
1232+
| 0 == (m1 .&. p2) = combine_left (relate l1 t2)
1233+
| otherwise = Less
1234+
relate t1@(Tip p1 _) t2@(Bin p2 m2 l2 r2)
1235+
| succUpperbound t1 <= lowerbound t2 = Less
1236+
| lowerbound t1 >= succUpperbound t2 = Greater
1237+
| 0 == (p1 .&. m2) = combine_right (relate t1 l2)
1238+
| otherwise = Greater
1239+
1240+
relateTipTip :: IntSet -> IntSet -> Relation
1241+
{-# INLINE relateTipTip #-}
1242+
relateTipTip t1@(Tip p1 bm1) t2@(Tip p2 bm2) = case compare p1 p2 of
1243+
LT -> Less
1244+
EQ -> relateBM bm1 bm2
1245+
GT -> Greater
1246+
1247+
relateBM :: BitMap -> BitMap -> Relation
1248+
{-# inline relateBM #-}
1249+
relateBM w1 w2 | w1 == w2 = Equals
1250+
relateBM w1 w2 =
1251+
let delta = xor w1 w2
1252+
lowest_diff_mask = delta .&. complement (delta-1)
1253+
prefix = (complement lowest_diff_mask + 1)
1254+
.&. (complement lowest_diff_mask)
1255+
in if 0 == lowest_diff_mask .&. w1
1256+
then if 0 == w1 .&. prefix
1257+
then Prefix else Greater
1258+
else if 0 == w2 .&. prefix
1259+
then FlipPrefix else Less
1260+
1261+
-- | This function has the property
1262+
-- relate t1@(Bin p m l1 r1) t2@(Bin p m l2 r2) = combine (relate l1 l2) (relate r1 r2)
1263+
-- It is important that `combine` is lazy in the second argument (achieved by inlining)
1264+
combine :: Relation -> Relation -> Relation
1265+
{-# inline combine #-}
1266+
combine r eq = case r of
1267+
Less -> Less
1268+
Prefix -> Greater
1269+
Equals -> eq
1270+
FlipPrefix -> Less
1271+
Greater -> Greater
1272+
1273+
-- | This function has the property
1274+
-- relate t1@(Bin p1 m1 l1 r1) t2 = combine_left (relate l1 t2)
1275+
-- under the precondition that the range of l1 contains the range of t2,
1276+
-- and r1 is non-empty
1277+
combine_left :: Relation -> Relation
1278+
{-# inline combine_left #-}
1279+
combine_left r = case r of
1280+
Less -> Less
1281+
Prefix -> Greater
1282+
Equals -> FlipPrefix
1283+
FlipPrefix -> FlipPrefix
1284+
Greater -> Greater
1285+
1286+
-- | This function has the property
1287+
-- relate t1 t2@(Bin p2 m2 l2 r2) = combine_right (relate t1 l2)
1288+
-- under the precondition that the range of t1 is included in the range of l2,
1289+
-- and r2 is non-empty
1290+
combine_right :: Relation -> Relation
1291+
{-# inline combine_right #-}
1292+
combine_right r = case r of
1293+
Less -> Less
1294+
Prefix -> Prefix
1295+
Equals -> Prefix
1296+
FlipPrefix -> Less
1297+
Greater -> Greater
1298+
1299+
-- | shall only be applied to non-mixed non-Nil trees
1300+
lowerbound :: IntSet -> Int
1301+
{-# INLINE lowerbound #-}
1302+
lowerbound (Tip p _) = p
1303+
lowerbound (Bin p _ _ _) = p
1304+
1305+
-- | this is one more than the actual upper bound (to save one operation)
1306+
-- shall only be applied to non-mixed non-Nil trees
1307+
succUpperbound :: IntSet -> Int
1308+
{-# INLINE succUpperbound #-}
1309+
succUpperbound (Tip p _) = p + wordSize
1310+
succUpperbound (Bin p m _ _) = p + shiftR m 1
1311+
1312+
-- | split a set into subsets of negative and non-negative elements
1313+
splitSign :: IntSet -> (IntSet,IntSet)
1314+
{-# INLINE splitSign #-}
1315+
splitSign t@(Tip kx _)
1316+
| kx >= 0 = (Nil, t)
1317+
| otherwise = (t, Nil)
1318+
splitSign t@(Bin p m l r)
1319+
-- m < 0 is the usual way to find out if we have positives and negatives (see findMax)
1320+
| m < 0 = (r, l)
1321+
| p < 0 = (t, Nil)
1322+
| otherwise = (Nil, t)
1323+
splitSign Nil = (Nil, Nil)
11781324

11791325
{--------------------------------------------------------------------
11801326
Show

0 commit comments

Comments
 (0)