never executed always true always false
1 -- |
2 -- A fast, space efficient Bloom filter implementation. A Bloom
3 -- filter is a set-like data structure that provides a probabilistic
4 -- membership test.
5 --
6 -- * Queries do not give false negatives. When an element is added to
7 -- a filter, a subsequent membership test will definitely return
8 -- 'True'.
9 --
10 -- * False positives /are/ possible. If an element has not been added
11 -- to a filter, a membership test /may/ nevertheless indicate that
12 -- the element is present.
13 --
14 -- This module provides low-level control. For an easier to use
15 -- interface, see the "Data.BloomFilter.Easy" module.
16
17 module Data.BloomFilter.Mutable (
18 -- * Overview
19 -- $overview
20
21 -- ** Ease of use
22 -- $ease
23
24 -- ** Performance
25 -- $performance
26
27 -- * Types
28 Hash,
29 MBloom,
30 MBloom',
31 CheapHashes,
32 RealHashes,
33 -- * Mutable Bloom filters
34
35 -- ** Creation
36 new,
37
38 -- ** Accessors
39 length,
40 elem,
41
42 -- ** Mutation
43 insert,
44 ) where
45
46 import Control.Monad (liftM)
47 import Control.Monad.ST (ST)
48 import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable,
49 Hashes (..), RealHashes)
50 import Data.BloomFilter.Mutable.Internal
51 import Data.Word (Word64)
52
53 import qualified Data.BloomFilter.BitVec64 as V
54
55 import Prelude hiding (elem, length)
56
-- | A mutable Bloom filter specialised to the 'CheapHashes' hashing
-- scheme. It is a plain synonym for 'MBloom'' with the hashing scheme
-- fixed.
type MBloom s = MBloom' s CheapHashes
59
-- | Create a new mutable Bloom filter.
--
-- The requested number of bits is clamped to the range @[1, 2^48]@: a
-- request for zero bits yields a one-bit filter, and any request above
-- @2^48@ bits is capped at @2^48@. Tell us if you need bigger Bloom
-- filters.
--
-- The number of hash functions is stored as given; it is not validated
-- here.
new :: Int    -- ^ number of hash functions to use
    -> Word64 -- ^ number of bits in filter
    -> ST s (MBloom' s h a)
new hash numBits = MBloom hash numBits' `liftM` V.new numBits'
  where
    -- Largest supported filter size, in bits (2^48).
    maxBits :: Word64
    maxBits = 0x1_0000_0000_0000

    -- Clamp instead of testing against 2^48 - 1, so that a request for
    -- exactly 2^48 - 1 bits is honoured rather than rounded up to 2^48.
    numBits' :: Word64
    numBits' = max 1 (min maxBits numBits)
71
-- | Add a value to a mutable Bloom filter. Once this action has run, a
-- membership query for the same value is guaranteed to return @True@.
--
-- Both the filter and the value are forced before the value is hashed.
insert :: (Hashes h, Hashable a) => MBloom' s h a -> a -> ST s ()
insert !filt !val = insertHashes filt $ makeHashes val
76
-- Set one bit per hash function: for every hash number from 0 up to (but
-- not including) the filter's hash count, evaluate that hash, reduce it
-- modulo the filter size, and set the bit at the resulting index.
insertHashes :: Hashes h => MBloom' s h a -> h a -> ST s ()
insertHashes (MBloom numHashes numBits bits) !hashes = loop 0
  where
    loop !n
      | n < numHashes = do
          -- Reduce modulo the filter size so the index addresses a bit
          -- inside the vector before the unchecked write.
          let !bitIx = evalHashes hashes n `rem` numBits
          V.unsafeWrite bits bitIx True
          loop (n + 1)
      | otherwise = return ()
83
-- | Test whether a value may be a member of a mutable Bloom filter.
-- A value that has been inserted always yields @True@. A value that has
-- not been inserted usually yields @False@, but there is /still/ some
-- possibility that @True@ will be returned.
elem :: (Hashes h, Hashable a) => a -> MBloom' s h a -> ST s Bool
elem elt mb = makeHashes elt `elemHashes` mb
89
-- Check the bit selected by each hash function in turn, starting from
-- hash number 0. The answer is @False@ as soon as one selected bit is
-- unset, and @True@ once every hash number below the filter's hash
-- count has been checked.
elemHashes :: forall h s a. Hashes h => h a -> MBloom' s h a -> ST s Bool
elemHashes !hashes (MBloom numHashes numBits bits) = probe 0
  where
    probe :: Int -> ST s Bool
    probe !n
      | n >= numHashes = return True
      | otherwise = do
          -- Same index computation as on insertion: hash value modulo
          -- the filter size.
          let !bitIx = evalHashes hashes n `rem` numBits
          isSet <- V.unsafeRead bits bitIx
          if isSet
            then probe (n + 1)
            else return False
100
-- | The size of a mutable Bloom filter, measured in bits.
length :: MBloom' s h a -> Word64
length mb = size mb
104
105 -- $overview
106 --
107 -- Each of the functions for creating Bloom filters accepts two parameters:
108 --
109 -- * The number of bits that should be used for the filter. Note that
110 -- a filter is fixed in size; it cannot be resized after creation.
111 --
112 -- * A number of hash functions, /k/, to be used for the filter.
113 --
114 -- By choosing these parameters with care, it is possible to tune for
115 -- a particular false positive rate.
116 -- The 'Data.BloomFilter.Easy.suggestSizing' function in
117 -- the "Data.BloomFilter.Easy" module calculates useful estimates for
118 -- these parameters.
119
-- $ease
--
-- This module provides mutable interfaces for creating and
-- querying a Bloom filter. It is most useful as a low-level way to
-- manage a Bloom filter with a custom set of characteristics.
125
126 -- $performance
127 --
128 -- The implementation has been carefully tuned for high performance
129 -- and low space consumption.