never executed always true always false
    1 -- |
    2 -- A fast, space efficient Bloom filter implementation.  A Bloom
    3 -- filter is a set-like data structure that provides a probabilistic
    4 -- membership test.
    5 --
    6 -- * Queries do not give false negatives.  When an element is added to
    7 --   a filter, a subsequent membership test will definitely return
    8 --   'True'.
    9 --
   10 -- * False positives /are/ possible.  If an element has not been added
   11 --   to a filter, a membership test /may/ nevertheless indicate that
   12 --   the element is present.
   13 --
   14 -- This module provides low-level control.  For an easier to use
   15 -- interface, see the "Data.BloomFilter.Easy" module.
   16 
   17 module Data.BloomFilter.Mutable (
   18     -- * Overview
   19     -- $overview
   20 
   21     -- ** Ease of use
   22     -- $ease
   23 
   24     -- ** Performance
   25     -- $performance
   26 
   27     -- * Types
   28     Hash,
   29     MBloom,
   30     MBloom',
   31     CheapHashes,
   32     RealHashes,
   33     -- * Mutable Bloom filters
   34 
   35     -- ** Creation
   36     new,
   37 
   38     -- ** Accessors
   39     length,
   40     elem,
   41 
   42     -- ** Mutation
   43     insert,
   44 ) where
   45 
   46 import           Control.Monad (liftM)
   47 import           Control.Monad.ST (ST)
   48 import           Data.BloomFilter.Hash (CheapHashes, Hash, Hashable,
   49                      Hashes (..), RealHashes)
   50 import           Data.BloomFilter.Mutable.Internal
   51 import           Data.Word (Word64)
   52 
   53 import qualified Data.BloomFilter.BitVec64 as V
   54 
   55 import           Prelude hiding (elem, length)
   56 
-- | A mutable Bloom filter whose hashing scheme is fixed to 'CheapHashes'.
-- This is 'MBloom'' with its hash-scheme type parameter already applied.
type MBloom s = MBloom' s CheapHashes
   59 
   60 -- | Create a new mutable Bloom filter.
   61 --
   62 -- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters.
   63 --
   64 new :: Int                    -- ^ number of hash functions to use
   65     -> Word64                 -- ^ number of bits in filter
   66     -> ST s (MBloom' s h a)
   67 new hash numBits = MBloom hash numBits' `liftM` V.new numBits'
   68   where numBits' | numBits == 0                = 1
   69                  | numBits >= 0xffff_ffff_ffff = 0x1_0000_0000_0000
   70                  | otherwise                   = numBits
   71 
   72 -- | Insert a value into a mutable Bloom filter.  Afterwards, a
   73 -- membership query for the same value is guaranteed to return @True@.
   74 insert :: (Hashes h, Hashable a) => MBloom' s h a -> a -> ST s ()
   75 insert !mb !x = insertHashes mb (makeHashes x)
   76 
   77 insertHashes :: Hashes h => MBloom' s h a -> h a -> ST s ()
   78 insertHashes (MBloom k m v) !h = go 0
   79   where
   80     go !i | i >= k = return ()
   81           | otherwise = let !idx = evalHashes h i `rem` m
   82                         in V.unsafeWrite v idx True >> go (i + 1)
   83 
   84 -- | Query a mutable Bloom filter for membership.  If the value is
   85 -- present, return @True@.  If the value is not present, there is
   86 -- /still/ some possibility that @True@ will be returned.
   87 elem :: (Hashes h, Hashable a) => a -> MBloom' s h a -> ST s Bool
   88 elem elt mb = elemHashes (makeHashes elt) mb
   89 
   90 elemHashes :: forall h s a. Hashes h => h a -> MBloom' s h a -> ST s Bool
   91 elemHashes !ch (MBloom k m v) = go 0 where
   92     go :: Int -> ST s Bool
   93     go !i | i >= k    = return True
   94           | otherwise = do let !idx' = evalHashes ch i
   95                            let !idx = idx' `rem` m
   96                            b <- V.unsafeRead v idx
   97                            if b
   98                            then go (i + 1)
   99                            else return False
  100 
  101 -- | Return the size of a mutable Bloom filter, in bits.
  102 length :: MBloom' s h a -> Word64
  103 length = size
  104 
  105 -- $overview
  106 --
  107 -- Each of the functions for creating Bloom filters accepts two parameters:
  108 --
  109 -- * The number of bits that should be used for the filter.  Note that
  110 --   a filter is fixed in size; it cannot be resized after creation.
  111 --
  112 -- * A number of hash functions, /k/, to be used for the filter.
  113 --
  114 -- By choosing these parameters with care, it is possible to tune for
  115 -- a particular false positive rate.
  116 -- The 'Data.BloomFilter.Easy.suggestSizing' function in
  117 -- the "Data.BloomFilter.Easy" module calculates useful estimates for
  118 -- these parameters.
  119 
  120 -- $ease
  121 --
-- This module provides mutable interfaces for creating and
  123 -- querying a Bloom filter.  It is most useful as a low-level way to
  124 -- manage a Bloom filter with a custom set of characteristics.
  125 
  126 -- $performance
  127 --
  128 -- The implementation has been carefully tuned for high performance
  129 -- and low space consumption.