66package cuckoo
77
88import (
9+ "bytes"
910 "encoding/binary"
1011 "errors"
1112 "fmt"
13+ "io"
14+
1215 "github.com/dgryski/go-metro"
1316)
1417
// kMaxCuckooCount is the maximum number of cuckoo kicks attempted before an
// insertion is reported as failed.
const kMaxCuckooCount = uint(500)
1720
// Table layout selectors.
const (
	// TableTypeSingle selects the normal single table.
	TableTypeSingle = iota
	// TableTypePacked selects the packed table, which uses semi-sort to save
	// 1 bit per item.
	TableTypePacked
)
2427
2528type table interface {
26- Init (tagsPerBucket , bitsPerTag , num uint )
29+ Init (tagsPerBucket , bitsPerTag , num uint , initialBucketsHint [] byte ) error
2730 NumBuckets () uint
2831 FindTagInBuckets (i1 , i2 uint , tag uint32 ) bool
2932 DeleteTagFromBucket (i uint , tag uint32 ) bool
@@ -32,7 +35,7 @@ type table interface {
3235 SizeInBytes () uint
3336 Info () string
3437 BitsPerItem () uint
35- Encode () [] byte
38+ Reader () (io. Reader , uint )
3639 Decode ([]byte ) error
3740 Reset ()
3841}
@@ -52,7 +55,9 @@ type victimCache struct {
5255 used bool
5356}
5457
55- //Filter cuckoo filter type struct
58+ const filterMetadataSize = 3 * bytesPerUint32 + 1
59+
60+ // Filter cuckoo filter type struct
5661type Filter struct {
5762 victim victimCache
5863 numItems uint
@@ -75,7 +80,7 @@ func NewFilter(tagsPerBucket, bitsPerItem, maxNumKeys, tableType uint) *Filter {
7580 numBuckets = 1
7681 }
7782 table := getTable (tableType ).(table )
78- table .Init (tagsPerBucket , bitsPerItem , numBuckets )
83+ _ = table .Init (tagsPerBucket , bitsPerItem , numBuckets , nil )
7984 return & Filter {
8085 table : table ,
8186 }
@@ -102,7 +107,7 @@ func (f *Filter) altIndex(index uint, tag uint32) uint {
102107 return f .indexHash (uint32 (index ) ^ (tag * 0x5bd1e995 ))
103108}
104109
105- //Size return num of items that filter store
110+ // Size return num of items that filter store
106111func (f * Filter ) Size () uint {
107112 var c uint
108113 if f .victim .used {
@@ -111,22 +116,22 @@ func (f *Filter) Size() uint {
111116 return f .numItems + c
112117}
113118
114- //LoadFactor return current filter's loadFactor
119+ // LoadFactor return current filter's loadFactor
115120func (f * Filter ) LoadFactor () float64 {
116121 return 1.0 * float64 (f .Size ()) / float64 (f .table .SizeInTags ())
117122}
118123
119- //SizeInBytes return bytes occupancy of filter's table
124+ // SizeInBytes return bytes occupancy of filter's table
120125func (f * Filter ) SizeInBytes () uint {
121126 return f .table .SizeInBytes ()
122127}
123128
124- //BitsPerItem return bits occupancy per item of filter's table
129+ // BitsPerItem return bits occupancy per item of filter's table
125130func (f * Filter ) BitsPerItem () float64 {
126131 return 8.0 * float64 (f .table .SizeInBytes ()) / float64 (f .Size ())
127132}
128133
129- //Add add an item into filter, return false when filter is full
134+ // Add add an item into filter, return false when filter is full
130135func (f * Filter ) Add (item []byte ) bool {
131136 if f .victim .used {
132137 return false
@@ -135,7 +140,7 @@ func (f *Filter) Add(item []byte) bool {
135140 return f .addImpl (i , tag )
136141}
137142
138- //AddUnique add an item into filter, return false when filter already contains it or filter is full
143+ // AddUnique add an item into filter, return false when filter already contains it or filter is full
139144func (f * Filter ) AddUnique (item []byte ) bool {
140145 if f .Contain (item ) {
141146 return false
@@ -169,7 +174,7 @@ func (f *Filter) addImpl(i uint, tag uint32) bool {
169174 return true
170175}
171176
172- //Contain return if filter contains an item
177+ // Contain return if filter contains an item
173178func (f * Filter ) Contain (key []byte ) bool {
174179 i1 , tag := f .generateIndexTagHash (key )
175180 i2 := f .altIndex (i1 , tag )
@@ -182,7 +187,7 @@ func (f *Filter) Contain(key []byte) bool {
182187 return false
183188}
184189
185- //Delete delete item from filter, return false when item not exist
190+ // Delete delete item from filter, return false when item not exist
186191func (f * Filter ) Delete (key []byte ) bool {
187192 i1 , tag := f .generateIndexTagHash (key )
188193 i2 := f .altIndex (i1 , tag )
@@ -238,7 +243,7 @@ func (f *Filter) FalsePositiveRate() float64 {
238243 return float64 (fp ) / float64 (rounds )
239244}
240245
241- //Info return filter's detail info
246+ // Info return filter's detail info
242247func (f * Filter ) Info () string {
243248 return fmt .Sprintf ("CuckooFilter Status:\n " +
244249 "\t \t %v\n " +
@@ -250,37 +255,51 @@ func (f *Filter) Info() string {
250255}
251256
252257// Encode returns a byte slice representing a Cuckoo filter
253- func (f * Filter ) Encode () []byte {
254- var b [3 ][bytesPerUint32 ]byte
255- binary .LittleEndian .PutUint32 (b [0 ][:], uint32 (f .numItems ))
256- binary .LittleEndian .PutUint32 (b [1 ][:], uint32 (f .victim .index ))
257- binary .LittleEndian .PutUint32 (b [2 ][:], f .victim .tag )
258-
259- ret := append (b [0 ][:], b [1 ][:]... )
260- ret = append (ret , b [2 ][:]... )
258+ func (f * Filter ) Encode () ([]byte , error ) {
259+ filterReader , filterSize := f .EncodeReader ()
260+ buf := make ([]byte , filterSize )
261+ if _ , err := io .ReadFull (filterReader , buf ); err != nil {
262+ return nil , err
263+ }
264+ return buf , nil
265+ }
266+
267+ // EncodeReader returns a reader representing a Cuckoo filter
268+ func (f * Filter ) EncodeReader () (io.Reader , uint ) {
269+ var metadata [filterMetadataSize ]byte
270+
271+ for i , n := range []uint32 {uint32 (f .numItems ), uint32 (f .victim .index ), f .victim .tag } {
272+ binary .LittleEndian .PutUint32 (metadata [i * bytesPerUint32 :], n )
273+ }
274+
275+ victimUsed := byte (0 )
261276 if f .victim .used {
262- ret = append (ret , byte (1 ))
263- } else {
264- ret = append (ret , byte (0 ))
277+ victimUsed = byte (1 )
265278 }
266- ret = append (ret , f .table .Encode ()... )
279+ metadata [bytesPerUint32 * 3 ] = victimUsed
280+ tableReader , tableEncodedSize := f .table .Reader ()
281+ return io .MultiReader (bytes .NewReader (metadata [:]), tableReader ), uint (len (metadata )) + tableEncodedSize
282+ }
267283
268- return ret
284+ // Decode returns a Cuckoo Filter using a copy of the provided byte slice.
285+ func Decode (b []byte ) (* Filter , error ) {
286+ copiedBytes := make ([]byte , len (b ))
287+ copy (copiedBytes , b )
288+ return DecodeFrom (copiedBytes )
269289}
270290
271- // Decode returns a Cuckoo Filter from a byte slice
272- func Decode ( bytes []byte ) (* Filter , error ) {
273- if len (bytes ) < 20 {
291+ // DecodeFrom returns a Cuckoo Filter using the exact provided byte slice (no copy).
292+ func DecodeFrom ( b []byte ) (* Filter , error ) {
293+ if len (b ) < 20 {
274294 return nil , errors .New ("unexpected bytes length" )
275295 }
276- numItems := uint (binary .LittleEndian .Uint32 (bytes [ 0 : 4 ]))
277- curIndex := uint (binary .LittleEndian .Uint32 (bytes [ 4 : 8 ]))
278- curTag := binary .LittleEndian .Uint32 (bytes [ 8 : 12 ])
279- used := bytes [12 ] == byte (1 )
280- tableType := uint (bytes [13 ])
296+ numItems := uint (binary .LittleEndian .Uint32 (b [ 0 * bytesPerUint32 : ]))
297+ curIndex := uint (binary .LittleEndian .Uint32 (b [ 1 * bytesPerUint32 : ]))
298+ curTag := binary .LittleEndian .Uint32 (b [ 2 * 1 * bytesPerUint32 : ])
299+ used := b [12 ] == byte (1 )
300+ tableType := uint (b [13 ])
281301 table := getTable (tableType ).(table )
282- err := table .Decode (bytes [13 :])
283- if err != nil {
302+ if err := table .Decode (b [13 :]); err != nil {
284303 return nil , err
285304 }
286305 return & Filter {
0 commit comments