Skip to content

Commit 92f5275

Browse files
authored
Merge pull request #3 from EladGabay/main
Introduce DecodeFrom and EncodeReader
2 parents bc1f8ba + 65cc15d commit 92f5275

File tree

5 files changed

+193
-137
lines changed

5 files changed

+193
-137
lines changed

cuckoofilter.go

Lines changed: 57 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,27 @@
66
package cuckoo
77

88
import (
9+
"bytes"
910
"encoding/binary"
1011
"errors"
1112
"fmt"
13+
"io"
14+
1215
"github.com/dgryski/go-metro"
1316
)
1417

1518
// maximum number of cuckoo kicks before claiming failure
1619
const kMaxCuckooCount uint = 500
1720

1821
const (
19-
//TableTypeSingle normal single table
22+
// TableTypeSingle is the normal single table type.
2023
TableTypeSingle = 0
21-
//TableTypePacked packed table, use semi-sort to save 1 bit per item
24+
// TableTypePacked is the packed table type, which uses semi-sort to save 1 bit per item.
2225
TableTypePacked = 1
2326
)
2427

2528
type table interface {
26-
Init(tagsPerBucket, bitsPerTag, num uint)
29+
Init(tagsPerBucket, bitsPerTag, num uint, initialBucketsHint []byte) error
2730
NumBuckets() uint
2831
FindTagInBuckets(i1, i2 uint, tag uint32) bool
2932
DeleteTagFromBucket(i uint, tag uint32) bool
@@ -32,7 +35,7 @@ type table interface {
3235
SizeInBytes() uint
3336
Info() string
3437
BitsPerItem() uint
35-
Encode() []byte
38+
Reader() (io.Reader, uint)
3639
Decode([]byte) error
3740
Reset()
3841
}
@@ -52,7 +55,9 @@ type victimCache struct {
5255
used bool
5356
}
5457

55-
//Filter cuckoo filter type struct
58+
const filterMetadataSize = 3*bytesPerUint32 + 1
59+
60+
// Filter is the cuckoo filter type.
5661
type Filter struct {
5762
victim victimCache
5863
numItems uint
@@ -75,7 +80,7 @@ func NewFilter(tagsPerBucket, bitsPerItem, maxNumKeys, tableType uint) *Filter {
7580
numBuckets = 1
7681
}
7782
table := getTable(tableType).(table)
78-
table.Init(tagsPerBucket, bitsPerItem, numBuckets)
83+
_ = table.Init(tagsPerBucket, bitsPerItem, numBuckets, nil)
7984
return &Filter{
8085
table: table,
8186
}
@@ -102,7 +107,7 @@ func (f *Filter) altIndex(index uint, tag uint32) uint {
102107
return f.indexHash(uint32(index) ^ (tag * 0x5bd1e995))
103108
}
104109

105-
//Size return num of items that filter store
110+
// Size returns the number of items stored in the filter.
106111
func (f *Filter) Size() uint {
107112
var c uint
108113
if f.victim.used {
@@ -111,22 +116,22 @@ func (f *Filter) Size() uint {
111116
return f.numItems + c
112117
}
113118

114-
//LoadFactor return current filter's loadFactor
119+
// LoadFactor returns the filter's current load factor.
115120
func (f *Filter) LoadFactor() float64 {
116121
return 1.0 * float64(f.Size()) / float64(f.table.SizeInTags())
117122
}
118123

119-
//SizeInBytes return bytes occupancy of filter's table
124+
// SizeInBytes returns the size in bytes of the filter's table.
120125
func (f *Filter) SizeInBytes() uint {
121126
return f.table.SizeInBytes()
122127
}
123128

124-
//BitsPerItem return bits occupancy per item of filter's table
129+
// BitsPerItem returns the number of bits the filter's table occupies per stored item.
125130
func (f *Filter) BitsPerItem() float64 {
126131
return 8.0 * float64(f.table.SizeInBytes()) / float64(f.Size())
127132
}
128133

129-
//Add add an item into filter, return false when filter is full
134+
// Add adds an item to the filter; it returns false when the filter is full.
130135
func (f *Filter) Add(item []byte) bool {
131136
if f.victim.used {
132137
return false
@@ -135,7 +140,7 @@ func (f *Filter) Add(item []byte) bool {
135140
return f.addImpl(i, tag)
136141
}
137142

138-
//AddUnique add an item into filter, return false when filter already contains it or filter is full
143+
// AddUnique adds an item to the filter; it returns false when the filter already contains it or the filter is full.
139144
func (f *Filter) AddUnique(item []byte) bool {
140145
if f.Contain(item) {
141146
return false
@@ -169,7 +174,7 @@ func (f *Filter) addImpl(i uint, tag uint32) bool {
169174
return true
170175
}
171176

172-
//Contain return if filter contains an item
177+
// Contain reports whether the filter contains an item.
173178
func (f *Filter) Contain(key []byte) bool {
174179
i1, tag := f.generateIndexTagHash(key)
175180
i2 := f.altIndex(i1, tag)
@@ -182,7 +187,7 @@ func (f *Filter) Contain(key []byte) bool {
182187
return false
183188
}
184189

185-
//Delete delete item from filter, return false when item not exist
190+
// Delete deletes an item from the filter; it returns false when the item does not exist.
186191
func (f *Filter) Delete(key []byte) bool {
187192
i1, tag := f.generateIndexTagHash(key)
188193
i2 := f.altIndex(i1, tag)
@@ -238,7 +243,7 @@ func (f *Filter) FalsePositiveRate() float64 {
238243
return float64(fp) / float64(rounds)
239244
}
240245

241-
//Info return filter's detail info
246+
// Info returns the filter's detailed info.
242247
func (f *Filter) Info() string {
243248
return fmt.Sprintf("CuckooFilter Status:\n"+
244249
"\t\t%v\n"+
@@ -250,37 +255,51 @@ func (f *Filter) Info() string {
250255
}
251256

252257
// Encode returns a byte slice representing a Cuckoo filter
253-
func (f *Filter) Encode() []byte {
254-
var b [3][bytesPerUint32]byte
255-
binary.LittleEndian.PutUint32(b[0][:], uint32(f.numItems))
256-
binary.LittleEndian.PutUint32(b[1][:], uint32(f.victim.index))
257-
binary.LittleEndian.PutUint32(b[2][:], f.victim.tag)
258-
259-
ret := append(b[0][:], b[1][:]...)
260-
ret = append(ret, b[2][:]...)
258+
func (f *Filter) Encode() ([]byte, error) {
259+
filterReader, filterSize := f.EncodeReader()
260+
buf := make([]byte, filterSize)
261+
if _, err := io.ReadFull(filterReader, buf); err != nil {
262+
return nil, err
263+
}
264+
return buf, nil
265+
}
266+
267+
// EncodeReader returns a reader representing a Cuckoo filter
268+
func (f *Filter) EncodeReader() (io.Reader, uint) {
269+
var metadata [filterMetadataSize]byte
270+
271+
for i, n := range []uint32{uint32(f.numItems), uint32(f.victim.index), f.victim.tag} {
272+
binary.LittleEndian.PutUint32(metadata[i*bytesPerUint32:], n)
273+
}
274+
275+
victimUsed := byte(0)
261276
if f.victim.used {
262-
ret = append(ret, byte(1))
263-
} else {
264-
ret = append(ret, byte(0))
277+
victimUsed = byte(1)
265278
}
266-
ret = append(ret, f.table.Encode()...)
279+
metadata[bytesPerUint32*3] = victimUsed
280+
tableReader, tableEncodedSize := f.table.Reader()
281+
return io.MultiReader(bytes.NewReader(metadata[:]), tableReader), uint(len(metadata)) + tableEncodedSize
282+
}
267283

268-
return ret
284+
// Decode returns a Cuckoo Filter using a copy of the provided byte slice.
285+
func Decode(b []byte) (*Filter, error) {
286+
copiedBytes := make([]byte, len(b))
287+
copy(copiedBytes, b)
288+
return DecodeFrom(copiedBytes)
269289
}
270290

271-
// Decode returns a Cuckoo Filter from a byte slice
272-
func Decode(bytes []byte) (*Filter, error) {
273-
if len(bytes) < 20 {
291+
// DecodeFrom returns a Cuckoo Filter using the exact provided byte slice (no copy).
292+
func DecodeFrom(b []byte) (*Filter, error) {
293+
if len(b) < 20 {
274294
return nil, errors.New("unexpected bytes length")
275295
}
276-
numItems := uint(binary.LittleEndian.Uint32(bytes[0:4]))
277-
curIndex := uint(binary.LittleEndian.Uint32(bytes[4:8]))
278-
curTag := binary.LittleEndian.Uint32(bytes[8:12])
279-
used := bytes[12] == byte(1)
280-
tableType := uint(bytes[13])
296+
numItems := uint(binary.LittleEndian.Uint32(b[0*bytesPerUint32:]))
297+
curIndex := uint(binary.LittleEndian.Uint32(b[1*bytesPerUint32:]))
298+
curTag := binary.LittleEndian.Uint32(b[2*1*bytesPerUint32:])
299+
used := b[12] == byte(1)
300+
tableType := uint(b[13])
281301
table := getTable(tableType).(table)
282-
err := table.Decode(bytes[13:])
283-
if err != nil {
302+
if err := table.Decode(b[13:]); err != nil {
284303
return nil, err
285304
}
286305
return &Filter{

cuckoofilter_test.go

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package cuckoo
77

88
import (
9+
"bytes"
910
"crypto/rand"
1011
"fmt"
1112
"io"
@@ -15,9 +16,11 @@ import (
1516

1617
const size = 100000
1718

18-
var testBucketSize = []uint{2, 4, 8}
19-
var testFingerprintSize = []uint{2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 16, 17, 23, 31, 32}
20-
var testTableType = []uint{TableTypeSingle, TableTypePacked}
19+
var (
20+
testBucketSize = []uint{2, 4, 8}
21+
testFingerprintSize = []uint{2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 16, 17, 23, 31, 32}
22+
testTableType = []uint{TableTypeSingle, TableTypePacked}
23+
)
2124

2225
func TestFilter(t *testing.T) {
2326
var insertNum uint = 50000
@@ -33,7 +36,7 @@ func TestFilter(t *testing.T) {
3336
continue
3437
}
3538
cf := NewFilter(b, f, 8190, table)
36-
//fmt.Println(cf.Info())
39+
// fmt.Println(cf.Info())
3740
a := make([][]byte, 0)
3841
for i := uint(0); i < insertNum; i++ {
3942
_, _ = io.ReadFull(rand.Reader, hash[:])
@@ -45,12 +48,48 @@ func TestFilter(t *testing.T) {
4548
}
4649

4750
count := cf.Size()
48-
4951
if count != uint(len(a)) {
5052
t.Errorf("Expected count = %d, instead count = %d, b %v f %v", uint(len(a)), count, b, f)
5153
return
5254
}
5355

56+
encodedBytes, err := cf.Encode()
57+
if err != nil {
58+
t.Fatalf("err %v", err)
59+
}
60+
if len(encodedBytes) != cap(encodedBytes) {
61+
t.Fatalf("len(%d) != cap(%d)", len(encodedBytes), cap(encodedBytes))
62+
}
63+
ncf, err := Decode(encodedBytes)
64+
if err != nil || !reflect.DeepEqual(cf, ncf) {
65+
t.Errorf("Expected epual, err %v", err)
66+
return
67+
}
68+
69+
encodedBytes, err = cf.Encode()
70+
if err != nil {
71+
t.Fatalf("err %v", err)
72+
}
73+
ncf, err = DecodeFrom(encodedBytes)
74+
if err != nil || !reflect.DeepEqual(cf, ncf) {
75+
t.Errorf("Expected epual, err %v", err)
76+
return
77+
}
78+
79+
filterReader, _ := cf.EncodeReader()
80+
bytesFromReader, err := io.ReadAll(filterReader)
81+
if err != nil {
82+
t.Fatalf("Error reading from reader")
83+
}
84+
if !bytes.Equal(bytesFromReader, encodedBytes) {
85+
t.Fatalf("Expected to be equal")
86+
}
87+
88+
fmt.Println(cf.Info())
89+
cf.BitsPerItem()
90+
cf.SizeInBytes()
91+
cf.LoadFactor()
92+
5493
for _, v := range a {
5594
if !cf.Contain(v) {
5695
t.Errorf("Expected contain, instead not contain, b %v f %v table type %v", b, f, table)
@@ -65,22 +104,10 @@ func TestFilter(t *testing.T) {
65104
return
66105
}
67106

68-
bytes := cf.Encode()
69-
ncf, err := Decode(bytes)
70-
if err != nil || !reflect.DeepEqual(cf, ncf) {
71-
t.Errorf("Expected epual, err %v", err)
72-
return
73-
}
74-
75-
cf.Info()
76-
cf.BitsPerItem()
77-
cf.SizeInBytes()
78-
cf.LoadFactor()
79107
fmt.Printf("Filter bucketSize %v fingerprintSize %v tableType %v falsePositive Rate %v \n", b, f, table, cf.FalsePositiveRate())
80108
}
81109
}
82110
}
83-
84111
}
85112

86113
func BenchmarkFilterSingle_Reset(b *testing.B) {

0 commit comments

Comments
 (0)