Skip to content

Commit 2090032

Browse files
author
balaji
authored
Binary search based table picker (#983)
Use binary search to pick tables instead of simple looping. This seems to improve the performance of key iterator creation close to seek. Signed-off-by: பாலாஜி <[email protected]>
1 parent 5f64ecf commit 2090032

File tree

4 files changed

+141
-21
lines changed

4 files changed

+141
-21
lines changed

iterator.go

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"bytes"
2121
"fmt"
2222
"hash/crc32"
23+
"sort"
2324
"sync"
2425
"sync/atomic"
2526
"time"
@@ -339,20 +340,23 @@ type IteratorOptions struct {
339340
InternalAccess bool // Used to allow internal access to badger keys.
340341
}
341342

343+
func (opt *IteratorOptions) compareToPrefix(key []byte) int {
344+
// We should compare key without timestamp. For example key - a[TS] might be > "aa" prefix.
345+
key = y.ParseKey(key)
346+
if len(key) > len(opt.Prefix) {
347+
key = key[:len(opt.Prefix)]
348+
}
349+
return bytes.Compare(key, opt.Prefix)
350+
}
351+
342352
func (opt *IteratorOptions) pickTable(t table.TableInterface) bool {
343353
if len(opt.Prefix) == 0 {
344354
return true
345355
}
346-
trim := func(key []byte) []byte {
347-
if len(key) > len(opt.Prefix) {
348-
return key[:len(opt.Prefix)]
349-
}
350-
return key
351-
}
352-
if bytes.Compare(trim(y.ParseKey(t.Smallest())), opt.Prefix) > 0 {
356+
if opt.compareToPrefix(t.Smallest()) > 0 {
353357
return false
354358
}
355-
if bytes.Compare(trim(y.ParseKey(t.Biggest())), opt.Prefix) < 0 {
359+
if opt.compareToPrefix(t.Biggest()) < 0 {
356360
return false
357361
}
358362
// Bloom filter lookup would only work if opt.Prefix does NOT have the read
@@ -363,6 +367,49 @@ func (opt *IteratorOptions) pickTable(t table.TableInterface) bool {
363367
return true
364368
}
365369

370+
// pickTables picks the necessary table for the iterator. This function also assumes
371+
// that the tables are sorted in the right order.
372+
func (opt *IteratorOptions) pickTables(all []*table.Table) []*table.Table {
373+
if len(opt.Prefix) == 0 {
374+
out := make([]*table.Table, len(all))
375+
copy(out, all)
376+
return out
377+
}
378+
sIdx := sort.Search(len(all), func(i int) bool {
379+
return opt.compareToPrefix(all[i].Biggest()) >= 0
380+
})
381+
if sIdx == len(all) {
382+
// Not found.
383+
return []*table.Table{}
384+
}
385+
386+
filtered := all[sIdx:]
387+
if !opt.prefixIsKey {
388+
eIdx := sort.Search(len(filtered), func(i int) bool {
389+
return opt.compareToPrefix(filtered[i].Smallest()) > 0
390+
})
391+
out := make([]*table.Table, len(filtered[:eIdx]))
392+
copy(out, filtered[:eIdx])
393+
return out
394+
}
395+
396+
var out []*table.Table
397+
for _, t := range filtered {
398+
// When we encounter the first table whose smallest key is higher than
399+
// opt.Prefix, we can stop.
400+
if opt.compareToPrefix(t.Smallest()) > 0 {
401+
return out
402+
}
403+
// opt.Prefix is actually the key. So, we can run bloom filter checks
404+
// as well.
405+
if t.DoesNotHave(opt.Prefix) {
406+
continue
407+
}
408+
out = append(out, t)
409+
}
410+
return out
411+
}
412+
366413
// DefaultIteratorOptions contains default options when iterating over Badger key-value stores.
367414
var DefaultIteratorOptions = IteratorOptions{
368415
PrefetchValues: true,

iterator_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"testing"
2828

2929
"github.com/dgraph-io/badger/options"
30+
"github.com/dgraph-io/badger/table"
3031
"github.com/dgraph-io/badger/y"
3132
"github.com/stretchr/testify/require"
3233
)
@@ -70,6 +71,59 @@ func TestPickTables(t *testing.T) {
7071
outside("abd", "ab", "abc123")
7172
}
7273

74+
func TestPickSortTables(t *testing.T) {
75+
type MockKeys struct {
76+
small string
77+
large string
78+
}
79+
genTables := func(mks ...MockKeys) []*table.Table {
80+
out := make([]*table.Table, 0)
81+
for _, mk := range mks {
82+
f := buildTable(t, [][]string{{mk.small, "some value"}, {mk.large, "some value"}})
83+
opts := table.Options{LoadingMode: options.MemoryMap,
84+
ChkMode: options.OnTableAndBlockRead}
85+
tbl, err := table.OpenTable(f, opts)
86+
require.NoError(t, err)
87+
out = append(out, tbl)
88+
}
89+
return out
90+
}
91+
tables := genTables(MockKeys{small: "a", large: "abc"},
92+
MockKeys{small: "abcd", large: "cde"},
93+
MockKeys{small: "cge", large: "chf"},
94+
MockKeys{small: "glr", large: "gyup"})
95+
opt := DefaultIteratorOptions
96+
opt.prefixIsKey = false
97+
opt.Prefix = []byte("c")
98+
filtered := opt.pickTables(tables)
99+
require.Equal(t, 2, len(filtered))
100+
// build table adds time stamp so removing tailing bytes.
101+
require.Equal(t, filtered[0].Smallest()[:4], []byte("abcd"))
102+
require.Equal(t, filtered[1].Smallest()[:3], []byte("cge"))
103+
tables = genTables(MockKeys{small: "a", large: "abc"},
104+
MockKeys{small: "abcd", large: "ade"},
105+
MockKeys{small: "cge", large: "chf"},
106+
MockKeys{small: "glr", large: "gyup"})
107+
filtered = opt.pickTables(tables)
108+
require.Equal(t, 1, len(filtered))
109+
require.Equal(t, filtered[0].Smallest()[:3], []byte("cge"))
110+
tables = genTables(MockKeys{small: "a", large: "abc"},
111+
MockKeys{small: "abcd", large: "ade"},
112+
MockKeys{small: "cge", large: "chf"},
113+
MockKeys{small: "ckr", large: "cyup"},
114+
MockKeys{small: "csfr", large: "gyup"})
115+
filtered = opt.pickTables(tables)
116+
require.Equal(t, 3, len(filtered))
117+
require.Equal(t, filtered[0].Smallest()[:3], []byte("cge"))
118+
require.Equal(t, filtered[1].Smallest()[:3], []byte("ckr"))
119+
require.Equal(t, filtered[2].Smallest()[:4], []byte("csfr"))
120+
121+
opt.Prefix = []byte("aa")
122+
filtered = opt.pickTables(tables)
123+
require.Equal(t, y.ParseKey(filtered[0].Smallest()), []byte("a"))
124+
require.Equal(t, y.ParseKey(filtered[0].Biggest()), []byte("abc"))
125+
}
126+
73127
func TestIteratePrefix(t *testing.T) {
74128
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
75129
bkey := func(i int) []byte {

level_handler.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -262,21 +262,23 @@ func (s *levelHandler) appendIterators(iters []y.Iterator, opt *IteratorOptions)
262262
s.RLock()
263263
defer s.RUnlock()
264264

265-
tables := make([]*table.Table, 0, len(s.tables))
266-
for _, t := range s.tables {
267-
if opt.pickTable(t) {
268-
tables = append(tables, t)
265+
if s.level == 0 {
266+
// Remember to add in reverse order!
267+
// The newer table at the end of s.tables should be added first as it takes precedence.
268+
// Level 0 tables are not in key sorted order, so we need to consider them one by one.
269+
var out []*table.Table
270+
for _, t := range s.tables {
271+
if opt.pickTable(t) {
272+
out = append(out, t)
273+
}
269274
}
275+
return appendIteratorsReversed(iters, out, opt.Reverse)
270276
}
277+
278+
tables := opt.pickTables(s.tables)
271279
if len(tables) == 0 {
272280
return iters
273281
}
274-
275-
if s.level == 0 {
276-
// Remember to add in reverse order!
277-
// The newer table at the end of s.tables should be added first as it takes precedence.
278-
return appendIteratorsReversed(iters, tables, opt.Reverse)
279-
}
280282
return append(iters, table.NewConcatIterator(tables, opt.Reverse))
281283
}
282284

table/iterator.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,12 @@ type ConcatIterator struct {
402402
func NewConcatIterator(tbls []*Table, reversed bool) *ConcatIterator {
403403
iters := make([]*Iterator, len(tbls))
404404
for i := 0; i < len(tbls); i++ {
405-
iters[i] = tbls[i].NewIterator(reversed)
405+
// Increment the reference count now, because the table's iterator is not created here.
406+
// The table references are held for the lifetime of the ConcatIterator and released in Close.
407+
tbls[i].IncrRef()
408+
409+
// Save cycles by not initializing the iterators until needed.
410+
// iters[i] = tbls[i].NewIterator(reversed)
406411
}
407412
return &ConcatIterator{
408413
reversed: reversed,
@@ -416,9 +421,12 @@ func (s *ConcatIterator) setIdx(idx int) {
416421
s.idx = idx
417422
if idx < 0 || idx >= len(s.iters) {
418423
s.cur = nil
419-
} else {
420-
s.cur = s.iters[s.idx]
424+
return
425+
}
426+
if s.iters[idx] == nil {
427+
s.iters[idx] = s.tables[idx].NewIterator(s.reversed)
421428
}
429+
s.cur = s.iters[s.idx]
422430
}
423431

424432
// Rewind implements y.Interface
@@ -498,7 +506,16 @@ func (s *ConcatIterator) Next() {
498506

499507
// Close implements y.Interface.
500508
func (s *ConcatIterator) Close() error {
509+
for _, t := range s.tables {
510+
// DeReference the tables while closing the iterator.
511+
if err := t.DecrRef(); err != nil {
512+
return err
513+
}
514+
}
501515
for _, it := range s.iters {
516+
if it == nil {
517+
continue
518+
}
502519
if err := it.Close(); err != nil {
503520
return errors.Wrap(err, "ConcatIterator")
504521
}

0 commit comments

Comments
 (0)