Skip to content

Commit 39c1033

Browse files
committed
Matcher interface; benchmarks
Also a bugfix re default rebuild logic. Also pruner.NewMatcher() copies the default rebuild trigger.
1 parent 111713c commit 39c1033

File tree

5 files changed

+129
-40
lines changed

5 files changed

+129
-40
lines changed

lib/core_matcher.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,15 +108,15 @@ func (m *CoreMatcher) MatchesForJSONEvent(event []byte) ([]X, error) {
108108
if err != nil {
109109
return nil, err
110110
}
111-
matches := m.MatchesForFields(fields)
112-
return matches, nil
111+
return m.MatchesForFields(fields)
112+
113113
}
114114

115115
// MatchesForFields takes a list of Field structures and sorts them by pathname; the fields in a pattern to
116116
// be matched are similarly sorted; thus running an automaton over them works
117-
func (m *CoreMatcher) MatchesForFields(fields []Field) []X {
117+
func (m *CoreMatcher) MatchesForFields(fields []Field) ([]X, error) {
118118
sort.Slice(fields, func(i, j int) bool { return string(fields[i].Path) < string(fields[j].Path) })
119-
return m.matchesForSortedFields(fields).matches()
119+
return m.matchesForSortedFields(fields).matches(), nil
120120
}
121121

122122
// proposedTransition represents a suggestion that the name/value pair at fields[fieldIndex] might allow a transition

lib/fj.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func (fj *FJ) FlattenAndMatch(event []byte) ([]X, error) {
7272
if err != nil {
7373
return nil, err
7474
}
75-
return fj.matcher.MatchesForFields(fields), nil
75+
return fj.matcher.MatchesForFields(fields)
7676
}
7777

7878
// Flatten implements the Flattener interface. It assumes that the event is immutable - if you modify the event

lib/matcher.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@ package quamina
33
type Matcher interface {
44
AddPattern(x X, pat string) error
55
MatchesForJSONEvent(event []byte) ([]X, error)
6-
MatchesForFields(fields []Field) []X
6+
MatchesForFields(fields []Field) ([]X, error)
77
DeletePattern(x X) error
88
}

pruner/pruner.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ type Matcher struct {
6262
// Matcher is the underlying matcher that does the hard work.
6363
//
6464
// Maybe Matcher should not be embedded or public.
65-
*quamina.Matcher
65+
Matcher *quamina.CoreMatcher
6666

6767
// live is the live set of patterns.
6868
live LivePatternsState
@@ -108,6 +108,7 @@ func newTooMuchFiltering(ratio float64, min int64) *tooMuchFiltering {
108108
}
109109

110110
func (t *tooMuchFiltering) Rebuild(added bool, s *Stats) bool {
111+
111112
if added {
112113
// No need to think when we're adding a pattern since
113114
// that operation cannot result in an increase of
@@ -123,11 +124,11 @@ func (t *tooMuchFiltering) Rebuild(added bool, s *Stats) bool {
123124

124125
// We won't rebuild if nothing's been emitted yet.
125126
//
126-
// In isolating, that heuristic is arguable, but for this
127+
// In isolation, this heuristic is arguable, but for this
127128
// policy we need it. Otherwise we'll divide by zero, and
128129
// nobody wants that.
129130
if s.Emitted == 0 {
130-
return true
131+
return false
131132
}
132133

133134
var (
@@ -170,10 +171,11 @@ func NewMatcher(s LivePatternsState) *Matcher {
170171
if s == nil {
171172
s = NewMemState()
172173
}
174+
trigger := *defaultRebuildTrigger // Copy
173175
return &Matcher{
174-
Matcher: quamina.NewMatcher(),
176+
Matcher: quamina.NewCoreMatcher(),
175177
live: s,
176-
rebuildTrigger: defaultRebuildTrigger,
178+
rebuildTrigger: &trigger,
177179
}
178180
}
179181

@@ -236,7 +238,10 @@ func (m *Matcher) MatchesForJSONEvent(event []byte) ([]quamina.X, error) {
236238

237239
func (m *Matcher) MatchesForFields(fields []quamina.Field) ([]quamina.X, error) {
238240

239-
xs := m.Matcher.MatchesForFields(fields)
241+
xs, err := m.Matcher.MatchesForFields(fields)
242+
if err != nil {
243+
return nil, err
244+
}
240245

241246
// Remove any X that isn't in the live set.
242247

@@ -273,7 +278,7 @@ func (m *Matcher) MatchesForFields(fields []quamina.Field) ([]quamina.X, error)
273278
//
274279
// The return boolean when true indicates that at least one pattern
275280
// for x was removed.
276-
func (m *Matcher) DeletePattern(x quamina.X) (bool, error) {
281+
func (m *Matcher) DeletePattern(x quamina.X) error {
277282
// Maybe better to return (int,error) as in
278283
// LivePatternStats.Delete(), or maybe just return an error
279284
// and nothing else.
@@ -289,7 +294,7 @@ func (m *Matcher) DeletePattern(x quamina.X) (bool, error) {
289294
}
290295
}
291296

292-
return 0 < n, err
297+
return err
293298
}
294299

295300
// Rebuild rebuilds the matcher state based on only live patterns.
@@ -313,7 +318,7 @@ func (m *Matcher) rebuild(fearlessly bool) error {
313318

314319
var (
315320
then = time.Now()
316-
m1 = quamina.NewMatcher()
321+
m1 = quamina.NewCoreMatcher()
317322
)
318323

319324
if fearlessly {

pruner/pruner_test.go

Lines changed: 109 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
package pruner
22

33
import (
4+
"bufio"
45
"fmt"
56
"log"
6-
quamina "quamina/lib"
7+
"os"
78
"testing"
89
"time"
10+
11+
quamina "quamina/lib"
912
)
1013

1114
var verbose = false
@@ -50,15 +53,11 @@ func TestBasic(t *testing.T) {
5053

5154
m.printStats()
5255

53-
if have, err := m.DeletePattern(id); err != nil {
56+
if err := m.DeletePattern(id); err != nil {
5457
t.Fatal(err)
55-
} else if !have {
56-
t.Fatal(have)
5758
}
58-
if have, err := m.DeletePattern(id); err != nil {
59+
if err := m.DeletePattern(id); err != nil {
5960
t.Fatal(err)
60-
} else if have {
61-
t.Fatal(have)
6261
}
6362

6463
m.printStats()
@@ -92,8 +91,8 @@ func TestBasic(t *testing.T) {
9291

9392
func TestRebuildSome(t *testing.T) {
9493
var (
94+
n = int(2 * defaultRebuildTrigger.MinAction)
9595
m = NewMatcher(nil)
96-
n = int(defaultRebuildTrigger.MinAction + 100)
9796
)
9897

9998
populate := func() {
@@ -107,10 +106,8 @@ func TestRebuildSome(t *testing.T) {
107106

108107
depopulate := func() {
109108
for i := 0; i < n; i += 2 {
110-
if had, err := m.DeletePattern(i); err != nil {
109+
if err := m.DeletePattern(i); err != nil {
111110
t.Fatal(err)
112-
} else if !had {
113-
t.Fatal(i)
114111
}
115112
}
116113
// Maybe check a lot more often.
@@ -153,6 +150,7 @@ func TestRebuildSome(t *testing.T) {
153150
m.printStats()
154151
depopulate()
155152
query(false)
153+
m.printStats()
156154
if s := m.Stats(); 0 == s.RebuildDuration {
157155
t.Fatal(s)
158156
}
@@ -195,7 +193,7 @@ func TestTriggerTooManyFilteredDenom(t *testing.T) {
195193
if err := m.AddPattern(1, `{"likes":["tacos"]}`); err != nil {
196194
t.Fatal(err)
197195
}
198-
if _, err := m.DeletePattern(1); err != nil {
196+
if err := m.DeletePattern(1); err != nil {
199197
t.Fatal(err)
200198
}
201199
_, err := m.MatchesForJSONEvent([]byte(`{"likes":"tacos"}`))
@@ -217,12 +215,12 @@ func TestTriggerRebuild(t *testing.T) {
217215
doomed = func(id int) bool {
218216
return id%2 == 0
219217
}
220-
printState = func() {
221-
logf("state:")
222-
for id, p := range m.live.(*MemState).m {
223-
logf(" %v -> %s", id, p)
224-
}
225-
}
218+
// printState = func() {
219+
// logf("state:")
220+
// for id, p := range m.live.(*MemState).m {
221+
// logf(" %v -> %s", id, p)
222+
// }
223+
// }
226224
)
227225

228226
trigger.MinAction = 5
@@ -235,13 +233,13 @@ func TestTriggerRebuild(t *testing.T) {
235233
}
236234

237235
if doomed(i) {
238-
if _, err := m.DeletePattern(i); err != nil {
236+
if err := m.DeletePattern(i); err != nil {
239237
t.Fatal(err)
240238
}
241239
}
242240
}
243241

244-
printState()
242+
// printState()
245243
m.printStats()
246244

247245
for i := 0; i < n; i++ {
@@ -261,7 +259,7 @@ func TestTriggerRebuild(t *testing.T) {
261259
}
262260
}
263261

264-
printState()
262+
// printState()
265263
m.printStats()
266264

267265
s := m.Stats()
@@ -314,7 +312,7 @@ func TestBadState(t *testing.T) {
314312
if err := m.AddPattern(1, `{"likes":["queso"]}`); err == nil {
315313
t.Fatal("expected error")
316314
}
317-
if _, err := m.DeletePattern(1); err == nil {
315+
if err := m.DeletePattern(1); err == nil {
318316
t.Fatal("expected error")
319317
}
320318
if err := m.Rebuild(false); err == nil {
@@ -433,9 +431,7 @@ func TestMultiplePatternsWithSameId(t *testing.T) {
433431
t.Fatal(s.Live)
434432
}
435433

436-
if had, err := m.DeletePattern(id); err != nil {
437-
t.Fatal(err)
438-
} else if !had {
434+
if err := m.DeletePattern(id); err != nil {
439435
t.Fatal(err)
440436
}
441437

@@ -450,3 +446,91 @@ func TestMultiplePatternsWithSameId(t *testing.T) {
450446
}
451447

452448
}
449+
450+
func BenchmarkCityLotsCore(b *testing.B) {
451+
benchmarkCityLots(b, quamina.NewCoreMatcher())
452+
}
453+
454+
func BenchmarkCityLotsPruner(b *testing.B) {
455+
benchmarkCityLots(b, NewMatcher(nil))
456+
}
457+
458+
// benchmarkCityLots is distilled from TestCityLots.
459+
func benchmarkCityLots(b *testing.B, m quamina.Matcher) {
460+
461+
oneMeg := 1024 * 1024
462+
file, err := os.Open("../test_data/citylots.jlines")
463+
if err != nil {
464+
b.Errorf("Can't open file %s", err)
465+
}
466+
defer file.Close()
467+
468+
patterns := []string{
469+
`{ "properties": { "STREET": [ "CRANLEIGH" ] } }`,
470+
`{ "properties": { "STREET": [ "17TH" ], "ODD_EVEN": [ "E"] } }`,
471+
`{ "geometry": { "coordinates": [ 37.807807921694092 ] } }`,
472+
`{ "properties": { "MAPBLKLOT": ["0011008"], "BLKLOT": ["0011008"]}, "geometry": { "coordinates": [ 37.807807921694092 ] } } `,
473+
}
474+
names := []string{
475+
"CRANLEIGH",
476+
"17TH Even",
477+
"Geometry",
478+
"0011008",
479+
}
480+
481+
scanner := bufio.NewScanner(file)
482+
buf := make([]byte, oneMeg)
483+
scanner.Buffer(buf, oneMeg)
484+
485+
var fj quamina.Flattener
486+
switch vv := m.(type) {
487+
case *Matcher:
488+
fj = quamina.NewFJ(vv.Matcher)
489+
vv.DisableRebuild()
490+
case *quamina.CoreMatcher:
491+
fj = quamina.NewFJ(vv)
492+
default:
493+
b.Fatalf("%T", vv)
494+
}
495+
496+
for i := range names {
497+
err = m.AddPattern(names[i], patterns[i])
498+
if err != nil {
499+
b.Errorf("AddPattern error %s", err)
500+
}
501+
}
502+
results := make(map[quamina.X]int)
503+
504+
lineCount := 0
505+
var lines [][]byte
506+
for scanner.Scan() {
507+
lineCount++
508+
lines = append(lines, []byte(scanner.Text()))
509+
}
510+
lineCount = 0
511+
512+
b.ResetTimer()
513+
514+
for _, line := range lines {
515+
matches, err := fj.FlattenAndMatch(line)
516+
if err != nil {
517+
b.Errorf("Matches4JSON error %s on %s", err, line)
518+
}
519+
lineCount++
520+
if lineCount == b.N {
521+
break
522+
}
523+
for _, match := range matches {
524+
count, ok := results[match]
525+
if !ok {
526+
count = 0
527+
}
528+
results[match] = count + 1
529+
}
530+
}
531+
532+
err = scanner.Err()
533+
if err != nil {
534+
b.Errorf("Scanner error %s", err)
535+
}
536+
}

0 commit comments

Comments
 (0)