Skip to content

Commit 0c50fbd

Browse files
committed
Switched from map[string] to smallTable based value matching.
1 parent 2f65a53 commit 0c50fbd

13 files changed

+412
-176
lines changed

lib/benchmarks_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ func TestCityLots(t *testing.T) {
6161
if err != nil {
6262
t.Error("Can't open file: " + err.Error())
6363
}
64-
defer file.Close()
64+
defer func(file *os.File) {
65+
_ = file.Close()
66+
}(file)
6567

6668
patterns := []string{
6769
`{ "properties": { "STREET": [ "CRANLEIGH" ] } }`,

lib/field_matcher.go

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,45 @@
11
package quamina
22

33
// fieldMatchState has a map which is keyed by the field pathSegments values that can start transitions from this
4-
// state; for eacrh such field, there is a valueMatcher which, given the field's value, determines whether
4+
// state; for each such field, there is a valueMatcher which, given the field's value, determines whether
55
// the automaton progresses to another fieldMatchState
66
// matches contains the X values that arrival at this state implies have matched
77
// existsFalseFailures reports the condition that traversal has occurred by matching a field which is named in an
88
// exists:false pattern, and the named X's should be subtracted from the matches list being built up by a match project
99
// The matches field contains a list of the patterns that have been matched if traversal arrives at this state
1010
type fieldMatchState struct {
11-
transitions map[string]*valueMatchState
11+
transitions map[string]*valueMatcher
1212
matches []X
1313
existsFalseFailures *matchSet
1414
}
1515

1616
func newFieldMatchState() *fieldMatchState {
17-
return &fieldMatchState{transitions: make(map[string]*valueMatchState), existsFalseFailures: newMatchSet()}
17+
return &fieldMatchState{transitions: make(map[string]*valueMatcher), existsFalseFailures: newMatchSet()}
1818
}
1919

2020
func (m *fieldMatchState) addTransition(field *patternField) []*fieldMatchState {
2121

2222
// transition from a fieldMatchstate might already be present; create a new empty one if not
23-
valueMatcher, ok := m.transitions[field.path]
23+
vm, ok := m.transitions[field.path]
2424
if !ok {
25-
valueMatcher = newValueMatchState()
26-
m.transitions[field.path] = valueMatcher
25+
vm = newValueMatcher()
26+
m.transitions[field.path] = vm
2727
}
2828

2929
// suppose I'm adding the first pattern to a matcher and it has "x": [1, 2]. In principle the branches on
3030
// "x": 1 and "x": 2 could go to the same next state. But we have to make a unique next state for each of them
3131
// because some future other pattern might have "x": [2, 3] and thus we need a separate branch to potentially
3232
// match two patterns on "x": 2 but not "x": 1. If you were optimizing the automaton for size you might detect
3333
// cases where this doesn't happen and reduce the number of fieldMatchstates
34-
3534
var nextFieldMatchers []*fieldMatchState
3635
for _, val := range field.vals {
37-
nextFieldMatchers = append(nextFieldMatchers, valueMatcher.addTransition(val))
36+
nextFieldMatchers = append(nextFieldMatchers, vm.addTransition(val))
3837
}
3938
return nextFieldMatchers
4039
}
4140

4241
// transitionOn returns one or more fieldMatchStates you can transition to on a field's name/value combination,
43-
// or nil if no trnasitions are possible.
42+
// or nil if no transitions are possible.
4443
func (m *fieldMatchState) transitionOn(field *Field) []*fieldMatchState {
4544

4645
// are there transitions on this field name?
@@ -51,15 +50,3 @@ func (m *fieldMatchState) transitionOn(field *Field) []*fieldMatchState {
5150

5251
return valMatcher.transitionOn(field.Val)
5352
}
54-
55-
/* for debugging
56-
func (m *fieldMatchState) String() string {
57-
var keys []string
58-
for k := range m.transitions {
59-
p := strings.ReplaceAll(k, "\n", "**")
60-
keys = append(keys, p)
61-
}
62-
keys = append(keys, fmt.Sprintf(" Matches: %d", len(m.matches)))
63-
return strings.Join(keys, " / ")
64-
}
65-
*/

lib/fj.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,7 @@ func (fj *FJ) stepOneArrayElement() {
661661
fj.arrayTrail[len(fj.arrayTrail)-1].Pos++
662662
}
663663

664-
// ch fetches the next byte from the event, while tracking line/column numbers. It doesn't check array bounds,
664+
// ch fetches the next byte from the event. It doesn't check array bounds,
665665
// so it's the caller's responsibility to ensure we haven't run off the end of the event.
666666
func (fj *FJ) ch() byte {
667667
return fj.event[fj.eventIndex]

lib/flattener.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ type Field struct {
1010
ArrayTrail []ArrayPos
1111
}
1212

13-
// Flattener by example:
13+
// Flattener is provided as an interface in the hope that flatteners for other non-JSON message formats might
14+
// be implemented.
15+
// How it needs to work, by JSON example:
1416
// { "a": 1, "b": "two", "c": true, "d": nil, "e": { "e1": 2, "e2": 3.02e-5}, "f": [33, "x"] }
1517
// should produce
1618
// "a", "1"
@@ -30,7 +32,6 @@ type Field struct {
3032
// "a", "1"
3133
// "f", "33"
3234
// "f", "\"x\""
33-
3435
type Flattener interface {
3536
Flatten(event []byte, nameTracker NameTracker) ([]Field, error)
3637
Reset()

lib/match_set.go

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package quamina
22

3+
// matchSet is what it says on the tin; implements a set semantic on matches, which are of type X. These could all
4+
// be implemented as map[X]bool but this makes the calling code more readable.
35
type matchSet struct {
46
set map[X]bool
57
}
@@ -11,26 +13,12 @@ func newMatchSet() *matchSet {
1113
func (m *matchSet) addX(x X) {
1214
m.set[x] = true
1315
}
14-
func (m *matchSet) removeX(x X) {
15-
delete(m.set, x)
16-
}
16+
1717
func (m *matchSet) contains(x X) bool {
1818
_, ok := m.set[x]
1919
return ok
2020
}
2121

22-
func (m *matchSet) subtractXs(xs []X) {
23-
for _, x := range xs {
24-
delete(m.set, x)
25-
}
26-
}
27-
28-
func (m *matchSet) addSet(addend *matchSet) {
29-
for x := range addend.set {
30-
m.set[x] = true
31-
}
32-
}
33-
3422
func (m *matchSet) matches() []X {
3523
matches := make([]X, 0, len(m.set))
3624
for x := range m.set {

lib/matcher.go

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313
)
1414

1515
// Matcher uses a finite automaton to implement the MatchesForJSONEvent and MatchesForFields functions.
16-
// startState is the start of the atomaton
16+
// startState is the start of the automaton
1717
// namesUsed is a map of field names that are used in any of the patterns that this automaton encodes. Typically,
1818
// patterns only consider a subset of the fields in an incoming data object, and there is no reason to consider
1919
// fields that do not appear in patterns when using the automaton for matching
@@ -66,7 +66,7 @@ func (m *Matcher) AddPattern(x X, patternJSON string) error {
6666

6767
// "states" now holds the set of terminal states arrived at by matching each field in the pattern,
6868
// so update the matches value to indicate this (skipping those that are only there to serve
69-
// exists:false processing
69+
// exists:false processing)
7070
for _, endState := range states {
7171
if !endState.existsFalseFailures.contains(x) {
7272
endState.matches = append(endState.matches, x)
@@ -108,9 +108,9 @@ func (m *Matcher) matchesForSortedFields(fields []Field) *matchSet {
108108

109109
failedExistsFalseMatches := newMatchSet()
110110

111-
// The idea is that we addX potential field transitions to the proposals list; any time such a transition
111+
// The idea is that we add potential field transitions to the proposals list; any time such a transition
112112
// succeeds, i.e. matches a particular field and moves to a new state, we propose transitions from that
113-
// state on all the following fields in the list
113+
// state on all the following fields in the event
114114
// Start by giving each field a chance to match against the start state. Doing it by pre-allocating the
115115
// proposals and filling in their values is observably faster than the more idiomatic append()
116116
proposals := make([]proposedTransition, len(fields))
@@ -134,7 +134,7 @@ func (m *Matcher) matchesForSortedFields(fields []Field) *matchSet {
134134
// for each state in the set of transitions from the proposed state
135135
for _, nextState := range nextStates {
136136

137-
// if arriving at this state means we've matched one or more patterns, record that fact (& lose dupes)
137+
// if arriving at this state means we've matched one or more patterns, record that fact
138138
for _, nextMatch := range nextState.matches {
139139
matches.addX(nextMatch)
140140
}
@@ -187,21 +187,3 @@ func (m *Matcher) IsNameUsed(label []byte) bool {
187187
_, ok := m.namesUsed[string(label)]
188188
return ok
189189
}
190-
191-
// for debugging
192-
/*
193-
func prettyField(f Field) string {
194-
p := string(f.Path)
195-
p = strings.ReplaceAll(p, "\n", "/")
196-
v := string(f.Val)
197-
pv := p + "*" + v + " "
198-
for _, ap := range f.ArrayTrail {
199-
pv = pv + fmt.Sprintf("@%d.%d ", ap.Array, ap.Pos)
200-
}
201-
return pv
202-
}
203-
204-
log := fmt.Sprintf("S: %v, F: %s, matches %d", prop.state, prop.fields[prop.fieldIndex], len(nextStates))
205-
log = strings.ReplaceAll(log, "\n", "**")
206-
fmt.Println(log)
207-
*/

lib/matcher_test.go

Lines changed: 11 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,14 @@ func TestBasicMatching(t *testing.T) {
1515
t.Error(err.Error())
1616
}
1717
shouldMatch := []string{
18-
`{"a": 2, "b": "3", "x": 33}`,
1918
`{"b": "3", "a": 1}`,
19+
`{"a": 2, "b": "3", "x": 33}`,
2020
}
2121
shouldNotMatch := []string{
2222
`{"b": "3", "a": 6}`,
2323
`{"a": 2}`,
2424
`{"b": "3"}`,
2525
}
26-
for _, shouldNot := range shouldNotMatch {
27-
var matches []X
28-
matches, err = m.MatchesForJSONEvent([]byte(shouldNot))
29-
if len(matches) != 0 {
30-
t.Error("Matched: " + shouldNot)
31-
}
32-
}
3326
for _, should := range shouldMatch {
3427
var matches []X
3528
matches, err = m.MatchesForJSONEvent([]byte(should))
@@ -40,6 +33,13 @@ func TestBasicMatching(t *testing.T) {
4033
t.Errorf("event %s, LM %d", should, len(matches))
4134
}
4235
}
36+
for _, shouldNot := range shouldNotMatch {
37+
var matches []X
38+
matches, err = m.MatchesForJSONEvent([]byte(shouldNot))
39+
if len(matches) != 0 {
40+
t.Error("Matched: " + shouldNot)
41+
}
42+
}
4343
}
4444

4545
func TestExerciseMatching(t *testing.T) {
@@ -49,7 +49,7 @@ func TestExerciseMatching(t *testing.T) {
4949
"Height": 600,
5050
"Title": "View from 15th Floor",
5151
"Thumbnail": {
52-
"Url": "http://www.example.com/image/481989943",
52+
"Url": "https://www.example.com/image/481989943",
5353
"Height": 125,
5454
"Width": 100
5555
},
@@ -128,64 +128,9 @@ func TestSimpleAddPattern(t *testing.T) {
128128
t.Errorf("s0 trans len %d", len(s0.transitions))
129129
}
130130

131-
v0, ok := s0.transitions["a"]
131+
_, ok := s0.transitions["a"]
132132
if !ok {
133133
t.Error("No trans from start on 'a'")
134134
}
135-
if len(v0.valueTransitions) != 2 {
136-
t.Errorf("v1 trans %d wanted 2", len(v0.valueTransitions))
137-
}
138-
s1, ok := v0.valueTransitions["1"]
139-
if !ok {
140-
t.Error("no trans on 1 fro s1")
141-
}
142-
s2, ok := v0.valueTransitions["2"]
143-
if !ok {
144-
t.Error("no trans on 2 from s2")
145-
}
146-
if len(s1.transitions) != 1 {
147-
t.Errorf("s1 trans len %d", len(s1.transitions))
148-
}
149-
if len(s2.transitions) != 1 {
150-
t.Errorf("s2 trans len %d", len(s2.transitions))
151-
}
152-
v1, ok := s1.transitions["b"]
153-
if !ok {
154-
t.Error("no trans on b from s1")
155-
}
156-
v2, ok := s2.transitions["b"]
157-
if !ok {
158-
t.Error("no trans on b from s2")
159-
}
160-
for _, v := range []*valueMatchState{v1, v2} {
161-
if len(v.valueTransitions) != 2 {
162-
t.Errorf("trans len on %v = %d", v, len(v.valueTransitions))
163-
}
164-
s3, ok := v.valueTransitions["1"]
165-
if !ok {
166-
t.Error("no trans on 1 at s3")
167-
}
168-
if len(s3.transitions) != 0 {
169-
t.Errorf("len trans s3 = %d", len(s3.transitions))
170-
}
171-
if len(s3.matches) != 1 {
172-
t.Errorf("s3 matches %d", len(s3.matches))
173-
}
174-
if s3.matches[0] != x {
175-
t.Error("s3 match mismatch")
176-
}
177-
s4, ok := v.valueTransitions[`"3"`]
178-
if !ok {
179-
t.Error(`no trans on "3" at s4`)
180-
}
181-
if len(s4.transitions) != 0 {
182-
t.Errorf("len trans s4 = %d", len(s4.transitions))
183-
}
184-
if len(s4.matches) != 1 {
185-
t.Errorf("s4 matches %d", len(s4.matches))
186-
}
187-
if s4.matches[0] != x {
188-
t.Error("s4 match mismatch")
189-
}
190-
}
135+
// TODO: Consider hand-checking the smallValueMatcher structure
191136
}

lib/pattern.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,24 @@ type patternField struct {
2828
vals []typedVal
2929
}
3030

31+
// for debugging
32+
func (f *patternField) String() string {
33+
s := fmt.Sprintf("p=%s: ", f.path)
34+
var v string
35+
for _, val := range f.vals {
36+
switch val.vType {
37+
case existsTrueType:
38+
v = " exists:true"
39+
case existsFalseType:
40+
v = " exists:false"
41+
default:
42+
v = " <" + val.val + ">"
43+
}
44+
s += v
45+
}
46+
return s
47+
}
48+
3149
type patternBuild struct {
3250
jd *json.Decoder
3351
path []string

lib/pattern_test.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,34 +36,34 @@ func TestPatternFromJSON(t *testing.T) {
3636
`{"x": [ {"exists": true} ] }`,
3737
`{"x": { "y": [ {"exists": false} ] } }`,
3838
}
39-
w1 := []*patternField{&patternField{path: "x", vals: []typedVal{typedVal{numberType, "2"}}}}
40-
w2 := []*patternField{&patternField{path: "x", vals: []typedVal{
39+
w1 := []*patternField{{path: "x", vals: []typedVal{{numberType, "2"}}}}
40+
w2 := []*patternField{{path: "x", vals: []typedVal{
4141
{literalType, "null"},
4242
{literalType, "true"},
4343
{literalType, "false"},
4444
{stringType, `"hopp"`},
4545
{numberType, "3.072e-11"},
46-
}}}
46+
}}}
4747
w3 := []*patternField{
48-
&patternField{path: "x\na", vals: []typedVal{
48+
{path: "x\na", vals: []typedVal{
4949
{numberType, "27"},
5050
{numberType, "28"},
5151
}},
52-
&patternField{path: "x\nb\nm", vals: []typedVal{
52+
{path: "x\nb\nm", vals: []typedVal{
5353
{stringType, `"a"`},
5454
{stringType, `"b"`},
5555
}},
5656
}
5757
w4 := []*patternField{
58-
&patternField{path: "x", vals: []typedVal{
58+
{path: "x", vals: []typedVal{
5959
{vType: existsTrueType, val: ""},
6060
},
61-
}}
61+
}}
6262
w5 := []*patternField{
63-
&patternField{path: "x\ny", vals: []typedVal{
63+
{path: "x\ny", vals: []typedVal{
6464
{vType: existsFalseType, val: ""},
6565
},
66-
}}
66+
}}
6767
wanted := [][]*patternField{w1, w2, w3, w4, w5}
6868

6969
for i, good := range goods {

0 commit comments

Comments
 (0)