Skip to content

Commit c2004ea

Browse files
committed
First steps on numbers, back off shellstyle to one '*'
1 parent bcbaf47 commit c2004ea

File tree

10 files changed

+13141
-37
lines changed

10 files changed

+13141
-37
lines changed

README.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,20 @@ The following patterns would match it:
6767
```json
6868
{
6969
"Image": {
70-
"Thumbnail": {
71-
"Url": [ { "shellstyle": "*.example.com/*" } ]
72-
}
70+
"Thumbnail": { "Url": [ { "shellstyle": "*9943" } ] }
71+
}
72+
}
73+
{
74+
"Image": {
75+
"Thumbnail": { "Url": [ { "shellstyle": "http://www.example.com/*" } ] }
7376
}
7477
}
78+
{
79+
"Image": {
80+
"Thumbnail": { "Url": [ { "shellstyle": "http://www.example.com/*9943" } ] }
81+
}
82+
}
83+
7584
```
7685
The structure of the pattern, in terms of field names
7786
and nesting, must be the same as the structure of the event

lib/benchmarks_test.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ library, which would be unacceptable.`
147147
}
148148
}
149149

150+
// - restore when we've got multi-glob working
151+
/*
150152
func TestMySoftwareHatesMe(t *testing.T) {
151153
line := `{ "type": "Feature", "properties": { "STREET": "BELVEDERE" } }`
152154
m := NewMatcher()
@@ -176,6 +178,8 @@ func TestMySoftwareHatesMe(t *testing.T) {
176178
t.Error("no match for EEE")
177179
}
178180
}
181+
*/
182+
179183

180184
func containsX(list []X, x X) bool {
181185
for _, in := range list {
@@ -209,10 +213,12 @@ func TestBigShellStyle(t *testing.T) {
209213
"V": 4322, "W": 4162, "X": 0, "Y": 721, "Z": 25,
210214
}
211215

216+
/* - restore when we've got multi-glob working
212217
funky := map[X]int{
213218
`{"properties": {"STREET":[ {"shellstyle": "N*P*"} ] } }`: 927,
214219
`{"properties": {"STREET":[ {"shellstyle": "*E*E*E*"} ] } }`: 1212,
215220
}
221+
*/
216222

217223
for letter := range wanted {
218224
pat := fmt.Sprintf(`{"properties": {"STREET":[ {"shellstyle": "%s*"} ] } }`, letter)
@@ -222,12 +228,14 @@ func TestBigShellStyle(t *testing.T) {
222228
}
223229
}
224230

231+
/*
225232
for funk := range funky {
226233
err := m.AddPattern(funk, funk.(string))
227234
if err != nil {
228-
t.Errorf("err on %c: %s", funk, err.Error())
235+
t.Errorf("err on %s: %s", funk, err.Error())
229236
}
230237
}
238+
*/
231239
fmt.Println(matcherStats(m))
232240

233241
lineCount := 0
@@ -262,11 +270,14 @@ func TestBigShellStyle(t *testing.T) {
262270
t.Errorf("for %s wanted %d got %d", k, wc, lCounts[k])
263271
}
264272
}
273+
/*
265274
for k, wc := range funky {
266275
if lCounts[k] != wc {
267276
t.Errorf("for %s wanted %d got %d", k, wc, lCounts[k])
268277
}
269278
}
279+
280+
*/
270281
}
271282

272283
// TestPatternAddition adds a whole lot of string-only rules as fast as possible. The profiler says that the

lib/field_matcher.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,18 @@ func (m *fieldMatcher) addTransition(field *patternField) []*fieldMatcher {
4242
var nextFieldMatchers []*fieldMatcher
4343
for _, val := range field.vals {
4444
nextFieldMatchers = append(nextFieldMatchers, vm.addTransition(val))
45+
46+
// if the val is a number, let's add a transition on the canonicalized number
47+
if val.vType == numberType {
48+
c, err := canonicalize([]byte(val.val))
49+
if err == nil {
50+
number := typedVal{
51+
vType: literalType,
52+
val: c,
53+
}
54+
nextFieldMatchers = append(nextFieldMatchers, vm.addTransition(number))
55+
}
56+
}
4557
}
4658
return nextFieldMatchers
4759
}

lib/fj.go

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ func (fj *FJ) readObject(pathName []byte) error {
175175
}
176176

177177
var val []byte
178+
var alt []byte
178179
switch ch {
179180
case '"':
180181
val, err = fj.readStringValue()
@@ -185,7 +186,7 @@ func (fj *FJ) readObject(pathName []byte) error {
185186
case 'n':
186187
val, err = fj.readLiteral(nullBytes)
187188
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
188-
val, err = fj.readNumber()
189+
val, alt, err = fj.readNumber()
189190
case '[':
190191
if !fj.nameTracker.IsNameUsed(memberName) {
191192
fj.skipping++
@@ -228,6 +229,9 @@ func (fj *FJ) readObject(pathName []byte) error {
228229
fj.storeObjectMemberField(pathForChild(pathName, memberName), arrayTrail, val)
229230
}
230231
}
232+
if alt != nil {
233+
alt = nil
234+
}
231235
state = afterValueState
232236
case afterValueState:
233237
switch ch {
@@ -265,6 +269,7 @@ func (fj *FJ) readArray(pathName []byte) error {
265269
for {
266270
ch := fj.ch()
267271
var val []byte // resets on each loop
272+
var alt []byte
268273
switch state {
269274
case inArrayState:
270275
// bypass space before element value. A bit klunky but allows for immense simplification
@@ -285,7 +290,7 @@ func (fj *FJ) readArray(pathName []byte) error {
285290
case 'n':
286291
val, err = fj.readLiteral(nullBytes)
287292
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
288-
val, err = fj.readNumber()
293+
val, alt, err = fj.readNumber()
289294
case '{':
290295
if fj.skipping == 0 {
291296
fj.stepOneArrayElement()
@@ -315,6 +320,9 @@ func (fj *FJ) readArray(pathName []byte) error {
315320
fj.storeArrayElementField(pathName, val)
316321
}
317322
}
323+
if alt != nil {
324+
alt = nil
325+
}
318326
state = afterValueState
319327
case afterValueState:
320328
switch ch {
@@ -341,7 +349,7 @@ func (fj *FJ) readArray(pathName []byte) error {
341349
* these higher-level funcs are going to advance the pointer after each invocation
342350
*/
343351

344-
func (fj *FJ) readNumber() ([]byte, error) {
352+
func (fj *FJ) readNumber() ([]byte, []byte, error) {
345353
numStart := fj.eventIndex
346354
state := numberStartState
347355
for {
@@ -354,7 +362,7 @@ func (fj *FJ) readNumber() ([]byte, error) {
354362
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
355363
state = numberIntegralPartState
356364
default:
357-
return nil, fj.error(fmt.Sprintf("illegal char '%c' in number", ch))
365+
return nil, nil, fj.error(fmt.Sprintf("illegal char '%c' in number", ch))
358366
}
359367
case numberIntegralPartState:
360368
switch ch {
@@ -366,35 +374,56 @@ func (fj *FJ) readNumber() ([]byte, error) {
366374
state = numberExpState
367375
case ',', ']', '}', ' ', '\t', '\n', '\r':
368376
fj.eventIndex--
369-
return fj.event[numStart : fj.eventIndex+1], nil
377+
// TODO: Too expensive; make it possible for people to ask for this
378+
//bytes := fj.event[numStart : fj.eventIndex+1]
379+
//c, err := canonicalize(bytes)
380+
var alt []byte
381+
//if err == nil {
382+
// alt = []byte(c)
383+
//}
384+
return fj.event[numStart : fj.eventIndex+1], alt, nil
370385
default:
371-
return nil, fj.error(fmt.Sprintf("illegal char '%c' in number", ch))
386+
return nil, nil, fj.error(fmt.Sprintf("illegal char '%c' in number", ch))
372387
}
373388
case numberFracState:
374389
switch ch {
375390
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
376391
// no-op
377392
case ',', ']', '}', ' ', '\t', '\n', '\r':
378393
fj.eventIndex--
379-
return fj.event[numStart : fj.eventIndex+1], nil
394+
bytes := fj.event[numStart : fj.eventIndex+1]
395+
// TODO: Too expensive; make it possible for people to ask for this
396+
// c, err := canonicalize(bytes)
397+
var alt []byte
398+
//if err == nil {
399+
// alt = []byte(c)
400+
//}
401+
return bytes, alt, nil
380402
case 'e':
381403
state = numberExpState
382404
default:
383-
return nil, fj.error(fmt.Sprintf("illegal char '%c' in number", ch))
405+
return nil, nil, fj.error(fmt.Sprintf("illegal char '%c' in number", ch))
384406
}
385407
case numberExpState:
386408
switch ch {
387409
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
388410
// no-op
389411
case ',', ']', '}', ' ', '\t', '\n', '\r':
390412
fj.eventIndex--
391-
return fj.event[numStart : fj.eventIndex+1], nil
413+
// bytes := fj.event[numStart : fj.eventIndex+1]
414+
// TODO: Too expensive; make it possible for people to ask for this
415+
// c, err := canonicalize(bytes)
416+
var alt []byte
417+
// if err == nil {
418+
// alt = []byte(c)
419+
// }
420+
return fj.event[numStart : fj.eventIndex+1], alt, nil
392421
}
393422
default:
394-
return nil, fj.error(fmt.Sprintf("illegal char '%c' in number", ch))
423+
return nil, nil, fj.error(fmt.Sprintf("illegal char '%c' in number", ch))
395424
}
396425
if fj.step() != nil {
397-
return nil, fj.error("event truncated in number")
426+
return nil, nil, fj.error("event truncated in number")
398427
}
399428
}
400429
}

lib/matcher_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,10 @@ func TestExerciseMatching(t *testing.T) {
6464
`{"Image": { "Title": [ { "exists": true } ] } }`,
6565
`{"Image": { "Width": [800], "Title": [ { "exists": true } ], "Animated": [ false ] } }`,
6666
`{"Image": { "Width": [800], "IDs": [ { "exists": true } ] } }`,
67-
`{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://*.example.com/*" } ] } } }`,
67+
//`{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://*.example.com/*" } ] } } }`,
68+
`{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "*9943" } ] } } }`,
69+
`{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://www.example.com/*" } ] } } }`,
70+
`{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://www.example.com/*9943" } ] } } }`,
6871
}
6972

7073
var err error

lib/numbers.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package quamina
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"strconv"
7+
)
8+
9+
const (
	// nineDigits is both the exclusive bound on the magnitude of numbers
	// canonicalize accepts and the scale factor it applies to them.
	nineDigits = 1000000000.0
	// digitsOfPrecision is the maximum length, in bytes, of the textual
	// number canonicalize will accept.
	digitsOfPrecision = 18
)

// canonicalize converts the textual number s into a fixed-width string so
// that different spellings of one value (for example "350", "350.0" and
// "3.5e2") yield the same result.
//
// The number is parsed as a float64, shifted by nineDigits so it becomes
// positive, scaled by nineDigits, and printed with no fraction, zero-padded
// to 19 characters.
//
// An error is returned when s is longer than digitsOfPrecision bytes, when
// it cannot be parsed as a floating-point number, when it is NaN, or when
// its value is not strictly between -nineDigits and nineDigits.
func canonicalize(s []byte) (string, error) {
	if len(s) > digitsOfPrecision {
		return "", fmt.Errorf("number has %d digits, exceeds max of %d", len(s), digitsOfPrecision)
	}

	// bitSize must be 32 or 64; we want full float64 precision.
	f, err := strconv.ParseFloat(string(s), 64)
	if err != nil {
		return "", err
	}

	// ParseFloat accepts "NaN". NaN compares false against everything, so it
	// would slip through the range check below and be formatted as text that
	// is not a number. NaN is the only value that is unequal to itself.
	if f != f {
		return "", errors.New("number is NaN")
	}
	if f >= nineDigits || f <= -nineDigits {
		return "", fmt.Errorf("number is outside of range [%f, %f]", -nineDigits, nineDigits)
	}

	return fmt.Sprintf("%019.0f", (f+nineDigits)*nineDigits), nil
}
29+

lib/numbers_test.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package quamina
2+
3+
import (
4+
"fmt"
5+
"math"
6+
"math/rand"
7+
"sort"
8+
"testing"
9+
)
10+
11+
func TestVariants(t *testing.T) {
12+
f := []string{
13+
"350",
14+
"350.0",
15+
"350.0000000000",
16+
"3.5e2",
17+
}
18+
var o []string
19+
for _, s := range f {
20+
c, err := canonicalize([]byte(s))
21+
if err != nil {
22+
t.Errorf("canon err on %s: %s", s, err.Error())
23+
}
24+
o = append(o, c)
25+
}
26+
for i := 1; i < len(o); i++ {
27+
if o[i] != o[i-1] {
28+
t.Errorf("%s and %s differ", o[i-1], o[i])
29+
}
30+
}
31+
}
32+
33+
func TestOrdering(t *testing.T) {
34+
var in []float64
35+
for i := 0; i < 10000; i++ {
36+
f := rand.Float64() * math.Pow(10, 9) * 2
37+
f -= nineDigits
38+
in = append(in, f)
39+
}
40+
sort.Float64s(in)
41+
var out []string
42+
for _, f := range in {
43+
s := fmt.Sprintf("%f", f)
44+
c, err := canonicalize([]byte(s))
45+
if err != nil {
46+
t.Errorf("failed on %s", s)
47+
}
48+
out = append(out, c)
49+
}
50+
if !sort.StringsAreSorted(out) {
51+
t.Errorf("Not sorted")
52+
}
53+
for i, c := range out {
54+
if len(c) != 19 {
55+
t.Errorf("%s: %d at %d", c, len(c), i)
56+
}
57+
}
58+
}
59+

0 commit comments

Comments
 (0)