@@ -2,16 +2,50 @@ package core
22
33import (
44 "bufio"
5+ "compress/gzip"
56 "fmt"
67 "os"
78 "runtime"
89 "runtime/debug"
10+ "sync"
911 "testing"
1012 "time"
1113)
1214
1315const oneMeg = 1024 * 1024
1416
17+ var cityLotsLock sync.Mutex
18+ var cityLotsLines [][]byte
19+ var cityLotsLineCount int
20+
21+ func getCityLotsLines (t * testing.T ) [][]byte {
22+ cityLotsLock .Lock ()
23+ defer cityLotsLock .Unlock ()
24+ if cityLotsLines != nil {
25+ return cityLotsLines
26+ }
27+ file , err := os .Open ("../testdata/citylots.jlines.gz" )
28+ if err != nil {
29+ t .Error ("Can't open citlots.jlines.gz: " + err .Error ())
30+ }
31+ defer func (file * os.File ) {
32+ _ = file .Close ()
33+ }(file )
34+ zr , err := gzip .NewReader (file )
35+ if err != nil {
36+ t .Error ("Can't open zip reader: " + err .Error ())
37+ }
38+
39+ scanner := bufio .NewScanner (zr )
40+ buf := make ([]byte , oneMeg )
41+ scanner .Buffer (buf , oneMeg )
42+ for scanner .Scan () {
43+ cityLotsLineCount ++
44+ cityLotsLines = append (cityLotsLines , []byte (scanner .Text ()))
45+ }
46+ return cityLotsLines
47+ }
48+
1549func TestCRANLEIGH (t * testing.T ) {
1650
1751 jCranleigh := `{ "type": "Feature", "properties": { "MAPBLKLOT": "7222001", "BLKLOT": "7222001", "BLOCK_NUM": "7222", "LOT_NUM": "001", "FROM_ST": "1", "TO_ST": "1", "STREET": "CRANLEIGH", "ST_TYPE": "DR", "ODD_EVEN": "O" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -122.472773074480756, 37.73439178240811, 0.0 ], [ -122.47278111723567, 37.73451247621523, 0.0 ], [ -122.47242608711845, 37.73452184591072, 0.0 ], [ -122.472418368113281, 37.734401143064396, 0.0 ], [ -122.472773074480756, 37.73439178240811, 0.0 ] ] ] } }`
@@ -55,14 +89,6 @@ const thresholdPerformance = 1.0
5589// that it uses geometry/coordinates, which will force the fj flattener to process the big arrays of numbers in
5690// each line. A high proportion of typical Quamina workloads should run faster.
5791func TestCityLots (t * testing.T ) {
58- file , err := os .Open ("../testdata/citylots.jlines" )
59- if err != nil {
60- t .Error ("Can't open file: " + err .Error ())
61- }
62- defer func (file * os.File ) {
63- _ = file .Close ()
64- }(file )
65-
6692 patterns := []string {
6793 `{ "properties": { "STREET": [ "CRANLEIGH" ] } }` ,
6894 `{ "properties": { "STREET": [ "17TH" ], "ODD_EVEN": [ "E"] } }` ,
@@ -82,10 +108,7 @@ func TestCityLots(t *testing.T) {
82108 "0011008" : 1 ,
83109 }
84110
85- scanner := bufio .NewScanner (file )
86- buf := make ([]byte , oneMeg )
87- scanner .Buffer (buf , oneMeg )
88-
111+ var err error
89112 m := NewCoreMatcher ()
90113 for i := range names {
91114 err = m .AddPattern (names [i ], patterns [i ])
@@ -96,20 +119,13 @@ func TestCityLots(t *testing.T) {
96119 fj := NewFJ (m )
97120 results := make (map [X ]int )
98121
99- lineCount := 0
100- var lines [][]byte
101- for scanner .Scan () {
102- lineCount ++
103- lines = append (lines , []byte (scanner .Text ()))
104- }
105- lineCount = 0
122+ lines := getCityLotsLines (t )
106123 before := time .Now ()
107124 for _ , line := range lines {
108125 matches , err := fj .FlattenAndMatch (line )
109126 if err != nil {
110127 t .Error ("Matches4JSON: " + err .Error ())
111128 }
112- lineCount ++
113129 for _ , match := range matches {
114130 count , ok := results [match ]
115131 if ! ok {
@@ -121,7 +137,7 @@ func TestCityLots(t *testing.T) {
121137 fmt .Println ()
122138
123139 elapsed := float64 (time .Since (before ).Milliseconds ())
124- perSecond := float64 (lineCount ) / (elapsed / 1000.0 )
140+ perSecond := float64 (cityLotsLineCount ) / (elapsed / 1000.0 )
125141 fmt .Printf ("%.2f matches/second\n \n " , perSecond )
126142
127143 if perSecond < thresholdPerformance {
@@ -135,11 +151,6 @@ func TestCityLots(t *testing.T) {
135151 t .Errorf (message1 + message2 )
136152 }
137153
138- err = scanner .Err ()
139- if err != nil {
140- t .Error ("Scanner error: " + err .Error ())
141- }
142-
143154 if len (results ) != len (wanted ) {
144155 t .Errorf ("got %d results, wanted %d" , len (results ), len (wanted ))
145156 }
@@ -185,18 +196,7 @@ func TestMySoftwareHatesMe(t *testing.T) {
185196
186197// exercise shellstyle matching a little; it is much faster than TestCityLots because it's only working with one field
187198func TestBigShellStyle (t * testing.T ) {
188- file , err := os .Open ("../testdata/citylots.jlines" )
189- if err != nil {
190- t .Error ("Can't open file: " + err .Error ())
191- }
192- defer func (file * os.File ) {
193- _ = file .Close ()
194- }(file )
195-
196- scanner := bufio .NewScanner (file )
197- buf := make ([]byte , oneMeg )
198- scanner .Buffer (buf , oneMeg )
199-
199+ lines := getCityLotsLines (t )
200200 m := NewCoreMatcher ()
201201
202202 wanted := map [X ]int {
@@ -231,12 +231,6 @@ func TestBigShellStyle(t *testing.T) {
231231 */
232232 fmt .Println (matcherStats (m ))
233233
234- lineCount := 0
235- var lines [][]byte
236- for scanner .Scan () {
237- lineCount ++
238- lines = append (lines , []byte (scanner .Text ()))
239- }
240234 lCounts := make (map [X ]int )
241235 before := time .Now ()
242236 fj := NewFJ (m )
@@ -256,7 +250,7 @@ func TestBigShellStyle(t *testing.T) {
256250 }
257251 }
258252 elapsed := float64 (time .Since (before ).Milliseconds ())
259- perSecond := float64 (lineCount ) / (elapsed / 1000.0 )
253+ perSecond := float64 (cityLotsLineCount ) / (elapsed / 1000.0 )
260254 fmt .Printf ("%.2f matches/second with letter patterns\n \n " , perSecond )
261255
262256 for k , wc := range wanted {
0 commit comments