Commit 00fcf80

Updates scanner to support Lua extension
Fixed up the Scanner logic to mirror the changes made to support the Lua extension in Lex. Added a compatibility layer so that the existing Lua type can be used with `Scanner`, rather than refactoring the implementation to remove the channel; I think removing the channel would yield further gains.

Benchmarks:

```
goarch: arm64
pkg: github.com/nginxinc/nginx-go-crossplane
BenchmarkLex/simple-10                          61224    18869 ns/op   103049 B/op    39 allocs/op
BenchmarkLex/with-comments-10                   56320    19776 ns/op   103113 B/op    45 allocs/op
BenchmarkLex/messy-10                           25918    47312 ns/op   104400 B/op   168 allocs/op
BenchmarkLex/quote-behavior-10                  72890    15389 ns/op   102960 B/op    26 allocs/op
BenchmarkLex/quoted-right-brace-10              44002    27143 ns/op   103561 B/op    54 allocs/op
BenchmarkLex/comments-between-args-10           79369    15303 ns/op   102937 B/op    27 allocs/op
BenchmarkLexWithLua/lua-basic-10                51590    23743 ns/op   103385 B/op    49 allocs/op
BenchmarkLexWithLua/lua-block-simple-10         24564    48282 ns/op   104488 B/op   157 allocs/op
BenchmarkLexWithLua/lua-block-larger-10         23427    48567 ns/op   104376 B/op   144 allocs/op
BenchmarkLexWithLua/lua-block-tricky-10         33526    36308 ns/op   103896 B/op   117 allocs/op
BenchmarkScanner/simple-10                     170299     7450 ns/op     4648 B/op    36 allocs/op
BenchmarkScanner/with-comments-10              120178     9462 ns/op     4712 B/op    42 allocs/op
BenchmarkScanner/messy-10                       43105    27796 ns/op     6000 B/op   165 allocs/op
BenchmarkScanner/quote-behavior-10             207045     5587 ns/op     4560 B/op    23 allocs/op
BenchmarkScanner/quoted-right-brace-10          79261    15685 ns/op     5160 B/op    51 allocs/op
BenchmarkScanner/comments-between-args-10      216628     5411 ns/op     4536 B/op    24 allocs/op
BenchmarkScannerWithLua/lua-basic-10            80594    15127 ns/op     7867 B/op    66 allocs/op
BenchmarkScannerWithLua/lua-block-simple-10     28033    42450 ns/op    10922 B/op   156 allocs/op
BenchmarkScannerWithLua/lua-block-larger-10     33932    33655 ns/op    10771 B/op    72 allocs/op
BenchmarkScannerWithLua/lua-block-tricky-10     51888    23334 ns/op     9050 B/op    79 allocs/op
PASS
ok      github.com/nginxinc/nginx-go-crossplane  30.055s
```
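For orientation, here is a minimal usage sketch of the extension point this commit adds, based on the API in the diffs below. The directive name `content_by_lua_block` and the zero-value `Lua` construction are assumptions for illustration; the actual token names the Lua extension registers are not shown in this diff.

```go
package main

import (
	"errors"
	"fmt"
	"io"
	"strings"

	crossplane "github.com/nginxinc/nginx-go-crossplane"
)

func main() {
	conf := `http { content_by_lua_block { ngx.say("hello") } }`

	// LexWithLexer now returns a value that satisfies both the Lex option
	// interface and ScannerOption, so one registration works for both APIs.
	lua := &crossplane.Lua{} // assumption: zero value is usable
	s := crossplane.NewScanner(strings.NewReader(conf),
		crossplane.LexWithLexer(lua, "content_by_lua_block"))

	for {
		tok, err := s.Scan()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			panic(err)
		}
		fmt.Println(tok) // Token's new String method prints {line, text, quoted}
	}
}
```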
1 parent 9956b1b commit 00fcf80

File tree: 5 files changed, +272 -63 lines changed

lex.go (+57 -3)

```diff
@@ -65,6 +65,7 @@ type LexOptions struct {
 // RegisterLexer is an option that can be used to add a lexer to tokenize external NGINX tokens.
 type RegisterLexer interface {
 	applyLexOptions(options *LexOptions)
+	applyScannerOptions(options *scannerOptions)
 }
 
 type registerLexer struct {
@@ -82,6 +83,16 @@ func (rl registerLexer) applyLexOptions(o *LexOptions) {
 	}
 }
 
+func (rl registerLexer) applyScannerOptions(o *scannerOptions) {
+	if o.extensions == nil {
+		o.extensions = make(map[string]ScannerExt)
+	}
+
+	for _, s := range rl.stringTokens {
+		o.extensions[s] = &LexerScanner{lexer: rl.l}
+	}
+}
+
 // LexWithLexer registers a Lexer that implements tokenization of an NGINX configuration after one of the given
 // stringTokens is encountered by Lex.
 func LexWithLexer(l Lexer, stringTokens ...string) RegisterLexer { //nolint:ireturn
@@ -106,12 +117,38 @@ func Lex(reader io.Reader) chan NgxToken {
 // SubScanner provides an interface for scanning alternative grammars within NGINX configuration data.
 type SubScanner struct {
 	scanner   *bufio.Scanner
+	parent    *Scanner
 	tokenLine int
 }
 
 // Scan advances the scanner to the next token which will be available through the Text method. It returns false
 // when the scan stops by reaching the end of input.
 func (e *SubScanner) Scan() bool {
+	if e.scanner != nil {
+		return e.lexScan()
+	}
+
+	if e.parent.err != nil {
+		return false
+	}
+
+	if !e.parent.scanner.Scan() {
+		if err := e.parent.scanner.Err(); err != nil {
+			e.parent.setErr(err)
+		}
+		return false
+	}
+
+	// e.parent.prev = e.parent.scanner.Text()
+	// if isEOL(e.parent.prev) {
+	if t := e.parent.scanner.Text(); isEOL(t) {
+		e.parent.lineno++
+	}
+
+	return true
+}
+
+func (e *SubScanner) lexScan() bool {
 	if !e.scanner.Scan() {
 		return false
 	}
@@ -122,13 +159,30 @@ func (e *SubScanner) Scan() bool {
 	}
 }
 
 // Err returns the first non-EOF error encountered by the Scanner.
-func (e *SubScanner) Err() error { return e.scanner.Err() }
+func (e *SubScanner) Err() error {
+	if e.scanner != nil {
+		return e.scanner.Err()
+	}
+	return e.parent.Err()
+}
 
 // Text returns the most recent token generated by a call to Scan.
-func (e *SubScanner) Text() string { return e.scanner.Text() }
+func (e *SubScanner) Text() string {
+	if e.scanner != nil {
+		return e.scanner.Text()
+	}
+	// return e.parent.prev
+	return e.parent.scanner.Text()
+}
 
 // Line returns the line number of the most recent token generated by a call to Scan.
-func (e *SubScanner) Line() int { return e.tokenLine }
+func (e *SubScanner) Line() int {
+	if e.scanner != nil {
+		return e.tokenLine
+	}
+
+	return e.parent.lineno
+}
 
 //nolint:gocyclo,funlen,gocognit,maintidx
 func tokenize(reader io.Reader, tokenCh chan NgxToken, options LexOptions) {
```
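With this change `SubScanner` serves two sources: when `scanner` is set it delegates to the `bufio.Scanner` driven by the channel-based `Lex` path, and when `parent` is set it pulls runes directly from a `Scanner`, tracking line numbers itself. A toy `Lexer` sketch, hypothetical and shown only to illustrate the interface shape that now works on both paths:

```go
// rawUntilBrace is a toy Lexer (not part of the library): after its trigger
// directive it collects every rune the SubScanner yields until the next "}"
// and emits the accumulated run as a single token.
type rawUntilBrace struct{}

func (rawUntilBrace) Lex(s *SubScanner, matchedToken string) <-chan NgxToken {
	ch := make(chan NgxToken)
	go func() {
		defer close(ch)
		var body strings.Builder
		for s.Scan() { // rune-at-a-time on either underlying source
			if s.Text() == "}" {
				ch <- NgxToken{Value: body.String(), Line: s.Line()}
				return
			}
			body.WriteString(s.Text())
		}
		if err := s.Err(); err != nil {
			ch <- NgxToken{Error: err, Line: s.Line()}
		}
	}()
	return ch
}
```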

lex_test.go (+38 -19)

```diff
@@ -460,34 +460,53 @@ func TestLex(t *testing.T) {
 	}
 }
 
-var lexToken NgxToken //nolint: gochecknoglobals // trying to avoid return value being optimized away
-
-func BenchmarkLex(b *testing.B) {
+func benchmarkLex(b *testing.B, path string, options LexOptions) {
 	var t NgxToken
 
+	file, err := os.Open(path)
+	if err != nil {
+		b.Fatal(err)
+	}
+	defer file.Close()
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		if _, err := file.Seek(0, 0); err != nil {
+			b.Fatal(err)
+		}
+
+		for tok := range Lex(file) {
+			t = tok
+		}
+	}
+
+	_ = t
+}
+
+func BenchmarkLex(b *testing.B) {
 	for _, bm := range lexFixtures {
+		if strings.HasPrefix(bm.name, "lua") {
+			continue
+		}
+
 		b.Run(bm.name, func(b *testing.B) {
 			path := getTestConfigPath(bm.name, "nginx.conf")
-			file, err := os.Open(path)
-			if err != nil {
-				b.Fatal(err)
-			}
-			defer file.Close()
-			b.ResetTimer()
+			benchmarkLex(b, path, LexOptions{})
+		})
+	}
+}
 
-			for i := 0; i < b.N; i++ {
-				if _, err := file.Seek(0, 0); err != nil {
-					b.Fatal(err)
-				}
+func BenchmarkLexWithLua(b *testing.B) {
+	for _, bm := range lexFixtures {
+		if !strings.HasPrefix(bm.name, "lua") {
+			continue
+		}
 
-				for tok := range Lex(file) {
-					t = tok
-				}
-			}
+		b.Run(bm.name, func(b *testing.B) {
+			path := getTestConfigPath(bm.name, "nginx.conf")
+			benchmarkLex(b, path, LexOptions{})
 		})
 	}
-
-	lexToken = t
 }
 
 //nolint:gochecknoglobals
```
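The `BenchmarkScanner` and `BenchmarkScannerWithLua` numbers in the commit message come from the fifth changed file, which is not shown in this excerpt. A sketch of what the Scanner-side helper presumably looks like, mirroring `benchmarkLex` above (the name `benchmarkScanner` is a guess, not the committed code):

```go
// benchmarkScanner is a hypothetical in-package helper: it re-scans the same
// fixture file b.N times, constructing a fresh Scanner per iteration.
func benchmarkScanner(b *testing.B, path string) {
	file, err := os.Open(path)
	if err != nil {
		b.Fatal(err)
	}
	defer file.Close()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		if _, err := file.Seek(0, 0); err != nil {
			b.Fatal(err)
		}

		s := NewScanner(file)
		for {
			if _, err := s.Scan(); err != nil {
				if errors.Is(err, io.EOF) {
					break // Scan returns io.EOF at the end of the source
				}
				b.Fatal(err)
			}
		}
	}
}
```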

lua.go (+11 -0)

```diff
@@ -175,6 +175,17 @@ func (l *Lua) Lex(s *SubScanner, matchedToken string) <-chan NgxToken {
 	return tokenCh
 }
 
+type LuaScanner struct {
+	scanner      *SubScanner
+	matchedToken string
+	lua          *Lua
+	ch           <-chan NgxToken
+}
+
+func (l *Lua) Init(s *SubScanner, matchedToken string) Tokenizer {
+	return &LexerScanner{lexer: l, matchedToken: matchedToken}
+}
+
 // RegisterBuilder registers a builder for generating Lua NGINX configuration.
 func (l *Lua) RegisterBuilder() RegisterBuilder { //nolint:ireturn
 	return BuildWithBuilder(l, l.directiveNames()...)
```

scanner.go (+114 -13)

```diff
@@ -8,6 +8,18 @@ import (
 	"strings"
 )
 
+type scannerOptions struct {
+	extensions map[string]ScannerExt
+}
+
+type ScannerOption interface {
+	applyScannerOptions(options *scannerOptions)
+}
+
+type scannerOptionFunc func(*scannerOptions)
+
+func (opt scannerOptionFunc) applyScanner(opts *scannerOptions) { opt(opts) }
+
 // Token is a lexical token of the NGINX configuration syntax.
 type Token struct {
 	// Text is the string corresponding to the token. It could be a directive or symbol. The value is the actual token
@@ -20,6 +32,8 @@ type Token struct {
 	IsQuoted bool
 }
 
+func (t Token) String() string { return fmt.Sprintf("{%d, %s, %t}", t.Line, t.Text, t.IsQuoted) }
+
 type scannerError struct {
 	msg  string
 	line int
@@ -52,23 +66,33 @@ func LineNumber(err error) (int, bool) {
 //
 // Use NewScanner to construct a Scanner.
 type Scanner struct {
-	scanner            *bufio.Scanner
-	lineno             int
-	tokenStartLine     int
-	tokenDepth         int
-	repeateSpecialChar bool // only '}' can be repeated
-	prev               string
-	err                error
+	scanner              *bufio.Scanner
+	lineno               int
+	tokenStartLine       int
+	tokenDepth           int
+	repeateSpecialChar   bool // only '}' can be repeated
+	nextTokenIsDirective bool
+	prev                 string
+	err                  error
+	options              *scannerOptions
+	ext                  Tokenizer
 }
 
 // NewScanner returns a new Scanner to read from r.
-func NewScanner(r io.Reader) *Scanner {
+func NewScanner(r io.Reader, options ...ScannerOption) *Scanner {
+	opts := &scannerOptions{}
+	for _, opt := range options {
+		opt.applyScannerOptions(opts)
+	}
+
 	s := &Scanner{
-		scanner:            bufio.NewScanner(r),
-		lineno:             1,
-		tokenStartLine:     1,
-		tokenDepth:         0,
-		repeateSpecialChar: false,
+		scanner:              bufio.NewScanner(r),
+		lineno:               1,
+		tokenStartLine:       1,
+		tokenDepth:           0,
+		repeateSpecialChar:   false,
+		nextTokenIsDirective: true,
+		options:              opts,
 	}
 
 	s.scanner.Split(bufio.ScanRunes)
@@ -93,6 +117,20 @@ func (s *Scanner) setErr(err error) {
 // Scan reads the next token from source and returns it. It returns io.EOF at the end of the source. Scanner errors are
 // returned when encountered.
 func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
+	if s.ext != nil {
+		t, err := s.ext.Next()
+		if err != nil {
+			if !errors.Is(err, TokenizerDone) {
+				s.setErr(err)
+				return Token{}, s.err
+			}
+
+			s.ext = nil
+		} else {
+			return t, nil
+		}
+	}
+
 	var tok strings.Builder
 
 	lexState := skipSpace
@@ -129,6 +167,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 			r = nextRune
 			if isEOL(r) {
 				s.lineno++
+				s.nextTokenIsDirective = true
 			}
 		default:
 			readNext = true
@@ -149,6 +188,16 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 			r = "\\" + r
 		}
 
+		if tok.Len() > 0 {
+			t := tok.String()
+			if s.nextTokenIsDirective {
+				if ext, ok := s.options.extensions[t]; ok {
+					s.ext = ext.Tokenizer(&SubScanner{parent: s, tokenLine: s.tokenStartLine}, t)
+					return Token{Text: t, Line: s.tokenStartLine}, nil
+				}
+			}
+		}
+
 		switch lexState {
 		case skipSpace:
 			if !isSpace(r) {
@@ -166,11 +215,13 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 				tok.WriteString(r)
 				lexState = inComment
 				s.tokenStartLine = s.lineno
+				s.nextTokenIsDirective = false
 				continue
 			}
 		}
 
 		if isSpace(r) {
+			s.nextTokenIsDirective = false
 			return Token{Text: tok.String(), Line: s.tokenStartLine}, nil
 		}
 
@@ -179,6 +230,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 			tok.WriteString(r)
 			lexState = inVar
 			s.repeateSpecialChar = false
+			s.nextTokenIsDirective = false
 			continue
 		}
 
@@ -223,6 +275,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 		}
 
 		tok.WriteString(r)
+		s.nextTokenIsDirective = true
 		return Token{Text: tok.String(), Line: s.tokenStartLine}, nil
 	}
 
@@ -250,3 +303,51 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 		}
 	}
 }
+
+// ScannerExt is the interface that describes an extension for the [Scanner]. Scanner extensions enable scanning of
+// configurations that contain syntaxes that do not follow the usual grammar.
+type ScannerExt interface {
+	Tokenizer(s *SubScanner, matchedToken string) Tokenizer
+}
+
+// TokenizerDone is returned by [Tokenizer] when tokenization is complete.
+var TokenizerDone = errors.New("done")
+
+// Tokenizer is the interface that wraps the Next method.
+//
+// Next returns the next token scanned from the NGINX configuration or an error if the configuration cannot be
+// tokenized. Return the special error [TokenizerDone] when finished tokenizing.
+type Tokenizer interface {
+	Next() (Token, error)
+}
+
+// LexerScanner is a compatibility layer between Lexers and Scanner.
+type LexerScanner struct {
+	lexer        Lexer
+	scanner      *SubScanner
+	matchedToken string
+	ch           <-chan NgxToken
+}
+
+func (s *LexerScanner) Tokenizer(scanner *SubScanner, matchedToken string) Tokenizer {
+	s.scanner = scanner
+	s.matchedToken = matchedToken
+	return s
+}
+
+func (s *LexerScanner) Next() (Token, error) {
+	if s.ch == nil {
+		s.ch = s.lexer.Lex(s.scanner, s.matchedToken)
+	}
+
+	ngxTok, ok := <-s.ch
	if !ok {
		return Token{}, TokenizerDone
	}
 
+	if ngxTok.Error != nil {
+		return Token{}, newScannerErrf(ngxTok.Line, ngxTok.Error.Error())
+	}
+
+	return Token{Text: ngxTok.Value, Line: ngxTok.Line, IsQuoted: ngxTok.IsQuoted}, nil
+}
```
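`Scanner.Scan` now drains the active `Tokenizer` until it returns the `TokenizerDone` sentinel, then resumes normal scanning. `LexerScanner` adapts the channel-based `Lexer` to this pull model; a native `Tokenizer` could skip the channel and goroutine entirely, which is the further gain the commit message alludes to. A hypothetical channel-free sketch — only the `Tokenizer` interface, `TokenizerDone`, and `SubScanner` here come from this commit:

```go
// braceBody is a hypothetical channel-free Tokenizer: it pulls runes from the
// SubScanner and returns everything up to the next "}" as one token, then
// reports TokenizerDone so the Scanner resumes normal scanning.
type braceBody struct {
	s    *SubScanner
	done bool
}

func (t *braceBody) Next() (Token, error) {
	if t.done {
		return Token{}, TokenizerDone
	}
	var body strings.Builder
	for t.s.Scan() {
		if t.s.Text() == "}" {
			t.done = true
			return Token{Text: body.String(), Line: t.s.Line()}, nil
		}
		body.WriteString(t.s.Text())
	}
	if err := t.s.Err(); err != nil {
		return Token{}, err
	}
	t.done = true
	return Token{}, TokenizerDone
}
```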
