@@ -8,6 +8,18 @@ import (
 	"strings"
 )
 
+type scannerOptions struct {
+	extensions map[string]ScannerExt
+}
+
+type ScannerOption interface {
+	applyScannerOptions(options *scannerOptions)
+}
+
+type scannerOptionFunc func(*scannerOptions)
+
+func (opt scannerOptionFunc) applyScannerOptions(opts *scannerOptions) { opt(opts) }
+
 // Token is a lexical token of the NGINX configuration syntax.
 type Token struct {
 	// Text is the string corresponding to the token. It could be a directive or symbol. The value is the actual token
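The new scannerOptions/ScannerOption/scannerOptionFunc trio is the usual functional-options pattern, but this hunk never shows how an extension lands in the extensions map. A registration helper along these lines would fit the adapter; withExtension is a hypothetical sketch, not part of this change:

func withExtension(directive string, ext ScannerExt) ScannerOption {
	return scannerOptionFunc(func(o *scannerOptions) {
		// Allocate lazily so the zero-value scannerOptions stays usable.
		if o.extensions == nil {
			o.extensions = map[string]ScannerExt{}
		}
		// Scan consults this map when the token just read may start a directive.
		o.extensions[directive] = ext
	})
}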
@@ -20,6 +32,8 @@ type Token struct {
 	IsQuoted bool
 }
 
+func (t Token) String() string { return fmt.Sprintf("{%d, %s, %t}", t.Line, t.Text, t.IsQuoted) }
+
 type scannerError struct {
 	msg  string
 	line int
@@ -52,23 +66,33 @@ func LineNumber(err error) (int, bool) {
 //
 // Use NewScanner to construct a Scanner.
 type Scanner struct {
-	scanner            *bufio.Scanner
-	lineno             int
-	tokenStartLine     int
-	tokenDepth         int
-	repeateSpecialChar bool // only '}' can be repeated
-	prev               string
-	err                error
+	scanner              *bufio.Scanner
+	lineno               int
+	tokenStartLine       int
+	tokenDepth           int
+	repeateSpecialChar   bool // only '}' can be repeated
+	nextTokenIsDirective bool
+	prev                 string
+	err                  error
+	options              *scannerOptions
+	ext                  Tokenizer
 }
 
 // NewScanner returns a new Scanner to read from r.
-func NewScanner(r io.Reader) *Scanner {
+func NewScanner(r io.Reader, options ...ScannerOption) *Scanner {
+	opts := &scannerOptions{}
+	for _, opt := range options {
+		opt.applyScannerOptions(opts)
+	}
+
 	s := &Scanner{
-		scanner:            bufio.NewScanner(r),
-		lineno:             1,
-		tokenStartLine:     1,
-		tokenDepth:         0,
-		repeateSpecialChar: false,
+		scanner:              bufio.NewScanner(r),
+		lineno:               1,
+		tokenStartLine:       1,
+		tokenDepth:           0,
+		repeateSpecialChar:   false,
+		nextTokenIsDirective: true,
+		options:              opts,
 	}
 
 	s.scanner.Split(bufio.ScanRunes)
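Because the new options parameter is variadic, existing NewScanner callers compile unchanged. A minimal in-package read loop, assuming only names visible in this file:

s := NewScanner(strings.NewReader("events {\n}\n"))
for {
	tok, err := s.Scan()
	if errors.Is(err, io.EOF) {
		break // end of the source, per Scan's contract
	}
	if err != nil {
		panic(err) // a scannerError carrying the offending line number
	}
	fmt.Println(tok) // Token.String renders e.g. {1, events, false}
}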
@@ -93,6 +117,20 @@ func (s *Scanner) setErr(err error) {
 // Scan reads the next token from the source and returns it. It returns io.EOF at the end of the source. Scanner
 // errors are returned when encountered.
 func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
+	if s.ext != nil {
+		t, err := s.ext.Next()
+		if err != nil {
+			if !errors.Is(err, TokenizerDone) {
+				s.setErr(err)
+				return Token{}, s.err
+			}
+
+			s.ext = nil
+		} else {
+			return t, nil
+		}
+	}
+
 	var tok strings.Builder
 
 	lexState := skipSpace
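The prologue above drains an active extension before normal lexing resumes: tokens pass straight through, TokenizerDone clears s.ext so the call falls through to the main loop, and any other error becomes sticky via setErr. A toy Tokenizer honoring that contract, purely for illustration:

type staticTokenizer struct{ toks []Token }

func (t *staticTokenizer) Next() (Token, error) {
	if len(t.toks) == 0 {
		// Completion is signaled with the sentinel, not a real error.
		return Token{}, TokenizerDone
	}
	next := t.toks[0]
	t.toks = t.toks[1:]
	return next, nil
}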
@@ -129,6 +167,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 			r = nextRune
 			if isEOL(r) {
 				s.lineno++
+				s.nextTokenIsDirective = true
 			}
 		default:
 			readNext = true
@@ -149,6 +188,16 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 			r = "\\" + r
 		}
 
+		if tok.Len() > 0 {
+			t := tok.String()
+			if s.nextTokenIsDirective {
+				if ext, ok := s.options.extensions[t]; ok {
+					s.ext = ext.Tokenizer(&SubScanner{parent: s, tokenLine: s.tokenStartLine}, t)
+					return Token{Text: t, Line: s.tokenStartLine}, nil
+				}
+			}
+		}
+
 		switch lexState {
 		case skipSpace:
 			if !isSpace(r) {
@@ -166,11 +215,13 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 				tok.WriteString(r)
 				lexState = inComment
 				s.tokenStartLine = s.lineno
+				s.nextTokenIsDirective = false
 				continue
 			}
 		}
 
 		if isSpace(r) {
+			s.nextTokenIsDirective = false
 			return Token{Text: tok.String(), Line: s.tokenStartLine}, nil
 		}
 
@@ -179,6 +230,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 			tok.WriteString(r)
 			lexState = inVar
 			s.repeateSpecialChar = false
+			s.nextTokenIsDirective = false
 			continue
 		}
 
@@ -223,6 +275,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 			}
 
 			tok.WriteString(r)
+			s.nextTokenIsDirective = true
 			return Token{Text: tok.String(), Line: s.tokenStartLine}, nil
 		}
 
@@ -250,3 +303,51 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
 		}
 	}
 }
+
+// ScannerExt is the interface that describes an extension for the [Scanner]. Scanner extensions enable scanning of
+// configurations that contain syntaxes that do not follow the usual grammar.
+type ScannerExt interface {
+	Tokenizer(s *SubScanner, matchedToken string) Tokenizer
+}
+
+// TokenizerDone is returned by a [Tokenizer] when tokenization is complete.
+var TokenizerDone = errors.New("done")
+
+// Tokenizer is the interface that wraps the Next method.
+//
+// Next returns the next token scanned from the NGINX configuration or an error if the configuration cannot be
+// tokenized. Return the special error [TokenizerDone] when finished tokenizing.
+type Tokenizer interface {
+	Next() (Token, error)
+}
+
+// LexerScanner is a compatibility layer between Lexers and the Scanner.
+type LexerScanner struct {
+	lexer        Lexer
+	scanner      *SubScanner
+	matchedToken string
+	ch           <-chan NgxToken
+}
+
+func (s *LexerScanner) Tokenizer(scanner *SubScanner, matchedToken string) Tokenizer {
+	s.scanner = scanner
+	s.matchedToken = matchedToken
+	return s
+}
+
+func (s *LexerScanner) Next() (Token, error) {
+	if s.ch == nil {
+		s.ch = s.lexer.Lex(s.scanner, s.matchedToken)
+	}
+
+	ngxTok, ok := <-s.ch
+	if !ok {
+		return Token{}, TokenizerDone
+	}
+
+	if ngxTok.Error != nil {
+		return Token{}, newScannerErrf(ngxTok.Line, ngxTok.Error.Error())
+	}
+
+	return Token{Text: ngxTok.Value, Line: ngxTok.Line, IsQuoted: ngxTok.IsQuoted}, nil
+}
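Taken together: a channel-based Lexer can now drive the Scanner for a block it owns. LexerScanner doubles as the ScannerExt (via its Tokenizer method) and the Tokenizer itself, so wiring one up is one option away. This end-to-end sketch reuses the hypothetical withExtension helper from above and picks "lua_block" as an arbitrary directive name:

func scanWithLexer(r io.Reader, lx Lexer) ([]Token, error) {
	s := NewScanner(r, withExtension("lua_block", &LexerScanner{lexer: lx}))

	var toks []Token
	for {
		t, err := s.Scan()
		if errors.Is(err, io.EOF) {
			return toks, nil // source fully consumed
		}
		if err != nil {
			return nil, err
		}
		toks = append(toks, t)
	}
}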