Skip to content

Commit 8dc6b03

Browse files
committed
Add backslash continuations and make lexer obey yyDebug so it isn't so noisy
1 parent 4d27ead commit 8dc6b03

File tree

2 files changed

+50
-20
lines changed

2 files changed

+50
-20
lines changed

parser/lexer.go

+46-20
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
package parser
22

3-
// FIXME need to implement end of line continuations with \
4-
// If find \ at end of line, read next line then skip indent calcs
5-
63
// FIXME need to implement formfeed
74

5+
// Lexer should count line numbers too!
6+
87
import (
98
"bufio"
109
"bytes"
@@ -58,14 +57,16 @@ func NewLex(r io.Reader) *yyLex {
5857
func (x *yyLex) refill() {
5958
var err error
6059
x.line, err = x.reader.ReadString('\n')
61-
fmt.Printf("line = %q, err = %v\n", x.line, err)
60+
if yyDebug >= 2 {
61+
fmt.Printf("line = %q, err = %v\n", x.line, err)
62+
}
6263
switch err {
6364
case nil:
6465
case io.EOF:
6566
x.eof = true
6667
default:
6768
x.eof = true
68-
x.Error(fmt.Sprintf("Error reading input: %v", err))
69+
x.Errorf("Error reading input: %v", err)
6970
}
7071
}
7172

@@ -238,16 +239,18 @@ const (
238239
// The parser calls this method to get each new token. This
239240
// implementation returns operators and NUM.
240241
func (x *yyLex) Lex(yylval *yySymType) (ret int) {
241-
defer func() {
242-
name := tokenToString[ret]
243-
if ret == NAME {
244-
fmt.Printf("LEX> %q (%d) = %q\n", name, ret, yylval.str)
245-
} else if ret == STRING || ret == NUMBER {
246-
fmt.Printf("LEX> %q (%d) = %T{%v}\n", name, ret, yylval.obj, yylval.obj)
247-
} else {
248-
fmt.Printf("LEX> %q (%d) \n", name, ret)
249-
}
250-
}()
242+
if yyDebug >= 2 {
243+
defer func() {
244+
name := tokenToString[ret]
245+
if ret == NAME {
246+
fmt.Printf("LEX> %q (%d) = %q\n", name, ret, yylval.str)
247+
} else if ret == STRING || ret == NUMBER {
248+
fmt.Printf("LEX> %q (%d) = %T{%v}\n", name, ret, yylval.obj, yylval.obj)
249+
} else {
250+
fmt.Printf("LEX> %q (%d) \n", name, ret)
251+
}
252+
}()
253+
}
251254

252255
for {
253256
switch x.state {
@@ -323,6 +326,16 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
323326
return NEWLINE
324327
}
325328

329+
// Check for a line continuation: a backslash immediately followed by the end of the line
330+
if x.line[0] == '\\' && (len(x.line) <= 1 || x.line[1] == '\n') {
331+
if x.eof {
332+
return eof
333+
}
334+
x.refill()
335+
x.state = parseTokens
336+
continue
337+
}
338+
326339
// Read a number if available
327340
token, value := x.readNumber()
328341
if token != eof {
@@ -646,6 +659,11 @@ found:
646659
escape := false
647660
for i, c := range x.line {
648661
if escape {
662+
// Continuation line - drop the trailing \ from the buffer, then go and read the next line
663+
if c == '\n' {
664+
buf.Truncate(buf.Len() - 1)
665+
goto readMore
666+
}
649667
buf.WriteRune(c)
650668
escape = false
651669
} else {
@@ -663,11 +681,12 @@ found:
663681
}
664682
}
665683
if !multiLineString {
666-
x.Error("Unterminated single quoted string")
684+
x.Errorf("Unterminated %sx%s string", stringEnd, stringEnd)
667685
return eofError, nil
668686
}
687+
readMore:
669688
if x.eof {
670-
x.Error("Unterminated triple quoted string")
689+
x.Errorf("Unterminated %sx%s string", stringEnd, stringEnd)
671690
return eofError, nil
672691
}
673692
x.refill()
@@ -685,9 +704,16 @@ foundEndOfString:
685704
// The parser calls this method on a parse error.
686705
func (x *yyLex) Error(s string) {
687706
x.error = true
688-
log.Printf("Parse error: %s", s)
689-
log.Printf("Parse buffer %q", x.line)
690-
log.Printf("State %#v", x)
707+
if yyDebug >= 1 {
708+
log.Printf("Parse error: %s", s)
709+
log.Printf("Parse buffer %q", x.line)
710+
log.Printf("State %#v", x)
711+
}
712+
}
713+
714+
// Errorf formats its arguments with fmt.Sprintf and reports the resulting message via Error.
715+
func (x *yyLex) Errorf(format string, a ...interface{}) {
716+
x.Error(fmt.Sprintf(format, a...))
691717
}
692718

693719
// Set the debug level: 0 = off, 4 = max

parser/lexer_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ func TestLexerReadString(t *testing.T) {
210210
{`"a\"c"`, STRING, py.String(`a\"c`), ``},
211211
{`"a\\"+`, STRING, py.String(`a\\`), `+`},
212212
{`"a`, eofError, nil, `a`},
213+
{"\"a\\\nb\"c", STRING, py.String(`ab`), `c`},
213214

214215
{`''a`, STRING, py.String(``), `a`},
215216
{`U'abc'`, STRING, py.String(`abc`), ``},
@@ -218,6 +219,7 @@ func TestLexerReadString(t *testing.T) {
218219
{`'a\'c'`, STRING, py.String(`a\'c`), ``},
219220
{`'\n`, eofError, nil, `\n`},
220221
{`'a`, eofError, nil, `a`},
222+
{"'\\\n\\\npotato\\\nX\\\n'c", STRING, py.String(`potatoX`), `c`},
221223

222224
{`""""""a`, STRING, py.String(``), `a`},
223225
{`u"""abc"""`, STRING, py.String(`abc`), ``},
@@ -227,6 +229,7 @@ func TestLexerReadString(t *testing.T) {
227229
{`"""a`, eofError, nil, `a`},
228230
{"\"\"\"a\nb\nc\n\"\"\"\n", STRING, py.String("a\nb\nc\n"), "\n"},
229231
{"\"\"\"a\nb\nc\na", eofError, nil, "a"},
232+
{"\"\"\"a\\\nb\"\"\"c", STRING, py.String(`ab`), `c`},
230233

231234
{`''''''a`, STRING, py.String(``), `a`},
232235
{`U'''abc'''`, STRING, py.String(`abc`), ``},
@@ -236,6 +239,7 @@ func TestLexerReadString(t *testing.T) {
236239
{`'''a`, eofError, nil, `a`},
237240
{"'''a\nb\nc\n'''\n", STRING, py.String("a\nb\nc\n"), "\n"},
238241
{"'''a\nb\nc\na", eofError, nil, "a"},
242+
{"'''\\\na\\\nb\\\n'''c", STRING, py.String(`ab`), `c`},
239243

240244
{`b""a`, STRING, py.Bytes{}, "a"},
241245
{`b'abc'`, STRING, py.Bytes(string(`abc`)), ``},

0 commit comments

Comments
 (0)