Add backslash continuations and make lexer obey yyDebug so it isn't so noisy

ncw · ncw · commit 8dc6b0365d3e · 2014-10-31T16:40:50.000Z
diff --git a/parser/lexer.go b/parser/lexer.go
@@ -1,10 +1,9 @@
 package parser
 
-// FIXME need to implement end of line continuations with \
-// If find \ at end of line, read next line then skip indent calcs
-
 // FIXME need to implement formfeed
 
+// FIXME lexer should count line numbers too
+
 import (
 	"bufio"
 	"bytes"
@@ -58,14 +57,16 @@ func NewLex(r io.Reader) *yyLex {
 func (x *yyLex) refill() {
 	var err error
 	x.line, err = x.reader.ReadString('\n')
-	fmt.Printf("line = %q, err = %v\n", x.line, err)
+	if yyDebug >= 2 {
+		fmt.Printf("line = %q, err = %v\n", x.line, err)
+	}
 	switch err {
 	case nil:
 	case io.EOF:
 		x.eof = true
 	default:
 		x.eof = true
-		x.Error(fmt.Sprintf("Error reading input: %v", err))
+		x.Errorf("Error reading input: %v", err)
 	}
 }
 
@@ -238,16 +239,18 @@ const (
 // The parser calls this method to get each new token.  This
 // implementation returns operators and NUM.
 func (x *yyLex) Lex(yylval *yySymType) (ret int) {
-	defer func() {
-		name := tokenToString[ret]
-		if ret == NAME {
-			fmt.Printf("LEX> %q (%d) = %q\n", name, ret, yylval.str)
-		} else if ret == STRING || ret == NUMBER {
-			fmt.Printf("LEX> %q (%d) = %T{%v}\n", name, ret, yylval.obj, yylval.obj)
-		} else {
-			fmt.Printf("LEX> %q (%d) \n", name, ret)
-		}
-	}()
+	if yyDebug >= 2 {
+		defer func() {
+			name := tokenToString[ret]
+			if ret == NAME {
+				fmt.Printf("LEX> %q (%d) = %q\n", name, ret, yylval.str)
+			} else if ret == STRING || ret == NUMBER {
+				fmt.Printf("LEX> %q (%d) = %T{%v}\n", name, ret, yylval.obj, yylval.obj)
+			} else {
+				fmt.Printf("LEX> %q (%d) \n", name, ret)
+			}
+		}()
+	}
 
 	for {
 		switch x.state {
@@ -323,6 +326,16 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
 				return NEWLINE
 			}
 
+			// Check if continuation character
+			if x.line[0] == '\\' && (len(x.line) <= 1 || x.line[1] == '\n') {
+				if x.eof {
+					return eof
+				}
+				x.refill()
+				x.state = parseTokens
+				continue
+			}
+
 			// Read a number if available
 			token, value := x.readNumber()
 			if token != eof {
@@ -646,6 +659,11 @@ found:
 		escape := false
 		for i, c := range x.line {
 			if escape {
+				// Continuation line - remove the \ from buf, then read the next line
+				if c == '\n' {
+					buf.Truncate(buf.Len() - 1)
+					goto readMore
+				}
 				buf.WriteRune(c)
 				escape = false
 			} else {
@@ -663,11 +681,12 @@ found:
 			}
 		}
 		if !multiLineString {
-			x.Error("Unterminated single quoted string")
+			x.Errorf("Unterminated %sx%s string", stringEnd, stringEnd)
 			return eofError, nil
 		}
+	readMore:
 		if x.eof {
-			x.Error("Unterminated triple quoted string")
+			x.Errorf("Unterminated %sx%s string", stringEnd, stringEnd)
 			return eofError, nil
 		}
 		x.refill()
@@ -685,9 +704,16 @@ foundEndOfString:
 // The parser calls this method on a parse error.
 func (x *yyLex) Error(s string) {
 	x.error = true
-	log.Printf("Parse error: %s", s)
-	log.Printf("Parse buffer %q", x.line)
-	log.Printf("State %#v", x)
+	if yyDebug >= 1 {
+		log.Printf("Parse error: %s", s)
+		log.Printf("Parse buffer %q", x.line)
+		log.Printf("State %#v", x)
+	}
+}
+
+// Errorf formats a message as fmt.Sprintf does and reports it through Error.
+func (x *yyLex) Errorf(format string, a ...interface{}) {
+	x.Error(fmt.Sprintf(format, a...))
 }
 
 // Set the debug level 0 = off, 4 = max
diff --git a/parser/lexer_test.go b/parser/lexer_test.go
@@ -210,6 +210,7 @@ func TestLexerReadString(t *testing.T) {
 		{`"a\"c"`, STRING, py.String(`a\"c`), ``},
 		{`"a\\"+`, STRING, py.String(`a\\`), `+`},
 		{`"a`, eofError, nil, `a`},
+		{"\"a\\\nb\"c", STRING, py.String(`ab`), `c`},
 
 		{`''a`, STRING, py.String(``), `a`},
 		{`U'abc'`, STRING, py.String(`abc`), ``},
@@ -218,6 +219,7 @@ func TestLexerReadString(t *testing.T) {
 		{`'a\'c'`, STRING, py.String(`a\'c`), ``},
 		{`'\n`, eofError, nil, `\n`},
 		{`'a`, eofError, nil, `a`},
+		{"'\\\n\\\npotato\\\nX\\\n'c", STRING, py.String(`potatoX`), `c`},
 
 		{`""""""a`, STRING, py.String(``), `a`},
 		{`u"""abc"""`, STRING, py.String(`abc`), ``},
@@ -227,6 +229,7 @@ func TestLexerReadString(t *testing.T) {
 		{`"""a`, eofError, nil, `a`},
 		{"\"\"\"a\nb\nc\n\"\"\"\n", STRING, py.String("a\nb\nc\n"), "\n"},
 		{"\"\"\"a\nb\nc\na", eofError, nil, "a"},
+		{"\"\"\"a\\\nb\"\"\"c", STRING, py.String(`ab`), `c`},
 
 		{`''''''a`, STRING, py.String(``), `a`},
 		{`U'''abc'''`, STRING, py.String(`abc`), ``},
@@ -236,6 +239,7 @@ func TestLexerReadString(t *testing.T) {
 		{`'''a`, eofError, nil, `a`},
 		{"'''a\nb\nc\n'''\n", STRING, py.String("a\nb\nc\n"), "\n"},
 		{"'''a\nb\nc\na", eofError, nil, "a"},
+		{"'''\\\na\\\nb\\\n'''c", STRING, py.String(`ab`), `c`},
 
 		{`b""a`, STRING, py.Bytes{}, "a"},
 		{`b'abc'`, STRING, py.Bytes(string(`abc`)), ``},