Fix grammar for multiple entry points for compile mode

ncw · ncw · commit 0adc14aa2ede · 2014-11-10T18:03:36.000Z
diff --git a/ast/ast.go b/ast/ast.go
@@ -1,5 +1,8 @@
 package ast
 
+// FIXME make a base AstNode type which carries the position.
+// Also keep a list of children in the parent node to simplify walking the tree?
+
 import (
 	"fmt"
 
diff --git a/parser/grammar.y b/parser/grammar.y
@@ -12,21 +12,21 @@ import (
 %}
 
 %union {
-	str string
-	obj py.Object
-	ast ast.Ast
-	mod ast.Mod
-	stmt ast.Stmt
-	stmts []ast.Stmt
-	stmts1 []ast.Stmt // nl_or_stmt accumulator
-	stmts2 []ast.Stmt // small_stmts accumulator
-	stmts3 []ast.Stmt // stmts accumulator
-	pos ast.Pos // kept up to date by the lexer
+	str	string
+	obj	py.Object
+	ast	ast.Ast
+	mod	ast.Mod
+	stmt	ast.Stmt
+	stmts	[]ast.Stmt
+	stmts1	[]ast.Stmt	// nl_or_stmt accumulator
+	stmts2	[]ast.Stmt	// small_stmts accumulator
+	stmts3	[]ast.Stmt	// stmts accumulator
+	pos	ast.Pos		// kept up to date by the lexer
 }
 
 %type <str> strings
 %type <ast> atom
-%type <mod> inputs file_input
+%type <mod> inputs file_input single_input eval_input
 %type <stmts> simple_stmt stmt 
 %type <stmts1> nl_or_stmt 
 %type <stmts2> small_stmts
@@ -101,6 +101,8 @@ import (
 
 %token '(' ')' '[' ']' ':' ',' ';' '+' '-' '*' '/' '|' '&' '<' '>' '=' '.' '%' '{' '}' '^' '~' '@'
 
+%token SINGLE_INPUT FILE_INPUT EVAL_INPUT
+
 // Note:  Changing the grammar specified in this file will most likely
 //        require corresponding changes in the parser module
 //        (../Modules/parsermodule.c).  If you can't make the changes to
@@ -119,18 +121,45 @@ import (
 
 %%
 
-// FIXME figure out how to tell the parser to start from a given node
-// inputs: single_input | file_input | eval_input
-// In the mean time just do file_input
-// inputs: single_input | file_input | eval_input
+// Start of grammar. The input begins with one of 3 pseudo tokens which
+// selects the entry point (single, file or eval input) that is parsed.
 inputs:
-	file_input
+	SINGLE_INPUT single_input
+	{
+		yylex.(*yyLex).mod = $2
+		return 0
+	}
+|	FILE_INPUT file_input
+	{
+		yylex.(*yyLex).mod = $2
+		return 0
+	}
+|	EVAL_INPUT eval_input
 	{
-		yylex.(*yyLex).mod = $1
+		yylex.(*yyLex).mod = $2
 		return 0
 	}
 
-single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+single_input:
+	NEWLINE
+	{
+		$$ = &ast.Interactive{ModBase: ast.ModBase{$<pos>$}}
+	}
+|	simple_stmt
+	{
+		$$ = &ast.Interactive{ModBase: ast.ModBase{$<pos>$}, Body: $1}
+	}
+|	compound_stmt NEWLINE
+	{
+		$$ = &ast.Interactive{ModBase: ast.ModBase{$<pos>$}, Body: []ast.Stmt{$1}}
+	}
+
+//file_input: (NEWLINE | stmt)* ENDMARKER
+file_input:
+	nl_or_stmt ENDMARKER
+	{
+		$$ = &ast.Module{ModBase: ast.ModBase{$<pos>$}, Body: $1}
+	}
 
 // (NEWLINE | stmt)*
 nl_or_stmt:
@@ -145,19 +174,15 @@ nl_or_stmt:
 		$$ = append($$, $2...)
 	}
 
-//file_input: (NEWLINE | stmt)* ENDMARKER
-file_input:
-	nl_or_stmt ENDMARKER
+//eval_input: testlist NEWLINE* ENDMARKER
+eval_input:
+	testlist nls ENDMARKER
 	{
-		$$ = &ast.Module{ModBase: ast.ModBase{$<pos>$}, Body: $1}
 	}
 
 // NEWLINE*
 nls: | nls NEWLINE
 
-//eval_input: testlist NEWLINE* ENDMARKER
-eval_input: testlist nls ENDMARKER
-
 optional_arglist: | arglist
 
 optional_arglist_call: | '(' optional_arglist ')'
diff --git a/parser/lexer.go b/parser/lexer.go
@@ -46,15 +46,33 @@ type yyLex struct {
 	parenthesis   int     // number of open ( )
 	brace         int     // number of open { }
 	mod           ast.Mod // output
+	startToken    int     // initial token to output
 }
 
-func NewLex(r io.Reader) *yyLex {
+// NewLex creates a new lexer reading from r.
+//
+// The mode argument selects the grammar entry point: 'exec' for a
+// sequence of statements, 'eval' for a single expression, or 'single'
+// for a single interactive statement. Any other mode returns a
+// ValueError.
+func NewLex(r io.Reader, mode string) (*yyLex, error) {
 	x := &yyLex{
 		reader:      bufio.NewReader(r),
 		indentStack: []int{0},
 		state:       readString,
 	}
-	return x
+	switch mode {
+	case "exec":
+		x.startToken = FILE_INPUT
+	case "eval":
+		x.startToken = EVAL_INPUT
+	case "single":
+		x.startToken = SINGLE_INPUT
+		x.interactive = true
+	default:
+		return nil, py.ExceptionNewf(py.ValueError, "compile mode must be 'exec', 'eval' or 'single'")
+	}
+	return x, nil
 }
 
 // Refill line
@@ -222,6 +240,9 @@ func init() {
 	tokenToString[DEDENT] = "DEDENT"
 	tokenToString[STRING] = "STRING"
 	tokenToString[NUMBER] = "NUMBER"
+	tokenToString[FILE_INPUT] = "FILE_INPUT"
+	tokenToString[SINGLE_INPUT] = "SINGLE_INPUT"
+	tokenToString[EVAL_INPUT] = "EVAL_INPUT"
 }
 
 // True if there are any open brackets
@@ -309,6 +330,13 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
 		defer func() { fmt.Printf("LEX> %v\n", newLexToken(ret, yylval)) }()
 	}
 
+	// Return the initial pseudo token once, before any tokens from the input
+	if x.startToken != eof {
+		token := x.startToken
+		x.startToken = eof
+		return token
+	}
+
 	// FIXME keep x.pos up to date
 	x.pos.Lineno = 42
 	x.pos.ColOffset = 43
@@ -318,13 +346,14 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
 			// Read x.line
 			x.refill()
 			x.state++
-			// an empty line while reading interactive input should return a NEWLINE
-			if x.interactive && (x.line == "" || x.line == "\n") {
+			if x.line == "" && x.eof {
+				x.state = checkEof
+				// at end of input, interactive mode should return a final NEWLINE
 				// Don't output NEWLINE if brackets are open
-				if x.openBrackets() {
-					continue
+				if x.interactive && !x.openBrackets() {
+					return NEWLINE
 				}
-				return NEWLINE
+				continue
 			}
 		case readIndent:
 			// Read the initial indent and get rid of it
@@ -799,20 +828,26 @@ func SetDebug(level int) {
 }
 
 // Parse a file
-func Parse(in io.Reader) (ast.Mod, error) {
-	lex := NewLex(in)
+func Parse(in io.Reader, mode string) (ast.Mod, error) {
+	lex, err := NewLex(in, mode)
+	if err != nil {
+		return nil, err
+	}
 	yyParse(lex)
 	return lex.mod, lex.ErrorReturn()
 }
 
 // Parse a string
-func ParseString(in string) (ast.Ast, error) {
-	return Parse(bytes.NewBufferString(in))
+func ParseString(in string, mode string) (ast.Ast, error) {
+	return Parse(bytes.NewBufferString(in), mode)
 }
 
 // Lex a file only, returning a sequence of tokens
-func Lex(in io.Reader) (lts LexTokens, err error) {
-	lex := NewLex(in)
+func Lex(in io.Reader, mode string) (lts LexTokens, err error) {
+	lex, err := NewLex(in, mode)
+	if err != nil {
+		return nil, err
+	}
 	yylval := yySymType{}
 	for {
 		ret := lex.Lex(&yylval)
@@ -827,6 +862,6 @@ func Lex(in io.Reader) (lts LexTokens, err error) {
 }
 
 // Lex a string
-func LexString(in string) (lts LexTokens, err error) {
-	return Lex(bytes.NewBufferString(in))
+func LexString(in string, mode string) (lts LexTokens, err error) {
+	return Lex(bytes.NewBufferString(in), mode)
 }
diff --git a/parser/lexer_test.go b/parser/lexer_test.go