Skip to content

Commit e352670

Browse files
committed
parser: fix "single" mode and return correct EOF error
1 parent 5775015 commit e352670

File tree

8 files changed

+6200
-6153
lines changed

8 files changed

+6200
-6153
lines changed

notes.txt

-26
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ Things to do before release
1818
===========================
1919

2020
* Line numbers
21-
* compile single
22-
* interactive interpreter
2321
* Subclass builtins
2422
* pygen
2523

@@ -29,36 +27,12 @@ FIXME recursive types for __repr__ in list, dict, tuple
2927
>>> L
3028
[[...]]
3129

32-
interactive interpreter
33-
calls compile(..., "single") until it doesn't get "SyntaxError: unexpected EOF while parsing" ?
34-
see pythonrun.c
35-
PyRun_InteractiveOneObject(FILE *fp, PyObject *filename, PyCompilerFlags *flags)
36-
...
37-
mod = PyParser_ASTFromFileObject(fp, filename, enc,
38-
Py_single_input, ps1, ps2,
39-
flags, &errcode, arena);
40-
Py_XDECREF(v);
41-
Py_XDECREF(w);
42-
Py_XDECREF(oenc);
43-
if (mod == NULL) {
44-
PyArena_Free(arena);
45-
if (errcode == E_EOF) {
46-
PyErr_Clear();
47-
return E_EOF;
48-
}
49-
PyErr_Print();
50-
return -1;
51-
}
52-
53-
5430
Limitations & Missing parts
5531
===========================
5632
* string keys only in dictionaries
5733
* intermediate fix - stop it panicking!
5834
* line numbers missing in SyntaxErrors
5935
* \N{...} escapes not implemented
60-
* Interactive interpreter does single lines only
61-
* compile(..., "single") not working
6236
* lots of builtins still to implement
6337
* FIXME eq && ne should throw an error for a type which doesn't have eq implemented
6438
* repr/str

parser/grammar.y

+6-4
Original file line numberDiff line numberDiff line change
@@ -259,18 +259,20 @@ inputs:
259259
}
260260

261261
single_input:
262-
NEWLINE
262+
/* NEWLINE
263263
{
264-
// panic("FIXME no coverage")
264+
// This is in the python grammar, but the interpreter
265+
// just gives "unexpected EOF while parsing" when you
266+
// give it a \n
265267
$$ = &ast.Interactive{ModBase: ast.ModBase{Pos: $<pos>$}}
266268
}
267-
| simple_stmt
269+
|*/ simple_stmt
268270
{
269271
$$ = &ast.Interactive{ModBase: ast.ModBase{Pos: $<pos>$}, Body: $1}
270272
}
271273
| compound_stmt NEWLINE
272274
{
273-
// panic("FIXME no coverage")
275+
// NB: compound_stmt in single_input is followed by extra NEWLINE!
274276
$$ = &ast.Interactive{ModBase: ast.ModBase{Pos: $<pos>$}, Body: []ast.Stmt{$1}}
275277
}
276278

parser/grammar_data_test.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@ var grammarTestData = []struct {
2929
{"b'abc' b'''123'''", "eval", "Expression(body=Bytes(s=b'abc123'))", nil, ""},
3030
{"1234", "eval", "Expression(body=Num(n=1234))", nil, ""},
3131
{"01234", "eval", "", py.SyntaxError, "illegal decimal with leading zero"},
32-
{"1234d", "eval", "", py.SyntaxError, "invalid syntax"},
32+
{"1234d", "eval", "", py.SyntaxError, "unexpected EOF while parsing"},
3333
{"1234d", "exec", "", py.SyntaxError, "invalid syntax"},
34+
{"1234d", "single", "", py.SyntaxError, "unexpected EOF while parsing"},
3435
{"0x1234", "eval", "Expression(body=Num(n=4660))", nil, ""},
3536
{"12.34", "eval", "Expression(body=Num(n=12.34))", nil, ""},
3637
{"1,", "eval", "Expression(body=Tuple(elts=[Num(n=1)], ctx=Load()))", nil, ""},
@@ -311,5 +312,9 @@ var grammarTestData = []struct {
311312
{"@dec(a,b,c=d,*args,**kwargs)\ndef fn():\n pass\n", "exec", "Module(body=[FunctionDef(name='fn', args=arguments(args=[], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=[Pass()], decorator_list=[Call(func=Name(id='dec', ctx=Load()), args=[Name(id='a', ctx=Load()), Name(id='b', ctx=Load())], keywords=[keyword(arg='c', value=Name(id='d', ctx=Load()))], starargs=Name(id='args', ctx=Load()), kwargs=Name(id='kwargs', ctx=Load()))], returns=None)])", nil, ""},
312313
{"@dec1\n@dec2()\n@dec3(a)\n@dec4(a,b)\ndef fn():\n pass\n", "exec", "Module(body=[FunctionDef(name='fn', args=arguments(args=[], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=[Pass()], decorator_list=[Name(id='dec1', ctx=Load()), Call(func=Name(id='dec2', ctx=Load()), args=[], keywords=[], starargs=None, kwargs=None), Call(func=Name(id='dec3', ctx=Load()), args=[Name(id='a', ctx=Load())], keywords=[], starargs=None, kwargs=None), Call(func=Name(id='dec4', ctx=Load()), args=[Name(id='a', ctx=Load()), Name(id='b', ctx=Load())], keywords=[], starargs=None, kwargs=None)], returns=None)])", nil, ""},
313314
{"@dec1\n@dec2()\n@dec3(a)\n@dec4(a,b)\nclass A(B):\n pass\n", "exec", "Module(body=[ClassDef(name='A', bases=[Name(id='B', ctx=Load())], keywords=[], starargs=None, kwargs=None, body=[Pass()], decorator_list=[Name(id='dec1', ctx=Load()), Call(func=Name(id='dec2', ctx=Load()), args=[], keywords=[], starargs=None, kwargs=None), Call(func=Name(id='dec3', ctx=Load()), args=[Name(id='a', ctx=Load())], keywords=[], starargs=None, kwargs=None), Call(func=Name(id='dec4', ctx=Load()), args=[Name(id='a', ctx=Load()), Name(id='b', ctx=Load())], keywords=[], starargs=None, kwargs=None)])])", nil, ""},
315+
{"", "single", "", py.SyntaxError, "unexpected EOF while parsing"},
316+
{"\n", "single", "", py.SyntaxError, "unexpected EOF while parsing"},
314317
{"pass\n", "single", "Interactive(body=[Pass()])", nil, ""},
318+
{"if True:\n pass\n\n", "single", "Interactive(body=[If(test=NameConstant(value=True), body=[Pass()], orelse=[])])", nil, ""},
319+
{"while True:\n pass\nelse:\n return\n", "single", "Interactive(body=[While(test=NameConstant(value=True), body=[Pass()], orelse=[Return(value=None)])])", nil, ""},
315320
}

parser/lexer.go

+65-31
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ type yyLex struct {
4747
parenthesis int // number of open ( )
4848
brace int // number of open { }
4949
mod ast.Mod // output
50-
startToken int // initial token to output
50+
tokens []int // buffered tokens to output
5151
}
5252

5353
// Create a new lexer
@@ -64,19 +64,41 @@ func NewLex(r io.Reader, mode string) (*yyLex, error) {
6464
}
6565
switch mode {
6666
case "exec":
67-
x.startToken = FILE_INPUT
67+
x.queue(FILE_INPUT)
6868
x.exec = true
6969
case "eval":
70-
x.startToken = EVAL_INPUT
70+
x.queue(EVAL_INPUT)
7171
case "single":
72-
x.startToken = SINGLE_INPUT
72+
x.queue(SINGLE_INPUT)
7373
x.interactive = true
7474
default:
7575
return nil, py.ExceptionNewf(py.ValueError, "compile mode must be 'exec', 'eval' or 'single'")
7676
}
7777
return x, nil
7878
}
7979

80+
// queue tokens for later return
81+
func (x *yyLex) queue(tokens ...int) {
82+
x.tokens = append(x.tokens, tokens...)
83+
}
84+
85+
// Return whether the token queue is empty
86+
func (x *yyLex) queueEmpty() bool {
87+
return len(x.tokens) == 0
88+
}
89+
90+
// dequeue a token for return
91+
//
92+
// panic if no token available
93+
func (x *yyLex) dequeue() int {
94+
if x.queueEmpty() {
95+
panic("token queue empty")
96+
}
97+
token := x.tokens[0]
98+
x.tokens = x.tokens[1:]
99+
return token
100+
}
101+
80102
// Refill line
81103
func (x *yyLex) refill() {
82104
var err error
@@ -328,6 +350,14 @@ func (lts LexTokens) String() string {
328350
return buf.String()
329351
}
330352

353+
// Queue any remaining DEDENTS
354+
func (x *yyLex) queueDedents() {
355+
for i := len(x.indentStack) - 1; i >= 1; i-- {
356+
x.queue(DEDENT)
357+
}
358+
x.indentStack = x.indentStack[:1]
359+
}
360+
331361
// The parser calls this method to get each new token. This
332362
// implementation returns operators and NUM.
333363
func (x *yyLex) Lex(yylval *yySymType) (ret int) {
@@ -340,11 +370,9 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
340370
}()
341371
}
342372

343-
// Return initial token
344-
if x.startToken != eof {
345-
token := x.startToken
346-
x.startToken = eof
347-
return token
373+
// Return queued tokens if there are any
374+
if !x.queueEmpty() {
375+
return x.dequeue()
348376
}
349377

350378
// FIXME keep x.pos up to date
@@ -358,10 +386,13 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
358386
x.state++
359387
if x.line == "" && x.eof {
360388
x.state = checkEof
361-
// an empty line while reading interactive input should return a NEWLINE
362-
// Don't output NEWLINE if brackets are open
389+
// During interactive input of statements an entirely blank logical
390+
// line (i.e. one containing not even whitespace or a comment)
391+
// terminates a multi-line statement.
363392
if x.interactive && !x.openBrackets() {
364-
return NEWLINE
393+
x.queueDedents()
394+
x.queue(NEWLINE)
395+
return x.dequeue()
365396
}
366397
continue
367398
}
@@ -380,32 +411,33 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
380411
}
381412
x.state++
382413
case checkIndent:
414+
x.state++
383415
// Don't output INDENT or DEDENT if brackets are open
384416
if x.openBrackets() {
385-
x.state++
386417
continue
387418
}
388419
// See if indent has changed and issue INDENT / DEDENT
389420
indent := countIndent(x.currentIndent)
390-
indentStackTop := x.indentStack[len(x.indentStack)-1]
391-
switch {
392-
case indent > indentStackTop:
421+
i := len(x.indentStack) - 1
422+
indentStackTop := x.indentStack[i]
423+
if indent == indentStackTop {
424+
continue
425+
} else if indent > indentStackTop {
393426
x.indentStack = append(x.indentStack, indent)
394-
x.state++
395427
return INDENT
396-
case indent < indentStackTop:
397-
for i := len(x.indentStack) - 1; i >= 0; i-- {
428+
} else {
429+
for ; i >= 0; i-- {
398430
if x.indentStack[i] == indent {
399431
goto foundIndent
400432
}
433+
x.queue(DEDENT)
401434
}
402435
x.SyntaxError("Inconsistent indent")
403436
return eof
404437
foundIndent:
405-
x.indentStack = x.indentStack[:len(x.indentStack)-1]
406-
return DEDENT
438+
x.indentStack = x.indentStack[:i+1]
439+
return x.dequeue()
407440
}
408-
x.state++
409441
case parseTokens:
410442
// Skip white space
411443
x.line = strings.TrimLeft(x.line, " \t")
@@ -493,18 +525,16 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
493525
return eof
494526
case checkEof:
495527
if x.eof {
496-
// Return any remaining DEDENTS
497-
if len(x.indentStack) > 1 {
498-
x.indentStack = x.indentStack[:len(x.indentStack)-1]
499-
x.state = checkEof
500-
return DEDENT
501-
}
528+
x.queueDedents()
502529
// then return ENDMARKER
503530
x.state = isEof
504-
if x.interactive {
531+
if !x.interactive {
532+
x.queue(ENDMARKER)
533+
}
534+
if x.queueEmpty() {
505535
continue
506536
}
507-
return ENDMARKER
537+
return x.dequeue()
508538
}
509539
x.state = readString
510540
case isEof:
@@ -857,7 +887,11 @@ func (x *yyLex) SyntaxErrorf(format string, a ...interface{}) {
857887
func (x *yyLex) ErrorReturn() error {
858888
if x.error {
859889
if x.errorString == "" {
860-
x.errorString = "invalid syntax"
890+
if x.eof && !x.exec {
891+
x.errorString = "unexpected EOF while parsing"
892+
} else {
893+
x.errorString = "invalid syntax"
894+
}
861895
}
862896
return py.ExceptionNewf(py.SyntaxError, "%s", x.errorString)
863897
}

parser/lexer_test.go

+50
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,56 @@ func TestLex(t *testing.T) {
361361
{"$asdasd", "invalid syntax", "eval", LexTokens{
362362
{EVAL_INPUT, nil},
363363
}},
364+
{"if True:\n pass\n\n", "", "single", LexTokens{
365+
{SINGLE_INPUT, nil},
366+
{IF, nil},
367+
{TRUE, nil},
368+
{':', nil},
369+
{NEWLINE, nil},
370+
{INDENT, nil},
371+
{PASS, nil},
372+
{NEWLINE, nil},
373+
{DEDENT, nil},
374+
{NEWLINE, nil},
375+
}},
376+
{"while True:\n pass\nelse:\n return\n", "", "single", LexTokens{
377+
{SINGLE_INPUT, nil},
378+
{WHILE, nil},
379+
{TRUE, nil},
380+
{':', nil},
381+
{NEWLINE, nil},
382+
{INDENT, nil},
383+
{PASS, nil},
384+
{NEWLINE, nil},
385+
{DEDENT, nil},
386+
{ELSE, nil},
387+
{':', nil},
388+
{NEWLINE, nil},
389+
{INDENT, nil},
390+
{RETURN, nil},
391+
{NEWLINE, nil},
392+
{DEDENT, nil},
393+
{NEWLINE, nil},
394+
}},
395+
{"while True:\n pass\nelse:\n return\n", "", "exec", LexTokens{
396+
{FILE_INPUT, nil},
397+
{WHILE, nil},
398+
{TRUE, nil},
399+
{':', nil},
400+
{NEWLINE, nil},
401+
{INDENT, nil},
402+
{PASS, nil},
403+
{NEWLINE, nil},
404+
{DEDENT, nil},
405+
{ELSE, nil},
406+
{':', nil},
407+
{NEWLINE, nil},
408+
{INDENT, nil},
409+
{RETURN, nil},
410+
{NEWLINE, nil},
411+
{DEDENT, nil},
412+
{ENDMARKER, nil},
413+
}},
364414
} {
365415
lts, err := LexString(test.in, test.mode)
366416
errString := ""

parser/make_grammar_test.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@
2525
("b'abc' b'''123'''", "eval"),
2626
("1234", "eval"),
2727
("01234", "eval", SyntaxError, "illegal decimal with leading zero"),
28-
("1234d", "eval", SyntaxError, "invalid syntax"),
28+
("1234d", "eval", SyntaxError),
2929
("1234d", "exec", SyntaxError),
30+
("1234d", "single", SyntaxError),
3031
("0x1234", "eval"),
3132
("12.34", "eval"),
3233
("1,", "eval"),
@@ -461,10 +462,11 @@ class A(B):
461462
""", "exec"),
462463

463464
# single input
464-
#("\n", "single"),
465+
("", "single", SyntaxError),
466+
("\n", "single", SyntaxError),
465467
("pass\n", "single"),
466-
# FIXME ("if True:\n pass\n\n", "single"),
467-
468+
("if True:\n pass\n\n", "single"),
469+
("while True:\n pass\nelse:\n return\n", "single"),
468470
]
469471

470472
def dump(source, mode):

0 commit comments

Comments
 (0)