Skip to content

Commit 8c9be74

Browse files
committed
Parse string literals into AST
1 parent dc76883 commit 8c9be74

File tree

4 files changed

+44
-9
lines changed

4 files changed

+44
-9
lines changed

ast/ast.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111

1212
type Identifier py.String
1313
type String py.String
14-
type PyBytes py.Bytes
1514
type Object py.Object
1615
type Singleton py.Object
1716

@@ -516,7 +515,7 @@ type Str struct {
516515

517516
type Bytes struct {
518517
ExprBase
519-
S PyBytes
518+
S py.Bytes
520519
}
521520

522521
type NameConstant struct {

parser/grammar.y

+34-6
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ import (
1717

1818
// FIXME code blocks can be placed mid-rule, not just at the end - this would help with list initialisation
1919

20+
// FIXME is the exprsStack needed at all? Aren't the yylval values put into a
21+
// stack anyway by yacc? And in rsc cc he sets yylval to empty every
22+
// lex.
23+
2024
// A stack of []ast.Expr
2125
type exprsStack [][]ast.Expr
2226

@@ -81,7 +85,7 @@ func (es *stmtsStack) Add(stmt ...ast.Stmt) {
8185
comma bool
8286
}
8387

84-
%type <str> strings
88+
%type <obj> strings
8589
%type <mod> inputs file_input single_input eval_input
8690
%type <stmts> simple_stmt stmt
8791
%type <stmtsStack> nl_or_stmt small_stmts stmts
@@ -99,8 +103,8 @@ func (es *stmtsStack) Add(stmt ...ast.Stmt) {
99103
%token <str> NAME
100104
%token INDENT
101105
%token DEDENT
102-
%token <str> STRING
103-
%token <str> NUMBER
106+
%token <obj> STRING
107+
%token <obj> NUMBER
104108

105109
%token PLINGEQ // !=
106110
%token PERCEQ // %=
@@ -1051,9 +1055,27 @@ trailers:
10511055

10521056
strings:
10531057
STRING
1058+
{
1059+
$$ = $1
1060+
}
10541061
| strings STRING
10551062
{
1056-
$$ += $2
1063+
switch a := $$.(type) {
1064+
case py.String:
1065+
switch b := $2.(type) {
1066+
case py.String:
1067+
$$ = a + b
1068+
default:
1069+
yylex.Error("SyntaxError: cannot mix string and nonstring literals")
1070+
}
1071+
case py.Bytes:
1072+
switch b := $2.(type) {
1073+
case py.Bytes:
1074+
$$ = append(a, b...)
1075+
default:
1076+
yylex.Error("SyntaxError: cannot mix bytes and nonbytes literals")
1077+
}
1078+
}
10571079
}
10581080

10591081
atom:
@@ -1100,8 +1122,14 @@ atom:
11001122
}
11011123
| strings
11021124
{
1103-
// FIXME
1104-
$$ = nil
1125+
switch s := $1.(type) {
1126+
case py.String:
1127+
$$ = &ast.Str{ExprBase: ast.ExprBase{$<pos>$}, S: s}
1128+
case py.Bytes:
1129+
$$ = &ast.Bytes{ExprBase: ast.ExprBase{$<pos>$}, S: s}
1130+
default:
1131+
panic("not Bytes or String in strings")
1132+
}
11051133
}
11061134
| ELIPSIS
11071135
{

parser/grammar_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ func TestGrammar(t *testing.T) {
2525
{"None\n", "eval", "Expression(body=NameConstant(value=None))"},
2626
{"...", "eval", "Expression(body=Ellipsis())"},
2727
{"abc123", "eval", "Expression(body=Name(id='abc123', ctx=Load()))"},
28+
{"\"abc\"", "eval", "Expression(body=Str(s='abc'))"},
29+
{"\"abc\" \"\"\"123\"\"\"", "eval", "Expression(body=Str(s='abc123'))"},
30+
{"b'abc'", "eval", "Expression(body=Bytes(s=b'abc'))"},
31+
{"b'abc' b'''123'''", "eval", "Expression(body=Bytes(s=b'abc123'))"},
2832
// END TESTS
2933
} {
3034
Ast, err := ParseString(test.in, test.mode)

parser/make_grammar_test.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
("None\n", "eval"),
1818
("...", "eval"),
1919
("abc123", "eval"),
20+
('"abc"', "eval"),
21+
('"abc" """123"""', "eval"),
22+
("b'abc'", "eval"),
23+
("b'abc' b'''123'''", "eval"),
2024
]
2125

2226
def dump(source, mode):
@@ -26,7 +30,7 @@ def dump(source, mode):
2630

2731
def escape(x):
2832
"""Encode strings with backslashes for python/go"""
29-
return x.encode("unicode_escape").decode("utf-8")
33+
return x.replace('\\', "\\\\").replace('"', r'\"').replace("\n", r'\n').replace("\t", r'\t')
3034

3135
def main():
3236
"""Read in grammar_test.go, and re-write the tests section"""

0 commit comments

Comments
 (0)