Skip to content

Commit 404b23b

Browse files
authored
Fix lookahead of soft keywords in the PEG parser (GH-20436)
Automerge-Triggered-By: @gvanrossum
1 parent 21fda91 commit 404b23b

File tree

4 files changed, with 31 additions and 2 deletions.

Lib/test/test_peg_generator/test_c_parser.py

+12
Original file line numberDiff line numberDiff line change
@@ -432,3 +432,15 @@ def test_soft_keywords_parse(self) -> None:
432432
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
433433
"""
434434
self.run_test(grammar_source, test_source)
435+
436+
def test_soft_keywords_lookahead(self) -> None:
437+
grammar_source = """
438+
start: &"if" "if" expr '+' expr NEWLINE
439+
expr: NAME
440+
"""
441+
test_source = """
442+
valid_cases = ["if if + if"]
443+
invalid_cases = ["if if"]
444+
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
445+
"""
446+
self.run_test(grammar_source, test_source)

Parser/pegen/pegen.c

+9
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,15 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
718718
return (res != NULL) == positive;
719719
}
720720

721+
int
722+
_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
723+
{
724+
int mark = p->mark;
725+
void *res = func(p, arg);
726+
p->mark = mark;
727+
return (res != NULL) == positive;
728+
}
729+
721730
int
722731
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
723732
{

Parser/pegen/pegen.h

+1
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ int _PyPegen_is_memoized(Parser *p, int type, void *pres);
119119

120120
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
121121
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
122+
int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
122123
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
123124

124125
Token *_PyPegen_expect_token(Parser *p, int type);

Tools/peg_generator/pegen/c_generator.py

+9 -2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ class NodeTypes(Enum):
5858
STRING_TOKEN = 2
5959
GENERIC_TOKEN = 3
6060
KEYWORD = 4
61-
CUT_OPERATOR = 5
61+
SOFT_KEYWORD = 5
62+
CUT_OPERATOR = 6
6263

6364

6465
BASE_NODETYPES = {
@@ -123,7 +124,7 @@ def soft_keyword_helper(self, value: str) -> FunctionCall:
123124
function="_PyPegen_expect_soft_keyword",
124125
arguments=["p", value],
125126
return_type="expr_ty",
126-
nodetype=NodeTypes.NAME_TOKEN,
127+
nodetype=NodeTypes.SOFT_KEYWORD,
127128
comment=f"soft_keyword='{value}'",
128129
)
129130

@@ -217,6 +218,12 @@ def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
217218
arguments=[positive, call.function, *call.arguments],
218219
return_type="int",
219220
)
221+
elif call.nodetype == NodeTypes.SOFT_KEYWORD:
222+
return FunctionCall(
223+
function=f"_PyPegen_lookahead_with_string",
224+
arguments=[positive, call.function, *call.arguments],
225+
return_type="int",
226+
)
220227
elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
221228
return FunctionCall(
222229
function=f"_PyPegen_lookahead_with_int",

0 commit comments

Comments (0)