Skip to content

Commit 3156d19

Browse files
authored
gh-100445: Improve error message for unterminated strings with escapes (#100446)
1 parent baefbb2 commit 3156d19

File tree

3 files changed

+25
-4
lines changed

3 files changed

+25
-4
lines changed

Lib/test/test_syntax.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2298,8 +2298,14 @@ def test_error_parenthesis(self):
22982298

22992299
def test_error_string_literal(self):
23002300

2301-
self._check_error("'blech", "unterminated string literal")
2302-
self._check_error('"blech', "unterminated string literal")
2301+
self._check_error("'blech", r"unterminated string literal \(.*\)$")
2302+
self._check_error('"blech', r"unterminated string literal \(.*\)$")
2303+
self._check_error(
2304+
r'"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
2305+
)
2306+
self._check_error(
2307+
r'r"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
2308+
)
23032309
self._check_error("'''blech", "unterminated triple-quoted string literal")
23042310
self._check_error('"""blech', "unterminated triple-quoted string literal")
23052311

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
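Improve the error message for unterminated string literals that contain an escaped quote: when a backslash-escaped quote character is seen inside the string, the syntax error now adds the hint "perhaps you escaped the end quote?".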

Parser/lexer/lexer.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
972972
int quote = c;
973973
int quote_size = 1; /* 1 or 3 */
974974
int end_quote_size = 0;
975+
int has_escaped_quote = 0;
975976

976977
/* Nodes of type STRING, especially multi line strings
977978
must be handled differently in order to get both
@@ -1037,8 +1038,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
10371038
return MAKE_TOKEN(ERRORTOKEN);
10381039
}
10391040
else {
1040-
_PyTokenizer_syntaxerror(tok, "unterminated string literal (detected at"
1041-
" line %d)", start);
1041+
if (has_escaped_quote) {
1042+
_PyTokenizer_syntaxerror(
1043+
tok,
1044+
"unterminated string literal (detected at line %d); "
1045+
"perhaps you escaped the end quote?",
1046+
start
1047+
);
1048+
} else {
1049+
_PyTokenizer_syntaxerror(
1050+
tok, "unterminated string literal (detected at line %d)", start
1051+
);
1052+
}
10421053
if (c != '\n') {
10431054
tok->done = E_EOLS;
10441055
}
@@ -1052,6 +1063,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
10521063
end_quote_size = 0;
10531064
if (c == '\\') {
10541065
c = tok_nextc(tok); /* skip escaped char */
1066+
if (c == quote) { /* but record whether the escaped char was a quote */
1067+
has_escaped_quote = 1;
1068+
}
10551069
if (c == '\r') {
10561070
c = tok_nextc(tok);
10571071
}

0 commit comments

Comments
 (0)