Skip to content

Commit 03ae82d

Browse files
[3.13] gh-124188: Fix PyErr_ProgramTextObject() (GH-124189) (GH-124423)
* Detect source file encoding.
* Use the "replace" error handler even for UTF-8 (default) encoding.
* Remove the BOM.
* Fix detection of too long lines if they contain NUL.
* Return the head rather than the tail for truncated long lines.

(cherry picked from commit e2f7107)

Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 167d8d2 commit 03ae82d

File tree

6 files changed

+328
-117
lines changed

6 files changed

+328
-117
lines changed

Lib/test/support/script_helper.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -232,9 +232,13 @@ def make_script(script_dir, script_basename, source, omit_suffix=False):
232232
if not omit_suffix:
233233
script_filename += os.extsep + 'py'
234234
script_name = os.path.join(script_dir, script_filename)
235-
# The script should be encoded to UTF-8, the default string encoding
236-
with open(script_name, 'w', encoding='utf-8') as script_file:
237-
script_file.write(source)
235+
if isinstance(source, str):
236+
# The script should be encoded to UTF-8, the default string encoding
237+
with open(script_name, 'w', encoding='utf-8') as script_file:
238+
script_file.write(source)
239+
else:
240+
with open(script_name, 'wb') as script_file:
241+
script_file.write(source)
238242
importlib.invalidate_caches()
239243
return script_name
240244

Lib/test/test_compiler_codegen.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -152,5 +152,8 @@ def g():
152152

153153
def test_syntax_error__return_not_in_function(self):
154154
snippet = "return 42"
155-
with self.assertRaisesRegex(SyntaxError, "'return' outside function"):
155+
with self.assertRaisesRegex(SyntaxError, "'return' outside function") as cm:
156156
self.codegen_test(snippet, None)
157+
self.assertIsNone(cm.exception.text)
158+
self.assertEqual(cm.exception.offset, 1)
159+
self.assertEqual(cm.exception.end_offset, 10)

Lib/test/test_eof.py

Lines changed: 129 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""test script for a few new invalid token catches"""
22

33
import sys
4+
from codecs import BOM_UTF8
45
from test import support
56
from test.support import os_helper
67
from test.support import script_helper
@@ -11,67 +12,158 @@ class EOFTestCase(unittest.TestCase):
1112
def test_EOF_single_quote(self):
1213
expect = "unterminated string literal (detected at line 1) (<string>, line 1)"
1314
for quote in ("'", "\""):
14-
try:
15+
with self.assertRaises(SyntaxError) as cm:
1516
eval(f"""{quote}this is a test\
1617
""")
17-
except SyntaxError as msg:
18-
self.assertEqual(str(msg), expect)
19-
self.assertEqual(msg.offset, 1)
20-
else:
21-
raise support.TestFailed
18+
self.assertEqual(str(cm.exception), expect)
19+
self.assertEqual(cm.exception.offset, 1)
2220

2321
def test_EOFS(self):
24-
expect = ("unterminated triple-quoted string literal (detected at line 1) (<string>, line 1)")
25-
try:
26-
eval("""'''this is a test""")
27-
except SyntaxError as msg:
28-
self.assertEqual(str(msg), expect)
29-
self.assertEqual(msg.offset, 1)
30-
else:
31-
raise support.TestFailed
22+
expect = ("unterminated triple-quoted string literal (detected at line 3) (<string>, line 1)")
23+
with self.assertRaises(SyntaxError) as cm:
24+
eval("""ä = '''thîs is \na \ntest""")
25+
self.assertEqual(str(cm.exception), expect)
26+
self.assertEqual(cm.exception.text, "ä = '''thîs is ")
27+
self.assertEqual(cm.exception.offset, 5)
28+
29+
with self.assertRaises(SyntaxError) as cm:
30+
eval("""ä = '''thîs is \na \ntest""".encode())
31+
self.assertEqual(str(cm.exception), expect)
32+
self.assertEqual(cm.exception.text, "ä = '''thîs is ")
33+
self.assertEqual(cm.exception.offset, 5)
34+
35+
with self.assertRaises(SyntaxError) as cm:
36+
eval(BOM_UTF8 + """ä = '''thîs is \na \ntest""".encode())
37+
self.assertEqual(str(cm.exception), expect)
38+
self.assertEqual(cm.exception.text, "ä = '''thîs is ")
39+
self.assertEqual(cm.exception.offset, 5)
40+
41+
with self.assertRaises(SyntaxError) as cm:
42+
eval("""# coding: latin1\nä = '''thîs is \na \ntest""".encode('latin1'))
43+
self.assertEqual(str(cm.exception), "unterminated triple-quoted string literal (detected at line 4) (<string>, line 2)")
44+
self.assertEqual(cm.exception.text, "ä = '''thîs is ")
45+
self.assertEqual(cm.exception.offset, 5)
3246

3347
def test_EOFS_with_file(self):
3448
expect = ("(<string>, line 1)")
3549
with os_helper.temp_dir() as temp_dir:
36-
file_name = script_helper.make_script(temp_dir, 'foo', """'''this is \na \ntest""")
37-
rc, out, err = script_helper.assert_python_failure(file_name)
38-
self.assertIn(b'unterminated triple-quoted string literal (detected at line 3)', err)
50+
file_name = script_helper.make_script(temp_dir, 'foo',
51+
"""ä = '''thîs is \na \ntest""")
52+
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
53+
err = err.decode().splitlines()
54+
self.assertEqual(err[-3:], [
55+
" ä = '''thîs is ",
56+
' ^',
57+
'SyntaxError: unterminated triple-quoted string literal (detected at line 3)'])
58+
59+
file_name = script_helper.make_script(temp_dir, 'foo',
60+
"""ä = '''thîs is \na \ntest""".encode())
61+
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
62+
err = err.decode().splitlines()
63+
self.assertEqual(err[-3:], [
64+
" ä = '''thîs is ",
65+
' ^',
66+
'SyntaxError: unterminated triple-quoted string literal (detected at line 3)'])
67+
68+
file_name = script_helper.make_script(temp_dir, 'foo',
69+
BOM_UTF8 + """ä = '''thîs is \na \ntest""".encode())
70+
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
71+
err = err.decode().splitlines()
72+
self.assertEqual(err[-3:], [
73+
" ä = '''thîs is ",
74+
' ^',
75+
'SyntaxError: unterminated triple-quoted string literal (detected at line 3)'])
76+
77+
file_name = script_helper.make_script(temp_dir, 'foo',
78+
"""# coding: latin1\nä = '''thîs is \na \ntest""".encode('latin1'))
79+
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
80+
err = err.decode().splitlines()
81+
self.assertEqual(err[-3:], [
82+
" ä = '''thîs is ",
83+
' ^',
84+
'SyntaxError: unterminated triple-quoted string literal (detected at line 4)'])
3985

4086
@warnings_helper.ignore_warnings(category=SyntaxWarning)
4187
def test_eof_with_line_continuation(self):
4288
expect = "unexpected EOF while parsing (<string>, line 1)"
43-
try:
89+
with self.assertRaises(SyntaxError) as cm:
4490
compile('"\\Xhh" \\', '<string>', 'exec')
45-
except SyntaxError as msg:
46-
self.assertEqual(str(msg), expect)
47-
else:
48-
raise support.TestFailed
91+
self.assertEqual(str(cm.exception), expect)
4992

5093
def test_line_continuation_EOF(self):
5194
"""A continuation at the end of input must be an error; bpo2180."""
5295
expect = 'unexpected EOF while parsing (<string>, line 1)'
53-
with self.assertRaises(SyntaxError) as excinfo:
54-
exec('x = 5\\')
55-
self.assertEqual(str(excinfo.exception), expect)
56-
with self.assertRaises(SyntaxError) as excinfo:
96+
with self.assertRaises(SyntaxError) as cm:
97+
exec('ä = 5\\')
98+
self.assertEqual(str(cm.exception), expect)
99+
self.assertEqual(cm.exception.text, 'ä = 5\\\n')
100+
self.assertEqual(cm.exception.offset, 7)
101+
102+
with self.assertRaises(SyntaxError) as cm:
103+
exec('ä = 5\\'.encode())
104+
self.assertEqual(str(cm.exception), expect)
105+
self.assertEqual(cm.exception.text, 'ä = 5\\\n')
106+
self.assertEqual(cm.exception.offset, 7)
107+
108+
with self.assertRaises(SyntaxError) as cm:
109+
exec('# coding:latin1\nä = 5\\'.encode('latin1'))
110+
self.assertEqual(str(cm.exception),
111+
'unexpected EOF while parsing (<string>, line 2)')
112+
self.assertEqual(cm.exception.text, 'ä = 5\\\n')
113+
self.assertEqual(cm.exception.offset, 7)
114+
115+
with self.assertRaises(SyntaxError) as cm:
116+
exec(BOM_UTF8 + 'ä = 5\\'.encode())
117+
self.assertEqual(str(cm.exception), expect)
118+
self.assertEqual(cm.exception.text, 'ä = 5\\\n')
119+
self.assertEqual(cm.exception.offset, 7)
120+
121+
with self.assertRaises(SyntaxError) as cm:
57122
exec('\\')
58-
self.assertEqual(str(excinfo.exception), expect)
123+
self.assertEqual(str(cm.exception), expect)
59124

60125
@unittest.skipIf(not sys.executable, "sys.executable required")
61126
def test_line_continuation_EOF_from_file_bpo2180(self):
62127
"""Ensure tok_nextc() does not add too many ending newlines."""
63128
with os_helper.temp_dir() as temp_dir:
64129
file_name = script_helper.make_script(temp_dir, 'foo', '\\')
65-
rc, out, err = script_helper.assert_python_failure(file_name)
66-
self.assertIn(b'unexpected EOF while parsing', err)
67-
self.assertIn(b'line 1', err)
68-
self.assertIn(b'\\', err)
69-
70-
file_name = script_helper.make_script(temp_dir, 'foo', 'y = 6\\')
71-
rc, out, err = script_helper.assert_python_failure(file_name)
72-
self.assertIn(b'unexpected EOF while parsing', err)
73-
self.assertIn(b'line 1', err)
74-
self.assertIn(b'y = 6\\', err)
130+
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
131+
err = err.decode().splitlines()
132+
self.assertEqual(err[-2:], [
133+
' \\',
134+
'SyntaxError: unexpected EOF while parsing'])
135+
self.assertEqual(err[-3][-8:], ', line 1', err)
136+
137+
file_name = script_helper.make_script(temp_dir, 'foo', 'ä = 6\\')
138+
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
139+
err = err.decode().splitlines()
140+
self.assertEqual(err[-3:], [
141+
' ä = 6\\',
142+
' ^',
143+
'SyntaxError: unexpected EOF while parsing'])
144+
self.assertEqual(err[-4][-8:], ', line 1', err)
145+
146+
file_name = script_helper.make_script(temp_dir, 'foo',
147+
'# coding:latin1\n'
148+
'ä = 7\\'.encode('latin1'))
149+
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
150+
err = err.decode().splitlines()
151+
self.assertEqual(err[-3:], [
152+
' ä = 7\\',
153+
' ^',
154+
'SyntaxError: unexpected EOF while parsing'])
155+
self.assertEqual(err[-4][-8:], ', line 2', err)
156+
157+
file_name = script_helper.make_script(temp_dir, 'foo',
158+
BOM_UTF8 + 'ä = 8\\'.encode())
159+
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
160+
err = err.decode().splitlines()
161+
self.assertEqual(err[-3:], [
162+
' ä = 8\\',
163+
' ^',
164+
'SyntaxError: unexpected EOF while parsing'])
165+
self.assertEqual(err[-4][-8:], ', line 1', err)
166+
75167

76168
if __name__ == "__main__":
77169
unittest.main()

0 commit comments

Comments
 (0)