Skip to content

Commit a09d390

Browse files
authored
[3.11] gh-96670: Raise SyntaxError when parsing NULL bytes (GH-97594) (#104195)
1 parent c5dafea commit a09d390

File tree

9 files changed

+77
-22
lines changed

9 files changed

+77
-22
lines changed

Include/cpython/fileobject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#endif
44

55
PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *);
6+
PyAPI_FUNC(char *) _Py_UniversalNewlineFgetsWithSize(char *, int, FILE*, PyObject *, size_t*);
67

78
/* The std printer acts as a preliminary sys.stderr until the new io
89
infrastructure is in place. */

Lib/test/test_ast.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,10 @@ def check_limit(prefix, repeated):
857857
check_limit("a", "[0]")
858858
check_limit("a", "*a")
859859

860+
def test_null_bytes(self):
861+
with self.assertRaises(SyntaxError,
862+
msg="source code string cannot contain null bytes"):
863+
ast.parse("a\0b")
860864

861865
class ASTHelpers_Test(unittest.TestCase):
862866
maxDiff = None

Lib/test/test_builtin.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,11 +334,10 @@ def test_compile(self):
334334
self.assertRaises(TypeError, compile)
335335
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'badmode')
336336
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'single', 0xff)
337-
self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
338337
self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
339338
mode='eval', source='0', filename='tmp')
340339
compile('print("\xe5")\n', '', 'exec')
341-
self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
340+
self.assertRaises(SyntaxError, compile, chr(0), 'f', 'exec')
342341
self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')
343342

344343
# test the optimize argument

Lib/test/test_cmd_line_script.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,31 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
657657
],
658658
)
659659

660+
def test_syntaxerror_null_bytes(self):
661+
script = "x = '\0' nothing to see here\n';import os;os.system('echo pwnd')\n"
662+
with os_helper.temp_dir() as script_dir:
663+
script_name = _make_test_script(script_dir, 'script', script)
664+
exitcode, stdout, stderr = assert_python_failure(script_name)
665+
self.assertEqual(
666+
stderr.splitlines()[-2:],
667+
[ b" x = '",
668+
b'SyntaxError: source code cannot contain null bytes'
669+
],
670+
)
671+
672+
def test_syntaxerror_null_bytes_in_multiline_string(self):
673+
scripts = ["\n'''\nmultilinestring\0\n'''", "\nf'''\nmultilinestring\0\n'''"] # Both normal and f-strings
674+
with os_helper.temp_dir() as script_dir:
675+
for script in scripts:
676+
script_name = _make_test_script(script_dir, 'script', script)
677+
_, _, stderr = assert_python_failure(script_name)
678+
self.assertEqual(
679+
stderr.splitlines()[-2:],
680+
[ b" multilinestring",
681+
b'SyntaxError: source code cannot contain null bytes'
682+
]
683+
)
684+
660685
def test_consistent_sys_path_for_direct_execution(self):
661686
# This test case ensures that the following all give the same
662687
# sys.path configuration:

Lib/test/test_compile.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ def test_particularly_evil_undecodable(self):
542542
with open(fn, "wb") as fp:
543543
fp.write(src)
544544
res = script_helper.run_python_until_end(fn)[0]
545-
self.assertIn(b"Non-UTF-8", res.err)
545+
self.assertIn(b"source code cannot contain null bytes", res.err)
546546

547547
def test_yet_more_evil_still_undecodable(self):
548548
# Issue #25388
@@ -552,7 +552,7 @@ def test_yet_more_evil_still_undecodable(self):
552552
with open(fn, "wb") as fp:
553553
fp.write(src)
554554
res = script_helper.run_python_until_end(fn)[0]
555-
self.assertIn(b"Non-UTF-8", res.err)
555+
self.assertIn(b"source code cannot contain null bytes", res.err)
556556

557557
@support.cpython_only
558558
def test_compiler_recursion_limit(self):
@@ -588,9 +588,9 @@ def check_limit(prefix, repeated, mode="single"):
588588
def test_null_terminated(self):
589589
# The source code is null-terminated internally, but bytes-like
590590
# objects are accepted, which might not be null-terminated.
591-
with self.assertRaisesRegex(ValueError, "cannot contain null"):
591+
with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
592592
compile("123\x00", "<dummy>", "eval")
593-
with self.assertRaisesRegex(ValueError, "cannot contain null"):
593+
with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
594594
compile(memoryview(b"123\x00"), "<dummy>", "eval")
595595
code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval")
596596
self.assertEqual(eval(code), 23)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The parser now raises :exc:`SyntaxError` when parsing source code containing
2+
null bytes. Backported from ``aab01e3``. Patch by Pablo Galindo.

Objects/fileobject.c

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -230,16 +230,8 @@ _PyLong_FileDescriptor_Converter(PyObject *o, void *ptr)
230230
return 1;
231231
}
232232

233-
/*
234-
** Py_UniversalNewlineFgets is an fgets variation that understands
235-
** all of \r, \n and \r\n conventions.
236-
** The stream should be opened in binary mode.
237-
** The fobj parameter exists solely for legacy reasons and must be NULL.
238-
** Note that we need no error handling: fgets() treats error and eof
239-
** identically.
240-
*/
241233
char *
242-
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
234+
_Py_UniversalNewlineFgetsWithSize(char *buf, int n, FILE *stream, PyObject *fobj, size_t* size)
243235
{
244236
char *p = buf;
245237
int c;
@@ -265,11 +257,28 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
265257
}
266258
FUNLOCKFILE(stream);
267259
*p = '\0';
268-
if (p == buf)
260+
if (p == buf) {
269261
return NULL;
262+
}
263+
*size = p - buf;
270264
return buf;
271265
}
272266

267+
/*
268+
** Py_UniversalNewlineFgets is an fgets variation that understands
269+
** all of \r, \n and \r\n conventions.
270+
** The stream should be opened in binary mode.
271+
** The fobj parameter exists solely for legacy reasons and must be NULL.
272+
** Note that we need no error handling: fgets() treats error and eof
273+
** identically.
274+
*/
275+
276+
char *
277+
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) {
278+
size_t size;
279+
return _Py_UniversalNewlineFgetsWithSize(buf, n, stream, fobj, &size);
280+
}
281+
273282
/* **************************** std printer ****************************
274283
* The stdprinter is used during the bootstrapping phase as a preliminary
275284
* file-like object for sys.stderr.

Parser/tokenizer.c

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,11 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
376376
return 1;
377377
}
378378

379+
static inline int
380+
contains_null_bytes(const char* str, size_t size) {
381+
return memchr(str, 0, size) != NULL;
382+
}
383+
379384
static int
380385
tok_readline_recode(struct tok_state *tok) {
381386
PyObject *line;
@@ -831,17 +836,17 @@ tok_readline_raw(struct tok_state *tok)
831836
if (!tok_reserve_buf(tok, BUFSIZ)) {
832837
return 0;
833838
}
834-
char *line = Py_UniversalNewlineFgets(tok->inp,
835-
(int)(tok->end - tok->inp),
836-
tok->fp, NULL);
839+
int n_chars = (int)(tok->end - tok->inp);
840+
size_t line_size = 0;
841+
char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size);
837842
if (line == NULL) {
838843
return 1;
839844
}
840845
if (tok->fp_interactive &&
841846
tok_concatenate_interactive_new_line(tok, line) == -1) {
842847
return 0;
843848
}
844-
tok->inp = strchr(tok->inp, '\0');
849+
tok->inp += line_size;
845850
if (tok->inp == tok->buf) {
846851
return 0;
847852
}
@@ -1078,6 +1083,12 @@ tok_nextc(struct tok_state *tok)
10781083
return EOF;
10791084
}
10801085
tok->line_start = tok->cur;
1086+
1087+
if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
1088+
syntaxerror(tok, "source code cannot contain null bytes");
1089+
tok->cur = tok->inp;
1090+
return EOF;
1091+
}
10811092
}
10821093
Py_UNREACHABLE();
10831094
}
@@ -1987,8 +1998,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
19871998
/* Get rest of string */
19881999
while (end_quote_size != quote_size) {
19892000
c = tok_nextc(tok);
1990-
if (tok->done == E_DECODE)
2001+
if (tok->done == E_ERROR) {
2002+
return ERRORTOKEN;
2003+
}
2004+
if (tok->done == E_DECODE) {
19912005
break;
2006+
}
19922007
if (c == EOF || (quote_size == 1 && c == '\n')) {
19932008
assert(tok->multi_line_start != NULL);
19942009
// shift the tok_state's location into

Python/pythonrun.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1859,7 +1859,7 @@ _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyComp
18591859
}
18601860

18611861
if (strlen(str) != (size_t)size) {
1862-
PyErr_SetString(PyExc_ValueError,
1862+
PyErr_SetString(PyExc_SyntaxError,
18631863
"source code string cannot contain null bytes");
18641864
Py_CLEAR(*cmd_copy);
18651865
return NULL;

0 commit comments

Comments
 (0)