Skip to content

Commit 8499115

Browse files
[3.13] gh-123378: fix a crash in UnicodeError.__str__ (GH-124935) (#125099)
gh-123378: fix a crash in `UnicodeError.__str__` (GH-124935)
(cherry picked from commit ba14dfa)
Co-authored-by: Bénédikt Tran <[email protected]>
1 parent 4eab6e8 commit 8499115

File tree

3 files changed

+93
-45
lines changed

3 files changed

+93
-45
lines changed

Lib/test/test_exceptions.py

+24
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import weakref
99
import errno
1010
from codecs import BOM_UTF8
11+
from itertools import product
1112
from textwrap import dedent
1213

1314
from test.support import (captured_stderr, check_impl_detail,
@@ -1336,6 +1337,29 @@ def test_unicode_errors_no_object(self):
13361337
for klass in klasses:
13371338
self.assertEqual(str(klass.__new__(klass)), "")
13381339

1340+
def test_unicode_error_str_does_not_crash(self):
1341+
# Test that str(UnicodeError(...)) does not crash.
1342+
# See https://github.com/python/cpython/issues/123378.
1343+
1344+
for start, end, objlen in product(
1345+
range(-5, 5),
1346+
range(-5, 5),
1347+
range(7),
1348+
):
1349+
obj = 'a' * objlen
1350+
with self.subTest('encode', objlen=objlen, start=start, end=end):
1351+
exc = UnicodeEncodeError('utf-8', obj, start, end, '')
1352+
self.assertIsInstance(str(exc), str)
1353+
1354+
with self.subTest('translate', objlen=objlen, start=start, end=end):
1355+
exc = UnicodeTranslateError(obj, start, end, '')
1356+
self.assertIsInstance(str(exc), str)
1357+
1358+
encoded = obj.encode()
1359+
with self.subTest('decode', objlen=objlen, start=start, end=end):
1360+
exc = UnicodeDecodeError('utf-8', encoded, start, end, '')
1361+
self.assertIsInstance(str(exc), str)
1362+
13391363
@no_tracing
13401364
def test_badisinstance(self):
13411365
# Bug #2542: if issubclass(e, MyException) raises an exception,
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Fix a crash in the :meth:`~object.__str__` method of :exc:`UnicodeError`
2+
objects when the :attr:`UnicodeError.start` and :attr:`UnicodeError.end`
3+
values are invalid or out-of-range. Patch by Bénédikt Tran.

Objects/exceptions.c

+66 -45
Original file line number | Diff line number | Diff line change
@@ -2959,46 +2959,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
29592959
static PyObject *
29602960
UnicodeEncodeError_str(PyObject *self)
29612961
{
2962-
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
2962+
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
29632963
PyObject *result = NULL;
29642964
PyObject *reason_str = NULL;
29652965
PyObject *encoding_str = NULL;
29662966

2967-
if (!uself->object)
2967+
if (exc->object == NULL) {
29682968
/* Not properly initialized. */
29692969
return PyUnicode_FromString("");
2970+
}
29702971

29712972
/* Get reason and encoding as strings, which they might not be if
29722973
they've been modified after we were constructed. */
2973-
reason_str = PyObject_Str(uself->reason);
2974-
if (reason_str == NULL)
2974+
reason_str = PyObject_Str(exc->reason);
2975+
if (reason_str == NULL) {
29752976
goto done;
2976-
encoding_str = PyObject_Str(uself->encoding);
2977-
if (encoding_str == NULL)
2977+
}
2978+
encoding_str = PyObject_Str(exc->encoding);
2979+
if (encoding_str == NULL) {
29782980
goto done;
2981+
}
2982+
2983+
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
2984+
Py_ssize_t start = exc->start, end = exc->end;
29792985

2980-
if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
2981-
Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
2986+
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
2987+
Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
29822988
const char *fmt;
2983-
if (badchar <= 0xff)
2989+
if (badchar <= 0xff) {
29842990
fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U";
2985-
else if (badchar <= 0xffff)
2991+
}
2992+
else if (badchar <= 0xffff) {
29862993
fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U";
2987-
else
2994+
}
2995+
else {
29882996
fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U";
2997+
}
29892998
result = PyUnicode_FromFormat(
29902999
fmt,
29913000
encoding_str,
29923001
(int)badchar,
2993-
uself->start,
3002+
start,
29943003
reason_str);
29953004
}
29963005
else {
29973006
result = PyUnicode_FromFormat(
29983007
"'%U' codec can't encode characters in position %zd-%zd: %U",
29993008
encoding_str,
3000-
uself->start,
3001-
uself->end-1,
3009+
start,
3010+
end - 1,
30023011
reason_str);
30033012
}
30043013
done:
@@ -3072,41 +3081,46 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
30723081
static PyObject *
30733082
UnicodeDecodeError_str(PyObject *self)
30743083
{
3075-
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
3084+
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
30763085
PyObject *result = NULL;
30773086
PyObject *reason_str = NULL;
30783087
PyObject *encoding_str = NULL;
30793088

3080-
if (!uself->object)
3089+
if (exc->object == NULL) {
30813090
/* Not properly initialized. */
30823091
return PyUnicode_FromString("");
3092+
}
30833093

30843094
/* Get reason and encoding as strings, which they might not be if
30853095
they've been modified after we were constructed. */
3086-
reason_str = PyObject_Str(uself->reason);
3087-
if (reason_str == NULL)
3096+
reason_str = PyObject_Str(exc->reason);
3097+
if (reason_str == NULL) {
30883098
goto done;
3089-
encoding_str = PyObject_Str(uself->encoding);
3090-
if (encoding_str == NULL)
3099+
}
3100+
encoding_str = PyObject_Str(exc->encoding);
3101+
if (encoding_str == NULL) {
30913102
goto done;
3103+
}
3104+
3105+
Py_ssize_t len = PyBytes_GET_SIZE(exc->object);
3106+
Py_ssize_t start = exc->start, end = exc->end;
30923107

3093-
if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) {
3094-
int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff);
3108+
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
3109+
int badbyte = (int)(PyBytes_AS_STRING(exc->object)[start] & 0xff);
30953110
result = PyUnicode_FromFormat(
30963111
"'%U' codec can't decode byte 0x%02x in position %zd: %U",
30973112
encoding_str,
3098-
byte,
3099-
uself->start,
3113+
badbyte,
3114+
start,
31003115
reason_str);
31013116
}
31023117
else {
31033118
result = PyUnicode_FromFormat(
31043119
"'%U' codec can't decode bytes in position %zd-%zd: %U",
31053120
encoding_str,
3106-
uself->start,
3107-
uself->end-1,
3108-
reason_str
3109-
);
3121+
start,
3122+
end - 1,
3123+
reason_str);
31103124
}
31113125
done:
31123126
Py_XDECREF(reason_str);
@@ -3169,42 +3183,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
31693183
static PyObject *
31703184
UnicodeTranslateError_str(PyObject *self)
31713185
{
3172-
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
3186+
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
31733187
PyObject *result = NULL;
31743188
PyObject *reason_str = NULL;
31753189

3176-
if (!uself->object)
3190+
if (exc->object == NULL) {
31773191
/* Not properly initialized. */
31783192
return PyUnicode_FromString("");
3193+
}
31793194

31803195
/* Get reason as a string, which it might not be if it's been
31813196
modified after we were constructed. */
3182-
reason_str = PyObject_Str(uself->reason);
3183-
if (reason_str == NULL)
3197+
reason_str = PyObject_Str(exc->reason);
3198+
if (reason_str == NULL) {
31843199
goto done;
3200+
}
3201+
3202+
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
3203+
Py_ssize_t start = exc->start, end = exc->end;
31853204

3186-
if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
3187-
Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
3205+
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
3206+
Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
31883207
const char *fmt;
3189-
if (badchar <= 0xff)
3208+
if (badchar <= 0xff) {
31903209
fmt = "can't translate character '\\x%02x' in position %zd: %U";
3191-
else if (badchar <= 0xffff)
3210+
}
3211+
else if (badchar <= 0xffff) {
31923212
fmt = "can't translate character '\\u%04x' in position %zd: %U";
3193-
else
3213+
}
3214+
else {
31943215
fmt = "can't translate character '\\U%08x' in position %zd: %U";
3216+
}
31953217
result = PyUnicode_FromFormat(
31963218
fmt,
31973219
(int)badchar,
3198-
uself->start,
3199-
reason_str
3200-
);
3201-
} else {
3220+
start,
3221+
reason_str);
3222+
}
3223+
else {
32023224
result = PyUnicode_FromFormat(
32033225
"can't translate characters in position %zd-%zd: %U",
3204-
uself->start,
3205-
uself->end-1,
3206-
reason_str
3207-
);
3226+
start,
3227+
end - 1,
3228+
reason_str);
32083229
}
32093230
done:
32103231
Py_XDECREF(reason_str);

0 commit comments

Comments
 (0)