Skip to content

gh-128974: Fix UnicodeError.__str__ when custom attributes have side-effects #128975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 1, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Lib/test/test_capi/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,30 @@ def _check_no_crash(self, exc):
# ensure that the __str__() method does not crash
_ = str(exc)

def test_unicode_error_custom_str(self):

class Evil(str):
def __str__(self):
del exc.object
return self

for reason, encoding in [
("reason", Evil("encoding")),
(Evil("reason"), "encoding"),
(Evil("reason"), Evil("encoding")),
]:
with self.subTest(encoding=encoding, reason=reason):
with self.subTest(UnicodeEncodeError):
exc = UnicodeEncodeError(encoding, "x", 0, 1, reason)
self.assertRaises(TypeError, str, exc)
with self.subTest(UnicodeDecodeError):
exc = UnicodeDecodeError(encoding, b"x", 0, 1, reason)
self.assertRaises(TypeError, str, exc)

with self.subTest(UnicodeTranslateError):
exc = UnicodeTranslateError("x", 0, 1, Evil("reason"))
self.assertRaises(TypeError, str, exc)

def test_unicode_encode_error_get_start(self):
get_start = _testcapi.unicode_encode_get_start
self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a crash in :meth:`UnicodeError.__str__ <object.__str__>` when custom
attributes implement :meth:`~object.__str__` with side-effects.
Patch by Bénédikt Tran.
59 changes: 44 additions & 15 deletions Objects/exceptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -2673,6 +2673,8 @@ SyntaxError_str(PySyntaxErrorObject *self)
if (!filename && !have_lineno)
return PyObject_Str(self->msg ? self->msg : Py_None);

// Even if 'filename' can be an instance of a subclass of 'str',
// we only render its "true" content and do not use str(filename).
if (filename && have_lineno)
result = PyUnicode_FromFormat("%S (%U, line %ld)",
self->msg ? self->msg : Py_None,
Expand Down Expand Up @@ -2790,29 +2792,47 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError,

/*
* Check the validity of 'attr' as a unicode or bytes object depending
* on 'as_bytes' and return a new reference on it if it is the case.
* on 'as_bytes'.
*
* The 'name' is the attribute name and is only used for error reporting.
*
* On success, this returns a strong reference on 'attr'.
* On failure, this sets a TypeError and returns NULL.
* On success, this returns 0.
* On failure, this sets a TypeError and returns -1.
*/
static PyObject *
as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
static int
check_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
{
assert(as_bytes == 0 || as_bytes == 1);
if (attr == NULL) {
PyErr_Format(PyExc_TypeError, "%s attribute not set", name);
return NULL;
PyErr_Format(PyExc_TypeError,
"UnicodeError '%s' attribute is not set",
name);
return -1;
}
if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) {
PyErr_Format(PyExc_TypeError,
"%s attribute must be %s",
name,
as_bytes ? "bytes" : "unicode");
return NULL;
"UnicodeError '%s' attribute must be a %s",
name, as_bytes ? "bytes" : "string");
return -1;
}
return Py_NewRef(attr);
return 0;
}


/*
* Check the validity of 'attr' as a unicode or bytes object depending
* on 'as_bytes' and return a new reference on it if it is the case.
*
* The 'name' is the attribute name and is only used for error reporting.
*
* On success, this returns a strong reference on 'attr'.
* On failure, this sets a TypeError and returns NULL.
*/
static PyObject *
as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
{
int rc = check_unicode_error_attribute(attr, name, as_bytes);
return rc < 0 ? NULL : Py_NewRef(attr);
}


Expand Down Expand Up @@ -3379,7 +3399,10 @@ UnicodeEncodeError_str(PyObject *self)
if (encoding_str == NULL) {
goto done;
}

// calls to PyObject_Str(...) above might mutate 'exc->object'
if (check_unicode_error_attribute(exc->object, "object", false) < 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO this check is done too late. I would prefer to disallow creating inconsistent exception object, and so implement such check in UnicodeEncodeError_init() instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that it's possible to override exc.object in Python. A setter function is needed here to also check when the attribute is set.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO this check is done too late

Agreed, but I was more worried about the crash itself rather than whether the API was correct in the first place or not. I can add a setter function if you want but I haven't looked at whether this would introduce other corner cases or not and how it would interact with the rest.

goto done;
}
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
Py_ssize_t start = exc->start, end = exc->end;

Expand Down Expand Up @@ -3499,7 +3522,10 @@ UnicodeDecodeError_str(PyObject *self)
if (encoding_str == NULL) {
goto done;
}

// calls to PyObject_Str(...) above might mutate 'exc->object'
if (check_unicode_error_attribute(exc->object, "object", true) < 0) {
goto done;
}
Py_ssize_t len = PyBytes_GET_SIZE(exc->object);
Py_ssize_t start = exc->start, end = exc->end;

Expand Down Expand Up @@ -3595,7 +3621,10 @@ UnicodeTranslateError_str(PyObject *self)
if (reason_str == NULL) {
goto done;
}

// call to PyObject_Str(...) above might mutate 'exc->object'
if (check_unicode_error_attribute(exc->object, "object", false) < 0) {
goto done;
}
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
Py_ssize_t start = exc->start, end = exc->end;

Expand Down
Loading