diff --git a/Python/codecs.c b/Python/codecs.c index d5d9a4a8bcabb7..8cdebfa1b611ea 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -956,49 +956,18 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) return restuple; } -PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) + +// --- handler: 'backslashreplace' -------------------------------------------- + +static PyObject * +_PyCodec_BackslashReplaceUnicodeEncodeError(PyObject *exc) { PyObject *obj; Py_ssize_t objlen, start, end, slen; - if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { - if (_PyUnicodeError_GetParams(exc, - &obj, &objlen, - &start, &end, &slen, true) < 0) - { - return NULL; - } - PyObject *res = PyUnicode_New(4 * slen, 127); - if (res == NULL) { - Py_DECREF(obj); - return NULL; - } - Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); - const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj); - for (Py_ssize_t i = start; i < end; i++, outp += 4) { - const unsigned char ch = p[i]; - outp[0] = '\\'; - outp[1] = 'x'; - outp[2] = Py_hexdigits[(ch >> 4) & 0xf]; - outp[3] = Py_hexdigits[ch & 0xf]; - } - assert(_PyUnicode_CheckConsistency(res, 1)); - Py_DECREF(obj); - return Py_BuildValue("(Nn)", res, end); - } - - if ( - PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError) - || PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError) - ) { - if (_PyUnicodeError_GetParams(exc, - &obj, &objlen, - &start, &end, &slen, false) < 0) - { - return NULL; - } - } - else { - wrong_exception_type(exc); + if (_PyUnicodeError_GetParams(exc, + &obj, &objlen, + &start, &end, &slen, false) < 0) + { return NULL; } @@ -1035,6 +1004,65 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) } +static PyObject * +_PyCodec_BackslashReplaceUnicodeDecodeError(PyObject *exc) +{ + PyObject *obj; + Py_ssize_t objlen, start, end, slen; + if (_PyUnicodeError_GetParams(exc, + &obj, &objlen, + &start, &end, &slen, true) < 0) + { + return NULL; + } + + PyObject *res = PyUnicode_New(4 * slen, 127); + if (res == NULL) { + Py_DECREF(obj); + return NULL; + } + + Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); + const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj); + for (Py_ssize_t i = start; i < end; i++, outp += 4) { + const unsigned char ch = p[i]; + outp[0] = '\\'; + outp[1] = 'x'; + outp[2] = Py_hexdigits[(ch >> 4) & 0xf]; + outp[3] = Py_hexdigits[ch & 0xf]; + } + assert(_PyUnicode_CheckConsistency(res, 1)); + Py_DECREF(obj); + return Py_BuildValue("(Nn)", res, end); +} + + +static inline PyObject * +_PyCodec_BackslashReplaceUnicodeTranslateError(PyObject *exc) +{ + // Same implementation as for UnicodeEncodeError objects. + return _PyCodec_BackslashReplaceUnicodeEncodeError(exc); +} + + +PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) +{ + if (_PyIsUnicodeEncodeError(exc)) { + return _PyCodec_BackslashReplaceUnicodeEncodeError(exc); + } + else if (_PyIsUnicodeDecodeError(exc)) { + return _PyCodec_BackslashReplaceUnicodeDecodeError(exc); + } + else if (_PyIsUnicodeTranslateError(exc)) { + return _PyCodec_BackslashReplaceUnicodeTranslateError(exc); + } + else { + wrong_exception_type(exc); + return NULL; + } +} + + // --- handler: 'namereplace' ------------------------------------------------- PyObject *PyCodec_NameReplaceErrors(PyObject *exc) @@ -1502,7 +1530,8 @@ xmlcharrefreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc) } -static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc) +static inline PyObject * +backslashreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc) { return PyCodec_BackslashReplaceErrors(exc); }