Skip to content

Commit fa6a814

Browse files
authored
gh-129173: refactor PyCodec_ReplaceErrors into separate functions (#129893)
The logic of `PyCodec_ReplaceErrors` is now split into separate functions, each of which handles a specific exception type.
1 parent 4d3a7ea commit fa6a814

File tree

1 file changed

+83
-40
lines changed

1 file changed

+83
-40
lines changed

Python/codecs.c

+83-40
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,27 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730730
}
731731

732732

733+
/*
734+
* Create a Unicode string containing 'count' copies of the official
735+
* Unicode REPLACEMENT CHARACTER (0xFFFD).
736+
*/
737+
static PyObject *
738+
codec_handler_unicode_replacement_character(Py_ssize_t count)
739+
{
740+
PyObject *res = PyUnicode_New(count, Py_UNICODE_REPLACEMENT_CHARACTER);
741+
if (res == NULL) {
742+
return NULL;
743+
}
744+
assert(count == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
745+
Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
746+
for (Py_ssize_t i = 0; i < count; ++i) {
747+
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
748+
}
749+
assert(_PyUnicode_CheckConsistency(res, 1));
750+
return res;
751+
}
752+
753+
733754
// --- handler: 'strict' ------------------------------------------------------
734755

735756
PyObject *PyCodec_StrictErrors(PyObject *exc)
@@ -774,50 +795,71 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
774795
}
775796

776797

777-
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
798+
// --- handler: 'replace' -----------------------------------------------------
799+
800+
/*
 * 'replace' handling for a UnicodeEncodeError.
 *
 * Fetches 'start', 'end' and 'slen' from the exception through
 * _PyUnicodeError_GetParams(), builds a string of 'slen' question marks
 * and returns the tuple (string, end).
 *
 * Returns NULL if the parameters cannot be fetched or if the string
 * cannot be allocated.
 */
static PyObject *
_PyCodec_ReplaceUnicodeEncodeError(PyObject *exc)
{
    Py_ssize_t start, end, slen;
    if (_PyUnicodeError_GetParams(exc, NULL, NULL,
                                  &start, &end, &slen, false) < 0) {
        return NULL;
    }

    PyObject *marks = PyUnicode_New(slen, '?');
    if (marks == NULL) {
        return NULL;
    }
    assert(PyUnicode_KIND(marks) == PyUnicode_1BYTE_KIND);

    // Fill the whole 1-byte buffer with '?'.
    Py_UCS1 *buf = PyUnicode_1BYTE_DATA(marks);
    memset(buf, '?', sizeof(Py_UCS1) * slen);
    assert(_PyUnicode_CheckConsistency(marks, 1));

    // The "N" format hands our reference to 'marks' over to the tuple.
    return Py_BuildValue("(Nn)", marks, end);
}
780819

781-
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
782-
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
783-
&start, &end, &slen, false) < 0) {
784-
return NULL;
785-
}
786-
PyObject *res = PyUnicode_New(slen, '?');
787-
if (res == NULL) {
788-
return NULL;
789-
}
790-
assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND);
791-
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
792-
memset(outp, '?', sizeof(Py_UCS1) * slen);
793-
assert(_PyUnicode_CheckConsistency(res, 1));
794-
return Py_BuildValue("(Nn)", res, end);
820+
821+
static PyObject *
822+
_PyCodec_ReplaceUnicodeDecodeError(PyObject *exc)
823+
{
824+
Py_ssize_t end;
825+
if (PyUnicodeDecodeError_GetEnd(exc, &end) < 0) {
826+
return NULL;
795827
}
796-
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
797-
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
798-
NULL, &end, NULL, true) < 0) {
799-
return NULL;
800-
}
801-
return Py_BuildValue("(Cn)",
802-
(int)Py_UNICODE_REPLACEMENT_CHARACTER,
803-
end);
828+
PyObject *res = codec_handler_unicode_replacement_character(1);
829+
if (res == NULL) {
830+
return NULL;
804831
}
805-
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
806-
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
807-
&start, &end, &slen, false) < 0) {
808-
return NULL;
809-
}
810-
PyObject *res = PyUnicode_New(slen, Py_UNICODE_REPLACEMENT_CHARACTER);
811-
if (res == NULL) {
812-
return NULL;
813-
}
814-
assert(slen == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
815-
Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
816-
for (Py_ssize_t i = 0; i < slen; ++i) {
817-
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
818-
}
819-
assert(_PyUnicode_CheckConsistency(res, 1));
820-
return Py_BuildValue("(Nn)", res, end);
832+
return Py_BuildValue("(Nn)", res, end);
833+
}
834+
835+
836+
/*
 * 'replace' handling for a UnicodeTranslateError.
 *
 * Fetches 'start', 'end' and 'slen' from the exception through
 * _PyUnicodeError_GetParams(), builds a string of 'slen' replacement
 * characters with codec_handler_unicode_replacement_character() and
 * returns the tuple (string, end).
 *
 * Returns NULL if the parameters cannot be fetched or if the string
 * cannot be created.
 */
static PyObject *
_PyCodec_ReplaceUnicodeTranslateError(PyObject *exc)
{
    Py_ssize_t start, end, slen;
    if (_PyUnicodeError_GetParams(exc, NULL, NULL,
                                  &start, &end, &slen, false) < 0) {
        return NULL;
    }

    PyObject *replacement = codec_handler_unicode_replacement_character(slen);
    // The "N" format hands our reference to 'replacement' over to the tuple.
    return (replacement == NULL)
        ? NULL
        : Py_BuildValue("(Nn)", replacement, end);
}
851+
852+
853+
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
854+
{
855+
if (_PyIsUnicodeEncodeError(exc)) {
856+
return _PyCodec_ReplaceUnicodeEncodeError(exc);
857+
}
858+
else if (_PyIsUnicodeDecodeError(exc)) {
859+
return _PyCodec_ReplaceUnicodeDecodeError(exc);
860+
}
861+
else if (_PyIsUnicodeTranslateError(exc)) {
862+
return _PyCodec_ReplaceUnicodeTranslateError(exc);
821863
}
822864
else {
823865
wrong_exception_type(exc);
@@ -1468,7 +1510,8 @@ ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc)
14681510
}
14691511

14701512

1471-
static PyObject *replace_errors(PyObject *self, PyObject *exc)
1513+
/*
 * Wrapper around PyCodec_ReplaceErrors(): 'self' is unused and 'exc' is
 * forwarded unchanged; the callee's result is returned as is.
 */
static inline PyObject *
replace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
{
    PyObject *result = PyCodec_ReplaceErrors(exc);
    return result;
}

0 commit comments

Comments
 (0)