Skip to content

Commit e24a1ac

Browse files
authored
gh-129173: Use _PyUnicodeError_GetParams in PyCodec_SurrogateEscapeErrors (GH-129175)
1 parent 519c2c6 commit e24a1ac

File tree

1 file changed

+81
-64
lines changed

1 file changed

+81
-64
lines changed

Python/codecs.c

Lines changed: 81 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -1359,76 +1359,91 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
13591359
}
13601360

13611361

1362+
// --- handler: 'surrogateescape' ---------------------------------------------
1363+
13621364
/*
 * Encoding half of the 'surrogateescape' error handler.
 *
 * Reads the offending object and its [start, end) range from 'exc' through
 * _PyUnicodeError_GetParams(), then converts every code point in that range
 * into a single byte (code point - 0xDC00).  Only code points in
 * U+DC80..U+DCFF are accepted, which yields bytes 0x80..0xFF.
 *
 * Returns a new tuple (bytes, end) on success.  Returns NULL if the
 * parameters cannot be fetched or the bytes object cannot be allocated.
 * If a code point outside U+DC80..U+DCFF is found, 'exc' itself is set as
 * the current exception and NULL is returned.
 */
static PyObject *
_PyCodec_SurrogateEscapeUnicodeEncodeError(PyObject *exc)
{
    PyObject *text;
    Py_ssize_t start, end, nchars;
    int rc = _PyUnicodeError_GetParams(exc, &text, NULL,
                                       &start, &end, &nchars, false);
    if (rc < 0) {
        return NULL;
    }

    /* One output byte per escaped code point. */
    PyObject *encoded = PyBytes_FromStringAndSize(NULL, nchars);
    if (encoded == NULL) {
        Py_DECREF(text);
        return NULL;
    }

    char *buf = PyBytes_AsString(encoded);
    Py_ssize_t pos = start;
    while (pos < end) {
        const Py_UCS4 cp = PyUnicode_READ_CHAR(text, pos);
        if (!(cp >= 0xdc80 && cp <= 0xdcff)) {
            /* Not a UTF-8b surrogate, fail with original exception. */
            Py_DECREF(text);
            Py_DECREF(encoded);
            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
            return NULL;
        }
        buf[pos - start] = cp - 0xdc00;
        pos++;
    }
    Py_DECREF(text);

    /* "N" passes our reference to 'encoded' on to the result tuple. */
    return Py_BuildValue("(Nn)", encoded, end);
}
1398+
1399+
1400+
/*
 * Decoding half of the 'surrogateescape' error handler.
 *
 * Reads the offending bytes object and its range from 'exc' through
 * _PyUnicodeError_GetParams(), then maps at most 4 bytes beginning at
 * 'start' to the code points 0xDC00 + byte.  Conversion stops at the first
 * byte below 128, because ASCII bytes are never escaped.
 *
 * Returns a new tuple (str, start + number of bytes escaped) on success.
 * Returns NULL if the parameters cannot be fetched or the string cannot be
 * created.  If no byte could be escaped, 'exc' itself is set as the current
 * exception and NULL is returned.
 */
static PyObject *
_PyCodec_SurrogateEscapeUnicodeDecodeError(PyObject *exc)
{
    PyObject *input;
    Py_ssize_t start, end, nbytes;
    int rc = _PyUnicodeError_GetParams(exc, &input, NULL,
                                       &start, &end, &nbytes, true);
    if (rc < 0) {
        return NULL;
    }

    Py_UCS2 escaped[4];  /* decode up to 4 bad bytes. */
    const unsigned char *raw = (const unsigned char *)PyBytes_AS_STRING(input);
    int count;
    for (count = 0; count < 4 && count < nbytes; count++) {
        const unsigned char byte = raw[start + count];
        /* Refuse to escape ASCII bytes. */
        if (byte < 128) {
            break;
        }
        escaped[count] = 0xdc00 + byte;
    }
    /* 'raw' is not used past this point, so the input can be released. */
    Py_DECREF(input);

    if (count == 0) {
        /* Codec complained about ASCII byte. */
        PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
        return NULL;
    }

    PyObject *replacement = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND,
                                                      escaped, count);
    if (replacement == NULL) {
        return NULL;
    }
    /* "N" passes our reference to 'replacement' on to the result tuple. */
    return Py_BuildValue("(Nn)", replacement, start + count);
}
1437+
1438+
1439+
static PyObject *
1440+
PyCodec_SurrogateEscapeErrors(PyObject *exc)
1441+
{
1442+
if (_PyIsUnicodeEncodeError(exc)) {
1443+
return _PyCodec_SurrogateEscapeUnicodeEncodeError(exc);
1444+
}
1445+
else if (_PyIsUnicodeDecodeError(exc)) {
1446+
return _PyCodec_SurrogateEscapeUnicodeDecodeError(exc);
14321447
}
14331448
else {
14341449
wrong_exception_type(exc);
@@ -1485,11 +1500,13 @@ surrogatepass_errors(PyObject *Py_UNUSED(self), PyObject *exc)
14851500
}
14861501

14871502

1488-
static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
1503+
/* Thin wrapper: ignores 'self' and forwards 'exc' unchanged to
   PyCodec_SurrogateEscapeErrors(), returning whatever that call returns. */
static inline PyObject *
surrogateescape_errors(PyObject *Py_UNUSED(self), PyObject *exc)
{
    PyObject *result = PyCodec_SurrogateEscapeErrors(exc);
    return result;
}
14921508

1509+
14931510
PyStatus
14941511
_PyCodec_InitRegistry(PyInterpreterState *interp)
14951512
{

0 commit comments

Comments
 (0)