@@ -1359,76 +1359,91 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
1359
1359
}
1360
1360
1361
1361
1362
+ // --- handler: 'surrogateescape' ---------------------------------------------
1363
+
1362
1364
static PyObject *
1363
- PyCodec_SurrogateEscapeErrors (PyObject * exc )
1365
+ _PyCodec_SurrogateEscapeUnicodeEncodeError (PyObject * exc )
1364
1366
{
1365
- PyObject * restuple ;
1366
- PyObject * object ;
1367
- Py_ssize_t i ;
1368
- Py_ssize_t start ;
1369
- Py_ssize_t end ;
1370
- PyObject * res ;
1367
+ PyObject * obj ;
1368
+ Py_ssize_t start , end , slen ;
1369
+ if (_PyUnicodeError_GetParams (exc ,
1370
+ & obj , NULL ,
1371
+ & start , & end , & slen , false) < 0 )
1372
+ {
1373
+ return NULL ;
1374
+ }
1371
1375
1372
- if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeEncodeError )) {
1373
- char * outp ;
1374
- if (PyUnicodeEncodeError_GetStart (exc , & start ))
1375
- return NULL ;
1376
- if (PyUnicodeEncodeError_GetEnd (exc , & end ))
1377
- return NULL ;
1378
- if (!(object = PyUnicodeEncodeError_GetObject (exc )))
1379
- return NULL ;
1380
- res = PyBytes_FromStringAndSize (NULL , end - start );
1381
- if (!res ) {
1382
- Py_DECREF (object );
1383
- return NULL ;
1384
- }
1385
- outp = PyBytes_AsString (res );
1386
- for (i = start ; i < end ; i ++ ) {
1387
- /* object is guaranteed to be "ready" */
1388
- Py_UCS4 ch = PyUnicode_READ_CHAR (object , i );
1389
- if (ch < 0xdc80 || ch > 0xdcff ) {
1390
- /* Not a UTF-8b surrogate, fail with original exception */
1391
- PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1392
- Py_DECREF (res );
1393
- Py_DECREF (object );
1394
- return NULL ;
1395
- }
1396
- * outp ++ = ch - 0xdc00 ;
1397
- }
1398
- restuple = Py_BuildValue ("(On)" , res , end );
1399
- Py_DECREF (res );
1400
- Py_DECREF (object );
1401
- return restuple ;
1376
+ PyObject * res = PyBytes_FromStringAndSize (NULL , slen );
1377
+ if (res == NULL ) {
1378
+ Py_DECREF (obj );
1379
+ return NULL ;
1402
1380
}
1403
- else if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeDecodeError )) {
1404
- PyObject * str ;
1405
- const unsigned char * p ;
1406
- Py_UCS2 ch [4 ]; /* decode up to 4 bad bytes. */
1407
- int consumed = 0 ;
1408
- if (PyUnicodeDecodeError_GetStart (exc , & start ))
1409
- return NULL ;
1410
- if (PyUnicodeDecodeError_GetEnd (exc , & end ))
1411
- return NULL ;
1412
- if (!(object = PyUnicodeDecodeError_GetObject (exc )))
1413
- return NULL ;
1414
- p = (const unsigned char * )PyBytes_AS_STRING (object );
1415
- while (consumed < 4 && consumed < end - start ) {
1416
- /* Refuse to escape ASCII bytes. */
1417
- if (p [start + consumed ] < 128 )
1418
- break ;
1419
- ch [consumed ] = 0xdc00 + p [start + consumed ];
1420
- consumed ++ ;
1421
- }
1422
- Py_DECREF (object );
1423
- if (!consumed ) {
1424
- /* codec complained about ASCII byte. */
1381
+
1382
+ char * outp = PyBytes_AsString (res );
1383
+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
1384
+ Py_UCS4 ch = PyUnicode_READ_CHAR (obj , i );
1385
+ if (ch < 0xdc80 || ch > 0xdcff ) {
1386
+ /* Not a UTF-8b surrogate, fail with original exception. */
1387
+ Py_DECREF (obj );
1388
+ Py_DECREF (res );
1425
1389
PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1426
1390
return NULL ;
1427
1391
}
1428
- str = PyUnicode_FromKindAndData (PyUnicode_2BYTE_KIND , ch , consumed );
1429
- if (str == NULL )
1430
- return NULL ;
1431
- return Py_BuildValue ("(Nn)" , str , start + consumed );
1392
+ * outp ++ = ch - 0xdc00 ;
1393
+ }
1394
+ Py_DECREF (obj );
1395
+
1396
+ return Py_BuildValue ("(Nn)" , res , end );
1397
+ }
1398
+
1399
+
1400
+ static PyObject *
1401
+ _PyCodec_SurrogateEscapeUnicodeDecodeError (PyObject * exc )
1402
+ {
1403
+ PyObject * obj ;
1404
+ Py_ssize_t start , end , slen ;
1405
+ if (_PyUnicodeError_GetParams (exc ,
1406
+ & obj , NULL ,
1407
+ & start , & end , & slen , true) < 0 )
1408
+ {
1409
+ return NULL ;
1410
+ }
1411
+
1412
+ Py_UCS2 ch [4 ]; /* decode up to 4 bad bytes. */
1413
+ int consumed = 0 ;
1414
+ const unsigned char * p = (const unsigned char * )PyBytes_AS_STRING (obj );
1415
+ while (consumed < 4 && consumed < slen ) {
1416
+ /* Refuse to escape ASCII bytes. */
1417
+ if (p [start + consumed ] < 128 ) {
1418
+ break ;
1419
+ }
1420
+ ch [consumed ] = 0xdc00 + p [start + consumed ];
1421
+ consumed ++ ;
1422
+ }
1423
+ Py_DECREF (obj );
1424
+
1425
+ if (consumed == 0 ) {
1426
+ /* Codec complained about ASCII byte. */
1427
+ PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1428
+ return NULL ;
1429
+ }
1430
+
1431
+ PyObject * str = PyUnicode_FromKindAndData (PyUnicode_2BYTE_KIND , ch , consumed );
1432
+ if (str == NULL ) {
1433
+ return NULL ;
1434
+ }
1435
+ return Py_BuildValue ("(Nn)" , str , start + consumed );
1436
+ }
1437
+
1438
+
1439
+ static PyObject *
1440
+ PyCodec_SurrogateEscapeErrors (PyObject * exc )
1441
+ {
1442
+ if (_PyIsUnicodeEncodeError (exc )) {
1443
+ return _PyCodec_SurrogateEscapeUnicodeEncodeError (exc );
1444
+ }
1445
+ else if (_PyIsUnicodeDecodeError (exc )) {
1446
+ return _PyCodec_SurrogateEscapeUnicodeDecodeError (exc );
1432
1447
}
1433
1448
else {
1434
1449
wrong_exception_type (exc );
@@ -1485,11 +1500,13 @@ surrogatepass_errors(PyObject *Py_UNUSED(self), PyObject *exc)
1485
1500
}
1486
1501
1487
1502
1488
- static PyObject * surrogateescape_errors (PyObject * self , PyObject * exc )
1503
+ static inline PyObject *
1504
+ surrogateescape_errors (PyObject * Py_UNUSED (self ), PyObject * exc )
1489
1505
{
1490
1506
return PyCodec_SurrogateEscapeErrors (exc );
1491
1507
}
1492
1508
1509
+
1493
1510
PyStatus
1494
1511
_PyCodec_InitRegistry (PyInterpreterState * interp )
1495
1512
{
0 commit comments