@@ -864,108 +864,107 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
864
864
865
865
/*
 * "backslashreplace" codec error handler.
 *
 * exc must be an instance of UnicodeDecodeError, UnicodeEncodeError or
 * UnicodeTranslateError; for any other object wrong_exception_type() is
 * called and NULL is returned.
 *
 * - UnicodeDecodeError: every byte in [start, end) of the bytes object is
 *   rendered as the 4 characters "\xNN".
 * - UnicodeEncodeError / UnicodeTranslateError: every code point in
 *   [start, end) of the str object is rendered as "\xNN", "\uNNNN" or
 *   "\UNNNNNNNN", depending on its magnitude.
 *
 * Returns the value built by Py_BuildValue("(Nn)", res, end), i.e. the
 * replacement string together with the index `end` (which may have been
 * lowered by the overflow clamps below), or NULL on failure.
 */
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    PyObject *obj;
    Py_ssize_t objlen, start, end, slen;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        if (_PyUnicodeError_GetParams(exc,
                                      &obj, &objlen,
                                      &start, &end, &slen, true) < 0)
        {
            return NULL;
        }
        // NOTE(review): 'obj' is released with Py_DECREF() on every path
        // below, so _PyUnicodeError_GetParams() presumably hands back a
        // strong reference -- confirm against its definition.

        // Each byte contributes exactly 4 characters ("\\" + 'x' + 2 hex
        // digits). Bound the range so that 4 * slen cannot overflow
        // Py_ssize_t, mirroring the clamp used for the str case below.
        if (slen > PY_SSIZE_T_MAX / 4) {
            end = start + PY_SSIZE_T_MAX / 4;
            end = Py_MIN(end, objlen);
            slen = Py_MAX(0, end - start);
        }

        PyObject *res = PyUnicode_New(4 * slen, 127);
        if (res == NULL) {
            Py_DECREF(obj);
            return NULL;
        }
        Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
        const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
        for (Py_ssize_t i = start; i < end; i++, outp += 4) {
            const unsigned char ch = p[i];
            outp[0] = '\\';
            outp[1] = 'x';
            outp[2] = Py_hexdigits[(ch >> 4) & 0xf];
            outp[3] = Py_hexdigits[ch & 0xf];
        }
        assert(_PyUnicode_CheckConsistency(res, 1));
        Py_DECREF(obj);
        // "N" hands the reference to 'res' over to the resulting tuple.
        return Py_BuildValue("(Nn)", res, end);
    }

    if (
        PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)
        || PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)
    ) {
        if (_PyUnicodeError_GetParams(exc,
                                      &obj, &objlen,
                                      &start, &end, &slen, false) < 0)
        {
            return NULL;
        }
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }

    // Each character 'ch' is formatted as "\\" + ('U'|'u'|'x') + HEXDIGITS,
    // where the number of hex digits is 2, 4 or 8 (never 6), so it
    // contributes at most 1 + 1 + 8 characters to the result. Bound the
    // range accordingly so that the size computation below cannot
    // overflow Py_ssize_t.
    if (slen > PY_SSIZE_T_MAX / (1 + 1 + 8)) {
        end = start + PY_SSIZE_T_MAX / (1 + 1 + 8);
        end = Py_MIN(end, objlen);
        slen = Py_MAX(0, end - start);
    }

    // First pass: compute the exact size of the replacement string.
    Py_ssize_t ressize = 0;
    for (Py_ssize_t i = start; i < end; ++i) {
        /* object is guaranteed to be "ready" */
        Py_UCS4 c = PyUnicode_READ_CHAR(obj, i);
        if (c >= 0x10000) {
            ressize += 1 + 1 + 8;
        }
        else if (c >= 0x100) {
            ressize += 1 + 1 + 4;
        }
        else {
            ressize += 1 + 1 + 2;
        }
    }

    PyObject *res = PyUnicode_New(ressize, 127);
    if (res == NULL) {
        Py_DECREF(obj);
        return NULL;
    }

    // Second pass: emit the escapes. Each branch writes the prefix letter
    // and the high-order digits specific to its width; the two low-order
    // hex digits are common to all three forms and are written last.
    Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
    for (Py_ssize_t i = start; i < end; ++i) {
        Py_UCS4 c = PyUnicode_READ_CHAR(obj, i);
        *outp++ = '\\';
        if (c >= 0x00010000) {
            *outp++ = 'U';
            *outp++ = Py_hexdigits[(c >> 28) & 0xf];
            *outp++ = Py_hexdigits[(c >> 24) & 0xf];
            *outp++ = Py_hexdigits[(c >> 20) & 0xf];
            *outp++ = Py_hexdigits[(c >> 16) & 0xf];
            *outp++ = Py_hexdigits[(c >> 12) & 0xf];
            *outp++ = Py_hexdigits[(c >> 8) & 0xf];
        }
        else if (c >= 0x100) {
            *outp++ = 'u';
            *outp++ = Py_hexdigits[(c >> 12) & 0xf];
            *outp++ = Py_hexdigits[(c >> 8) & 0xf];
        }
        else {
            *outp++ = 'x';
        }
        *outp++ = Py_hexdigits[(c >> 4) & 0xf];
        *outp++ = Py_hexdigits[c & 0xf];
    }
    assert(_PyUnicode_CheckConsistency(res, 1));
    Py_DECREF(obj);
    // "N" hands the reference to 'res' over to the resulting tuple.
    return Py_BuildValue("(Nn)", res, end);
}
971
970
0 commit comments