@@ -730,6 +730,27 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730
730
}
731
731
732
732
733
+ /*
734
+ * Create a Unicode string containing 'count' copies of the official
735
+ * Unicode REPLACEMENT CHARACTER (0xFFFD).
736
+ */
737
+ static PyObject *
738
+ codec_handler_unicode_replacement_character (Py_ssize_t count )
739
+ {
740
+ PyObject * res = PyUnicode_New (count , Py_UNICODE_REPLACEMENT_CHARACTER );
741
+ if (res == NULL ) {
742
+ return NULL ;
743
+ }
744
+ assert (count == 0 || PyUnicode_KIND (res ) == PyUnicode_2BYTE_KIND );
745
+ Py_UCS2 * outp = PyUnicode_2BYTE_DATA (res );
746
+ for (Py_ssize_t i = 0 ; i < count ; ++ i ) {
747
+ outp [i ] = Py_UNICODE_REPLACEMENT_CHARACTER ;
748
+ }
749
+ assert (_PyUnicode_CheckConsistency (res , 1 ));
750
+ return res ;
751
+ }
752
+
753
+
733
754
// --- handler: 'strict' ------------------------------------------------------
734
755
735
756
PyObject * PyCodec_StrictErrors (PyObject * exc )
@@ -774,50 +795,71 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
774
795
}
775
796
776
797
777
- PyObject * PyCodec_ReplaceErrors (PyObject * exc )
798
+ // --- handler: 'replace' -----------------------------------------------------
799
+
800
+ static PyObject *
801
+ _PyCodec_ReplaceUnicodeEncodeError (PyObject * exc )
778
802
{
779
803
Py_ssize_t start , end , slen ;
804
+ if (_PyUnicodeError_GetParams (exc , NULL , NULL ,
805
+ & start , & end , & slen , false) < 0 )
806
+ {
807
+ return NULL ;
808
+ }
809
+ PyObject * res = PyUnicode_New (slen , '?' );
810
+ if (res == NULL ) {
811
+ return NULL ;
812
+ }
813
+ assert (PyUnicode_KIND (res ) == PyUnicode_1BYTE_KIND );
814
+ Py_UCS1 * outp = PyUnicode_1BYTE_DATA (res );
815
+ memset (outp , '?' , sizeof (Py_UCS1 ) * slen );
816
+ assert (_PyUnicode_CheckConsistency (res , 1 ));
817
+ return Py_BuildValue ("(Nn)" , res , end );
818
+ }
780
819
781
- if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeEncodeError )) {
782
- if (_PyUnicodeError_GetParams (exc , NULL , NULL ,
783
- & start , & end , & slen , false) < 0 ) {
784
- return NULL ;
785
- }
786
- PyObject * res = PyUnicode_New (slen , '?' );
787
- if (res == NULL ) {
788
- return NULL ;
789
- }
790
- assert (PyUnicode_KIND (res ) == PyUnicode_1BYTE_KIND );
791
- Py_UCS1 * outp = PyUnicode_1BYTE_DATA (res );
792
- memset (outp , '?' , sizeof (Py_UCS1 ) * slen );
793
- assert (_PyUnicode_CheckConsistency (res , 1 ));
794
- return Py_BuildValue ("(Nn)" , res , end );
820
+
821
/*
 * 'replace' handling for the UnicodeDecodeError case.
 *
 * Fetches the end index with PyUnicodeDecodeError_GetEnd(), builds a
 * one-character replacement string with
 * codec_handler_unicode_replacement_character(1), and returns the result of
 * Py_BuildValue("(Nn)", res, end). Returns NULL if either step fails.
 *
 * The lines marked '-' below are the removed branches of the former
 * PyCodec_ReplaceErrors() body as shown by the diff; they are not part of
 * this function.
 */
+ static PyObject *
822
+ _PyCodec_ReplaceUnicodeDecodeError (PyObject * exc )
823
+ {
824
+ Py_ssize_t end ;
825
+ if (PyUnicodeDecodeError_GetEnd (exc , & end ) < 0 ) {
826
+ return NULL ;
795
827
}
796
- else if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeDecodeError )) {
797
- if (_PyUnicodeError_GetParams (exc , NULL , NULL ,
798
- NULL , & end , NULL , true) < 0 ) {
799
- return NULL ;
800
- }
801
- return Py_BuildValue ("(Cn)" ,
802
- (int )Py_UNICODE_REPLACEMENT_CHARACTER ,
803
- end );
828
/* A single replacement character is produced, whatever the error range. */
+ PyObject * res = codec_handler_unicode_replacement_character (1 );
829
+ if (res == NULL ) {
830
+ return NULL ;
804
831
}
805
- else if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeTranslateError )) {
806
- if (_PyUnicodeError_GetParams (exc , NULL , NULL ,
807
- & start , & end , & slen , false) < 0 ) {
808
- return NULL ;
809
- }
810
- PyObject * res = PyUnicode_New (slen , Py_UNICODE_REPLACEMENT_CHARACTER );
811
- if (res == NULL ) {
812
- return NULL ;
813
- }
814
- assert (slen == 0 || PyUnicode_KIND (res ) == PyUnicode_2BYTE_KIND );
815
- Py_UCS2 * outp = PyUnicode_2BYTE_DATA (res );
816
- for (Py_ssize_t i = 0 ; i < slen ; ++ i ) {
817
- outp [i ] = Py_UNICODE_REPLACEMENT_CHARACTER ;
818
- }
819
- assert (_PyUnicode_CheckConsistency (res , 1 ));
820
- return Py_BuildValue ("(Nn)" , res , end );
832
+ return Py_BuildValue ("(Nn)" , res , end );
833
+ }
834
+
835
+
836
+ static PyObject *
837
+ _PyCodec_ReplaceUnicodeTranslateError (PyObject * exc )
838
+ {
839
+ Py_ssize_t start , end , slen ;
840
+ if (_PyUnicodeError_GetParams (exc , NULL , NULL ,
841
+ & start , & end , & slen , false) < 0 )
842
+ {
843
+ return NULL ;
844
+ }
845
+ PyObject * res = codec_handler_unicode_replacement_character (slen );
846
+ if (res == NULL ) {
847
+ return NULL ;
848
+ }
849
+ return Py_BuildValue ("(Nn)" , res , end );
850
+ }
851
+
852
+
853
+ PyObject * PyCodec_ReplaceErrors (PyObject * exc )
854
+ {
855
+ if (_PyIsUnicodeEncodeError (exc )) {
856
+ return _PyCodec_ReplaceUnicodeEncodeError (exc );
857
+ }
858
+ else if (_PyIsUnicodeDecodeError (exc )) {
859
+ return _PyCodec_ReplaceUnicodeDecodeError (exc );
860
+ }
861
+ else if (_PyIsUnicodeTranslateError (exc )) {
862
+ return _PyCodec_ReplaceUnicodeTranslateError (exc );
821
863
}
822
864
else {
823
865
wrong_exception_type (exc );
@@ -1468,7 +1510,8 @@ ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc)
1468
1510
}
1469
1511
1470
1512
1471
- static PyObject * replace_errors (PyObject * self , PyObject * exc )
1513
+ static inline PyObject *
1514
+ replace_errors (PyObject * Py_UNUSED (self ), PyObject * exc )
1472
1515
{
1473
1516
return PyCodec_ReplaceErrors (exc );
1474
1517
}
0 commit comments