@@ -2961,46 +2961,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
2961
2961
static PyObject *
2962
2962
UnicodeEncodeError_str (PyObject * self )
2963
2963
{
2964
- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
2964
+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
2965
2965
PyObject * result = NULL ;
2966
2966
PyObject * reason_str = NULL ;
2967
2967
PyObject * encoding_str = NULL ;
2968
2968
2969
- if (! uself -> object )
2969
+ if (exc -> object == NULL ) {
2970
2970
/* Not properly initialized. */
2971
2971
return PyUnicode_FromString ("" );
2972
+ }
2972
2973
2973
2974
/* Get reason and encoding as strings, which they might not be if
2974
2975
they've been modified after we were constructed. */
2975
- reason_str = PyObject_Str (uself -> reason );
2976
- if (reason_str == NULL )
2976
+ reason_str = PyObject_Str (exc -> reason );
2977
+ if (reason_str == NULL ) {
2977
2978
goto done ;
2978
- encoding_str = PyObject_Str (uself -> encoding );
2979
- if (encoding_str == NULL )
2979
+ }
2980
+ encoding_str = PyObject_Str (exc -> encoding );
2981
+ if (encoding_str == NULL ) {
2980
2982
goto done ;
2983
+ }
2984
+
2985
+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
2986
+ Py_ssize_t start = exc -> start , end = exc -> end ;
2981
2987
2982
- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
2983
- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
2988
+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
2989
+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
2984
2990
const char * fmt ;
2985
- if (badchar <= 0xff )
2991
+ if (badchar <= 0xff ) {
2986
2992
fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U" ;
2987
- else if (badchar <= 0xffff )
2993
+ }
2994
+ else if (badchar <= 0xffff ) {
2988
2995
fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U" ;
2989
- else
2996
+ }
2997
+ else {
2990
2998
fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U" ;
2999
+ }
2991
3000
result = PyUnicode_FromFormat (
2992
3001
fmt ,
2993
3002
encoding_str ,
2994
3003
(int )badchar ,
2995
- uself -> start ,
3004
+ start ,
2996
3005
reason_str );
2997
3006
}
2998
3007
else {
2999
3008
result = PyUnicode_FromFormat (
3000
3009
"'%U' codec can't encode characters in position %zd-%zd: %U" ,
3001
3010
encoding_str ,
3002
- uself -> start ,
3003
- uself -> end - 1 ,
3011
+ start ,
3012
+ end - 1 ,
3004
3013
reason_str );
3005
3014
}
3006
3015
done :
@@ -3074,41 +3083,46 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
3074
3083
static PyObject *
3075
3084
UnicodeDecodeError_str (PyObject * self )
3076
3085
{
3077
- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3086
+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
3078
3087
PyObject * result = NULL ;
3079
3088
PyObject * reason_str = NULL ;
3080
3089
PyObject * encoding_str = NULL ;
3081
3090
3082
- if (! uself -> object )
3091
+ if (exc -> object == NULL ) {
3083
3092
/* Not properly initialized. */
3084
3093
return PyUnicode_FromString ("" );
3094
+ }
3085
3095
3086
3096
/* Get reason and encoding as strings, which they might not be if
3087
3097
they've been modified after we were constructed. */
3088
- reason_str = PyObject_Str (uself -> reason );
3089
- if (reason_str == NULL )
3098
+ reason_str = PyObject_Str (exc -> reason );
3099
+ if (reason_str == NULL ) {
3090
3100
goto done ;
3091
- encoding_str = PyObject_Str (uself -> encoding );
3092
- if (encoding_str == NULL )
3101
+ }
3102
+ encoding_str = PyObject_Str (exc -> encoding );
3103
+ if (encoding_str == NULL ) {
3093
3104
goto done ;
3105
+ }
3106
+
3107
+ Py_ssize_t len = PyBytes_GET_SIZE (exc -> object );
3108
+ Py_ssize_t start = exc -> start , end = exc -> end ;
3094
3109
3095
- if (uself -> start < PyBytes_GET_SIZE ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3096
- int byte = (int )(PyBytes_AS_STRING ((( PyUnicodeErrorObject * ) self ) -> object )[uself -> start ]& 0xff );
3110
+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3111
+ int badbyte = (int )(PyBytes_AS_STRING (exc -> object )[start ] & 0xff );
3097
3112
result = PyUnicode_FromFormat (
3098
3113
"'%U' codec can't decode byte 0x%02x in position %zd: %U" ,
3099
3114
encoding_str ,
3100
- byte ,
3101
- uself -> start ,
3115
+ badbyte ,
3116
+ start ,
3102
3117
reason_str );
3103
3118
}
3104
3119
else {
3105
3120
result = PyUnicode_FromFormat (
3106
3121
"'%U' codec can't decode bytes in position %zd-%zd: %U" ,
3107
3122
encoding_str ,
3108
- uself -> start ,
3109
- uself -> end - 1 ,
3110
- reason_str
3111
- );
3123
+ start ,
3124
+ end - 1 ,
3125
+ reason_str );
3112
3126
}
3113
3127
done :
3114
3128
Py_XDECREF (reason_str );
@@ -3171,42 +3185,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
3171
3185
static PyObject *
3172
3186
UnicodeTranslateError_str (PyObject * self )
3173
3187
{
3174
- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3188
+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
3175
3189
PyObject * result = NULL ;
3176
3190
PyObject * reason_str = NULL ;
3177
3191
3178
- if (! uself -> object )
3192
+ if (exc -> object == NULL ) {
3179
3193
/* Not properly initialized. */
3180
3194
return PyUnicode_FromString ("" );
3195
+ }
3181
3196
3182
3197
/* Get reason as a string, which it might not be if it's been
3183
3198
modified after we were constructed. */
3184
- reason_str = PyObject_Str (uself -> reason );
3185
- if (reason_str == NULL )
3199
+ reason_str = PyObject_Str (exc -> reason );
3200
+ if (reason_str == NULL ) {
3186
3201
goto done ;
3202
+ }
3203
+
3204
+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
3205
+ Py_ssize_t start = exc -> start , end = exc -> end ;
3187
3206
3188
- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3189
- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
3207
+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3208
+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
3190
3209
const char * fmt ;
3191
- if (badchar <= 0xff )
3210
+ if (badchar <= 0xff ) {
3192
3211
fmt = "can't translate character '\\x%02x' in position %zd: %U" ;
3193
- else if (badchar <= 0xffff )
3212
+ }
3213
+ else if (badchar <= 0xffff ) {
3194
3214
fmt = "can't translate character '\\u%04x' in position %zd: %U" ;
3195
- else
3215
+ }
3216
+ else {
3196
3217
fmt = "can't translate character '\\U%08x' in position %zd: %U" ;
3218
+ }
3197
3219
result = PyUnicode_FromFormat (
3198
3220
fmt ,
3199
3221
(int )badchar ,
3200
- uself -> start ,
3201
- reason_str
3202
- );
3203
- } else {
3222
+ start ,
3223
+ reason_str );
3224
+ }
3225
+ else {
3204
3226
result = PyUnicode_FromFormat (
3205
3227
"can't translate characters in position %zd-%zd: %U" ,
3206
- uself -> start ,
3207
- uself -> end - 1 ,
3208
- reason_str
3209
- );
3228
+ start ,
3229
+ end - 1 ,
3230
+ reason_str );
3210
3231
}
3211
3232
done :
3212
3233
Py_XDECREF (reason_str );
0 commit comments