@@ -2959,46 +2959,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
2959
2959
static PyObject *
2960
2960
UnicodeEncodeError_str (PyObject * self )
2961
2961
{
2962
- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
2962
+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
2963
2963
PyObject * result = NULL ;
2964
2964
PyObject * reason_str = NULL ;
2965
2965
PyObject * encoding_str = NULL ;
2966
2966
2967
- if (! uself -> object )
2967
+ if (exc -> object == NULL ) {
2968
2968
/* Not properly initialized. */
2969
2969
return PyUnicode_FromString ("" );
2970
+ }
2970
2971
2971
2972
/* Get reason and encoding as strings, which they might not be if
2972
2973
they've been modified after we were constructed. */
2973
- reason_str = PyObject_Str (uself -> reason );
2974
- if (reason_str == NULL )
2974
+ reason_str = PyObject_Str (exc -> reason );
2975
+ if (reason_str == NULL ) {
2975
2976
goto done ;
2976
- encoding_str = PyObject_Str (uself -> encoding );
2977
- if (encoding_str == NULL )
2977
+ }
2978
+ encoding_str = PyObject_Str (exc -> encoding );
2979
+ if (encoding_str == NULL ) {
2978
2980
goto done ;
2981
+ }
2982
+
2983
+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
2984
+ Py_ssize_t start = exc -> start , end = exc -> end ;
2979
2985
2980
- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
2981
- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
2986
+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
2987
+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
2982
2988
const char * fmt ;
2983
- if (badchar <= 0xff )
2989
+ if (badchar <= 0xff ) {
2984
2990
fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U" ;
2985
- else if (badchar <= 0xffff )
2991
+ }
2992
+ else if (badchar <= 0xffff ) {
2986
2993
fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U" ;
2987
- else
2994
+ }
2995
+ else {
2988
2996
fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U" ;
2997
+ }
2989
2998
result = PyUnicode_FromFormat (
2990
2999
fmt ,
2991
3000
encoding_str ,
2992
3001
(int )badchar ,
2993
- uself -> start ,
3002
+ start ,
2994
3003
reason_str );
2995
3004
}
2996
3005
else {
2997
3006
result = PyUnicode_FromFormat (
2998
3007
"'%U' codec can't encode characters in position %zd-%zd: %U" ,
2999
3008
encoding_str ,
3000
- uself -> start ,
3001
- uself -> end - 1 ,
3009
+ start ,
3010
+ end - 1 ,
3002
3011
reason_str );
3003
3012
}
3004
3013
done :
@@ -3072,41 +3081,46 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
3072
3081
static PyObject *
3073
3082
UnicodeDecodeError_str (PyObject * self )
3074
3083
{
3075
- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3084
+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
3076
3085
PyObject * result = NULL ;
3077
3086
PyObject * reason_str = NULL ;
3078
3087
PyObject * encoding_str = NULL ;
3079
3088
3080
- if (! uself -> object )
3089
+ if (exc -> object == NULL ) {
3081
3090
/* Not properly initialized. */
3082
3091
return PyUnicode_FromString ("" );
3092
+ }
3083
3093
3084
3094
/* Get reason and encoding as strings, which they might not be if
3085
3095
they've been modified after we were constructed. */
3086
- reason_str = PyObject_Str (uself -> reason );
3087
- if (reason_str == NULL )
3096
+ reason_str = PyObject_Str (exc -> reason );
3097
+ if (reason_str == NULL ) {
3088
3098
goto done ;
3089
- encoding_str = PyObject_Str (uself -> encoding );
3090
- if (encoding_str == NULL )
3099
+ }
3100
+ encoding_str = PyObject_Str (exc -> encoding );
3101
+ if (encoding_str == NULL ) {
3091
3102
goto done ;
3103
+ }
3104
+
3105
+ Py_ssize_t len = PyBytes_GET_SIZE (exc -> object );
3106
+ Py_ssize_t start = exc -> start , end = exc -> end ;
3092
3107
3093
- if (uself -> start < PyBytes_GET_SIZE ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3094
- int byte = (int )(PyBytes_AS_STRING ((( PyUnicodeErrorObject * ) self ) -> object )[uself -> start ]& 0xff );
3108
+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3109
+ int badbyte = (int )(PyBytes_AS_STRING (exc -> object )[start ] & 0xff );
3095
3110
result = PyUnicode_FromFormat (
3096
3111
"'%U' codec can't decode byte 0x%02x in position %zd: %U" ,
3097
3112
encoding_str ,
3098
- byte ,
3099
- uself -> start ,
3113
+ badbyte ,
3114
+ start ,
3100
3115
reason_str );
3101
3116
}
3102
3117
else {
3103
3118
result = PyUnicode_FromFormat (
3104
3119
"'%U' codec can't decode bytes in position %zd-%zd: %U" ,
3105
3120
encoding_str ,
3106
- uself -> start ,
3107
- uself -> end - 1 ,
3108
- reason_str
3109
- );
3121
+ start ,
3122
+ end - 1 ,
3123
+ reason_str );
3110
3124
}
3111
3125
done :
3112
3126
Py_XDECREF (reason_str );
@@ -3169,42 +3183,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
3169
3183
static PyObject *
3170
3184
UnicodeTranslateError_str (PyObject * self )
3171
3185
{
3172
- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3186
+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
3173
3187
PyObject * result = NULL ;
3174
3188
PyObject * reason_str = NULL ;
3175
3189
3176
- if (! uself -> object )
3190
+ if (exc -> object == NULL ) {
3177
3191
/* Not properly initialized. */
3178
3192
return PyUnicode_FromString ("" );
3193
+ }
3179
3194
3180
3195
/* Get reason as a string, which it might not be if it's been
3181
3196
modified after we were constructed. */
3182
- reason_str = PyObject_Str (uself -> reason );
3183
- if (reason_str == NULL )
3197
+ reason_str = PyObject_Str (exc -> reason );
3198
+ if (reason_str == NULL ) {
3184
3199
goto done ;
3200
+ }
3201
+
3202
+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
3203
+ Py_ssize_t start = exc -> start , end = exc -> end ;
3185
3204
3186
- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3187
- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
3205
+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3206
+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
3188
3207
const char * fmt ;
3189
- if (badchar <= 0xff )
3208
+ if (badchar <= 0xff ) {
3190
3209
fmt = "can't translate character '\\x%02x' in position %zd: %U" ;
3191
- else if (badchar <= 0xffff )
3210
+ }
3211
+ else if (badchar <= 0xffff ) {
3192
3212
fmt = "can't translate character '\\u%04x' in position %zd: %U" ;
3193
- else
3213
+ }
3214
+ else {
3194
3215
fmt = "can't translate character '\\U%08x' in position %zd: %U" ;
3216
+ }
3195
3217
result = PyUnicode_FromFormat (
3196
3218
fmt ,
3197
3219
(int )badchar ,
3198
- uself -> start ,
3199
- reason_str
3200
- );
3201
- } else {
3220
+ start ,
3221
+ reason_str );
3222
+ }
3223
+ else {
3202
3224
result = PyUnicode_FromFormat (
3203
3225
"can't translate characters in position %zd-%zd: %U" ,
3204
- uself -> start ,
3205
- uself -> end - 1 ,
3206
- reason_str
3207
- );
3226
+ start ,
3227
+ end - 1 ,
3228
+ reason_str );
3208
3229
}
3209
3230
done :
3210
3231
Py_XDECREF (reason_str );
0 commit comments