@@ -265,6 +265,8 @@ unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
265265/* Forward declarations */
266266static inline int
267267_PyUnicodeWriter_WriteCharInline (_PyUnicodeWriter * writer , Py_UCS4 ch );
268+ static inline void
269+ _PyUnicodeWriter_InitWithBuffer (_PyUnicodeWriter * writer , PyObject * buffer ); // defined later in this file, after _PyUnicodeWriter_Init
268270static PyObject *
269271unicode_encode_utf8 (PyObject * unicode , _Py_error_handler error_handler ,
270272 const char * errors );
@@ -4877,16 +4879,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
48774879 _Py_error_handler error_handler , const char * errors ,
48784880 Py_ssize_t * consumed )
48794881{
4880- _PyUnicodeWriter writer ;
4881- const char * starts = s ;
4882- const char * end = s + size ;
4883-
4884- Py_ssize_t startinpos ;
4885- Py_ssize_t endinpos ;
4886- const char * errmsg = "" ;
4887- PyObject * error_handler_obj = NULL ;
4888- PyObject * exc = NULL ;
4889-
48904882 if (size == 0 ) {
48914883 if (consumed )
48924884 * consumed = 0 ;
@@ -4900,13 +4892,29 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
49004892 return get_latin1_char ((unsigned char )s [0 ]);
49014893 }
49024894
4903- _PyUnicodeWriter_Init (& writer );
4904- writer .min_length = size ;
4905- if (_PyUnicodeWriter_Prepare (& writer , writer .min_length , 127 ) == -1 )
4906- goto onError ;
4895+ const char * starts = s ;
4896+ const char * end = s + size ;
4897+
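4898+ // Fast path: try to decode the whole input as ASCII. NOTE(review): the `return u` below does not store to *consumed, unlike the size == 0 case above -- confirm no caller relies on it.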
4899+ PyObject * u = PyUnicode_New (size , 127 );
4900+ if (u == NULL ) {
4901+ return NULL ;
4902+ }
4903+ s += ascii_decode (s , end , PyUnicode_DATA (u ));
4904+ if (s == end ) {
4905+ return u ;
4906+ }
4907+
4908+ // Fall back to _PyUnicodeWriter once the ASCII fast path has failed.
4909+ _PyUnicodeWriter writer ;
4910+ _PyUnicodeWriter_InitWithBuffer (& writer , u );
4911+ writer .pos = s - starts ;
4912+
4913+ Py_ssize_t startinpos , endinpos ;
4914+ const char * errmsg = "" ;
4915+ PyObject * error_handler_obj = NULL ;
4916+ PyObject * exc = NULL ;
49074917
4908- writer .pos = ascii_decode (s , end , writer .data );
4909- s += writer .pos ;
49104918 while (s < end ) {
49114919 Py_UCS4 ch ;
49124920 int kind = writer .kind ;
@@ -6451,7 +6459,7 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
64516459 length after conversion to the true value. (But decoding error
64526460 handler might have to resize the string) */
64536461 _PyUnicodeWriter_Init (& writer );
6454- writer .min_length = size ;
6462+ writer .min_length = size ;
64556463 if (_PyUnicodeWriter_Prepare (& writer , size , 127 ) < 0 ) {
64566464 goto onError ;
64576465 }
@@ -6975,13 +6983,7 @@ PyUnicode_DecodeASCII(const char *s,
69756983 const char * errors )
69766984{
69776985 const char * starts = s ;
6978- _PyUnicodeWriter writer ;
6979- int kind ;
6980- void * data ;
6981- Py_ssize_t startinpos ;
6982- Py_ssize_t endinpos ;
6983- Py_ssize_t outpos ;
6984- const char * e ;
6986+ const char * e = s + size ;
69856987 PyObject * error_handler_obj = NULL ;
69866988 PyObject * exc = NULL ;
69876989 _Py_error_handler error_handler = _Py_ERROR_UNKNOWN ;
@@ -6993,20 +6995,25 @@ PyUnicode_DecodeASCII(const char *s,
69936995 if (size == 1 && (unsigned char )s [0 ] < 128 )
69946996 return get_latin1_char ((unsigned char )s [0 ]);
69956997
6996- _PyUnicodeWriter_Init ( & writer );
6997- writer . min_length = size ;
6998- if (_PyUnicodeWriter_Prepare ( & writer , writer . min_length , 127 ) < 0 )
6998+ // Shortcut for simple case
6999+ PyObject * u = PyUnicode_New ( size , 127 ) ;
7000+ if (u == NULL ) {
69997001 return NULL ;
7002+ }
7003+ Py_ssize_t outpos = ascii_decode (s , e , PyUnicode_DATA (u ));
7004+ if (outpos == size ) {
7005+ return u ;
7006+ }
70007007
7001- e = s + size ;
7002- data = writer .data ;
7003- outpos = ascii_decode (s , e , (Py_UCS1 * )data );
7008+ _PyUnicodeWriter writer ;
7009+ _PyUnicodeWriter_InitWithBuffer (& writer , u );
70047010 writer .pos = outpos ;
7005- if (writer .pos == size )
7006- return _PyUnicodeWriter_Finish (& writer );
70077011
7008- s += writer .pos ;
7009- kind = writer .kind ;
7012+ s += outpos ;
7013+ int kind = writer .kind ;
7014+ void * data = writer .data ;
7015+ Py_ssize_t startinpos , endinpos ;
7016+
70107017 while (s < e ) {
70117018 unsigned char c = (unsigned char )* s ;
70127019 if (c < 128 ) {
@@ -13506,6 +13513,16 @@ _PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
1350613513 assert (writer -> kind <= PyUnicode_1BYTE_KIND );
1350713514}
1350813515
13516+ // Initialize a _PyUnicodeWriter so that it starts out writing into an existing object, `buffer`, rather than with no buffer at all.
13517+ static inline void
13518+ _PyUnicodeWriter_InitWithBuffer (_PyUnicodeWriter * writer , PyObject * buffer )
13519+ {
13520+ memset (writer , 0 , sizeof (* writer )); // clear every field, so pos is 0 until the caller sets it
13521+ writer -> buffer = buffer ; // the pointer is stored as-is; no reference count change is made here
13522+ _PyUnicodeWriter_Update (writer ); // presumably refreshes the cached fields (e.g. size) from buffer -- confirm against its definition
13523+ writer -> min_length = writer -> size ; // size is read only after the update call above
13524+ }
13524+ }
13525+
1350913526int
1351013527_PyUnicodeWriter_PrepareInternal (_PyUnicodeWriter * writer ,
1351113528 Py_ssize_t length , Py_UCS4 maxchar )
0 commit comments