@@ -11,6 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
11
11
#include "Python.h"
12
12
#include "pycore_call.h" // _PyObject_CallNoArgs()
13
13
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
14
+ #include "pycore_lock.h" // PyMutex
14
15
#include "pycore_pyerrors.h" // _PyErr_FormatNote()
15
16
#include "pycore_pystate.h" // _PyInterpreterState_GET()
16
17
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
@@ -19,24 +20,10 @@ const char *Py_hexdigits = "0123456789abcdef";
19
20
20
21
/* --- Codec Registry ----------------------------------------------------- */
21
22
22
- /* Import the standard encodings package which will register the first
23
- codec search function.
24
-
25
- This is done in a lazy way so that the Unicode implementation does
26
- not downgrade startup time of scripts not needing it.
27
-
28
- ImportErrors are silently ignored by this function. Only one try is
29
- made.
30
-
31
- */
32
-
33
- static int _PyCodecRegistry_Init (void ); /* Forward */
34
-
35
23
int PyCodec_Register (PyObject * search_function )
36
24
{
37
25
PyInterpreterState * interp = _PyInterpreterState_GET ();
38
- if (interp -> codec_search_path == NULL && _PyCodecRegistry_Init ())
39
- goto onError ;
26
+ assert (interp -> codecs .initialized );
40
27
if (search_function == NULL ) {
41
28
PyErr_BadArgument ();
42
29
goto onError ;
@@ -45,7 +32,14 @@ int PyCodec_Register(PyObject *search_function)
45
32
PyErr_SetString (PyExc_TypeError , "argument must be callable" );
46
33
goto onError ;
47
34
}
48
- return PyList_Append (interp -> codec_search_path , search_function );
35
+ #ifdef Py_GIL_DISABLED
36
+ PyMutex_Lock (& interp -> codecs .search_path_mutex );
37
+ #endif
38
+ int ret = PyList_Append (interp -> codecs .search_path , search_function );
39
+ #ifdef Py_GIL_DISABLED
40
+ PyMutex_Unlock (& interp -> codecs .search_path_mutex );
41
+ #endif
42
+ return ret ;
49
43
50
44
onError :
51
45
return -1 ;
55
49
PyCodec_Unregister (PyObject * search_function )
56
50
{
57
51
PyInterpreterState * interp = _PyInterpreterState_GET ();
58
- PyObject * codec_search_path = interp -> codec_search_path ;
59
- /* Do nothing if codec_search_path is not created yet or was cleared. */
60
- if ( codec_search_path == NULL ) {
52
+ if ( interp -> codecs . initialized != 1 ) {
53
+ /* Do nothing if codecs state was cleared (only possible during
54
+ interpreter shutdown). */
61
55
return 0 ;
62
56
}
63
57
58
+ PyObject * codec_search_path = interp -> codecs .search_path ;
64
59
assert (PyList_CheckExact (codec_search_path ));
65
- Py_ssize_t n = PyList_GET_SIZE (codec_search_path );
66
- for (Py_ssize_t i = 0 ; i < n ; i ++ ) {
67
- PyObject * item = PyList_GET_ITEM (codec_search_path , i );
60
+ for (Py_ssize_t i = 0 ; i < PyList_GET_SIZE (codec_search_path ); i ++ ) {
61
+ #ifdef Py_GIL_DISABLED
62
+ PyMutex_Lock (& interp -> codecs .search_path_mutex );
63
+ #endif
64
+ PyObject * item = PyList_GetItemRef (codec_search_path , i );
65
+ int ret = 1 ;
68
66
if (item == search_function ) {
69
- if (interp -> codec_search_cache != NULL ) {
70
- assert (PyDict_CheckExact (interp -> codec_search_cache ));
71
- PyDict_Clear (interp -> codec_search_cache );
72
- }
73
- return PyList_SetSlice (codec_search_path , i , i + 1 , NULL );
67
+ // We hold a reference to the item, so its destructor can't run
68
+ // while we hold search_path_mutex.
69
+ ret = PyList_SetSlice (codec_search_path , i , i + 1 , NULL );
70
+ }
71
+ #ifdef Py_GIL_DISABLED
72
+ PyMutex_Unlock (& interp -> codecs .search_path_mutex );
73
+ #endif
74
+ Py_DECREF (item );
75
+ if (ret != 1 ) {
76
+ assert (interp -> codecs .search_cache != NULL );
77
+ assert (PyDict_CheckExact (interp -> codecs .search_cache ));
78
+ PyDict_Clear (interp -> codecs .search_cache );
79
+ return ret ;
74
80
}
75
81
}
76
82
return 0 ;
@@ -132,9 +138,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
132
138
}
133
139
134
140
PyInterpreterState * interp = _PyInterpreterState_GET ();
135
- if (interp -> codec_search_path == NULL && _PyCodecRegistry_Init ()) {
136
- return NULL ;
137
- }
141
+ assert (interp -> codecs .initialized );
138
142
139
143
/* Convert the encoding to a normalized Python string: all
140
144
characters are converted to lower case, spaces and hyphens are
@@ -147,7 +151,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
147
151
148
152
/* First, try to lookup the name in the registry dictionary */
149
153
PyObject * result ;
150
- if (PyDict_GetItemRef (interp -> codec_search_cache , v , & result ) < 0 ) {
154
+ if (PyDict_GetItemRef (interp -> codecs . search_cache , v , & result ) < 0 ) {
151
155
goto onError ;
152
156
}
153
157
if (result != NULL ) {
@@ -156,7 +160,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
156
160
}
157
161
158
162
/* Next, scan the search functions in order of registration */
159
- const Py_ssize_t len = PyList_Size (interp -> codec_search_path );
163
+ const Py_ssize_t len = PyList_Size (interp -> codecs . search_path );
160
164
if (len < 0 )
161
165
goto onError ;
162
166
if (len == 0 ) {
@@ -170,14 +174,15 @@ PyObject *_PyCodec_Lookup(const char *encoding)
170
174
for (i = 0 ; i < len ; i ++ ) {
171
175
PyObject * func ;
172
176
173
- func = PyList_GetItem (interp -> codec_search_path , i );
177
+ func = PyList_GetItemRef (interp -> codecs . search_path , i );
174
178
if (func == NULL )
175
179
goto onError ;
176
180
result = PyObject_CallOneArg (func , v );
181
+ Py_DECREF (func );
177
182
if (result == NULL )
178
183
goto onError ;
179
184
if (result == Py_None ) {
180
- Py_DECREF (result );
185
+ Py_CLEAR (result );
181
186
continue ;
182
187
}
183
188
if (!PyTuple_Check (result ) || PyTuple_GET_SIZE (result ) != 4 ) {
@@ -188,15 +193,15 @@ PyObject *_PyCodec_Lookup(const char *encoding)
188
193
}
189
194
break ;
190
195
}
191
- if (i == len ) {
196
+ if (result == NULL ) {
192
197
/* XXX Perhaps we should cache misses too ? */
193
198
PyErr_Format (PyExc_LookupError ,
194
199
"unknown encoding: %s" , encoding );
195
200
goto onError ;
196
201
}
197
202
198
203
/* Cache and return the result */
199
- if (PyDict_SetItem (interp -> codec_search_cache , v , result ) < 0 ) {
204
+ if (PyDict_SetItem (interp -> codecs . search_cache , v , result ) < 0 ) {
200
205
Py_DECREF (result );
201
206
goto onError ;
202
207
}
@@ -600,13 +605,12 @@ PyObject *_PyCodec_DecodeText(PyObject *object,
600
605
int PyCodec_RegisterError (const char * name , PyObject * error )
601
606
{
602
607
PyInterpreterState * interp = _PyInterpreterState_GET ();
603
- if (interp -> codec_search_path == NULL && _PyCodecRegistry_Init ())
604
- return -1 ;
608
+ assert (interp -> codecs .initialized );
605
609
if (!PyCallable_Check (error )) {
606
610
PyErr_SetString (PyExc_TypeError , "handler must be callable" );
607
611
return -1 ;
608
612
}
609
- return PyDict_SetItemString (interp -> codec_error_registry ,
613
+ return PyDict_SetItemString (interp -> codecs . error_registry ,
610
614
name , error );
611
615
}
612
616
@@ -616,13 +620,12 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
616
620
PyObject * PyCodec_LookupError (const char * name )
617
621
{
618
622
PyInterpreterState * interp = _PyInterpreterState_GET ();
619
- if (interp -> codec_search_path == NULL && _PyCodecRegistry_Init ())
620
- return NULL ;
623
+ assert (interp -> codecs .initialized );
621
624
622
625
if (name == NULL )
623
626
name = "strict" ;
624
627
PyObject * handler ;
625
- if (PyDict_GetItemStringRef (interp -> codec_error_registry , name , & handler ) < 0 ) {
628
+ if (PyDict_GetItemStringRef (interp -> codecs . error_registry , name , & handler ) < 0 ) {
626
629
return NULL ;
627
630
}
628
631
if (handler == NULL ) {
@@ -1375,7 +1378,8 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
1375
1378
return PyCodec_SurrogateEscapeErrors (exc );
1376
1379
}
1377
1380
1378
- static int _PyCodecRegistry_Init (void )
1381
+ PyStatus
1382
+ _PyCodec_InitRegistry (PyInterpreterState * interp )
1379
1383
{
1380
1384
static struct {
1381
1385
const char * name ;
@@ -1463,45 +1467,51 @@ static int _PyCodecRegistry_Init(void)
1463
1467
}
1464
1468
};
1465
1469
1466
- PyInterpreterState * interp = _PyInterpreterState_GET ();
1467
- PyObject * mod ;
1468
-
1469
- if (interp -> codec_search_path != NULL )
1470
- return 0 ;
1471
-
1472
- interp -> codec_search_path = PyList_New (0 );
1473
- if (interp -> codec_search_path == NULL ) {
1474
- return -1 ;
1470
+ assert (interp -> codecs .initialized == 0 );
1471
+ interp -> codecs .search_path = PyList_New (0 );
1472
+ if (interp -> codecs .search_path == NULL ) {
1473
+ return PyStatus_NoMemory ();
1475
1474
}
1476
-
1477
- interp -> codec_search_cache = PyDict_New ();
1478
- if (interp -> codec_search_cache == NULL ) {
1479
- return -1 ;
1475
+ interp -> codecs .search_cache = PyDict_New ();
1476
+ if (interp -> codecs .search_cache == NULL ) {
1477
+ return PyStatus_NoMemory ();
1480
1478
}
1481
-
1482
- interp -> codec_error_registry = PyDict_New ();
1483
- if (interp -> codec_error_registry == NULL ) {
1484
- return -1 ;
1479
+ interp -> codecs .error_registry = PyDict_New ();
1480
+ if (interp -> codecs .error_registry == NULL ) {
1481
+ return PyStatus_NoMemory ();
1485
1482
}
1486
-
1487
1483
for (size_t i = 0 ; i < Py_ARRAY_LENGTH (methods ); ++ i ) {
1488
1484
PyObject * func = PyCFunction_NewEx (& methods [i ].def , NULL , NULL );
1489
- if (! func ) {
1490
- return -1 ;
1485
+ if (func == NULL ) {
1486
+ return PyStatus_NoMemory () ;
1491
1487
}
1492
1488
1493
- int res = PyCodec_RegisterError (methods [i ].name , func );
1489
+ int res = PyDict_SetItemString (interp -> codecs .error_registry ,
1490
+ methods [i ].name , func );
1494
1491
Py_DECREF (func );
1495
- if (res ) {
1496
- return -1 ;
1492
+ if (res < 0 ) {
1493
+ return PyStatus_Error ( "Failed to insert into codec error registry" ) ;
1497
1494
}
1498
1495
}
1499
1496
1500
- mod = PyImport_ImportModule ("encodings" );
1497
+ interp -> codecs .initialized = 1 ;
1498
+
1499
+ // Importing `encodings' will call back into this module to register codec
1500
+ // search functions, so this is done after everything else is initialized.
1501
+ PyObject * mod = PyImport_ImportModule ("encodings" );
1501
1502
if (mod == NULL ) {
1502
- return -1 ;
1503
+ return PyStatus_Error ( "Failed to import encodings module" ) ;
1503
1504
}
1504
1505
Py_DECREF (mod );
1505
- interp -> codecs_initialized = 1 ;
1506
- return 0 ;
1506
+
1507
+ return PyStatus_Ok ();
1508
+ }
1509
+
1510
+ void
1511
+ _PyCodec_Fini (PyInterpreterState * interp )
1512
+ {
1513
+ Py_CLEAR (interp -> codecs .search_path );
1514
+ Py_CLEAR (interp -> codecs .search_cache );
1515
+ Py_CLEAR (interp -> codecs .error_registry );
1516
+ interp -> codecs .initialized = 0 ;
1507
1517
}
0 commit comments