From 546b934ae1d5fd72c9806f7242d2218f80bac740 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 21 Mar 2023 16:19:33 -0600 Subject: [PATCH 01/19] Factor out add_threadstate(). --- Python/pystate.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/Python/pystate.c b/Python/pystate.c index b17efdbefd124c..748f35b8fe4602 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1217,8 +1217,7 @@ free_threadstate(PyThreadState *tstate) static void init_threadstate(PyThreadState *tstate, - PyInterpreterState *interp, uint64_t id, - PyThreadState *next) + PyInterpreterState *interp, uint64_t id) { if (tstate->_status.initialized) { Py_FatalError("thread state already initialized"); @@ -1227,18 +1226,13 @@ init_threadstate(PyThreadState *tstate, assert(interp != NULL); tstate->interp = interp; + // next/prev are set in add_threadstate(). + assert(tstate->next == NULL); + assert(tstate->prev == NULL); + assert(id > 0); tstate->id = id; - assert(interp->threads.head == tstate); - assert((next != NULL && id != 1) || (next == NULL && id == 1)); - if (next != NULL) { - assert(next->prev == NULL || next->prev == tstate); - next->prev = tstate; - } - tstate->next = next; - assert(tstate->prev == NULL); - // thread_id and native_thread_id are set in bind_tstate(). tstate->py_recursion_limit = interp->ceval.recursion_limit, @@ -1259,6 +1253,22 @@ init_threadstate(PyThreadState *tstate, tstate->_status.initialized = 1; } +static void +add_threadstate(PyInterpreterState *interp, PyThreadState *tstate, + PyThreadState *next) +{ + assert(interp->threads.head != tstate); + assert((next != NULL && tstate->id != 1) || + (next == NULL && tstate->id == 1)); + if (next != NULL) { + assert(next->prev == NULL || next->prev == tstate); + next->prev = tstate; + } + tstate->next = next; + assert(tstate->prev == NULL); + interp->threads.head = tstate; +} + static PyThreadState * new_threadstate(PyInterpreterState *interp) { @@ -1298,9 +1308,9 @@ new_threadstate(PyInterpreterState *interp) &initial._main_interpreter._initial_thread, sizeof(*tstate)); } - interp->threads.head = tstate; - init_threadstate(tstate, interp, id, old_head); + init_threadstate(tstate, interp, id); + add_threadstate(interp, tstate, old_head); HEAD_UNLOCK(runtime); if (!used_newtstate) { From 0bcd136ce103558500e70360e75b7d861debfdf2 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 21 Mar 2023 16:30:04 -0600 Subject: [PATCH 02/19] Add _PyThreadState_InitDetached(). --- Include/internal/pycore_pystate.h | 1 + Python/pystate.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 7046ec8d9adaaf..00d7d219bd535e 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -126,6 +126,7 @@ PyAPI_FUNC(void) _PyThreadState_Bind(PyThreadState *tstate); PyAPI_FUNC(void) _PyThreadState_Init( PyThreadState *tstate); PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate); +extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *); static inline void diff --git a/Python/pystate.c b/Python/pystate.c index 748f35b8fe4602..82456d255079ad 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1357,6 +1357,20 @@ _PyThreadState_Init(PyThreadState *tstate) Py_FatalError("_PyThreadState_Init() is for internal use only"); } +void +_PyThreadState_InitDetached(PyThreadState *tstate, PyInterpreterState *interp) +{ + _PyRuntimeState *runtime = interp->runtime; + + HEAD_LOCK(runtime); + interp->threads.next_unique_id += 1; + uint64_t id = interp->threads.next_unique_id; + HEAD_UNLOCK(runtime); + + init_threadstate(tstate, interp, id); + // We do not call add_threadstate(). +} + void PyThreadState_Clear(PyThreadState *tstate) { From 35d531064e2210b68d0bbc20f5903750251b0d04 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 21 Mar 2023 16:46:31 -0600 Subject: [PATCH 03/19] Add _PyThreadState_ClearDetached(). --- Include/internal/pycore_pystate.h | 2 ++ Python/pystate.c | 36 ++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 00d7d219bd535e..3316110a72c204 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -126,7 +126,9 @@ PyAPI_FUNC(void) _PyThreadState_Bind(PyThreadState *tstate); PyAPI_FUNC(void) _PyThreadState_Init( PyThreadState *tstate); PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate); + extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *); +extern void _PyThreadState_ClearDetached(PyThreadState *); static inline void diff --git a/Python/pystate.c b/Python/pystate.c index 82456d255079ad..290dd7a870166b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1371,6 +1371,19 @@ _PyThreadState_InitDetached(PyThreadState *tstate, PyInterpreterState *interp) // We do not call add_threadstate(). } + +static void +clear_datastack(PyThreadState *tstate) +{ + _PyStackChunk *chunk = tstate->datastack_chunk; + tstate->datastack_chunk = NULL; + while (chunk != NULL) { + _PyStackChunk *prev = chunk->previous; + _PyObject_VirtualFree(chunk, chunk->size); + chunk = prev; + } +} + void PyThreadState_Clear(PyThreadState *tstate) { @@ -1445,7 +1458,6 @@ PyThreadState_Clear(PyThreadState *tstate) // XXX Do it as early in the function as possible. } - /* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */ static void tstate_delete_common(PyThreadState *tstate) @@ -1478,17 +1490,25 @@ tstate_delete_common(PyThreadState *tstate) unbind_tstate(tstate); // XXX Move to PyThreadState_Clear()? - _PyStackChunk *chunk = tstate->datastack_chunk; - tstate->datastack_chunk = NULL; - while (chunk != NULL) { - _PyStackChunk *prev = chunk->previous; - _PyObject_VirtualFree(chunk, chunk->size); - chunk = prev; - } + clear_datastack(tstate); tstate->_status.finalized = 1; } +void +_PyThreadState_ClearDetached(PyThreadState *tstate) +{ + assert(!tstate->_status.bound); + assert(!tstate->_status.bound_gilstate); + assert(tstate->datastack_chunk == NULL); + assert(tstate->thread_id == 0); + assert(tstate->native_thread_id == 0); + assert(tstate->next == NULL); + assert(tstate->prev == NULL); + + PyThreadState_Clear(tstate); + clear_datastack(tstate); +} static void zapthreads(PyInterpreterState *interp) From cba9e34b991f77d28510dc63c636896a21c257fd Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 21 Mar 2023 16:30:24 -0600 Subject: [PATCH 04/19] Add _PyRuntime.cached_objects.main_tstate. --- Include/internal/pycore_global_objects.h | 4 ++++ Include/internal/pycore_runtime_init.h | 3 +++ Python/pylifecycle.c | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 9957da1fc5f22a..858321d67df481 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -28,6 +28,10 @@ extern "C" { struct _Py_cached_objects { PyObject *interned_strings; + /* A thread state tied to the main interpreter, + used exclusively for when a global object (e.g. interned strings) + is resized (i.e. deallocated + allocated) from an arbitrary thread. */ + PyThreadState main_tstate; }; #define _Py_GLOBAL_OBJECT(NAME) \ diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index bdecac944dfd3a..b408ca0fff866f 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -58,6 +58,9 @@ extern PyTypeObject _PyExc_MemoryError; .types = { \ .next_version_tag = 1, \ }, \ + .cached_objects = { \ + .main_tstate = _PyThreadState_INIT, \ + }, \ .static_objects = { \ .singletons = { \ .small_ints = _Py_small_ints_INIT, \ diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index d0e85519d23464..1658b6c1ebe8ac 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -635,6 +635,8 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + _PyThreadState_InitDetached(&runtime->cached_objects.main_tstate, interp); + *tstate_p = tstate; return _PyStatus_OK(); } @@ -1928,6 +1930,8 @@ Py_FinalizeEx(void) // XXX Do this sooner during finalization. // XXX Ensure finalizer errors are handled properly. + _PyThreadState_ClearDetached(&runtime->cached_objects.main_tstate); + finalize_interp_clear(tstate); finalize_interp_delete(tstate->interp); From 3b1bb8bd677630e3f905896bb84a5fa2e0cb616b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 22 Mar 2023 10:11:37 -0600 Subject: [PATCH 05/19] Add _Py_AcquireGlobalObjectsState() and _Py_ReleaseGlobalObjectsState(). --- Include/internal/pycore_pystate.h | 3 ++ Python/pystate.c | 75 +++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 3316110a72c204..7e98456e38947f 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -130,6 +130,9 @@ PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate); extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *); extern void _PyThreadState_ClearDetached(PyThreadState *); +extern PyThreadState * _Py_AcquireGlobalObjectsState(PyInterpreterState *); +extern void _Py_ReleaseGlobalObjectsState(PyThreadState *); + static inline void _PyThreadState_UpdateTracingState(PyThreadState *tstate) diff --git a/Python/pystate.c b/Python/pystate.c index 290dd7a870166b..dbbe446e719868 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -565,6 +565,81 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) #endif +//--------------- +// global objects +//--------------- + +static PyThreadState * +bind_global_objects_state(_PyRuntimeState *runtime) +{ + PyThreadState *main_tstate = &runtime->cached_objects.main_tstate; + + bind_tstate(main_tstate); + /* Unlike _PyThreadState_Bind(), we do not modify gilstate TSS. */ + + return main_tstate; +} + +static void +unbind_global_objects_state(_PyRuntimeState *runtime) +{ + PyThreadState *main_tstate = &runtime->cached_objects.main_tstate; + assert(tstate_is_alive(main_tstate)); + assert(!main_tstate->_status.active); + assert(gilstate_tss_get(runtime) != main_tstate); + + unbind_tstate(main_tstate); + + /* This thread state may be bound/unbound repeatedly, + so we must erase evidence that it was ever bound (or unbound). */ + main_tstate->_status.bound = 0; + main_tstate->_status.unbound = 0; + + /* We must fully unlink the thread state from any OS thread, + to allow it to be bound more than once. */ + main_tstate->thread_id = 0; +#ifdef PY_HAVE_THREAD_NATIVE_ID + main_tstate->native_thread_id = 0; +#endif +} + +PyThreadState * +_Py_AcquireGlobalObjectsState(PyInterpreterState *interp) +{ + _PyRuntimeState *runtime = &_PyRuntime; + assert(interp != NULL); + assert(interp->runtime == runtime); + + PyThreadState *oldts = NULL; + /* All global objects are owned by the main interpreter. */ + if (!_Py_IsMainInterpreter(interp)) { + PyThreadState *main_tstate = bind_global_objects_state(runtime); + + oldts = _PyThreadState_Swap(runtime, main_tstate); + assert(oldts != NULL); + + unbind_global_objects_state(runtime); + } + + return oldts; +} + +void +_Py_ReleaseGlobalObjectsState(PyThreadState *oldts) +{ + if (oldts != NULL) { + /* The thread state was swapped in _Py_AcquireGlobalObjectsState(), + because the main interpreter wasn't running in the OS thread.. */ + _PyRuntimeState *runtime = &_PyRuntime; + assert(oldts->interp->runtime == runtime); + assert(!_Py_IsMainInterpreter(oldts->interp)); + + // The returned tstate should be _PyRuntime.cached_objects.main_tstate. + _PyThreadState_Swap(runtime, oldts); + } +} + + /*************************************/ /* the per-interpreter runtime state */ /*************************************/ From eb42aa1a83df5e29dc73dad442ae7306f92cca9d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 22 Mar 2023 10:51:16 -0600 Subject: [PATCH 06/19] Add _Py_AddToGlobalDict(). --- Include/internal/pycore_pystate.h | 2 ++ Python/pystate.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 7e98456e38947f..aba85e7779daae 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -133,6 +133,8 @@ extern void _PyThreadState_ClearDetached(PyThreadState *); extern PyThreadState * _Py_AcquireGlobalObjectsState(PyInterpreterState *); extern void _Py_ReleaseGlobalObjectsState(PyThreadState *); +extern PyObject * _Py_AddToGlobalDict(PyObject *, PyObject *, PyObject *); + static inline void _PyThreadState_UpdateTracingState(PyThreadState *tstate) diff --git a/Python/pystate.c b/Python/pystate.c index dbbe446e719868..a09e31d52e4062 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -639,6 +639,36 @@ _Py_ReleaseGlobalObjectsState(PyThreadState *oldts) } } +PyObject * +_Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value) +{ + assert(dict != NULL); + + /* Swap to the main interpreter, if necessary. */ + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyThreadState *oldts = _Py_AcquireGlobalObjectsState(interp); + + /* This might trigger a resize, which is why we must "acquire" + the global object state. */ + PyObject *actual = PyDict_SetDefault(dict, key, value); + if (actual == NULL) { + /* Raising an exception from one interpreter in another + is problematic, so we clear it and let the caller deal + with the returned NULL. */ + assert(PyErr_ExceptionMatches(PyExc_MemoryError)); + PyErr_Clear(); + } + + /* Swap back, it it wasn't in the main interpreter already. */ + if (oldts != NULL) { + _Py_ReleaseGlobalObjectsState(oldts); + } + + // XXX Immortalize the key and value. + + return actual; +} + /*************************************/ /* the per-interpreter runtime state */ From 4e9da2da68655c6fdb106ef098bdf9510484ca97 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 22 Mar 2023 11:45:23 -0600 Subject: [PATCH 07/19] Drop _Py_AcquireGlobalObjectsState() and _Py_ReleaseGlobalObjectsState(). --- Include/internal/pycore_pystate.h | 3 -- Python/pystate.c | 66 +++++++++++++++---------------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index aba85e7779daae..f159b516e66b18 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -130,9 +130,6 @@ PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate); extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *); extern void _PyThreadState_ClearDetached(PyThreadState *); -extern PyThreadState * _Py_AcquireGlobalObjectsState(PyInterpreterState *); -extern void _Py_ReleaseGlobalObjectsState(PyThreadState *); - extern PyObject * _Py_AddToGlobalDict(PyObject *, PyObject *, PyObject *); diff --git a/Python/pystate.c b/Python/pystate.c index a09e31d52e4062..c9769ca340b4b8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -569,6 +569,9 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) // global objects //--------------- +/* The global objects thread state is meant to be used in a very limited + way and should not be used to actually run any Python code. */ + static PyThreadState * bind_global_objects_state(_PyRuntimeState *runtime) { @@ -603,53 +606,42 @@ unbind_global_objects_state(_PyRuntimeState *runtime) #endif } -PyThreadState * -_Py_AcquireGlobalObjectsState(PyInterpreterState *interp) +PyObject * +_Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value) { + assert(dict != NULL); + assert(PyDict_CheckExact(dict)); + + /* All global objects are stored in _PyRuntime + and owned by the main interpreter. */ _PyRuntimeState *runtime = &_PyRuntime; - assert(interp != NULL); - assert(interp->runtime == runtime); + PyThreadState *curts = current_fast_get(runtime); + PyInterpreterState *interp = curts->interp; + assert(interp != NULL); // The GIL must be held. + /* Due to interpreter isolation we must hold a global lock, + starting at this point and ending before we return. + Note that the operations in this function are very fucused + and we should not expect any reentrancy. */ + // For now we rely on the GIL. + + /* Swap to the main interpreter, if necessary. */ PyThreadState *oldts = NULL; - /* All global objects are owned by the main interpreter. */ if (!_Py_IsMainInterpreter(interp)) { PyThreadState *main_tstate = bind_global_objects_state(runtime); oldts = _PyThreadState_Swap(runtime, main_tstate); assert(oldts != NULL); - - unbind_global_objects_state(runtime); - } - - return oldts; -} - -void -_Py_ReleaseGlobalObjectsState(PyThreadState *oldts) -{ - if (oldts != NULL) { - /* The thread state was swapped in _Py_AcquireGlobalObjectsState(), - because the main interpreter wasn't running in the OS thread.. */ - _PyRuntimeState *runtime = &_PyRuntime; - assert(oldts->interp->runtime == runtime); assert(!_Py_IsMainInterpreter(oldts->interp)); - // The returned tstate should be _PyRuntime.cached_objects.main_tstate. - _PyThreadState_Swap(runtime, oldts); + /* The limitations of the global objects thread state apply + from this point to the point we swap back to oldts. */ } -} - -PyObject * -_Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value) -{ - assert(dict != NULL); - - /* Swap to the main interpreter, if necessary. */ - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyThreadState *oldts = _Py_AcquireGlobalObjectsState(interp); /* This might trigger a resize, which is why we must "acquire" - the global object state. */ + the global object state. Also note that PyDict_SetDefault() + must be compatible with our reentrancy and global objects state + constraints. */ PyObject *actual = PyDict_SetDefault(dict, key, value); if (actual == NULL) { /* Raising an exception from one interpreter in another @@ -661,9 +653,15 @@ _Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value) /* Swap back, it it wasn't in the main interpreter already. */ if (oldts != NULL) { - _Py_ReleaseGlobalObjectsState(oldts); + // The returned tstate should be _PyRuntime.cached_objects.main_tstate. + _PyThreadState_Swap(runtime, oldts); + + unbind_global_objects_state(runtime); } + // This is where we would release the global lock, + // if we weren't relying on the GIL. + // XXX Immortalize the key and value. return actual; From 6216207cfa3410a20bed9161bb9f067ded8f97fe Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 22 Mar 2023 11:49:36 -0600 Subject: [PATCH 08/19] Add acquire_global_objects_lock() and release_global_objects_lock(). --- Python/pystate.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Python/pystate.c b/Python/pystate.c index c9769ca340b4b8..394b12d24065f2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -606,6 +606,22 @@ unbind_global_objects_state(_PyRuntimeState *runtime) #endif } +static inline void +acquire_global_objects_lock(_PyRuntimeState *runtime) +{ + /* For now we can rely on the GIL, so we don't actually + acquire a global lock here. */ + assert(current_fast_get(runtime) != NULL); +} + +static inline void +release_global_objects_lock(_PyRuntimeState *runtime) +{ + /* For now we can rely on the GIL, so we don't actually + release a global lock here. */ + assert(current_fast_get(runtime) != NULL); +} + PyObject * _Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value) { @@ -623,7 +639,7 @@ _Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value) starting at this point and ending before we return. Note that the operations in this function are very fucused and we should not expect any reentrancy. */ - // For now we rely on the GIL. + acquire_global_objects_lock(runtime); /* Swap to the main interpreter, if necessary. */ PyThreadState *oldts = NULL; @@ -659,8 +675,7 @@ _Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value) unbind_global_objects_state(runtime); } - // This is where we would release the global lock, - // if we weren't relying on the GIL. + release_global_objects_lock(runtime); // XXX Immortalize the key and value. From 3c007c09531121ae5fef7b73c2499df5254ad4b4 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 13 Mar 2023 16:10:26 -0600 Subject: [PATCH 09/19] Add some TODO comments. --- Objects/unicodeobject.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b9fb53147b9b51..5ed748565e8a8e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14611,17 +14611,23 @@ PyUnicode_InternInPlace(PyObject **p) PyObject *interned = get_interned_dict(); assert(interned != NULL); + // XXX Swap to the main interpreter. + PyObject *t = PyDict_SetDefault(interned, s, s); if (t == NULL) { PyErr_Clear(); return; } + // XXX Swap back. + if (t != s) { Py_SETREF(*p, Py_NewRef(t)); return; } + // XXX Immortalize the object. + /* The two references in interned dict (key and value) are not counted by refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */ From 7d95514984a496c89a737d501c178e46b954b23e Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 14:34:22 -0600 Subject: [PATCH 10/19] Factor out store_interned(). --- Objects/unicodeobject.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5ed748565e8a8e..bed154c810b65a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14585,6 +14585,24 @@ _PyUnicode_InitTypes(PyInterpreterState *interp) } +static inline PyObject * +store_interned(PyObject *obj) +{ + PyObject *interned = get_interned_dict(); + assert(interned != NULL); + + // XXX Swap to the main interpreter. + + PyObject *t = PyDict_SetDefault(interned, obj, obj); + if (t == NULL) { + PyErr_Clear(); + } + + // XXX Swap back. + + return t; +} + void PyUnicode_InternInPlace(PyObject **p) { @@ -14608,21 +14626,11 @@ PyUnicode_InternInPlace(PyObject **p) return; } - PyObject *interned = get_interned_dict(); - assert(interned != NULL); - - // XXX Swap to the main interpreter. - - PyObject *t = PyDict_SetDefault(interned, s, s); - if (t == NULL) { - PyErr_Clear(); - return; - } - - // XXX Swap back. - + PyObject *t = store_interned(s); if (t != s) { - Py_SETREF(*p, Py_NewRef(t)); + if (t != NULL) { + Py_SETREF(*p, Py_NewRef(t)); + } return; } From 5c20b84033b1a71a0e8db0245bfb6973f0482b32 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 16:57:15 -0600 Subject: [PATCH 11/19] Store a thread state to use just for interned strings. --- Include/internal/pycore_unicodeobject.h | 5 +++++ Objects/unicodeobject.c | 30 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 19faceebf1d8ee..289a03afc145d5 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -33,6 +33,11 @@ struct _Py_unicode_runtime_ids { }; struct _Py_unicode_runtime_state { + struct { + PyThreadState *tstate; + /* The actual interned dict is at + _PyRuntime.cached_objects.interned_strings. */ + } interned; struct _Py_unicode_runtime_ids ids; }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bed154c810b65a..adb0e2b45a6683 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14585,6 +14585,34 @@ _PyUnicode_InitTypes(PyInterpreterState *interp) } +static PyThreadState * +get_interned_tstate(void) +{ + PyThreadState *tstate = _PyRuntime.unicode_state.interned.tstate; + if (tstate == NULL) { + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + /* We do not "bind" the thread state here. */ + tstate = _PyThreadState_New(main_interp); + if (tstate == NULL) { + PyErr_Clear(); + return NULL; + } + _PyRuntime.unicode_state.interned.tstate = tstate; + } + return tstate; +} + +static void +clear_interned_tstate(void) +{ + PyThreadState *tstate = _PyRuntime.unicode_state.interned.tstate; + if (tstate != NULL) { + _PyRuntime.unicode_state.interned.tstate = NULL; + PyThreadState_Clear(tstate); + PyThreadState_Delete(tstate); + } +} + static inline PyObject * store_interned(PyObject *obj) { @@ -14710,6 +14738,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) PyDict_Clear(interned); Py_DECREF(interned); set_interned_dict(NULL); + + clear_interned_tstate(); } From a3ae02aeceb894cd9d2a814a79fb78ebefd65e58 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 17:01:07 -0600 Subject: [PATCH 12/19] Always use the main interpreter when possibly resizing the interned dict. --- Objects/unicodeobject.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index adb0e2b45a6683..a2e972fa9991e5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14619,14 +14619,26 @@ store_interned(PyObject *obj) PyObject *interned = get_interned_dict(); assert(interned != NULL); - // XXX Swap to the main interpreter. + /* Swap to the main interpreter, if necessary. */ + PyThreadState *oldts = NULL; + if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + PyThreadState *main_tstate = get_interned_tstate(); + if (main_tstate == NULL) { + return NULL; + } + oldts = PyThreadState_Swap(main_tstate); + assert(oldts != NULL); + } PyObject *t = PyDict_SetDefault(interned, obj, obj); if (t == NULL) { PyErr_Clear(); } - // XXX Swap back. + /* Swap back. */ + if (oldts != NULL) { + PyThreadState_Swap(oldts); + } return t; } From d5fbc37edb8e2c3e983ef79e78c861e1e16b9ce8 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 21 Mar 2023 16:54:23 -0600 Subject: [PATCH 13/19] Use _PyRuntime.cached_objects.main_tstate instead. --- Include/internal/pycore_unicodeobject.h | 6 +---- Objects/unicodeobject.c | 29 +------------------------ 2 files changed, 2 insertions(+), 33 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 289a03afc145d5..ed4feb603d6f38 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -33,12 +33,8 @@ struct _Py_unicode_runtime_ids { }; struct _Py_unicode_runtime_state { - struct { - PyThreadState *tstate; - /* The actual interned dict is at - _PyRuntime.cached_objects.interned_strings. */ - } interned; struct _Py_unicode_runtime_ids ids; + /* The interned dict is at _PyRuntime.cached_objects.interned_strings. */ }; /* fs_codec.encoding is initialized to NULL. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a2e972fa9991e5..e5174a03593950 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14588,29 +14588,7 @@ _PyUnicode_InitTypes(PyInterpreterState *interp) static PyThreadState * get_interned_tstate(void) { - PyThreadState *tstate = _PyRuntime.unicode_state.interned.tstate; - if (tstate == NULL) { - PyInterpreterState *main_interp = _PyInterpreterState_Main(); - /* We do not "bind" the thread state here. */ - tstate = _PyThreadState_New(main_interp); - if (tstate == NULL) { - PyErr_Clear(); - return NULL; - } - _PyRuntime.unicode_state.interned.tstate = tstate; - } - return tstate; -} - -static void -clear_interned_tstate(void) -{ - PyThreadState *tstate = _PyRuntime.unicode_state.interned.tstate; - if (tstate != NULL) { - _PyRuntime.unicode_state.interned.tstate = NULL; - PyThreadState_Clear(tstate); - PyThreadState_Delete(tstate); - } + return &_PyRuntime.cached_objects.main_tstate; } static inline PyObject * @@ -14623,9 +14601,6 @@ store_interned(PyObject *obj) PyThreadState *oldts = NULL; if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { PyThreadState *main_tstate = get_interned_tstate(); - if (main_tstate == NULL) { - return NULL; - } oldts = PyThreadState_Swap(main_tstate); assert(oldts != NULL); } @@ -14750,8 +14725,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) PyDict_Clear(interned); Py_DECREF(interned); set_interned_dict(NULL); - - clear_interned_tstate(); } From 459325f611aad5af80728c5cfdff43f68492ac21 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 21 Mar 2023 14:34:09 -0600 Subject: [PATCH 14/19] Add _PyThreadState_IsBound() and _PyThreadState_Unbind(). --- Include/internal/pycore_pystate.h | 2 ++ Python/pystate.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index f159b516e66b18..26566b022f2285 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -121,7 +121,9 @@ static inline PyInterpreterState* _PyInterpreterState_GET(void) { // PyThreadState functions PyAPI_FUNC(PyThreadState *) _PyThreadState_New(PyInterpreterState *interp); +PyAPI_FUNC(int) _PyThreadState_IsBound(PyThreadState *tstate); PyAPI_FUNC(void) _PyThreadState_Bind(PyThreadState *tstate); +PyAPI_FUNC(void) _PyThreadState_Unbind(PyThreadState *tstate); // We keep this around exclusively for stable ABI compatibility. PyAPI_FUNC(void) _PyThreadState_Init( PyThreadState *tstate); diff --git a/Python/pystate.c b/Python/pystate.c index 394b12d24065f2..80ea6dd082b4b8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1923,6 +1923,12 @@ PyThreadState_Swap(PyThreadState *newts) } +int +_PyThreadState_IsBound(PyThreadState *tstate) +{ + return tstate_is_bound(tstate); +} + void _PyThreadState_Bind(PyThreadState *tstate) { @@ -1934,6 +1940,14 @@ _PyThreadState_Bind(PyThreadState *tstate) } } +void +_PyThreadState_Unbind(PyThreadState *tstate) +{ + /* For now, we do not allow the initial tstate to be unbound. */ + assert(gilstate_tss_get(tstate->interp->runtime) != tstate); + unbind_tstate(tstate); +} + /***********************************/ /* routines for advanced debuggers */ From 4f25244cd227b7b64a18f26a688f0ab0745c2d5d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 21 Mar 2023 14:34:53 -0600 Subject: [PATCH 15/19] Make sure the one-off tstate is bound before using it. --- Objects/unicodeobject.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e5174a03593950..adc490affc3d7c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14601,8 +14601,15 @@ store_interned(PyObject *obj) PyThreadState *oldts = NULL; if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { PyThreadState *main_tstate = get_interned_tstate(); + int bound = _PyThreadState_IsBound(main_tstate); + if (!bound) { + _PyThreadState_Bind(main_tstate); + } oldts = PyThreadState_Swap(main_tstate); assert(oldts != NULL); + if (!bound) { + _PyThreadState_Unbind(main_tstate); + } } PyObject *t = PyDict_SetDefault(interned, obj, obj); From e68535ada0fe397c7d8c26e6f193b1e008122544 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 22 Mar 2023 10:21:25 -0600 Subject: [PATCH 16/19] Use _Py_AcquireGlobalObjectsState() in store_interned(). --- Include/internal/pycore_pystate.h | 2 -- Objects/unicodeobject.c | 25 +++++-------------------- Python/pystate.c | 14 -------------- 3 files changed, 5 insertions(+), 36 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 26566b022f2285..f159b516e66b18 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -121,9 +121,7 @@ static inline PyInterpreterState* _PyInterpreterState_GET(void) { // PyThreadState functions PyAPI_FUNC(PyThreadState *) _PyThreadState_New(PyInterpreterState *interp); -PyAPI_FUNC(int) _PyThreadState_IsBound(PyThreadState *tstate); PyAPI_FUNC(void) _PyThreadState_Bind(PyThreadState *tstate); -PyAPI_FUNC(void) _PyThreadState_Unbind(PyThreadState *tstate); // We keep this around exclusively for stable ABI compatibility. PyAPI_FUNC(void) _PyThreadState_Init( PyThreadState *tstate); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index adc490affc3d7c..2bd1d96c935128 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14585,12 +14585,6 @@ _PyUnicode_InitTypes(PyInterpreterState *interp) } -static PyThreadState * -get_interned_tstate(void) -{ - return &_PyRuntime.cached_objects.main_tstate; -} - static inline PyObject * store_interned(PyObject *obj) { @@ -14598,20 +14592,11 @@ store_interned(PyObject *obj) assert(interned != NULL); /* Swap to the main interpreter, if necessary. */ - PyThreadState *oldts = NULL; - if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { - PyThreadState *main_tstate = get_interned_tstate(); - int bound = _PyThreadState_IsBound(main_tstate); - if (!bound) { - _PyThreadState_Bind(main_tstate); - } - oldts = PyThreadState_Swap(main_tstate); - assert(oldts != NULL); - if (!bound) { - _PyThreadState_Unbind(main_tstate); - } - } + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyThreadState *oldts = _Py_AcquireGlobalObjectsState(interp); + /* This might trigger a resize, which is why we must "acquire" + the global object state. */ PyObject *t = PyDict_SetDefault(interned, obj, obj); if (t == NULL) { PyErr_Clear(); @@ -14619,7 +14604,7 @@ store_interned(PyObject *obj) /* Swap back. */ if (oldts != NULL) { - PyThreadState_Swap(oldts); + _Py_ReleaseGlobalObjectsState(oldts); } return t; diff --git a/Python/pystate.c b/Python/pystate.c index 80ea6dd082b4b8..394b12d24065f2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1923,12 +1923,6 @@ PyThreadState_Swap(PyThreadState *newts) } -int -_PyThreadState_IsBound(PyThreadState *tstate) -{ - return tstate_is_bound(tstate); -} - void _PyThreadState_Bind(PyThreadState *tstate) { @@ -1940,14 +1934,6 @@ _PyThreadState_Bind(PyThreadState *tstate) } } -void -_PyThreadState_Unbind(PyThreadState *tstate) -{ - /* For now, we do not allow the initial tstate to be unbound. */ - assert(gilstate_tss_get(tstate->interp->runtime) != tstate); - unbind_tstate(tstate); -} - /***********************************/ /* routines for advanced debuggers */ From 22753b33ad22bd50ba9ce207de6a41311c1e6ca1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 22 Mar 2023 10:52:13 -0600 Subject: [PATCH 17/19] Use _Py_AddToGlobalDict(). --- Objects/unicodeobject.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2bd1d96c935128..891a65576ee29b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14585,31 +14585,6 @@ _PyUnicode_InitTypes(PyInterpreterState *interp) } -static inline PyObject * -store_interned(PyObject *obj) -{ - PyObject *interned = get_interned_dict(); - assert(interned != NULL); - - /* Swap to the main interpreter, if necessary. */ - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyThreadState *oldts = _Py_AcquireGlobalObjectsState(interp); - - /* This might trigger a resize, which is why we must "acquire" - the global object state. */ - PyObject *t = PyDict_SetDefault(interned, obj, obj); - if (t == NULL) { - PyErr_Clear(); - } - - /* Swap back. */ - if (oldts != NULL) { - _Py_ReleaseGlobalObjectsState(oldts); - } - - return t; -} - void PyUnicode_InternInPlace(PyObject **p) { @@ -14633,7 +14608,8 @@ PyUnicode_InternInPlace(PyObject **p) return; } - PyObject *t = store_interned(s); + PyObject *interned = get_interned_dict(); + PyObject *t = _Py_AddToGlobalDict(interned, s, s); if (t != s) { if (t != NULL) { Py_SETREF(*p, Py_NewRef(t)); @@ -14641,8 +14617,6 @@ PyUnicode_InternInPlace(PyObject **p) return; } - // XXX Immortalize the object. - /* The two references in interned dict (key and value) are not counted by refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */ From b649a84e4bf938fb60e735252aa9c800bbb11f65 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 14:23:54 -0600 Subject: [PATCH 18/19] Make objects stored in global containers immortal. --- Python/pystate.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Python/pystate.c b/Python/pystate.c index 394b12d24065f2..2ce817e2dad0ee 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -677,7 +677,14 @@ _Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value) release_global_objects_lock(runtime); - // XXX Immortalize the key and value. + /* Immortalize the key and value. */ + if (actual == value) { + /* It was added. */ + _Py_SetImmortal(key); + if (value != key) { + _Py_SetImmortal(value); + } + } return actual; } From 9b0dc99ab9cac17a03d2edda519ab9175336a119 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 22 Mar 2023 12:33:48 -0600 Subject: [PATCH 19/19] Immortalize the interned strings dict. --- Objects/unicodeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 891a65576ee29b..3de77d538db4c6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -239,6 +239,7 @@ static inline PyObject *get_interned_dict(void) static inline void set_interned_dict(PyObject *dict) { _Py_CACHED_OBJECT(interned_strings) = dict; + _Py_SetImmortal(dict); } #define _Py_RETURN_UNICODE_EMPTY() \