From 9420a1cc39ae90cf86a6c482e51ed2e3eccab3eb Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 13 Mar 2023 16:10:07 -0600 Subject: [PATCH 1/9] Initialize the interned dict in _PyUnicode_InitGlobalObjects(). --- Objects/unicodeobject.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b9fb53147b9b51..eb3029eadfe2d6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14533,13 +14533,12 @@ _PyUnicode_InitGlobalObjects(PyInterpreterState *interp) return _PyStatus_OK(); } - // Initialize the global interned dict + /* Initialize the global interned dict. */ PyObject *interned = PyDict_New(); if (interned == NULL) { PyErr_Clear(); return _PyStatus_ERR("failed to create interned dict"); } - set_interned_dict(interned); /* Intern statically allocated string identifiers and deepfreeze strings. From cca77b93b35b4a3037805fb1a239ec9ad22a518d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 13 Mar 2023 16:10:26 -0600 Subject: [PATCH 2/9] Add some TODO comments. --- Objects/unicodeobject.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index eb3029eadfe2d6..845db6b8034dc3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14610,17 +14610,23 @@ PyUnicode_InternInPlace(PyObject **p) PyObject *interned = get_interned_dict(); assert(interned != NULL); + // XXX Swap to the main interpreter. + PyObject *t = PyDict_SetDefault(interned, s, s); if (t == NULL) { PyErr_Clear(); return; } + // XXX Swap back. + if (t != s) { Py_SETREF(*p, Py_NewRef(t)); return; } + // XXX Immortalize the object. + /* The two references in interned dict (key and value) are not counted by refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */ From fa3974fe715af3b2c4017927d06d760487573e53 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 14:34:22 -0600 Subject: [PATCH 3/9] Factor out store_interned(). --- Objects/unicodeobject.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 845db6b8034dc3..277d17a5a03161 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14584,6 +14584,24 @@ _PyUnicode_InitTypes(PyInterpreterState *interp) } +static inline PyObject * +store_interned(PyObject *obj) +{ + PyObject *interned = get_interned_dict(); + assert(interned != NULL); + + // XXX Swap to the main interpreter. + + PyObject *t = PyDict_SetDefault(interned, obj, obj); + if (t == NULL) { + PyErr_Clear(); + } + + // XXX Swap back. + + return t; +} + void PyUnicode_InternInPlace(PyObject **p) { @@ -14607,21 +14625,11 @@ PyUnicode_InternInPlace(PyObject **p) return; } - PyObject *interned = get_interned_dict(); - assert(interned != NULL); - - // XXX Swap to the main interpreter. - - PyObject *t = PyDict_SetDefault(interned, s, s); - if (t == NULL) { - PyErr_Clear(); - return; - } - - // XXX Swap back. - + PyObject *t = store_interned(s); if (t != s) { - Py_SETREF(*p, Py_NewRef(t)); + if (t != NULL) { + Py_SETREF(*p, Py_NewRef(t)); + } return; } From eadd48a92801dc401ddfaa64ec828d2c2607e6ff Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 16:57:15 -0600 Subject: [PATCH 4/9] Store a thread state to use just for interned strings. --- Include/internal/pycore_unicodeobject.h | 5 +++++ Objects/unicodeobject.c | 29 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 19faceebf1d8ee..289a03afc145d5 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -33,6 +33,11 @@ struct _Py_unicode_runtime_ids { }; struct _Py_unicode_runtime_state { + struct { + PyThreadState *tstate; + /* The actual interned dict is at + _PyRuntime.cached_objects.interned_strings. */ + } interned; struct _Py_unicode_runtime_ids ids; }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 277d17a5a03161..3e658ff7b26fd6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14584,6 +14584,33 @@ _PyUnicode_InitTypes(PyInterpreterState *interp) } +static PyThreadState * +get_interned_tstate(void) +{ + PyThreadState *tstate = _PyRuntime.unicode_state.interned.tstate; + if (tstate == NULL) { + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + /* We do not "bind" the thread state here. */ + tstate = _PyThreadState_New(main_interp); + if (tstate == NULL) { + PyErr_Clear(); + return NULL; + } + } + return tstate; +} + +static void +clear_interned_tstate(void) +{ + PyThreadState *tstate = _PyRuntime.unicode_state.interned.tstate; + if (tstate != NULL) { + _PyRuntime.unicode_state.interned.tstate = NULL; + PyThreadState_Clear(tstate); + PyThreadState_Delete(tstate); + } +} + static inline PyObject * store_interned(PyObject *obj) { @@ -14709,6 +14736,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) PyDict_Clear(interned); Py_DECREF(interned); set_interned_dict(NULL); + + clear_interned_tstate(); } From 5d6e1b955684a60dbc0db7941fa8e490c842c96e Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 17:01:07 -0600 Subject: [PATCH 5/9] Always use the main interpreter when possibly resizing the interned dict. --- Objects/unicodeobject.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3e658ff7b26fd6..067d6deb87fc64 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14617,14 +14617,26 @@ store_interned(PyObject *obj) PyObject *interned = get_interned_dict(); assert(interned != NULL); - // XXX Swap to the main interpreter. + /* Swap to the main interpreter, if necessary. */ + PyThreadState *oldts = NULL; + if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + PyThreadState *main_tstate = get_interned_tstate(); + if (main_tstate == NULL) { + return NULL; + } + oldts = PyThreadState_Swap(main_tstate); + assert(oldts != NULL); + } PyObject *t = PyDict_SetDefault(interned, obj, obj); if (t == NULL) { PyErr_Clear(); } - // XXX Swap back. + /* Swap back. */ + if (oldts != NULL) { + PyThreadState_Swap(oldts); + } return t; } From 16ed5e0bfb1ff2807a9e00483524afac39712f08 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 14:23:54 -0600 Subject: [PATCH 6/9] Make interned strings immortal. --- Objects/unicodeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 067d6deb87fc64..ebe6e75355fc5e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14672,7 +14672,8 @@ PyUnicode_InternInPlace(PyObject **p) return; } - // XXX Immortalize the object. + /* Immortalize the object. */ + _Py_SetImmortal(s); /* The two references in interned dict (key and value) are not counted by refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of From d36f11922b05d582ccc13b77defc49e3ba4e8ff5 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 14:36:12 -0600 Subject: [PATCH 7/9] Temporarily skip immortalizing. --- Objects/unicodeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ebe6e75355fc5e..c413f63b5f1188 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14673,7 +14673,8 @@ PyUnicode_InternInPlace(PyObject **p) } /* Immortalize the object. */ - _Py_SetImmortal(s); + // XXX Uncomment this once the PEP 683 implementation has landed. + //_Py_SetImmortal(s); /* The two references in interned dict (key and value) are not counted by refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of From 6d5f129fe234edf892a900c3f2fabce3508ca9ec Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 14:17:54 -0600 Subject: [PATCH 8/9] Add the mutex for the interned dict. --- Include/internal/pycore_unicodeobject.h | 1 + Python/pystate.c | 23 +++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 289a03afc145d5..27d45b07c32658 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -34,6 +34,7 @@ struct _Py_unicode_runtime_ids { struct _Py_unicode_runtime_state { struct { + PyThread_type_lock lock; PyThreadState *tstate; /* The actual interned dict is at _PyRuntime.cached_objects.interned_strings. */ diff --git a/Python/pystate.c b/Python/pystate.c index 60adb54685ce68..82826a4d512e91 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -356,7 +356,8 @@ _Py_COMP_DIAG_POP static int alloc_for_runtime(PyThread_type_lock *plock1, PyThread_type_lock *plock2, - PyThread_type_lock *plock3, PyThread_type_lock *plock4) + PyThread_type_lock *plock3, PyThread_type_lock *plock4, + PyThread_type_lock *plock5) { /* Force default allocator, since _PyRuntimeState_Fini() must use the same allocator than this function. */ @@ -389,12 +390,22 @@ alloc_for_runtime(PyThread_type_lock *plock1, PyThread_type_lock *plock2, return -1; } + PyThread_type_lock lock5 = PyThread_allocate_lock(); + if (lock4 == NULL) { + PyThread_free_lock(lock1); + PyThread_free_lock(lock2); + PyThread_free_lock(lock3); + PyThread_free_lock(lock4); + return -1; + } + PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); *plock1 = lock1; *plock2 = lock2; *plock3 = lock3; *plock4 = lock4; + *plock5 = lock5; return 0; } @@ -404,6 +415,7 @@ init_runtime(_PyRuntimeState *runtime, _Py_AuditHookEntry *audit_hook_head, Py_ssize_t unicode_next_index, PyThread_type_lock unicode_ids_mutex, + PyThread_type_lock interned_mutex, PyThread_type_lock interpreters_mutex, PyThread_type_lock xidregistry_mutex, PyThread_type_lock getargs_mutex) @@ -435,6 +447,7 @@ init_runtime(_PyRuntimeState *runtime, runtime->unicode_state.ids.next_index = unicode_next_index; runtime->unicode_state.ids.lock = unicode_ids_mutex; + runtime->unicode_state.interned.lock = interned_mutex; runtime->_initialized = 1; } @@ -452,8 +465,8 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) // is called multiple times. Py_ssize_t unicode_next_index = runtime->unicode_state.ids.next_index; - PyThread_type_lock lock1, lock2, lock3, lock4; - if (alloc_for_runtime(&lock1, &lock2, &lock3, &lock4) != 0) { + PyThread_type_lock lock1, lock2, lock3, lock4, lock5; + if (alloc_for_runtime(&lock1, &lock2, &lock3, &lock4, &lock5) != 0) { return _PyStatus_NO_MEMORY(); } @@ -474,7 +487,7 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) } init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head, - unicode_next_index, lock1, lock2, lock3, lock4); + unicode_next_index, lock1, lock2, lock3, lock4, lock5); return _PyStatus_OK(); } @@ -530,6 +543,7 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex); int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex); int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_state.ids.lock); + int reinit_interned = _PyThread_at_fork_reinit(&runtime->unicode_state.interned.lock); int reinit_getargs = _PyThread_at_fork_reinit(&runtime->getargs.mutex); PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); @@ -542,6 +556,7 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) || reinit_main_id < 0 || reinit_xidregistry < 0 || reinit_unicode_ids < 0 + || reinit_interned < 0 || reinit_getargs < 0) { return _PyStatus_ERR("Failed to reinitialize runtime locks"); From b634920ab3e11ce464da664f4268e2366d2bdce0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 20 Mar 2023 14:34:53 -0600 Subject: [PATCH 9/9] Lock around store_interned(). --- Objects/unicodeobject.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c413f63b5f1188..2561ced241d0c3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14664,7 +14664,9 @@ PyUnicode_InternInPlace(PyObject **p) return; } + PyThread_acquire_lock(_PyRuntime.unicode_state.interned.lock, WAIT_LOCK); PyObject *t = store_interned(s); + PyThread_release_lock(_PyRuntime.unicode_state.interned.lock); if (t != s) { if (t != NULL) { Py_SETREF(*p, Py_NewRef(t));