From d8bf7d981f3c29510818f123b46d9cf06bdea3f5 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 30 Sep 2024 10:46:26 -0700 Subject: [PATCH 1/3] gh-124785: re-work fix so tracerefs test passes --- Include/internal/pycore_global_objects.h | 1 + Lib/test/test_sys.py | 28 +++++--- Objects/unicodeobject.c | 82 ++++++++++++++++++++++-- 3 files changed, 99 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 913dce6f1ec0fe..d82e97ffb2844f 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -65,6 +65,7 @@ struct _Py_static_objects { struct _Py_interp_cached_objects { PyObject *interned_strings; + PyObject *interned_strings_legacy; /* AST */ PyObject *str_replace_inf; diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 9689ef8e96e072..9fdd6fa261238d 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1076,14 +1076,6 @@ def test_getallocatedblocks(self): # about the underlying implementation: the function might # return 0 or something greater. self.assertGreaterEqual(a, 0) - try: - # While we could imagine a Python session where the number of - # multiple buffer objects would exceed the sharing of references, - # it is unlikely to happen in a normal test run. - self.assertLess(a, sys.gettotalrefcount()) - except AttributeError: - # gettotalrefcount() not available - pass gc.collect() b = sys.getallocatedblocks() self.assertLessEqual(b, a) @@ -1091,6 +1083,26 @@ def test_getallocatedblocks(self): c = sys.getallocatedblocks() self.assertIn(c, range(b - 50, b + 50)) + @unittest.skipUnless(hasattr(sys, "getallocatedblocks"), + "sys.getallocatedblocks unavailable on this build") + def test_getallocatedblocks_refcount(self): + # While we could imagine a Python session where the number of multiple + # buffer objects would exceed the sharing of references, it is unlikely + # to happen given that we run this in a subinterpreter. + code = """if 1: + import sys + num_blocks = sys.getallocatedblocks() + try: + total_refcnt = sys.gettotalrefcount() + except AttributeError: + pass + else: + if num_blocks > total_refcnt: + raise AssertionError('allocated blocks exceeds total refcnt') + """ + self.assertEqual(support.run_in_subinterp(code), 0, + 'subinterp code failure, check stderr.') + def test_is_gil_enabled(self): if support.Py_GIL_DISABLED: self.assertIs(type(sys._is_gil_enabled()), bool) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0f502ccdaf5767..0a4c069460bd82 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -291,12 +291,18 @@ hashtable_unicode_compare(const void *key1, const void *key2) It's not safe to deallocate those strings until all interpreters that potentially use them are freed. By storing them in the main interpreter, we ensure they get freed after all other interpreters are freed. + + Subtle detail: it's only required to share the interned string dict in the + case that those kinds of legacy modules are actually imported. However, we + can't wait until the import happens so we share if those kind of modules are + allowed (the Py_RTFLAGS_MULTI_INTERP_EXTENSIONS flag is set). */ static bool has_shared_intern_dict(PyInterpreterState *interp) { PyInterpreterState *main_interp = _PyInterpreterState_Main(); - return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC; + return (interp != main_interp && + !(interp->feature_flags & Py_RTFLAGS_MULTI_INTERP_EXTENSIONS)); } static int @@ -305,8 +311,20 @@ init_interned_dict(PyInterpreterState *interp) assert(get_interned_dict(interp) == NULL); PyObject *interned; if (has_shared_intern_dict(interp)) { - interned = get_interned_dict(_PyInterpreterState_Main()); - Py_INCREF(interned); + PyInterpreterState *main = _PyInterpreterState_Main(); + interned = _Py_INTERP_CACHED_OBJECT(main, interned_strings_legacy); + if (interned == NULL) { + // allocate for main interpreter. We share obmalloc in this case + // and we use a separate dict because it's cleaner to ensure these + // objects don't show up in the main interpreter (which they could + // if uswe use interned_strings). They will be shared by all + // subinterpreters that allow legacy single-phase init modules. + interned = PyDict_New(); + if (interned == NULL) { + return -1; + } + _Py_INTERP_CACHED_OBJECT(main, interned_strings_legacy) = interned; + } } else { interned = PyDict_New(); @@ -318,6 +336,61 @@ init_interned_dict(PyInterpreterState *interp) return 0; } +/* Clean the dict of interned strings that is used by subinterpreters that + * allow basic single-phase extensions modules (has_shared_intern_dict() is + * true). For those, they all share the interned_strings_legacy dict that's + * owned by the main interpreter. Only the main interpreter does cleanup on + * it. See GH-116510. + */ +static void +clear_interned_dict_legacy(PyInterpreterState *interp) +{ + PyObject *interned = _Py_INTERP_CACHED_OBJECT(interp, + interned_strings_legacy); + if (interned == NULL) { + return; + } + // This is similar but slightly different logic compared with + // _PyUnicode_ClearInterned(). These are strings created by + // subinterpreters but stored in a dict owned by the main interpreter. + // Immortalization loses the true reference count and so we need to ensure + // all those subinterpreters have exited before cleaning these strings up. + Py_ssize_t pos = 0; + PyObject *s, *ignored_value; + while (PyDict_Next(interned, &pos, &s, &ignored_value)) { + assert(PyUnicode_IS_READY(s)); +#ifdef Py_TRACE_REFS + _Py_AddToAllObjects(s); +#endif + switch (PyUnicode_CHECK_INTERNED(s)) { + case SSTATE_INTERNED_IMMORTAL: + case SSTATE_INTERNED_IMMORTAL_STATIC: + _Py_SetMortal(s, 2); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyThreadState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); +#endif + break; + case SSTATE_INTERNED_MORTAL: + Py_SET_REFCNT(s, Py_REFCNT(s) + 2); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyThreadState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); +#endif + break; + case SSTATE_NOT_INTERNED: + /* fall through */ + default: + Py_UNREACHABLE(); + } + _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; + } + PyDict_Clear(interned); + _Py_INTERP_CACHED_OBJECT(interp, interned_strings_legacy) = NULL; +} + static void clear_interned_dict(PyInterpreterState *interp) { @@ -326,8 +399,9 @@ clear_interned_dict(PyInterpreterState *interp) if (!has_shared_intern_dict(interp)) { // only clear if the dict belongs to this interpreter PyDict_Clear(interned); + Py_DECREF(interned); + clear_interned_dict_legacy(interp); } - Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } } From fe6addeda4d768eb4d401bca7b4bfa9bb5ce72dd Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 30 Sep 2024 16:38:36 -0700 Subject: [PATCH 2/3] Update Objects/unicodeobject.c Spelling fix. Co-authored-by: Eric Snow --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0a4c069460bd82..1c8a1b6897bc5a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -317,7 +317,7 @@ init_interned_dict(PyInterpreterState *interp) // allocate for main interpreter. We share obmalloc in this case // and we use a separate dict because it's cleaner to ensure these // objects don't show up in the main interpreter (which they could - // if uswe use interned_strings). They will be shared by all + // if we use interned_strings). They will be shared by all // subinterpreters that allow legacy single-phase init modules. interned = PyDict_New(); if (interned == NULL) { From 1d0b7c1a3e9e33f03f8adc9a53d3f050b1bbee40 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 1 Oct 2024 09:27:22 -0700 Subject: [PATCH 3/3] Minor code cleanup as suggested by review. --- Objects/unicodeobject.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1c8a1b6897bc5a..e2ff5012e0575e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -358,7 +358,6 @@ clear_interned_dict_legacy(PyInterpreterState *interp) Py_ssize_t pos = 0; PyObject *s, *ignored_value; while (PyDict_Next(interned, &pos, &s, &ignored_value)) { - assert(PyUnicode_IS_READY(s)); #ifdef Py_TRACE_REFS _Py_AddToAllObjects(s); #endif @@ -381,7 +380,7 @@ clear_interned_dict_legacy(PyInterpreterState *interp) #endif break; case SSTATE_NOT_INTERNED: - /* fall through */ + _Py_FALLTHROUGH; default: Py_UNREACHABLE(); }