Skip to content

Commit f8abfa3

Browse files
gh-103323: Get the "Current" Thread State from a Thread-Local Variable (gh-103324)
We replace _PyRuntime.tstate_current with a thread-local variable. As part of this change, we add a _Py_thread_local macro in pyport.h (only for the core runtime) to smooth out the compiler differences. The main motivation here is in support of a per-interpreter GIL, but this change also provides some performance improvement opportunities. Note that we do not provide a fallback for compilers that lack thread-local variable support: we fall back neither to the old tstate_current nor to thread-specific storage (PyThread_tss_*()). If that proves problematic then we can circle back. I consider it unlikely, but will run the buildbots to double-check. Also note that this does not change any of the code related to the GILState API, which uses a thread state stored in thread-specific storage. I suspect we can combine that with _Py_tss_tstate (from here). However, that can be addressed separately and is neither urgent nor critical. (While this change was mostly done independently, I did take some inspiration from earlier (~2020) work by @markshannon (main...markshannon:threadstate_in_tls) and @vstinner (#23976).)
1 parent 7ef614c commit f8abfa3

File tree

5 files changed

+73
-18
lines changed

5 files changed

+73
-18
lines changed

Include/internal/pycore_pystate.h

+17-9
Original file line numberDiff line numberDiff line change
@@ -64,27 +64,35 @@ _Py_ThreadCanHandlePendingCalls(void)
6464
/* Variable and macro for in-line access to current thread
6565
and interpreter state */
6666

67-
static inline PyThreadState*
68-
_PyRuntimeState_GetThreadState(_PyRuntimeState *runtime)
69-
{
70-
return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
71-
}
67+
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
68+
extern _Py_thread_local PyThreadState *_Py_tss_tstate;
69+
#endif
70+
/* Out-of-line accessor for the current thread state.  _PyThreadState_GET()
   calls it when it cannot read _Py_tss_tstate directly (no thread-local
   support, or Py_BUILD_CORE_MODULE builds).  This is a function, so it is
   exported with PyAPI_FUNC rather than PyAPI_DATA. */
PyAPI_FUNC(PyThreadState *) _PyThreadState_GetCurrent(void);
7271

7372
/* Get the current Python thread state.
7473
75-
Efficient macro reading directly the 'tstate_current' atomic
76-
variable. The macro is unsafe: it does not check for error and it can
77-
return NULL.
74+
This function is unsafe: it does not check for error and it can return NULL.
7875
7976
The caller must hold the GIL.
8077
8178
See also PyThreadState_Get() and _PyThreadState_UncheckedGet(). */
8279
static inline PyThreadState*
8380
_PyThreadState_GET(void)
8481
{
85-
return _PyRuntimeState_GetThreadState(&_PyRuntime);
82+
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
83+
return _Py_tss_tstate;
84+
#else
85+
return _PyThreadState_GetCurrent();
86+
#endif
87+
}
88+
89+
static inline PyThreadState*
90+
_PyRuntimeState_GetThreadState(_PyRuntimeState *Py_UNUSED(runtime))
91+
{
92+
return _PyThreadState_GET();
8693
}
8794

95+
8896
static inline void
8997
_Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate)
9098
{

Include/internal/pycore_runtime.h

-3
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,6 @@ typedef struct pyruntimestate {
119119

120120
unsigned long main_thread;
121121

122-
/* Assuming the current thread holds the GIL, this is the
123-
PyThreadState for the current thread. */
124-
_Py_atomic_address tstate_current;
125122
/* Used for the thread state bound to the current thread. */
126123
Py_tss_t autoTSSkey;
127124

Include/pyport.h

+21
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,27 @@ extern char * _getpty(int *, int, mode_t, int);
662662
# define WITH_THREAD
663663
#endif
664664

665+
#ifdef WITH_THREAD
666+
# ifdef Py_BUILD_CORE
667+
# ifdef HAVE_THREAD_LOCAL
668+
# error "HAVE_THREAD_LOCAL is already defined"
669+
# endif
670+
# define HAVE_THREAD_LOCAL 1
671+
# ifdef thread_local
672+
# define _Py_thread_local thread_local
673+
# elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
674+
# define _Py_thread_local _Thread_local
675+
# elif defined(_MSC_VER) /* AKA NT_THREADS */
676+
# define _Py_thread_local __declspec(thread)
677+
# elif defined(__GNUC__) /* includes clang */
678+
# define _Py_thread_local __thread
679+
# else
680+
// fall back to the PyThread_tss_*() API, or ignore.
681+
# undef HAVE_THREAD_LOCAL
682+
# endif
683+
# endif
684+
#endif
685+
665686
/* Check that ALT_SOABI is consistent with Py_TRACE_REFS:
666687
./configure --with-trace-refs must be used to define Py_TRACE_REFS */
667688
#if defined(ALT_SOABI) && defined(Py_TRACE_REFS)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
We've replaced our use of ``_PyRuntime.tstate_current`` with a thread-local
2+
variable. This is a fairly low-level implementation detail, and there
3+
should be no change in behavior.

Python/pystate.c

+32-6
Original file line numberDiff line numberDiff line change
@@ -60,30 +60,56 @@ extern "C" {
6060
For each of these functions, the GIL must be held by the current thread.
6161
*/
6262

63+
64+
#ifdef HAVE_THREAD_LOCAL
65+
_Py_thread_local PyThreadState *_Py_tss_tstate = NULL;
66+
#endif
67+
6368
static inline PyThreadState *
64-
current_fast_get(_PyRuntimeState *runtime)
69+
current_fast_get(_PyRuntimeState *Py_UNUSED(runtime))
6570
{
66-
return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
71+
#ifdef HAVE_THREAD_LOCAL
72+
return _Py_tss_tstate;
73+
#else
74+
// XXX Fall back to the PyThread_tss_*() API.
75+
# error "no supported thread-local variable storage classifier"
76+
#endif
6777
}
6878

6979
static inline void
70-
current_fast_set(_PyRuntimeState *runtime, PyThreadState *tstate)
80+
current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate)
7181
{
7282
assert(tstate != NULL);
73-
_Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)tstate);
83+
#ifdef HAVE_THREAD_LOCAL
84+
_Py_tss_tstate = tstate;
85+
#else
86+
// XXX Fall back to the PyThread_tss_*() API.
87+
# error "no supported thread-local variable storage classifier"
88+
#endif
7489
}
7590

7691
static inline void
77-
current_fast_clear(_PyRuntimeState *runtime)
92+
current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime))
7893
{
79-
_Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)NULL);
94+
#ifdef HAVE_THREAD_LOCAL
95+
_Py_tss_tstate = NULL;
96+
#else
97+
// XXX Fall back to the PyThread_tss_*() API.
98+
# error "no supported thread-local variable storage classifier"
99+
#endif
80100
}
81101

82102
#define tstate_verify_not_active(tstate) \
83103
if (tstate == current_fast_get((tstate)->interp->runtime)) { \
84104
_Py_FatalErrorFormat(__func__, "tstate %p is still current", tstate); \
85105
}
86106

107+
PyThreadState *
108+
_PyThreadState_GetCurrent(void)
109+
{
110+
return current_fast_get(&_PyRuntime);
111+
}
112+
87113

88114
//------------------------------------------------
89115
// the thread state bound to the current OS thread

0 commit comments

Comments
 (0)