Skip to content

Commit 1928c5b

Browse files
jbmsrwgk
authored andcommitted
Eliminate duplicate TLS keys for loader_life_support stack
This revises the existing fix for pybind#2765 in pybind#3237 to reduce the amount of TLS storage used. The shared TLS key is stored in two different ways, depending on `PYBIND11_INTERNALS_VERSION`. If `PYBIND11_INTERNALS_VERSION == 4` (as is currently set), the TLS key is stored in the `internal::shared_data` map to avoid breaking ABI compatibility. If `PYBIND11_INTERNALS_VERSION > 4`, the TLS key is stored directly in the `internals` struct. Fix test_pytypes.py::test_issue2361 failure on PyPy3.7 Add github actions tests for unstable ABI
1 parent ce62ec5 commit 1928c5b

File tree

6 files changed

+196
-71
lines changed

6 files changed

+196
-71
lines changed

.github/workflows/ci.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,24 @@ jobs:
145145
if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))"
146146
run: cmake --build build2 --target cpptest
147147

148+
# Third build - C++17 mode with unstable ABI
149+
- name: Configure (unstable ABI)
150+
run: >
151+
cmake -S . -B build3
152+
-DPYBIND11_WERROR=ON
153+
-DDOWNLOAD_CATCH=ON
154+
-DDOWNLOAD_EIGEN=ON
155+
-DCMAKE_CXX_STANDARD=17
156+
-DPYBIND11_INTERNALS_VERSION=10000000
157+
"-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp"
158+
${{ matrix.args }}
159+
160+
- name: Build (unstable ABI)
161+
run: cmake --build build3 -j 2
162+
163+
- name: Python tests (unstable ABI)
164+
run: cmake --build build3 --target pytest
165+
148166
- name: Interface test
149167
run: cmake --build build2 --target test_cmake_build
150168

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ endif()
8989
option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT})
9090
option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT})
9191
option(PYBIND11_NOPYTHON "Disable search for Python" OFF)
92+
set(PYBIND11_INTERNALS_VERSION
93+
""
94+
CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.")
9295

9396
cmake_dependent_option(
9497
USE_PYTHON_INCLUDE_DIR
@@ -189,6 +192,10 @@ if(NOT TARGET pybind11_headers)
189192

190193
target_compile_features(pybind11_headers INTERFACE cxx_inheriting_constructors cxx_user_literals
191194
cxx_right_angle_brackets)
195+
if(NOT "${PYBIND11_INTERNALS_VERSION}" STREQUAL "")
196+
target_compile_definitions(
197+
pybind11_headers INTERFACE "PYBIND11_INTERNALS_VERSION=${PYBIND11_INTERNALS_VERSION}")
198+
endif()
192199
else()
193200
# It is invalid to install a target twice, too.
194201
set(PYBIND11_INSTALL OFF)

include/pybind11/detail/internals.h

Lines changed: 141 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,24 @@
1212
#include "../pytypes.h"
1313
#include "smart_holder_sfinae_hooks_only.h"
1414

15+
/// Tracks the `internals` and `type_info` ABI version independent of the main library version.
16+
///
17+
/// Some portions of the code use an ABI that is conditional depending on this
18+
/// version number. That allows ABI-breaking changes to be "pre-implemented".
19+
/// Once the default version number is incremented, the conditional logic that
20+
/// no longer applies can be removed. Additionally, users that need not
21+
/// maintain ABI compatibility can increase the version number in order to take
22+
/// advantage of any functionality/efficiency improvements that depend on the
23+
/// newer ABI.
24+
///
25+
/// WARNING: If you choose to manually increase the ABI version, note that
26+
/// pybind11 may not be tested as thoroughly with a non-default ABI version, and
27+
/// further ABI-incompatible changes may be made before the ABI is officially
28+
/// changed to the new version.
29+
#ifndef PYBIND11_INTERNALS_VERSION
30+
# define PYBIND11_INTERNALS_VERSION 4
31+
#endif
32+
1533
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
1634

1735
using ExceptionTranslator = void (*)(std::exception_ptr);
@@ -26,30 +44,58 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass);
2644
// The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new
2745
// Thread Specific Storage (TSS) API.
2846
#if PY_VERSION_HEX >= 0x03070000
29-
# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr
30-
# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key))
31-
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value))
32-
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr)
33-
# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key)
47+
// Avoid unnecessary allocation of `Py_tss_t`, since we cannot use
48+
// `Py_LIMITED_API` anyway.
49+
# if PYBIND11_INTERNALS_VERSION > 4
50+
# define PYBIND11_TLS_KEY_REF Py_tss_t &
51+
# ifdef __GNUC__
52+
// Clang on macOS warns due to `Py_tss_NEEDS_INIT` not specifying an initializer
53+
// for every field.
54+
# define PYBIND11_TLS_KEY_INIT(var) \
55+
_Pragma("GCC diagnostic push") /**/ \
56+
_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") /**/ \
57+
Py_tss_t var \
58+
= Py_tss_NEEDS_INIT; \
59+
_Pragma("GCC diagnostic pop")
60+
# else
61+
# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t var = Py_tss_NEEDS_INIT;
62+
# endif
63+
# define PYBIND11_TLS_KEY_CREATE(var) (PyThread_tss_create(&(var)) == 0)
64+
# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get(&(key))
65+
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set(&(key), (value))
66+
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set(&(key), nullptr)
67+
# define PYBIND11_TLS_FREE(key) PyThread_tss_delete(&(key))
68+
# else
69+
# define PYBIND11_TLS_KEY_REF Py_tss_t *
70+
# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr;
71+
# define PYBIND11_TLS_KEY_CREATE(var) \
72+
(((var) = PyThread_tss_alloc()) != nullptr && (PyThread_tss_create((var)) == 0))
73+
# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key))
74+
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value))
75+
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr)
76+
# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key)
77+
# endif
3478
#else
35-
// Usually an int but a long on Cygwin64 with Python 3.x
36-
# define PYBIND11_TLS_KEY_INIT(var) decltype(PyThread_create_key()) var = 0
79+
// Usually an int but a long on Cygwin64 with Python 3.x
80+
# define PYBIND11_TLS_KEY_REF decltype(PyThread_create_key())
81+
# define PYBIND11_TLS_KEY_INIT(var) PYBIND11_TLS_KEY_REF var = 0;
82+
# define PYBIND11_TLS_KEY_CREATE(var) (((var) = PyThread_create_key()) != -1)
3783
# define PYBIND11_TLS_GET_VALUE(key) PyThread_get_key_value((key))
38-
# if PY_MAJOR_VERSION < 3
39-
# define PYBIND11_TLS_DELETE_VALUE(key) \
40-
PyThread_delete_key_value(key)
41-
# define PYBIND11_TLS_REPLACE_VALUE(key, value) \
42-
do { \
43-
PyThread_delete_key_value((key)); \
44-
PyThread_set_key_value((key), (value)); \
45-
} while (false)
84+
# if PY_MAJOR_VERSION < 3 || defined(PYPY_VERSION)
85+
// On CPython < 3.4 and on PyPy, `PyThread_set_key_value` strangely does not set
86+
// the value if it has already been set. Instead, it must first be deleted and
87+
// then set again.
88+
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_delete_key_value(key)
89+
# define PYBIND11_TLS_REPLACE_VALUE(key, value) \
90+
do { \
91+
PyThread_delete_key_value((key)); \
92+
PyThread_set_key_value((key), (value)); \
93+
} while (false)
4694
# else
47-
# define PYBIND11_TLS_DELETE_VALUE(key) \
48-
PyThread_set_key_value((key), nullptr)
49-
# define PYBIND11_TLS_REPLACE_VALUE(key, value) \
50-
PyThread_set_key_value((key), (value))
95+
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_set_key_value((key), nullptr)
96+
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_set_key_value((key), (value))
5197
# endif
52-
# define PYBIND11_TLS_FREE(key) (void)key
98+
# define PYBIND11_TLS_FREE(key) (void) key
5399
#endif
54100

55101
// Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly
@@ -107,22 +153,31 @@ struct internals {
107153
std::unordered_map<const PyObject *, std::vector<PyObject *>> patients;
108154
std::forward_list<ExceptionTranslator> registered_exception_translators;
109155
std::unordered_map<std::string, void *> shared_data; // Custom data to be shared across extensions
156+
#if PYBIND11_INTERNALS_VERSION == 4
110157
std::vector<PyObject *> unused_loader_patient_stack_remove_at_v5;
158+
#endif
111159
std::forward_list<std::string> static_strings; // Stores the std::strings backing detail::c_str()
112160
PyTypeObject *static_property_type;
113161
PyTypeObject *default_metaclass;
114162
PyObject *instance_base;
115163
#if defined(WITH_THREAD)
116-
PYBIND11_TLS_KEY_INIT(tstate);
164+
PYBIND11_TLS_KEY_INIT(tstate)
165+
# if PYBIND11_INTERNALS_VERSION > 4
166+
PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)
167+
# endif // PYBIND11_INTERNALS_VERSION > 4
117168
PyInterpreterState *istate = nullptr;
118169
~internals() {
170+
# if PYBIND11_INTERNALS_VERSION > 4
171+
PYBIND11_TLS_FREE(loader_life_support_tls_key);
172+
# endif // PYBIND11_INTERNALS_VERSION > 4
173+
119174
// This destructor is called *after* Py_Finalize() in finalize_interpreter().
120-
// That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is called.
121-
// PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does nothing.
122-
// PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
123-
// PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). Neither
124-
// of those have anything to do with CPython internals.
125-
// PyMem_RawFree *requires* that the `tstate` be allocated with the CPython allocator.
175+
// That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is
176+
// called. PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does
177+
// nothing. PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
178+
// PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX).
179+
// Neither of those have anything to do with CPython internals. PyMem_RawFree *requires*
180+
// that the `tstate` be allocated with the CPython allocator.
126181
PYBIND11_TLS_FREE(tstate);
127182
}
128183
#endif
@@ -154,15 +209,6 @@ struct type_info {
154209
bool module_local : 1;
155210
};
156211

157-
/// Tracks the `internals` and `type_info` ABI version independent of the main library version
158-
#ifndef PYBIND11_USE_SMART_HOLDER_AS_DEFAULT
159-
#define PYBIND11_INTERNALS_VERSION 4
160-
#else
161-
// See README_smart_holder.rst:
162-
// Classic / Conservative / Progressive cross-module compatibility
163-
#define PYBIND11_INTERNALS_VERSION 1004
164-
#endif
165-
166212
/// On MSVC, debug and release builds are not ABI-compatible!
167213
#if defined(_MSC_VER) && defined(_DEBUG)
168214
# define PYBIND11_BUILD_TYPE "_debug"
@@ -221,11 +267,19 @@ struct type_info {
221267
# endif
222268
#endif
223269

270+
#ifndef PYBIND11_INTERNALS_SH_DEF
271+
# if defined(PYBIND11_USE_SMART_HOLDER_AS_DEFAULT)
272+
# define PYBIND11_INTERNALS_SH_DEF ""
273+
# else
274+
# define PYBIND11_INTERNALS_SH_DEF "_sh_def"
275+
# endif
276+
#endif
277+
224278
#define PYBIND11_INTERNALS_ID "__pybind11_internals_v" \
225-
PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__"
279+
PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE PYBIND11_INTERNALS_SH_DEF "__"
226280

227281
#define PYBIND11_MODULE_LOCAL_ID "__pybind11_module_local_v" \
228-
PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__"
282+
PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE PYBIND11_INTERNALS_SH_DEF "__"
229283

230284
/// Each module locally stores a pointer to the `internals` data. The data
231285
/// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`.
@@ -298,21 +352,21 @@ PYBIND11_NOINLINE internals &get_internals() {
298352
internals_ptr = new internals();
299353
#if defined(WITH_THREAD)
300354

301-
#if PY_VERSION_HEX < 0x03090000
302-
PyEval_InitThreads();
303-
#endif
355+
# if PY_VERSION_HEX < 0x03090000
356+
PyEval_InitThreads();
357+
# endif
304358
PyThreadState *tstate = PyThreadState_Get();
305-
#if PY_VERSION_HEX >= 0x03070000
306-
internals_ptr->tstate = PyThread_tss_alloc();
307-
if (!internals_ptr->tstate || (PyThread_tss_create(internals_ptr->tstate) != 0))
308-
pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!");
309-
PyThread_tss_set(internals_ptr->tstate, tstate);
310-
#else
311-
internals_ptr->tstate = PyThread_create_key();
312-
if (internals_ptr->tstate == -1)
313-
pybind11_fail("get_internals: could not successfully initialize the tstate TLS key!");
314-
PyThread_set_key_value(internals_ptr->tstate, tstate);
315-
#endif
359+
if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->tstate)) {
360+
pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!");
361+
}
362+
PYBIND11_TLS_REPLACE_VALUE(internals_ptr->tstate, tstate);
363+
364+
# if PYBIND11_INTERNALS_VERSION > 4
365+
if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->loader_life_support_tls_key)) {
366+
pybind11_fail("get_internals: could not successfully initialize the "
367+
"loader_life_support TSS key!");
368+
}
369+
# endif
316370
internals_ptr->istate = tstate->interp;
317371
#endif
318372
builtins[id] = capsule(internals_pp);
@@ -324,16 +378,48 @@ PYBIND11_NOINLINE internals &get_internals() {
324378
return **internals_pp;
325379
}
326380

327-
328381
// the internals struct (above) is shared between all the modules. local_internals are only
329382
// for a single module. Any changes made to internals may require an update to
330383
// PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design,
331384
// restricted to a single module. Whether a module has local internals or not should not
332385
// impact any other modules, because the only things accessing the local internals is the
333386
// module that contains them.
334387
struct local_internals {
335-
type_map<type_info *> registered_types_cpp;
336-
std::forward_list<ExceptionTranslator> registered_exception_translators;
388+
type_map<type_info *> registered_types_cpp;
389+
std::forward_list<ExceptionTranslator> registered_exception_translators;
390+
#if defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4
391+
392+
// For ABI compatibility, we can't store the loader_life_support TLS key in
393+
// the `internals` struct directly. Instead, we store it in `shared_data` and
394+
// cache a copy in `local_internals`. If we allocated a separate TLS key for
395+
// each instance of `local_internals`, we could end up allocating hundreds of
396+
// TLS keys if hundreds of different pybind11 modules are loaded (which is a
397+
// plausible number).
398+
PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)
399+
400+
// Holds the shared TLS key for the loader_life_support stack.
401+
struct shared_loader_life_support_data {
402+
PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)
403+
shared_loader_life_support_data() {
404+
if (!PYBIND11_TLS_KEY_CREATE(loader_life_support_tls_key)) {
405+
pybind11_fail("local_internals: could not successfully initialize the "
406+
"loader_life_support TLS key!");
407+
}
408+
}
409+
// We can't help but leak the TLS key, because Python never unloads extension modules.
410+
};
411+
412+
local_internals() {
413+
auto &internals = get_internals();
414+
// Get or create the `loader_life_support_stack_key`.
415+
auto &ptr = internals.shared_data["_life_support"];
416+
if (!ptr) {
417+
ptr = new shared_loader_life_support_data;
418+
}
419+
loader_life_support_tls_key
420+
= static_cast<shared_loader_life_support_data *>(ptr)->loader_life_support_tls_key;
421+
}
422+
#endif // defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4
337423
};
338424

339425
/// Works like `get_internals`, but for things which are locally registered.

include/pybind11/detail/type_caster_base.h

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,38 +35,51 @@ class loader_life_support {
3535
loader_life_support* parent = nullptr;
3636
std::unordered_set<PyObject *> keep_alive;
3737

38-
static loader_life_support** get_stack_pp() {
3938
#if defined(WITH_THREAD)
40-
thread_local static loader_life_support* per_thread_stack = nullptr;
41-
return &per_thread_stack;
39+
// Store stack pointer in thread-local storage.
40+
static PYBIND11_TLS_KEY_REF get_stack_tls_key() {
41+
# if PYBIND11_INTERNALS_VERSION == 4
42+
return get_local_internals().loader_life_support_tls_key;
43+
# else
44+
return get_internals().loader_life_support_tls_key;
45+
# endif
46+
}
47+
static loader_life_support *get_stack_top() {
48+
return static_cast<loader_life_support *>(PYBIND11_TLS_GET_VALUE(get_stack_tls_key()));
49+
}
50+
static void set_stack_top(loader_life_support *value) {
51+
PYBIND11_TLS_REPLACE_VALUE(get_stack_tls_key(), value);
52+
}
4253
#else
43-
static loader_life_support* global_stack = nullptr;
44-
return &global_stack;
45-
#endif
54+
// Use single global variable for stack.
55+
static loader_life_support **get_stack_pp() {
56+
static loader_life_support *global_stack = nullptr;
57+
return global_stack;
4658
}
59+
static loader_life_support *get_stack_top() { return *get_stack_pp(); }
60+
static void set_stack_top(loader_life_support *value) { *get_stack_pp() = value; }
61+
#endif
4762

4863
public:
4964
/// A new patient frame is created when a function is entered
5065
loader_life_support() {
51-
loader_life_support** stack = get_stack_pp();
52-
parent = *stack;
53-
*stack = this;
66+
parent = get_stack_top();
67+
set_stack_top(this);
5468
}
5569

5670
/// ... and destroyed after it returns
5771
~loader_life_support() {
58-
loader_life_support** stack = get_stack_pp();
59-
if (*stack != this)
72+
if (get_stack_top() != this)
6073
pybind11_fail("loader_life_support: internal error");
61-
*stack = parent;
74+
set_stack_top(parent);
6275
for (auto* item : keep_alive)
6376
Py_DECREF(item);
6477
}
6578

6679
/// This can only be used inside a pybind11-bound function, either by `argument_loader`
6780
/// at argument preparation time or by `py::cast()` at execution time.
6881
PYBIND11_NOINLINE static void add_patient(handle h) {
69-
loader_life_support* frame = *get_stack_pp();
82+
loader_life_support *frame = get_stack_top();
7083
if (!frame) {
7184
// NOTE: It would be nice to include the stack frames here, as this indicates
7285
// use of pybind11::cast<> outside the normal call framework, finding such

include/pybind11/gil.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ PYBIND11_NAMESPACE_END(detail)
5050
class gil_scoped_acquire {
5151
public:
5252
PYBIND11_NOINLINE gil_scoped_acquire() {
53-
auto const &internals = detail::get_internals();
53+
auto &internals = detail::get_internals();
5454
tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate);
5555

5656
if (!tstate) {
@@ -132,7 +132,7 @@ class gil_scoped_release {
132132
// `get_internals()` must be called here unconditionally in order to initialize
133133
// `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an
134134
// initialization race could occur as multiple threads try `gil_scoped_acquire`.
135-
const auto &internals = detail::get_internals();
135+
auto &internals = detail::get_internals();
136136
tstate = PyEval_SaveThread();
137137
if (disassoc) {
138138
auto key = internals.tstate;

0 commit comments

Comments
 (0)