-
-
Notifications
You must be signed in to change notification settings - Fork 32.3k
gh-117953: Always Run Extension Init Func in Main Interpreter First #118157
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
a1a3b4c
9d630f1
2d7ee11
f9e132f
b09c445
8ca8033
122761c
bb3402a
164a9eb
0c74b25
7b9a29d
7965d57
2abce96
0b16f3f
f7d0ff9
f3b015f
048342f
6d5311e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
When a builtin or extension module is imported for the first time, while a | ||
subinterpreter is active, the module's init function is now run by the main | ||
interpreter first before import continues in the subinterpreter. | ||
Consequently, single-phase init modules now fail in an isolated | ||
subinterpreter without the init function running under that interpreter, | ||
whereas before it would run under the subinterpreter *before* failing, | ||
potentially leaving behind global state and callbacks and otherwise leaving | ||
the module in an inconsistent state. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1526,6 +1526,26 @@ _PyImport_CheckSubinterpIncompatibleExtensionAllowed(const char *name) | |
return 0; | ||
} | ||
|
||
static PyThreadState * | ||
switch_to_main_interpreter(PyThreadState *tstate) | ||
{ | ||
if (_Py_IsMainInterpreter(tstate->interp)) { | ||
return tstate; | ||
} | ||
PyThreadState *main_tstate = PyThreadState_New(_PyInterpreterState_Main()); | ||
if (main_tstate == NULL) { | ||
return NULL; | ||
} | ||
main_tstate->_whence = _PyThreadState_WHENCE_EXEC; | ||
#ifndef NDEBUG | ||
PyThreadState *old_tstate = PyThreadState_Swap(main_tstate); | ||
assert(old_tstate == tstate); | ||
#else | ||
(void)PyThreadState_Swap(main_tstate); | ||
#endif | ||
return main_tstate; | ||
} | ||
|
||
static PyObject * | ||
get_core_module_dict(PyInterpreterState *interp, | ||
PyObject *name, PyObject *path) | ||
|
@@ -1874,38 +1894,82 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, | |
struct extensions_cache_value *cached = NULL; | ||
const char *name_buf = PyBytes_AS_STRING(info->name_encoded); | ||
|
||
/* We cannot know if the module is single-phase init or | ||
* multi-phase init until after we call its init function. Even | ||
* in isolated interpreters (that do not support single-phase init), | ||
* the init function will run without restriction. For multi-phase | ||
* init modules that isn't a problem because the init function only | ||
* runs PyModuleDef_Init() on the module's def and then returns it. | ||
* | ||
* However, for single-phase init the module's init function will | ||
* create the module, create other objects (and allocate other | ||
* memory), populate it and its module state, and initialze static | ||
* types. Some modules store other objects and data in global C | ||
* variables and register callbacks with the runtime/stdlib or | ||
* even external libraries (which is part of why we can't just | ||
* dlclose() the module in the error case). That's a problem | ||
* for isolated interpreters since all of the above happens | ||
* and only then * will the import fail. Memory will leak, | ||
* callbacks will still get used, and sometimes there | ||
* will be crashes (memory access violations | ||
* and use-after-free). | ||
* | ||
* To put it another way, if the module is single-phase init | ||
* then the import will probably break interpreter isolation | ||
* and should fail ASAP. However, the module's init function | ||
* will still get run. That means it may still store state | ||
* in the shared-object/DLL address space (which never gets | ||
* closed/cleared), including objects (e.g. static types). | ||
* This is a problem for isolated subinterpreters since each | ||
* has its own object allocator. If the loaded shared-object | ||
* still holds a reference to an object after the corresponding | ||
* interpreter has finalized then either we must let it leak | ||
* or else any later use of that object by another interpreter | ||
* (or across multiple init-fini cycles) will crash the process. | ||
* | ||
* To avoid all of that, we make sure the module's init function | ||
* is always run first with the main interpreter active. If it was | ||
* already the main interpreter then we can continue loading the | ||
* module like normal. Otherwise, right after the init function, | ||
* we take care of some import state bookkeeping, switch back | ||
* to the subinterpreter, check for single-phase init, | ||
* and then continue loading like normal. */ | ||
|
||
PyThreadState *main_tstate = NULL; | ||
if (!_Py_IsMainInterpreter(tstate->interp)) { | ||
/* We *could* leave in place a legacy interpreter here | ||
* (one that shares obmalloc/GIL with main interp), | ||
* but there isn't a big advantage, we anticipate | ||
* such interpreters will be increasingly uncommon, | ||
* and the code is a bit simpler if we always switch | ||
* to the main interpreter. */ | ||
main_tstate = switch_to_main_interpreter(tstate); | ||
if (main_tstate == NULL) { | ||
return NULL; | ||
} | ||
assert(main_tstate != tstate); | ||
// XXX Get import lock. | ||
} | ||
|
||
struct _Py_ext_module_loader_result res; | ||
if (_PyImport_RunModInitFunc(p0, info, &res) < 0) { | ||
int rc = _PyImport_RunModInitFunc(p0, info, &res); | ||
if (rc < 0) { | ||
/* We discard res.def. */ | ||
assert(res.module == NULL); | ||
_Py_ext_module_loader_result_apply_error(&res, name_buf); | ||
return NULL; | ||
} | ||
assert(!PyErr_Occurred()); | ||
assert(res.err == NULL); | ||
|
||
mod = res.module; | ||
res.module = NULL; | ||
def = res.def; | ||
assert(def != NULL); | ||
|
||
if (res.kind == _Py_ext_module_kind_MULTIPHASE) { | ||
assert_multiphase_def(def); | ||
assert(mod == NULL); | ||
mod = PyModule_FromDefAndSpec(def, spec); | ||
if (mod == NULL) { | ||
goto error; | ||
} | ||
} | ||
else { | ||
assert(res.kind == _Py_ext_module_kind_SINGLEPHASE); | ||
assert_singlephase_def(def); | ||
assert(PyModule_Check(mod)); | ||
assert(!PyErr_Occurred()); | ||
assert(res.err == NULL); | ||
|
||
if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { | ||
goto error; | ||
} | ||
mod = res.module; | ||
res.module = NULL; | ||
def = res.def; | ||
assert(def != NULL); | ||
} | ||
|
||
/* Do anything else that should be done | ||
* while still using the main interpreter. */ | ||
if (res.kind == _Py_ext_module_kind_SINGLEPHASE) { | ||
/* Remember the filename as the __file__ attribute */ | ||
if (info->filename != NULL) { | ||
if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { | ||
|
@@ -1941,15 +2005,73 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, | |
if (cached == NULL) { | ||
goto error; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we do this without switching back? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I noticed that too and have already fixed it locally. 😄 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
} | ||
} | ||
|
||
/* Switch back to the subinterpreter. */ | ||
if (main_tstate != NULL) { | ||
assert(main_tstate != tstate); | ||
|
||
/* Any module we got from the init function will have to be | ||
* reloaded in the subinterpreter. */ | ||
Py_CLEAR(mod); | ||
|
||
(void)PyThreadState_Swap(tstate); | ||
} | ||
|
||
/*****************************************************************/ | ||
/* At this point we are back to the interpreter we started with. */ | ||
/*****************************************************************/ | ||
|
||
/* Finally we handle the error return from _PyImport_RunModInitFunc(). */ | ||
if (rc < 0) { | ||
_Py_ext_module_loader_result_apply_error(&res, name_buf); | ||
goto error; | ||
} | ||
|
||
/* Update per-interpreter import state. */ | ||
PyObject *modules = get_modules_dict(tstate, true); | ||
if (finish_singlephase_extension( | ||
tstate, mod, cached, info->name, modules) < 0) | ||
{ | ||
if (res.kind == _Py_ext_module_kind_MULTIPHASE) { | ||
assert_multiphase_def(def); | ||
assert(mod == NULL); | ||
/* Note that we cheat a little by not repeating the calls | ||
* to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */ | ||
mod = PyModule_FromDefAndSpec(def, spec); | ||
if (mod == NULL) { | ||
goto error; | ||
} | ||
} | ||
else { | ||
assert(res.kind == _Py_ext_module_kind_SINGLEPHASE); | ||
assert_singlephase_def(def); | ||
|
||
if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { | ||
goto error; | ||
} | ||
assert(!PyErr_Occurred()); | ||
|
||
if (main_tstate != NULL) { | ||
/* We switched to the main interpreter to run the init | ||
* function, so now we will "reload" the module from the | ||
* cached data using the original subinterpreter. */ | ||
assert(mod == NULL); | ||
mod = reload_singlephase_extension(tstate, cached, info); | ||
if (mod == NULL) { | ||
goto error; | ||
} | ||
assert(!PyErr_Occurred()); | ||
assert(PyModule_Check(mod)); | ||
} | ||
else { | ||
assert(mod != NULL); | ||
assert(PyModule_Check(mod)); | ||
|
||
/* Update per-interpreter import state. */ | ||
PyObject *modules = get_modules_dict(tstate, true); | ||
if (finish_singlephase_extension( | ||
tstate, mod, cached, info->name, modules) < 0) | ||
{ | ||
goto error; | ||
} | ||
} | ||
} | ||
|
||
_Py_ext_module_loader_result_clear(&res); | ||
return mod; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this necessary if
switch_to_main_interpreter
is doing the same check?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It isn't necessary, but I wanted to be explicit about it. I'll see about cleaning that up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed