diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index eb8a9a0db46c22..b02769903a6f9b 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -22,13 +22,13 @@ extern int _PyImport_SetModuleString(const char *name, PyObject* module); extern void _PyImport_AcquireLock(PyInterpreterState *interp); extern int _PyImport_ReleaseLock(PyInterpreterState *interp); +// This is used exclusively for the sys and builtins modules: extern int _PyImport_FixupBuiltin( + PyThreadState *tstate, PyObject *mod, const char *name, /* UTF-8 encoded string */ PyObject *modules ); -extern int _PyImport_FixupExtensionObject(PyObject*, PyObject *, - PyObject *, PyObject *); // Export for many shared extensions, like '_json' PyAPI_FUNC(PyObject*) _PyImport_GetModuleAttr(PyObject *, PyObject *); @@ -52,7 +52,7 @@ struct _import_runtime_state { Only legacy (single-phase init) extension modules are added and only if they support multiple initialization (m_size >- 0) or are imported in the main interpreter. - This is initialized lazily in _PyImport_FixupExtensionObject(). + This is initialized lazily in fix_up_extension() in import.c. Modules are added there and looked up in _imp.find_extension(). */ _Py_hashtable_t *hashtable; } extensions; diff --git a/Include/internal/pycore_importdl.h b/Include/internal/pycore_importdl.h index c8583582b358ac..972b1e27eabd0b 100644 --- a/Include/internal/pycore_importdl.h +++ b/Include/internal/pycore_importdl.h @@ -14,10 +14,51 @@ extern "C" { extern const char *_PyImport_DynLoadFiletab[]; -extern PyObject *_PyImport_LoadDynamicModuleWithSpec(PyObject *spec, FILE *); - typedef PyObject *(*PyModInitFunction)(void); +struct _Py_ext_module_loader_info { + PyObject *filename; +#ifndef MS_WINDOWS + PyObject *filename_encoded; +#endif + PyObject *name; + PyObject *name_encoded; + /* path is always a borrowed ref of name or filename, + * depending on if it's builtin or not. 
*/ + PyObject *path; + const char *hook_prefix; + const char *newcontext; +}; +extern void _Py_ext_module_loader_info_clear( + struct _Py_ext_module_loader_info *info); +extern int _Py_ext_module_loader_info_init_for_builtin( + struct _Py_ext_module_loader_info *p_info, + PyObject *name); +extern int _Py_ext_module_loader_info_init_from_spec( + struct _Py_ext_module_loader_info *info, + PyObject *spec); + +struct _Py_ext_module_loader_result { + PyModuleDef *def; + PyObject *module; + enum _Py_ext_module_loader_result_kind { + _Py_ext_module_loader_result_UNKNOWN = 0, + _Py_ext_module_loader_result_SINGLEPHASE = 1, + _Py_ext_module_loader_result_MULTIPHASE = 2, + _Py_ext_module_loader_result_INVALID = 3, + } kind; + char err[200]; +}; +extern void _Py_ext_module_loader_result_apply_error( + struct _Py_ext_module_loader_result *res); +extern PyModInitFunction _PyImport_GetModInitFunc( + struct _Py_ext_module_loader_info *info, + FILE *fp); +extern int _PyImport_RunModInitFunc( + PyModInitFunction p0, + struct _Py_ext_module_loader_info *info, + struct _Py_ext_module_loader_result *p_res); + /* Max length of module suffix searched for -- accommodates "module.slb" */ #define MAXSUFFIXSIZE 12 diff --git a/Include/moduleobject.h b/Include/moduleobject.h index 42b87cc4e91012..83f8c2030dbb8f 100644 --- a/Include/moduleobject.h +++ b/Include/moduleobject.h @@ -53,7 +53,7 @@ typedef struct PyModuleDef_Base { /* A copy of the module's __dict__ after the first time it was loaded. This is only set/used for legacy modules that do not support multiple initializations. - It is set by _PyImport_FixupExtensionObject(). */ + It is set by update_extensions_cache() in import.c. 
*/ PyObject* m_copy; } PyModuleDef_Base; diff --git a/Python/import.c b/Python/import.c index b040c7d5c0f7f5..74323c47bf192a 100644 --- a/Python/import.c +++ b/Python/import.c @@ -200,39 +200,54 @@ _PyImport_ClearModules(PyInterpreterState *interp) Py_SETREF(MODULES(interp), NULL); } +static inline PyObject * +get_modules_dict(PyThreadState *tstate, bool fatal) +{ + /* Technically, it would make sense to incref the dict, + * since sys.modules could be swapped out and decref'ed to 0 + * before the caller is done using it. However, that is highly + * unlikely, especially since we can rely on a global lock + * (i.e. the GIL) for thread-safety. */ + PyObject *modules = MODULES(tstate->interp); + if (modules == NULL) { + if (fatal) { + Py_FatalError("interpreter has no modules dictionary"); + } + _PyErr_SetString(tstate, PyExc_RuntimeError, + "unable to get sys.modules"); + return NULL; + } + return modules; +} + PyObject * PyImport_GetModuleDict(void) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (MODULES(interp) == NULL) { - Py_FatalError("interpreter has no modules dictionary"); - } - return MODULES(interp); + PyThreadState *tstate = _PyThreadState_GET(); + return get_modules_dict(tstate, true); } int _PyImport_SetModule(PyObject *name, PyObject *m) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *modules = MODULES(interp); + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *modules = get_modules_dict(tstate, true); return PyObject_SetItem(modules, name, m); } int _PyImport_SetModuleString(const char *name, PyObject *m) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *modules = MODULES(interp); + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *modules = get_modules_dict(tstate, true); return PyMapping_SetItemString(modules, name, m); } static PyObject * import_get_module(PyThreadState *tstate, PyObject *name) { - PyObject *modules = MODULES(tstate->interp); + PyObject *modules = 
get_modules_dict(tstate, false); if (modules == NULL) { - _PyErr_SetString(tstate, PyExc_RuntimeError, - "unable to get sys.modules"); return NULL; } @@ -297,10 +312,8 @@ PyImport_GetModule(PyObject *name) static PyObject * import_add_module(PyThreadState *tstate, PyObject *name) { - PyObject *modules = MODULES(tstate->interp); + PyObject *modules = get_modules_dict(tstate, false); if (modules == NULL) { - _PyErr_SetString(tstate, PyExc_RuntimeError, - "no import module dictionary"); return NULL; } @@ -397,7 +410,7 @@ remove_module(PyThreadState *tstate, PyObject *name) { PyObject *exc = _PyErr_GetRaisedException(tstate); - PyObject *modules = MODULES(tstate->interp); + PyObject *modules = get_modules_dict(tstate, true); if (PyDict_CheckExact(modules)) { // Error is reported to the caller (void)PyDict_Pop(modules, name, NULL); @@ -619,9 +632,9 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) (6). first time (not found in _PyRuntime.imports.extensions): 1. _imp_create_dynamic_impl() -> import_find_extension() - 2. _imp_create_dynamic_impl() -> _PyImport_LoadDynamicModuleWithSpec() - 3. _PyImport_LoadDynamicModuleWithSpec(): load - 4. _PyImport_LoadDynamicModuleWithSpec(): call + 2. _imp_create_dynamic_impl() -> _PyImport_RunDynamicModule() + 3. _PyImport_RunDynamicModule(): load + 4. _PyImport_RunDynamicModule(): call 5. -> PyModule_Create() -> PyModule_Create2() -> PyModule_CreateInitialized() 6. PyModule_CreateInitialized() -> PyModule_New() 7. PyModule_CreateInitialized(): allocate mod->md_state @@ -629,13 +642,15 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) 9. PyModule_CreateInitialized() -> PyModule_SetDocString() 10. PyModule_CreateInitialized(): set mod->md_def 11. : initialize the module - 12. _PyImport_LoadDynamicModuleWithSpec() -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() - 13. _PyImport_LoadDynamicModuleWithSpec(): set def->m_base.m_init - 14. _PyImport_LoadDynamicModuleWithSpec(): set __file__ - 15. 
_PyImport_LoadDynamicModuleWithSpec() -> _PyImport_FixupExtensionObject() - 16. _PyImport_FixupExtensionObject(): add it to interp->imports.modules_by_index - 17. _PyImport_FixupExtensionObject(): copy __dict__ into def->m_base.m_copy - 18. _PyImport_FixupExtensionObject(): add it to _PyRuntime.imports.extensions + 12. _PyImport_RunDynamicModule(): set def->m_base.m_init + 13. _PyImport_RunDynamicModule(): set __file__ + 14. _imp_create_dynamic_impl() -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() + 15. _imp_create_dynamic_impl() -> fix_up_extension() + 16. fix_up_extension(): add it to _PyRuntime.imports.extensions + 17. fix_up_extension() -> fix_up_extension_for_interpreter() + 18. fix_up_extension_for_interpreter(): set it on sys.modules + 19. fix_up_extension_for_interpreter(): add it to interp->imports.modules_by_index + 20. fix_up_extension_for_interpreter(): copy __dict__ into def->m_base.m_copy (6). subsequent times (found in _PyRuntime.imports.extensions): 1. _imp_create_dynamic_impl() -> import_find_extension() @@ -654,11 +669,12 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) ...for single-phase init modules, where m_size >= 0: (6). not main interpreter and never loaded there - every time (not found in _PyRuntime.imports.extensions): - 1-16. (same as for m_size == -1) + 1-20. (same as for m_size == -1) (6). main interpreter - first time (not found in _PyRuntime.imports.extensions): - 1-16. (same as for m_size == -1) - 17. _PyImport_FixupExtensionObject(): add it to _PyRuntime.imports.extensions + 1-15. (same as for m_size == -1) + 16. fix_up_extension(): add it to _PyRuntime.imports.extensions + 17-20. (same as for m_size == -1) (6). previously loaded in main interpreter (found in _PyRuntime.imports.extensions): 1. _imp_create_dynamic_impl() -> import_find_extension() @@ -673,18 +689,18 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) (6). every time: 1. 
_imp_create_dynamic_impl() -> import_find_extension() (not found) - 2. _imp_create_dynamic_impl() -> _PyImport_LoadDynamicModuleWithSpec() - 3. _PyImport_LoadDynamicModuleWithSpec(): load module init func - 4. _PyImport_LoadDynamicModuleWithSpec(): call module init func - 5. _PyImport_LoadDynamicModuleWithSpec() -> PyModule_FromDefAndSpec() - 6. PyModule_FromDefAndSpec(): gather/check moduledef slots - 7. if there's a Py_mod_create slot: - 1. PyModule_FromDefAndSpec(): call its function - 8. else: - 1. PyModule_FromDefAndSpec() -> PyModule_NewObject() - 9: PyModule_FromDefAndSpec(): set mod->md_def - 10. PyModule_FromDefAndSpec() -> _add_methods_to_object() - 11. PyModule_FromDefAndSpec() -> PyModule_SetDocString() + 2. _imp_create_dynamic_impl() -> _PyImport_RunDynamicModule() + 3. _PyImport_RunDynamicModule(): load module init func + 4. _PyImport_RunDynamicModule(): call module init func + 5. import_run_extension() -> PyModule_FromDefAndSpec() + 6. PyModule_FromDefAndSpec(): gather/check moduledef slots + 7. if there's a Py_mod_create slot: + 1. PyModule_FromDefAndSpec(): call its function + 8. else: + 1. PyModule_FromDefAndSpec() -> PyModule_NewObject() + 9. PyModule_FromDefAndSpec(): set mod->md_def + 10. PyModule_FromDefAndSpec() -> _add_methods_to_object() + 11. PyModule_FromDefAndSpec() -> PyModule_SetDocString() (10). every time: 1. _imp_exec_dynamic_impl() -> exec_builtin_or_dynamic() @@ -894,7 +910,7 @@ extensions_lock_release(void) (module name, module name) (for built-in modules) or by (filename, module name) (for dynamically loaded modules), containing these modules. A copy of the module's dictionary is stored by calling - _PyImport_FixupExtensionObject() immediately after the module initialization + update_extensions_cache() immediately after the module initialization function succeeds. A copy can be retrieved from there by calling import_find_extension(). 
@@ -950,33 +966,63 @@ hashtable_destroy_str(void *ptr) #define HTSEP ':' +static int +_extensions_cache_init(void) +{ + _Py_hashtable_allocator_t alloc = {PyMem_RawMalloc, PyMem_RawFree}; + EXTENSIONS.hashtable = _Py_hashtable_new_full( + hashtable_hash_str, + hashtable_compare_str, + hashtable_destroy_str, // key + /* There's no need to decref the def since it's immortal. */ + NULL, // value + &alloc + ); + if (EXTENSIONS.hashtable == NULL) { + PyErr_NoMemory(); + return -1; + } + return 0; +} + +static _Py_hashtable_entry_t * +_extensions_cache_find_unlocked(PyObject *filename, PyObject *name, + void **p_key) +{ + if (EXTENSIONS.hashtable == NULL) { + return NULL; + } + void *key = hashtable_key_from_2_strings(filename, name, HTSEP); + if (key == NULL) { + return NULL; + } + _Py_hashtable_entry_t *entry = + _Py_hashtable_get_entry(EXTENSIONS.hashtable, key); + if (p_key != NULL) { + *p_key = key; + } + else { + hashtable_destroy_str(key); + } + return entry; +} + static PyModuleDef * _extensions_cache_get(PyObject *filename, PyObject *name) { PyModuleDef *def = NULL; - void *key = NULL; extensions_lock_acquire(); - if (EXTENSIONS.hashtable == NULL) { - goto finally; - } - - key = hashtable_key_from_2_strings(filename, name, HTSEP); - if (key == NULL) { - goto finally; - } - _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( - EXTENSIONS.hashtable, key); + _Py_hashtable_entry_t *entry = + _extensions_cache_find_unlocked(filename, name, NULL); if (entry == NULL) { + /* It was never added. 
*/ goto finally; } def = (PyModuleDef *)entry->value; finally: extensions_lock_release(); - if (key != NULL) { - PyMem_RawFree(key); - } return def; } @@ -984,50 +1030,39 @@ static int _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) { int res = -1; + assert(def != NULL); extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { - _Py_hashtable_allocator_t alloc = {PyMem_RawMalloc, PyMem_RawFree}; - EXTENSIONS.hashtable = _Py_hashtable_new_full( - hashtable_hash_str, - hashtable_compare_str, - hashtable_destroy_str, // key - /* There's no need to decref the def since it's immortal. */ - NULL, // value - &alloc - ); - if (EXTENSIONS.hashtable == NULL) { - PyErr_NoMemory(); + if (_extensions_cache_init() < 0) { goto finally; } } - void *key = hashtable_key_from_2_strings(filename, name, HTSEP); - if (key == NULL) { - goto finally; - } - int already_set = 0; - _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( - EXTENSIONS.hashtable, key); + void *key = NULL; + _Py_hashtable_entry_t *entry = + _extensions_cache_find_unlocked(filename, name, &key); if (entry == NULL) { + /* It was never added. */ if (_Py_hashtable_set(EXTENSIONS.hashtable, key, def) < 0) { - PyMem_RawFree(key); PyErr_NoMemory(); goto finally; } + /* The hashtable owns the key now. */ + key = NULL; + } + else if (entry->value == NULL) { + /* It was previously deleted. */ + entry->value = def; } else { - if (entry->value == NULL) { - entry->value = def; - } - else { - /* We expect it to be static, so it must be the same pointer. */ - assert((PyModuleDef *)entry->value == def); - already_set = 1; - } - PyMem_RawFree(key); + /* We expect it to be static, so it must be the same pointer. */ + assert((PyModuleDef *)entry->value == def); + /* It was already added. */ + already_set = 1; } + if (!already_set) { /* We assume that all module defs are statically allocated and will never be freed. Otherwise, we would incref here. 
*/ @@ -1037,13 +1072,15 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) finally: extensions_lock_release(); + if (key != NULL) { + hashtable_destroy_str(key); + } return res; } static void _extensions_cache_delete(PyObject *filename, PyObject *name) { - void *key = NULL; extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { @@ -1051,13 +1088,8 @@ _extensions_cache_delete(PyObject *filename, PyObject *name) goto finally; } - key = hashtable_key_from_2_strings(filename, name, HTSEP); - if (key == NULL) { - goto finally; - } - - _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( - EXTENSIONS.hashtable, key); + _Py_hashtable_entry_t *entry = + _extensions_cache_find_unlocked(filename, name, NULL); if (entry == NULL) { /* It was never added. */ goto finally; @@ -1075,9 +1107,6 @@ _extensions_cache_delete(PyObject *filename, PyObject *name) finally: extensions_lock_release(); - if (key != NULL) { - PyMem_RawFree(key); - } } static void @@ -1158,43 +1187,74 @@ is_core_module(PyInterpreterState *interp, PyObject *name, PyObject *filename) return 0; } -static int -fix_up_extension(PyObject *mod, PyObject *name, PyObject *filename) + +static enum _Py_ext_module_loader_result_kind +get_extension_kind(PyModuleDef *def) { - if (mod == NULL || !PyModule_Check(mod)) { - PyErr_BadInternalCall(); - return -1; + enum _Py_ext_module_loader_result_kind kind; + if (def == NULL) { + /* It must be a module created by reload_singlephase_extension() + * from m_copy. Ideally we'd do away with this case. 
*/ + kind = _Py_ext_module_loader_result_SINGLEPHASE; } - - struct PyModuleDef *def = PyModule_GetDef(mod); - if (!def) { - PyErr_BadInternalCall(); - return -1; + else if (def->m_slots == NULL) { + kind = _Py_ext_module_loader_result_SINGLEPHASE; } - - PyThreadState *tstate = _PyThreadState_GET(); - if (_modules_by_index_set(tstate->interp, def, mod) < 0) { - return -1; + else { + kind = _Py_ext_module_loader_result_MULTIPHASE; } + return kind; +} - // bpo-44050: Extensions and def->m_base.m_copy can be updated - // when the extension module doesn't support sub-interpreters. - if (def->m_size == -1) { - if (!is_core_module(tstate->interp, name, filename)) { + +struct cached_singlephase_info { + PyObject *m_dict; +}; + +static int +update_extensions_cache(PyThreadState *tstate, + PyObject *path, PyObject *name, PyModuleDef *def, + struct cached_singlephase_info *singlephase) +{ + /* Copy the module's __dict__, if applicable. */ + if (singlephase == NULL) { + assert(def->m_base.m_copy == NULL); + } + else { + assert(def->m_base.m_init != NULL + || is_core_module(tstate->interp, name, path)); + if (singlephase->m_dict != NULL) { + assert(PyDict_Check(singlephase->m_dict)); + // gh-88216: Extensions and def->m_base.m_copy can be updated + // when the extension module doesn't support sub-interpreters. + assert(def->m_size == -1); + assert(!is_core_module(tstate->interp, name, path)); assert(PyUnicode_CompareWithASCIIString(name, "sys") != 0); assert(PyUnicode_CompareWithASCIIString(name, "builtins") != 0); + /* XXX gh-88216: The copied dict is owned by the current + * interpreter. That's a problem if the interpreter has + * its own obmalloc state or if the module is successfully + * imported into such an interpreter. If the interpreter + * has its own GIL then there may be data races and + * PyImport_ClearModulesByIndex() can crash. Normally, + * a single-phase init module cannot be imported in an + * isolated interpreter, but there are ways around that. 
+ * Hence, here be dragons! Ideally we would instead do + * something like make a read-only, immortal copy of the + * dict using PyMem_RawMalloc() and store *that* in m_copy. + * Then we'd need to make sure to clear that when the + * runtime is finalized, rather than in + * PyImport_ClearModulesByIndex(). */ if (def->m_base.m_copy) { /* Somebody already imported the module, likely under a different name. XXX this should really not happen. */ Py_CLEAR(def->m_base.m_copy); } - PyObject *dict = PyModule_GetDict(mod); - if (dict == NULL) { - return -1; - } - def->m_base.m_copy = PyDict_Copy(dict); + def->m_base.m_copy = PyDict_Copy(singlephase->m_dict); if (def->m_base.m_copy == NULL) { + // XXX Ignore this error? Doing so would effectively + // mark the module as not loadable. return -1; } } @@ -1202,7 +1262,11 @@ fix_up_extension(PyObject *mod, PyObject *name, PyObject *filename) // XXX Why special-case the main interpreter? if (_Py_IsMainInterpreter(tstate->interp) || def->m_size == -1) { - if (_extensions_cache_set(filename, name, def) < 0) { +#ifndef NDEBUG + PyModuleDef *cached = _extensions_cache_get(path, name); + assert(cached == NULL || cached == def); +#endif + if (_extensions_cache_set(path, name, def) < 0) { return -1; } } @@ -1210,42 +1274,66 @@ fix_up_extension(PyObject *mod, PyObject *name, PyObject *filename) return 0; } -int -_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name, - PyObject *filename, PyObject *modules) +static int +fix_up_extension_for_interpreter(PyThreadState *tstate, + PyObject *mod, PyModuleDef *def, + PyObject *name, PyObject *modules) { - if (PyObject_SetItem(modules, name, mod) < 0) { + assert(mod != NULL && PyModule_Check(mod)); + assert(def == _PyModule_GetDef(mod)); + + if (_modules_by_index_set(tstate->interp, def, mod) < 0) { return -1; } - if (fix_up_extension(mod, name, filename) < 0) { - PyMapping_DelItem(modules, name); - return -1; + + if (modules != NULL) { + if (PyObject_SetItem(modules, name, mod) < 0) 
{ + return -1; + } } + return 0; } +struct interpreter_specific_info { + PyObject *modules; + PyObject *name; + PyModuleDef *def; +}; -static PyObject * -import_find_extension(PyThreadState *tstate, PyObject *name, - PyObject *filename) +static int +fix_up_extension(PyThreadState *tstate, PyObject *mod, PyObject *filename, + struct interpreter_specific_info *fix_interp) { - /* Only single-phase init modules will be in the cache. */ - PyModuleDef *def = _extensions_cache_get(filename, name); - if (def == NULL) { - return NULL; + assert(mod != NULL && PyModule_Check(mod)); + + if (filename != NULL) { + /* Remember the filename as the __file__ attribute */ + if (PyModule_AddObjectRef(mod, "__file__", filename) < 0) { + PyErr_Clear(); /* Not important enough to report */ + } } - /* It may have been successfully imported previously - in an interpreter that allows legacy modules - but is not allowed in the current interpreter. */ - const char *name_buf = PyUnicode_AsUTF8(name); - assert(name_buf != NULL); - if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { - return NULL; + /* Make interpreter-specific fixes. */ + if (fix_interp != NULL) { + if (fix_up_extension_for_interpreter( + tstate, mod, fix_interp->def, fix_interp->name, + fix_interp->modules) < 0) + { + return -1; + } } - PyObject *mod, *mdict; - PyObject *modules = MODULES(tstate->interp); + return 0; +} + + +static PyObject * +reload_singlephase_extension(PyThreadState *tstate, PyModuleDef *def, + struct _Py_ext_module_loader_info *info) +{ + PyObject *mod; + PyObject *modules = get_modules_dict(tstate, true); if (def->m_size == -1) { PyObject *m_copy = def->m_base.m_copy; @@ -1253,16 +1341,18 @@ import_find_extension(PyThreadState *tstate, PyObject *name, if (m_copy == NULL) { /* It might be a core module (e.g. sys & builtins), for which we don't set m_copy. 
*/ - m_copy = get_core_module_dict(tstate->interp, name, filename); + m_copy = get_core_module_dict( + tstate->interp, info->name, info->path); if (m_copy == NULL) { + assert(!PyErr_Occurred()); return NULL; } } - mod = import_add_module(tstate, name); + mod = import_add_module(tstate, info->name); if (mod == NULL) { return NULL; } - mdict = PyModule_GetDict(mod); + PyObject *mdict = PyModule_GetDict(mod); if (mdict == NULL) { Py_DECREF(mod); return NULL; @@ -1271,37 +1361,80 @@ import_find_extension(PyThreadState *tstate, PyObject *name, Py_DECREF(mod); return NULL; } + /* We can't set mod->md_def if it's missing, + * because _PyImport_ClearModulesByIndex() might break + * due to violating interpreter isolation. See the note + * in update_extensions_cache(). Until that + * is solved, we leave md_def set to NULL. */ + assert(_PyModule_GetDef(mod) == NULL + || _PyModule_GetDef(mod) == def); } else { - if (def->m_base.m_init == NULL) + if (def->m_base.m_init == NULL) { + assert(!PyErr_Occurred()); return NULL; - mod = def->m_base.m_init(); - if (mod == NULL) + } + struct _Py_ext_module_loader_result res; + if (_PyImport_RunModInitFunc(def->m_base.m_init, info, &res) < 0) { + _Py_ext_module_loader_result_apply_error(&res); return NULL; - if (PyObject_SetItem(modules, name, mod) == -1) { + } + assert(!PyErr_Occurred()); + assert(res.kind == _Py_ext_module_loader_result_SINGLEPHASE); + mod = res.module; + // XXX __file__ doesn't get set! + if (PyObject_SetItem(modules, info->name, mod) == -1) { Py_DECREF(mod); return NULL; } } + if (_modules_by_index_set(tstate->interp, def, mod) < 0) { - PyMapping_DelItem(modules, name); + PyMapping_DelItem(modules, info->name); Py_DECREF(mod); return NULL; } + return mod; +} + +static PyObject * +import_find_extension(PyThreadState *tstate, + struct _Py_ext_module_loader_info *info) +{ + /* Only single-phase init modules will be in the cache. 
*/ + PyModuleDef *def = _extensions_cache_get(info->path, info->name); + if (def == NULL) { + return NULL; + } + + /* It may have been successfully imported previously + in an interpreter that allows legacy modules + but is not allowed in the current interpreter. */ + const char *name_buf = PyUnicode_AsUTF8(info->name); + assert(name_buf != NULL); + if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { + return NULL; + } + + PyObject *mod = reload_singlephase_extension(tstate, def, info); + if (mod == NULL) { + return NULL; + } + int verbose = _PyInterpreterState_GetConfig(tstate->interp)->verbose; if (verbose) { PySys_FormatStderr("import %U # previously loaded (%R)\n", - name, filename); + info->name, info->path); } return mod; } static int clear_singlephase_extension(PyInterpreterState *interp, - PyObject *name, PyObject *filename) + PyObject *name, PyObject *path) { - PyModuleDef *def = _extensions_cache_get(filename, name); + PyModuleDef *def = _extensions_cache_get(path, name); if (def == NULL) { if (PyErr_Occurred()) { return -1; @@ -1322,32 +1455,140 @@ clear_singlephase_extension(PyInterpreterState *interp, } /* Clear the cached module def. */ - _extensions_cache_delete(filename, name); + _extensions_cache_delete(path, name); return 0; } +static PyObject * +import_run_extension(PyThreadState *tstate, PyModInitFunction p0, + struct _Py_ext_module_loader_info *info, + PyObject *spec, PyObject *modules) +{ + PyObject *mod = NULL; + PyModuleDef *def = NULL; + bool iscore = is_core_module(tstate->interp, info->name, info->path); + + struct _Py_ext_module_loader_result res; + if (_PyImport_RunModInitFunc(p0, info, &res) < 0) { + /* We discard res.def. 
*/ + assert(res.module == NULL); + _Py_ext_module_loader_result_apply_error(&res); + goto finally; + } + assert(!PyErr_Occurred()); + + mod = res.module; + res.module = NULL; + def = res.def; + assert(def != NULL); + + if (res.kind ==_Py_ext_module_loader_result_MULTIPHASE) { + assert(mod == NULL); + mod = PyModule_FromDefAndSpec(def, spec); + if (mod == NULL) { + goto finally; + } + } + else { + assert(res.kind ==_Py_ext_module_loader_result_SINGLEPHASE); + assert(PyModule_Check(mod)); + + /* Make sure this module is allowed in this interpreter. */ + const char *name_buf = PyBytes_AS_STRING(info->name_encoded); + if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { + Py_CLEAR(mod); + goto finally; + } + + /* Do any final fixes to the module. */ + struct interpreter_specific_info interp_specific = { + .modules=modules, + .name=info->name, + .def=def, + }; + if (fix_up_extension( + tstate, mod, info->filename, &interp_specific) < 0) + { + Py_CLEAR(mod); + goto finally; + } + + /* Add the module def to the global cache. */ + struct cached_singlephase_info singlephase = {0}; + // gh-88216: Extensions and def->m_base.m_copy can be updated + // when the extension module doesn't support sub-interpreters. 
+ if (def->m_size == -1 && !iscore) { + singlephase.m_dict = PyModule_GetDict(mod); + assert(singlephase.m_dict != NULL); + } + if (update_extensions_cache( + tstate, info->path, info->name, def, &singlephase) < 0) + { + Py_CLEAR(mod); + goto finally; + } + } + +finally: + return mod; +} + /*******************/ /* builtin modules */ /*******************/ int -_PyImport_FixupBuiltin(PyObject *mod, const char *name, PyObject *modules) +_PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, + PyObject *modules) { int res = -1; + assert(mod != NULL && PyModule_Check(mod)); + PyObject *nameobj; nameobj = PyUnicode_InternFromString(name); if (nameobj == NULL) { return -1; } - if (PyObject_SetItem(modules, nameobj, mod) < 0) { + + PyModuleDef *def = PyModule_GetDef(mod); + if (def == NULL) { + PyErr_BadInternalCall(); + goto finally; + } + + /* We only use _PyImport_FixupBuiltin() for the core builtin modules + * (sys and builtins). These modules are single-phase init with no + * module state, but we also don't populate def->m_base.m_copy + * for them. */ + assert(is_core_module(tstate->interp, nameobj, nameobj)); + assert(get_extension_kind(def) == + _Py_ext_module_loader_result_SINGLEPHASE); + assert(def->m_size == -1); + assert(def->m_base.m_copy == NULL); + + /* Do the normal fixes to the module. */ + struct interpreter_specific_info interp_specific = { + .modules=modules, + .name=nameobj, + .def=def, + }; + if (fix_up_extension(tstate, mod, NULL, &interp_specific) < 0) { goto finally; } - if (fix_up_extension(mod, nameobj, nameobj) < 0) { - PyMapping_DelItem(modules, nameobj); + + /* Add the module def to the global cache. */ + struct cached_singlephase_info singlephase = { + /* We don't want def->m_base.m_copy populated. 
*/ + .m_dict=NULL, + }; + if (update_extensions_cache( + tstate, nameobj, nameobj, def, &singlephase) < 0) + { goto finally; } + res = 0; finally: @@ -1376,45 +1617,50 @@ is_builtin(PyObject *name) static PyObject* create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) { - PyObject *mod = import_find_extension(tstate, name, name); - if (mod || _PyErr_Occurred(tstate)) { - return mod; + struct _Py_ext_module_loader_info info; + if (_Py_ext_module_loader_info_init_for_builtin(&info, name) < 0) { + return NULL; } - PyObject *modules = MODULES(tstate->interp); + PyObject *mod = import_find_extension(tstate, &info); + if (mod != NULL) { + assert(!_PyErr_Occurred(tstate)); + assert(get_extension_kind(_PyModule_GetDef(mod)) + == _Py_ext_module_loader_result_SINGLEPHASE); + goto finally; + } + else if (_PyErr_Occurred(tstate)) { + goto finally; + } + + /* Look up the module in the inittab. */ + struct _inittab *found = NULL; for (struct _inittab *p = INITTAB; p->name != NULL; p++) { if (_PyUnicode_EqualToASCIIString(name, p->name)) { - if (p->initfunc == NULL) { - /* Cannot re-init internal module ("sys" or "builtins") */ - return import_add_module(tstate, name); - } - mod = (*p->initfunc)(); - if (mod == NULL) { - return NULL; - } - - if (PyObject_TypeCheck(mod, &PyModuleDef_Type)) { - return PyModule_FromDefAndSpec((PyModuleDef*)mod, spec); - } - else { - /* Remember pointer to module init function. 
*/ - PyModuleDef *def = PyModule_GetDef(mod); - if (def == NULL) { - return NULL; - } - - def->m_base.m_init = p->initfunc; - if (_PyImport_FixupExtensionObject(mod, name, name, - modules) < 0) { - return NULL; - } - return mod; - } + found = p; + break; } } + if (found == NULL) { + // not found + mod = Py_NewRef(Py_None); + goto finally; + } - // not found - Py_RETURN_NONE; + PyModInitFunction p0 = (*found->initfunc); + if (p0 == NULL) { + /* Cannot re-init internal module ("sys" or "builtins") */ + mod = import_add_module(tstate, name); + goto finally; + } + + /* Now load it. */ + mod = import_run_extension( + tstate, p0, &info, spec, get_modules_dict(tstate, true)); + +finally: + _Py_ext_module_loader_info_clear(&info); + return mod; } @@ -3724,44 +3970,63 @@ static PyObject * _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) /*[clinic end generated code: output=83249b827a4fde77 input=c31b954f4cf4e09d]*/ { - PyObject *mod, *name, *path; FILE *fp; + PyThreadState *tstate = _PyThreadState_GET(); - name = PyObject_GetAttrString(spec, "name"); - if (name == NULL) { + struct _Py_ext_module_loader_info info; + if (_Py_ext_module_loader_info_init_from_spec(&info, spec) < 0) { return NULL; } - path = PyObject_GetAttrString(spec, "origin"); - if (path == NULL) { - Py_DECREF(name); - return NULL; + PyObject *mod = import_find_extension(tstate, &info); + if (mod != NULL) { + assert(!_PyErr_Occurred(tstate)); + assert(get_extension_kind(_PyModule_GetDef(mod)) + == _Py_ext_module_loader_result_SINGLEPHASE); + goto finally; + } + else if (_PyErr_Occurred(tstate)) { + goto finally; } + /* Otherwise it must be multi-phase init or the first time it's loaded. 
*/ - PyThreadState *tstate = _PyThreadState_GET(); - mod = import_find_extension(tstate, name, path); - if (mod != NULL || _PyErr_Occurred(tstate)) { - assert(mod == NULL || !_PyErr_Occurred(tstate)); + if (PySys_Audit("import", "OOOOO", info.name, info.filename, + Py_None, Py_None, Py_None) < 0) + { goto finally; } + /* We would move this (and the fclose() below) into + * _PyImport_GetModInitFunc(), but it isn't clear if the intervening + * code relies on fp still being open. */ if (file != NULL) { - fp = _Py_fopen_obj(path, "r"); + fp = _Py_fopen_obj(info.filename, "r"); if (fp == NULL) { goto finally; } } - else + else { fp = NULL; + } - mod = _PyImport_LoadDynamicModuleWithSpec(spec, fp); + PyModInitFunction p0 = _PyImport_GetModInitFunc(&info, fp); + if (p0 == NULL) { + goto finally; + } + + mod = import_run_extension( + tstate, p0, &info, spec, get_modules_dict(tstate, true)); + if (mod == NULL) { + goto finally; + } - if (fp) + // XXX Shouldn't this happen in the error cases too (i.e. in "finally")? 
+ if (fp) { fclose(fp); + } finally: - Py_DECREF(name); - Py_DECREF(path); + _Py_ext_module_loader_info_clear(&info); return mod; } diff --git a/Python/importdl.c b/Python/importdl.c index 7dfd301d77efb4..7bd761168f0afa 100644 --- a/Python/importdl.c +++ b/Python/importdl.c @@ -93,59 +93,128 @@ get_encoded_name(PyObject *name, const char **hook_prefix) { return NULL; } -PyObject * -_PyImport_LoadDynamicModuleWithSpec(PyObject *spec, FILE *fp) +void +_Py_ext_module_loader_info_clear(struct _Py_ext_module_loader_info *info) { + Py_CLEAR(info->filename); #ifndef MS_WINDOWS - PyObject *pathbytes = NULL; + Py_CLEAR(info->filename_encoded); #endif - PyObject *name_unicode = NULL, *name = NULL, *path = NULL, *m = NULL; - const char *name_buf, *hook_prefix; - const char *oldcontext, *newcontext; - dl_funcptr exportfunc; - PyModuleDef *def; - PyModInitFunction p0; + Py_CLEAR(info->name); + Py_CLEAR(info->name_encoded); + info->path = NULL; +} - name_unicode = PyObject_GetAttrString(spec, "name"); - if (name_unicode == NULL) { - return NULL; +int +_Py_ext_module_loader_info_init_for_builtin( + struct _Py_ext_module_loader_info *info, + PyObject *name) +{ + assert(PyUnicode_Check(name)); + +#ifndef NDEBUG + Py_ssize_t name_len = PyUnicode_GetLength(name); +#endif + assert(name_len > 0); + assert(PyUnicode_FindChar(name, '.', 0, name_len, -1) == -1); + PyObject *name_encoded = PyUnicode_AsEncodedString(name, "ascii", NULL); + if (name_encoded == NULL) { + return -1; } - if (!PyUnicode_Check(name_unicode)) { + + *info = (struct _Py_ext_module_loader_info){ + .name=Py_NewRef(name), + .name_encoded=name_encoded, + .path=name, + /* We won't need path. 
*/ + .hook_prefix=ascii_only_prefix, + .newcontext=NULL, + }; + return 0; +} + +int +_Py_ext_module_loader_info_init_from_spec( + struct _Py_ext_module_loader_info *p_info, + PyObject *spec) +{ + struct _Py_ext_module_loader_info info = {0}; + + info.name = PyObject_GetAttrString(spec, "name"); + if (info.name == NULL) { + return -1; + } + if (!PyUnicode_Check(info.name)) { PyErr_SetString(PyExc_TypeError, "spec.name must be a string"); - goto error; + _Py_ext_module_loader_info_clear(&info); + return -1; } - newcontext = PyUnicode_AsUTF8(name_unicode); - if (newcontext == NULL) { - goto error; + + info.name_encoded = get_encoded_name(info.name, &info.hook_prefix); + if (info.name_encoded == NULL) { + _Py_ext_module_loader_info_clear(&info); + return -1; } - name = get_encoded_name(name_unicode, &hook_prefix); - if (name == NULL) { - goto error; + info.newcontext = PyUnicode_AsUTF8(info.name); + if (info.newcontext == NULL) { + _Py_ext_module_loader_info_clear(&info); + return -1; } - name_buf = PyBytes_AS_STRING(name); - path = PyObject_GetAttrString(spec, "origin"); - if (path == NULL) - goto error; + info.filename = PyObject_GetAttrString(spec, "origin"); + if (info.filename == NULL) { + _Py_ext_module_loader_info_clear(&info); + return -1; + } - if (PySys_Audit("import", "OOOOO", name_unicode, path, - Py_None, Py_None, Py_None) < 0) { - goto error; +#ifndef MS_WINDOWS + info.filename_encoded = PyUnicode_EncodeFSDefault(info.filename); + if (info.filename_encoded == NULL) { + _Py_ext_module_loader_info_clear(&info); + return -1; } +#endif + + info.path = info.filename; + *p_info = info; + return 0; +} + +void +_Py_ext_module_loader_result_apply_error( + struct _Py_ext_module_loader_result *res) +{ + if (res->err[0] != '\0') { + if (PyErr_Occurred()) { + _PyErr_FormatFromCause(PyExc_SystemError, res->err); + } + else { + PyErr_SetString(PyExc_SystemError, res->err); + } + } + else { + assert(PyErr_Occurred()); + } +} + +PyModInitFunction 
+_PyImport_GetModInitFunc(struct _Py_ext_module_loader_info *info, + FILE *fp) +{ + const char *name_buf = PyBytes_AS_STRING(info->name_encoded); + dl_funcptr exportfunc; #ifdef MS_WINDOWS - exportfunc = _PyImport_FindSharedFuncptrWindows(hook_prefix, name_buf, - path, fp); + exportfunc = _PyImport_FindSharedFuncptrWindows( + info->hook_prefix, name_buf, info->filename, fp); #else - pathbytes = PyUnicode_EncodeFSDefault(path); - if (pathbytes == NULL) - goto error; - exportfunc = _PyImport_FindSharedFuncptr(hook_prefix, name_buf, - PyBytes_AS_STRING(pathbytes), - fp); - Py_DECREF(pathbytes); + { + const char *path_buf = PyBytes_AS_STRING(info->filename_encoded); + exportfunc = _PyImport_FindSharedFuncptr( + info->hook_prefix, name_buf, path_buf, fp); + } #endif if (exportfunc == NULL) { @@ -154,101 +223,120 @@ _PyImport_LoadDynamicModuleWithSpec(PyObject *spec, FILE *fp) msg = PyUnicode_FromFormat( "dynamic module does not define " "module export function (%s_%s)", - hook_prefix, name_buf); - if (msg == NULL) - goto error; - PyErr_SetImportError(msg, name_unicode, path); - Py_DECREF(msg); + info->hook_prefix, name_buf); + if (msg != NULL) { + PyErr_SetImportError(msg, info->name, info->filename); + Py_DECREF(msg); + } } - goto error; + return NULL; } - p0 = (PyModInitFunction)exportfunc; + return (PyModInitFunction)exportfunc; +} + +int +_PyImport_RunModInitFunc(PyModInitFunction p0, + struct _Py_ext_module_loader_info *info, + struct _Py_ext_module_loader_result *p_res) +{ + struct _Py_ext_module_loader_result res = { + .kind=_Py_ext_module_loader_result_UNKNOWN, + }; + const char *name_buf = PyBytes_AS_STRING(info->name_encoded); /* Package context is needed for single-phase init */ - oldcontext = _PyImport_SwapPackageContext(newcontext); - m = p0(); + const char *oldcontext = _PyImport_SwapPackageContext(info->newcontext); + PyObject *m = p0(); _PyImport_SwapPackageContext(oldcontext); +#ifdef NDEBUG +# define SET_ERROR(...) 
\ + (void)snprintf(res.err, Py_ARRAY_LENGTH(res.err), __VA_ARGS__) +#else +# define SET_ERROR(...) \ + do { \ + int n = snprintf(res.err, Py_ARRAY_LENGTH(res.err), __VA_ARGS__); \ + assert(n < Py_ARRAY_LENGTH(res.err)); \ + } while (0) +#endif + if (m == NULL) { if (!PyErr_Occurred()) { - PyErr_Format( - PyExc_SystemError, + SET_ERROR( "initialization of %s failed without raising an exception", name_buf); } goto error; } else if (PyErr_Occurred()) { - _PyErr_FormatFromCause( - PyExc_SystemError, - "initialization of %s raised unreported exception", - name_buf); + SET_ERROR("initialization of %s raised unreported exception", + name_buf); + /* We would probably be correct to decref m here, + * but we weren't doing so before, + * so we stick with doing nothing. */ m = NULL; goto error; } + if (Py_IS_TYPE(m, NULL)) { /* This can happen when a PyModuleDef is returned without calling * PyModuleDef_Init on it */ - PyErr_Format(PyExc_SystemError, - "init function of %s returned uninitialized object", - name_buf); + res.kind = _Py_ext_module_loader_result_INVALID; + SET_ERROR("init function of %s returned uninitialized object", + name_buf); + /* Likewise, decref'ing here makes sense. However, the original + * code has a note about "prevent segfault in DECREF", + * so we play it safe and leave it alone. 
*/ m = NULL; /* prevent segfault in DECREF */ goto error; } - if (PyObject_TypeCheck(m, &PyModuleDef_Type)) { - Py_DECREF(name_unicode); - Py_DECREF(name); - Py_DECREF(path); - return PyModule_FromDefAndSpec((PyModuleDef*)m, spec); - } - - /* Fall back to single-phase init mechanism */ - if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { - goto error; + if (PyObject_TypeCheck(m, &PyModuleDef_Type)) { + /* multi-phase init */ + res.kind = _Py_ext_module_loader_result_MULTIPHASE; + res.def = (PyModuleDef *)m; } + else { + /* single-phase init (legacy) */ + res.kind = _Py_ext_module_loader_result_SINGLEPHASE; + res.module = m; - if (hook_prefix == nonascii_prefix) { - /* don't allow legacy init for non-ASCII module names */ - PyErr_Format( - PyExc_SystemError, - "initialization of %s did not return PyModuleDef", - name_buf); - goto error; - } + if (!PyModule_Check(m)) { + SET_ERROR("initialization of %s did not return an extension " + "module", name_buf); + goto error; + } - /* Remember pointer to module init function. */ - def = PyModule_GetDef(m); - if (def == NULL) { - PyErr_Format(PyExc_SystemError, - "initialization of %s did not return an extension " - "module", name_buf); - goto error; - } - def->m_base.m_init = p0; + /* Remember pointer to module init function. 
*/ + res.def = PyModule_GetDef(m); + if (res.def == NULL) { + PyErr_Clear(); + SET_ERROR("initialization of %s did not return a valid extension " + "module", name_buf); + goto error; + } + res.def->m_base.m_init = p0; - /* Remember the filename as the __file__ attribute */ - if (PyModule_AddObjectRef(m, "__file__", path) < 0) { - PyErr_Clear(); /* Not important enough to report */ + if (info->hook_prefix == nonascii_prefix) { + /* don't allow legacy init for non-ASCII module names */ + SET_ERROR("initialization of %s did not return PyModuleDef", + name_buf); + goto error; + } } +#undef SET_ERROR - PyObject *modules = PyImport_GetModuleDict(); - if (_PyImport_FixupExtensionObject(m, name_unicode, path, modules) < 0) - goto error; - - Py_DECREF(name_unicode); - Py_DECREF(name); - Py_DECREF(path); - - return m; + assert(!PyErr_Occurred()); + *p_res = res; + return 0; error: - Py_DECREF(name_unicode); - Py_XDECREF(name); - Py_XDECREF(path); - Py_XDECREF(m); - return NULL; + assert(PyErr_Occurred() || res.err[0] != '\0'); + Py_CLEAR(res.module); + res.def = NULL; + *p_res = res; + return -1; } #endif /* HAVE_DYNAMIC_LOADING */ diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index efb25878312d85..eaca859dc24700 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -777,7 +777,7 @@ pycore_init_builtins(PyThreadState *tstate) } PyObject *modules = _PyImport_GetModules(interp); - if (_PyImport_FixupBuiltin(bimod, "builtins", modules) < 0) { + if (_PyImport_FixupBuiltin(tstate, bimod, "builtins", modules) < 0) { goto error; } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 7b4a643bccd1dd..b3c63057aa337f 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -3764,7 +3764,7 @@ _PySys_Create(PyThreadState *tstate, PyObject **sysmod_p) return status; } - if (_PyImport_FixupBuiltin(sysmod, "sys", modules) < 0) { + if (_PyImport_FixupBuiltin(tstate, sysmod, "sys", modules) < 0) { goto error; }