diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 218c3978e64dce..5be0ff6764c693 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -72,6 +72,8 @@ PyAPI_DATA(PyThreadState *) _PyThreadState_GetCurrent(void); extern int _PyThreadState_CheckConsistency(PyThreadState *tstate); #endif +extern int _PyThreadState_MustExit(PyThreadState *tstate); + /* Get the current Python thread state. This function is unsafe: it does not check for error and it can return NULL. diff --git a/Misc/NEWS.d/next/Library/2023-09-08-12-09-55.gh-issue-108987.x5AIG8.rst b/Misc/NEWS.d/next/Library/2023-09-08-12-09-55.gh-issue-108987.x5AIG8.rst new file mode 100644 index 00000000000000..16526ee748d869 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-09-08-12-09-55.gh-issue-108987.x5AIG8.rst @@ -0,0 +1,4 @@ +Fix :func:`_thread.start_new_thread` race condition. If a thread is created +during Python finalization, the newly spawned thread now exits immediately +instead of trying to access freed memory, which could lead to a crash. Patch by +Victor Stinner. 
diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 18fd65ac9f420f..5edb6e9875d1ab 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -1063,22 +1063,22 @@ _localdummy_destroyed(PyObject *localweakref, PyObject *dummyweakref) /* Module functions */ struct bootstate { - PyInterpreterState *interp; + PyThreadState *tstate; PyObject *func; PyObject *args; PyObject *kwargs; - PyThreadState *tstate; - _PyRuntimeState *runtime; }; static void -thread_bootstate_free(struct bootstate *boot) +thread_bootstate_free(struct bootstate *boot, int decref) { - Py_DECREF(boot->func); - Py_DECREF(boot->args); - Py_XDECREF(boot->kwargs); - PyMem_Free(boot); + if (decref) { + Py_DECREF(boot->func); + Py_DECREF(boot->args); + Py_XDECREF(boot->kwargs); + } + PyMem_RawFree(boot); } @@ -1088,9 +1088,24 @@ thread_run(void *boot_raw) struct bootstate *boot = (struct bootstate *) boot_raw; PyThreadState *tstate = boot->tstate; - // gh-104690: If Python is being finalized and PyInterpreterState_Delete() - // was called, tstate becomes a dangling pointer. - assert(_PyThreadState_CheckConsistency(tstate)); + // gh-108987: If _thread.start_new_thread() is called before or while + // Python is being finalized, thread_run() can be called *after* + // _PyRuntimeState_SetFinalizing() is called. At this point, all Python + // threads must exit, except the thread calling Py_Finalize() which holds + // the GIL and must not exit. + // + // At this stage, tstate can be a dangling pointer (point to freed memory), + // it's ok to call _PyThreadState_MustExit() with a dangling pointer. + if (_PyThreadState_MustExit(tstate)) { + // Don't call PyThreadState_Clear() nor _PyThreadState_DeleteCurrent(). + // These functions are called on tstate indirectly by Py_Finalize() + // which calls _PyInterpreterState_Clear(). + // + // Py_DECREF() cannot be called because the GIL is not held: leak + // references on purpose. Python is being finalized anyway. 
+ thread_bootstate_free(boot, 0); + goto exit; + } _PyThreadState_Bind(tstate); PyEval_AcquireThread(tstate); @@ -1109,14 +1124,17 @@ thread_run(void *boot_raw) Py_DECREF(res); } - thread_bootstate_free(boot); + thread_bootstate_free(boot, 1); + tstate->interp->threads.count--; PyThreadState_Clear(tstate); _PyThreadState_DeleteCurrent(tstate); +exit: // bpo-44434: Don't call explicitly PyThread_exit_thread(). On Linux with // the glibc, pthread_exit() can abort the whole process if dlopen() fails // to open the libgcc_s.so library (ex: EMFILE error). + return; } static PyObject * @@ -1140,7 +1158,6 @@ and False otherwise.\n"); static PyObject * thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs) { - _PyRuntimeState *runtime = &_PyRuntime; PyObject *func, *args, *kwargs = NULL; if (!PyArg_UnpackTuple(fargs, "start_new_thread", 2, 3, @@ -1179,20 +1196,21 @@ thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs) return NULL; } - struct bootstate *boot = PyMem_NEW(struct bootstate, 1); + // gh-109795: Use PyMem_RawMalloc() instead of PyMem_Malloc(), + // because it should be possible to call thread_bootstate_free() + // without holding the GIL. 
+ struct bootstate *boot = PyMem_RawMalloc(sizeof(struct bootstate)); if (boot == NULL) { return PyErr_NoMemory(); } - boot->interp = _PyInterpreterState_GET(); - boot->tstate = _PyThreadState_New(boot->interp); + boot->tstate = _PyThreadState_New(interp); if (boot->tstate == NULL) { - PyMem_Free(boot); + PyMem_RawFree(boot); if (!PyErr_Occurred()) { return PyErr_NoMemory(); } return NULL; } - boot->runtime = runtime; boot->func = Py_NewRef(func); boot->args = Py_NewRef(args); boot->kwargs = Py_XNewRef(kwargs); @@ -1201,7 +1219,7 @@ thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs) if (ident == PYTHREAD_INVALID_THREAD_ID) { PyErr_SetString(ThreadError, "can't start new thread"); PyThreadState_Clear(boot->tstate); - thread_bootstate_free(boot); + thread_bootstate_free(boot, 1); return NULL; } return PyLong_FromUnsignedLong(ident); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index b44cb0b9b6e09d..c1ab5883568e7d 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -328,28 +328,6 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) } -/* Check if a Python thread must exit immediately, rather than taking the GIL - if Py_Finalize() has been called. - - When this function is called by a daemon thread after Py_Finalize() has been - called, the GIL does no longer exist. - - tstate must be non-NULL. */ -static inline int -tstate_must_exit(PyThreadState *tstate) -{ - /* bpo-39877: Access _PyRuntime directly rather than using - tstate->interp->runtime to support calls from Python daemon threads. - After Py_Finalize() has been called, tstate can be a dangling pointer: - point to PyThreadState freed memory. */ - PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(&_PyRuntime); - if (finalizing == NULL) { - finalizing = _PyInterpreterState_GetFinalizing(tstate->interp); - } - return (finalizing != NULL && finalizing != tstate); -} - - /* Take the GIL. The function saves errno at entry and restores its value at exit. 
@@ -365,7 +343,7 @@ take_gil(PyThreadState *tstate) // XXX It may be more correct to check tstate->_status.finalizing. // XXX assert(!tstate->_status.cleared); - if (tstate_must_exit(tstate)) { + if (_PyThreadState_MustExit(tstate)) { /* bpo-39877: If Py_Finalize() has been called and tstate is not the thread which called Py_Finalize(), exit immediately the thread. @@ -403,7 +381,7 @@ take_gil(PyThreadState *tstate) _Py_atomic_load_relaxed(&gil->locked) && gil->switch_number == saved_switchnum) { - if (tstate_must_exit(tstate)) { + if (_PyThreadState_MustExit(tstate)) { MUTEX_UNLOCK(gil->mutex); // gh-96387: If the loop requested a drop request in a previous // iteration, reset the request. Otherwise, drop_gil() can @@ -443,7 +421,7 @@ take_gil(PyThreadState *tstate) MUTEX_UNLOCK(gil->switch_mutex); #endif - if (tstate_must_exit(tstate)) { + if (_PyThreadState_MustExit(tstate)) { /* bpo-36475: If Py_Finalize() has been called and tstate is not the thread which called Py_Finalize(), exit immediately the thread. diff --git a/Python/pystate.c b/Python/pystate.c index 1fe88fdf5a8218..b77827ff8efa28 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1867,6 +1867,10 @@ PyThreadState_Swap(PyThreadState *newts) void _PyThreadState_Bind(PyThreadState *tstate) { + // gh-104690: If Python is being finalized and PyInterpreterState_Delete() + // was called, tstate becomes a dangling pointer. + assert(_PyThreadState_CheckConsistency(tstate)); + bind_tstate(tstate); // This makes sure there's a gilstate tstate bound // as soon as possible. @@ -2866,6 +2870,31 @@ _PyThreadState_CheckConsistency(PyThreadState *tstate) #endif +// Check if a Python thread must exit immediately, rather than taking the GIL +// if Py_Finalize() has been called. +// +// When this function is called by a daemon thread after Py_Finalize() has been +// called, the GIL no longer exists. 
+// +// tstate can be a dangling pointer (pointing to freed memory): only the tstate value +// is used, the pointer is not dereferenced. +// +// tstate must be non-NULL. +int +_PyThreadState_MustExit(PyThreadState *tstate) +{ + /* bpo-39877: Access _PyRuntime directly rather than using + tstate->interp->runtime to support calls from Python daemon threads. + After Py_Finalize() has been called, tstate can be a dangling pointer: + point to PyThreadState freed memory. */ + PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(&_PyRuntime); + if (finalizing == NULL) { + finalizing = _PyInterpreterState_GetFinalizing(tstate->interp); + } + return (finalizing != NULL && finalizing != tstate); +} + + #ifdef __cplusplus } #endif