From 88fc80cc27167de0a28d2e2a7320d120275fde48 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 7 Apr 2021 11:30:02 +0100 Subject: [PATCH 1/4] Remove redundant tracing_possible field from interpreter state. --- Include/internal/pycore_interp.h | 6 ------ Python/ceval.c | 13 ++++--------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index fa0e26feb0f843..dbdd109bd78839 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -33,12 +33,6 @@ struct _pending_calls { struct _ceval_state { int recursion_limit; - /* Records whether tracing is on for any thread. Counts the number - of threads for which tstate->c_tracefunc is non-NULL, so if the - value is 0, we know we don't have to check this thread's - c_tracefunc. This speeds up the if statement in - _PyEval_EvalFrameDefault() after fast_next_opcode. */ - int tracing_possible; /* This single variable consolidates all requests to break out of the fast path in the eval loop. */ _Py_atomic_int eval_breaker; diff --git a/Python/ceval.c b/Python/ceval.c index ea31179f8629b8..c335b604c3de2a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1110,8 +1110,6 @@ match_class(PyThreadState *tstate, PyObject *subject, PyObject *type, static int do_raise(PyThreadState *tstate, PyObject *exc, PyObject *cause); static int unpack_iterable(PyThreadState *, PyObject *, int, int, PyObject **); -#define _Py_TracingPossible(ceval) ((ceval)->tracing_possible) - PyObject * PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals) @@ -1308,7 +1306,7 @@ eval_frame_handle_pending(PyThreadState *tstate) #define DISPATCH() \ { \ - if (_Py_TracingPossible(ceval2) OR_DTRACE_LINE OR_LLTRACE) { \ + if (tstate->use_tracing OR_DTRACE_LINE OR_LLTRACE) { \ goto tracing_dispatch; \ } \ f->f_lasti = INSTR_OFFSET(); \ @@ -1596,8 +1594,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) int oparg; /* Current opcode argument, if any */ PyObject **fastlocals, **freevars; PyObject *retval = NULL; /* Return value */ - struct _ceval_state * const ceval2 = &tstate->interp->ceval; - _Py_atomic_int * const eval_breaker = &ceval2->eval_breaker; + _Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker; PyCodeObject *co; const _Py_CODEUNIT *first_instr; @@ -1783,7 +1780,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* line-by-line tracing support */ - if (_Py_TracingPossible(ceval2) && + if (tstate->use_tracing && tstate->c_tracefunc != NULL && !tstate->tracing) { int err; /* see maybe_call_line_trace() @@ -4544,7 +4541,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PUSH(val); PUSH(exc); JUMPTO(handler); - if (_Py_TracingPossible(ceval2)) { + if (tstate->use_tracing) { trace_info.instr_prev = INT_MAX; } /* Resume normal execution */ @@ -5627,9 +5624,7 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) return -1; } - struct _ceval_state *ceval2 = &tstate->interp->ceval; PyObject *traceobj = tstate->c_traceobj; - ceval2->tracing_possible += (func != NULL) - (tstate->c_tracefunc != NULL); tstate->c_tracefunc = NULL; tstate->c_traceobj = NULL; From 64580715ce10c72cec02b7f03e74c8f1b78b56a8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 7 Apr 2021 12:12:27 +0100 Subject: [PATCH 2/4] Move 'use_tracing' from tstate onto C stack, for fastest possible checking in dispatch logic. --- Include/cpython/pystate.h | 11 +++++++++- Python/ceval.c | 44 ++++++++++++++++++++++++--------------- Python/pystate.c | 3 ++- Python/sysmodule.c | 8 +++---- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index cfaee890f97151..7ee69879af65ac 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -29,6 +29,14 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); #define PyTrace_OPCODE 7 +typedef struct _cframe { + /* This struct will be threaded through the C stack + * allowing faster access to state that must can be modified + * outside of the interpreter must be accessed within it */ + int use_tracing; + struct _cframe *previous; +} CFrame; + typedef struct _err_stackitem { /* This struct represents an entry on the exception stack, which is a * per-coroutine state. (Coroutine in the computer science sense, @@ -61,7 +69,7 @@ struct _ts { This is to prevent the actual trace/profile code from being recorded in the trace/profile. */ int tracing; - int use_tracing; + CFrame *cframe; Py_tracefunc c_profilefunc; Py_tracefunc c_tracefunc; @@ -128,6 +136,7 @@ struct _ts { /* Unique thread state id. */ uint64_t id; + CFrame root_cframe; /* XXX signal handlers should also be here */ diff --git a/Python/ceval.c b/Python/ceval.c index c335b604c3de2a..8908e3c9a4aaac 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -38,6 +38,7 @@ typedef struct { PyCodeObject *code; // The code object for the bounds. May be NULL. int instr_prev; // Only valid if code != NULL. PyCodeAddressRange bounds; // Only valid if code != NULL. + CFrame cframe; } PyTraceInfo; @@ -1306,7 +1307,7 @@ eval_frame_handle_pending(PyThreadState *tstate) #define DISPATCH() \ { \ - if (tstate->use_tracing OR_DTRACE_LINE OR_LLTRACE) { \ + if (trace_info.cframe.use_tracing OR_DTRACE_LINE OR_LLTRACE) { \ goto tracing_dispatch; \ } \ f->f_lasti = INSTR_OFFSET(); \ @@ -1614,11 +1615,16 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* Mark trace_info as uninitialized */ trace_info.code = NULL; + CFrame *prev_cframe = tstate->cframe; + trace_info.cframe.use_tracing = prev_cframe->use_tracing; + trace_info.cframe.previous = prev_cframe; + tstate->cframe = &trace_info.cframe; + /* push frame */ tstate->frame = f; co = f->f_code; - if (tstate->use_tracing) { + if (trace_info.cframe.use_tracing) { if (tstate->c_tracefunc != NULL) { /* tstate->c_tracefunc, if defined, is a function that will be called on *every* entry @@ -1780,7 +1786,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* line-by-line tracing support */ - if (tstate->use_tracing && + if (trace_info.cframe.use_tracing && tstate->c_tracefunc != NULL && !tstate->tracing) { int err; /* see maybe_call_line_trace() @@ -4541,7 +4547,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PUSH(val); PUSH(exc); JUMPTO(handler); - if (tstate->use_tracing) { + if (trace_info.cframe.use_tracing) { trace_info.instr_prev = INT_MAX; } /* Resume normal execution */ @@ -4565,7 +4571,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) f->f_stackdepth = 0; f->f_state = FRAME_RAISED; exiting: - if (tstate->use_tracing) { + if (trace_info.cframe.use_tracing) { if (tstate->c_tracefunc) { if (call_trace_protected(tstate->c_tracefunc, tstate->c_traceobj, tstate, f, &trace_info, PyTrace_RETURN, retval)) { @@ -4582,6 +4588,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* pop frame */ exit_eval_frame: + + tstate->cframe = trace_info.cframe.previous; + tstate->cframe->use_tracing = trace_info.cframe.use_tracing; + if (PyDTrace_FUNCTION_RETURN_ENABLED()) dtrace_function_return(f); _Py_LeaveRecursiveCall(tstate); @@ -5505,7 +5515,7 @@ call_trace(Py_tracefunc func, PyObject *obj, if (tstate->tracing) return 0; tstate->tracing++; - tstate->use_tracing = 0; + tstate->cframe->use_tracing = 0; if (frame->f_lasti < 0) { frame->f_lineno = frame->f_code->co_firstlineno; } @@ -5515,7 +5525,7 @@ call_trace(Py_tracefunc func, PyObject *obj, } result = func(obj, frame, what, arg); frame->f_lineno = 0; - tstate->use_tracing = ((tstate->c_tracefunc != NULL) + tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL)); tstate->tracing--; return result; @@ -5526,15 +5536,15 @@ _PyEval_CallTracing(PyObject *func, PyObject *args) { PyThreadState *tstate = _PyThreadState_GET(); int save_tracing = tstate->tracing; - int save_use_tracing = tstate->use_tracing; + int save_use_tracing = tstate->cframe->use_tracing; PyObject *result; tstate->tracing = 0; - tstate->use_tracing = ((tstate->c_tracefunc != NULL) + tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL)); result = PyObject_Call(func, args, NULL); tstate->tracing = save_tracing; - tstate->use_tracing = save_use_tracing; + tstate->cframe->use_tracing = save_use_tracing; return result; } @@ -5588,7 +5598,7 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) tstate->c_profilefunc = NULL; tstate->c_profileobj = NULL; /* Must make sure that tracing is not ignored if 'profileobj' is freed */ - tstate->use_tracing = tstate->c_tracefunc != NULL; + tstate->cframe->use_tracing = tstate->c_tracefunc != NULL; Py_XDECREF(profileobj); Py_XINCREF(arg); @@ -5596,7 +5606,7 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) tstate->c_profilefunc = func; /* Flag that tracing or profiling is turned on */ - tstate->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL); + tstate->cframe->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL); return 0; } @@ -5629,7 +5639,7 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) tstate->c_tracefunc = NULL; tstate->c_traceobj = NULL; /* Must make sure that profiling is not ignored if 'traceobj' is freed */ - tstate->use_tracing = (tstate->c_profilefunc != NULL); + tstate->cframe->use_tracing = (tstate->c_profilefunc != NULL); Py_XDECREF(traceobj); Py_XINCREF(arg); @@ -5637,7 +5647,7 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) tstate->c_tracefunc = func; /* Flag that tracing or profiling is turned on */ - tstate->use_tracing = ((func != NULL) + tstate->cframe->use_tracing = ((func != NULL) || (tstate->c_profilefunc != NULL)); return 0; @@ -5832,7 +5842,7 @@ PyEval_GetFuncDesc(PyObject *func) } #define C_TRACE(x, call) \ -if (tstate->use_tracing && tstate->c_profilefunc) { \ +if (trace_info->cframe.use_tracing && tstate->c_profilefunc) { \ if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \ tstate, tstate->frame, trace_info, \ PyTrace_C_CALL, func)) { \ @@ -5913,7 +5923,7 @@ call_function(PyThreadState *tstate, Py_ssize_t nargs = oparg - nkwargs; PyObject **stack = (*pp_stack) - nargs - nkwargs; - if (tstate->use_tracing) { + if (trace_info->cframe.use_tracing) { x = trace_call_function(tstate, trace_info, func, stack, nargs, kwnames); } else { @@ -5946,7 +5956,7 @@ do_call_core(PyThreadState *tstate, } else if (Py_IS_TYPE(func, &PyMethodDescr_Type)) { Py_ssize_t nargs = PyTuple_GET_SIZE(callargs); - if (nargs > 0 && tstate->use_tracing) { + if (nargs > 0 && trace_info->cframe.use_tracing) { /* We need to create a temporary bound method as argument for profiling. diff --git a/Python/pystate.c b/Python/pystate.c index c8b2530f68dfe0..436f874842cc84 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -624,7 +624,8 @@ new_threadstate(PyInterpreterState *interp, int init) tstate->recursion_headroom = 0; tstate->stackcheck_counter = 0; tstate->tracing = 0; - tstate->use_tracing = 0; + tstate->root_cframe.use_tracing = 0; + tstate->cframe = &tstate->root_cframe; tstate->gilstate_counter = 0; tstate->async_exc = NULL; tstate->thread_id = PyThread_get_thread_ident(); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 54d70ef0569759..a36d90f9de1682 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -252,7 +252,7 @@ sys_audit_tstate(PyThreadState *ts, const char *event, /* Disallow tracing in hooks unless explicitly enabled */ ts->tracing++; - ts->use_tracing = 0; + ts->cframe->use_tracing = 0; while ((hook = PyIter_Next(hooks)) != NULL) { _Py_IDENTIFIER(__cantrace__); PyObject *o; @@ -265,14 +265,14 @@ sys_audit_tstate(PyThreadState *ts, const char *event, break; } if (canTrace) { - ts->use_tracing = (ts->c_tracefunc || ts->c_profilefunc); + ts->cframe->use_tracing = (ts->c_tracefunc || ts->c_profilefunc); ts->tracing--; } PyObject* args[2] = {eventName, eventArgs}; o = _PyObject_FastCallTstate(ts, hook, args, 2); if (canTrace) { ts->tracing++; - ts->use_tracing = 0; + ts->cframe->use_tracing = 0; } if (!o) { break; @@ -280,7 +280,7 @@ sys_audit_tstate(PyThreadState *ts, const char *event, Py_DECREF(o); Py_CLEAR(hook); } - ts->use_tracing = (ts->c_tracefunc || ts->c_profilefunc); + ts->cframe->use_tracing = (ts->c_tracefunc || ts->c_profilefunc); ts->tracing--; if (_PyErr_Occurred(ts)) { goto exit; From 2cf3c9858612ffc139d7223db1d1cdf07dd52599 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 8 Apr 2021 11:45:01 +0100 Subject: [PATCH 3/4] Add more comments strssing the importance satck discipline when dealing with CFrames. --- Include/cpython/pystate.h | 15 +++++++++++++-- Python/ceval.c | 6 +++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 7ee69879af65ac..e3ccc543560849 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -31,8 +31,15 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); typedef struct _cframe { /* This struct will be threaded through the C stack - * allowing faster access to state that must can be modified - * outside of the interpreter must be accessed within it */ + * allowing fast access to per-thread state that needs + * to be accessed quickly by the interpreter, but can + * be modified outside of the interpreter. + * + * WARNING: This makes data on the C stack accessible from + * heap objects. Care must be taken to maintain stack + * discipline and make sure that instances of this struct cannot + * accessed outside of their lifetime. + */ int use_tracing; struct _cframe *previous; } CFrame; @@ -69,6 +76,9 @@ struct _ts { This is to prevent the actual trace/profile code from being recorded in the trace/profile. */ int tracing; + + /* Pointer to current CFrame in the C stack frame of the currently, + * or most recently, executing _PyEval_EvalFrameDefault. */ CFrame *cframe; Py_tracefunc c_profilefunc; @@ -136,6 +146,7 @@ struct _ts { /* Unique thread state id. */ uint64_t id; + CFrame root_cframe; /* XXX signal handlers should also be here */ diff --git a/Python/ceval.c b/Python/ceval.c index 8908e3c9a4aaac..6d679a1fa2a3f4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1615,6 +1615,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* Mark trace_info as uninitialized */ trace_info.code = NULL; + /* WARNING: Because the CFrame lives on the C stack, + * but can be accessed from a heap allocated object (tstate) + * strict stack discipline must be maintained. + */ CFrame *prev_cframe = tstate->cframe; trace_info.cframe.use_tracing = prev_cframe->use_tracing; trace_info.cframe.previous = prev_cframe; @@ -4588,7 +4592,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* pop frame */ exit_eval_frame: - + /* Restore previous cframe */ tstate->cframe = trace_info.cframe.previous; tstate->cframe->use_tracing = trace_info.cframe.use_tracing; From 5ba86501e929630a91bcff5b8e35446dc9dc2177 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 8 Apr 2021 12:20:49 +0100 Subject: [PATCH 4/4] Add NEWS --- .../Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst b/Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst new file mode 100644 index 00000000000000..c05a720d7ff1e5 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst @@ -0,0 +1,2 @@ +The flag to check whether tracing is enabled for the thread is now kept on +the C stack, instead of the heap, to streamline dispatch in the interpreter.