Skip to content

Support RETURN from tracing through a function #107925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 35 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
56133bb
Split `CALL_PY_EXACT_ARGS` into uops
gvanrossum Aug 5, 2023
907ff95
Fix merge so it works again (I think)
gvanrossum Aug 9, 2023
2c6be6d
Split into finer-grained uops
gvanrossum Aug 9, 2023
6d78ff2
Fix type error in stacking.py
gvanrossum Aug 10, 2023
0d8e66c
Add test
gvanrossum Aug 10, 2023
b75f30e
Add comment explaining _PUSH_FRAME's unused output effect
gvanrossum Aug 10, 2023
61c2822
Make PUSH_FRAME special case a little less myterious
gvanrossum Aug 10, 2023
f73ea90
Rename Instruction.write to write_case_body
gvanrossum Aug 10, 2023
12910fc
Move next_instr update to a more logical place
gvanrossum Aug 10, 2023
2fafa2c
Don't recompute macro cache offset
gvanrossum Aug 10, 2023
2717b07
Fold and refactor long line in stacking.py
gvanrossum Aug 10, 2023
e487908
Fold long lines in generate_cases.py
gvanrossum Aug 10, 2023
1d549af
Don't emit static assert to executor cases
gvanrossum Aug 10, 2023
f40fb1f
Factor away write_case_body (formerly Instruction.write)
gvanrossum Aug 10, 2023
4f6f8f8
Fold long lines
gvanrossum Aug 11, 2023
6facc8d
Make less of a special case of _PUSH_FRAME
gvanrossum Aug 11, 2023
94630d4
Stop special-casing _PUSH_FRAME altogether
gvanrossum Aug 11, 2023
cf8e2c0
Call _Py_EnterRecursivePy in _FRAME_PUSH
gvanrossum Aug 15, 2023
1e62876
Merge remote-tracking branch 'upstream/main' into call-uops
gvanrossum Aug 15, 2023
337a2eb
Add function-by-version cache
gvanrossum Aug 8, 2023
0218222
Trace into function calls
gvanrossum Aug 8, 2023
ab74ef0
Make RESUME a viable uop
gvanrossum Aug 12, 2023
7e6ef78
Clear func_version_cache in interpreter_clear()
gvanrossum Aug 12, 2023
1200014
Add a small essay on function and code versions
gvanrossum Aug 12, 2023
ece96f4
Move function cache clearing earlier in finalization
gvanrossum Aug 12, 2023
b408b2b
Cache borrowed references, cleared in func_dealloc
gvanrossum Aug 13, 2023
00ae0fa
Correctly set instr_fmt metadata for macros
gvanrossum Aug 13, 2023
fe843d8
Split RETURN_{VALUE,CONST} into uops (mostly works)
gvanrossum Aug 13, 2023
5bf745c
Change LLTRACE debug to trigger on PYTHONUOPSDEBUG >= 5, not 4
gvanrossum Aug 13, 2023
1e55726
Handle _POP_FRAME in superblock properly
gvanrossum Aug 14, 2023
c632262
Handle trace stack underflow
gvanrossum Aug 14, 2023
b59a3af
Add _Py_LeaveRecursiveCallPy to _POP_FRAME
gvanrossum Aug 15, 2023
158e27b
Ensure co_stacksize >= 1, else RETURN_CONST may crash
gvanrossum Aug 15, 2023
8d95157
Fix failing test_code (co_stacksize is never 0)
gvanrossum Aug 16, 2023
d90ecaf
Don't trace into functions if func_version != co_version
gvanrossum Aug 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ void _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *
PyObject *_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs);
PyObject *_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys);
int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp);
void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);


#ifdef __cplusplus
Expand Down
9 changes: 9 additions & 0 deletions Include/internal/pycore_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,22 @@ extern PyObject* _PyFunction_Vectorcall(

#define FUNC_MAX_WATCHERS 8

#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */
// Per-interpreter bookkeeping for function versions.
struct _py_func_state {
// Counter from which fresh function versions are handed out
// (see _PyFunction_GetVersionForCurrentState).
uint32_t next_version;
// Borrowed references to function objects whose
// func_version % FUNC_VERSION_CACHE_SIZE
// once was equal to the index in the table.
// They are cleared when the function is deallocated.
// A slot can be stale (its function's version may have changed since
// it was stored), so readers must re-check func_version before trusting
// an entry.
PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE];
};

extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);

// Return func's version, allocating a new one if it currently has none.
// May return 0, meaning no version could be assigned.
extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
// Store `version` on func; a nonzero version also registers func in the
// interpreter's func_version_cache.
extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
// Find the function currently carrying `version` in the interpreter's
// func_version_cache. Returns a new (strong) reference that the caller must
// release, or NULL if there is no matching entry.
extern PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version);

extern PyObject *_Py_set_function_type_params(
PyThreadState* unused, PyObject *func, PyObject *type_params);

Expand Down
108 changes: 75 additions & 33 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions Lib/test/test_capi/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2618,6 +2618,24 @@ def testfunc(it):
with self.assertRaises(StopIteration):
next(it)

# Check that the uop optimizer produces an executor for a loop that calls
# a plain Python function, and that the executor contains the uops we
# expect from such a call.
def test_call_py_exact_args(self):
def testfunc(n):
# Callee kept deliberately tiny: its only operation is an int add.
def dummy(x):
return x+1
for i in range(n):
dummy(i)

# Run the loop with the uop optimizer installed.
opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(10)

# An executor must have been created for testfunc.
ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
# Each executor entry is a 3-tuple; only the opname is needed here.
uops = {opname for opname, _, _ in ex}
# _PUSH_FRAME shows that the call itself was turned into uops.
self.assertIn("_PUSH_FRAME", uops)
# NOTE(review): the only addition in this test is dummy's `x+1`, so this
# presumably shows the trace continued into the callee -- confirm.
self.assertIn("_BINARY_OP_ADD_INT", uops)



if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion Lib/test/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def func2():
("co_posonlyargcount", 0),
("co_kwonlyargcount", 0),
("co_nlocals", 1),
("co_stacksize", 0),
("co_stacksize", 1),
("co_flags", code.co_flags | inspect.CO_COROUTINE),
("co_firstlineno", 100),
("co_code", code2.co_code),
Expand Down
3 changes: 3 additions & 0 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,9 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
int nlocals, ncellvars, nfreevars;
get_localsplus_counts(con->localsplusnames, con->localspluskinds,
&nlocals, &ncellvars, &nfreevars);
if (con->stacksize == 0) {
con->stacksize = 1;
}

co->co_filename = Py_NewRef(con->filename);
co->co_name = Py_NewRef(con->name);
Expand Down
79 changes: 77 additions & 2 deletions Objects/funcobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,73 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
return NULL;
}

uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
/*
Function versions
-----------------

Function versions are used to detect when a function object has been
updated, invalidating inline cache data used by the `CALL` bytecode
(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).

They are also used by the Tier 2 superblock creation code to find
the function being called (and from there the code object).

How does a function's `func_version` field get initialized?

- `PyFunction_New` and friends initialize it to 0.
- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
- It is reset to 0 when various attributes like `__code__` are set.
- A new version is allocated by `_PyFunction_GetVersionForCurrentState`
when the specializer needs a version and the version is 0.

The latter allocates versions using a counter in the interpreter state;
when the counter wraps around to 0, no more versions are allocated.
There is one other special case: functions with a non-standard
`vectorcall` field are not given a version.

When the function version is 0, the `CALL` bytecode is not specialized.

Code object versions
--------------------

So where do code objects get their `co_version`? There is a single
static global counter, `_Py_next_func_version`. This is initialized in
the generated (!) file `Python/deepfreeze/deepfreeze.c`, to 1 plus the
number of deep-frozen function objects in that file.
(In `_bootstrap_python.c` and `freeze_module.c` it is initialized to 1.)

Code objects get a new `co_version` allocated from this counter upon
creation. Since code objects are nominally immutable, `co_version` can
not be invalidated. The only way it can be 0 is when 2**32 or more
code objects have been created during the process's lifetime.
(The counter isn't reset by `fork()`, extending the lifetime.)
*/

/* Assign `version` to `func`.

   A nonzero version is also published in the current interpreter's
   func_version_cache, in the slot selected by
   version % FUNC_VERSION_CACHE_SIZE, overwriting whatever was stored there.
   The cache entry is a borrowed reference (no refcount change).
   Version 0 leaves the cache untouched. */
void
_PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
{
    func->func_version = version;
    if (version == 0) {
        return;
    }
    PyInterpreterState *interp = _PyInterpreterState_GET();
    PyFunctionObject **slot =
        &interp->func_state.func_version_cache[
            version % FUNC_VERSION_CACHE_SIZE];
    *slot = func;
}

/* Look up the function that currently carries `version`.

   Consults the current interpreter's func_version_cache at index
   version % FUNC_VERSION_CACHE_SIZE and re-checks the candidate's
   func_version, since the slot may hold a function whose version has
   changed since it was stored.

   Returns a new reference to the function on a hit, or NULL on a miss.
   Version 0 means "no version" and never matches: a function whose version
   was reset to 0 may still sit in slot 0, and must not be returned for it. */
PyFunctionObject *
_PyFunction_LookupByVersion(uint32_t version)
{
    if (version == 0) {
        // 0 is the "unversioned" sentinel, never a valid cache key.
        return NULL;
    }
    PyInterpreterState *interp = _PyInterpreterState_GET();
    PyFunctionObject *func = interp->func_state.func_version_cache[
        version % FUNC_VERSION_CACHE_SIZE];
    if (func == NULL || func->func_version != version) {
        // Empty slot, or a stale entry for some other version.
        return NULL;
    }
    // The cache holds only a borrowed reference; hand out a strong one.
    return (PyFunctionObject *)Py_NewRef(func);
}

uint32_t
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
{
if (func->func_version != 0) {
return func->func_version;
Expand All @@ -236,7 +302,7 @@ uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
return 0;
}
uint32_t v = interp->func_state.next_version++;
func->func_version = v;
_PyFunction_SetVersion(func, v);
return v;
}

Expand Down Expand Up @@ -851,6 +917,15 @@ func_dealloc(PyFunctionObject *op)
if (op->func_weakreflist != NULL) {
PyObject_ClearWeakRefs((PyObject *) op);
}
if (op->func_version != 0) {
PyInterpreterState *interp = _PyInterpreterState_GET();
PyFunctionObject **slot =
interp->func_state.func_version_cache
+ (op->func_version % FUNC_VERSION_CACHE_SIZE);
if (*slot == op) {
*slot = NULL;
}
}
(void)func_clear(op);
// These aren't cleared by func_clear().
Py_DECREF(op->func_code);
Expand Down
Loading