From d95aef5b2e9f90be8cadee1a3bc43400c1904bf9 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 14 Apr 2025 16:49:57 -0600 Subject: [PATCH 1/7] Add _PyCode_GetVarCounts(). --- Include/internal/pycore_code.h | 43 +++++ Lib/test/test_code.py | 308 +++++++++++++++++++++++++++++++++ Modules/_testinternalcapi.c | 129 ++++++++++++++ Objects/codeobject.c | 216 +++++++++++++++++++++++ 4 files changed, 696 insertions(+) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index ad2e626c1ee893..26ccac6ad02c2a 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -564,6 +564,49 @@ extern int _Py_ClearUnusedTLBC(PyInterpreterState *interp); #endif +typedef struct { + int total; + struct co_locals_counts { + int total; + struct { + int total; + int numposonly; + int numposorkw; + int numkwonly; + int varargs; + int varkwargs; + } args; + int numpure; + struct { + int total; + // numargs does not contribute to locals.total. + int numargs; + int numothers; + } cells; + struct { + int total; + int numpure; + int numcells; + } hidden; + } locals; + int numfree; // nonlocal + struct co_unbound_counts { + int total; + int numglobal; + int numattrs; + int numunknown; + } unbound; +} _PyCode_var_counts_t; + +PyAPI_FUNC(void) _PyCode_GetVarCounts( + PyCodeObject *, + _PyCode_var_counts_t *); +PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts( + PyCodeObject *, + _PyCode_var_counts_t *, + PyObject *global, + PyObject *attrs); + PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *); diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 7cf09ee7847dc1..03187e0d65f7a7 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -18,6 +18,32 @@ nlocals: 2 flags: 3 consts: ('',) +variable counts: + total: 2 + locals: + total: 2 + args: + total: 1 + posonly: 0 + posorkw: 1 + kwonly: 0 + varargs: False + varkwargs: False + pure: 1 + cells: + total: 1 + args: 1 + others: 0 + hidden: + total: 0 + pure: 0 + cells: 0 + free 
(nonlocal): 0 + unbound: + total: 0 + global: 0 + attrs: 0 + unknown: 0 >>> dump(f(4).__code__) name: g @@ -31,6 +57,32 @@ nlocals: 1 flags: 19 consts: ('None',) +variable counts: + total: 2 + locals: + total: 1 + args: + total: 1 + posonly: 0 + posorkw: 1 + kwonly: 0 + varargs: False + varkwargs: False + pure: 0 + cells: + total: 0 + args: 0 + others: 0 + hidden: + total: 0 + pure: 0 + cells: 0 + free (nonlocal): 1 + unbound: + total: 0 + global: 0 + attrs: 0 + unknown: 0 >>> def h(x, y): ... a = x + y @@ -51,6 +103,32 @@ nlocals: 5 flags: 3 consts: ('None',) +variable counts: + total: 5 + locals: + total: 5 + args: + total: 2 + posonly: 0 + posorkw: 2 + kwonly: 0 + varargs: False + varkwargs: False + pure: 3 + cells: + total: 0 + args: 0 + others: 0 + hidden: + total: 0 + pure: 0 + cells: 0 + free (nonlocal): 0 + unbound: + total: 0 + global: 0 + attrs: 0 + unknown: 0 >>> def attrs(obj): ... print(obj.attr1) @@ -69,6 +147,32 @@ nlocals: 1 flags: 3 consts: ('None',) +variable counts: + total: 5 + locals: + total: 1 + args: + total: 1 + posonly: 0 + posorkw: 1 + kwonly: 0 + varargs: False + varkwargs: False + pure: 0 + cells: + total: 0 + args: 0 + others: 0 + hidden: + total: 0 + pure: 0 + cells: 0 + free (nonlocal): 0 + unbound: + total: 4 + global: 1 + attrs: 3 + unknown: 0 >>> def optimize_away(): ... 'doc string' @@ -105,6 +209,32 @@ nlocals: 3 flags: 3 consts: ('None',) +variable counts: + total: 3 + locals: + total: 3 + args: + total: 3 + posonly: 0 + posorkw: 2 + kwonly: 1 + varargs: False + varkwargs: False + pure: 0 + cells: + total: 0 + args: 0 + others: 0 + hidden: + total: 0 + pure: 0 + cells: 0 + free (nonlocal): 0 + unbound: + total: 0 + global: 0 + attrs: 0 + unknown: 0 >>> def posonly_args(a,b,/,c): ... 
return a,b,c @@ -122,6 +252,32 @@ nlocals: 3 flags: 3 consts: ('None',) +variable counts: + total: 3 + locals: + total: 3 + args: + total: 3 + posonly: 2 + posorkw: 1 + kwonly: 0 + varargs: False + varkwargs: False + pure: 0 + cells: + total: 0 + args: 0 + others: 0 + hidden: + total: 0 + pure: 0 + cells: 0 + free (nonlocal): 0 + unbound: + total: 0 + global: 0 + attrs: 0 + unknown: 0 >>> def has_docstring(x: str): ... 'This is a one-line doc string' @@ -777,6 +933,158 @@ def test_local_kinds(self): kinds = _testinternalcapi.get_co_localskinds(func.__code__) self.assertEqual(kinds, expected) + @unittest.skipIf(_testinternalcapi is None, "missing _testinternalcapi") + def test_var_counts(self): + self.maxDiff = None + def new_var_counts(*, + posonly=0, + posorkw=0, + kwonly=0, + varargs=0, + varkwargs=0, + purelocals=0, + argcells=0, + othercells=0, + freevars=0, + globalvars=0, + attrs=0, + unknown=0, + ): + nargvars = posonly + posorkw + kwonly + varargs + varkwargs + nlocals = nargvars + purelocals + othercells + unbound = globalvars + attrs + unknown + return { + 'total': nlocals + freevars + unbound, + 'locals': { + 'total': nlocals, + 'args': { + 'total': nargvars, + 'numposonly': posonly, + 'numposorkw': posorkw, + 'numkwonly': kwonly, + 'varargs': varargs, + 'varkwargs': varkwargs, + }, + 'numpure': purelocals, + 'cells': { + 'total': argcells + othercells, + 'numargs': argcells, + 'numothers': othercells, + }, + 'hidden': { + 'total': 0, + 'numpure': 0, + 'numcells': 0, + }, + }, + 'numfree': freevars, + 'unbound': { + 'total': unbound, + 'numglobal': globalvars, + 'numattrs': attrs, + 'numunknown': unknown, + }, + } + + import test._code_definitions as defs + funcs = { + defs.spam_minimal: new_var_counts(), + defs.spam_full: new_var_counts( + posonly=2, + posorkw=2, + kwonly=2, + varargs=1, + varkwargs=1, + purelocals=4, + globalvars=3, + attrs=1, + ), + defs.spam: new_var_counts( + posorkw=1, + ), + defs.spam_N: new_var_counts( + posorkw=1, + 
purelocals=1, + ), + defs.spam_C: new_var_counts( + posorkw=1, + purelocals=1, + argcells=1, + othercells=1, + ), + defs.spam_NN: new_var_counts( + posorkw=1, + purelocals=1, + ), + defs.spam_NC: new_var_counts( + posorkw=1, + purelocals=1, + argcells=1, + othercells=1, + ), + defs.spam_CN: new_var_counts( + posorkw=1, + purelocals=1, + argcells=1, + othercells=1, + ), + defs.spam_CC: new_var_counts( + posorkw=1, + purelocals=1, + argcells=1, + othercells=1, + ), + defs.eggs_nested: new_var_counts( + posorkw=1, + ), + defs.eggs_closure: new_var_counts( + posorkw=1, + freevars=2, + ), + defs.eggs_nested_N: new_var_counts( + posorkw=1, + purelocals=1, + ), + defs.eggs_nested_C: new_var_counts( + posorkw=1, + purelocals=1, + argcells=1, + freevars=2, + ), + defs.eggs_closure_N: new_var_counts( + posorkw=1, + purelocals=1, + freevars=2, + ), + defs.eggs_closure_C: new_var_counts( + posorkw=1, + purelocals=1, + argcells=1, + othercells=1, + freevars=2, + ), + defs.ham_nested: new_var_counts( + posorkw=1, + ), + defs.ham_closure: new_var_counts( + posorkw=1, + freevars=3, + ), + defs.ham_C_nested: new_var_counts( + posorkw=1, + ), + defs.ham_C_closure: new_var_counts( + posorkw=1, + freevars=4, + ), + } + assert len(funcs) == len(defs.FUNCTIONS), (len(funcs), len(defs.FUNCTIONS)) + for func in defs.FUNCTIONS: + with self.subTest(func): + expected = funcs[func] + counts = _testinternalcapi.get_code_var_counts(func.__code__) + self.assertEqual(counts, expected) + def isinterned(s): return s is sys.intern(('_' + s + '_')[1:-1]) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 4301dfc2803f4a..2044176f18ce9f 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -999,6 +999,133 @@ get_co_localskinds(PyObject *self, PyObject *arg) return kinds; } +static PyObject * +get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) +{ + const char *codearg; + static char *kwlist[] = {"code", NULL}; + if 
(!PyArg_ParseTupleAndKeywords(_args, _kwargs, + "O!:get_code_var_counts", kwlist, + &PyCode_Type, &codearg)) + { + return NULL; + } + PyCodeObject *code = (PyCodeObject *)codearg; + + _PyCode_var_counts_t counts = {0}; + _PyCode_GetVarCounts(code, &counts); + if (_PyCode_SetUnboundVarCounts(code, &counts, NULL, NULL) < 0) { + return NULL; + } + +#define SET_COUNT(DICT, STRUCT, NAME) \ + do { \ + PyObject *count = PyLong_FromLong(STRUCT.NAME); \ + int res = PyDict_SetItemString(DICT, #NAME, count); \ + Py_DECREF(count); \ + if (res < 0) { \ + goto error; \ + } \ + } while (0) + + PyObject *locals = NULL; + PyObject *args = NULL; + PyObject *cells = NULL; + PyObject *hidden = NULL; + PyObject *unbound = NULL; + PyObject *countsobj = PyDict_New(); + if (countsobj == NULL) { + return NULL; + } + SET_COUNT(countsobj, counts, total); + + // locals + locals = PyDict_New(); + if (locals == NULL) { + goto error; + } + if (PyDict_SetItemString(countsobj, "locals", locals) < 0) { + goto error; + } + SET_COUNT(locals, counts.locals, total); + + // locals.args + args = PyDict_New(); + if (args == NULL) { + goto error; + } + if (PyDict_SetItemString(locals, "args", args) < 0) { + goto error; + } + SET_COUNT(args, counts.locals.args, total); + SET_COUNT(args, counts.locals.args, numposonly); + SET_COUNT(args, counts.locals.args, numposorkw); + SET_COUNT(args, counts.locals.args, numkwonly); + SET_COUNT(args, counts.locals.args, varargs); + SET_COUNT(args, counts.locals.args, varkwargs); + + // locals.numpure + SET_COUNT(locals, counts.locals, numpure); + + // locals.cells + cells = PyDict_New(); + if (cells == NULL) { + goto error; + } + if (PyDict_SetItemString(locals, "cells", cells) < 0) { + goto error; + } + SET_COUNT(cells, counts.locals.cells, total); + SET_COUNT(cells, counts.locals.cells, numargs); + SET_COUNT(cells, counts.locals.cells, numothers); + + // locals.hidden + hidden = PyDict_New(); + if (hidden == NULL) { + goto error; + } + if (PyDict_SetItemString(locals, 
"hidden", hidden) < 0) { + goto error; + } + SET_COUNT(hidden, counts.locals.hidden, total); + SET_COUNT(hidden, counts.locals.hidden, numpure); + SET_COUNT(hidden, counts.locals.hidden, numcells); + + // numfree + SET_COUNT(countsobj, counts, numfree); + + // unbound + unbound = PyDict_New(); + if (unbound == NULL) { + goto error; + } + if (PyDict_SetItemString(countsobj, "unbound", unbound) < 0) { + goto error; + } + SET_COUNT(unbound, counts.unbound, total); + SET_COUNT(unbound, counts.unbound, numglobal); + SET_COUNT(unbound, counts.unbound, numattrs); + SET_COUNT(unbound, counts.unbound, numunknown); + +#undef SET_COUNT + + Py_DECREF(locals); + Py_DECREF(args); + Py_DECREF(cells); + Py_DECREF(hidden); + Py_DECREF(unbound); + return countsobj; + +error: + Py_DECREF(countsobj); + Py_XDECREF(locals); + Py_XDECREF(args); + Py_XDECREF(cells); + Py_XDECREF(hidden); + Py_XDECREF(unbound); + return NULL; +} + static PyObject * jit_enabled(PyObject *self, PyObject *arg) { @@ -2120,6 +2247,8 @@ static PyMethodDef module_functions[] = { {"code_returns_only_none", code_returns_only_none, METH_O, NULL}, {"get_co_framesize", get_co_framesize, METH_O, NULL}, {"get_co_localskinds", get_co_localskinds, METH_O, NULL}, + {"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts), + METH_VARARGS | METH_KEYWORDS, NULL}, {"jit_enabled", jit_enabled, METH_NOARGS, NULL}, #ifdef _Py_TIER2 {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, diff --git a/Objects/codeobject.c b/Objects/codeobject.c index bf24a4af445356..9034a4732fc87e 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1690,6 +1690,222 @@ PyCode_GetFreevars(PyCodeObject *code) } +static int +identify_unbound_names(PyCodeObject *co, + PyObject *global, PyObject *attrs, + struct co_unbound_counts *counts) +{ + // This function is inspired by inspect.getclosurevars(). + // It would be nicer if we had something similar to co_localspluskinds, + // but for co_names. 
+ assert(global != NULL); + assert(PySet_Check(global)); + assert(PySet_GET_SIZE(global) == 0 || counts != NULL); + assert(attrs != NULL); + assert(PySet_Check(attrs)); + assert(PySet_GET_SIZE(attrs) == 0 || counts != NULL); + assert(counts == NULL || counts->total == 0); + Py_ssize_t len = Py_SIZE(co); + for (int i = 0; i < len; i++) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (inst.op.code == LOAD_ATTR) { + PyObject *name = PyTuple_GET_ITEM(co->co_names, inst.op.arg>>1); + if (counts != NULL) { + if (PySet_Contains(attrs, name)) { + if (PyErr_Occurred()) { + return -1; + } + continue; + } + counts->total += 1; + counts->numattrs += 1; + } + if (PySet_Add(attrs, name) < 0) { + return -1; + } + } + else if (inst.op.code == LOAD_GLOBAL) { + PyObject *name = PyTuple_GET_ITEM(co->co_names, inst.op.arg>>1); + if (counts != NULL) { + if (PySet_Contains(global, name)) { + if (PyErr_Occurred()) { + return -1; + } + continue; + } + counts->total += 1; + counts->numglobal += 1; + } + if (PySet_Add(global, name) < 0) { + return -1; + } + } + } + return 0; +} + + +void +_PyCode_GetVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts) +{ + // Count the locals, cells, and free vars. + struct co_locals_counts locals = {0}; + int numfree = 0; + PyObject *kinds = co->co_localspluskinds; + Py_ssize_t numlocalplusfree = PyBytes_GET_SIZE(kinds); + for (int i = 0; i < numlocalplusfree; i++) { + _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); + if (kind & CO_FAST_FREE) { + assert(!(kind & CO_FAST_LOCAL)); + assert(!(kind & CO_FAST_HIDDEN)); + assert(!(kind & CO_FAST_ARG)); + numfree += 1; + } + else { + // Apparently not all non-free vars are CO_FAST_LOCAL. 
+ assert(kind); + locals.total += 1; + if (kind & CO_FAST_ARG) { + locals.args.total += 1; + if (kind & CO_FAST_ARG_VAR) { + if (kind & CO_FAST_ARG_POS) { + assert(!(kind & CO_FAST_ARG_KW)); + assert(!locals.args.varargs); + locals.args.varargs = 1; + } + else { + assert(kind & CO_FAST_ARG_KW); + assert(!locals.args.varkwargs); + locals.args.varkwargs = 1; + } + } + else if (kind & CO_FAST_ARG_POS) { + if (kind & CO_FAST_ARG_KW) { + locals.args.numposorkw += 1; + } + else { + locals.args.numposonly += 1; + } + } + else { + assert(kind & CO_FAST_ARG_KW); + locals.args.numkwonly += 1; + } + if (kind & CO_FAST_CELL) { + locals.cells.total += 1; + locals.cells.numargs += 1; + } + // Args are never hidden currently. + assert(!(kind & CO_FAST_HIDDEN)); + } + else { + if (kind & CO_FAST_CELL) { + locals.cells.total += 1; + locals.cells.numothers += 1; + if (kind & CO_FAST_HIDDEN) { + locals.hidden.total += 1; + locals.hidden.numcells += 1; + } + } + else { + locals.numpure += 1; + if (kind & CO_FAST_HIDDEN) { + locals.hidden.total += 1; + locals.hidden.numpure += 1; + } + } + } + } + } + assert(locals.args.total == ( + co->co_argcount + co->co_kwonlyargcount + + !!(co->co_flags & CO_VARARGS) + + !!(co->co_flags & CO_VARKEYWORDS))); + assert(locals.args.numposonly == co->co_posonlyargcount); + assert(locals.args.numposonly + locals.args.numposorkw == co->co_argcount); + assert(locals.args.numkwonly == co->co_kwonlyargcount); + assert(locals.cells.total == co->co_ncellvars); + assert(locals.args.total + locals.numpure == co->co_nlocals); + assert(locals.total + locals.cells.numargs == co->co_nlocals + co->co_ncellvars); + assert(locals.total + numfree == co->co_nlocalsplus); + assert(numfree == co->co_nfreevars); + + // Get the unbound counts. + struct co_unbound_counts unbound = { + .total = PyTuple_GET_SIZE(co->co_names), + // numglobal and numattrs can be set later + // with _PyCode_SetUnboundVarCounts(). 
+ .numunknown = PyTuple_GET_SIZE(co->co_names), + }; + + // "Return" the result. + *counts = (_PyCode_var_counts_t){ + .total = locals.total + numfree + unbound.total, + .locals = locals, + .numfree = numfree, + .unbound = unbound, + }; +} + +int +_PyCode_SetUnboundVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts, + PyObject *globalarg, PyObject *attrsarg) +{ + int res = -1; + PyObject *global = NULL; + PyObject *attrs = NULL; + PyObject *global_owned = NULL; + PyObject *attrs_owned = NULL; + if (globalarg != NULL) { + if (!PySet_Check(globalarg)) { + PyErr_Format(PyExc_TypeError, + "expected a set for \"global\", got %R", global); + goto finally; + } + global = globalarg; + } + else { + global_owned = PySet_New(NULL); + if (global_owned == NULL) { + goto finally; + } + global = global_owned; + } + if (attrsarg != NULL) { + if (!PySet_Check(attrsarg)) { + PyErr_Format(PyExc_TypeError, + "expected a set for \"attrs\", got %R", attrs); + goto finally; + } + attrs = attrsarg; + } + else { + attrs_owned = PySet_New(NULL); + if (attrs_owned == NULL) { + goto finally; + } + attrs = attrs_owned; + } + + struct co_unbound_counts unbound = {0}; + if (identify_unbound_names(co, global, attrs, &unbound) < 0) { + goto finally; + } + assert(unbound.numunknown == 0); + assert(unbound.total <= counts->unbound.total); + assert(counts->unbound.numunknown == counts->unbound.total); + unbound.numunknown = counts->unbound.total - unbound.total; + unbound.total = counts->unbound.total; + counts->unbound = unbound; + res = 0; + +finally: + Py_XDECREF(global_owned); + Py_XDECREF(attrs_owned); + return res; +} + + /* Here "value" means a non-None value, since a bare return is identical * to returning None explicitly. Likewise a missing return statement * at the end of the function is turned into "return None". */ From aaa98ac8219f392f49435c09122a65c497e61108 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 28 Apr 2025 21:33:01 -0600 Subject: [PATCH 2/7] Fix the tests. 
--- Lib/test/test_code.py | 156 ------------------------------------------ 1 file changed, 156 deletions(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 03187e0d65f7a7..0753fa9c481276 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -18,32 +18,6 @@ nlocals: 2 flags: 3 consts: ('',) -variable counts: - total: 2 - locals: - total: 2 - args: - total: 1 - posonly: 0 - posorkw: 1 - kwonly: 0 - varargs: False - varkwargs: False - pure: 1 - cells: - total: 1 - args: 1 - others: 0 - hidden: - total: 0 - pure: 0 - cells: 0 - free (nonlocal): 0 - unbound: - total: 0 - global: 0 - attrs: 0 - unknown: 0 >>> dump(f(4).__code__) name: g @@ -57,32 +31,6 @@ nlocals: 1 flags: 19 consts: ('None',) -variable counts: - total: 2 - locals: - total: 1 - args: - total: 1 - posonly: 0 - posorkw: 1 - kwonly: 0 - varargs: False - varkwargs: False - pure: 0 - cells: - total: 0 - args: 0 - others: 0 - hidden: - total: 0 - pure: 0 - cells: 0 - free (nonlocal): 1 - unbound: - total: 0 - global: 0 - attrs: 0 - unknown: 0 >>> def h(x, y): ... a = x + y @@ -103,32 +51,6 @@ nlocals: 5 flags: 3 consts: ('None',) -variable counts: - total: 5 - locals: - total: 5 - args: - total: 2 - posonly: 0 - posorkw: 2 - kwonly: 0 - varargs: False - varkwargs: False - pure: 3 - cells: - total: 0 - args: 0 - others: 0 - hidden: - total: 0 - pure: 0 - cells: 0 - free (nonlocal): 0 - unbound: - total: 0 - global: 0 - attrs: 0 - unknown: 0 >>> def attrs(obj): ... print(obj.attr1) @@ -147,32 +69,6 @@ nlocals: 1 flags: 3 consts: ('None',) -variable counts: - total: 5 - locals: - total: 1 - args: - total: 1 - posonly: 0 - posorkw: 1 - kwonly: 0 - varargs: False - varkwargs: False - pure: 0 - cells: - total: 0 - args: 0 - others: 0 - hidden: - total: 0 - pure: 0 - cells: 0 - free (nonlocal): 0 - unbound: - total: 4 - global: 1 - attrs: 3 - unknown: 0 >>> def optimize_away(): ... 
'doc string' @@ -209,32 +105,6 @@ nlocals: 3 flags: 3 consts: ('None',) -variable counts: - total: 3 - locals: - total: 3 - args: - total: 3 - posonly: 0 - posorkw: 2 - kwonly: 1 - varargs: False - varkwargs: False - pure: 0 - cells: - total: 0 - args: 0 - others: 0 - hidden: - total: 0 - pure: 0 - cells: 0 - free (nonlocal): 0 - unbound: - total: 0 - global: 0 - attrs: 0 - unknown: 0 >>> def posonly_args(a,b,/,c): ... return a,b,c @@ -252,32 +122,6 @@ nlocals: 3 flags: 3 consts: ('None',) -variable counts: - total: 3 - locals: - total: 3 - args: - total: 3 - posonly: 2 - posorkw: 1 - kwonly: 0 - varargs: False - varkwargs: False - pure: 0 - cells: - total: 0 - args: 0 - others: 0 - hidden: - total: 0 - pure: 0 - cells: 0 - free (nonlocal): 0 - unbound: - total: 0 - global: 0 - attrs: 0 - unknown: 0 >>> def has_docstring(x: str): ... 'This is a one-line doc string' From c8181cc30877cf427b61a41a6cdf5398688535af Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 29 Apr 2025 12:56:11 -0600 Subject: [PATCH 3/7] Add PyFunction_GET_BUILTINS(). --- Include/cpython/funcobject.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h index 598cd330bc9ca9..18249b95befe65 100644 --- a/Include/cpython/funcobject.h +++ b/Include/cpython/funcobject.h @@ -97,6 +97,11 @@ static inline PyObject* PyFunction_GET_GLOBALS(PyObject *func) { } #define PyFunction_GET_GLOBALS(func) PyFunction_GET_GLOBALS(_PyObject_CAST(func)) +static inline PyObject* PyFunction_GET_BUILTINS(PyObject *func) { + return _PyFunction_CAST(func)->func_builtins; +} +#define PyFunction_GET_BUILTINS(func) PyFunction_GET_BUILTINS(_PyObject_CAST(func)) + static inline PyObject* PyFunction_GET_MODULE(PyObject *func) { return _PyFunction_CAST(func)->func_module; } From ddbe9bc596724be46fcd39c31d8215787aa37a63 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 29 Apr 2025 13:26:50 -0600 Subject: [PATCH 4/7] Optionally identify globals/builtins counts. 
--- Include/internal/pycore_code.h | 13 +++-- Lib/test/test_code.py | 82 +++++++++++++++++++++++++++- Modules/_testinternalcapi.c | 49 ++++++++++++++--- Objects/codeobject.c | 97 +++++++++++++++++++--------------- 4 files changed, 188 insertions(+), 53 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 26ccac6ad02c2a..fec3d3e6b848be 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -592,7 +592,12 @@ typedef struct { int numfree; // nonlocal struct co_unbound_counts { int total; - int numglobal; + struct { + int total; + int numglobal; + int numbuiltin; + int numunknown; + } globals; int numattrs; int numunknown; } unbound; @@ -604,8 +609,10 @@ PyAPI_FUNC(void) _PyCode_GetVarCounts( PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts( PyCodeObject *, _PyCode_var_counts_t *, - PyObject *global, - PyObject *attrs); + PyObject *globalnames, + PyObject *attrnames, + PyObject *globalsns, + PyObject *builtinsns); PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *); diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 0753fa9c481276..1b6dfe7c7890ad 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -796,7 +796,36 @@ def new_var_counts(*, ): nargvars = posonly + posorkw + kwonly + varargs + varkwargs nlocals = nargvars + purelocals + othercells - unbound = globalvars + attrs + unknown + if isinstance(globalvars, int): + globalvars = { + 'total': globalvars, + 'numglobal': 0, + 'numbuiltin': 0, + 'numunknown': globalvars, + } + else: + g_numunknown = 0 + if isinstance(globalvars, dict): + numglobal = globalvars['numglobal'] + numbuiltin = globalvars['numbuiltin'] + size = 2 + if 'numunknown' in globalvars: + g_numunknown = globalvars['numunknown'] + size += 1 + assert len(globalvars) == size, globalvars + else: + assert not isinstance(globalvars, str), repr(globalvars) + try: + numglobal, numbuiltin = globalvars + except ValueError: + numglobal, numbuiltin, g_numunknown = 
globalvars + globalvars = { + 'total': numglobal + numbuiltin + g_numunknown, + 'numglobal': numglobal, + 'numbuiltin': numbuiltin, + 'numunknown': g_numunknown, + } + unbound = globalvars['total'] + attrs + unknown return { 'total': nlocals + freevars + unbound, 'locals': { @@ -824,7 +853,7 @@ def new_var_counts(*, 'numfree': freevars, 'unbound': { 'total': unbound, - 'numglobal': globalvars, + 'globals': globalvars, 'numattrs': attrs, 'numunknown': unknown, }, @@ -929,6 +958,55 @@ def new_var_counts(*, counts = _testinternalcapi.get_code_var_counts(func.__code__) self.assertEqual(counts, expected) + def func_with_globals_and_builtins(): + mod1 = _testinternalcapi + mod2 = dis + mods = (mod1, mod2) + checks = tuple(callable(m) for m in mods) + return callable(mod2), tuple(mods), list(mods), checks + + func = func_with_globals_and_builtins + with self.subTest(f'{func} code'): + expected = new_var_counts( + purelocals=4, + globalvars=5, + ) + counts = _testinternalcapi.get_code_var_counts(func.__code__) + self.assertEqual(counts, expected) + + with self.subTest(f'{func} with own globals and builtins'): + expected = new_var_counts( + purelocals=4, + globalvars=(2, 3), + ) + counts = _testinternalcapi.get_code_var_counts(func) + self.assertEqual(counts, expected) + + with self.subTest(f'{func} without globals'): + expected = new_var_counts( + purelocals=4, + globalvars=(0, 3, 2), + ) + counts = _testinternalcapi.get_code_var_counts(func, globalsns={}) + self.assertEqual(counts, expected) + + with self.subTest(f'{func} without both'): + expected = new_var_counts( + purelocals=4, + globalvars=5, + ) + counts = _testinternalcapi.get_code_var_counts(func, globalsns={}, + builtinsns={}) + self.assertEqual(counts, expected) + + with self.subTest(f'{func} without builtins'): + expected = new_var_counts( + purelocals=4, + globalvars=(2, 0, 3), + ) + counts = _testinternalcapi.get_code_var_counts(func, builtinsns={}) + self.assertEqual(counts, expected) + def isinterned(s): 
return s is sys.intern(('_' + s + '_')[1:-1]) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 2044176f18ce9f..f441d84da2d5fe 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1002,19 +1002,41 @@ get_co_localskinds(PyObject *self, PyObject *arg) static PyObject * get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) { - const char *codearg; - static char *kwlist[] = {"code", NULL}; + PyObject *codearg; + PyObject *globalnames = NULL; + PyObject *attrnames = NULL; + PyObject *globalsns = NULL; + PyObject *builtinsns = NULL; + static char *kwlist[] = {"code", "globalnames", "attrnames", "globalsns", + "builtinsns", NULL}; if (!PyArg_ParseTupleAndKeywords(_args, _kwargs, - "O!:get_code_var_counts", kwlist, - &PyCode_Type, &codearg)) + "O|OOO!O!:get_code_var_counts", kwlist, + &codearg, &globalnames, &attrnames, + &PyDict_Type, &globalsns, &PyDict_Type, &builtinsns)) { return NULL; } + if (PyFunction_Check(codearg)) { + if (globalsns == NULL) { + globalsns = PyFunction_GET_GLOBALS(codearg); + } + if (builtinsns == NULL) { + builtinsns = PyFunction_GET_BUILTINS(codearg); + } + codearg = PyFunction_GET_CODE(codearg); + } + else if (!PyCode_Check(codearg)) { + PyErr_SetString(PyExc_TypeError, + "argument must be a code object or a function"); + return NULL; + } PyCodeObject *code = (PyCodeObject *)codearg; _PyCode_var_counts_t counts = {0}; _PyCode_GetVarCounts(code, &counts); - if (_PyCode_SetUnboundVarCounts(code, &counts, NULL, NULL) < 0) { + if (_PyCode_SetUnboundVarCounts( + code, &counts, globalnames, attrnames, globalsns, builtinsns) < 0) + { return NULL; } @@ -1033,6 +1055,7 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) PyObject *cells = NULL; PyObject *hidden = NULL; PyObject *unbound = NULL; + PyObject *globals = NULL; PyObject *countsobj = PyDict_New(); if (countsobj == NULL) { return NULL; @@ -1103,10 +1126,22 @@ get_code_var_counts(PyObject *self, PyObject 
*_args, PyObject *_kwargs) goto error; } SET_COUNT(unbound, counts.unbound, total); - SET_COUNT(unbound, counts.unbound, numglobal); SET_COUNT(unbound, counts.unbound, numattrs); SET_COUNT(unbound, counts.unbound, numunknown); + // unbound.globals + globals = PyDict_New(); + if (globals == NULL) { + goto error; + } + if (PyDict_SetItemString(unbound, "globals", globals) < 0) { + goto error; + } + SET_COUNT(globals, counts.unbound.globals, total); + SET_COUNT(globals, counts.unbound.globals, numglobal); + SET_COUNT(globals, counts.unbound.globals, numbuiltin); + SET_COUNT(globals, counts.unbound.globals, numunknown); + #undef SET_COUNT Py_DECREF(locals); @@ -1114,6 +1149,7 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) Py_DECREF(cells); Py_DECREF(hidden); Py_DECREF(unbound); + Py_DECREF(globals); return countsobj; error: @@ -1123,6 +1159,7 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) Py_XDECREF(cells); Py_XDECREF(hidden); Py_XDECREF(unbound); + Py_XDECREF(globals); return NULL; } diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 9034a4732fc87e..88edb52b6ce369 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1692,18 +1692,21 @@ PyCode_GetFreevars(PyCodeObject *code) static int identify_unbound_names(PyCodeObject *co, - PyObject *global, PyObject *attrs, + PyObject *globalnames, PyObject *attrnames, + PyObject *globalsns, PyObject *builtinsns, struct co_unbound_counts *counts) { // This function is inspired by inspect.getclosurevars(). // It would be nicer if we had something similar to co_localspluskinds, // but for co_names. 
- assert(global != NULL); - assert(PySet_Check(global)); - assert(PySet_GET_SIZE(global) == 0 || counts != NULL); - assert(attrs != NULL); - assert(PySet_Check(attrs)); - assert(PySet_GET_SIZE(attrs) == 0 || counts != NULL); + assert(globalnames != NULL); + assert(PySet_Check(globalnames)); + assert(PySet_GET_SIZE(globalnames) == 0 || counts != NULL); + assert(attrnames != NULL); + assert(PySet_Check(attrnames)); + assert(PySet_GET_SIZE(attrnames) == 0 || counts != NULL); + assert(globalsns == NULL || PyDict_Check(globalsns)); + assert(builtinsns == NULL || PyDict_Check(builtinsns)); assert(counts == NULL || counts->total == 0); Py_ssize_t len = Py_SIZE(co); for (int i = 0; i < len; i++) { @@ -1711,7 +1714,7 @@ identify_unbound_names(PyCodeObject *co, if (inst.op.code == LOAD_ATTR) { PyObject *name = PyTuple_GET_ITEM(co->co_names, inst.op.arg>>1); if (counts != NULL) { - if (PySet_Contains(attrs, name)) { + if (PySet_Contains(attrnames, name)) { if (PyErr_Occurred()) { return -1; } @@ -1720,23 +1723,38 @@ identify_unbound_names(PyCodeObject *co, counts->total += 1; counts->numattrs += 1; } - if (PySet_Add(attrs, name) < 0) { + if (PySet_Add(attrnames, name) < 0) { return -1; } } else if (inst.op.code == LOAD_GLOBAL) { PyObject *name = PyTuple_GET_ITEM(co->co_names, inst.op.arg>>1); if (counts != NULL) { - if (PySet_Contains(global, name)) { + if (PySet_Contains(globalnames, name)) { if (PyErr_Occurred()) { return -1; } continue; } counts->total += 1; - counts->numglobal += 1; + counts->globals.total += 1; + counts->globals.numunknown += 1; + if (globalsns != NULL && PyDict_Contains(globalsns, name)) { + if (PyErr_Occurred()) { + return -1; + } + counts->globals.numglobal += 1; + counts->globals.numunknown -= 1; + } + if (builtinsns != NULL && PyDict_Contains(builtinsns, name)) { + if (PyErr_Occurred()) { + return -1; + } + counts->globals.numbuiltin += 1; + counts->globals.numunknown -= 1; + } } - if (PySet_Add(global, name) < 0) { + if (PySet_Add(globalnames, 
name) < 0) { return -1; } } @@ -1849,46 +1867,41 @@ _PyCode_GetVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts) int _PyCode_SetUnboundVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts, - PyObject *globalarg, PyObject *attrsarg) + PyObject *globalnames, PyObject *attrnames, + PyObject *globalsns, PyObject *builtinsns) { int res = -1; - PyObject *global = NULL; - PyObject *attrs = NULL; - PyObject *global_owned = NULL; - PyObject *attrs_owned = NULL; - if (globalarg != NULL) { - if (!PySet_Check(globalarg)) { - PyErr_Format(PyExc_TypeError, - "expected a set for \"global\", got %R", global); + PyObject *globalnames_owned = NULL; + PyObject *attrnames_owned = NULL; + if (globalnames == NULL) { + globalnames_owned = PySet_New(NULL); + if (globalnames_owned == NULL) { goto finally; } - global = globalarg; + globalnames = globalnames_owned; } - else { - global_owned = PySet_New(NULL); - if (global_owned == NULL) { - goto finally; - } - global = global_owned; + else if (!PySet_Check(globalnames)) { + PyErr_Format(PyExc_TypeError, + "expected a set for \"globalnames\", got %R", globalnames); + goto finally; } - if (attrsarg != NULL) { - if (!PySet_Check(attrsarg)) { - PyErr_Format(PyExc_TypeError, - "expected a set for \"attrs\", got %R", attrs); + if (attrnames == NULL) { + attrnames_owned = PySet_New(NULL); + if (attrnames_owned == NULL) { goto finally; } - attrs = attrsarg; + attrnames = attrnames_owned; } - else { - attrs_owned = PySet_New(NULL); - if (attrs_owned == NULL) { - goto finally; - } - attrs = attrs_owned; + else if (!PySet_Check(attrnames)) { + PyErr_Format(PyExc_TypeError, + "expected a set for \"attrnames\", got %R", attrnames); + goto finally; } struct co_unbound_counts unbound = {0}; - if (identify_unbound_names(co, global, attrs, &unbound) < 0) { + if (identify_unbound_names( + co, globalnames, attrnames, globalsns, builtinsns, &unbound) < 0) + { goto finally; } assert(unbound.numunknown == 0); @@ -1900,8 +1913,8 @@ 
_PyCode_SetUnboundVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts, res = 0; finally: - Py_XDECREF(global_owned); - Py_XDECREF(attrs_owned); + Py_XDECREF(globalnames_owned); + Py_XDECREF(attrnames_owned); return res; } From 195a1eccc636ab6c652823e035f641302411e538 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 29 Apr 2025 13:59:43 -0600 Subject: [PATCH 5/7] Pass tstate to _PyCode_SetUnboundVarCounts(). --- Include/internal/pycore_code.h | 1 + Modules/_testinternalcapi.c | 4 +++- Objects/codeobject.c | 20 +++++++++++--------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index fec3d3e6b848be..aea2d8b061d011 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -607,6 +607,7 @@ PyAPI_FUNC(void) _PyCode_GetVarCounts( PyCodeObject *, _PyCode_var_counts_t *); PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts( + PyThreadState *, PyCodeObject *, _PyCode_var_counts_t *, PyObject *globalnames, diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index f441d84da2d5fe..4bfe88f2cf920c 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1002,6 +1002,7 @@ get_co_localskinds(PyObject *self, PyObject *arg) static PyObject * get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) { + PyThreadState *tstate = _PyThreadState_GET(); PyObject *codearg; PyObject *globalnames = NULL; PyObject *attrnames = NULL; @@ -1035,7 +1036,8 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) _PyCode_var_counts_t counts = {0}; _PyCode_GetVarCounts(code, &counts); if (_PyCode_SetUnboundVarCounts( - code, &counts, globalnames, attrnames, globalsns, builtinsns) < 0) + tstate, code, &counts, globalnames, attrnames, + globalsns, builtinsns) < 0) { return NULL; } diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 88edb52b6ce369..c4b1ba47b07dea 100644 --- a/Objects/codeobject.c +++ 
b/Objects/codeobject.c @@ -1691,7 +1691,7 @@ PyCode_GetFreevars(PyCodeObject *code) static int -identify_unbound_names(PyCodeObject *co, +identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, PyObject *globalnames, PyObject *attrnames, PyObject *globalsns, PyObject *builtinsns, struct co_unbound_counts *counts) @@ -1715,7 +1715,7 @@ identify_unbound_names(PyCodeObject *co, PyObject *name = PyTuple_GET_ITEM(co->co_names, inst.op.arg>>1); if (counts != NULL) { if (PySet_Contains(attrnames, name)) { - if (PyErr_Occurred()) { + if (_PyErr_Occurred(tstate)) { return -1; } continue; @@ -1731,7 +1731,7 @@ identify_unbound_names(PyCodeObject *co, PyObject *name = PyTuple_GET_ITEM(co->co_names, inst.op.arg>>1); if (counts != NULL) { if (PySet_Contains(globalnames, name)) { - if (PyErr_Occurred()) { + if (_PyErr_Occurred(tstate)) { return -1; } continue; @@ -1740,14 +1740,14 @@ identify_unbound_names(PyCodeObject *co, counts->globals.total += 1; counts->globals.numunknown += 1; if (globalsns != NULL && PyDict_Contains(globalsns, name)) { - if (PyErr_Occurred()) { + if (_PyErr_Occurred(tstate)) { return -1; } counts->globals.numglobal += 1; counts->globals.numunknown -= 1; } if (builtinsns != NULL && PyDict_Contains(builtinsns, name)) { - if (PyErr_Occurred()) { + if (_PyErr_Occurred(tstate)) { return -1; } counts->globals.numbuiltin += 1; @@ -1866,7 +1866,8 @@ _PyCode_GetVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts) } int -_PyCode_SetUnboundVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts, +_PyCode_SetUnboundVarCounts(PyThreadState *tstate, + PyCodeObject *co, _PyCode_var_counts_t *counts, PyObject *globalnames, PyObject *attrnames, PyObject *globalsns, PyObject *builtinsns) { @@ -1881,7 +1882,7 @@ _PyCode_SetUnboundVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts, globalnames = globalnames_owned; } else if (!PySet_Check(globalnames)) { - PyErr_Format(PyExc_TypeError, + _PyErr_Format(tstate, PyExc_TypeError, "expected a set for 
\"globalnames\", got %R", globalnames); goto finally; } @@ -1893,14 +1894,15 @@ _PyCode_SetUnboundVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts, attrnames = attrnames_owned; } else if (!PySet_Check(attrnames)) { - PyErr_Format(PyExc_TypeError, + _PyErr_Format(tstate, PyExc_TypeError, "expected a set for \"attrnames\", got %R", attrnames); goto finally; } struct co_unbound_counts unbound = {0}; if (identify_unbound_names( - co, globalnames, attrnames, globalsns, builtinsns, &unbound) < 0) + tstate, co, globalnames, attrnames, globalsns, builtinsns, + &unbound) < 0) { goto finally; } From 998aeed0595bd9bb63b9acbb82e836686277e312 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 30 Apr 2025 11:41:54 -0600 Subject: [PATCH 6/7] Add some comments. --- Objects/codeobject.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index c4b1ba47b07dea..99153387ec3fe4 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1874,6 +1874,8 @@ _PyCode_SetUnboundVarCounts(PyThreadState *tstate, int res = -1; PyObject *globalnames_owned = NULL; PyObject *attrnames_owned = NULL; + + // Prep the name sets. if (globalnames == NULL) { globalnames_owned = PySet_New(NULL); if (globalnames_owned == NULL) { @@ -1899,6 +1901,7 @@ _PyCode_SetUnboundVarCounts(PyThreadState *tstate, goto finally; } + // Fill in unbound.globals and unbound.numattrs. struct co_unbound_counts unbound = {0}; if (identify_unbound_names( tstate, co, globalnames, attrnames, globalsns, builtinsns, From a727d7b769e172f9a33b40d781e4ecc2b968c200 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 30 Apr 2025 11:40:00 -0600 Subject: [PATCH 7/7] Fix a compiler warning. 
--- Objects/codeobject.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 99153387ec3fe4..d643eb9fd61ae9 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1849,11 +1849,12 @@ _PyCode_GetVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts) assert(numfree == co->co_nfreevars); // Get the unbound counts. + assert(PyTuple_GET_SIZE(co->co_names) >= 0); struct co_unbound_counts unbound = { - .total = PyTuple_GET_SIZE(co->co_names), + .total = (int)PyTuple_GET_SIZE(co->co_names), // numglobal and numattrs can be set later // with _PyCode_SetUnboundVarCounts(). - .numunknown = PyTuple_GET_SIZE(co->co_names), + .numunknown = (int)PyTuple_GET_SIZE(co->co_names), }; // "Return" the result.