From 5e73b7484f4a4e2b78f3862f43516e9a78e0c1e6 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 26 Jun 2021 17:59:28 +0800 Subject: [PATCH 01/49] WIP: Specialize CALL_FUNCTION for builtins --- Include/internal/pycore_code.h | 24 ++++++++ Include/opcode.h | 2 + Lib/opcode.py | 2 + Python/ceval.c | 102 +++++++++++++++++++++++++++++++++ Python/opcode_targets.h | 4 +- Python/specialize.c | 78 +++++++++++++++++++++++++ 6 files changed, 210 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 1e78b928865e78..3fb3eefd67c3e2 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -53,6 +53,14 @@ typedef struct { uint32_t builtin_keys_version; } _PyLoadGlobalCache; +typedef struct { + union { + PyCFunction cfunc; + /* TODO: func_version field for Python function calls*/ + uint64_t _; /* Just for alignment on 32-bit */ + }; +} _PyCallFunctionCache; + /* Add specialized versions of entries to this union. * * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 @@ -68,6 +76,7 @@ typedef union { _PyAdaptiveEntry adaptive; _PyLoadAttrCache load_attr; _PyLoadGlobalCache load_global; + _PyCallFunctionCache call_function; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) @@ -319,10 +328,25 @@ cache_backoff(_PyAdaptiveEntry *entry) { entry->counter = BACKOFF; } +/* Corresponds to various function pointers +https://docs.python.org/3/c-api/structures.html#implementing-functions-and-methods +*/ +typedef enum { + PYCFUNCTION = 1, + PYCFUNCTION_O = 2, + PYCFUNCTION_NOARGS = 3, + PYCFUNCTION_WITH_KEYWORDS = 4, + _PYCFUNCTION_FAST = 5, + _PYCFUNCTION_FAST_WITH_KEYWORDS = 6, + PYCMETHOD = 7, /* Isn't normally used in builtins. */ +} _BuiltinCallKinds; + /* Specialization functions */ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); +int _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, + PyObject *builtins, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); #define SPECIALIZATION_STATS 0 #define SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/opcode.h b/Include/opcode.h index 7f8376ff15ba95..5e7e75fe45098e 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -145,6 +145,8 @@ extern "C" { #define LOAD_GLOBAL_ADAPTIVE 36 #define LOAD_GLOBAL_MODULE 38 #define LOAD_GLOBAL_BUILTIN 39 +#define CALL_FUNCTION_ADAPTIVE 40 +#define CALL_FUNCTION_BUILTIN 41 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 7e5916a4245256..f2e1ef6185a20c 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -229,4 +229,6 @@ def jabs_op(name, op): "LOAD_GLOBAL_ADAPTIVE", "LOAD_GLOBAL_MODULE", "LOAD_GLOBAL_BUILTIN", + "CALL_FUNCTION_ADAPTIVE", + "CALL_FUNCTION_BUILTIN", ] diff --git a/Python/ceval.c b/Python/ceval.c index b5e3dd53c8439a..ff73cd15bd3b28 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -55,6 +55,12 @@ Py_LOCAL_INLINE(PyObject *) call_function( static PyObject * do_call_core( PyThreadState *tstate, PyObject *func, PyObject *callargs, PyObject *kwdict, int use_tracing); +Py_LOCAL_INLINE(PyObject *) call_function_builtin( + PyThreadState *tstate, + _PyAdaptiveEntry *cache0, + _PyCallFunctionCache *cache1, + PyObject ***pp_stack, + Py_ssize_t oparg, int use_tracing); #ifdef LLTRACE static int lltrace; @@ -4066,7 +4072,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) CHECK_EVAL_BREAKER(); DISPATCH(); } + case TARGET(CALL_FUNCTION_ADAPTIVE): { + SpecializedCacheEntry *cache = GET_CACHE(); + if (cache->adaptive.counter == 0) { + PyObject *callable = PEEK(cache->adaptive.original_oparg + 1); + next_instr--; + if (_Py_Specialize_CallFunction(stack_pointer, cache->adaptive.original_oparg, BUILTINS(), next_instr, cache) < 0) { + goto error; + } + DISPATCH(); + } + else { + STAT_INC(CALL_FUNCTION, deferred); + cache->adaptive.counter--; + oparg = cache->adaptive.original_oparg; + JUMP_TO_INSTRUCTION(CALL_FUNCTION); + } + } + case TARGET(CALL_FUNCTION_BUILTIN): { + /* Builtin functions, WITHOUT keywords */ + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyCallFunctionCache *cache1 = &caches[-1].call_function; + PyObject *callable = PEEK(cache0->original_oparg + 1); + DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); + DEOPT_IF(PyCFunction_GET_FUNCTION(callable) != cache1->cfunc, CALL_FUNCTION); + PyObject **sp, *res; + sp = stack_pointer; + res = call_function_builtin(tstate, cache0, cache1, &sp, + cache0->original_oparg, cframe.use_tracing); + stack_pointer = sp; + PUSH(res); + DEOPT_IF(res == NULL, CALL_FUNCTION); + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); + DISPATCH(); + } case TARGET(CALL_FUNCTION_KW): { PyObject **sp, *res, *names; @@ -4297,6 +4339,7 @@ opname ## _miss: \ MISS_WITH_CACHE(LOAD_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) +MISS_WITH_CACHE(CALL_FUNCTION) error: /* Double-check exception status. */ @@ -5872,6 +5915,65 @@ do_call_core(PyThreadState *tstate, return PyObject_Call(func, callargs, kwdict); } +/* Fast alternative for non-keyword calls to builtins. */ +Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION +call_function_builtin(PyThreadState *tstate, + _PyAdaptiveEntry *cache0, + _PyCallFunctionCache *cache1, + PyObject ***pp_stack, + Py_ssize_t oparg, + int use_tracing) +{ +#define MAYBE_TRACE(cfunc) if (use_tracing) {C_TRACE(x, cfunc);} else {x = cfunc;} + + PyObject **pfunc = (*pp_stack) - oparg - 1; + PyObject *x, *w; + PyObject **stack = (*pp_stack) - oparg; + + PyObject *func = *pfunc; /* Only for tracing purposes */ + PyObject *self = PyCFunction_GET_SELF(func); + PyCFunction cfunc = PyCFunction_GET_FUNCTION(func); + + switch ((_BuiltinCallKinds)cache0->index) { + case PYCFUNCTION_NOARGS: + case PYCFUNCTION_O: { + MAYBE_TRACE(cfunc(self, *stack)); + break; + } + case _PYCFUNCTION_FAST: { + MAYBE_TRACE(((_PyCFunctionFast)cfunc)(self, stack, oparg)); + break; + } + case _PYCFUNCTION_FAST_WITH_KEYWORDS: { + MAYBE_TRACE(((_PyCFunctionFastWithKeywords)cfunc)(self, stack, oparg, 0)); + break; + } + case PYCFUNCTION_WITH_KEYWORDS: { + PyObject *args = _PyTuple_FromArray(stack, oparg); + if (args == NULL) { + break; + } + MAYBE_TRACE(((PyCFunctionWithKeywords)cfunc)(self, args, NULL)); + Py_DECREF(args); + break; + } + /* Bulitins shouldn't have these flags */ + case PYCFUNCTION: + case PYCMETHOD: + default: + Py_UNREACHABLE(); + break; + } + assert((x != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + + /* Clear the stack of the function object. */ + while ((*pp_stack) > pfunc) { + w = EXT_POP(*pp_stack); + Py_DECREF(w); + } + + return x; +} /* Extract a slice index from a PyLong or an object with the nb_index slot defined, and store in *pi. Silently reduce values larger than PY_SSIZE_T_MAX to PY_SSIZE_T_MAX, diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index ecc95dabf4693e..dad50a165ac5bc 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -39,8 +39,8 @@ static void *opcode_targets[256] = { &&TARGET_POP_EXCEPT_AND_RERAISE, &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_CALL_FUNCTION_ADAPTIVE, + &&TARGET_CALL_FUNCTION_BUILTIN, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index a8ae09ff0e3839..f44652ec785fee 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -158,12 +158,14 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) { static uint8_t adaptive_opcodes[256] = { [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE, [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE, + [CALL_FUNCTION] = CALL_FUNCTION_ADAPTIVE, }; /* The number of cache entries required for a "family" of instructions. */ static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyLoadAttrCache */ [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ + [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyCallFunctionCache */ }; /* Return the oparg for the cache_offset and instruction index. @@ -633,3 +635,79 @@ _Py_Specialize_LoadGlobal( cache0->counter = saturating_start(); return 0; } + +int +_Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, + PyObject *builtins, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +{ + PyObject *callable = stack_pointer[-(original_oparg + 1)]; + _PyAdaptiveEntry *cache0 = &cache->adaptive; + _PyCallFunctionCache *cache1 = &cache[-1].call_function; + if (!PyCallable_Check(callable)) { + goto fail; + } + if (!PyDict_CheckExact(builtins)) { + goto fail; + } + PyDictObject *builtins_dict = (PyDictObject *)builtins; + if (builtins_dict->ma_keys->dk_kind != DICT_KEYS_UNICODE) { + goto fail; + } + /* Specialize C methods */ + if (PyCFunction_CheckExact(callable)) { + PyCFunctionObject *meth = (PyCFunctionObject *)callable; + if (meth->m_ml == NULL) { + goto fail; + } + const char *name_ascii = meth->m_ml->ml_name; + /* Specialize builtins: check method actually exists in builtins */ + PyObject *value = PyDict_GetItemString(builtins, name_ascii); + if (value == NULL || + value != (PyObject *)meth) { + goto fail; + } + _BuiltinCallKinds kind = -1; + switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL | + METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { + case METH_VARARGS: + case METH_VARARGS | METH_KEYWORDS: + kind = PYCFUNCTION_WITH_KEYWORDS; + break; + case METH_FASTCALL: + kind = _PYCFUNCTION_FAST; + break; + case METH_FASTCALL | METH_KEYWORDS: + kind = _PYCFUNCTION_FAST_WITH_KEYWORDS; + break; + case METH_NOARGS: + kind = PYCFUNCTION_NOARGS; + break; + case METH_O: + kind = PYCFUNCTION_O; + break; + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + kind = PYCMETHOD; + break; + default: + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bad call flags"); + goto fail; + } + assert(kind > 0); + PyCFunction cfunc = PyCFunction_GET_FUNCTION(meth); + assert(cfunc != NULL); + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN, _Py_OPARG(*instr)); + cache0->index = (uint16_t)kind; + cache1->cfunc = cfunc; + goto success; + } +fail: + STAT_INC(CALL_FUNCTION, specialization_failure); + assert(!PyErr_Occurred()); + cache_backoff(cache0); + return 0; +success: + STAT_INC(CALL_FUNCTION, specialization_success); + assert(!PyErr_Occurred()); + cache0->counter = saturating_start(); + return 0; +} \ No newline at end of file From 1539105c163f7442ae3fc80a9a2dae86eaa33468 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 27 Jun 2021 01:31:22 +0800 Subject: [PATCH 02/49] fix some GCC compilation warnings --- Python/ceval.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index ff73cd15bd3b28..f8d4b5ff327b4a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4075,7 +4075,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) case TARGET(CALL_FUNCTION_ADAPTIVE): { SpecializedCacheEntry *cache = GET_CACHE(); if (cache->adaptive.counter == 0) { - PyObject *callable = PEEK(cache->adaptive.original_oparg + 1); next_instr--; if (_Py_Specialize_CallFunction(stack_pointer, cache->adaptive.original_oparg, BUILTINS(), next_instr, cache) < 0) { goto error; @@ -5927,7 +5926,7 @@ call_function_builtin(PyThreadState *tstate, #define MAYBE_TRACE(cfunc) if (use_tracing) {C_TRACE(x, cfunc);} else {x = cfunc;} PyObject **pfunc = (*pp_stack) - oparg - 1; - PyObject *x, *w; + PyObject *x = NULL, *w; PyObject **stack = (*pp_stack) - oparg; PyObject *func = *pfunc; /* Only for tracing purposes */ @@ -5941,11 +5940,12 @@ call_function_builtin(PyThreadState *tstate, break; } case _PYCFUNCTION_FAST: { - MAYBE_TRACE(((_PyCFunctionFast)cfunc)(self, stack, oparg)); + MAYBE_TRACE(((_PyCFunctionFast)(void(*)(void))cfunc)(self, stack, oparg)); break; } case _PYCFUNCTION_FAST_WITH_KEYWORDS: { - MAYBE_TRACE(((_PyCFunctionFastWithKeywords)cfunc)(self, stack, oparg, 0)); + MAYBE_TRACE(((_PyCFunctionFastWithKeywords)(void(*)(void))cfunc)( + self, stack, oparg, 0)); break; } case PYCFUNCTION_WITH_KEYWORDS: { @@ -5953,7 +5953,7 @@ call_function_builtin(PyThreadState *tstate, if (args == NULL) { break; } - MAYBE_TRACE(((PyCFunctionWithKeywords)cfunc)(self, args, NULL)); + MAYBE_TRACE(((PyCFunctionWithKeywords)(void(*)(void))cfunc)(self, args, NULL)); Py_DECREF(args); break; } From 68e5451bc82921858e98ae6cc91d011b51463739 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 27 Jun 2021 02:28:21 +0800 Subject: [PATCH 03/49] hopefully fix the segfaults --- Python/ceval.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index f8d4b5ff327b4a..b6af60b338035c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4103,7 +4103,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) cache0->original_oparg, cframe.use_tracing); stack_pointer = sp; PUSH(res); - DEOPT_IF(res == NULL, CALL_FUNCTION); + if (res == NULL) { + /* Not deopting because this doesn't mean our optimization was wrong. + `res` can be NULL for valid reasons. Eg. getattr(x, 'invalid'). + In those cases an exception is set, so we must handle it. + */ + goto error; + } record_cache_hit(cache0); STAT_INC(CALL_FUNCTION, hit); DISPATCH(); From 1d841b0744591d3d64de8dced8a4fdf0237af29c Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 27 Jun 2021 23:42:11 +0800 Subject: [PATCH 04/49] Rename to CALL_CFUNCTION and generalize to all c functions --- Include/internal/pycore_code.h | 4 ++-- Include/opcode.h | 2 +- Lib/opcode.py | 2 +- Python/ceval.c | 33 ++++++++++++++++++++------------- Python/opcode_targets.h | 2 +- Python/specialize.c | 33 ++++++++++++++------------------- 6 files changed, 39 insertions(+), 37 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 3fb3eefd67c3e2..ddb73d30c807cd 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -338,7 +338,7 @@ typedef enum { PYCFUNCTION_WITH_KEYWORDS = 4, _PYCFUNCTION_FAST = 5, _PYCFUNCTION_FAST_WITH_KEYWORDS = 6, - PYCMETHOD = 7, /* Isn't normally used in builtins. */ + PYCMETHOD = 7, } _BuiltinCallKinds; /* Specialization functions */ @@ -346,7 +346,7 @@ typedef enum { int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, - PyObject *builtins, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); + _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); #define SPECIALIZATION_STATS 0 #define SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/opcode.h b/Include/opcode.h index 5e7e75fe45098e..c2283a2c143481 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -146,7 +146,7 @@ extern "C" { #define LOAD_GLOBAL_MODULE 38 #define LOAD_GLOBAL_BUILTIN 39 #define CALL_FUNCTION_ADAPTIVE 40 -#define CALL_FUNCTION_BUILTIN 41 +#define CALL_CFUNCTION 41 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index f2e1ef6185a20c..016ba9f1fde49f 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -230,5 +230,5 @@ def jabs_op(name, op): "LOAD_GLOBAL_MODULE", "LOAD_GLOBAL_BUILTIN", "CALL_FUNCTION_ADAPTIVE", - "CALL_FUNCTION_BUILTIN", + "CALL_CFUNCTION", ] diff --git a/Python/ceval.c b/Python/ceval.c index b6af60b338035c..2eccb6bac29ea8 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -55,7 +55,7 @@ Py_LOCAL_INLINE(PyObject *) call_function( static PyObject * do_call_core( PyThreadState *tstate, PyObject *func, PyObject *callargs, PyObject *kwdict, int use_tracing); -Py_LOCAL_INLINE(PyObject *) call_function_builtin( +Py_LOCAL_INLINE(PyObject *) call_cfunction( PyThreadState *tstate, _PyAdaptiveEntry *cache0, _PyCallFunctionCache *cache1, @@ -4076,7 +4076,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) SpecializedCacheEntry *cache = GET_CACHE(); if (cache->adaptive.counter == 0) { next_instr--; - if (_Py_Specialize_CallFunction(stack_pointer, cache->adaptive.original_oparg, BUILTINS(), next_instr, cache) < 0) { + if (_Py_Specialize_CallFunction(stack_pointer, cache->adaptive.original_oparg, next_instr, cache) < 0) { goto error; } DISPATCH(); @@ -4088,7 +4088,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) JUMP_TO_INSTRUCTION(CALL_FUNCTION); } } - case TARGET(CALL_FUNCTION_BUILTIN): { + case TARGET(CALL_CFUNCTION): { /* Builtin functions, WITHOUT keywords */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; @@ -4099,7 +4099,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyObject **sp, *res; sp = stack_pointer; - res = call_function_builtin(tstate, cache0, cache1, &sp, + res = call_cfunction(tstate, cache0, cache1, &sp, cache0->original_oparg, cframe.use_tracing); stack_pointer = sp; PUSH(res); @@ -5920,9 +5920,9 @@ do_call_core(PyThreadState *tstate, return PyObject_Call(func, callargs, kwdict); } -/* Fast alternative for non-keyword calls to builtins. */ +/* Fast alternative for non-keyword calls to C functions. */ Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION -call_function_builtin(PyThreadState *tstate, +call_cfunction(PyThreadState *tstate, _PyAdaptiveEntry *cache0, _PyCallFunctionCache *cache1, PyObject ***pp_stack, @@ -5941,18 +5941,24 @@ call_function_builtin(PyThreadState *tstate, switch ((_BuiltinCallKinds)cache0->index) { case PYCFUNCTION_NOARGS: - case PYCFUNCTION_O: { + case PYCFUNCTION_O: MAYBE_TRACE(cfunc(self, *stack)); break; - } - case _PYCFUNCTION_FAST: { + case _PYCFUNCTION_FAST: MAYBE_TRACE(((_PyCFunctionFast)(void(*)(void))cfunc)(self, stack, oparg)); break; - } - case _PYCFUNCTION_FAST_WITH_KEYWORDS: { + case _PYCFUNCTION_FAST_WITH_KEYWORDS: MAYBE_TRACE(((_PyCFunctionFastWithKeywords)(void(*)(void))cfunc)( self, stack, oparg, 0)); break; + case PYCFUNCTION: { + PyObject *args = _PyTuple_FromArray(stack, oparg); + if (args == NULL) { + break; + } + MAYBE_TRACE(cfunc(self, args)); + Py_DECREF(args); + break; } case PYCFUNCTION_WITH_KEYWORDS: { PyObject *args = _PyTuple_FromArray(stack, oparg); @@ -5963,8 +5969,9 @@ call_function_builtin(PyThreadState *tstate, Py_DECREF(args); break; } - /* Bulitins shouldn't have these flags */ - case PYCFUNCTION: + /* This flag only applies to PyMethodObject. + We're only optimizing for PyCfunctionObject + */ case PYCMETHOD: default: Py_UNREACHABLE(); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index dad50a165ac5bc..ce346e6ca72029 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -40,7 +40,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_CALL_FUNCTION_ADAPTIVE, - &&TARGET_CALL_FUNCTION_BUILTIN, + &&TARGET_CALL_CFUNCTION, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index f44652ec785fee..baf0d87581039d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -636,9 +636,13 @@ _Py_Specialize_LoadGlobal( return 0; } +/* TODO: + - Specialize calling C types like int() with CALL_CTYPE + - Specialize python function calls. +*/ int _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, - PyObject *builtins, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) + _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) { PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; @@ -646,30 +650,19 @@ _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, if (!PyCallable_Check(callable)) { goto fail; } - if (!PyDict_CheckExact(builtins)) { - goto fail; - } - PyDictObject *builtins_dict = (PyDictObject *)builtins; - if (builtins_dict->ma_keys->dk_kind != DICT_KEYS_UNICODE) { - goto fail; - } - /* Specialize C methods */ + /* Specialize C functions */ if (PyCFunction_CheckExact(callable)) { PyCFunctionObject *meth = (PyCFunctionObject *)callable; if (meth->m_ml == NULL) { goto fail; } const char *name_ascii = meth->m_ml->ml_name; - /* Specialize builtins: check method actually exists in builtins */ - PyObject *value = PyDict_GetItemString(builtins, name_ascii); - if (value == NULL || - value != (PyObject *)meth) { - goto fail; - } _BuiltinCallKinds kind = -1; switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { case METH_VARARGS: + kind = PYCFUNCTION; + break; case METH_VARARGS | METH_KEYWORDS: kind = PYCFUNCTION_WITH_KEYWORDS; break; @@ -685,17 +678,19 @@ _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, case METH_O: kind = PYCFUNCTION_O; break; + /* This case should never happen with PyCFunctionObject -- only + PyMethodObject. See zlib.compressobj()'s methods for an example. + */ case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: - kind = PYCMETHOD; - break; + // kind = PYCMETHOD; default: SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bad call flags"); - goto fail; + return -1; } assert(kind > 0); PyCFunction cfunc = PyCFunction_GET_FUNCTION(meth); assert(cfunc != NULL); - *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION, _Py_OPARG(*instr)); cache0->index = (uint16_t)kind; cache1->cfunc = cfunc; goto success; From f41b623755ca1c7e52fcf2ecd364f9f5153e9c5b Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 27 Jun 2021 23:47:10 +0800 Subject: [PATCH 05/49] fix formatting, remove redundant check --- Python/specialize.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index baf0d87581039d..98d133cba5c28e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -647,9 +647,6 @@ _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; _PyCallFunctionCache *cache1 = &cache[-1].call_function; - if (!PyCallable_Check(callable)) { - goto fail; - } /* Specialize C functions */ if (PyCFunction_CheckExact(callable)) { PyCFunctionObject *meth = (PyCFunctionObject *)callable; @@ -705,4 +702,4 @@ _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, assert(!PyErr_Occurred()); cache0->counter = saturating_start(); return 0; -} \ No newline at end of file +} From de520bd8c08ed9c9957cf3e7644ea9ec5733333e Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 28 Jun 2021 22:01:27 +0800 Subject: [PATCH 06/49] goto fail rather than return -1 --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 98d133cba5c28e..6775b4ecb04f25 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -682,7 +682,7 @@ _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, // kind = PYCMETHOD; default: SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bad call flags"); - return -1; + goto fail; } assert(kind > 0); PyCFunction cfunc = PyCFunction_GET_FUNCTION(meth); From 0e0a3a46f93c89bd6fbc50619d52eeb1647f6839 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 28 Jun 2021 22:25:19 +0800 Subject: [PATCH 07/49] Create 2021-06-28-22-23-59.bpo-44525.sSvUKG.rst --- .../Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst new file mode 100644 index 00000000000000..3ce89f82da7277 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst @@ -0,0 +1,3 @@ +Specialize ``CALL_FUNCTION`` opcode with ``CALL_CFUNCTION``. This speeds up +calls to ``PyCFunctionObject``. As a result, many builtin functions and +C-extension functions should experience reduced call overhead. From 65de42d6abe58a80d4094d8ff16d53c08b18ff08 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 29 Jun 2021 22:59:43 +0800 Subject: [PATCH 08/49] Apply easier suggestions from Mark's review --- Include/internal/pycore_code.h | 6 +----- Python/ceval.c | 27 ++++++++++++--------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index ddb73d30c807cd..f5abc9ae4c802e 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -54,11 +54,7 @@ typedef struct { } _PyLoadGlobalCache; typedef struct { - union { - PyCFunction cfunc; - /* TODO: func_version field for Python function calls*/ - uint64_t _; /* Just for alignment on 32-bit */ - }; + PyCFunction cfunc; } _PyCallFunctionCache; /* Add specialized versions of entries to this union. diff --git a/Python/ceval.c b/Python/ceval.c index 2eccb6bac29ea8..463dc83ca2562d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -60,7 +60,7 @@ Py_LOCAL_INLINE(PyObject *) call_cfunction( _PyAdaptiveEntry *cache0, _PyCallFunctionCache *cache1, PyObject ***pp_stack, - Py_ssize_t oparg, int use_tracing); + Py_ssize_t oparg); #ifdef LLTRACE static int lltrace; @@ -4099,8 +4099,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyObject **sp, *res; sp = stack_pointer; - res = call_cfunction(tstate, cache0, cache1, &sp, - cache0->original_oparg, cframe.use_tracing); + assert(cframe.use_tracing == 0); + res = call_cfunction(tstate, cache0, cache1, &sp, cache0->original_oparg); stack_pointer = sp; PUSH(res); if (res == NULL) { @@ -5926,37 +5926,34 @@ call_cfunction(PyThreadState *tstate, _PyAdaptiveEntry *cache0, _PyCallFunctionCache *cache1, PyObject ***pp_stack, - Py_ssize_t oparg, - int use_tracing) + Py_ssize_t oparg) { -#define MAYBE_TRACE(cfunc) if (use_tracing) {C_TRACE(x, cfunc);} else {x = cfunc;} PyObject **pfunc = (*pp_stack) - oparg - 1; PyObject *x = NULL, *w; PyObject **stack = (*pp_stack) - oparg; - PyObject *func = *pfunc; /* Only for tracing purposes */ - PyObject *self = PyCFunction_GET_SELF(func); - PyCFunction cfunc = PyCFunction_GET_FUNCTION(func); + PyObject *self = PyCFunction_GET_SELF(*pfunc); + PyCFunction cfunc = PyCFunction_GET_FUNCTION(*pfunc); switch ((_BuiltinCallKinds)cache0->index) { case PYCFUNCTION_NOARGS: case PYCFUNCTION_O: - MAYBE_TRACE(cfunc(self, *stack)); + x = cfunc(self, *stack); break; case _PYCFUNCTION_FAST: - MAYBE_TRACE(((_PyCFunctionFast)(void(*)(void))cfunc)(self, stack, oparg)); + x = ((_PyCFunctionFast)(void(*)(void))cfunc)(self, stack, oparg); break; case _PYCFUNCTION_FAST_WITH_KEYWORDS: - MAYBE_TRACE(((_PyCFunctionFastWithKeywords)(void(*)(void))cfunc)( - self, stack, oparg, 0)); + x = ((_PyCFunctionFastWithKeywords)(void(*)(void))cfunc)( + self, stack, oparg, 0); break; case PYCFUNCTION: { PyObject *args = _PyTuple_FromArray(stack, oparg); if (args == NULL) { break; } - MAYBE_TRACE(cfunc(self, args)); + x = cfunc(self, args); Py_DECREF(args); break; } @@ -5965,7 +5962,7 @@ call_cfunction(PyThreadState *tstate, if (args == NULL) { break; } - MAYBE_TRACE(((PyCFunctionWithKeywords)(void(*)(void))cfunc)(self, args, NULL)); + x = ((PyCFunctionWithKeywords)(void(*)(void))cfunc)(self, args, NULL); Py_DECREF(args); break; } From 685557f79d4aeb2dc142a5e6a48c40aa7e915201 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 1 Jul 2021 00:57:30 +0800 Subject: [PATCH 09/49] Only specialize METH_FASTCALL and METH_O --- Include/internal/pycore_code.h | 25 ++----- Include/opcode.h | 3 +- Lib/opcode.py | 3 +- Python/ceval.c | 132 +++++++++++++-------------------- Python/opcode_targets.h | 4 +- Python/specialize.c | 90 +++++++++++++++------- 6 files changed, 128 insertions(+), 129 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index f5abc9ae4c802e..98e2e0179e588d 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -55,7 +55,7 @@ typedef struct { typedef struct { PyCFunction cfunc; -} _PyCallFunctionCache; +} _PyCallCFunctionCache; /* Add specialized versions of entries to this union. * @@ -72,7 +72,7 @@ typedef union { _PyAdaptiveEntry adaptive; _PyLoadAttrCache load_attr; _PyLoadGlobalCache load_global; - _PyCallFunctionCache call_function; + _PyCallCFunctionCache call_function; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) @@ -324,28 +324,15 @@ cache_backoff(_PyAdaptiveEntry *entry) { entry->counter = BACKOFF; } -/* Corresponds to various function pointers -https://docs.python.org/3/c-api/structures.html#implementing-functions-and-methods -*/ -typedef enum { - PYCFUNCTION = 1, - PYCFUNCTION_O = 2, - PYCFUNCTION_NOARGS = 3, - PYCFUNCTION_WITH_KEYWORDS = 4, - _PYCFUNCTION_FAST = 5, - _PYCFUNCTION_FAST_WITH_KEYWORDS = 6, - PYCMETHOD = 7, -} _BuiltinCallKinds; - /* Specialization functions */ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); -int _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, - _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +int _Py_Specialize_CallFunction(PyObject *builtins, PyObject **stack_pointer, + uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); -#define SPECIALIZATION_STATS 0 -#define SPECIALIZATION_STATS_DETAILED 0 +#define SPECIALIZATION_STATS 1 +#define SPECIALIZATION_STATS_DETAILED 1 #if SPECIALIZATION_STATS diff --git a/Include/opcode.h b/Include/opcode.h index c2283a2c143481..cb0fc89cb1f65c 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -146,7 +146,8 @@ extern "C" { #define LOAD_GLOBAL_MODULE 38 #define LOAD_GLOBAL_BUILTIN 39 #define CALL_FUNCTION_ADAPTIVE 40 -#define CALL_CFUNCTION 41 +#define CALL_CFUNCTION_FAST 41 +#define CALL_CFUNCTION_O 42 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 016ba9f1fde49f..7652e68956253a 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -230,5 +230,6 @@ def jabs_op(name, op): "LOAD_GLOBAL_MODULE", "LOAD_GLOBAL_BUILTIN", "CALL_FUNCTION_ADAPTIVE", - "CALL_CFUNCTION", + "CALL_CFUNCTION_FAST", + "CALL_CFUNCTION_O", ] diff --git a/Python/ceval.c b/Python/ceval.c index 463dc83ca2562d..b06c3f5912b42b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -55,12 +55,6 @@ Py_LOCAL_INLINE(PyObject *) call_function( static PyObject * do_call_core( PyThreadState *tstate, PyObject *func, PyObject *callargs, PyObject *kwdict, int use_tracing); -Py_LOCAL_INLINE(PyObject *) call_cfunction( - PyThreadState *tstate, - _PyAdaptiveEntry *cache0, - _PyCallFunctionCache *cache1, - PyObject ***pp_stack, - Py_ssize_t oparg); #ifdef LLTRACE static int lltrace; @@ -4076,7 +4070,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) SpecializedCacheEntry *cache = GET_CACHE(); if (cache->adaptive.counter == 0) { next_instr--; - if (_Py_Specialize_CallFunction(stack_pointer, cache->adaptive.original_oparg, next_instr, cache) < 0) { + if (_Py_Specialize_CallFunction(BUILTINS(), stack_pointer, + cache->adaptive.original_oparg, next_instr, cache) < 0) { goto error; } DISPATCH(); @@ -4088,20 +4083,29 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) JUMP_TO_INSTRUCTION(CALL_FUNCTION); } } - case TARGET(CALL_CFUNCTION): { - /* Builtin functions, WITHOUT keywords */ + case TARGET(CALL_CFUNCTION_FAST): { + /* Builtin METH_FASTCALL functions, without keywords */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - _PyCallFunctionCache *cache1 = &caches[-1].call_function; - PyObject *callable = PEEK(cache0->original_oparg + 1); - DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); - DEOPT_IF(PyCFunction_GET_FUNCTION(callable) != cache1->cfunc, CALL_FUNCTION); + _PyCallCFunctionCache *cache1 = &caches[-1].call_function; + PyObject **callable = &PEEK(cache0->original_oparg + 1); + DEOPT_IF(!PyCFunction_CheckExact(*callable), CALL_FUNCTION); + PyCFunction cfunc = PyCFunction_GET_FUNCTION(*callable); + DEOPT_IF(cfunc != cache1->cfunc, CALL_FUNCTION); - PyObject **sp, *res; - sp = stack_pointer; assert(cframe.use_tracing == 0); - res = call_cfunction(tstate, cache0, cache1, &sp, cache0->original_oparg); - stack_pointer = sp; + // res = func(self, args, nargs) + PyObject *res = ((_PyCFunctionFast)(void(*)(void))cfunc)( + PyCFunction_GET_SELF(*callable), + stack_pointer - cache0->original_oparg, + cache0->original_oparg); + assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + + /* Clear the stack of the function object. */ + while (stack_pointer > callable) { + PyObject *x = EXT_POP(stack_pointer); + Py_DECREF(x); + } PUSH(res); if (res == NULL) { /* Not deopting because this doesn't mean our optimization was wrong. @@ -4114,6 +4118,36 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) STAT_INC(CALL_FUNCTION, hit); DISPATCH(); } + case TARGET(CALL_CFUNCTION_O): { + /* Builtin METH_O functions */ + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyCallCFunctionCache *cache1 = &caches[-1].call_function; + PyObject **callable = &PEEK(cache0->original_oparg + 1); + DEOPT_IF(!PyCFunction_CheckExact(*callable), CALL_FUNCTION); + PyCFunction cfunc = PyCFunction_GET_FUNCTION(*callable); + DEOPT_IF(cfunc != cache1->cfunc, CALL_FUNCTION); + + assert(cframe.use_tracing == 0); + // res = func(self, arg) + PyObject *res = cfunc( + PyCFunction_GET_SELF(*callable), + *(stack_pointer - cache0->original_oparg)); + assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + + /* Clear the stack of the function object. */ + while (stack_pointer > callable) { + PyObject *x = EXT_POP(stack_pointer); + Py_DECREF(x); + } + PUSH(res); + if (res == NULL) { + goto error; + } + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); + DISPATCH(); + } case TARGET(CALL_FUNCTION_KW): { PyObject **sp, *res, *names; @@ -5920,70 +5954,6 @@ do_call_core(PyThreadState *tstate, return PyObject_Call(func, callargs, kwdict); } -/* Fast alternative for non-keyword calls to C functions. */ -Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION -call_cfunction(PyThreadState *tstate, - _PyAdaptiveEntry *cache0, - _PyCallFunctionCache *cache1, - PyObject ***pp_stack, - Py_ssize_t oparg) -{ - - PyObject **pfunc = (*pp_stack) - oparg - 1; - PyObject *x = NULL, *w; - PyObject **stack = (*pp_stack) - oparg; - - PyObject *self = PyCFunction_GET_SELF(*pfunc); - PyCFunction cfunc = PyCFunction_GET_FUNCTION(*pfunc); - - switch ((_BuiltinCallKinds)cache0->index) { - case PYCFUNCTION_NOARGS: - case PYCFUNCTION_O: - x = cfunc(self, *stack); - break; - case _PYCFUNCTION_FAST: - x = ((_PyCFunctionFast)(void(*)(void))cfunc)(self, stack, oparg); - break; - case _PYCFUNCTION_FAST_WITH_KEYWORDS: - x = ((_PyCFunctionFastWithKeywords)(void(*)(void))cfunc)( - self, stack, oparg, 0); - break; - case PYCFUNCTION: { - PyObject *args = _PyTuple_FromArray(stack, oparg); - if (args == NULL) { - break; - } - x = cfunc(self, args); - Py_DECREF(args); - break; - } - case PYCFUNCTION_WITH_KEYWORDS: { - PyObject *args = _PyTuple_FromArray(stack, oparg); - if (args == NULL) { - break; - } - x = ((PyCFunctionWithKeywords)(void(*)(void))cfunc)(self, args, NULL); - Py_DECREF(args); - break; - } - /* This flag only applies to PyMethodObject. - We're only optimizing for PyCfunctionObject - */ - case PYCMETHOD: - default: - Py_UNREACHABLE(); - break; - } - assert((x != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - - /* Clear the stack of the function object. */ - while ((*pp_stack) > pfunc) { - w = EXT_POP(*pp_stack); - Py_DECREF(w); - } - - return x; -} /* Extract a slice index from a PyLong or an object with the nb_index slot defined, and store in *pi. Silently reduce values larger than PY_SSIZE_T_MAX to PY_SSIZE_T_MAX, diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index ce346e6ca72029..711aa7a11b97e8 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -40,8 +40,8 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_CALL_FUNCTION_ADAPTIVE, - &&TARGET_CALL_CFUNCTION, - &&_unknown_opcode, + &&TARGET_CALL_CFUNCTION_FAST, + &&TARGET_CALL_CFUNCTION_O, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 6775b4ecb04f25..5374175aa36c34 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -74,6 +74,7 @@ _Py_PrintSpecializationStats(void) printf("Specialization stats:\n"); print_stats(&_specialization_stats[LOAD_ATTR], "load_attr"); print_stats(&_specialization_stats[LOAD_GLOBAL], "load_global"); + print_stats(&_specialization_stats[CALL_FUNCTION], "call_function"); } #if SPECIALIZATION_STATS_DETAILED @@ -641,57 +642,96 @@ _Py_Specialize_LoadGlobal( - Specialize python function calls. */ int -_Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, +_Py_Specialize_CallFunction(PyObject *builtins, + PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) { PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; - _PyCallFunctionCache *cache1 = &cache[-1].call_function; + _PyCallCFunctionCache *cache1 = &cache[-1].call_function; + PyTypeObject *type = Py_TYPE(callable); /* Specialize C functions */ if (PyCFunction_CheckExact(callable)) { PyCFunctionObject *meth = (PyCFunctionObject *)callable; if (meth->m_ml == NULL) { goto fail; } + PyCFunction cfunc = PyCFunction_GET_FUNCTION(meth); + assert(cfunc != NULL); const char *name_ascii = meth->m_ml->ml_name; - _BuiltinCallKinds kind = -1; + /* Don't optimize anything that isn't FASTCALL, has keywords, has varargs, or + has no args. Microbenchmarks show they don't benefit much to be worth a + specialized instruction. + */ switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { + case METH_FASTCALL: + // _PYCFUNCTION_FAST; + *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION_FAST, _Py_OPARG(*instr)); + cache1->cfunc = cfunc; + goto success; + case METH_O: + // PYCFUNCTION_O; + *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION_O, _Py_OPARG(*instr)); + cache1->cfunc = cfunc; + goto success; case METH_VARARGS: - kind = PYCFUNCTION; - break; + // PYCFUNCTION + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION"); + goto fail; case METH_VARARGS | METH_KEYWORDS: - kind = PYCFUNCTION_WITH_KEYWORDS; - break; - case METH_FASTCALL: - kind = _PYCFUNCTION_FAST; - break; + // PYCFUNCTION_WITH_KEYWORDS + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + "PYCFUNCTION_WITH_KEYWORDS"); + goto fail; case METH_FASTCALL | METH_KEYWORDS: - kind = _PYCFUNCTION_FAST_WITH_KEYWORDS; - break; + // _PYCFUNCTION_FAST_WITH_KEYWORDS; + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + "_PYCFUNCTION_FAST_WITH_KEYWORDS"); + goto fail; case METH_NOARGS: - kind = PYCFUNCTION_NOARGS; - break; - case METH_O: - kind = PYCFUNCTION_O; - break; + // PYCFUNCTION_NOARGS; + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION_NOARGS"); + goto fail; /* This case should never happen with PyCFunctionObject -- only PyMethodObject. See zlib.compressobj()'s methods for an example. */ case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: - // kind = PYCMETHOD; + // PYCMETHOD default: SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bad call flags"); goto fail; } - assert(kind > 0); - PyCFunction cfunc = PyCFunction_GET_FUNCTION(meth); - assert(cfunc != NULL); - *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION, _Py_OPARG(*instr)); - cache0->index = (uint16_t)kind; - cache1->cfunc = cfunc; - goto success; } + /* These will be implemented in the future. Collecting stats for now. */ +#if SPECIALIZATION_STATS + if (PyFunction_Check(callable)) { + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "python function"); + goto fail; + } + if (PyInstanceMethod_Check(callable)) { + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "new style bound method"); + goto fail; + } + if (PyMethod_Check(callable)) { + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bound method"); + goto fail; + } + if (PyType_Check(callable)) { + if ((PyObject_HasAttrString(callable, "__dict__") || + PyObject_HasAttrString(callable, "__slots__")) && + PyObject_TypeCheck(callable, &PyType_Type) && + !PyType_CheckExact(callable)) { + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "python class"); + } + if (PyDict_GetItemString(builtins, ((PyTypeObject *)callable)->tp_name)) { + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "__builtins__ type init"); + } + goto fail; + } + /* So far this catches things like weakref.weakref */ + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "???"); +#endif fail: STAT_INC(CALL_FUNCTION, specialization_failure); assert(!PyErr_Occurred()); From 5baa936024c8f5010f5851a9779dd83d9342f302 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 1 Jul 2021 00:58:42 +0800 Subject: [PATCH 10/49] Update 2021-06-28-22-23-59.bpo-44525.sSvUKG.rst --- .../2021-06-28-22-23-59.bpo-44525.sSvUKG.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst index 3ce89f82da7277..201998bcd1ecaf 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst @@ -1,3 +1,4 @@ -Specialize ``CALL_FUNCTION`` opcode with ``CALL_CFUNCTION``. This speeds up -calls to ``PyCFunctionObject``. As a result, many builtin functions and -C-extension functions should experience reduced call overhead. +Specialize ``CALL_FUNCTION`` opcode with ``CALL_CFUNCTION_O`` and +``CALL_CFUNCTION_FAST``. This speeds up calls to ``PyCFunctionObject``. +As a result, many builtin functions and C-extension functions should experience +reduced call overhead. From bc69360f6860f3c168c3b97a70966062aae2e727 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 1 Jul 2021 01:02:47 +0800 Subject: [PATCH 11/49] turn off specialization stats flag --- Include/internal/pycore_code.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 98e2e0179e588d..aa6e1ec34d87e8 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -331,8 +331,8 @@ int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNI int _Py_Specialize_CallFunction(PyObject *builtins, PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); -#define SPECIALIZATION_STATS 1 -#define SPECIALIZATION_STATS_DETAILED 1 +#define SPECIALIZATION_STATS 0 +#define SPECIALIZATION_STATS_DETAILED 0 #if SPECIALIZATION_STATS From 8671a606e38f41112bbf8c8be2f23fee7efa1eb1 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Jul 2021 17:13:48 +0800 Subject: [PATCH 12/49] Apply suggestions by Mark --- Include/internal/pycore_code.h | 4 -- Python/ceval.c | 68 +++++++++++++++++----------------- Python/specialize.c | 9 +---- 3 files changed, 35 insertions(+), 46 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index aa6e1ec34d87e8..e9a4f5bbde373c 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -53,9 +53,6 @@ typedef struct { uint32_t builtin_keys_version; } _PyLoadGlobalCache; -typedef struct { - PyCFunction cfunc; -} _PyCallCFunctionCache; /* Add specialized versions of entries to this union. * @@ -72,7 +69,6 @@ typedef union { _PyAdaptiveEntry adaptive; _PyLoadAttrCache load_attr; _PyLoadGlobalCache load_global; - _PyCallCFunctionCache call_function; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) diff --git a/Python/ceval.c b/Python/ceval.c index b06c3f5912b42b..91a474e8a03fd3 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4084,69 +4084,67 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } } case TARGET(CALL_CFUNCTION_FAST): { + assert(cframe.use_tracing == 0); /* Builtin METH_FASTCALL functions, without keywords */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - _PyCallCFunctionCache *cache1 = &caches[-1].call_function; - PyObject **callable = &PEEK(cache0->original_oparg + 1); - DEOPT_IF(!PyCFunction_CheckExact(*callable), CALL_FUNCTION); - PyCFunction cfunc = PyCFunction_GET_FUNCTION(*callable); - DEOPT_IF(cfunc != cache1->cfunc, CALL_FUNCTION); - - assert(cframe.use_tracing == 0); - // res = func(self, args, nargs) + uint8_t nargs = cache0->original_oparg; + PyObject **pfunc = &PEEK(nargs + 1); + PyObject *callable = *pfunc; + DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); + DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL, + CALL_FUNCTION); + + PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); + /* res = func(self, args, nargs) */ PyObject *res = ((_PyCFunctionFast)(void(*)(void))cfunc)( - PyCFunction_GET_SELF(*callable), - stack_pointer - cache0->original_oparg, - cache0->original_oparg); + PyCFunction_GET_SELF(callable), + &PEEK(nargs), + nargs); assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); /* Clear the stack of the function object. */ - while (stack_pointer > callable) { + while (stack_pointer > pfunc) { PyObject *x = EXT_POP(stack_pointer); Py_DECREF(x); } PUSH(res); + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); if (res == NULL) { - /* Not deopting because this doesn't mean our optimization was wrong. - `res` can be NULL for valid reasons. Eg. getattr(x, 'invalid'). - In those cases an exception is set, so we must handle it. + /* Not deopting because this doesn't mean our optimization was + wrong. `res` can be NULL for valid reasons. Eg. getattr(x, + 'invalid'). In those cases an exception is set, so we must + handle it. */ goto error; } - record_cache_hit(cache0); - STAT_INC(CALL_FUNCTION, hit); DISPATCH(); } case TARGET(CALL_CFUNCTION_O): { + assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - _PyCallCFunctionCache *cache1 = &caches[-1].call_function; - PyObject **callable = &PEEK(cache0->original_oparg + 1); - DEOPT_IF(!PyCFunction_CheckExact(*callable), CALL_FUNCTION); - PyCFunction cfunc = PyCFunction_GET_FUNCTION(*callable); - DEOPT_IF(cfunc != cache1->cfunc, CALL_FUNCTION); + DEOPT_IF(cache0->original_oparg != 1, CALL_FUNCTION); - assert(cframe.use_tracing == 0); - // res = func(self, arg) - PyObject *res = cfunc( - PyCFunction_GET_SELF(*callable), - *(stack_pointer - cache0->original_oparg)); + PyObject *callable = SECOND(); + DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); + DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL_FUNCTION); + + PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); + PyObject *res = cfunc(PyCFunction_GET_SELF(callable), POP()); assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); /* Clear the stack of the function object. */ - while (stack_pointer > callable) { - PyObject *x = EXT_POP(stack_pointer); - Py_DECREF(x); - } - PUSH(res); + Py_DECREF(callable); + SET_TOP(res); + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); if (res == NULL) { goto error; } - record_cache_hit(cache0); - STAT_INC(CALL_FUNCTION, hit); - DISPATCH(); + DISPATCH(); } case TARGET(CALL_FUNCTION_KW): { PyObject **sp, *res, *names; diff --git a/Python/specialize.c b/Python/specialize.c index 5374175aa36c34..1b4e94819da092 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -166,7 +166,7 @@ static uint8_t adaptive_opcodes[256] = { static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyLoadAttrCache */ [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ - [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyCallFunctionCache */ + [CALL_FUNCTION] = 1, /* _PyAdaptiveEntry */ }; /* Return the oparg for the cache_offset and instruction index. @@ -648,16 +648,13 @@ _Py_Specialize_CallFunction(PyObject *builtins, { PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; - _PyCallCFunctionCache *cache1 = &cache[-1].call_function; PyTypeObject *type = Py_TYPE(callable); /* Specialize C functions */ if (PyCFunction_CheckExact(callable)) { PyCFunctionObject *meth = (PyCFunctionObject *)callable; - if (meth->m_ml == NULL) { + if (PyCFunction_GET_FUNCTION(callable) == NULL) { goto fail; } - PyCFunction cfunc = PyCFunction_GET_FUNCTION(meth); - assert(cfunc != NULL); const char *name_ascii = meth->m_ml->ml_name; /* Don't optimize anything that isn't FASTCALL, has keywords, has varargs, or has no args. Microbenchmarks show they don't benefit much to be worth a @@ -668,12 +665,10 @@ _Py_Specialize_CallFunction(PyObject *builtins, case METH_FASTCALL: // _PYCFUNCTION_FAST; *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION_FAST, _Py_OPARG(*instr)); - cache1->cfunc = cfunc; goto success; case METH_O: // PYCFUNCTION_O; *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION_O, _Py_OPARG(*instr)); - cache1->cfunc = cfunc; goto success; case METH_VARARGS: // PYCFUNCTION From a8b8b4fa3b44dedeea8e20df61dd76d55d219ace Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Jul 2021 17:22:01 +0800 Subject: [PATCH 13/49] reduce diff --- Include/internal/pycore_code.h | 1 - Python/specialize.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index e9a4f5bbde373c..49d0f28188a00d 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -53,7 +53,6 @@ typedef struct { uint32_t builtin_keys_version; } _PyLoadGlobalCache; - /* Add specialized versions of entries to this union. * * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 diff --git a/Python/specialize.c b/Python/specialize.c index 1b4e94819da092..f54773ef11b8cb 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -671,21 +671,17 @@ _Py_Specialize_CallFunction(PyObject *builtins, *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION_O, _Py_OPARG(*instr)); goto success; case METH_VARARGS: - // PYCFUNCTION SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION"); goto fail; case METH_VARARGS | METH_KEYWORDS: - // PYCFUNCTION_WITH_KEYWORDS SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION_WITH_KEYWORDS"); goto fail; case METH_FASTCALL | METH_KEYWORDS: - // _PYCFUNCTION_FAST_WITH_KEYWORDS; SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "_PYCFUNCTION_FAST_WITH_KEYWORDS"); goto fail; case METH_NOARGS: - // PYCFUNCTION_NOARGS; SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION_NOARGS"); goto fail; /* This case should never happen with PyCFunctionObject -- only From 736d9afdefb9ef497faf0c3041fa967ffbdb0555 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Jul 2021 17:29:02 +0800 Subject: [PATCH 14/49] use PyMapping_HasKeyString since PyDict_GetItemString is discouraged --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index f54773ef11b8cb..60d4b9a0f90974 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -715,7 +715,7 @@ _Py_Specialize_CallFunction(PyObject *builtins, !PyType_CheckExact(callable)) { SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "python class"); } - if (PyDict_GetItemString(builtins, ((PyTypeObject *)callable)->tp_name)) { + if (PyMapping_HasKeyString(builtins, ((PyTypeObject *)callable)->tp_name)) { SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "__builtins__ type init"); } goto fail; From 2e3195dc3774700ac99f36a0aa30b94779b63555 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Jul 2021 22:40:15 +0800 Subject: [PATCH 15/49] fix reference leak --- Python/ceval.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 91a474e8a03fd3..f531ee5b738b75 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4133,10 +4133,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL_FUNCTION); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); - PyObject *res = cfunc(PyCFunction_GET_SELF(callable), POP()); + PyObject *arg = POP(); + PyObject *res = cfunc(PyCFunction_GET_SELF(callable), arg); assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); /* Clear the stack of the function object. */ + Py_DECREF(arg); Py_DECREF(callable); SET_TOP(res); record_cache_hit(cache0); From d8b3a0980681bf898fe5072b6964fd050b6ecfa6 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Jul 2021 23:17:55 +0800 Subject: [PATCH 16/49] remove unused variable, add more specialization fails --- Python/specialize.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 60d4b9a0f90974..2df16d52c0e105 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -655,7 +655,6 @@ _Py_Specialize_CallFunction(PyObject *builtins, if (PyCFunction_GET_FUNCTION(callable) == NULL) { goto fail; } - const char *name_ascii = meth->m_ml->ml_name; /* Don't optimize anything that isn't FASTCALL, has keywords, has varargs, or has no args. Microbenchmarks show they don't benefit much to be worth a specialized instruction. @@ -714,10 +713,13 @@ _Py_Specialize_CallFunction(PyObject *builtins, PyObject_TypeCheck(callable, &PyType_Type) && !PyType_CheckExact(callable)) { SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "python class"); + goto fail; } if (PyMapping_HasKeyString(builtins, ((PyTypeObject *)callable)->tp_name)) { SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "__builtins__ type init"); + goto fail; } + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "C class"); goto fail; } /* So far this catches things like weakref.weakref */ From 25b002c166ed17ee3881da550b0cf51098a0ce62 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Jul 2021 23:33:55 +0800 Subject: [PATCH 17/49] don't allow specialized function calls when tracing --- Python/ceval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index f531ee5b738b75..d651f19ab9625e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4068,7 +4068,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } case TARGET(CALL_FUNCTION_ADAPTIVE): { SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + if (cframe.use_tracing == 0 && cache->adaptive.counter == 0) { next_instr--; if (_Py_Specialize_CallFunction(BUILTINS(), stack_pointer, cache->adaptive.original_oparg, next_instr, cache) < 0) { From 557e4bcd5d923ac3a4eebf9e5fba1104f3885a3e Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Jul 2021 00:21:50 +0800 Subject: [PATCH 18/49] deopt when tracing --- Python/ceval.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index d651f19ab9625e..7d78b18f13c947 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4068,7 +4068,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } case TARGET(CALL_FUNCTION_ADAPTIVE): { SpecializedCacheEntry *cache = GET_CACHE(); - if (cframe.use_tracing == 0 && cache->adaptive.counter == 0) { + if (cache->adaptive.counter == 0) { next_instr--; if (_Py_Specialize_CallFunction(BUILTINS(), stack_pointer, cache->adaptive.original_oparg, next_instr, cache) < 0) { @@ -4084,7 +4084,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } } case TARGET(CALL_CFUNCTION_FAST): { - assert(cframe.use_tracing == 0); + DEOPT_IF(cframe.use_tracing, CALL_FUNCTION); /* Builtin METH_FASTCALL functions, without keywords */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; @@ -4122,7 +4122,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } case TARGET(CALL_CFUNCTION_O): { - assert(cframe.use_tracing == 0); + DEOPT_IF(cframe.use_tracing, CALL_FUNCTION); /* Builtin METH_O functions */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; From ea0d432be9fbe91215432ab6d815e2968f6c292e Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 8 Jul 2021 23:29:43 +0800 Subject: [PATCH 19/49] apply mark's comments --- Python/ceval.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 5e30181d1839de..73163bb0d02526 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4122,7 +4122,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* Builtin METH_FASTCALL functions, without keywords */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - uint8_t nargs = cache0->original_oparg; + int nargs = cache0->original_oparg; PyObject **pfunc = &PEEK(nargs + 1); PyObject *callable = *pfunc; DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); @@ -4139,7 +4139,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* Clear the stack of the function object. */ while (stack_pointer > pfunc) { - PyObject *x = EXT_POP(stack_pointer); + PyObject *x = POP(); Py_DECREF(x); } PUSH(res); From 97749b7cfa87fb3fd4a40d8c3f1d7b6a75e619aa Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 8 Jul 2021 23:35:24 +0800 Subject: [PATCH 20/49] change deopts to asserts --- Python/ceval.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 73163bb0d02526..7faf9c85282d8d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4118,7 +4118,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } } case TARGET(CALL_CFUNCTION_FAST): { - DEOPT_IF(cframe.use_tracing, CALL_FUNCTION); + assert(cframe.use_tracing == 0); /* Builtin METH_FASTCALL functions, without keywords */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; @@ -4156,7 +4156,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } case TARGET(CALL_CFUNCTION_O): { - DEOPT_IF(cframe.use_tracing, CALL_FUNCTION); + assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; From feb966a45ea6c5b48eca0c95a735f7172803e68d Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 13 Jul 2021 17:35:35 +0800 Subject: [PATCH 21/49] add blank lines between each case --- Python/ceval.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 7faf9c85282d8d..8e55561ece248c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4064,6 +4064,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) CHECK_EVAL_BREAKER(); DISPATCH(); } + case TARGET(CALL_METHOD_KW): { /* Designed to work in tandem with LOAD_METHOD. Same as CALL_METHOD but pops TOS to get a tuple of keyword names. */ @@ -4087,6 +4088,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) CHECK_EVAL_BREAKER(); DISPATCH(); } + case TARGET(CALL_FUNCTION): { PREDICTED(CALL_FUNCTION); PyObject **sp, *res; @@ -4100,6 +4102,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) CHECK_EVAL_BREAKER(); DISPATCH(); } + case TARGET(CALL_FUNCTION_ADAPTIVE): { SpecializedCacheEntry *cache = GET_CACHE(); if (cache->adaptive.counter == 0) { @@ -4117,6 +4120,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) JUMP_TO_INSTRUCTION(CALL_FUNCTION); } } + case TARGET(CALL_CFUNCTION_FAST): { assert(cframe.use_tracing == 0); /* Builtin METH_FASTCALL functions, without keywords */ @@ -4155,6 +4159,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } DISPATCH(); } + case TARGET(CALL_CFUNCTION_O): { assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ @@ -4182,6 +4187,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } DISPATCH(); } + case TARGET(CALL_FUNCTION_KW): { PyObject **sp, *res, *names; From 9a5a407a06f0386e2e7f5ce16020d81d3282ff63 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 14 Jul 2021 01:18:13 +0800 Subject: [PATCH 22/49] Remove CALL_CFUNCTION_FAST --- Include/opcode.h | 3 +- Lib/opcode.py | 1 - .../2021-06-28-22-23-59.bpo-44525.sSvUKG.rst | 8 ++-- Python/ceval.c | 39 ------------------- Python/opcode_targets.h | 2 +- Python/specialize.c | 12 +++--- 6 files changed, 12 insertions(+), 53 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index cb0fc89cb1f65c..ba523e8dec3034 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -146,8 +146,7 @@ extern "C" { #define LOAD_GLOBAL_MODULE 38 #define LOAD_GLOBAL_BUILTIN 39 #define CALL_FUNCTION_ADAPTIVE 40 -#define CALL_CFUNCTION_FAST 41 -#define CALL_CFUNCTION_O 42 +#define CALL_CFUNCTION_O 41 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 7652e68956253a..e80b297947b378 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -230,6 +230,5 @@ def jabs_op(name, op): "LOAD_GLOBAL_MODULE", "LOAD_GLOBAL_BUILTIN", "CALL_FUNCTION_ADAPTIVE", - "CALL_CFUNCTION_FAST", "CALL_CFUNCTION_O", ] diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst index 201998bcd1ecaf..24d8114305d4f5 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst @@ -1,4 +1,4 @@ -Specialize ``CALL_FUNCTION`` opcode with ``CALL_CFUNCTION_O`` and -``CALL_CFUNCTION_FAST``. This speeds up calls to ``PyCFunctionObject``. -As a result, many builtin functions and C-extension functions should experience -reduced call overhead. +Setup initial specialization infrastructure for the ``CALL_FUNCTION`` opcode. +The first specialization, ``CALL_CFUNCTION_O``, speeds up calls to +``PyCFunctionObject`` if they have the ``METH_O`` flag set. More +specializations to come in the future. diff --git a/Python/ceval.c b/Python/ceval.c index 8e55561ece248c..8007abbf79d049 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4121,45 +4121,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } } - case TARGET(CALL_CFUNCTION_FAST): { - assert(cframe.use_tracing == 0); - /* Builtin METH_FASTCALL functions, without keywords */ - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - int nargs = cache0->original_oparg; - PyObject **pfunc = &PEEK(nargs + 1); - PyObject *callable = *pfunc; - DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); - DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL, - CALL_FUNCTION); - - PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); - /* res = func(self, args, nargs) */ - PyObject *res = ((_PyCFunctionFast)(void(*)(void))cfunc)( - PyCFunction_GET_SELF(callable), - &PEEK(nargs), - nargs); - assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - - /* Clear the stack of the function object. */ - while (stack_pointer > pfunc) { - PyObject *x = POP(); - Py_DECREF(x); - } - PUSH(res); - record_cache_hit(cache0); - STAT_INC(CALL_FUNCTION, hit); - if (res == NULL) { - /* Not deopting because this doesn't mean our optimization was - wrong. `res` can be NULL for valid reasons. Eg. getattr(x, - 'invalid'). In those cases an exception is set, so we must - handle it. - */ - goto error; - } - DISPATCH(); - } - case TARGET(CALL_CFUNCTION_O): { assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 711aa7a11b97e8..1b5d00a2526ae9 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -40,7 +40,6 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_CALL_FUNCTION_ADAPTIVE, - &&TARGET_CALL_CFUNCTION_FAST, &&TARGET_CALL_CFUNCTION_O, &&_unknown_opcode, &&_unknown_opcode, @@ -48,6 +47,7 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, + &&_unknown_opcode, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, diff --git a/Python/specialize.c b/Python/specialize.c index bbe19b1c78a825..9964432727a3a6 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -659,16 +659,16 @@ _Py_Specialize_CallFunction(PyObject *builtins, if (PyCFunction_GET_FUNCTION(callable) == NULL) { goto fail; } - /* Don't optimize anything that isn't FASTCALL, has keywords, has varargs, or - has no args. Microbenchmarks show they don't benefit much to be worth a - specialized instruction. + /* Currently not optimizing anything that is FASTCALL, has keywords, has + varargs, or has no args. Microbenchmarks show they don't benefit much to be + worth a specialized instruction. */ switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { case METH_FASTCALL: - // _PYCFUNCTION_FAST; - *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION_FAST, _Py_OPARG(*instr)); - goto success; + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + "_PYCFUNCTION_FAST"); + goto fail; case METH_O: // PYCFUNCTION_O; *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION_O, _Py_OPARG(*instr)); From 4dafd8db9bc8e671bbebe78db51d546e3a7f7832 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 14 Jul 2021 18:11:14 +0800 Subject: [PATCH 23/49] Apply Mark's suggestions --- Include/internal/pycore_code.h | 14 +++++++----- Include/opcode.h | 2 +- Lib/opcode.py | 2 +- Python/ceval.c | 8 +++++-- Python/opcode_targets.h | 2 +- Python/specialize.c | 39 ++++++++++++++++++++++------------ 6 files changed, 43 insertions(+), 24 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index cfc310c8c80e86..e9a7dbc0805f26 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -317,15 +317,19 @@ cache_backoff(_PyAdaptiveEntry *entry) { entry->counter = BACKOFF; } +#define SPECIALIZATION_STATS 0 +#define SPECIALIZATION_STATS_DETAILED 0 + /* Specialization functions */ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); -int _Py_Specialize_CallFunction(PyObject *builtins, PyObject **stack_pointer, - uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); - -#define SPECIALIZATION_STATS 0 -#define SPECIALIZATION_STATS_DETAILED 0 +int _Py_Specialize_CallFunction( +#if SPECIALIZATION_STATS + PyObject *builtins, +#endif + PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, + SpecializedCacheEntry *cache); #if SPECIALIZATION_STATS diff --git a/Include/opcode.h b/Include/opcode.h index ba523e8dec3034..f2679fe62cdfa6 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -146,7 +146,7 @@ extern "C" { #define LOAD_GLOBAL_MODULE 38 #define LOAD_GLOBAL_BUILTIN 39 #define CALL_FUNCTION_ADAPTIVE 40 -#define CALL_CFUNCTION_O 41 +#define CALL_FUNCTION_BUILTIN_O 41 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index e80b297947b378..6e7d1a69447fff 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -230,5 +230,5 @@ def jabs_op(name, op): "LOAD_GLOBAL_MODULE", "LOAD_GLOBAL_BUILTIN", "CALL_FUNCTION_ADAPTIVE", - "CALL_CFUNCTION_O", + "CALL_FUNCTION_BUILTIN_O", ] diff --git a/Python/ceval.c b/Python/ceval.c index 8007abbf79d049..211158fbe5d478 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4107,7 +4107,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) SpecializedCacheEntry *cache = GET_CACHE(); if (cache->adaptive.counter == 0) { next_instr--; - if (_Py_Specialize_CallFunction(BUILTINS(), stack_pointer, + if (_Py_Specialize_CallFunction( + #if SPECIALIZATION_STATS + BUILTINS(), + #endif + stack_pointer, cache->adaptive.original_oparg, next_instr, cache) < 0) { goto error; } @@ -4121,7 +4125,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } } - case TARGET(CALL_CFUNCTION_O): { + case TARGET(CALL_FUNCTION_BUILTIN_O): { assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ SpecializedCacheEntry *caches = GET_CACHE(); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 1b5d00a2526ae9..18b15a37851ce7 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -40,7 +40,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_CALL_FUNCTION_ADAPTIVE, - &&TARGET_CALL_CFUNCTION_O, + &&TARGET_CALL_FUNCTION_BUILTIN_O, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 9964432727a3a6..c11afb2e9973ff 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -646,7 +646,10 @@ _Py_Specialize_LoadGlobal( - Specialize python function calls. */ int -_Py_Specialize_CallFunction(PyObject *builtins, +_Py_Specialize_CallFunction( +#if SPECIALIZATION_STATS + PyObject *builtins, +#endif PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) { @@ -670,41 +673,41 @@ _Py_Specialize_CallFunction(PyObject *builtins, "_PYCFUNCTION_FAST"); goto fail; case METH_O: - // PYCFUNCTION_O; - *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION_O, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_O, + _Py_OPARG(*instr)); goto success; case METH_VARARGS: SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION"); goto fail; case METH_VARARGS | METH_KEYWORDS: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION_WITH_KEYWORDS"); goto fail; case METH_FASTCALL | METH_KEYWORDS: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "_PYCFUNCTION_FAST_WITH_KEYWORDS"); goto fail; case METH_NOARGS: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION_NOARGS"); + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + "PYCFUNCTION_NOARGS"); goto fail; - /* This case should never happen with PyCFunctionObject -- only - PyMethodObject. See zlib.compressobj()'s methods for an example. - */ case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: - // PYCMETHOD + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PyCMethod"); + goto fail; default: SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bad call flags"); goto fail; } } - /* These will be implemented in the future. Collecting stats for now. */ + /* These might be implemented in the future. Collecting stats for now. */ #if SPECIALIZATION_STATS if (PyFunction_Check(callable)) { SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "python function"); goto fail; } if (PyInstanceMethod_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "new style bound method"); + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + "new style bound method"); goto fail; } if (PyMethod_Check(callable)) { @@ -716,11 +719,19 @@ _Py_Specialize_CallFunction(PyObject *builtins, PyObject_HasAttrString(callable, "__slots__")) && PyObject_TypeCheck(callable, &PyType_Type) && !PyType_CheckExact(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "python class"); + if (PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE)) { + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + "immutable python class"); + } + else { + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + "python class"); + } goto fail; } if (PyMapping_HasKeyString(builtins, ((PyTypeObject *)callable)->tp_name)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "__builtins__ type init"); + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + "__builtins__ type init"); goto fail; } SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "C class"); From 84d2367fffad888598ace9a74ea0efb0eea23fc1 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 14 Jul 2021 19:08:30 +0800 Subject: [PATCH 24/49] delete useless comment, add back useful one --- Python/specialize.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index c11afb2e9973ff..61d8034569075d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -662,10 +662,6 @@ _Py_Specialize_CallFunction( if (PyCFunction_GET_FUNCTION(callable) == NULL) { goto fail; } - /* Currently not optimizing anything that is FASTCALL, has keywords, has - varargs, or has no args. Microbenchmarks show they don't benefit much to be - worth a specialized instruction. - */ switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { case METH_FASTCALL: @@ -691,9 +687,10 @@ _Py_Specialize_CallFunction( SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION_NOARGS"); goto fail; + /* This case should never happen with PyCFunctionObject -- only + PyMethodObject. See zlib.compressobj()'s methods for an example. + */ case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PyCMethod"); - goto fail; default: SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bad call flags"); goto fail; From 12a53337e76379d61a6825f424e3405e147a27ba Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 14 Jul 2021 23:12:03 +0800 Subject: [PATCH 25/49] remove complicated checks for classes --- Include/internal/pycore_code.h | 3 --- Python/ceval.c | 3 --- Python/specialize.c | 28 ++++------------------------ 3 files changed, 4 insertions(+), 30 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index e9a7dbc0805f26..680a2006186789 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -325,9 +325,6 @@ cache_backoff(_PyAdaptiveEntry *entry) { int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_CallFunction( -#if SPECIALIZATION_STATS - PyObject *builtins, -#endif PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); diff --git a/Python/ceval.c b/Python/ceval.c index 211158fbe5d478..718ac46a4690e0 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4108,9 +4108,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) if (cache->adaptive.counter == 0) { next_instr--; if (_Py_Specialize_CallFunction( - #if SPECIALIZATION_STATS - BUILTINS(), - #endif stack_pointer, cache->adaptive.original_oparg, next_instr, cache) < 0) { goto error; diff --git a/Python/specialize.c b/Python/specialize.c index 61d8034569075d..2f425afbf3aaec 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -642,14 +642,11 @@ _Py_Specialize_LoadGlobal( } /* TODO: - - Specialize calling C types like int() with CALL_CTYPE + - Specialize calling C types like type() with CALL_FUNCTION_BUILTIN_TYPE - Specialize python function calls. */ int _Py_Specialize_CallFunction( -#if SPECIALIZATION_STATS - PyObject *builtins, -#endif PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) { @@ -712,26 +709,9 @@ _Py_Specialize_CallFunction( goto fail; } if (PyType_Check(callable)) { - if ((PyObject_HasAttrString(callable, "__dict__") || - PyObject_HasAttrString(callable, "__slots__")) && - PyObject_TypeCheck(callable, &PyType_Type) && - !PyType_CheckExact(callable)) { - if (PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, - "immutable python class"); - } - else { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, - "python class"); - } - goto fail; - } - if (PyMapping_HasKeyString(builtins, ((PyTypeObject *)callable)->tp_name)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, - "__builtins__ type init"); - goto fail; - } - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "C class"); + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ? + "immutable class" : "mutable class"); goto fail; } /* So far this catches things like weakref.weakref */ From b99f65c787abbbf66a63a626a993361a5dd0b6a3 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 15 Jul 2021 18:50:34 +0800 Subject: [PATCH 26/49] apply suggestions by Mark --- Python/specialize.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 2f425afbf3aaec..92672111a1e048 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -652,7 +652,9 @@ _Py_Specialize_CallFunction( { PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; +#if SPECIALIZATION_STATS PyTypeObject *type = Py_TYPE(callable); +#endif /* Specialize C functions */ if (PyCFunction_CheckExact(callable)) { PyCFunctionObject *meth = (PyCFunctionObject *)callable; @@ -661,14 +663,14 @@ _Py_Specialize_CallFunction( } switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { - case METH_FASTCALL: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, - "_PYCFUNCTION_FAST"); - goto fail; case METH_O: *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_O, _Py_OPARG(*instr)); goto success; + case METH_FASTCALL: + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + "_PYCFUNCTION_FAST"); + goto fail; case METH_VARARGS: SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION"); goto fail; From 52393427c0f6351c558431e9442f1f00b7b2ab24 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 15 Jul 2021 19:05:40 +0800 Subject: [PATCH 27/49] actually move it into the block this time --- Python/specialize.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 92672111a1e048..052ca3c427b7b1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -652,9 +652,6 @@ _Py_Specialize_CallFunction( { PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; -#if SPECIALIZATION_STATS - PyTypeObject *type = Py_TYPE(callable); -#endif /* Specialize C functions */ if (PyCFunction_CheckExact(callable)) { PyCFunctionObject *meth = (PyCFunctionObject *)callable; @@ -697,6 +694,7 @@ _Py_Specialize_CallFunction( } /* These might be implemented in the future. Collecting stats for now. */ #if SPECIALIZATION_STATS + PyTypeObject *type = Py_TYPE(callable); if (PyFunction_Check(callable)) { SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "python function"); goto fail; From 725032998c9bea12890a19ada072f027741796a1 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 15 Jul 2021 20:41:50 +0800 Subject: [PATCH 28/49] Regen opcodes --- Include/opcode.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Include/opcode.h b/Include/opcode.h index 7bebb871edb444..93c436472641f8 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -149,6 +149,8 @@ extern "C" { #define LOAD_GLOBAL_ADAPTIVE 41 #define LOAD_GLOBAL_MODULE 42 #define LOAD_GLOBAL_BUILTIN 43 +#define CALL_FUNCTION_ADAPTIVE 44 +#define CALL_FUNCTION_BUILTIN_O 45 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, From c64907069651fc2c47f584d8705572ce2061aa30 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 16 Jul 2021 00:45:54 +0800 Subject: [PATCH 29/49] move type earlier --- Python/specialize.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index fd05dd373e9358..57e15fd418b1ca 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -697,6 +697,9 @@ _Py_Specialize_CallFunction( { PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; +#if SPECIALIZATION_STATS + PyTypeObject *type = Py_TYPE(callable); +#endif /* Specialize C functions */ if (PyCFunction_CheckExact(callable)) { PyCFunctionObject *meth = (PyCFunctionObject *)callable; @@ -739,7 +742,6 @@ _Py_Specialize_CallFunction( } /* These might be implemented in the future. Collecting stats for now. */ #if SPECIALIZATION_STATS - PyTypeObject *type = Py_TYPE(callable); if (PyFunction_Check(callable)) { SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "python function"); goto fail; From 5dfce162804e33c74d9c65581a59e6518529e055 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 16 Jul 2021 17:37:24 +0800 Subject: [PATCH 30/49] change to assert --- Python/ceval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index ba3531622df8e8..9999e3d4b5ee7d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4217,7 +4217,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* Builtin METH_O functions */ SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - DEOPT_IF(cache0->original_oparg != 1, CALL_FUNCTION); + assert(cache0->original_oparg == 1); PyObject *callable = SECOND(); DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); From 0da5ed211551908a08c8ae0c9d9ecadd87b4b73a Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 16 Jul 2021 17:51:57 +0800 Subject: [PATCH 31/49] increment unquickened stats --- Python/ceval.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/ceval.c b/Python/ceval.c index 9999e3d4b5ee7d..b666e4d3e0e1da 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4181,6 +4181,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) case TARGET(CALL_FUNCTION): { PREDICTED(CALL_FUNCTION); + STAT_INC(CALL_FUNCTION, unquickened); PyObject **sp, *res; sp = stack_pointer; res = call_function(tstate, &sp, oparg, NULL, cframe.use_tracing); From 40b919fd638b02fac55979edec3ec8df88ef2107 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 16 Jul 2021 21:23:45 +0800 Subject: [PATCH 32/49] Re-add CALL_FUNCTION_BUILTIN_FAST --- Include/opcode.h | 1 + Lib/opcode.py | 1 + .../2021-06-28-22-23-59.bpo-44525.sSvUKG.rst | 8 ++-- Python/ceval.c | 39 +++++++++++++++++++ Python/opcode_targets.h | 2 +- Python/specialize.c | 6 +-- 6 files changed, 50 insertions(+), 7 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index 93c436472641f8..ba3eeafb2a9dcf 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -151,6 +151,7 @@ extern "C" { #define LOAD_GLOBAL_BUILTIN 43 #define CALL_FUNCTION_ADAPTIVE 44 #define CALL_FUNCTION_BUILTIN_O 45 +#define CALL_FUNCTION_BUILTIN_FAST 46 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index aa4f7bb3675305..02d17683b1bc7d 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -235,4 +235,5 @@ def jabs_op(name, op): "LOAD_GLOBAL_BUILTIN", "CALL_FUNCTION_ADAPTIVE", "CALL_FUNCTION_BUILTIN_O", + "CALL_FUNCTION_BUILTIN_FAST", ] diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst index 24d8114305d4f5..50935c5fa8320a 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst @@ -1,4 +1,6 @@ Setup initial specialization infrastructure for the ``CALL_FUNCTION`` opcode. -The first specialization, ``CALL_CFUNCTION_O``, speeds up calls to -``PyCFunctionObject`` if they have the ``METH_O`` flag set. More -specializations to come in the future. +Implemented two initial specializations for ``PyCFunctionObject``: + +* ``CALL_FUNCTION_BUILTIN_O`` for ``METH_O`` flag. + +* ``CALL_FUNCTION_BUILTIN_FAST`` for ``METH_FASTCALL`` flag without keywords. diff --git a/Python/ceval.c b/Python/ceval.c index b666e4d3e0e1da..e9ce6249e4bdc5 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4241,6 +4241,45 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } + case TARGET(CALL_FUNCTION_BUILTIN_FAST): { + assert(cframe.use_tracing == 0); + /* Builtin METH_FASTCALL functions, without keywords */ + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + int nargs = cache0->original_oparg; + PyObject **pfunc = &PEEK(nargs + 1); + PyObject *callable = *pfunc; + DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); + DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL, + CALL_FUNCTION); + + PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); + /* res = func(self, args, nargs) */ + PyObject *res = ((_PyCFunctionFast)(void(*)(void))cfunc)( + PyCFunction_GET_SELF(callable), + &PEEK(nargs), + nargs); + assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + + /* Clear the stack of the function object. */ + while (stack_pointer > pfunc) { + PyObject *x = POP(); + Py_DECREF(x); + } + PUSH(res); + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); + if (res == NULL) { + /* Not deopting because this doesn't mean our optimization was + wrong. `res` can be NULL for valid reasons. Eg. getattr(x, + 'invalid'). In those cases an exception is set, so we must + handle it. + */ + goto error; + } + DISPATCH(); + } + case TARGET(CALL_FUNCTION_KW): { PyObject **sp, *res, *names; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index d3b775ae110305..0cc36f0d7b2f02 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -45,7 +45,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_CALL_FUNCTION_ADAPTIVE, &&TARGET_CALL_FUNCTION_BUILTIN_O, - &&_unknown_opcode, + &&TARGET_CALL_FUNCTION_BUILTIN_FAST, &&_unknown_opcode, &&_unknown_opcode, &&TARGET_WITH_EXCEPT_START, diff --git a/Python/specialize.c b/Python/specialize.c index 57e15fd418b1ca..4c937469e76d73 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -713,9 +713,9 @@ _Py_Specialize_CallFunction( _Py_OPARG(*instr)); goto success; case METH_FASTCALL: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, - "_PYCFUNCTION_FAST"); - goto fail; + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_FAST, + _Py_OPARG(*instr)); + goto success; case METH_VARARGS: SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION"); goto fail; From 75e3540e924c320dcf46c0458186191b7bce71fd Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 16 Jul 2021 22:25:11 +0800 Subject: [PATCH 33/49] add check for C methods --- Python/specialize.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/specialize.c b/Python/specialize.c index 4c937469e76d73..b37688ccf2aa2c 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -755,6 +755,10 @@ _Py_Specialize_CallFunction( SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bound method"); goto fail; } + if (PyCMethod_Check(callable)) { + SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "builtin method"); + goto fail; + } if (PyType_Check(callable)) { SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ? From 3c1129a8ef89e7c56834ca7cd6d36b2016b38d9f Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 16 Sep 2021 00:01:34 +0800 Subject: [PATCH 34/49] fix build errors --- Include/opcode.h | 63 ++++++++++++++++------------------------- Python/ceval.c | 6 ++-- Python/opcode_targets.h | 38 ++++++++++++------------- Python/specialize.c | 42 ++++++++++++++++----------- 4 files changed, 72 insertions(+), 77 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index f5a3b7eb7e3a29..f538003c697bf7 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -136,22 +136,6 @@ extern "C" { #define DICT_MERGE 164 #define DICT_UPDATE 165 #define CALL_METHOD_KW 166 -#define BINARY_SUBSCR_ADAPTIVE 7 -#define BINARY_SUBSCR_LIST_INT 8 -#define BINARY_SUBSCR_TUPLE_INT 13 -#define BINARY_SUBSCR_DICT 14 -#define JUMP_ABSOLUTE_QUICK 18 -#define LOAD_ATTR_ADAPTIVE 21 -#define LOAD_ATTR_SPLIT_KEYS 36 -#define LOAD_ATTR_WITH_HINT 38 -#define LOAD_ATTR_SLOT 39 -#define LOAD_ATTR_MODULE 40 -#define LOAD_GLOBAL_ADAPTIVE 41 -#define LOAD_GLOBAL_MODULE 42 -#define LOAD_GLOBAL_BUILTIN 43 -#define CALL_FUNCTION_ADAPTIVE 44 -#define CALL_FUNCTION_BUILTIN_O 45 -#define CALL_FUNCTION_BUILTIN_FAST 46 #define BINARY_ADD_ADAPTIVE 7 #define BINARY_ADD_INT 8 #define BINARY_ADD_FLOAT 13 @@ -161,28 +145,31 @@ extern "C" { #define BINARY_SUBSCR_LIST_INT 36 #define BINARY_SUBSCR_TUPLE_INT 38 #define BINARY_SUBSCR_DICT 39 -#define JUMP_ABSOLUTE_QUICK 40 -#define LOAD_ATTR_ADAPTIVE 41 -#define LOAD_ATTR_SPLIT_KEYS 42 -#define LOAD_ATTR_WITH_HINT 43 -#define LOAD_ATTR_SLOT 44 -#define LOAD_ATTR_MODULE 45 -#define LOAD_GLOBAL_ADAPTIVE 46 -#define LOAD_GLOBAL_MODULE 47 -#define LOAD_GLOBAL_BUILTIN 48 -#define LOAD_METHOD_ADAPTIVE 58 -#define LOAD_METHOD_CACHED 80 -#define LOAD_METHOD_CLASS 81 -#define LOAD_METHOD_MODULE 87 -#define STORE_ATTR_ADAPTIVE 88 -#define STORE_ATTR_SPLIT_KEYS 120 -#define STORE_ATTR_SLOT 122 -#define STORE_ATTR_WITH_HINT 123 -#define LOAD_FAST__LOAD_FAST 127 -#define STORE_FAST__LOAD_FAST 128 -#define LOAD_FAST__LOAD_CONST 134 -#define LOAD_CONST__LOAD_FAST 140 -#define STORE_FAST__STORE_FAST 143 +#define CALL_FUNCTION_ADAPTIVE 40 +#define CALL_FUNCTION_BUILTIN_O 41 +#define CALL_FUNCTION_BUILTIN_FAST 42 +#define JUMP_ABSOLUTE_QUICK 43 +#define LOAD_ATTR_ADAPTIVE 44 +#define LOAD_ATTR_SPLIT_KEYS 45 +#define LOAD_ATTR_WITH_HINT 46 +#define LOAD_ATTR_SLOT 47 +#define LOAD_ATTR_MODULE 48 +#define LOAD_GLOBAL_ADAPTIVE 58 +#define LOAD_GLOBAL_MODULE 80 +#define LOAD_GLOBAL_BUILTIN 81 +#define LOAD_METHOD_ADAPTIVE 87 +#define LOAD_METHOD_CACHED 88 +#define LOAD_METHOD_CLASS 120 +#define LOAD_METHOD_MODULE 122 +#define STORE_ATTR_ADAPTIVE 123 +#define STORE_ATTR_SPLIT_KEYS 127 +#define STORE_ATTR_SLOT 128 +#define STORE_ATTR_WITH_HINT 134 +#define LOAD_FAST__LOAD_FAST 140 +#define STORE_FAST__LOAD_FAST 143 +#define LOAD_FAST__LOAD_CONST 149 +#define LOAD_CONST__LOAD_FAST 150 +#define STORE_FAST__STORE_FAST 151 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Python/ceval.c b/Python/ceval.c index 41cc7dceb5a8db..e9f1d3b2c2478e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4621,7 +4621,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } - case TARGET(CALL_FUNCTION_ADAPTIVE): { + TARGET(CALL_FUNCTION_ADAPTIVE): { SpecializedCacheEntry *cache = GET_CACHE(); if (cache->adaptive.counter == 0) { next_instr--; @@ -4640,7 +4640,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } } - case TARGET(CALL_FUNCTION_BUILTIN_O): { + TARGET(CALL_FUNCTION_BUILTIN_O): { assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ SpecializedCacheEntry *caches = GET_CACHE(); @@ -4668,7 +4668,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } - case TARGET(CALL_FUNCTION_BUILTIN_FAST): { + TARGET(CALL_FUNCTION_BUILTIN_FAST): { assert(cframe.use_tracing == 0); /* Builtin METH_FASTCALL functions, without keywords */ SpecializedCacheEntry *caches = GET_CACHE(); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index f3bfae545bcd48..e33f44d054f73b 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -39,15 +39,15 @@ static void *opcode_targets[256] = { &&TARGET_POP_EXCEPT_AND_RERAISE, &&TARGET_BINARY_SUBSCR_TUPLE_INT, &&TARGET_BINARY_SUBSCR_DICT, + &&TARGET_CALL_FUNCTION_ADAPTIVE, + &&TARGET_CALL_FUNCTION_BUILTIN_O, + &&TARGET_CALL_FUNCTION_BUILTIN_FAST, &&TARGET_JUMP_ABSOLUTE_QUICK, &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_LOAD_ATTR_SPLIT_KEYS, &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_MODULE, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, - &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_LOAD_METHOD_ADAPTIVE, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, - &&TARGET_LOAD_METHOD_CACHED, - &&TARGET_LOAD_METHOD_CLASS, + &&TARGET_LOAD_GLOBAL_MODULE, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_LOAD_METHOD_MODULE, - &&TARGET_STORE_ATTR_ADAPTIVE, + &&TARGET_LOAD_METHOD_ADAPTIVE, + &&TARGET_LOAD_METHOD_CACHED, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,38 +119,38 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&TARGET_STORE_ATTR_SPLIT_KEYS, + &&TARGET_LOAD_METHOD_CLASS, &&TARGET_JUMP_IF_NOT_EXC_MATCH, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, + &&TARGET_LOAD_METHOD_MODULE, + &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&TARGET_LOAD_FAST__LOAD_FAST, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_STORE_ATTR_SPLIT_KEYS, + &&TARGET_STORE_ATTR_SLOT, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_STORE_FAST__STORE_FAST, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, &&TARGET_LOAD_CLASSDEREF, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_MATCH_CLASS, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 2479684d416b33..6c2650fc703959 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -453,6 +453,15 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_NON_FUNCTION_SCOPE 11 #define SPEC_FAIL_DIFFERENT_TYPES 12 +/* CALL_FUNCTION */ + +#define SPEC_FAIL_PYCFUNCTION 10 +#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 13 +#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 14 +#define SPEC_FAIL_PYCFUNCTION_NOARGS 15 +#define SPEC_FAIL_BAD_CALL_FLAGS 16 +#define SPEC_FAIL_PYTHON_FUNCTION 17 +#define SPEC_FAIL_IMMUTABLE_CLASS 18 static int specialize_module_load_attr( @@ -1230,56 +1239,55 @@ _Py_Specialize_CallFunction( _Py_OPARG(*instr)); goto success; case METH_VARARGS: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "PYCFUNCTION"); + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYCFUNCTION); goto fail; case METH_VARARGS | METH_KEYWORDS: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, - "PYCFUNCTION_WITH_KEYWORDS"); + SPECIALIZATION_FAIL(CALL_FUNCTION, + SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS); goto fail; case METH_FASTCALL | METH_KEYWORDS: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, - "_PYCFUNCTION_FAST_WITH_KEYWORDS"); + SPECIALIZATION_FAIL(CALL_FUNCTION, + SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS); goto fail; case METH_NOARGS: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, - "PYCFUNCTION_NOARGS"); + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYCFUNCTION_NOARGS); goto fail; /* This case should never happen with PyCFunctionObject -- only PyMethodObject. See zlib.compressobj()'s methods for an example. */ case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: default: - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bad call flags"); + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_BAD_CALL_FLAGS); goto fail; } } /* These might be implemented in the future. Collecting stats for now. */ #if SPECIALIZATION_STATS if (PyFunction_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "python function"); + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYTHON_FUNCTION); goto fail; } + // new-style bound methods if (PyInstanceMethod_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, - "new style bound method"); + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_METHOD); goto fail; } if (PyMethod_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bound method"); + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_METHOD); goto fail; } + // builtin method if (PyCMethod_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "builtin method"); + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_METHOD); goto fail; } if (PyType_Check(callable)) { SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ? - "immutable class" : "mutable class"); + SPEC_FAIL_IMMUTABLE_CLASS : SPEC_FAIL_MUTABLE_CLASS); goto fail; } - /* So far this catches things like weakref.weakref */ - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "???"); + /* So far this misses things like weakref.weakref */ #endif fail: STAT_INC(CALL_FUNCTION, specialization_failure); @@ -1291,4 +1299,4 @@ _Py_Specialize_CallFunction( assert(!PyErr_Occurred()); cache0->counter = saturating_start(); return 0; -} \ No newline at end of file +} From 226c591d987e320af525a4bdbbaf888dd1b934f5 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 17 Sep 2021 17:20:37 +0800 Subject: [PATCH 35/49] Add CALL_FUNCTION_LEN --- Include/internal/pycore_code.h | 2 +- Include/opcode.h | 46 ++++++++++++++++++---------------- Lib/opcode.py | 4 ++- Python/ceval.c | 35 +++++++++++++++++++++++--- Python/opcode_targets.h | 36 +++++++++++++------------- Python/specialize.c | 16 ++++++++++-- 6 files changed, 91 insertions(+), 48 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index bf2868a62ddb3b..2f4ccc1ad49dbd 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -311,7 +311,7 @@ int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNI int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); -int _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +int _Py_Specialize_CallFunction(PyObject *builtins, PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/opcode.h b/Include/opcode.h index f538003c697bf7..8f3679355e7baf 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -148,28 +148,30 @@ extern "C" { #define CALL_FUNCTION_ADAPTIVE 40 #define CALL_FUNCTION_BUILTIN_O 41 #define CALL_FUNCTION_BUILTIN_FAST 42 -#define JUMP_ABSOLUTE_QUICK 43 -#define LOAD_ATTR_ADAPTIVE 44 -#define LOAD_ATTR_SPLIT_KEYS 45 -#define LOAD_ATTR_WITH_HINT 46 -#define LOAD_ATTR_SLOT 47 -#define LOAD_ATTR_MODULE 48 -#define LOAD_GLOBAL_ADAPTIVE 58 -#define LOAD_GLOBAL_MODULE 80 -#define LOAD_GLOBAL_BUILTIN 81 -#define LOAD_METHOD_ADAPTIVE 87 -#define LOAD_METHOD_CACHED 88 -#define LOAD_METHOD_CLASS 120 -#define LOAD_METHOD_MODULE 122 -#define STORE_ATTR_ADAPTIVE 123 -#define STORE_ATTR_SPLIT_KEYS 127 -#define STORE_ATTR_SLOT 128 -#define STORE_ATTR_WITH_HINT 134 -#define LOAD_FAST__LOAD_FAST 140 -#define STORE_FAST__LOAD_FAST 143 -#define LOAD_FAST__LOAD_CONST 149 -#define LOAD_CONST__LOAD_FAST 150 -#define STORE_FAST__STORE_FAST 151 +#define CALL_FUNCTION_LEN 43 +#define CALL_FUNCTION_ISINSTANCE 44 +#define JUMP_ABSOLUTE_QUICK 45 +#define LOAD_ATTR_ADAPTIVE 46 +#define LOAD_ATTR_SPLIT_KEYS 47 +#define LOAD_ATTR_WITH_HINT 48 +#define LOAD_ATTR_SLOT 58 +#define LOAD_ATTR_MODULE 80 +#define LOAD_GLOBAL_ADAPTIVE 81 +#define LOAD_GLOBAL_MODULE 87 +#define LOAD_GLOBAL_BUILTIN 88 +#define LOAD_METHOD_ADAPTIVE 120 +#define LOAD_METHOD_CACHED 122 +#define LOAD_METHOD_CLASS 123 +#define LOAD_METHOD_MODULE 127 +#define STORE_ATTR_ADAPTIVE 128 +#define STORE_ATTR_SPLIT_KEYS 134 +#define STORE_ATTR_SLOT 140 +#define STORE_ATTR_WITH_HINT 143 +#define LOAD_FAST__LOAD_FAST 149 +#define STORE_FAST__LOAD_FAST 150 +#define LOAD_FAST__LOAD_CONST 151 +#define LOAD_CONST__LOAD_FAST 153 +#define STORE_FAST__STORE_FAST 154 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 246e68d489745e..da384defb44489 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -231,7 +231,9 @@ def jabs_op(name, op): "BINARY_SUBSCR_DICT", "CALL_FUNCTION_ADAPTIVE", "CALL_FUNCTION_BUILTIN_O", - "CALL_FUNCTION_BUILTIN_FAST", + "CALL_FUNCTION_BUILTIN_FAST", + "CALL_FUNCTION_LEN", + "CALL_FUNCTION_ISINSTANCE", "JUMP_ABSOLUTE_QUICK", "LOAD_ATTR_ADAPTIVE", "LOAD_ATTR_SPLIT_KEYS", diff --git a/Python/ceval.c b/Python/ceval.c index e9f1d3b2c2478e..c23a3a8ecd9bb4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4626,6 +4626,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr if (cache->adaptive.counter == 0) { next_instr--; if (_Py_Specialize_CallFunction( + BUILTINS(), stack_pointer, cache->adaptive.original_oparg, next_instr, cache) < 0) { goto error; @@ -4643,9 +4644,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr TARGET(CALL_FUNCTION_BUILTIN_O): { assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->original_oparg == 1); PyObject *callable = SECOND(); DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); @@ -4660,7 +4658,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(arg); Py_DECREF(callable); SET_TOP(res); - record_cache_hit(cache0); STAT_INC(CALL_FUNCTION, hit); if (res == NULL) { goto error; @@ -4707,6 +4704,36 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } + TARGET(CALL_FUNCTION_LEN): { + assert(cframe.use_tracing == 0); + /* len(o) */ + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyObjectCache *cache1 = &caches[-1].obj; + assert(cache0->original_oparg == 1); + + PyObject *callable = SECOND(); + DEOPT_IF(callable != cache1->obj, CALL_FUNCTION); + + PyObject *res = NULL; + Py_ssize_t len_i = PyObject_Length(TOP()); + if (len_i >= 0) { + res = PyLong_FromSsize_t(len_i); + } + assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + + /* Clear the stack of the function object. */ + Py_DECREF(POP()); + Py_DECREF(callable); + SET_TOP(res); + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); + if (res == NULL) { + goto error; + } + DISPATCH(); + } + TARGET(CALL_FUNCTION_KW): { PyObject **sp, *res, *names; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index e33f44d054f73b..d8c16326f6d995 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -42,12 +42,12 @@ static void *opcode_targets[256] = { &&TARGET_CALL_FUNCTION_ADAPTIVE, &&TARGET_CALL_FUNCTION_BUILTIN_O, &&TARGET_CALL_FUNCTION_BUILTIN_FAST, + &&TARGET_CALL_FUNCTION_LEN, + &&TARGET_CALL_FUNCTION_ISINSTANCE, &&TARGET_JUMP_ABSOLUTE_QUICK, &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_LOAD_ATTR_SPLIT_KEYS, &&TARGET_LOAD_ATTR_WITH_HINT, - &&TARGET_LOAD_ATTR_SLOT, - &&TARGET_LOAD_ATTR_MODULE, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, + &&TARGET_LOAD_ATTR_SLOT, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, - &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_LOAD_GLOBAL_BUILTIN, + &&TARGET_LOAD_ATTR_MODULE, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_LOAD_METHOD_ADAPTIVE, - &&TARGET_LOAD_METHOD_CACHED, + &&TARGET_LOAD_GLOBAL_MODULE, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,41 +119,41 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&TARGET_LOAD_METHOD_CLASS, + &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_JUMP_IF_NOT_EXC_MATCH, - &&TARGET_LOAD_METHOD_MODULE, - &&TARGET_STORE_ATTR_ADAPTIVE, + &&TARGET_LOAD_METHOD_CACHED, + &&TARGET_LOAD_METHOD_CLASS, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&TARGET_STORE_ATTR_SPLIT_KEYS, - &&TARGET_STORE_ATTR_SLOT, + &&TARGET_LOAD_METHOD_MODULE, + &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&TARGET_STORE_ATTR_WITH_HINT, + &&TARGET_STORE_ATTR_SPLIT_KEYS, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_STORE_ATTR_SLOT, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, &&TARGET_LOAD_CLASSDEREF, + &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_MATCH_CLASS, &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, - &&TARGET_MATCH_CLASS, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_FORMAT_VALUE, &&TARGET_BUILD_CONST_KEY_MAP, &&TARGET_BUILD_STRING, diff --git a/Python/specialize.c b/Python/specialize.c index 6c2650fc703959..f9f377c3bdc074 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -125,6 +125,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, BINARY_ADD, "binary_add"); err += add_stat_dict(stats, BINARY_SUBSCR, "binary_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); + err += add_stat_dict(stats, CALL_FUNCTION, "call_function"); if (err < 0) { Py_DECREF(stats); return NULL; @@ -241,7 +242,7 @@ static uint8_t cache_requirements[256] = { [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */ [BINARY_ADD] = 0, [BINARY_SUBSCR] = 0, - [CALL_FUNCTION] = 1, /* _PyAdaptiveEntry */ + [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache */ [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ }; @@ -1214,11 +1215,13 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) */ int _Py_Specialize_CallFunction( + PyObject *builtins, PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) { PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; + _PyObjectCache *cache1 = &cache[-1].obj; #if SPECIALIZATION_STATS PyTypeObject *type = Py_TYPE(callable); #endif @@ -1230,10 +1233,19 @@ _Py_Specialize_CallFunction( } switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { - case METH_O: + case METH_O: { + /* len(o) */ + PyObject *builtin_len = PyDict_GetItemString(builtins, "len"); + if (builtin_len == callable) { + cache1->obj = builtin_len; // borrowed + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_LEN, + _Py_OPARG(*instr)); + goto success; + } *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_O, _Py_OPARG(*instr)); goto success; + } case METH_FASTCALL: *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_FAST, _Py_OPARG(*instr)); From 2dc27388766c611bd8a253904e77d650b3f1030d Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 17 Sep 2021 21:15:34 +0800 Subject: [PATCH 36/49] add CALL_FUNCTION_ISINSTANCE --- Python/ceval.c | 31 ++++++++++++++++++++++++++++--- Python/specialize.c | 20 ++++++++++++++++++-- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index c23a3a8ecd9bb4..6007e84a340fa3 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4715,15 +4715,40 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *callable = SECOND(); DEOPT_IF(callable != cache1->obj, CALL_FUNCTION); - PyObject *res = NULL; Py_ssize_t len_i = PyObject_Length(TOP()); - if (len_i >= 0) { - res = PyLong_FromSsize_t(len_i); + PyObject *res = (len_i >= 0) ? PyLong_FromSsize_t(len_i) : NULL; + assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + + /* Clear the stack of the function object. */ + Py_DECREF(POP()); + Py_DECREF(callable); + SET_TOP(res); + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); + if (res == NULL) { + goto error; } + DISPATCH(); + } + + TARGET(CALL_FUNCTION_ISINSTANCE): { + assert(cframe.use_tracing == 0); + /* isinstance(o, o2) */ + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyObjectCache *cache1 = &caches[-1].obj; + assert(cache0->original_oparg == 2); + + PyObject *callable = THIRD(); + DEOPT_IF(callable != cache1->obj, CALL_FUNCTION); + + int retval = PyObject_IsInstance(SECOND(), TOP()); + PyObject *res = (retval >= 0) ? PyBool_FromLong(retval) : NULL; assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); /* Clear the stack of the function object. */ Py_DECREF(POP()); + Py_DECREF(POP()); Py_DECREF(callable); SET_TOP(res); record_cache_hit(cache0); diff --git a/Python/specialize.c b/Python/specialize.c index f9f377c3bdc074..8e5d7fc44af3ce 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1234,9 +1234,13 @@ _Py_Specialize_CallFunction( switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { case METH_O: { + if (original_oparg != 1) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE); + goto fail; + } /* len(o) */ PyObject *builtin_len = PyDict_GetItemString(builtins, "len"); - if (builtin_len == callable) { + if (callable == builtin_len) { cache1->obj = builtin_len; // borrowed *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_LEN, _Py_OPARG(*instr)); @@ -1246,10 +1250,22 @@ _Py_Specialize_CallFunction( _Py_OPARG(*instr)); goto success; } - case METH_FASTCALL: + case METH_FASTCALL: { + if (original_oparg == 2) { + /* isinstance(o1, o2) */ + PyObject *builtin_isinstance = PyDict_GetItemString( + builtins, "isinstance"); + if (callable == builtin_isinstance) { + cache1->obj = builtin_isinstance; // borrowed + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_ISINSTANCE, + _Py_OPARG(*instr)); + goto success; + } + } *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_FAST, _Py_OPARG(*instr)); goto success; + } case METH_VARARGS: SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYCFUNCTION); goto fail; From 7bd4338bb562a5831bc2b27e55e3ac6f8fcf7597 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 17 Sep 2021 21:32:11 +0800 Subject: [PATCH 37/49] fix specialization stats --- Include/internal/pycore_code.h | 3 --- Python/specialize.c | 13 +++++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 2f4ccc1ad49dbd..deb9e175e2769f 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -300,9 +300,6 @@ cache_backoff(_PyAdaptiveEntry *entry) { entry->counter = ADAPTIVE_CACHE_BACKOFF; } -#define SPECIALIZATION_STATS 0 -#define SPECIALIZATION_STATS_DETAILED 0 - /* Specialization functions */ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); diff --git a/Python/specialize.c b/Python/specialize.c index 8e5d7fc44af3ce..952a35f79abde5 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -182,6 +182,7 @@ _Py_PrintSpecializationStats(void) print_stats(out, &_specialization_stats[BINARY_ADD], "binary_add"); print_stats(out, &_specialization_stats[BINARY_SUBSCR], "binary_subscr"); print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr"); + print_stats(out, &_specialization_stats[CALL_FUNCTION], "call_function"); if (out != stderr) { fclose(out); } @@ -1222,17 +1223,17 @@ _Py_Specialize_CallFunction( PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; _PyObjectCache *cache1 = &cache[-1].obj; -#if SPECIALIZATION_STATS +#if COLLECT_SPECIALIZATION_STATS PyTypeObject *type = Py_TYPE(callable); #endif /* Specialize C functions */ if (PyCFunction_CheckExact(callable)) { - PyCFunctionObject *meth = (PyCFunctionObject *)callable; if (PyCFunction_GET_FUNCTION(callable) == NULL) { goto fail; } - switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL | - METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { + switch (PyCFunction_GET_FLAGS(callable) & + (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | + METH_KEYWORDS | METH_METHOD)) { case METH_O: { if (original_oparg != 1) { SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE); @@ -1290,7 +1291,7 @@ _Py_Specialize_CallFunction( } } /* These might be implemented in the future. Collecting stats for now. */ -#if SPECIALIZATION_STATS +#if COLLECT_SPECIALIZATION_STATS if (PyFunction_Check(callable)) { SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYTHON_FUNCTION); goto fail; @@ -1310,7 +1311,7 @@ _Py_Specialize_CallFunction( goto fail; } if (PyType_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, + SPECIALIZATION_FAIL(CALL_FUNCTION, PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ? SPEC_FAIL_IMMUTABLE_CLASS : SPEC_FAIL_MUTABLE_CLASS); goto fail; From 2500ab6dae86225cba938bf0eb0ea0bfd09d9597 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 18 Oct 2021 23:11:42 +0800 Subject: [PATCH 38/49] Refactor --- Include/internal/pycore_code.h | 4 +- Include/opcode.h | 82 +++++-------- Python/ceval.c | 16 +-- Python/opcode_targets.h | 74 ++++-------- Python/specialize.c | 208 +++++++++++++++++---------------- 5 files changed, 169 insertions(+), 215 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 53b49bf6aafb7b..b558f887cd7952 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -309,10 +309,10 @@ int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *na int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *instr); -int _Py_Specialize_CallFunction(PyObject *builtins, PyObject **stack_pointer, uint8_t original_oparg, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +_Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins); #define PRINT_SPECIALIZATION_STATS 0 -#define PRINT_SPECIALIZATION_STATS_DETAILED 0 +#define PRINT_SPECIALIZATION_STATS_DETAILED 9 #define PRINT_SPECIALIZATION_STATS_TO_FILE 0 #ifdef Py_DEBUG diff --git a/Include/opcode.h b/Include/opcode.h index bf938970223370..22d968ee0d4c79 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -141,37 +141,6 @@ extern "C" { #define BINARY_ADD_FLOAT 13 #define BINARY_ADD_UNICODE 14 #define BINARY_ADD_UNICODE_INPLACE_FAST 18 -#define BINARY_SUBSCR_ADAPTIVE 21 -#define BINARY_SUBSCR_LIST_INT 36 -#define BINARY_SUBSCR_TUPLE_INT 38 -#define BINARY_SUBSCR_DICT 39 -#define CALL_FUNCTION_ADAPTIVE 40 -#define CALL_FUNCTION_BUILTIN_O 41 -#define CALL_FUNCTION_BUILTIN_FAST 42 -#define CALL_FUNCTION_LEN 43 -#define CALL_FUNCTION_ISINSTANCE 44 -#define JUMP_ABSOLUTE_QUICK 45 -#define LOAD_ATTR_ADAPTIVE 46 -#define LOAD_ATTR_SPLIT_KEYS 47 -#define LOAD_ATTR_WITH_HINT 48 -#define LOAD_ATTR_SLOT 58 -#define LOAD_ATTR_MODULE 80 -#define LOAD_GLOBAL_ADAPTIVE 81 -#define LOAD_GLOBAL_MODULE 87 -#define LOAD_GLOBAL_BUILTIN 88 -#define LOAD_METHOD_ADAPTIVE 120 -#define LOAD_METHOD_CACHED 122 -#define LOAD_METHOD_CLASS 123 -#define LOAD_METHOD_MODULE 127 -#define STORE_ATTR_ADAPTIVE 128 -#define STORE_ATTR_SPLIT_KEYS 134 -#define STORE_ATTR_SLOT 140 -#define STORE_ATTR_WITH_HINT 143 -#define LOAD_FAST__LOAD_FAST 149 -#define STORE_FAST__LOAD_FAST 150 -#define LOAD_FAST__LOAD_CONST 151 -#define LOAD_CONST__LOAD_FAST 153 -#define STORE_FAST__STORE_FAST 154 #define BINARY_MULTIPLY_ADAPTIVE 21 #define BINARY_MULTIPLY_INT 36 #define BINARY_MULTIPLY_FLOAT 38 @@ -179,29 +148,34 @@ extern "C" { #define BINARY_SUBSCR_LIST_INT 40 #define BINARY_SUBSCR_TUPLE_INT 41 #define BINARY_SUBSCR_DICT 42 -#define JUMP_ABSOLUTE_QUICK 43 -#define LOAD_ATTR_ADAPTIVE 44 -#define LOAD_ATTR_INSTANCE_VALUE 45 -#define LOAD_ATTR_WITH_HINT 46 -#define LOAD_ATTR_SLOT 47 -#define LOAD_ATTR_MODULE 48 -#define LOAD_GLOBAL_ADAPTIVE 58 -#define LOAD_GLOBAL_MODULE 80 -#define LOAD_GLOBAL_BUILTIN 81 -#define LOAD_METHOD_ADAPTIVE 87 -#define LOAD_METHOD_CACHED 88 -#define LOAD_METHOD_CLASS 120 -#define LOAD_METHOD_MODULE 122 -#define LOAD_METHOD_NO_DICT 123 -#define STORE_ATTR_ADAPTIVE 127 -#define STORE_ATTR_INSTANCE_VALUE 128 -#define STORE_ATTR_SLOT 134 -#define STORE_ATTR_WITH_HINT 140 -#define LOAD_FAST__LOAD_FAST 143 -#define STORE_FAST__LOAD_FAST 149 -#define LOAD_FAST__LOAD_CONST 150 -#define LOAD_CONST__LOAD_FAST 151 -#define STORE_FAST__STORE_FAST 153 +#define CALL_FUNCTION_ADAPTIVE 43 +#define CALL_FUNCTION_BUILTIN_O 44 +#define CALL_FUNCTION_BUILTIN_FAST 45 +#define CALL_FUNCTION_LEN 46 +#define CALL_FUNCTION_ISINSTANCE 47 +#define JUMP_ABSOLUTE_QUICK 48 +#define LOAD_ATTR_ADAPTIVE 58 +#define LOAD_ATTR_INSTANCE_VALUE 80 +#define LOAD_ATTR_WITH_HINT 81 +#define LOAD_ATTR_SLOT 87 +#define LOAD_ATTR_MODULE 88 +#define LOAD_GLOBAL_ADAPTIVE 120 +#define LOAD_GLOBAL_MODULE 122 +#define LOAD_GLOBAL_BUILTIN 123 +#define LOAD_METHOD_ADAPTIVE 127 +#define LOAD_METHOD_CACHED 128 +#define LOAD_METHOD_CLASS 134 +#define LOAD_METHOD_MODULE 140 +#define LOAD_METHOD_NO_DICT 143 +#define STORE_ATTR_ADAPTIVE 149 +#define STORE_ATTR_INSTANCE_VALUE 150 +#define STORE_ATTR_SLOT 151 +#define STORE_ATTR_WITH_HINT 153 +#define LOAD_FAST__LOAD_FAST 154 +#define STORE_FAST__LOAD_FAST 158 +#define LOAD_FAST__LOAD_CONST 159 +#define LOAD_CONST__LOAD_FAST 167 +#define STORE_FAST__STORE_FAST 168 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Python/ceval.c b/Python/ceval.c index e3fef02a6b3524..6e846575d9585a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4657,6 +4657,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr TARGET(CALL_FUNCTION) { PREDICTED(CALL_FUNCTION); + STAT_INC(CALL_FUNCTION, unquickened); PyObject *function; nargs = oparg; kwnames = NULL; @@ -4714,14 +4715,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } - TARGET(CALL_FUNCTION_ADAPTIVE): { + TARGET(CALL_FUNCTION_ADAPTIVE) { SpecializedCacheEntry *cache = GET_CACHE(); if (cache->adaptive.counter == 0) { next_instr--; + int nargs = cache->adaptive.original_oparg; if (_Py_Specialize_CallFunction( - BUILTINS(), - stack_pointer, - cache->adaptive.original_oparg, next_instr, cache) < 0) { + PEEK(nargs + 1), next_instr, nargs, cache, BUILTINS()) < 0) { goto error; } DISPATCH(); @@ -4734,7 +4734,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } } - TARGET(CALL_FUNCTION_BUILTIN_O): { + TARGET(CALL_FUNCTION_BUILTIN_O) { assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ @@ -4758,7 +4758,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } - TARGET(CALL_FUNCTION_BUILTIN_FAST): { + TARGET(CALL_FUNCTION_BUILTIN_FAST) { assert(cframe.use_tracing == 0); /* Builtin METH_FASTCALL functions, without keywords */ SpecializedCacheEntry *caches = GET_CACHE(); @@ -4797,7 +4797,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } - TARGET(CALL_FUNCTION_LEN): { + TARGET(CALL_FUNCTION_LEN) { assert(cframe.use_tracing == 0); /* len(o) */ SpecializedCacheEntry *caches = GET_CACHE(); @@ -4824,7 +4824,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } - TARGET(CALL_FUNCTION_ISINSTANCE): { + TARGET(CALL_FUNCTION_ISINSTANCE) { assert(cframe.use_tracing == 0); /* isinstance(o, o2) */ SpecializedCacheEntry *caches = GET_CACHE(); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 3f6439a30bca41..4179689e8a8663 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -48,11 +48,6 @@ static void *opcode_targets[256] = { &&TARGET_CALL_FUNCTION_LEN, &&TARGET_CALL_FUNCTION_ISINSTANCE, &&TARGET_JUMP_ABSOLUTE_QUICK, - &&TARGET_LOAD_ATTR_ADAPTIVE, - &&TARGET_LOAD_ATTR_INSTANCE_VALUE, - &&TARGET_LOAD_ATTR_WITH_HINT, - &&TARGET_LOAD_ATTR_SLOT, - &&TARGET_LOAD_ATTR_MODULE, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -62,8 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_LOAD_ATTR_SLOT, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, + &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -85,19 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, - &&TARGET_LOAD_ATTR_MODULE, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, - &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_LOAD_GLOBAL_BUILTIN, + &&TARGET_LOAD_ATTR_INSTANCE_VALUE, + &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_LOAD_GLOBAL_BUILTIN, - &&TARGET_LOAD_METHOD_ADAPTIVE, - &&TARGET_LOAD_METHOD_CACHED, + &&TARGET_LOAD_ATTR_SLOT, + &&TARGET_LOAD_ATTR_MODULE, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -129,66 +119,46 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&TARGET_LOAD_METHOD_ADAPTIVE, - &&TARGET_JUMP_IF_NOT_EXC_MATCH, - &&TARGET_LOAD_METHOD_CACHED, - &&TARGET_LOAD_METHOD_CLASS, - &&TARGET_LOAD_FAST, - &&TARGET_STORE_FAST, - &&TARGET_DELETE_FAST, - &&TARGET_LOAD_METHOD_MODULE, - &&TARGET_STORE_ATTR_ADAPTIVE, - &&TARGET_LOAD_METHOD_CLASS, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_JUMP_IF_NOT_EXC_MATCH, - &&TARGET_LOAD_METHOD_MODULE, - &&TARGET_LOAD_METHOD_NO_DICT, + &&TARGET_LOAD_GLOBAL_MODULE, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&TARGET_STORE_ATTR_ADAPTIVE, - &&TARGET_STORE_ATTR_INSTANCE_VALUE, + &&TARGET_LOAD_METHOD_ADAPTIVE, + &&TARGET_LOAD_METHOD_CACHED, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&TARGET_STORE_ATTR_SPLIT_KEYS, - &&TARGET_STORE_ATTR_SLOT, + &&TARGET_LOAD_METHOD_CLASS, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_CALL_FUNCTION_KW, - &&TARGET_CALL_FUNCTION_EX, - &&TARGET_STORE_ATTR_WITH_HINT, - &&TARGET_STORE_ATTR_WITH_HINT, + &&TARGET_LOAD_METHOD_MODULE, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_LOAD_METHOD_NO_DICT, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, &&TARGET_LOAD_CLASSDEREF, - &&TARGET_LOAD_FAST__LOAD_FAST, - &&TARGET_STORE_FAST__LOAD_FAST, - &&TARGET_LOAD_FAST__LOAD_CONST, - &&TARGET_MATCH_CLASS, - &&TARGET_LOAD_CONST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, - &&TARGET_STORE_FAST__LOAD_FAST, - &&TARGET_LOAD_FAST__LOAD_CONST, - &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_STORE_ATTR_ADAPTIVE, + &&TARGET_STORE_ATTR_INSTANCE_VALUE, + &&TARGET_STORE_ATTR_SLOT, &&TARGET_MATCH_CLASS, - &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, + &&TARGET_STORE_ATTR_WITH_HINT, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_FORMAT_VALUE, &&TARGET_BUILD_CONST_KEY_MAP, &&TARGET_BUILD_STRING, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LOAD_METHOD, &&TARGET_CALL_METHOD, &&TARGET_LIST_EXTEND, @@ -196,8 +166,8 @@ static void *opcode_targets[256] = { &&TARGET_DICT_MERGE, &&TARGET_DICT_UPDATE, &&TARGET_CALL_METHOD_KW, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_STORE_FAST__STORE_FAST, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 849d2ec5d6f852..acc19d45dc9820 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -461,7 +461,7 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_NON_FUNCTION_SCOPE 11 #define SPEC_FAIL_DIFFERENT_TYPES 12 -/* CALL_FUNCTION */ +/* Call function */ #define SPEC_FAIL_PYCFUNCTION 10 #define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 13 @@ -1236,122 +1236,132 @@ _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *ins return 0; } -/* TODO: - - Specialize calling C types like type() with CALL_FUNCTION_BUILTIN_TYPE - - Specialize python function calls. -*/ + int -_Py_Specialize_CallFunction( - PyObject *builtins, - PyObject **stack_pointer, uint8_t original_oparg, - _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, + SpecializedCacheEntry *cache, PyObject *builtins) { - PyObject *callable = stack_pointer[-(original_oparg + 1)]; _PyAdaptiveEntry *cache0 = &cache->adaptive; _PyObjectCache *cache1 = &cache[-1].obj; -#if COLLECT_SPECIALIZATION_STATS - PyTypeObject *type = Py_TYPE(callable); -#endif - /* Specialize C functions */ - if (PyCFunction_CheckExact(callable)) { - if (PyCFunction_GET_FUNCTION(callable) == NULL) { - goto fail; - } - switch (PyCFunction_GET_FLAGS(callable) & - (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | - METH_KEYWORDS | METH_METHOD)) { - case METH_O: { - if (original_oparg != 1) { - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE); - goto fail; - } - /* len(o) */ - PyObject *builtin_len = PyDict_GetItemString(builtins, "len"); - if (callable == builtin_len) { - cache1->obj = builtin_len; // borrowed - *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_LEN, - _Py_OPARG(*instr)); - goto success; - } - *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_O, + if (PyCFunction_GET_FUNCTION(callable) == NULL) { + return 1; + } + switch (PyCFunction_GET_FLAGS(callable) & + (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | + METH_KEYWORDS | METH_METHOD)) { + case METH_O: { + if (nargs != 1) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE); + return 1; + } + /* len(o) */ + PyObject *builtin_len = PyDict_GetItemString(builtins, "len"); + if (callable == builtin_len) { + cache1->obj = builtin_len; // borrowed + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_LEN, _Py_OPARG(*instr)); - goto success; + return 1; } - case METH_FASTCALL: { - if (original_oparg == 2) { - /* isinstance(o1, o2) */ - PyObject *builtin_isinstance = PyDict_GetItemString( - builtins, "isinstance"); - if (callable == builtin_isinstance) { - cache1->obj = builtin_isinstance; // borrowed - *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_ISINSTANCE, - _Py_OPARG(*instr)); - goto success; - } + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_O, + _Py_OPARG(*instr)); + return 0; + } + case METH_FASTCALL: { + if (nargs == 2) { + /* isinstance(o1, o2) */ + PyObject *builtin_isinstance = PyDict_GetItemString( + builtins, "isinstance"); + if (callable == builtin_isinstance) { + cache1->obj = builtin_isinstance; // borrowed + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_ISINSTANCE, + _Py_OPARG(*instr)); + return 0; } - *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_FAST, - _Py_OPARG(*instr)); - goto success; } - case METH_VARARGS: - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYCFUNCTION); - goto fail; - case METH_VARARGS | METH_KEYWORDS: - SPECIALIZATION_FAIL(CALL_FUNCTION, - SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS); - goto fail; - case METH_FASTCALL | METH_KEYWORDS: - SPECIALIZATION_FAIL(CALL_FUNCTION, - SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS); - goto fail; - case METH_NOARGS: - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYCFUNCTION_NOARGS); - goto fail; - /* This case should never happen with PyCFunctionObject -- only - PyMethodObject. See zlib.compressobj()'s methods for an example. - */ - case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: - default: - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_BAD_CALL_FLAGS); - goto fail; + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_FAST, + _Py_OPARG(*instr)); + return 0; } + case METH_VARARGS: + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYCFUNCTION); + return 1; + case METH_VARARGS | METH_KEYWORDS: + SPECIALIZATION_FAIL(CALL_FUNCTION, + SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS); + return 1; + case METH_FASTCALL | METH_KEYWORDS: + SPECIALIZATION_FAIL(CALL_FUNCTION, + SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS); + return 1; + case METH_NOARGS: + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYCFUNCTION_NOARGS); + return 1; + /* This case should never happen with PyCFunctionObject -- only + PyMethodObject. See zlib.compressobj()'s methods for an example. + */ + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + default: + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_BAD_CALL_FLAGS); + return 1; } - /* These might be implemented in the future. Collecting stats for now. */ -#if COLLECT_SPECIALIZATION_STATS +} + +#if COLLECT_SPECIALIZATION_STATS_DETAILED +int +c_call_fail_kind(PyObject *callable) +{ +/* These might be implemented in the future. Collecting stats for now. */ if (PyFunction_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYTHON_FUNCTION); - goto fail; + return SPEC_FAIL_PYTHON_FUNCTION; } // new-style bound methods - if (PyInstanceMethod_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_METHOD); - goto fail; + else if (PyInstanceMethod_Check(callable)) { + return SPEC_FAIL_METHOD; } - if (PyMethod_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_METHOD); - goto fail; + else if (PyMethod_Check(callable)) { + return SPEC_FAIL_METHOD; } // builtin method - if (PyCMethod_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_METHOD); - goto fail; + else if (PyCMethod_Check(callable)) { + return SPEC_FAIL_METHOD; } - if (PyType_Check(callable)) { - SPECIALIZATION_FAIL(CALL_FUNCTION, - PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ? - SPEC_FAIL_IMMUTABLE_CLASS : SPEC_FAIL_MUTABLE_CLASS); - goto fail; + else if (PyType_Check(callable)) { + PyTypeObject *type = Py_TYPE(callable); + return PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ? + SPEC_FAIL_IMMUTABLE_CLASS : SPEC_FAIL_MUTABLE_CLASS; } - /* So far this misses things like weakref.weakref */ + return SPEC_FAIL_OTHER; +} #endif -fail: - STAT_INC(CALL_FUNCTION, specialization_failure); - assert(!PyErr_Occurred()); - cache_backoff(cache0); - return 0; -success: - STAT_INC(CALL_FUNCTION, specialization_success); - assert(!PyErr_Occurred()); - cache0->counter = saturating_start(); + +/* TODO: + - Specialize calling types. + - Specialize python function calls. +*/ +int +_Py_Specialize_CallFunction( + PyObject *callable, _Py_CODEUNIT *instr, + int nargs, SpecializedCacheEntry *cache, + PyObject *builtins) +{; + int fail; + if (PyCFunction_CheckExact(callable)) { + fail = specialize_c_call(callable, instr, nargs, cache, builtins); + } + else { + SPECIALIZATION_FAIL(CALL_FUNCTION, c_call_fail_kind(callable)); + fail = 1; + } + _PyAdaptiveEntry *cache0 = &cache->adaptive; + if (fail) { + STAT_INC(CALL_FUNCTION, specialization_failure); + assert(!PyErr_Occurred()); + cache_backoff(cache0); + } + else { + STAT_INC(CALL_FUNCTION, specialization_success); + assert(!PyErr_Occurred()); + cache0->counter = saturating_start(); + } return 0; } From 08ef4d82b0a6e86c96868b9f51a347e1273de2f7 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 18 Oct 2021 23:23:13 +0800 Subject: [PATCH 39/49] convert to static --- Python/specialize.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index acc19d45dc9820..23b8388ca179c5 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1237,7 +1237,7 @@ _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *ins } -int +static int specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins) { @@ -1307,7 +1307,7 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, } #if COLLECT_SPECIALIZATION_STATS_DETAILED -int +static int c_call_fail_kind(PyObject *callable) { /* These might be implemented in the future. Collecting stats for now. */ From 41f6fa604ba2164f82cdde2ebc90c7650ac94d0a Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 00:31:50 +0800 Subject: [PATCH 40/49] fix news and formatting --- Include/internal/pycore_code.h | 2 +- .../2021-06-28-22-23-59.bpo-44525.sSvUKG.rst | 6 +++++- Python/specialize.c | 1 - 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index b558f887cd7952..d44921b64c9a32 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -309,7 +309,7 @@ int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *na int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *instr); -_Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins); +int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 9 diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst index 50935c5fa8320a..8963d028cf8a9b 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-28-22-23-59.bpo-44525.sSvUKG.rst @@ -1,6 +1,10 @@ Setup initial specialization infrastructure for the ``CALL_FUNCTION`` opcode. -Implemented two initial specializations for ``PyCFunctionObject``: +Implemented initial specializations for C function calls: * ``CALL_FUNCTION_BUILTIN_O`` for ``METH_O`` flag. * ``CALL_FUNCTION_BUILTIN_FAST`` for ``METH_FASTCALL`` flag without keywords. + +* ``CALL_FUNCTION_LEN`` for ``len(o)``. + +* ``CALL_FUNCTION_ISINSTANCE`` for ``isinstance(o, t)``. diff --git a/Python/specialize.c b/Python/specialize.c index 23b8388ca179c5..33a42a118d080c 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1241,7 +1241,6 @@ static int specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; _PyObjectCache *cache1 = &cache[-1].obj; if (PyCFunction_GET_FUNCTION(callable) == NULL) { return 1; From 8b113d1cf9af2903ae555dd85c852d61a59c50e5 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 00:54:59 +0800 Subject: [PATCH 41/49] remove typo --- Include/internal/pycore_code.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index d44921b64c9a32..d464f3d2a8131b 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -312,7 +312,7 @@ int _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins); #define PRINT_SPECIALIZATION_STATS 0 -#define PRINT_SPECIALIZATION_STATS_DETAILED 9 +#define PRINT_SPECIALIZATION_STATS_DETAILED 0 #define PRINT_SPECIALIZATION_STATS_TO_FILE 0 #ifdef Py_DEBUG From 9642df56cf50703fd368a942bc29db30879859de Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 01:21:29 +0800 Subject: [PATCH 42/49] remove nit --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 33a42a118d080c..f66f3099a5f2f9 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1342,7 +1342,7 @@ _Py_Specialize_CallFunction( PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins) -{; +{ int fail; if (PyCFunction_CheckExact(callable)) { fail = specialize_c_call(callable, instr, nargs, cache, builtins); From 8a74cff332be6eb7fb4e00bff647714ef1eb6abc Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 02:05:23 +0800 Subject: [PATCH 43/49] fix wrong return code --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index f66f3099a5f2f9..fa7431ec886898 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1259,7 +1259,7 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, cache1->obj = builtin_len; // borrowed *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_LEN, _Py_OPARG(*instr)); - return 1; + return 0; } *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_O, _Py_OPARG(*instr)); From 907c5cb07b2320ff08416de585ba4a64ec58fef8 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 02:44:37 +0800 Subject: [PATCH 44/49] partly address code review --- Python/specialize.c | 48 ++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index fa7431ec886898..810aaaee878b66 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1237,6 +1237,28 @@ _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *ins } +static int +builtin_call_fail_kind(int ml_flags) +{ + switch (ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | + METH_KEYWORDS | METH_METHOD)) { + case METH_VARARGS: + return SPEC_FAIL_PYCFUNCTION; + case METH_VARARGS | METH_KEYWORDS: + return SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS; + case METH_FASTCALL | METH_KEYWORDS: + return SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS; + case METH_NOARGS: + return SPEC_FAIL_PYCFUNCTION_NOARGS; + /* This case should never happen with PyCFunctionObject -- only + PyMethodObject. See zlib.compressobj()'s methods for an example. + */ + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + default: + return SPEC_FAIL_BAD_CALL_FLAGS; + } +} + static int specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins) @@ -1281,35 +1303,17 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, _Py_OPARG(*instr)); return 0; } - case METH_VARARGS: - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYCFUNCTION); - return 1; - case METH_VARARGS | METH_KEYWORDS: - SPECIALIZATION_FAIL(CALL_FUNCTION, - SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS); - return 1; - case METH_FASTCALL | METH_KEYWORDS: - SPECIALIZATION_FAIL(CALL_FUNCTION, - SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS); - return 1; - case METH_NOARGS: - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYCFUNCTION_NOARGS); - return 1; - /* This case should never happen with PyCFunctionObject -- only - PyMethodObject. See zlib.compressobj()'s methods for an example. - */ - case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: default: - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_BAD_CALL_FLAGS); + SPECIALIZATION_FAIL(CALL_FUNCTION, + builtin_call_fail_kind(PyCFunction_GET_FLAGS(callable))); return 1; } } #if COLLECT_SPECIALIZATION_STATS_DETAILED static int -c_call_fail_kind(PyObject *callable) +call_fail_kind(PyObject *callable) { -/* These might be implemented in the future. Collecting stats for now. */ if (PyFunction_Check(callable)) { return SPEC_FAIL_PYTHON_FUNCTION; } @@ -1348,7 +1352,7 @@ _Py_Specialize_CallFunction( fail = specialize_c_call(callable, instr, nargs, cache, builtins); } else { - SPECIALIZATION_FAIL(CALL_FUNCTION, c_call_fail_kind(callable)); + SPECIALIZATION_FAIL(CALL_FUNCTION, call_fail_kind(callable)); fail = 1; } _PyAdaptiveEntry *cache0 = &cache->adaptive; From 3e094851c39c1eaca28f31d23d06734dec21391c Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 11:42:25 +0800 Subject: [PATCH 45/49] Exclude function if not collecting stats --- Python/specialize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 810aaaee878b66..ee573d29a474eb 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1236,7 +1236,7 @@ _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *ins return 0; } - +#if COLLECT_SPECIALIZATION_STATS_DETAILED static int builtin_call_fail_kind(int ml_flags) { @@ -1258,6 +1258,7 @@ builtin_call_fail_kind(int ml_flags) return SPEC_FAIL_BAD_CALL_FLAGS; } } +#endif static int specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, From b28d85c1611eee41675d6d09d46bd2747c2eacfe Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 18:43:42 +0800 Subject: [PATCH 46/49] check for error first --- Python/ceval.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 6e846575d9585a..27ae2cc4979cce 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4809,7 +4809,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(callable != cache1->obj, CALL_FUNCTION); Py_ssize_t len_i = PyObject_Length(TOP()); - PyObject *res = (len_i >= 0) ? PyLong_FromSsize_t(len_i) : NULL; + if (len_i < 0) { + goto error; + } + PyObject *res = PyLong_FromSsize_t(len_i); assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); /* Clear the stack of the function object. */ @@ -4836,7 +4839,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(callable != cache1->obj, CALL_FUNCTION); int retval = PyObject_IsInstance(SECOND(), TOP()); - PyObject *res = (retval >= 0) ? PyBool_FromLong(retval) : NULL; + if (retval < 0) { + goto error; + } + PyObject *res = PyBool_FromLong(retval); assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); /* Clear the stack of the function object. */ From 617424bd8d0a72398ad424972dd750945e37982c Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 18:48:59 +0800 Subject: [PATCH 47/49] Record cache hit earlier --- Python/ceval.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 27ae2cc4979cce..e5015e835a0e76 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4807,6 +4807,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *callable = SECOND(); DEOPT_IF(callable != cache1->obj, CALL_FUNCTION); + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); Py_ssize_t len_i = PyObject_Length(TOP()); if (len_i < 0) { @@ -4819,8 +4821,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(POP()); Py_DECREF(callable); SET_TOP(res); - record_cache_hit(cache0); - STAT_INC(CALL_FUNCTION, hit); if (res == NULL) { goto error; } @@ -4837,6 +4837,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *callable = THIRD(); DEOPT_IF(callable != cache1->obj, CALL_FUNCTION); + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); int retval = PyObject_IsInstance(SECOND(), TOP()); if (retval < 0) { @@ -4849,8 +4851,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(POP()); Py_DECREF(POP()); Py_DECREF(callable); - SET_TOP(res); - record_cache_hit(cache0); STAT_INC(CALL_FUNCTION, hit); if (res == NULL) { goto error; From e73b69f4ae969467e5ce2f1f7de52e9fe7b63ac5 Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 18:58:57 +0800 Subject: [PATCH 48/49] fix isinstance bug --- Python/ceval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index e5015e835a0e76..0aa8997d48886f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4851,7 +4851,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(POP()); Py_DECREF(POP()); Py_DECREF(callable); - STAT_INC(CALL_FUNCTION, hit); + SET_TOP(res); if (res == NULL) { goto error; } From f1917207a0498847adc026c4f72b9c9fa05ceb4f Mon Sep 17 00:00:00 2001 From: Fidget-Spinner <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 19 Oct 2021 22:05:43 +0800 Subject: [PATCH 49/49] apply suggestions from review: move up cache hits --- Python/ceval.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 0aa8997d48886f..e16a1378b15a55 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4741,6 +4741,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *callable = SECOND(); DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL_FUNCTION); + _PyAdaptiveEntry *cache0 = &GET_CACHE()[0].adaptive; + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); PyObject *arg = POP(); @@ -4751,7 +4754,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(arg); Py_DECREF(callable); SET_TOP(res); - STAT_INC(CALL_FUNCTION, hit); if (res == NULL) { goto error; } @@ -4769,6 +4771,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL, CALL_FUNCTION); + record_cache_hit(cache0); + STAT_INC(CALL_FUNCTION, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); /* res = func(self, args, nargs) */ @@ -4784,8 +4788,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(x); } PUSH(res); - record_cache_hit(cache0); - STAT_INC(CALL_FUNCTION, hit); if (res == NULL) { /* Not deopting because this doesn't mean our optimization was wrong. `res` can be NULL for valid reasons. Eg. getattr(x,