From 317db3a6760c785d931174e81dfe0179f5b4d9c1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 5 Nov 2023 10:18:10 +0000 Subject: [PATCH 01/12] Replace jumps with deopts in tier 2 --- Include/internal/pycore_opcode_metadata.h | 140 +++++++++++++--------- Python/abstract_interp_cases.c.h | 14 ++- Python/bytecodes.c | 32 +++-- Python/executor_cases.c.h | 28 +++-- Python/generated_cases.c.h | 16 +-- Python/optimizer.c | 46 +++---- 6 files changed, 164 insertions(+), 112 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index bd5a401adf11bf..b87fddbd2142b6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -76,45 +76,49 @@ #define _STORE_ATTR_SLOT 348 #define _SPECIALIZE_COMPARE_OP 349 #define _COMPARE_OP 350 -#define _IS_NONE 351 -#define _SPECIALIZE_FOR_ITER 352 -#define _FOR_ITER 353 -#define _ITER_CHECK_LIST 354 -#define _ITER_JUMP_LIST 355 -#define _GUARD_NOT_EXHAUSTED_LIST 356 -#define _ITER_NEXT_LIST 357 -#define _ITER_CHECK_TUPLE 358 -#define _ITER_JUMP_TUPLE 359 -#define _GUARD_NOT_EXHAUSTED_TUPLE 360 -#define _ITER_NEXT_TUPLE 361 -#define _ITER_CHECK_RANGE 362 -#define _ITER_JUMP_RANGE 363 -#define _GUARD_NOT_EXHAUSTED_RANGE 364 -#define _ITER_NEXT_RANGE 365 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 366 -#define _GUARD_KEYS_VERSION 367 -#define _LOAD_ATTR_METHOD_WITH_VALUES 368 -#define _LOAD_ATTR_METHOD_NO_DICT 369 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 370 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 371 -#define _CHECK_ATTR_METHOD_LAZY_DICT 372 -#define _LOAD_ATTR_METHOD_LAZY_DICT 373 -#define _SPECIALIZE_CALL 374 -#define _CALL 375 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 376 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 377 -#define _CHECK_PEP_523 378 -#define _CHECK_FUNCTION_EXACT_ARGS 379 -#define _CHECK_STACK_SPACE 380 -#define _INIT_CALL_PY_EXACT_ARGS 381 -#define _PUSH_FRAME 382 -#define _SPECIALIZE_BINARY_OP 383 -#define _BINARY_OP 384 -#define _POP_JUMP_IF_FALSE 385 -#define _POP_JUMP_IF_TRUE 386 -#define _JUMP_TO_TOP 387 -#define _SAVE_RETURN_OFFSET 388 -#define _INSERT 389 +#define _POP_JUMP_IF_FALSE 351 +#define _POP_JUMP_IF_TRUE 352 +#define _IS_NONE 353 +#define _SPECIALIZE_FOR_ITER 354 +#define _FOR_ITER 355 +#define _ITER_CHECK_LIST 356 +#define _ITER_JUMP_LIST 357 +#define _GUARD_NOT_EXHAUSTED_LIST 358 +#define _ITER_NEXT_LIST 359 +#define _ITER_CHECK_TUPLE 360 +#define _ITER_JUMP_TUPLE 361 +#define _GUARD_NOT_EXHAUSTED_TUPLE 362 +#define _ITER_NEXT_TUPLE 363 +#define _ITER_CHECK_RANGE 364 +#define _ITER_JUMP_RANGE 365 +#define _GUARD_NOT_EXHAUSTED_RANGE 366 +#define _ITER_NEXT_RANGE 367 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 368 +#define _GUARD_KEYS_VERSION 369 +#define _LOAD_ATTR_METHOD_WITH_VALUES 370 +#define _LOAD_ATTR_METHOD_NO_DICT 371 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 372 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 373 +#define _CHECK_ATTR_METHOD_LAZY_DICT 374 +#define _LOAD_ATTR_METHOD_LAZY_DICT 375 +#define _SPECIALIZE_CALL 376 +#define _CALL 377 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 378 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 379 +#define _CHECK_PEP_523 380 +#define _CHECK_FUNCTION_EXACT_ARGS 381 +#define _CHECK_STACK_SPACE 382 +#define _INIT_CALL_PY_EXACT_ARGS 383 +#define _PUSH_FRAME 384 +#define _SPECIALIZE_BINARY_OP 385 +#define _BINARY_OP 386 +#define _GUARD_IS_TRUE_POP 387 +#define _GUARD_IS_FALSE_POP 388 +#define _GUARD_IS_NONE_POP 389 +#define _GUARD_IS_NOT_NONE_POP 390 +#define _JUMP_TO_TOP 391 +#define _SAVE_RETURN_OFFSET 392 +#define _INSERT 393 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -504,12 +508,16 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case ENTER_EXECUTOR: return 0; - case POP_JUMP_IF_FALSE: + case _POP_JUMP_IF_FALSE: return 1; - case POP_JUMP_IF_TRUE: + case _POP_JUMP_IF_TRUE: return 1; case _IS_NONE: return 1; + case POP_JUMP_IF_TRUE: + return 1; + case POP_JUMP_IF_FALSE: + return 1; case POP_JUMP_IF_NONE: return 1; case POP_JUMP_IF_NOT_NONE: @@ -724,9 +732,13 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case RESERVED: return 0; - case _POP_JUMP_IF_FALSE: + case _GUARD_IS_TRUE_POP: return 1; - case _POP_JUMP_IF_TRUE: + case _GUARD_IS_FALSE_POP: + return 1; + case _GUARD_IS_NONE_POP: + return 1; + case _GUARD_IS_NOT_NONE_POP: return 1; case _JUMP_TO_TOP: return 0; @@ -1132,12 +1144,16 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case ENTER_EXECUTOR: return 0; - case POP_JUMP_IF_FALSE: + case _POP_JUMP_IF_FALSE: return 0; - case POP_JUMP_IF_TRUE: + case _POP_JUMP_IF_TRUE: return 0; case _IS_NONE: return 1; + case POP_JUMP_IF_TRUE: + return 0; + case POP_JUMP_IF_FALSE: + return 0; case POP_JUMP_IF_NONE: return 0; case POP_JUMP_IF_NOT_NONE: @@ -1352,9 +1368,13 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case RESERVED: return 0; - case _POP_JUMP_IF_FALSE: + case _GUARD_IS_TRUE_POP: return 0; - case _POP_JUMP_IF_TRUE: + case _GUARD_IS_FALSE_POP: + return 0; + case _GUARD_IS_NONE_POP: + return 0; + case _GUARD_IS_NOT_NONE_POP: return 0; case _JUMP_TO_TOP: return 0; @@ -1632,9 +1652,11 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [JUMP] = { true, 0, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [JUMP_NO_INTERRUPT] = { true, 0, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG }, - [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [_IS_NONE] = { true, INSTR_FMT_IX, 0 }, + [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [JUMP_BACKWARD_NO_INTERRUPT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, @@ -1742,8 +1764,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [CACHE] = { true, INSTR_FMT_IX, 0 }, [RESERVED] = { true, INSTR_FMT_IX, 0 }, - [_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [_GUARD_IS_TRUE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [_GUARD_IS_FALSE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [_GUARD_IS_NONE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [_GUARD_IS_NOT_NONE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_JUMP_TO_TOP] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG }, [_SET_IP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_SAVE_RETURN_OFFSET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1863,6 +1887,10 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [CONTAINS_OP] = { .nuops = 1, .uops = { { CONTAINS_OP, 0, 0 } } }, [CHECK_EG_MATCH] = { .nuops = 1, .uops = { { CHECK_EG_MATCH, 0, 0 } } }, [CHECK_EXC_MATCH] = { .nuops = 1, .uops = { { CHECK_EXC_MATCH, 0, 0 } } }, + [POP_JUMP_IF_TRUE] = { .nuops = 1, .uops = { { _POP_JUMP_IF_TRUE, 0, 0 } } }, + [POP_JUMP_IF_FALSE] = { .nuops = 1, .uops = { { _POP_JUMP_IF_FALSE, 0, 0 } } }, + [POP_JUMP_IF_NONE] = { .nuops = 2, .uops = { { _IS_NONE, 0, 0 }, { _POP_JUMP_IF_TRUE, 0, 0 } } }, + [POP_JUMP_IF_NOT_NONE] = { .nuops = 2, .uops = { { _IS_NONE, 0, 0 }, { _POP_JUMP_IF_FALSE, 0, 0 } } }, [GET_LEN] = { .nuops = 1, .uops = { { GET_LEN, 0, 0 } } }, [MATCH_CLASS] = { .nuops = 1, .uops = { { MATCH_CLASS, 0, 0 } } }, [MATCH_MAPPING] = { .nuops = 1, .uops = { { MATCH_MAPPING, 0, 0 } } }, @@ -1964,6 +1992,8 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_STORE_ATTR_SLOT] = "_STORE_ATTR_SLOT", [_SPECIALIZE_COMPARE_OP] = "_SPECIALIZE_COMPARE_OP", [_COMPARE_OP] = "_COMPARE_OP", + [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", + [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE", [_IS_NONE] = "_IS_NONE", [_SPECIALIZE_FOR_ITER] = "_SPECIALIZE_FOR_ITER", [_FOR_ITER] = "_FOR_ITER", @@ -1998,8 +2028,10 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_PUSH_FRAME] = "_PUSH_FRAME", [_SPECIALIZE_BINARY_OP] = "_SPECIALIZE_BINARY_OP", [_BINARY_OP] = "_BINARY_OP", - [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", - [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE", + [_GUARD_IS_TRUE_POP] = "_GUARD_IS_TRUE_POP", + [_GUARD_IS_FALSE_POP] = "_GUARD_IS_FALSE_POP", + [_GUARD_IS_NONE_POP] = "_GUARD_IS_NONE_POP", + [_GUARD_IS_NOT_NONE_POP] = "_GUARD_IS_NOT_NONE_POP", [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_SAVE_RETURN_OFFSET] = "_SAVE_RETURN_OFFSET", [_INSERT] = "_INSERT", @@ -2247,8 +2279,8 @@ const uint8_t _PyOpcode_Caches[256] = { [LOAD_ATTR] = 9, [COMPARE_OP] = 1, [JUMP_BACKWARD] = 1, - [POP_JUMP_IF_FALSE] = 1, [POP_JUMP_IF_TRUE] = 1, + [POP_JUMP_IF_FALSE] = 1, [POP_JUMP_IF_NONE] = 1, [POP_JUMP_IF_NOT_NONE] = 1, [FOR_ITER] = 1, diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 384a11212b3a2c..0d6f330bb55af3 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -914,12 +914,22 @@ break; } - case _POP_JUMP_IF_FALSE: { + case _GUARD_IS_TRUE_POP: { STACK_SHRINK(1); break; } - case _POP_JUMP_IF_TRUE: { + case _GUARD_IS_FALSE_POP: { + STACK_SHRINK(1); + break; + } + + case _GUARD_IS_NONE_POP: { + STACK_SHRINK(1); + break; + } + + case _GUARD_IS_NOT_NONE_POP: { STACK_SHRINK(1); break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f879ea5850aede..6a050f45ae2391 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2384,7 +2384,7 @@ dummy_func( goto enter_tier_one; } - inst(POP_JUMP_IF_FALSE, (unused/1, cond -- )) { + replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { assert(PyBool_Check(cond)); int flag = Py_IsFalse(cond); #if ENABLE_SPECIALIZATION @@ -2393,7 +2393,7 @@ dummy_func( JUMPBY(oparg * flag); } - inst(POP_JUMP_IF_TRUE, (unused/1, cond -- )) { + replaced op(_POP_JUMP_IF_TRUE, (unused/1, cond -- )) { assert(PyBool_Check(cond)); int flag = Py_IsTrue(cond); #if ENABLE_SPECIALIZATION @@ -2412,9 +2412,13 @@ dummy_func( } } - macro(POP_JUMP_IF_NONE) = _IS_NONE + POP_JUMP_IF_TRUE; + macro(POP_JUMP_IF_TRUE) = _POP_JUMP_IF_TRUE; - macro(POP_JUMP_IF_NOT_NONE) = _IS_NONE + POP_JUMP_IF_FALSE; + macro(POP_JUMP_IF_FALSE) = _POP_JUMP_IF_FALSE; + + macro(POP_JUMP_IF_NONE) = _IS_NONE + _POP_JUMP_IF_TRUE; + + macro(POP_JUMP_IF_NOT_NONE) = _IS_NONE + _POP_JUMP_IF_FALSE; inst(JUMP_BACKWARD_NO_INTERRUPT, (--)) { /* This bytecode is used in the `yield from` or `await` loop. @@ -3979,16 +3983,20 @@ dummy_func( ///////// Tier-2 only opcodes ///////// - op(_POP_JUMP_IF_FALSE, (flag -- )) { - if (Py_IsFalse(flag)) { - next_uop = current_executor->trace + oparg; - } + op (_GUARD_IS_TRUE_POP, (flag -- )) { + DEOPT_IF(Py_IsFalse(flag)); } - op(_POP_JUMP_IF_TRUE, (flag -- )) { - if (Py_IsTrue(flag)) { - next_uop = current_executor->trace + oparg; - } + op (_GUARD_IS_FALSE_POP, (flag -- )) { + DEOPT_IF(Py_IsTrue(flag)); + } + + op (_GUARD_IS_NONE_POP, (val -- )) { + DEOPT_IF(!Py_IsNone(val)); + } + + op (_GUARD_IS_NOT_NONE_POP, (val -- )) { + DEOPT_IF(Py_IsNone(val)); } op(_JUMP_TO_TOP, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d94a7cc4be0052..185a3239520eda 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3215,22 +3215,34 @@ break; } - case _POP_JUMP_IF_FALSE: { + case _GUARD_IS_TRUE_POP: { PyObject *flag; flag = stack_pointer[-1]; - if (Py_IsFalse(flag)) { - next_uop = current_executor->trace + oparg; - } + DEOPT_IF(Py_IsFalse(flag), _GUARD_IS_TRUE_POP); STACK_SHRINK(1); break; } - case _POP_JUMP_IF_TRUE: { + case _GUARD_IS_FALSE_POP: { PyObject *flag; flag = stack_pointer[-1]; - if (Py_IsTrue(flag)) { - next_uop = current_executor->trace + oparg; - } + DEOPT_IF(Py_IsTrue(flag), _GUARD_IS_FALSE_POP); + STACK_SHRINK(1); + break; + } + + case _GUARD_IS_NONE_POP: { + PyObject *val; + val = stack_pointer[-1]; + DEOPT_IF(!Py_IsNone(val), _GUARD_IS_NONE_POP); + STACK_SHRINK(1); + break; + } + + case _GUARD_IS_NOT_NONE_POP: { + PyObject *val; + val = stack_pointer[-1]; + DEOPT_IF(Py_IsNone(val), _GUARD_IS_NOT_NONE_POP); STACK_SHRINK(1); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1c853047a260a0..6d779864954aa7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3452,14 +3452,14 @@ goto enter_tier_one; } - TARGET(POP_JUMP_IF_FALSE) { + TARGET(POP_JUMP_IF_TRUE) { _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; next_instr += 2; - INSTRUCTION_STATS(POP_JUMP_IF_FALSE); + INSTRUCTION_STATS(POP_JUMP_IF_TRUE); PyObject *cond; cond = stack_pointer[-1]; assert(PyBool_Check(cond)); - int flag = Py_IsFalse(cond); + int flag = Py_IsTrue(cond); #if ENABLE_SPECIALIZATION this_instr[1].cache = (this_instr[1].cache << 1) | flag; #endif @@ -3468,14 +3468,14 @@ DISPATCH(); } - TARGET(POP_JUMP_IF_TRUE) { + TARGET(POP_JUMP_IF_FALSE) { _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; next_instr += 2; - INSTRUCTION_STATS(POP_JUMP_IF_TRUE); + INSTRUCTION_STATS(POP_JUMP_IF_FALSE); PyObject *cond; cond = stack_pointer[-1]; assert(PyBool_Check(cond)); - int flag = Py_IsTrue(cond); + int flag = Py_IsFalse(cond); #if ENABLE_SPECIALIZATION this_instr[1].cache = (this_instr[1].cache << 1) | flag; #endif @@ -3502,7 +3502,7 @@ Py_DECREF(value); } } - // POP_JUMP_IF_TRUE + // _POP_JUMP_IF_TRUE cond = b; { assert(PyBool_Check(cond)); @@ -3534,7 +3534,7 @@ Py_DECREF(value); } } - // POP_JUMP_IF_FALSE + // _POP_JUMP_IF_FALSE cond = b; { assert(PyBool_Check(cond)); diff --git a/Python/optimizer.c b/Python/optimizer.c index 065e1274671993..a866ea75e502d5 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -392,6 +392,18 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = { [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE, }; +static const uint16_t +BRANCH_TO_GUARDS[4][2] = { + [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP, + [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_FALSE_POP, + [POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_FALSE_POP, + [POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_TRUE_POP, + [POP_JUMP_IF_NONE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_NOT_NONE_POP, + [POP_JUMP_IF_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NONE_POP, + [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_NONE_POP, + [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP, +}; + #define TRACE_STACK_SIZE 5 /* Returns 1 on success, @@ -526,45 +538,23 @@ translate_bytecode_to_trace( } switch (opcode) { - case POP_JUMP_IF_NONE: - { - RESERVE(2, 2); - ADD_TO_TRACE(_IS_NONE, 0, 0); - opcode = POP_JUMP_IF_TRUE; - goto pop_jump_if_bool; - } - case POP_JUMP_IF_NOT_NONE: - { - RESERVE(2, 2); - ADD_TO_TRACE(_IS_NONE, 0, 0); - opcode = POP_JUMP_IF_FALSE; - goto pop_jump_if_bool; - } - case POP_JUMP_IF_FALSE: case POP_JUMP_IF_TRUE: { -pop_jump_if_bool: - RESERVE(1, 2); - max_length -= 2; // Really the start of the stubs + RESERVE(1, 0); int counter = instr[1].cache; int bitcount = _Py_popcount32(counter); - bool jump_likely = bitcount > 8; - bool jump_sense = opcode == POP_JUMP_IF_TRUE; - uint32_t uopcode = jump_sense ^ jump_likely ? - _POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE; + int jump_likely = bitcount > 8; + uint32_t uopcode = BRANCH_TO_GUARDS[opcode - POP_JUMP_IF_FALSE][jump_likely]; _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; - _Py_CODEUNIT *target_instr = next_instr + oparg; - _Py_CODEUNIT *stub_target = jump_likely ? next_instr : target_instr; - DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, sense=%d, uopcode=%s\n", + DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n", uop_name(opcode), oparg, - counter, bitcount, jump_likely, jump_sense, uop_name(uopcode)); + counter, bitcount, jump_likely, uop_name(uopcode)); ADD_TO_TRACE(uopcode, max_length, 0); - ADD_TO_STUB(max_length, _SET_IP, INSTR_IP(stub_target, code), 0); - ADD_TO_STUB(max_length + 1, _EXIT_TRACE, 0, 0); if (jump_likely) { + _Py_CODEUNIT *target_instr = next_instr + oparg; DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n", instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code)); instr = target_instr; From 2db61ccc925c329710cef63cdd09197482fee210 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 11 Nov 2023 11:17:50 +0000 Subject: [PATCH 02/12] Fewer special cases of uop names --- Python/optimizer.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index a866ea75e502d5..44106b5b87ecc2 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -384,7 +384,7 @@ PyTypeObject _PyUOpExecutor_Type = { .tp_methods = executor_methods, }; -/* TO DO -- Generate this table */ +/* TO DO -- Generate these tables */ static const uint16_t _PyUop_Replacements[OPCODE_METADATA_SIZE] = { [_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE, @@ -764,6 +764,16 @@ translate_bytecode_to_trace( #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31))) #define BIT_IS_SET(array, bit) (array[(bit)>>5] & (1<<((bit)&31))) +static bool +is_branch(opcode) { + /* Currently there are no jumps in the buffer, + * but we expect the optimizer to add them + * in the future. */ + assert(opcode != _POP_JUMP_IF_FALSE && + opcode != _POP_JUMP_IF_TRUE); + return false; +} + /* Count the number of used uops, and mark them in the bit vector `used`. * This can be done in a single pass using simple reachability analysis, * as there are no backward jumps. @@ -785,16 +795,13 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) } /* All other micro-ops fall through, so i+1 is reachable */ SET_BIT(used, i+1); - switch(opcode) { - case NOP: - /* Don't count NOPs as used */ - count--; - UNSET_BIT(used, i); - break; - case _POP_JUMP_IF_FALSE: - case _POP_JUMP_IF_TRUE: - /* Mark target as reachable */ - SET_BIT(used, buffer[i].oparg); + if (is_branch(opcode)) { + /* Mark target as reachable */ + SET_BIT(used, buffer[i].oparg); + } + if (opcode == NOP) { + count--; + UNSET_BIT(used, i); } } return count; From f36132831dc1d8696436ce107d6003c6514cd17d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 11 Nov 2023 11:59:00 +0000 Subject: [PATCH 03/12] Fix refleak --- Python/bytecodes.c | 14 ++++++++++++-- Python/executor_cases.c.h | 14 ++++++++++++-- Python/optimizer.c | 2 +- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6a050f45ae2391..b7ddfda014aa17 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3985,18 +3985,28 @@ dummy_func( op (_GUARD_IS_TRUE_POP, (flag -- )) { DEOPT_IF(Py_IsFalse(flag)); + assert(Py_IsTrue(flag)); } op (_GUARD_IS_FALSE_POP, (flag -- )) { DEOPT_IF(Py_IsTrue(flag)); + assert(Py_IsFalse(flag)); } op (_GUARD_IS_NONE_POP, (val -- )) { - DEOPT_IF(!Py_IsNone(val)); + if (!Py_IsNone(val)) { + Py_DECREF(val); + DEOPT_IF(true); + } } op (_GUARD_IS_NOT_NONE_POP, (val -- )) { - DEOPT_IF(Py_IsNone(val)); + if (Py_IsNone(val)) { + DEOPT_IF(true); + } + else { + Py_DECREF(val); + } } op(_JUMP_TO_TOP, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 185a3239520eda..46cdb2b185398b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3219,6 +3219,7 @@ PyObject *flag; flag = stack_pointer[-1]; DEOPT_IF(Py_IsFalse(flag), _GUARD_IS_TRUE_POP); + assert(Py_IsTrue(flag)); STACK_SHRINK(1); break; } @@ -3227,6 +3228,7 @@ PyObject *flag; flag = stack_pointer[-1]; DEOPT_IF(Py_IsTrue(flag), _GUARD_IS_FALSE_POP); + assert(Py_IsFalse(flag)); STACK_SHRINK(1); break; } @@ -3234,7 +3236,10 @@ case _GUARD_IS_NONE_POP: { PyObject *val; val = stack_pointer[-1]; - DEOPT_IF(!Py_IsNone(val), _GUARD_IS_NONE_POP); + if (!Py_IsNone(val)) { + Py_DECREF(val); + DEOPT_IF(true, _GUARD_IS_NONE_POP); + } STACK_SHRINK(1); break; } @@ -3242,7 +3247,12 @@ case _GUARD_IS_NOT_NONE_POP: { PyObject *val; val = stack_pointer[-1]; - DEOPT_IF(Py_IsNone(val), _GUARD_IS_NOT_NONE_POP); + if (Py_IsNone(val)) { + DEOPT_IF(true, _GUARD_IS_NOT_NONE_POP); + } + else { + Py_DECREF(val); + } STACK_SHRINK(1); break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 44106b5b87ecc2..90c2eeb828e72b 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -765,7 +765,7 @@ translate_bytecode_to_trace( #define BIT_IS_SET(array, bit) (array[(bit)>>5] & (1<<((bit)&31))) static bool -is_branch(opcode) { +is_branch(int opcode) { /* Currently there are no jumps in the buffer, * but we expect the optimizer to add them * in the future. */ From 69cee8daae30c8363bee399641fc61130e3eab35 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 11 Nov 2023 15:20:50 +0000 Subject: [PATCH 04/12] Update test --- Lib/test/test_capi/test_misc.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index d526bbf62dda0b..fe5c36c0c0dec9 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2610,7 +2610,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_FALSE", uops) + self.assertIn("_GUARD_IS_TRUE_POP", uops) def test_pop_jump_if_none(self): def testfunc(a): @@ -2625,7 +2625,7 @@ def testfunc(a): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_TRUE", uops) + self.assertIn("_GUARD_IS_NOT_NONE_POP", uops) def test_pop_jump_if_not_none(self): def testfunc(a): @@ -2641,7 +2641,7 @@ def testfunc(a): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_FALSE", uops) + self.assertIn("_GUARD_IS_NONE_POP", uops) def test_pop_jump_if_true(self): def testfunc(n): @@ -2656,7 +2656,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_TRUE", uops) + self.assertIn("_GUARD_IS_FALSE_POP", uops) def test_jump_backward(self): def testfunc(n): @@ -2806,7 +2806,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_TRUE", uops) + self.assertIn("_GUARD_IS_FALSE_POP", uops) if __name__ == "__main__": From bb9a1a2e4d7b60e601525d7ed05dede441c10fa3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 12 Nov 2023 02:15:14 +0000 Subject: [PATCH 05/12] Add target field to uop IR --- Include/internal/pycore_uops.h | 5 +++-- Python/optimizer.c | 30 ++++++++++++++++++------------ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index d8a7d978f1304e..0ecbd2dfd1af73 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -13,8 +13,9 @@ extern "C" { #define _Py_UOP_MAX_TRACE_LENGTH 128 typedef struct { - uint32_t opcode; - uint32_t oparg; + uint16_t opcode; + uint16_t oparg; + uint32_t target; uint64_t operand; // A cache entry } _PyUOpInstruction; diff --git a/Python/optimizer.c b/Python/optimizer.c index 1b94fa2c3e757a..5a696144ec27ee 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -446,7 +446,8 @@ translate_bytecode_to_trace( #define DPRINTF(level, ...) #endif -#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND) \ + +#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \ DPRINTF(2, \ " ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \ uop_name(OPCODE), \ @@ -458,6 +459,7 @@ translate_bytecode_to_trace( trace[trace_length].opcode = (OPCODE); \ trace[trace_length].oparg = (OPARG); \ trace[trace_length].operand = (OPERAND); \ + trace[trace_length].target = (TARGET); \ trace_length++; #define INSTR_IP(INSTR, CODE) \ @@ -493,7 +495,7 @@ translate_bytecode_to_trace( if (trace_stack_depth >= TRACE_STACK_SIZE) { \ DPRINTF(2, "Trace stack overflow\n"); \ OPT_STAT_INC(trace_stack_overflow); \ - ADD_TO_TRACE(_SET_IP, 0, 0); \ + ADD_TO_TRACE(_SET_IP, 0, 0, 0); \ goto done; \ } \ trace_stack[trace_stack_depth].code = code; \ @@ -517,18 +519,22 @@ translate_bytecode_to_trace( top: // Jump here after _PUSH_FRAME or likely branches for (;;) { RESERVE_RAW(3, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE - ADD_TO_TRACE(_SET_IP, INSTR_IP(instr, code), 0); - ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0); + ADD_TO_TRACE(_SET_IP, INSTR_IP(instr, code), 0, 0); + ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, INSTR_IP(instr, code)); uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; uint32_t extras = 0; - while (opcode == EXTENDED_ARG) { + if (opcode == EXTENDED_ARG) { instr++; extras += 1; opcode = instr->op.code; oparg = (oparg << 8) | instr->op.arg; + if (opcode == EXTENDED_ARG) { + instr--; + goto done; + } } if (opcode == ENTER_EXECUTOR) { @@ -554,7 +560,7 @@ translate_bytecode_to_trace( DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n", uop_name(opcode), oparg, counter, bitcount, jump_likely, uop_name(uopcode)); - ADD_TO_TRACE(uopcode, max_length, 0); + ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(instr, code)); if (jump_likely) { _Py_CODEUNIT *target_instr = next_instr + oparg; DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n", @@ -569,7 +575,7 @@ translate_bytecode_to_trace( { if (instr + 2 - oparg == initial_instr && code == initial_code) { RESERVE(1, 0); - ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0); + ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); } else { OPT_STAT_INC(inner_loop); @@ -653,7 +659,7 @@ translate_bytecode_to_trace( expansion->uops[i].offset); Py_FatalError("garbled expansion"); } - ADD_TO_TRACE(uop, oparg, operand); + ADD_TO_TRACE(uop, oparg, operand, INSTR_IP(instr, code)); if (uop == _POP_FRAME) { TRACE_STACK_POP(); DPRINTF(2, @@ -682,7 +688,7 @@ translate_bytecode_to_trace( PyUnicode_AsUTF8(new_code->co_filename), new_code->co_firstlineno); OPT_STAT_INC(recursive_call); - ADD_TO_TRACE(_SET_IP, 0, 0); + ADD_TO_TRACE(_SET_IP, 0, 0, 0); goto done; } if (new_code->co_version != func_version) { @@ -690,7 +696,7 @@ translate_bytecode_to_trace( // Perhaps it may happen again, so don't bother tracing. // TODO: Reason about this -- is it better to bail or not? DPRINTF(2, "Bailing because co_version != func_version\n"); - ADD_TO_TRACE(_SET_IP, 0, 0); + ADD_TO_TRACE(_SET_IP, 0, 0, 0); goto done; } // Increment IP to the return address @@ -707,7 +713,7 @@ translate_bytecode_to_trace( 2 * INSTR_IP(instr, code)); goto top; } - ADD_TO_TRACE(_SET_IP, 0, 0); + ADD_TO_TRACE(_SET_IP, 0, 0, 0); goto done; } } @@ -732,7 +738,7 @@ translate_bytecode_to_trace( assert(code == initial_code); // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE if (trace_length > 4) { - ADD_TO_TRACE(_EXIT_TRACE, 0, 0); + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, INSTR_IP(instr, code)); DPRINTF(1, "Created a trace for %s (%s:%d) at byte offset %d -- length %d+%d\n", PyUnicode_AsUTF8(code->co_qualname), From 9cda14b21cf1af77ab3a91d2c27c1b8c09d6fb25 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 12 Nov 2023 04:48:01 +0000 Subject: [PATCH 06/12] Remove redundant SET_IP and _CHECK_VALIDITY micro-ops --- Python/ceval.c | 3 +++ Python/optimizer.c | 36 +++++++++++++----------------------- Python/optimizer_analysis.c | 20 +++++++++----------- 3 files changed, 25 insertions(+), 34 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 42100393f60ff7..ec783d5fa9690c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1066,11 +1066,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int deoptimize: // On DEOPT_IF we just repeat the last instruction. // This presumes nothing was popped from the stack (nor pushed). + //assert(next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable) == frame->instr_ptr); DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 " @ %d]\n", opcode, operand, (int)(next_uop-current_executor->trace-1)); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); UOP_STAT_INC(opcode, miss); frame->return_offset = 0; // Dispatch to frame->instr_ptr _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); Py_DECREF(current_executor); // Fall through // Jump here from ENTER_EXECUTOR @@ -1081,6 +1083,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Jump here from _EXIT_TRACE exit_trace: _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); Py_DECREF(current_executor); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); goto enter_tier_one; diff --git a/Python/optimizer.c b/Python/optimizer.c index 5a696144ec27ee..19322c105fc276 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -465,18 +465,6 @@ translate_bytecode_to_trace( #define INSTR_IP(INSTR, CODE) \ ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive))) -#define ADD_TO_STUB(INDEX, OPCODE, OPARG, OPERAND) \ - DPRINTF(2, " ADD_TO_STUB(%d, %s, %d, %" PRIu64 ")\n", \ - (INDEX), \ - uop_name(OPCODE), \ - (OPARG), \ - (uint64_t)(OPERAND)); \ - assert(reserved > 0); \ - reserved--; \ - trace[(INDEX)].opcode = (OPCODE); \ - trace[(INDEX)].oparg = (OPARG); \ - trace[(INDEX)].operand = (OPERAND); - // Reserve space for n uops #define RESERVE_RAW(n, opname) \ if (trace_length + (n) > max_length) { \ @@ -485,7 +473,7 @@ translate_bytecode_to_trace( OPT_STAT_INC(trace_too_long); \ goto done; \ } \ - reserved = (n); // Keep ADD_TO_TRACE / ADD_TO_STUB honest + reserved = (n); // Keep ADD_TO_TRACE honest // Reserve space for main+stub uops, plus 3 for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE #define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 3, uop_name(opcode)) @@ -495,7 +483,7 @@ translate_bytecode_to_trace( if (trace_stack_depth >= TRACE_STACK_SIZE) { \ DPRINTF(2, "Trace stack overflow\n"); \ OPT_STAT_INC(trace_stack_overflow); \ - ADD_TO_TRACE(_SET_IP, 0, 0, 0); \ + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \ goto done; \ } \ trace_stack[trace_stack_depth].code = code; \ @@ -515,17 +503,19 @@ translate_bytecode_to_trace( PyUnicode_AsUTF8(code->co_filename), code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); - + uint32_t target = 0; top: // Jump here after _PUSH_FRAME or likely branches for (;;) { + target = INSTR_IP(instr, code); RESERVE_RAW(3, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE - ADD_TO_TRACE(_SET_IP, INSTR_IP(instr, code), 0, 0); - ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, INSTR_IP(instr, code)); + ADD_TO_TRACE(_SET_IP, target, 0, target); + ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; uint32_t extras = 0; + if (opcode == EXTENDED_ARG) { instr++; extras += 1; @@ -560,7 +550,7 @@ translate_bytecode_to_trace( DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n", uop_name(opcode), oparg, counter, bitcount, jump_likely, uop_name(uopcode)); - ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(instr, code)); + ADD_TO_TRACE(uopcode, max_length, 0, target); if (jump_likely) { _Py_CODEUNIT *target_instr = next_instr + oparg; DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n", @@ -659,7 +649,7 @@ translate_bytecode_to_trace( expansion->uops[i].offset); Py_FatalError("garbled expansion"); } - ADD_TO_TRACE(uop, oparg, operand, INSTR_IP(instr, code)); + ADD_TO_TRACE(uop, oparg, operand, target); if (uop == _POP_FRAME) { TRACE_STACK_POP(); DPRINTF(2, @@ -688,7 +678,7 @@ translate_bytecode_to_trace( PyUnicode_AsUTF8(new_code->co_filename), new_code->co_firstlineno); OPT_STAT_INC(recursive_call); - ADD_TO_TRACE(_SET_IP, 0, 0, 0); + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } if (new_code->co_version != func_version) { @@ -696,7 +686,7 @@ translate_bytecode_to_trace( // Perhaps it may happen again, so don't bother tracing. // TODO: Reason about this -- is it better to bail or not? DPRINTF(2, "Bailing because co_version != func_version\n"); - ADD_TO_TRACE(_SET_IP, 0, 0, 0); + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } // Increment IP to the return address @@ -713,7 +703,7 @@ translate_bytecode_to_trace( 2 * INSTR_IP(instr, code)); goto top; } - ADD_TO_TRACE(_SET_IP, 0, 0, 0); + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } } @@ -738,7 +728,7 @@ translate_bytecode_to_trace( assert(code == initial_code); // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE if (trace_length > 4) { - ADD_TO_TRACE(_EXIT_TRACE, 0, 0, INSTR_IP(instr, code)); + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); DPRINTF(1, "Created a trace for %s (%s:%d) at byte offset %d -- length %d+%d\n", PyUnicode_AsUTF8(code->co_qualname), diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 3c8596463fd2b4..2ac804bfa0f1b3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -17,21 +17,15 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { // Note that we don't enter stubs, those SET_IPs are needed. int last_set_ip = -1; - bool need_ip = true; bool maybe_invalid = false; for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; if (opcode == _SET_IP) { - if (!need_ip && last_set_ip >= 0) { - buffer[last_set_ip].opcode = NOP; - } - need_ip = false; + buffer[pc].opcode = NOP; last_set_ip = pc; } else if (opcode == _CHECK_VALIDITY) { if (maybe_invalid) { - /* Exiting the trace requires that IP is correct */ - need_ip = true; maybe_invalid = false; } else { @@ -42,12 +36,16 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) break; } else { - // If opcode has ERROR or DEOPT, set need_ip to true - if (_PyOpcode_opcode_metadata[opcode].flags & (HAS_ERROR_FLAG | HAS_DEOPT_FLAG) || opcode == _PUSH_FRAME) { - need_ip = true; - } if (_PyOpcode_opcode_metadata[opcode].flags & HAS_ESCAPES_FLAG) { maybe_invalid = true; + if (last_set_ip >= 0) { + buffer[last_set_ip].opcode = _SET_IP; + } + } + if ((_PyOpcode_opcode_metadata[opcode].flags & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { + if (last_set_ip >= 0) { + buffer[last_set_ip].opcode = _SET_IP; + } } } } From 6c61feb0f530e20120beac0fb8ea03cc9bfc552b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 12 Nov 2023 05:07:05 +0000 Subject: [PATCH 07/12] Extend whitelist of non-escaping API functions. --- Include/internal/pycore_opcode_metadata.h | 86 +++++++++++------------ Tools/cases_generator/flags.py | 10 +++ 2 files changed, 53 insertions(+), 43 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 69ac38829f9632..6afe8995d42e7a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1488,24 +1488,24 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [END_SEND] = { true, INSTR_FMT_IX, 0 }, [INSTRUMENTED_END_SEND] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNARY_NEGATIVE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [UNARY_NOT] = { true, INSTR_FMT_IX, 0 }, [_SPECIALIZE_TO_BOOL] = { true, INSTR_FMT_IXC, HAS_ESCAPES_FLAG }, [_TO_BOOL] = { true, INSTR_FMT_IXC0, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL] = { true, INSTR_FMT_IXC00, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG }, - [TO_BOOL_INT] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [TO_BOOL_INT] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG }, [TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG }, - [TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG }, + [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG }, [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG }, [UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_GUARD_BOTH_INT] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, - [_BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [_BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [_BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [_BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG }, + [_BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG }, + [_BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG }, + [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, + [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, + [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, [_GUARD_BOTH_FLOAT] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, 0 }, [_BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, 0 }, @@ -1558,16 +1558,16 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [RERAISE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [END_ASYNC_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CLEANUP_THROW] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ASSERTION_ERROR] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [LOAD_ASSERTION_ERROR] = { true, INSTR_FMT_IX, 0 }, [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DELETE_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_UNPACK_SEQUENCE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_STORE_ATTR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ESCAPES_FLAG }, [_STORE_ATTR] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1632,8 +1632,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [_CHECK_ATTR_CLASS] = { true, INSTR_FMT_IXC0, HAS_DEOPT_FLAG }, - [_LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [_LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG }, + [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [_GUARD_DORV_VALUES] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, @@ -1648,7 +1648,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [COMPARE_OP_FLOAT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [COMPARE_OP_INT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [COMPARE_OP_STR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, + [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [CONTAINS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CHECK_EG_MATCH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CHECK_EXC_MATCH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1659,13 +1659,13 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [JUMP] = { true, 0, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [JUMP_NO_INTERRUPT] = { true, 0, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, - [_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, - [_IS_NONE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, - [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, - [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, - [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, - [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, + [_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [_IS_NONE] = { true, INSTR_FMT_IX, 0 }, + [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [JUMP_BACKWARD_NO_INTERRUPT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [GET_LEN] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [MATCH_CLASS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1681,13 +1681,13 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_ITER_JUMP_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [_GUARD_NOT_EXHAUSTED_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, - [_ITER_NEXT_LIST] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, - [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [_ITER_NEXT_LIST] = { true, INSTR_FMT_IX, 0 }, + [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG }, [_ITER_CHECK_TUPLE] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_ITER_JUMP_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [_GUARD_NOT_EXHAUSTED_TUPLE] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, - [_ITER_NEXT_TUPLE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, - [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [_ITER_NEXT_TUPLE] = { true, INSTR_FMT_IX, 0 }, + [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG }, [_ITER_CHECK_RANGE] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_ITER_JUMP_RANGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [_GUARD_NOT_EXHAUSTED_RANGE] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, @@ -1708,10 +1708,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [LOAD_ATTR_METHOD_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [_LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG }, + [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG }, + [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [_CHECK_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, @@ -1720,7 +1720,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_CALL] = { true, INSTR_FMT_IBC0, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, - [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, + [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_CHECK_PEP_523] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [_CHECK_FUNCTION_EXACT_ARGS] = { true, INSTR_FMT_IBC0, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [_CHECK_STACK_SPACE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, @@ -1729,7 +1729,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [CALL_PY_WITH_DEFAULTS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [CALL_TYPE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [CALL_TYPE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [CALL_STR_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_TUPLE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1756,7 +1756,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [CONVERT_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, [FORMAT_SIMPLE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FORMAT_WITH_SPEC] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [COPY] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, + [COPY] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_SPECIALIZE_BINARY_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_BINARY_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, [BINARY_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1764,17 +1764,17 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [INSTRUMENTED_INSTRUCTION] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_JUMP_FORWARD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [INSTRUMENTED_JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG }, - [INSTRUMENTED_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, + [INSTRUMENTED_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, + [INSTRUMENTED_POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, + [INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [CACHE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [RESERVED] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, - [_GUARD_IS_TRUE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [_GUARD_IS_FALSE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [_GUARD_IS_NONE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [_GUARD_IS_NOT_NONE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [_GUARD_IS_TRUE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [_GUARD_IS_FALSE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [_GUARD_IS_NONE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [_GUARD_IS_NOT_NONE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_JUMP_TO_TOP] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG }, [_SET_IP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_SAVE_RETURN_OFFSET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 42e1763ad4d484..1b436e0924d9a4 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -31,6 +31,14 @@ "_PyLong_IsNonNegativeCompact", "_PyLong_CompactValue", "_Py_NewRef", + "_Py_IsImmortal", + "_Py_STR", + "_PyLong_Add", + "_PyLong_Multiply", + "_PyLong_Subtract", + "Py_NewRef", + "_PyList_ITEMS", + "_PyTuple_ITEMS", ) def makes_escaping_api_call(instr: parsing.Node) -> bool: @@ -48,6 +56,8 @@ def makes_escaping_api_call(instr: parsing.Node) -> bool: continue if tkn.text.endswith("Check"): continue + if tkn.text.startswith("Py_Is"): + continue if tkn.text.endswith("CheckExact"): continue if tkn.text in WHITELIST: From 0a093fdec35ae41eb8a3e77d4100a3f905164f88 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 12 Nov 2023 05:56:29 +0000 Subject: [PATCH 08/12] Add more functions to whitelist --- Include/internal/pycore_opcode_metadata.h | 16 ++++++++-------- Tools/cases_generator/flags.py | 20 ++++++++++++++++---- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 6afe8995d42e7a..cfbd2e920656c7 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1469,7 +1469,7 @@ extern const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SI const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [NOP] = { true, INSTR_FMT_IX, 0 }, [RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [RESUME_CHECK] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [RESUME_CHECK] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [INSTRUMENTED_RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_CLOSURE] = { true, 0, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG }, @@ -1528,7 +1528,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [LIST_APPEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LIST_APPEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, [SET_ADD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_STORE_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ESCAPES_FLAG }, [_STORE_SUBSCR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1593,7 +1593,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG }, - [COPY_FREE_VARS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, + [COPY_FREE_VARS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [BUILD_STRING] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1658,7 +1658,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP] = { true, 0, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [JUMP_NO_INTERRUPT] = { true, 0, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG }, [_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [_IS_NONE] = { true, INSTR_FMT_IX, 0 }, @@ -1701,7 +1701,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [SETUP_CLEANUP] = { true, 0, 0 }, [SETUP_WITH] = { true, 0, 0 }, [POP_BLOCK] = { true, 0, 0 }, - [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_GUARD_KEYS_VERSION] = { true, INSTR_FMT_IXC0, HAS_DEOPT_FLAG }, [_LOAD_ATTR_METHOD_WITH_VALUES] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, @@ -1721,11 +1721,11 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_CHECK_PEP_523] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [_CHECK_PEP_523] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_CHECK_FUNCTION_EXACT_ARGS] = { true, INSTR_FMT_IBC0, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [_CHECK_STACK_SPACE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [_INIT_CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, - [_PUSH_FRAME] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [_PUSH_FRAME] = { true, INSTR_FMT_IX, 0 }, [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [CALL_PY_WITH_DEFAULTS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, @@ -1740,7 +1740,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ISINSTANCE] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, [CALL_METHOD_DESCRIPTOR_O] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 1b436e0924d9a4..991222c9e98360 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -5,7 +5,7 @@ import parsing from typing import AbstractSet -WHITELIST = ( +NON_ESCAPING_FUNCTIONS = ( "Py_INCREF", "_PyDictOrValues_IsValues", "_PyObject_DictOrValuesPointer", @@ -39,9 +39,20 @@ "Py_NewRef", "_PyList_ITEMS", "_PyTuple_ITEMS", + "_PyList_AppendTakeRef", + "_Py_atomic_load_uintptr_relaxed", + "_PyFrame_GetCode", ) +ESCAPING_FUNCTIONS = ( + "import_name", + "import_from", +) + + def makes_escaping_api_call(instr: parsing.Node) -> bool: + if "CALL_INTRINSIC" in instr.name: + return True; tkns = iter(instr.tokens) for tkn in tkns: if tkn.kind != lx.IDENTIFIER: @@ -52,6 +63,8 @@ def makes_escaping_api_call(instr: parsing.Node) -> bool: return False if next_tkn.kind != lx.LPAREN: continue + if tkn.text in ESCAPING_FUNCTIONS: + return True if not tkn.text.startswith("Py") and not tkn.text.startswith("_Py"): continue if tkn.text.endswith("Check"): @@ -60,7 +73,7 @@ def makes_escaping_api_call(instr: parsing.Node) -> bool: continue if tkn.text.endswith("CheckExact"): continue - if tkn.text in WHITELIST: + if tkn.text in NON_ESCAPING_FUNCTIONS: continue return True return False @@ -111,8 +124,7 @@ def fromInstruction(instr: parsing.Node) -> "InstructionFlags": or variable_used(instr, "resume_with_error") ), HAS_ESCAPES_FLAG=( - variable_used(instr, "tstate") - or makes_escaping_api_call(instr) + makes_escaping_api_call(instr) ), ) From 222ee176012bd2a58d3c035f804b286c7aa86232 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 12 Nov 2023 06:23:59 +0000 Subject: [PATCH 09/12] Tidy up --- Python/ceval.c | 1 - Python/optimizer.c | 14 ++------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 13d1e897c24305..d684c72cc9e302 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1062,7 +1062,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int deoptimize: // On DEOPT_IF we just repeat the last instruction. // This presumes nothing was popped from the stack (nor pushed). - //assert(next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable) == frame->instr_ptr); DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 " @ %d]\n", opcode, operand, (int)(next_uop-current_executor->trace-1)); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); UOP_STAT_INC(opcode, miss); diff --git a/Python/optimizer.c b/Python/optimizer.c index 552b038a8ed0e0..e14ad89bbe2921 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -394,7 +394,7 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = { }; static const uint16_t -BRANCH_TO_GUARDS[4][2] = { +BRANCH_TO_GUARD[4][2] = { [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP, [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_FALSE_POP, [POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_FALSE_POP, @@ -545,7 +545,7 @@ translate_bytecode_to_trace( int counter = instr[1].cache; int bitcount = _Py_popcount32(counter); int jump_likely = bitcount > 8; - uint32_t uopcode = BRANCH_TO_GUARDS[opcode - POP_JUMP_IF_FALSE][jump_likely]; + uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely]; _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n", uop_name(opcode), oparg, @@ -762,16 +762,6 @@ translate_bytecode_to_trace( #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31))) #define BIT_IS_SET(array, bit) (array[(bit)>>5] & (1<<((bit)&31))) -static bool -is_branch(int opcode) { - /* Currently there are no jumps in the buffer, - * but we expect the optimizer to add them - * in the future. */ - assert(opcode != _POP_JUMP_IF_FALSE && - opcode != _POP_JUMP_IF_TRUE); - return false; -} - /* Count the number of used uops, and mark them in the bit vector `used`. * This can be done in a single pass using simple reachability analysis, * as there are no backward jumps. From 4d1e2950cf5724f97389f2d0920e8152255a45c6 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 12 Nov 2023 06:32:39 +0000 Subject: [PATCH 10/12] Add another non-escaping function --- Include/internal/pycore_opcode_metadata.h | 2 +- Tools/cases_generator/flags.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index cfbd2e920656c7..4d98b23df5d927 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1723,7 +1723,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_CHECK_PEP_523] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_CHECK_FUNCTION_EXACT_ARGS] = { true, INSTR_FMT_IBC0, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, - [_CHECK_STACK_SPACE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [_CHECK_STACK_SPACE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [_INIT_CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_PUSH_FRAME] = { true, INSTR_FMT_IX, 0 }, [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 991222c9e98360..df57a8c72681fd 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -42,6 +42,7 @@ "_PyList_AppendTakeRef", "_Py_atomic_load_uintptr_relaxed", "_PyFrame_GetCode", + "_PyThreadState_HasStackSpace", ) ESCAPING_FUNCTIONS = ( From b5737c805b8d2569403179799458c1d5947ce158 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 15 Nov 2023 09:43:10 +0000 Subject: [PATCH 11/12] Fix type annotation --- Tools/cases_generator/flags.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index df57a8c72681fd..0066c9e74512c3 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -51,7 +51,7 @@ ) -def makes_escaping_api_call(instr: parsing.Node) -> bool: +def makes_escaping_api_call(instr: parsing.InstDef) -> bool: if "CALL_INTRINSIC" in instr.name: return True; tkns = iter(instr.tokens) @@ -98,7 +98,7 @@ def __post_init__(self) -> None: self.bitmask = {name: (1 << i) for i, name in enumerate(self.names())} @staticmethod - def fromInstruction(instr: parsing.Node) -> "InstructionFlags": + def fromInstruction(instr: parsing.InstDef) -> "InstructionFlags": has_free = ( variable_used(instr, "PyCell_New") or variable_used(instr, "PyCell_GET") From 8e2f0f64f20409c1957bf9c99547d01b3168982f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 15 Nov 2023 14:11:24 +0000 Subject: [PATCH 12/12] Address review comments --- Python/optimizer_analysis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2ac804bfa0f1b3..0f9bc085f22f1c 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -36,13 +36,13 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) break; } else { - if (_PyOpcode_opcode_metadata[opcode].flags & HAS_ESCAPES_FLAG) { + if (OPCODE_HAS_ESCAPES(opcode)) { maybe_invalid = true; if (last_set_ip >= 0) { buffer[last_set_ip].opcode = _SET_IP; } } - if ((_PyOpcode_opcode_metadata[opcode].flags & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { + if (OPCODE_HAS_ERROR(opcode) || opcode == _PUSH_FRAME) { if (last_set_ip >= 0) { buffer[last_set_ip].opcode = _SET_IP; }