From 392c520b883228aaf8a75b44a3c029c456b355cf Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 20 Jun 2024 12:03:29 +0100 Subject: [PATCH 1/9] Make BINARY_SUBSCR_GETITEM suitable for tier 2 --- Include/internal/pycore_opcode_metadata.h | 3 +- Include/internal/pycore_uop_ids.h | 287 +++++++++++----------- Include/internal/pycore_uop_metadata.h | 8 + Python/bytecodes.c | 31 ++- Python/executor_cases.c.h | 52 +++- Python/generated_cases.c.h | 72 ++++-- Python/optimizer_cases.c.h | 16 +- 7 files changed, 287 insertions(+), 182 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 7b495238d7a9f3..d83ca1d906c6fa 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -505,7 +505,7 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { case BINARY_SUBSCR_DICT: return 1; case BINARY_SUBSCR_GETITEM: - return 1; + return 0; case BINARY_SUBSCR_LIST_INT: return 1; case BINARY_SUBSCR_STR_INT: @@ -1230,6 +1230,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_SLICE] = { .nuops = 1, .uops = { { _BINARY_SLICE, 0, 0 } } }, [BINARY_SUBSCR] = { .nuops = 1, .uops = { { _BINARY_SUBSCR, 0, 0 } } }, [BINARY_SUBSCR_DICT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_DICT, 0, 0 } } }, + [BINARY_SUBSCR_GETITEM] = { .nuops = 4, .uops = { { _CHECK_PEP_523, 0, 0 }, { _BINARY_SUBSCR_GET_FUNC, 0, 0 }, { _BINARY_SUBSCR_INIT_CALL, 0, 0 }, { _PUSH_FRAME, 0, 0 } } }, [BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_LIST_INT, 0, 0 } } }, [BINARY_SUBSCR_STR_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_STR_INT, 0, 0 } } }, [BINARY_SUBSCR_TUPLE_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_TUPLE_INT, 0, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 85e689c096949f..55a417cc99f13b 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -22,7 +22,8 @@ extern "C" { #define _BINARY_SLICE BINARY_SLICE #define _BINARY_SUBSCR 310 #define _BINARY_SUBSCR_DICT BINARY_SUBSCR_DICT -#define _BINARY_SUBSCR_GETITEM BINARY_SUBSCR_GETITEM +#define _BINARY_SUBSCR_GET_FUNC 311 +#define _BINARY_SUBSCR_INIT_CALL 312 #define _BINARY_SUBSCR_LIST_INT BINARY_SUBSCR_LIST_INT #define _BINARY_SUBSCR_STR_INT BINARY_SUBSCR_STR_INT #define _BINARY_SUBSCR_TUPLE_INT BINARY_SUBSCR_TUPLE_INT @@ -33,51 +34,51 @@ extern "C" { #define _BUILD_SLICE BUILD_SLICE #define _BUILD_STRING BUILD_STRING #define _BUILD_TUPLE BUILD_TUPLE -#define _CALL 311 +#define _CALL 313 #define _CALL_ALLOC_AND_ENTER_INIT CALL_ALLOC_AND_ENTER_INIT -#define _CALL_BUILTIN_CLASS 312 -#define _CALL_BUILTIN_FAST 313 -#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 314 -#define _CALL_BUILTIN_O 315 +#define _CALL_BUILTIN_CLASS 314 +#define _CALL_BUILTIN_FAST 315 +#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 316 +#define _CALL_BUILTIN_O 317 #define _CALL_FUNCTION_EX CALL_FUNCTION_EX #define _CALL_INTRINSIC_1 CALL_INTRINSIC_1 #define _CALL_INTRINSIC_2 CALL_INTRINSIC_2 #define _CALL_ISINSTANCE CALL_ISINSTANCE #define _CALL_KW CALL_KW #define _CALL_LEN CALL_LEN -#define _CALL_METHOD_DESCRIPTOR_FAST 316 -#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 317 -#define _CALL_METHOD_DESCRIPTOR_NOARGS 318 -#define _CALL_METHOD_DESCRIPTOR_O 319 -#define _CALL_NON_PY_GENERAL 320 -#define _CALL_STR_1 321 -#define _CALL_TUPLE_1 322 +#define _CALL_METHOD_DESCRIPTOR_FAST 318 +#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 319 +#define _CALL_METHOD_DESCRIPTOR_NOARGS 320 +#define _CALL_METHOD_DESCRIPTOR_O 321 +#define _CALL_NON_PY_GENERAL 322 +#define _CALL_STR_1 323 +#define _CALL_TUPLE_1 324 #define _CALL_TYPE_1 CALL_TYPE_1 -#define _CHECK_ATTR_CLASS 323 -#define _CHECK_ATTR_METHOD_LAZY_DICT 324 -#define _CHECK_ATTR_MODULE 325 -#define _CHECK_ATTR_WITH_HINT 326 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 327 +#define _CHECK_ATTR_CLASS 325 +#define _CHECK_ATTR_METHOD_LAZY_DICT 326 +#define _CHECK_ATTR_MODULE 327 +#define _CHECK_ATTR_WITH_HINT 328 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 329 #define _CHECK_EG_MATCH CHECK_EG_MATCH #define _CHECK_EXC_MATCH CHECK_EXC_MATCH -#define _CHECK_FUNCTION 328 -#define _CHECK_FUNCTION_EXACT_ARGS 329 -#define _CHECK_FUNCTION_VERSION 330 -#define _CHECK_IS_NOT_PY_CALLABLE 331 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 332 -#define _CHECK_METHOD_VERSION 333 -#define _CHECK_PEP_523 334 -#define _CHECK_PERIODIC 335 -#define _CHECK_STACK_SPACE 336 -#define _CHECK_STACK_SPACE_OPERAND 337 -#define _CHECK_VALIDITY 338 -#define _CHECK_VALIDITY_AND_SET_IP 339 -#define _COLD_EXIT 340 -#define _COMPARE_OP 341 -#define _COMPARE_OP_FLOAT 342 -#define _COMPARE_OP_INT 343 -#define _COMPARE_OP_STR 344 -#define _CONTAINS_OP 345 +#define _CHECK_FUNCTION 330 +#define _CHECK_FUNCTION_EXACT_ARGS 331 +#define _CHECK_FUNCTION_VERSION 332 +#define _CHECK_IS_NOT_PY_CALLABLE 333 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 334 +#define _CHECK_METHOD_VERSION 335 +#define _CHECK_PEP_523 336 +#define _CHECK_PERIODIC 337 +#define _CHECK_STACK_SPACE 338 +#define _CHECK_STACK_SPACE_OPERAND 339 +#define _CHECK_VALIDITY 340 +#define _CHECK_VALIDITY_AND_SET_IP 341 +#define _COLD_EXIT 342 +#define _COMPARE_OP 343 +#define _COMPARE_OP_FLOAT 344 +#define _COMPARE_OP_INT 345 +#define _COMPARE_OP_STR 346 +#define _CONTAINS_OP 347 #define _CONTAINS_OP_DICT CONTAINS_OP_DICT #define _CONTAINS_OP_SET CONTAINS_OP_SET #define _CONVERT_VALUE CONVERT_VALUE @@ -89,53 +90,53 @@ extern "C" { #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 346 +#define _DEOPT 348 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DYNAMIC_EXIT 347 +#define _DYNAMIC_EXIT 349 #define _END_SEND END_SEND -#define _ERROR_POP_N 348 +#define _ERROR_POP_N 350 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 349 -#define _FATAL_ERROR 350 +#define _EXPAND_METHOD 351 +#define _FATAL_ERROR 352 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 351 -#define _FOR_ITER_GEN_FRAME 352 -#define _FOR_ITER_TIER_TWO 353 +#define _FOR_ITER 353 +#define _FOR_ITER_GEN_FRAME 354 +#define _FOR_ITER_TIER_TWO 355 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BOTH_FLOAT 354 -#define _GUARD_BOTH_INT 355 -#define _GUARD_BOTH_UNICODE 356 -#define _GUARD_BUILTINS_VERSION 357 -#define _GUARD_DORV_NO_DICT 358 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 359 -#define _GUARD_GLOBALS_VERSION 360 -#define _GUARD_IS_FALSE_POP 361 -#define _GUARD_IS_NONE_POP 362 -#define _GUARD_IS_NOT_NONE_POP 363 -#define _GUARD_IS_TRUE_POP 364 -#define _GUARD_KEYS_VERSION 365 -#define _GUARD_NOS_FLOAT 366 -#define _GUARD_NOS_INT 367 -#define _GUARD_NOT_EXHAUSTED_LIST 368 -#define _GUARD_NOT_EXHAUSTED_RANGE 369 -#define _GUARD_NOT_EXHAUSTED_TUPLE 370 -#define _GUARD_TOS_FLOAT 371 -#define _GUARD_TOS_INT 372 -#define _GUARD_TYPE_VERSION 373 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 374 -#define _INIT_CALL_PY_EXACT_ARGS 375 -#define _INIT_CALL_PY_EXACT_ARGS_0 376 -#define _INIT_CALL_PY_EXACT_ARGS_1 377 -#define _INIT_CALL_PY_EXACT_ARGS_2 378 -#define _INIT_CALL_PY_EXACT_ARGS_3 379 -#define _INIT_CALL_PY_EXACT_ARGS_4 380 +#define _GUARD_BOTH_FLOAT 356 +#define _GUARD_BOTH_INT 357 +#define _GUARD_BOTH_UNICODE 358 +#define _GUARD_BUILTINS_VERSION 359 +#define _GUARD_DORV_NO_DICT 360 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 361 +#define _GUARD_GLOBALS_VERSION 362 +#define _GUARD_IS_FALSE_POP 363 +#define _GUARD_IS_NONE_POP 364 +#define _GUARD_IS_NOT_NONE_POP 365 +#define _GUARD_IS_TRUE_POP 366 +#define _GUARD_KEYS_VERSION 367 +#define _GUARD_NOS_FLOAT 368 +#define _GUARD_NOS_INT 369 +#define _GUARD_NOT_EXHAUSTED_LIST 370 +#define _GUARD_NOT_EXHAUSTED_RANGE 371 +#define _GUARD_NOT_EXHAUSTED_TUPLE 372 +#define _GUARD_TOS_FLOAT 373 +#define _GUARD_TOS_INT 374 +#define _GUARD_TYPE_VERSION 375 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 376 +#define _INIT_CALL_PY_EXACT_ARGS 377 +#define _INIT_CALL_PY_EXACT_ARGS_0 378 +#define _INIT_CALL_PY_EXACT_ARGS_1 379 +#define _INIT_CALL_PY_EXACT_ARGS_2 380 +#define _INIT_CALL_PY_EXACT_ARGS_3 381 +#define _INIT_CALL_PY_EXACT_ARGS_4 382 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW @@ -152,65 +153,65 @@ extern "C" { #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 381 -#define _IS_NONE 382 +#define _INTERNAL_INCREMENT_OPT_COUNTER 383 +#define _IS_NONE 384 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 383 -#define _ITER_CHECK_RANGE 384 -#define _ITER_CHECK_TUPLE 385 -#define _ITER_JUMP_LIST 386 -#define _ITER_JUMP_RANGE 387 -#define _ITER_JUMP_TUPLE 388 -#define _ITER_NEXT_LIST 389 -#define _ITER_NEXT_RANGE 390 -#define _ITER_NEXT_TUPLE 391 -#define _JUMP_TO_TOP 392 +#define _ITER_CHECK_LIST 385 +#define _ITER_CHECK_RANGE 386 +#define _ITER_CHECK_TUPLE 387 +#define _ITER_JUMP_LIST 388 +#define _ITER_JUMP_RANGE 389 +#define _ITER_JUMP_TUPLE 390 +#define _ITER_NEXT_LIST 391 +#define _ITER_NEXT_RANGE 392 +#define _ITER_NEXT_TUPLE 393 +#define _JUMP_TO_TOP 394 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 393 -#define _LOAD_ATTR_CLASS 394 -#define _LOAD_ATTR_CLASS_0 395 -#define _LOAD_ATTR_CLASS_1 396 +#define _LOAD_ATTR 395 +#define _LOAD_ATTR_CLASS 396 +#define _LOAD_ATTR_CLASS_0 397 +#define _LOAD_ATTR_CLASS_1 398 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 397 -#define _LOAD_ATTR_INSTANCE_VALUE_0 398 -#define _LOAD_ATTR_INSTANCE_VALUE_1 399 -#define _LOAD_ATTR_METHOD_LAZY_DICT 400 -#define _LOAD_ATTR_METHOD_NO_DICT 401 -#define _LOAD_ATTR_METHOD_WITH_VALUES 402 -#define _LOAD_ATTR_MODULE 403 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 404 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 405 +#define _LOAD_ATTR_INSTANCE_VALUE 399 +#define _LOAD_ATTR_INSTANCE_VALUE_0 400 +#define _LOAD_ATTR_INSTANCE_VALUE_1 401 +#define _LOAD_ATTR_METHOD_LAZY_DICT 402 +#define _LOAD_ATTR_METHOD_NO_DICT 403 +#define _LOAD_ATTR_METHOD_WITH_VALUES 404 +#define _LOAD_ATTR_MODULE 405 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 406 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 407 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY -#define _LOAD_ATTR_SLOT 406 -#define _LOAD_ATTR_SLOT_0 407 -#define _LOAD_ATTR_SLOT_1 408 -#define _LOAD_ATTR_WITH_HINT 409 +#define _LOAD_ATTR_SLOT 408 +#define _LOAD_ATTR_SLOT_0 409 +#define _LOAD_ATTR_SLOT_1 410 +#define _LOAD_ATTR_WITH_HINT 411 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 410 -#define _LOAD_CONST_INLINE_BORROW 411 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 412 -#define _LOAD_CONST_INLINE_WITH_NULL 413 +#define _LOAD_CONST_INLINE 412 +#define _LOAD_CONST_INLINE_BORROW 413 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 414 +#define _LOAD_CONST_INLINE_WITH_NULL 415 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 414 -#define _LOAD_FAST_0 415 -#define _LOAD_FAST_1 416 -#define _LOAD_FAST_2 417 -#define _LOAD_FAST_3 418 -#define _LOAD_FAST_4 419 -#define _LOAD_FAST_5 420 -#define _LOAD_FAST_6 421 -#define _LOAD_FAST_7 422 +#define _LOAD_FAST 416 +#define _LOAD_FAST_0 417 +#define _LOAD_FAST_1 418 +#define _LOAD_FAST_2 419 +#define _LOAD_FAST_3 420 +#define _LOAD_FAST_4 421 +#define _LOAD_FAST_5 422 +#define _LOAD_FAST_6 423 +#define _LOAD_FAST_7 424 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 423 -#define _LOAD_GLOBAL_BUILTINS 424 -#define _LOAD_GLOBAL_MODULE 425 +#define _LOAD_GLOBAL 425 +#define _LOAD_GLOBAL_BUILTINS 426 +#define _LOAD_GLOBAL_MODULE 427 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL @@ -225,51 +226,51 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 426 -#define _POP_JUMP_IF_TRUE 427 +#define _POP_JUMP_IF_FALSE 428 +#define _POP_JUMP_IF_TRUE 429 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 428 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 430 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 429 +#define _PUSH_FRAME 431 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 430 -#define _REPLACE_WITH_TRUE 431 +#define _PY_FRAME_GENERAL 432 +#define _REPLACE_WITH_TRUE 433 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 432 -#define _SEND 433 +#define _SAVE_RETURN_OFFSET 434 +#define _SEND 435 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 434 -#define _STORE_ATTR 435 -#define _STORE_ATTR_INSTANCE_VALUE 436 -#define _STORE_ATTR_SLOT 437 -#define _STORE_ATTR_WITH_HINT 438 +#define _START_EXECUTOR 436 +#define _STORE_ATTR 437 +#define _STORE_ATTR_INSTANCE_VALUE 438 +#define _STORE_ATTR_SLOT 439 +#define _STORE_ATTR_WITH_HINT 440 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 439 -#define _STORE_FAST_0 440 -#define _STORE_FAST_1 441 -#define _STORE_FAST_2 442 -#define _STORE_FAST_3 443 -#define _STORE_FAST_4 444 -#define _STORE_FAST_5 445 -#define _STORE_FAST_6 446 -#define _STORE_FAST_7 447 +#define _STORE_FAST 441 +#define _STORE_FAST_0 442 +#define _STORE_FAST_1 443 +#define _STORE_FAST_2 444 +#define _STORE_FAST_3 445 +#define _STORE_FAST_4 446 +#define _STORE_FAST_5 447 +#define _STORE_FAST_6 448 +#define _STORE_FAST_7 449 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 448 +#define _STORE_SUBSCR 450 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 449 -#define _TO_BOOL 450 +#define _TIER2_RESUME_CHECK 451 +#define _TO_BOOL 452 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -279,13 +280,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 451 +#define _UNPACK_SEQUENCE 453 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 451 +#define MAX_UOP_ID 453 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 5aef6ba6825933..3065fbaee4865b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -79,6 +79,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SUBSCR_GET_FUNC] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SUBSCR_INIT_CALL] = 0, [_LIST_APPEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -281,6 +283,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_BINARY_SLICE] = "_BINARY_SLICE", [_BINARY_SUBSCR] = "_BINARY_SUBSCR", [_BINARY_SUBSCR_DICT] = "_BINARY_SUBSCR_DICT", + [_BINARY_SUBSCR_GET_FUNC] = "_BINARY_SUBSCR_GET_FUNC", + [_BINARY_SUBSCR_INIT_CALL] = "_BINARY_SUBSCR_INIT_CALL", [_BINARY_SUBSCR_LIST_INT] = "_BINARY_SUBSCR_LIST_INT", [_BINARY_SUBSCR_STR_INT] = "_BINARY_SUBSCR_STR_INT", [_BINARY_SUBSCR_TUPLE_INT] = "_BINARY_SUBSCR_TUPLE_INT", @@ -636,6 +640,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _BINARY_SUBSCR_DICT: return 2; + case _BINARY_SUBSCR_GET_FUNC: + return 2; + case _BINARY_SUBSCR_INIT_CALL: + return 3; case _LIST_APPEND: return 2 + (oparg-1); case _SET_ADD: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 50444bcc0d200c..9ef39fe316af5c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -681,30 +681,37 @@ dummy_func( ERROR_IF(rc <= 0, error); // not found or error } - inst(BINARY_SUBSCR_GETITEM, (unused/1, container, sub -- unused)) { - DEOPT_IF(tstate->interp->eval_frame); + op(_BINARY_SUBSCR_GET_FUNC, (container, unused -- container, unused, getitem)) { PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *cached = ht->_spec_cache.getitem; - DEOPT_IF(cached == NULL); - assert(PyFunction_Check(cached)); - PyFunctionObject *getitem = (PyFunctionObject *)cached; + getitem = ht->_spec_cache.getitem; + DEOPT_IF(getitem == NULL); + assert(PyFunction_Check(getitem)); uint32_t cached_version = ht->_spec_cache.getitem_version; - DEOPT_IF(getitem->func_version != cached_version); - PyCodeObject *code = (PyCodeObject *)getitem->func_code; + DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); STAT_INC(BINARY_SUBSCR, hit); Py_INCREF(getitem); - _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2); - STACK_SHRINK(2); + } + + op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem --new_frame: _PyInterpreterFrame* )) { + new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + SYNC_SP(); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; - frame->return_offset = (uint16_t)(next_instr - this_instr); - DISPATCH_INLINED(new_frame); + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); } + macro(BINARY_SUBSCR_GETITEM) = + unused/1 + // Skip over the counter + _CHECK_PEP_523 + + _BINARY_SUBSCR_GET_FUNC + + _BINARY_SUBSCR_INIT_CALL + + _PUSH_FRAME; + inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { ERROR_IF(_PyList_AppendTakeRef((PyListObject *)list, v) < 0, error); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b4e5261f57ab8b..8e2772dea58372 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -828,7 +828,57 @@ break; } - /* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ + case _BINARY_SUBSCR_GET_FUNC: { + PyObject *container; + PyObject *getitem; + container = stack_pointer[-2]; + PyTypeObject *tp = Py_TYPE(container); + if (!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + getitem = ht->_spec_cache.getitem; + if (getitem == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + assert(PyFunction_Check(getitem)); + uint32_t cached_version = ht->_spec_cache.getitem_version; + if (((PyFunctionObject *)getitem)->func_version != cached_version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + assert(code->co_argcount == 2); + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(BINARY_SUBSCR, hit); + Py_INCREF(getitem); + stack_pointer[0] = getitem; + stack_pointer += 1; + break; + } + + case _BINARY_SUBSCR_INIT_CALL: { + PyObject *getitem; + PyObject *sub; + PyObject *container; + _PyInterpreterFrame *new_frame; + getitem = stack_pointer[-1]; + sub = stack_pointer[-2]; + container = stack_pointer[-3]; + new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + stack_pointer += -3; + new_frame->localsplus[0] = container; + new_frame->localsplus[1] = sub; + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + stack_pointer[0] = (PyObject *)new_frame; + stack_pointer += 1; + break; + } case _LIST_APPEND: { PyObject *v; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3980f9852e6396..bce234c8f6b658 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -391,36 +391,60 @@ } TARGET(BINARY_SUBSCR_GETITEM) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM); static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size"); - PyObject *sub; PyObject *container; + PyObject *getitem; + PyObject *sub; + _PyInterpreterFrame *new_frame; /* Skip 1 cache entry */ - sub = stack_pointer[-1]; + // _CHECK_PEP_523 + { + DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR); + } + // _BINARY_SUBSCR_GET_FUNC container = stack_pointer[-2]; - DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR); - PyTypeObject *tp = Py_TYPE(container); - DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *cached = ht->_spec_cache.getitem; - DEOPT_IF(cached == NULL, BINARY_SUBSCR); - assert(PyFunction_Check(cached)); - PyFunctionObject *getitem = (PyFunctionObject *)cached; - uint32_t cached_version = ht->_spec_cache.getitem_version; - DEOPT_IF(getitem->func_version != cached_version, BINARY_SUBSCR); - PyCodeObject *code = (PyCodeObject *)getitem->func_code; - assert(code->co_argcount == 2); - DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); - STAT_INC(BINARY_SUBSCR, hit); - Py_INCREF(getitem); - _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2); - STACK_SHRINK(2); - new_frame->localsplus[0] = container; - new_frame->localsplus[1] = sub; - frame->return_offset = (uint16_t)(next_instr - this_instr); - DISPATCH_INLINED(new_frame); + { + PyTypeObject *tp = Py_TYPE(container); + DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + getitem = ht->_spec_cache.getitem; + DEOPT_IF(getitem == NULL, BINARY_SUBSCR); + assert(PyFunction_Check(getitem)); + uint32_t cached_version = ht->_spec_cache.getitem_version; + DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + assert(code->co_argcount == 2); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); + Py_INCREF(getitem); + } + // _BINARY_SUBSCR_INIT_CALL + sub = stack_pointer[-1]; + { + new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + stack_pointer += -2; + new_frame->localsplus[0] = container; + new_frame->localsplus[1] = sub; + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + } + // _PUSH_FRAME + { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. + assert(tstate->interp->eval_frame == NULL); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + tstate->py_recursion_remaining--; + LOAD_SP(); + LOAD_IP(0); + LLTRACE_RESUME_FRAME(); + } + DISPATCH(); } TARGET(BINARY_SUBSCR_LIST_INT) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 7274bd2a6fc02b..ec358a0a3d3a38 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -511,7 +511,21 @@ break; } - /* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 */ + case _BINARY_SUBSCR_GET_FUNC: { + _Py_UopsSymbol *getitem; + getitem = sym_new_not_null(ctx); + stack_pointer[0] = getitem; + stack_pointer += 1; + break; + } + + case _BINARY_SUBSCR_INIT_CALL: { + _PyInterpreterFrame *new_frame; + new_frame = sym_new_not_null(ctx); + stack_pointer[-3] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -2; + break; + } case _LIST_APPEND: { stack_pointer += -1; From 2af2a919174e5cee5b9ae8341460443ee72945c8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 20 Jun 2024 12:51:31 +0100 Subject: [PATCH 2/9] Add guard for code object when projecting traces through calls --- Include/internal/pycore_uop_ids.h | 191 +++++++++++++------------ Include/internal/pycore_uop_metadata.h | 4 + Python/bytecodes.c | 5 +- Python/executor_cases.c.h | 14 +- Python/generated_cases.c.h | 2 +- Python/optimizer.c | 4 + Python/optimizer_cases.c.h | 4 + 7 files changed, 124 insertions(+), 100 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 55a417cc99f13b..1b674ba4358d48 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -114,29 +114,30 @@ extern "C" { #define _GUARD_BOTH_INT 357 #define _GUARD_BOTH_UNICODE 358 #define _GUARD_BUILTINS_VERSION 359 -#define _GUARD_DORV_NO_DICT 360 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 361 -#define _GUARD_GLOBALS_VERSION 362 -#define _GUARD_IS_FALSE_POP 363 -#define _GUARD_IS_NONE_POP 364 -#define _GUARD_IS_NOT_NONE_POP 365 -#define _GUARD_IS_TRUE_POP 366 -#define _GUARD_KEYS_VERSION 367 -#define _GUARD_NOS_FLOAT 368 -#define _GUARD_NOS_INT 369 -#define _GUARD_NOT_EXHAUSTED_LIST 370 -#define _GUARD_NOT_EXHAUSTED_RANGE 371 -#define _GUARD_NOT_EXHAUSTED_TUPLE 372 -#define _GUARD_TOS_FLOAT 373 -#define _GUARD_TOS_INT 374 -#define _GUARD_TYPE_VERSION 375 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 376 -#define _INIT_CALL_PY_EXACT_ARGS 377 -#define _INIT_CALL_PY_EXACT_ARGS_0 378 -#define _INIT_CALL_PY_EXACT_ARGS_1 379 -#define _INIT_CALL_PY_EXACT_ARGS_2 380 -#define _INIT_CALL_PY_EXACT_ARGS_3 381 -#define _INIT_CALL_PY_EXACT_ARGS_4 382 +#define _GUARD_CODE 360 +#define _GUARD_DORV_NO_DICT 361 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 362 +#define _GUARD_GLOBALS_VERSION 363 +#define _GUARD_IS_FALSE_POP 364 +#define _GUARD_IS_NONE_POP 365 +#define _GUARD_IS_NOT_NONE_POP 366 +#define _GUARD_IS_TRUE_POP 367 +#define _GUARD_KEYS_VERSION 368 +#define _GUARD_NOS_FLOAT 369 +#define _GUARD_NOS_INT 370 +#define _GUARD_NOT_EXHAUSTED_LIST 371 +#define _GUARD_NOT_EXHAUSTED_RANGE 372 +#define _GUARD_NOT_EXHAUSTED_TUPLE 373 +#define _GUARD_TOS_FLOAT 374 +#define _GUARD_TOS_INT 375 +#define _GUARD_TYPE_VERSION 376 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 377 +#define _INIT_CALL_PY_EXACT_ARGS 378 +#define _INIT_CALL_PY_EXACT_ARGS_0 379 +#define _INIT_CALL_PY_EXACT_ARGS_1 380 +#define _INIT_CALL_PY_EXACT_ARGS_2 381 +#define _INIT_CALL_PY_EXACT_ARGS_3 382 +#define _INIT_CALL_PY_EXACT_ARGS_4 383 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW @@ -153,65 +154,65 @@ extern "C" { #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 383 -#define _IS_NONE 384 +#define _INTERNAL_INCREMENT_OPT_COUNTER 384 +#define _IS_NONE 385 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 385 -#define _ITER_CHECK_RANGE 386 -#define _ITER_CHECK_TUPLE 387 -#define _ITER_JUMP_LIST 388 -#define _ITER_JUMP_RANGE 389 -#define _ITER_JUMP_TUPLE 390 -#define _ITER_NEXT_LIST 391 -#define _ITER_NEXT_RANGE 392 -#define _ITER_NEXT_TUPLE 393 -#define _JUMP_TO_TOP 394 +#define _ITER_CHECK_LIST 386 +#define _ITER_CHECK_RANGE 387 +#define _ITER_CHECK_TUPLE 388 +#define _ITER_JUMP_LIST 389 +#define _ITER_JUMP_RANGE 390 +#define _ITER_JUMP_TUPLE 391 +#define _ITER_NEXT_LIST 392 +#define _ITER_NEXT_RANGE 393 +#define _ITER_NEXT_TUPLE 394 +#define _JUMP_TO_TOP 395 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 395 -#define _LOAD_ATTR_CLASS 396 -#define _LOAD_ATTR_CLASS_0 397 -#define _LOAD_ATTR_CLASS_1 398 +#define _LOAD_ATTR 396 +#define _LOAD_ATTR_CLASS 397 +#define _LOAD_ATTR_CLASS_0 398 +#define _LOAD_ATTR_CLASS_1 399 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 399 -#define _LOAD_ATTR_INSTANCE_VALUE_0 400 -#define _LOAD_ATTR_INSTANCE_VALUE_1 401 -#define _LOAD_ATTR_METHOD_LAZY_DICT 402 -#define _LOAD_ATTR_METHOD_NO_DICT 403 -#define _LOAD_ATTR_METHOD_WITH_VALUES 404 -#define _LOAD_ATTR_MODULE 405 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 406 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 407 +#define _LOAD_ATTR_INSTANCE_VALUE 400 +#define _LOAD_ATTR_INSTANCE_VALUE_0 401 +#define _LOAD_ATTR_INSTANCE_VALUE_1 402 +#define _LOAD_ATTR_METHOD_LAZY_DICT 403 +#define _LOAD_ATTR_METHOD_NO_DICT 404 +#define _LOAD_ATTR_METHOD_WITH_VALUES 405 +#define _LOAD_ATTR_MODULE 406 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 407 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 408 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY -#define _LOAD_ATTR_SLOT 408 -#define _LOAD_ATTR_SLOT_0 409 -#define _LOAD_ATTR_SLOT_1 410 -#define _LOAD_ATTR_WITH_HINT 411 +#define _LOAD_ATTR_SLOT 409 +#define _LOAD_ATTR_SLOT_0 410 +#define _LOAD_ATTR_SLOT_1 411 +#define _LOAD_ATTR_WITH_HINT 412 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 412 -#define _LOAD_CONST_INLINE_BORROW 413 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 414 -#define _LOAD_CONST_INLINE_WITH_NULL 415 +#define _LOAD_CONST_INLINE 413 +#define _LOAD_CONST_INLINE_BORROW 414 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 415 +#define _LOAD_CONST_INLINE_WITH_NULL 416 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 416 -#define _LOAD_FAST_0 417 -#define _LOAD_FAST_1 418 -#define _LOAD_FAST_2 419 -#define _LOAD_FAST_3 420 -#define _LOAD_FAST_4 421 -#define _LOAD_FAST_5 422 -#define _LOAD_FAST_6 423 -#define _LOAD_FAST_7 424 +#define _LOAD_FAST 417 +#define _LOAD_FAST_0 418 +#define _LOAD_FAST_1 419 +#define _LOAD_FAST_2 420 +#define _LOAD_FAST_3 421 +#define _LOAD_FAST_4 422 +#define _LOAD_FAST_5 423 +#define _LOAD_FAST_6 424 +#define _LOAD_FAST_7 425 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 425 -#define _LOAD_GLOBAL_BUILTINS 426 -#define _LOAD_GLOBAL_MODULE 427 +#define _LOAD_GLOBAL 426 +#define _LOAD_GLOBAL_BUILTINS 427 +#define _LOAD_GLOBAL_MODULE 428 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL @@ -226,51 +227,51 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 428 -#define _POP_JUMP_IF_TRUE 429 +#define _POP_JUMP_IF_FALSE 429 +#define _POP_JUMP_IF_TRUE 430 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 430 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 431 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 431 +#define _PUSH_FRAME 432 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 432 -#define _REPLACE_WITH_TRUE 433 +#define _PY_FRAME_GENERAL 433 +#define _REPLACE_WITH_TRUE 434 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 434 -#define _SEND 435 +#define _SAVE_RETURN_OFFSET 435 +#define _SEND 436 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 436 -#define _STORE_ATTR 437 -#define _STORE_ATTR_INSTANCE_VALUE 438 -#define _STORE_ATTR_SLOT 439 -#define _STORE_ATTR_WITH_HINT 440 +#define _START_EXECUTOR 437 +#define _STORE_ATTR 438 +#define _STORE_ATTR_INSTANCE_VALUE 439 +#define _STORE_ATTR_SLOT 440 +#define _STORE_ATTR_WITH_HINT 441 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 441 -#define _STORE_FAST_0 442 -#define _STORE_FAST_1 443 -#define _STORE_FAST_2 444 -#define _STORE_FAST_3 445 -#define _STORE_FAST_4 446 -#define _STORE_FAST_5 447 -#define _STORE_FAST_6 448 -#define _STORE_FAST_7 449 +#define _STORE_FAST 442 +#define _STORE_FAST_0 443 +#define _STORE_FAST_1 444 +#define _STORE_FAST_2 445 +#define _STORE_FAST_3 446 +#define _STORE_FAST_4 447 +#define _STORE_FAST_5 448 +#define _STORE_FAST_6 449 +#define _STORE_FAST_7 450 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 450 +#define _STORE_SUBSCR 451 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 451 -#define _TO_BOOL 452 +#define _TIER2_RESUME_CHECK 452 +#define _TO_BOOL 453 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -280,13 +281,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 453 +#define _UNPACK_SEQUENCE 454 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 453 +#define MAX_UOP_ID 454 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 3065fbaee4865b..635afa48728437 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -258,6 +258,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_COLD_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_DYNAMIC_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_START_EXECUTOR] = HAS_DEOPT_FLAG, + [_GUARD_CODE] = HAS_DEOPT_FLAG, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, @@ -370,6 +371,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_BOTH_INT] = "_GUARD_BOTH_INT", [_GUARD_BOTH_UNICODE] = "_GUARD_BOTH_UNICODE", [_GUARD_BUILTINS_VERSION] = "_GUARD_BUILTINS_VERSION", + [_GUARD_CODE] = "_GUARD_CODE", [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", @@ -998,6 +1000,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _START_EXECUTOR: return 0; + case _GUARD_CODE: + return 0; case _FATAL_ERROR: return 0; case _CHECK_VALIDITY_AND_SET_IP: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9ef39fe316af5c..75793127d96cb5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -699,7 +699,6 @@ dummy_func( op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem --new_frame: _PyInterpreterFrame* )) { new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); - SYNC_SP(); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); @@ -4300,6 +4299,10 @@ dummy_func( DEOPT_IF(!((_PyExecutorObject *)executor)->vm_data.valid); } + tier2 op(_GUARD_CODE, (version/2 -- )) { + DEOPT_IF(((PyCodeObject *)frame->f_executable)->co_version != version); + } + tier2 op(_FATAL_ERROR, (--)) { assert(0); Py_FatalError("Fatal error uop executed."); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8e2772dea58372..1ea0745a2e9490 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -871,12 +871,11 @@ sub = stack_pointer[-2]; container = stack_pointer[-3]; new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); - stack_pointer += -3; new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); - stack_pointer[0] = (PyObject *)new_frame; - stack_pointer += 1; + stack_pointer[-3] = (PyObject *)new_frame; + stack_pointer += -2; break; } @@ -4558,6 +4557,15 @@ break; } + case _GUARD_CODE: { + uint32_t version = (uint32_t)CURRENT_OPERAND(); + if (((PyCodeObject *)frame->f_executable)->co_version != version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + case _FATAL_ERROR: { assert(0); Py_FatalError("Fatal error uop executed."); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index bce234c8f6b658..88086bf5128290 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -425,7 +425,6 @@ sub = stack_pointer[-1]; { new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); - stack_pointer += -2; new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); @@ -435,6 +434,7 @@ // Write it out explicitly because it's subtly different. // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); + stack_pointer += -2; _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->previous = frame; CALL_STAT_INC(inlined_py_calls); diff --git a/Python/optimizer.c b/Python/optimizer.c index c9b187d2e108dd..30393ae465f873 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -893,9 +893,13 @@ translate_bytecode_to_trace( operand = 0; } ADD_TO_TRACE(uop, oparg, operand, target); + /* We need to guard that the runtime code object is + * the same one we are projecting into */ code = new_code; func = new_func; instr = _PyCode_CODE(code); + assert(code->co_version != 0); + ADD_TO_TRACE(_GUARD_CODE, 0, code->co_version, 0); DPRINTF(2, "Continuing in %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index ec358a0a3d3a38..07d13b7f126993 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2079,6 +2079,10 @@ break; } + case _GUARD_CODE: { + break; + } + case _FATAL_ERROR: { break; } From 4948eb122c08f1ba5cfe3c5c1b777c1e45900539 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 21 Jun 2024 12:55:05 +0100 Subject: [PATCH 3/9] Don't project through _PUSH_FRAME in BINARY_SUBSCR_GETITEM --- Python/optimizer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 30393ae465f873..38c85b743d927d 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -836,6 +836,12 @@ translate_bytecode_to_trace( if (uop == _PUSH_FRAME) { assert(i + 1 == nuops); + if (opcode == FOR_ITER_GEN || BINARY_SUBSCR_GETITEM) { + DPRINTF(2, "Bailing due to dynamic target\n"); + ADD_TO_TRACE(uop, oparg, 0, target); + ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); + goto done; + } int func_version_offset = offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT) // Add one to account for the actual opcode/oparg pair: @@ -867,12 +873,6 @@ translate_bytecode_to_trace( ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } - if (opcode == FOR_ITER_GEN) { - DPRINTF(2, "Bailing due to dynamic target\n"); - ADD_TO_TRACE(uop, oparg, 0, target); - ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); - goto done; - } // Increment IP to the return address instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; TRACE_STACK_PUSH(); From 75bdac33bd86a69a570c990424f11d07f74624b8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 21 Jun 2024 17:37:59 +0100 Subject: [PATCH 4/9] Fix typo --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 38c85b743d927d..2ed0eef737a431 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -836,7 +836,7 @@ translate_bytecode_to_trace( if (uop == _PUSH_FRAME) { assert(i + 1 == nuops); - if (opcode == FOR_ITER_GEN || BINARY_SUBSCR_GETITEM) { + if (opcode == FOR_ITER_GEN || opcode == BINARY_SUBSCR_GETITEM) { DPRINTF(2, "Bailing due to dynamic target\n"); ADD_TO_TRACE(uop, oparg, 0, target); ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); From cde0f02f6a13b30a7c0c1e2e9bd528ad37c6d123 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 25 Jun 2024 08:19:37 +0100 Subject: [PATCH 5/9] Refactor BINARY_SUBSCR_GETITEM to use less stack space --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 8 ++++---- Python/bytecodes.c | 11 +++++++---- Python/executor_cases.c.h | 20 +++++++++----------- Python/generated_cases.c.h | 8 +++++--- Python/optimizer_cases.c.h | 10 +++------- 7 files changed, 30 insertions(+), 31 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d83ca1d906c6fa..aadc85e3d4699b 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1230,7 +1230,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_SLICE] = { .nuops = 1, .uops = { { _BINARY_SLICE, 0, 0 } } }, [BINARY_SUBSCR] = { .nuops = 1, .uops = { { _BINARY_SUBSCR, 0, 0 } } }, [BINARY_SUBSCR_DICT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_DICT, 0, 0 } } }, - [BINARY_SUBSCR_GETITEM] = { .nuops = 4, .uops = { { _CHECK_PEP_523, 0, 0 }, { _BINARY_SUBSCR_GET_FUNC, 0, 0 }, { _BINARY_SUBSCR_INIT_CALL, 0, 0 }, { _PUSH_FRAME, 0, 0 } } }, + [BINARY_SUBSCR_GETITEM] = { .nuops = 4, .uops = { { _CHECK_PEP_523, 0, 0 }, { _BINARY_SUBSCR_CHECK_FUNC, 0, 0 }, { _BINARY_SUBSCR_INIT_CALL, 0, 0 }, { _PUSH_FRAME, 0, 0 } } }, [BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_LIST_INT, 0, 0 } } }, [BINARY_SUBSCR_STR_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_STR_INT, 0, 0 } } }, [BINARY_SUBSCR_TUPLE_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_TUPLE_INT, 0, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 1b674ba4358d48..7b6f94d4ca1afd 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -21,8 +21,8 @@ extern "C" { #define _BINARY_OP_SUBTRACT_INT 309 #define _BINARY_SLICE BINARY_SLICE #define _BINARY_SUBSCR 310 +#define _BINARY_SUBSCR_CHECK_FUNC 311 #define _BINARY_SUBSCR_DICT BINARY_SUBSCR_DICT -#define _BINARY_SUBSCR_GET_FUNC 311 #define _BINARY_SUBSCR_INIT_CALL 312 #define _BINARY_SUBSCR_LIST_INT BINARY_SUBSCR_LIST_INT #define _BINARY_SUBSCR_STR_INT BINARY_SUBSCR_STR_INT diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 635afa48728437..cdc5de5d1b9f64 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -79,7 +79,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SUBSCR_GET_FUNC] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SUBSCR_CHECK_FUNC] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SUBSCR_INIT_CALL] = 0, [_LIST_APPEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -283,8 +283,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_BINARY_OP_SUBTRACT_INT] = "_BINARY_OP_SUBTRACT_INT", [_BINARY_SLICE] = "_BINARY_SLICE", [_BINARY_SUBSCR] = "_BINARY_SUBSCR", + [_BINARY_SUBSCR_CHECK_FUNC] = "_BINARY_SUBSCR_CHECK_FUNC", [_BINARY_SUBSCR_DICT] = "_BINARY_SUBSCR_DICT", - [_BINARY_SUBSCR_GET_FUNC] = "_BINARY_SUBSCR_GET_FUNC", [_BINARY_SUBSCR_INIT_CALL] = "_BINARY_SUBSCR_INIT_CALL", [_BINARY_SUBSCR_LIST_INT] = "_BINARY_SUBSCR_LIST_INT", [_BINARY_SUBSCR_STR_INT] = "_BINARY_SUBSCR_STR_INT", @@ -642,10 +642,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _BINARY_SUBSCR_DICT: return 2; - case _BINARY_SUBSCR_GET_FUNC: + case _BINARY_SUBSCR_CHECK_FUNC: return 2; case _BINARY_SUBSCR_INIT_CALL: - return 3; + return 2; case _LIST_APPEND: return 2 + (oparg-1); case _SET_ADD: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 75793127d96cb5..152217800525ab 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -681,11 +681,11 @@ dummy_func( ERROR_IF(rc <= 0, error); // not found or error } - op(_BINARY_SUBSCR_GET_FUNC, (container, unused -- container, unused, getitem)) { + op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) { PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - getitem = ht->_spec_cache.getitem; + PyObject *getitem = ht->_spec_cache.getitem; DEOPT_IF(getitem == NULL); assert(PyFunction_Check(getitem)); uint32_t cached_version = ht->_spec_cache.getitem_version; @@ -697,7 +697,10 @@ dummy_func( Py_INCREF(getitem); } - op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem --new_frame: _PyInterpreterFrame* )) { + op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) { + PyTypeObject *tp = Py_TYPE(container); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; @@ -707,7 +710,7 @@ dummy_func( macro(BINARY_SUBSCR_GETITEM) = unused/1 + // Skip over the counter _CHECK_PEP_523 + - _BINARY_SUBSCR_GET_FUNC + + _BINARY_SUBSCR_CHECK_FUNC + _BINARY_SUBSCR_INIT_CALL + _PUSH_FRAME; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1ea0745a2e9490..66c62551bf935e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -828,9 +828,8 @@ break; } - case _BINARY_SUBSCR_GET_FUNC: { + case _BINARY_SUBSCR_CHECK_FUNC: { PyObject *container; - PyObject *getitem; container = stack_pointer[-2]; PyTypeObject *tp = Py_TYPE(container); if (!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)) { @@ -838,7 +837,7 @@ JUMP_TO_JUMP_TARGET(); } PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - getitem = ht->_spec_cache.getitem; + PyObject *getitem = ht->_spec_cache.getitem; if (getitem == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -857,25 +856,24 @@ } STAT_INC(BINARY_SUBSCR, hit); Py_INCREF(getitem); - stack_pointer[0] = getitem; - stack_pointer += 1; break; } case _BINARY_SUBSCR_INIT_CALL: { - PyObject *getitem; PyObject *sub; PyObject *container; _PyInterpreterFrame *new_frame; - getitem = stack_pointer[-1]; - sub = stack_pointer[-2]; - container = stack_pointer[-3]; + sub = stack_pointer[-1]; + container = stack_pointer[-2]; + PyTypeObject *tp = Py_TYPE(container); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); - stack_pointer[-3] = (PyObject *)new_frame; - stack_pointer += -2; + stack_pointer[-2] = (PyObject *)new_frame; + stack_pointer += -1; break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 88086bf5128290..ff29d3fae4e211 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -396,7 +396,6 @@ INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM); static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size"); PyObject *container; - PyObject *getitem; PyObject *sub; _PyInterpreterFrame *new_frame; /* Skip 1 cache entry */ @@ -404,13 +403,13 @@ { DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR); } - // _BINARY_SUBSCR_GET_FUNC + // _BINARY_SUBSCR_CHECK_FUNC container = stack_pointer[-2]; { PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - getitem = ht->_spec_cache.getitem; + PyObject *getitem = ht->_spec_cache.getitem; DEOPT_IF(getitem == NULL, BINARY_SUBSCR); assert(PyFunction_Check(getitem)); uint32_t cached_version = ht->_spec_cache.getitem_version; @@ -424,6 +423,9 @@ // _BINARY_SUBSCR_INIT_CALL sub = stack_pointer[-1]; { + PyTypeObject *tp = Py_TYPE(container); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 07d13b7f126993..009bcf369f242e 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -511,19 +511,15 @@ break; } - case _BINARY_SUBSCR_GET_FUNC: { - _Py_UopsSymbol *getitem; - getitem = sym_new_not_null(ctx); - stack_pointer[0] = getitem; - stack_pointer += 1; + case _BINARY_SUBSCR_CHECK_FUNC: { break; } case _BINARY_SUBSCR_INIT_CALL: { _PyInterpreterFrame *new_frame; new_frame = sym_new_not_null(ctx); - stack_pointer[-3] = (_Py_UopsSymbol *)new_frame; - stack_pointer += -2; + stack_pointer[-2] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -1; break; } From 138cd41c31ff23e73ec52681635e29a6edcf7d23 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 25 Jun 2024 12:22:46 +0100 Subject: [PATCH 6/9] Be more consistent about what terminates a trace in the optimizer --- Include/internal/pycore_optimizer.h | 11 +++++++++++ Python/optimizer.c | 12 ++++++------ Python/optimizer_analysis.c | 12 +++--------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index fd7833fd231299..ebfcb0cbe3f28f 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -150,6 +150,17 @@ PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr); +static inline int is_terminator(const _PyUOpInstruction *uop) +{ + int opcode = uop->opcode; + return ( + opcode == _EXIT_TRACE || + opcode == _JUMP_TO_TOP || + opcode == _COLD_EXIT || + opcode == _DYNAMIC_EXIT + ); +} + #ifdef __cplusplus } #endif diff --git a/Python/optimizer.c b/Python/optimizer.c index 2ed0eef737a431..e78ccfbfb4b291 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -948,7 +948,9 @@ translate_bytecode_to_trace( progress_needed ? "no progress" : "too short"); return 0; } - if (trace[trace_length-1].opcode != _JUMP_TO_TOP) { + if (!is_terminator(&trace[trace_length-1])) { + /* Allow space for _EXIT_TRACE */ + max_length += 2; ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); } DPRINTF(1, @@ -1131,7 +1133,7 @@ sanity_check(_PyExecutorObject *executor) CHECK(inst->format == UOP_FORMAT_JUMP); CHECK(inst->error_target < executor->code_size); } - if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE || opcode == _COLD_EXIT) { + if (is_terminator(inst)) { ended = true; i++; break; @@ -1269,9 +1271,7 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length) if (opcode == _NOP) { nop_count++; } - if (opcode == _EXIT_TRACE || - opcode == _JUMP_TO_TOP || - opcode == _COLD_EXIT) { + if (is_terminator(&buffer[i]) { return i+1-nop_count; } } @@ -1320,7 +1320,7 @@ uop_optimize( else if (oparg < _PyUop_Replication[opcode]) { buffer[pc].opcode = opcode + oparg + 1; } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + else if (is_terminator(&buffer[pc])) { break; } assert(_PyOpcode_uop_name[buffer[pc].opcode]); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 0e45bd8e31a54d..cc8827bd34dc4a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -53,14 +53,6 @@ #define DPRINTF(level, ...) #endif - - -static inline bool -op_is_end(uint32_t opcode) -{ - return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP; -} - static int get_mutations(PyObject* dict) { assert(PyDict_CheckExact(dict)); @@ -289,7 +281,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, prechecked_function_version = (uint32_t)buffer[pc].operand; break; default: - if (op_is_end(opcode)) { + if (is_terminator(inst)) { return 1; } break; @@ -548,6 +540,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } case _JUMP_TO_TOP: case _EXIT_TRACE: + case _COLD_EXIT: + case _DYNAMIC_EXIT: return pc + 1; default: { From 8ea721e8b7ce40c9e5ba46abb273d6fadc6d8c63 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 25 Jun 2024 13:50:06 +0100 Subject: [PATCH 7/9] fix whitespace --- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 152217800525ab..6bb1272988750a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -690,7 +690,7 @@ dummy_func( assert(PyFunction_Check(getitem)); uint32_t cached_version = ht->_spec_cache.getitem_version; DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version); - PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); STAT_INC(BINARY_SUBSCR, hit); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 66c62551bf935e..55039f02c2c8aa 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -848,7 +848,7 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) { UOP_STAT_INC(uopcode, miss); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ff29d3fae4e211..d05cfab1685f01 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -414,7 +414,7 @@ assert(PyFunction_Check(getitem)); uint32_t cached_version = ht->_spec_cache.getitem_version; DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR); - PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); From da2671ef90c9150de745fe7a9ab60f88f222b7b5 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 1 Aug 2024 10:42:06 -0700 Subject: [PATCH 8/9] Revert "Add guard for code object when projecting traces through calls" This reverts commit 2af2a919174e5cee5b9ae8341460443ee72945c8. --- Include/internal/pycore_uop_ids.h | 199 ++++++++++++------------- Include/internal/pycore_uop_metadata.h | 4 - Python/bytecodes.c | 5 +- Python/executor_cases.c.h | 15 +- Python/generated_cases.c.h | 4 +- Python/optimizer.c | 4 - Python/optimizer_cases.c.h | 4 - 7 files changed, 106 insertions(+), 129 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 3ac78f1f4db576..27d7f96863fa8c 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -114,32 +114,31 @@ extern "C" { #define _GUARD_BOTH_INT 357 #define _GUARD_BOTH_UNICODE 358 #define _GUARD_BUILTINS_VERSION 359 -#define _GUARD_CODE 360 -#define _GUARD_DORV_NO_DICT 361 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 362 -#define _GUARD_GLOBALS_VERSION 363 -#define _GUARD_IS_FALSE_POP 364 -#define _GUARD_IS_NONE_POP 365 -#define _GUARD_IS_NOT_NONE_POP 366 -#define _GUARD_IS_TRUE_POP 367 -#define _GUARD_KEYS_VERSION 368 -#define _GUARD_NOS_FLOAT 369 -#define _GUARD_NOS_INT 370 -#define _GUARD_NOT_EXHAUSTED_LIST 371 -#define _GUARD_NOT_EXHAUSTED_RANGE 372 -#define _GUARD_NOT_EXHAUSTED_TUPLE 373 -#define _GUARD_TOS_FLOAT 374 -#define _GUARD_TOS_INT 375 -#define _GUARD_TYPE_VERSION 376 +#define _GUARD_DORV_NO_DICT 360 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 361 +#define _GUARD_GLOBALS_VERSION 362 +#define _GUARD_IS_FALSE_POP 363 +#define _GUARD_IS_NONE_POP 364 +#define _GUARD_IS_NOT_NONE_POP 365 +#define _GUARD_IS_TRUE_POP 366 +#define _GUARD_KEYS_VERSION 367 +#define _GUARD_NOS_FLOAT 368 +#define _GUARD_NOS_INT 369 +#define _GUARD_NOT_EXHAUSTED_LIST 370 +#define _GUARD_NOT_EXHAUSTED_RANGE 371 +#define _GUARD_NOT_EXHAUSTED_TUPLE 372 +#define _GUARD_TOS_FLOAT 373 +#define _GUARD_TOS_INT 374 +#define _GUARD_TYPE_VERSION 375 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 377 -#define _INIT_CALL_PY_EXACT_ARGS 378 -#define _INIT_CALL_PY_EXACT_ARGS_0 379 -#define _INIT_CALL_PY_EXACT_ARGS_1 380 -#define _INIT_CALL_PY_EXACT_ARGS_2 381 -#define _INIT_CALL_PY_EXACT_ARGS_3 382 -#define _INIT_CALL_PY_EXACT_ARGS_4 383 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 376 +#define _INIT_CALL_PY_EXACT_ARGS 377 +#define _INIT_CALL_PY_EXACT_ARGS_0 378 +#define _INIT_CALL_PY_EXACT_ARGS_1 379 +#define _INIT_CALL_PY_EXACT_ARGS_2 380 +#define _INIT_CALL_PY_EXACT_ARGS_3 381 +#define _INIT_CALL_PY_EXACT_ARGS_4 382 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER @@ -153,65 +152,65 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE #define _INSTRUMENTED_RESUME INSTRUMENTED_RESUME -#define _INTERNAL_INCREMENT_OPT_COUNTER 384 -#define _IS_NONE 385 +#define _INTERNAL_INCREMENT_OPT_COUNTER 383 +#define _IS_NONE 384 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 386 -#define _ITER_CHECK_RANGE 387 -#define _ITER_CHECK_TUPLE 388 -#define _ITER_JUMP_LIST 389 -#define _ITER_JUMP_RANGE 390 -#define _ITER_JUMP_TUPLE 391 -#define _ITER_NEXT_LIST 392 -#define _ITER_NEXT_RANGE 393 -#define _ITER_NEXT_TUPLE 394 -#define _JUMP_TO_TOP 395 +#define _ITER_CHECK_LIST 385 +#define _ITER_CHECK_RANGE 386 +#define _ITER_CHECK_TUPLE 387 +#define _ITER_JUMP_LIST 388 +#define _ITER_JUMP_RANGE 389 +#define _ITER_JUMP_TUPLE 390 +#define _ITER_NEXT_LIST 391 +#define _ITER_NEXT_RANGE 392 +#define _ITER_NEXT_TUPLE 393 +#define _JUMP_TO_TOP 394 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 396 -#define _LOAD_ATTR_CLASS 397 -#define _LOAD_ATTR_CLASS_0 398 -#define _LOAD_ATTR_CLASS_1 399 +#define _LOAD_ATTR 395 +#define _LOAD_ATTR_CLASS 396 +#define _LOAD_ATTR_CLASS_0 397 +#define _LOAD_ATTR_CLASS_1 398 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 400 -#define _LOAD_ATTR_INSTANCE_VALUE_0 401 -#define _LOAD_ATTR_INSTANCE_VALUE_1 402 -#define _LOAD_ATTR_METHOD_LAZY_DICT 403 -#define _LOAD_ATTR_METHOD_NO_DICT 404 -#define _LOAD_ATTR_METHOD_WITH_VALUES 405 -#define _LOAD_ATTR_MODULE 406 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 407 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 408 -#define _LOAD_ATTR_PROPERTY_FRAME 409 -#define _LOAD_ATTR_SLOT 410 -#define _LOAD_ATTR_SLOT_0 411 -#define _LOAD_ATTR_SLOT_1 412 -#define _LOAD_ATTR_WITH_HINT 413 +#define _LOAD_ATTR_INSTANCE_VALUE 399 +#define _LOAD_ATTR_INSTANCE_VALUE_0 400 +#define _LOAD_ATTR_INSTANCE_VALUE_1 401 +#define _LOAD_ATTR_METHOD_LAZY_DICT 402 +#define _LOAD_ATTR_METHOD_NO_DICT 403 +#define _LOAD_ATTR_METHOD_WITH_VALUES 404 +#define _LOAD_ATTR_MODULE 405 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 406 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 407 +#define _LOAD_ATTR_PROPERTY_FRAME 408 +#define _LOAD_ATTR_SLOT 409 +#define _LOAD_ATTR_SLOT_0 410 +#define _LOAD_ATTR_SLOT_1 411 +#define _LOAD_ATTR_WITH_HINT 412 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 414 -#define _LOAD_CONST_INLINE_BORROW 415 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 416 -#define _LOAD_CONST_INLINE_WITH_NULL 417 +#define _LOAD_CONST_INLINE 413 +#define _LOAD_CONST_INLINE_BORROW 414 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 415 +#define _LOAD_CONST_INLINE_WITH_NULL 416 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 418 -#define _LOAD_FAST_0 419 -#define _LOAD_FAST_1 420 -#define _LOAD_FAST_2 421 -#define _LOAD_FAST_3 422 -#define _LOAD_FAST_4 423 -#define _LOAD_FAST_5 424 -#define _LOAD_FAST_6 425 -#define _LOAD_FAST_7 426 +#define _LOAD_FAST 417 +#define _LOAD_FAST_0 418 +#define _LOAD_FAST_1 419 +#define _LOAD_FAST_2 420 +#define _LOAD_FAST_3 421 +#define _LOAD_FAST_4 422 +#define _LOAD_FAST_5 423 +#define _LOAD_FAST_6 424 +#define _LOAD_FAST_7 425 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 427 -#define _LOAD_GLOBAL_BUILTINS 428 -#define _LOAD_GLOBAL_MODULE 429 +#define _LOAD_GLOBAL 426 +#define _LOAD_GLOBAL_BUILTINS 427 +#define _LOAD_GLOBAL_MODULE 428 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL @@ -224,55 +223,55 @@ extern "C" { #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 430 -#define _MONITOR_CALL 431 +#define _MAYBE_EXPAND_METHOD 429 +#define _MONITOR_CALL 430 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 432 -#define _POP_JUMP_IF_TRUE 433 +#define _POP_JUMP_IF_FALSE 431 +#define _POP_JUMP_IF_TRUE 432 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 434 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 433 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 435 +#define _PUSH_FRAME 434 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 436 -#define _REPLACE_WITH_TRUE 437 +#define _PY_FRAME_GENERAL 435 +#define _REPLACE_WITH_TRUE 436 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 438 -#define _SEND 439 -#define _SEND_GEN_FRAME 440 +#define _SAVE_RETURN_OFFSET 437 +#define _SEND 438 +#define _SEND_GEN_FRAME 439 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 441 -#define _STORE_ATTR 442 -#define _STORE_ATTR_INSTANCE_VALUE 443 -#define _STORE_ATTR_SLOT 444 -#define _STORE_ATTR_WITH_HINT 445 +#define _START_EXECUTOR 440 +#define _STORE_ATTR 441 +#define _STORE_ATTR_INSTANCE_VALUE 442 +#define _STORE_ATTR_SLOT 443 +#define _STORE_ATTR_WITH_HINT 444 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 446 -#define _STORE_FAST_0 447 -#define _STORE_FAST_1 448 -#define _STORE_FAST_2 449 -#define _STORE_FAST_3 450 -#define _STORE_FAST_4 451 -#define _STORE_FAST_5 452 -#define _STORE_FAST_6 453 -#define _STORE_FAST_7 454 +#define _STORE_FAST 445 +#define _STORE_FAST_0 446 +#define _STORE_FAST_1 447 +#define _STORE_FAST_2 448 +#define _STORE_FAST_3 449 +#define _STORE_FAST_4 450 +#define _STORE_FAST_5 451 +#define _STORE_FAST_6 452 +#define _STORE_FAST_7 453 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 455 +#define _STORE_SUBSCR 454 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 456 -#define _TO_BOOL 457 +#define _TIER2_RESUME_CHECK 455 +#define _TO_BOOL 456 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -282,13 +281,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 458 +#define _UNPACK_SEQUENCE 457 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 458 +#define MAX_UOP_ID 457 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 3995728702ce30..f5c666454dcbef 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -265,7 +265,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, - [_GUARD_CODE] = HAS_DEOPT_FLAG, [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, @@ -379,7 +378,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_BOTH_INT] = "_GUARD_BOTH_INT", [_GUARD_BOTH_UNICODE] = "_GUARD_BOTH_UNICODE", [_GUARD_BUILTINS_VERSION] = "_GUARD_BUILTINS_VERSION", - [_GUARD_CODE] = "_GUARD_CODE", [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", @@ -1028,8 +1026,6 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _START_EXECUTOR: return 0; - case _GUARD_CODE: - return 0; case _FATAL_ERROR: return 0; case _CHECK_VALIDITY_AND_SET_IP: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9084846a5ff6ad..fb7202ef036d67 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -786,6 +786,7 @@ dummy_func( PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; PyObject *getitem = ht->_spec_cache.getitem; new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + SYNC_SP(); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); @@ -4758,10 +4759,6 @@ dummy_func( assert(((_PyExecutorObject *)executor)->vm_data.valid); } - tier2 op(_GUARD_CODE, (version/2 -- )) { - DEOPT_IF(((PyCodeObject *)frame->f_executable)->co_version != version); - } - tier2 op(_FATAL_ERROR, (--)) { assert(0); Py_FatalError("Fatal error uop executed."); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1b25babd1be07b..ba28df6cbfd627 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1007,11 +1007,13 @@ PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; PyObject *getitem = ht->_spec_cache.getitem; new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); - stack_pointer[-2].bits = (uintptr_t)new_frame; - stack_pointer += -1; + stack_pointer[0].bits = (uintptr_t)new_frame; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; } @@ -5286,15 +5288,6 @@ break; } - case _GUARD_CODE: { - uint32_t version = (uint32_t)CURRENT_OPERAND(); - if (((PyCodeObject *)frame->f_executable)->co_version != version) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - break; - } - case _FATAL_ERROR: { assert(0); Py_FatalError("Fatal error uop executed."); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index bd21a58a3c65c4..1cc1ced22134ed 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -505,6 +505,8 @@ PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; PyObject *getitem = ht->_spec_cache.getitem; new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); @@ -514,8 +516,6 @@ // Write it out explicitly because it's subtly different. // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->previous = frame; CALL_STAT_INC(inlined_py_calls); diff --git a/Python/optimizer.c b/Python/optimizer.c index 7915c7f0bb5748..b917d86b5c56dd 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -855,13 +855,9 @@ translate_bytecode_to_trace( operand = 0; } ADD_TO_TRACE(uop, oparg, operand, target); - /* We need to guard that the runtime code object is - * the same one we are projecting into */ code = new_code; func = new_func; instr = _PyCode_CODE(code); - assert(code->co_version != 0); - ADD_TO_TRACE(_GUARD_CODE, 0, code->co_version, 0); DPRINTF(2, "Continuing in %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 7f23097c637c89..d1354716345bac 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2260,10 +2260,6 @@ break; } - case _GUARD_CODE: { - break; - } - case _FATAL_ERROR: { break; } From d4df441468362b4514f79cf7bc4d954044cdc14b Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 1 Aug 2024 10:42:52 -0700 Subject: [PATCH 9/9] typo --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index b917d86b5c56dd..40865e0f677b8b 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1210,7 +1210,7 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length) if (opcode == _NOP) { nop_count++; } - if (is_terminator(&buffer[i]) { + if (is_terminator(&buffer[i])) { return i+1-nop_count; } }