Skip to content
2 changes: 1 addition & 1 deletion Include/cpython/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ void _Py_BloomFilter_Init(_PyBloomFilter *);
void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj);
PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation);
extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation);
PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation);

/* For testing */
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
Expand Down
4 changes: 1 addition & 3 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2609,9 +2609,7 @@ dummy_func(
_PyErr_Clear(tstate);
}
/* iterator ended normally */
Py_DECREF(iter);
STACK_SHRINK(1);
/* The translator sets the deopt target just past END_FOR */
/* The translator sets the deopt target just past the matching END_FOR */
DEOPT_IF(true);
}
// Common case: no jump, leave it to the code generator
Expand Down
4 changes: 1 addition & 3 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 38 additions & 11 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@

#define MAX_EXECUTORS_SIZE 256

#ifdef Py_DEBUG
static int base_opcode(PyCodeObject *code, int offset)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
static int base_opcode(PyCodeObject *code, int offset)
static int
base_opcode(PyCodeObject *code, int offset)

{
int opcode = _Py_GetBaseOpcode(code, offset);
if (opcode == ENTER_EXECUTOR) {
int oparg = _PyCode_CODE(code)[offset].op.arg;
_PyExecutorObject *ex = code->co_executors->executors[oparg];
return ex->vm_data.opcode;
}
return opcode;
}
#endif

static bool
has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
Expand Down Expand Up @@ -422,6 +434,14 @@ _PyUOp_Replacements[MAX_UOP_ID + 1] = {
[_FOR_ITER] = _FOR_ITER_TIER_TWO,
};

/* Lookup table indexed by uop ID: an entry is 1 for the uops that act as the
 * for-loop iteration test (the _GUARD_NOT_EXHAUSTED_* guards and
 * _FOR_ITER_TIER_TWO). Entries not listed here are implicitly zero.
 * prepare_for_execution() consults this table so that the exit for these
 * uops targets the POP_TOP immediately after the END_FOR instead of the
 * uop's own recorded target. */
static const uint8_t
is_for_iter_test[MAX_UOP_ID + 1] = {
[_GUARD_NOT_EXHAUSTED_RANGE] = 1,
[_GUARD_NOT_EXHAUSTED_LIST] = 1,
[_GUARD_NOT_EXHAUSTED_TUPLE] = 1,
[_FOR_ITER_TIER_TWO] = 1,
};

static const uint16_t
BRANCH_TO_GUARD[4][2] = {
[POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP,
Expand Down Expand Up @@ -571,7 +591,6 @@ translate_bytecode_to_trace(

uint32_t opcode = instr->op.code;
uint32_t oparg = instr->op.arg;
uint32_t extended = 0;

DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);

Expand All @@ -585,7 +604,6 @@ translate_bytecode_to_trace(

if (opcode == EXTENDED_ARG) {
instr++;
extended = 1;
opcode = instr->op.code;
oparg = (oparg << 8) | instr->op.arg;
if (opcode == EXTENDED_ARG) {
Expand Down Expand Up @@ -746,12 +764,13 @@ translate_bytecode_to_trace(
case OPARG_REPLACED:
uop = _PyUOp_Replacements[uop];
assert(uop != 0);
if (uop == _FOR_ITER_TIER_TWO) {
target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 2 + extended;
assert(_PyCode_CODE(code)[target-2].op.code == END_FOR ||
_PyCode_CODE(code)[target-2].op.code == INSTRUMENTED_END_FOR);
assert(_PyCode_CODE(code)[target-1].op.code == POP_TOP);
}
#ifdef Py_DEBUG
uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 255);
uint32_t jump_target = next_inst + oparg;
assert(base_opcode(code, jump_target) == END_FOR ||
base_opcode(code, jump_target) == INSTRUMENTED_END_FOR);
assert(base_opcode(code, jump_target+1) == POP_TOP);
#endif
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably best to also wrap the body of this case in { ... } to limit the scope of the two variables declared in debug mode.

break;
default:
fprintf(stderr,
Expand Down Expand Up @@ -971,7 +990,15 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
int opcode = inst->opcode;
int32_t target = (int32_t)uop_get_target(inst);
if (_PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) {
if (target != current_jump_target) {
int32_t jump_target = target;
if (is_for_iter_test[opcode]) {
/* Target the POP_TOP immediately after the END_FOR,
* leaving only the iterator on the stack. */
int extended_arg = inst->oparg > 255;
int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg;
jump_target = next_inst + inst->oparg + 1;
}
if (jump_target != current_jump_target) {
uint16_t exit_op;
if (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) {
if (opcode == _TIER2_RESUME_CHECK) {
Expand All @@ -984,8 +1011,8 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
else {
exit_op = _DEOPT;
}
make_exit(&buffer[next_spare], exit_op, target);
current_jump_target = target;
make_exit(&buffer[next_spare], exit_op, jump_target);
current_jump_target = jump_target;
current_jump = next_spare;
next_spare++;
}
Expand Down
7 changes: 5 additions & 2 deletions Python/optimizer_symbols.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,15 @@ _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val)
return true;
}


bool
_Py_uop_sym_set_null(_Py_UopsSymbol *sym)
{
if (_Py_uop_sym_is_not_null(sym)) {
sym_set_bottom(sym);
return false;
}
sym_set_flag(sym, IS_NULL);
return !_Py_uop_sym_is_bottom(sym);
return true;
Comment on lines +170 to +175
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this refactoring matter? If so, why not do the same for set_non_null below?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not a refactoring.
Calling _Py_uop_sym_set_null on a non-NULL symbol would fail an assertion in _Py_uop_sym_is_bottom.

And yes, it should be applied to set_non_null as well.

}

bool
Expand Down
6 changes: 4 additions & 2 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ print_gc_stats(FILE *out, GCStats *stats)
}
}

#ifdef _Py_TIER2
static void
print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
{
Expand Down Expand Up @@ -249,7 +250,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
stats->optimizer_failure_reason_no_memory);
fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed);
fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys);

for (int i = 0; i <= MAX_UOP_ID; i++) {
if (stats->opcode[i].execution_count) {
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
Expand All @@ -258,7 +258,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
}
}

for (int i = 0; i < 256; i++) {
if (stats->unsupported_opcode[i]) {
fprintf(
Expand Down Expand Up @@ -289,6 +288,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
}
}
}
#endif

static void
print_rare_event_stats(FILE *out, RareEventStats *stats)
Expand All @@ -309,7 +309,9 @@ print_stats(FILE *out, PyStats *stats)
print_call_stats(out, &stats->call_stats);
print_object_stats(out, &stats->object_stats);
print_gc_stats(out, stats->gc_stats);
#ifdef _Py_TIER2
print_optimization_stats(out, &stats->optimization_stats);
#endif
print_rare_event_stats(out, &stats->rare_event_stats);
}

Expand Down