Skip to content

gh-98831: Modernize the FOR_ITER family of instructions #101626

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 47 additions & 36 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2066,27 +2066,35 @@ dummy_func(
PREDICT(LOAD_CONST);
}

// stack effect: ( -- __0)
inst(FOR_ITER) {
// Most members of this family are "secretly" super-instructions.
// When the loop is exhausted, they jump, and the jump target is
// always END_FOR, which pops two values off the stack.
// This is optimized by skipping that instruction and combining
// its effect (popping 'iter' instead of pushing 'next'.)

family(for_iter, INLINE_CACHE_ENTRIES_FOR_ITER) = {
FOR_ITER,
FOR_ITER_LIST,
FOR_ITER_TUPLE,
FOR_ITER_RANGE,
FOR_ITER_GEN,
};

inst(FOR_ITER, (unused/1, iter -- iter, next)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be counter/1 instead of unused/1, and then the counter used in the body?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In all cases where the counter is manipulated using ADAPTIVE_COUNTER_IS_ZERO / DECREMENT_ADAPTIVE_COUNTER I'm leaving this idiom unchanged -- the generator won't let you write back to cache entries, so it would become asymmetrical, and the common case is DECREMENT_ADAPTIVE_COUNTER anyway.

#if ENABLE_SPECIALIZATION
_PyForIterCache *cache = (_PyForIterCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
assert(cframe.use_tracing == 0);
next_instr--;
_Py_Specialize_ForIter(TOP(), next_instr, oparg);
_Py_Specialize_ForIter(iter, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(FOR_ITER, deferred);
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
#endif /* ENABLE_SPECIALIZATION */
/* before: [iter]; after: [iter, iter()] *or* [] */
PyObject *iter = TOP();
PyObject *next = (*Py_TYPE(iter)->tp_iternext)(iter);
if (next != NULL) {
PUSH(next);
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER);
}
else {
/* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */
next = (*Py_TYPE(iter)->tp_iternext)(iter);
if (next == NULL) {
if (_PyErr_Occurred(tstate)) {
if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
goto error;
Expand All @@ -2098,70 +2106,74 @@ dummy_func(
}
/* iterator ended normally */
assert(_Py_OPCODE(next_instr[INLINE_CACHE_ENTRIES_FOR_ITER + oparg]) == END_FOR);
STACK_SHRINK(1);
Py_DECREF(iter);
/* Skip END_FOR */
STACK_SHRINK(1);
/* Jump forward oparg, then skip following END_FOR instruction */
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1);
DISPATCH();
}
// Common case: no jump, leave it to the code generator
}

// stack effect: ( -- __0)
inst(FOR_ITER_LIST) {
inst(FOR_ITER_LIST, (unused/1, iter -- iter, next)) {
assert(cframe.use_tracing == 0);
_PyListIterObject *it = (_PyListIterObject *)TOP();
DEOPT_IF(Py_TYPE(it) != &PyListIter_Type, FOR_ITER);
DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER);
_PyListIterObject *it = (_PyListIterObject *)iter;
STAT_INC(FOR_ITER, hit);
PyListObject *seq = it->it_seq;
if (seq) {
if (it->it_index < PyList_GET_SIZE(seq)) {
PyObject *next = PyList_GET_ITEM(seq, it->it_index++);
PUSH(Py_NewRef(next));
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER);
next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++));
goto end_for_iter_list; // End of this instruction
}
it->it_seq = NULL;
Py_DECREF(seq);
}
Py_DECREF(iter);
STACK_SHRINK(1);
Py_DECREF(it);
/* Jump forward oparg, then skip following END_FOR instruction */
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1);
DISPATCH();
end_for_iter_list:
// Common case: no jump, leave it to the code generator
}

// stack effect: ( -- __0)
inst(FOR_ITER_TUPLE) {
inst(FOR_ITER_TUPLE, (unused/1, iter -- iter, next)) {
assert(cframe.use_tracing == 0);
_PyTupleIterObject *it = (_PyTupleIterObject *)TOP();
_PyTupleIterObject *it = (_PyTupleIterObject *)iter;
DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, FOR_ITER);
STAT_INC(FOR_ITER, hit);
PyTupleObject *seq = it->it_seq;
if (seq) {
if (it->it_index < PyTuple_GET_SIZE(seq)) {
PyObject *next = PyTuple_GET_ITEM(seq, it->it_index++);
PUSH(Py_NewRef(next));
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER);
next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++));
goto end_for_iter_tuple; // End of this instruction
}
it->it_seq = NULL;
Py_DECREF(seq);
}
Py_DECREF(iter);
STACK_SHRINK(1);
Py_DECREF(it);
/* Jump forward oparg, then skip following END_FOR instruction */
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1);
DISPATCH();
end_for_iter_tuple:
// Common case: no jump, leave it to the code generator
}

// stack effect: ( -- __0)
inst(FOR_ITER_RANGE) {
// This is slightly different, when the loop isn't terminated we
// jump over the immediately following STORE_FAST instruction.
inst(FOR_ITER_RANGE, (unused/1, iter -- iter, unused)) {
assert(cframe.use_tracing == 0);
_PyRangeIterObject *r = (_PyRangeIterObject *)TOP();
_PyRangeIterObject *r = (_PyRangeIterObject *)iter;
DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER);
STAT_INC(FOR_ITER, hit);
_Py_CODEUNIT next = next_instr[INLINE_CACHE_ENTRIES_FOR_ITER];
assert(_PyOpcode_Deopt[_Py_OPCODE(next)] == STORE_FAST);
if (r->len <= 0) {
STACK_SHRINK(1);
Py_DECREF(r);
// Jump over END_FOR instruction.
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1);
}
else {
Expand All @@ -2174,11 +2186,13 @@ dummy_func(
// The STORE_FAST is already done.
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + 1);
}
DISPATCH();
}

inst(FOR_ITER_GEN) {
// This is *not* a super-instruction, unique in the family.
inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) {
assert(cframe.use_tracing == 0);
PyGenObject *gen = (PyGenObject *)TOP();
PyGenObject *gen = (PyGenObject *)iter;
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER);
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
STAT_INC(FOR_ITER, hit);
Expand Down Expand Up @@ -3173,9 +3187,6 @@ family(call, INLINE_CACHE_ENTRIES_CALL) = {
CALL_NO_KW_LIST_APPEND, CALL_NO_KW_METHOD_DESCRIPTOR_FAST, CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS,
CALL_NO_KW_METHOD_DESCRIPTOR_O, CALL_NO_KW_STR_1, CALL_NO_KW_TUPLE_1,
CALL_NO_KW_TYPE_1 };
family(for_iter, INLINE_CACHE_ENTRIES_FOR_ITER) = {
FOR_ITER, FOR_ITER_LIST,
FOR_ITER_RANGE };
family(store_fast) = { STORE_FAST, STORE_FAST__LOAD_FAST, STORE_FAST__STORE_FAST };
family(unpack_sequence, INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE) = {
UNPACK_SEQUENCE, UNPACK_SEQUENCE_LIST,
Expand Down
66 changes: 42 additions & 24 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 15 additions & 15 deletions Python/opcode_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,15 +261,15 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
case GET_YIELD_FROM_ITER:
return 1;
case FOR_ITER:
return -1;
return 1;
case FOR_ITER_LIST:
return -1;
return 1;
case FOR_ITER_TUPLE:
return -1;
return 1;
case FOR_ITER_RANGE:
return -1;
return 1;
case FOR_ITER_GEN:
return -1;
return 1;
case BEFORE_ASYNC_WITH:
return 1;
case BEFORE_WITH:
Expand Down Expand Up @@ -607,15 +607,15 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
case GET_YIELD_FROM_ITER:
return 1;
case FOR_ITER:
return -1;
return 2;
case FOR_ITER_LIST:
return -1;
return 2;
case FOR_ITER_TUPLE:
return -1;
return 2;
case FOR_ITER_RANGE:
return -1;
return 2;
case FOR_ITER_GEN:
return -1;
return 2;
case BEFORE_ASYNC_WITH:
return 2;
case BEFORE_WITH:
Expand Down Expand Up @@ -829,11 +829,11 @@ struct opcode_metadata {
[MATCH_KEYS] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[GET_ITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[GET_YIELD_FROM_ITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[FOR_ITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[FOR_ITER_LIST] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[FOR_ITER_TUPLE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[FOR_ITER_RANGE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[FOR_ITER_GEN] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[FOR_ITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC },
[FOR_ITER_LIST] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC },
[FOR_ITER_TUPLE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC },
[FOR_ITER_RANGE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC },
[FOR_ITER_GEN] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC },
[BEFORE_ASYNC_WITH] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[BEFORE_WITH] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[WITH_EXCEPT_START] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
Expand Down