Skip to content

Commit 55824d0

Browse files
authored
GH-113853: Guarantee forward progress in executors (GH-113854)
1 parent 0d8fec7 commit 55824d0

File tree

5 files changed

+121
-74
lines changed

5 files changed

+121
-74
lines changed

Include/cpython/optimizer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
6565
PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);
6666

6767
int
68-
_PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer);
68+
_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);
6969

7070
extern _PyOptimizerObject _PyOptimizer_Default;
7171

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Guarantee that all executors make progress. This then guarantees that tier 2
2+
execution always makes progress.

Python/bytecodes.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2325,12 +2325,18 @@ dummy_func(
23252325
// Double-check that the opcode isn't instrumented or something:
23262326
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
23272327
OPT_STAT_INC(attempts);
2328-
int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
2328+
_Py_CODEUNIT *start = this_instr;
2329+
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
2330+
while (oparg > 255) {
2331+
oparg >>= 8;
2332+
start--;
2333+
}
2334+
int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
23292335
ERROR_IF(optimized < 0, error);
23302336
if (optimized) {
23312337
// Rewind and enter the executor:
2332-
assert(this_instr->op.code == ENTER_EXECUTOR);
2333-
next_instr = this_instr;
2338+
assert(start->op.code == ENTER_EXECUTOR);
2339+
next_instr = start;
23342340
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
23352341
}
23362342
else {
@@ -2370,10 +2376,12 @@ dummy_func(
23702376
GOTO_TIER_TWO();
23712377
}
23722378
else {
2373-
code->co_executors->executors[oparg & 255] = NULL;
2379+
/* ENTER_EXECUTOR will be the first code unit of the instruction */
2380+
assert(oparg < 256);
2381+
code->co_executors->executors[oparg] = NULL;
23742382
opcode = this_instr->op.code = executor->vm_data.opcode;
23752383
this_instr->op.arg = executor->vm_data.oparg;
2376-
oparg = (oparg & (~255)) | executor->vm_data.oparg;
2384+
oparg = executor->vm_data.oparg;
23772385
Py_DECREF(executor);
23782386
next_instr = this_instr;
23792387
DISPATCH_GOTO();

Python/generated_cases.c.h

Lines changed: 13 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

Lines changed: 92 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
162162
}
163163

164164
int
165-
_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
165+
_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
166166
{
167-
assert(src->op.code == JUMP_BACKWARD);
168167
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
169168
assert(PyCode_Check(code));
170169
PyInterpreterState *interp = _PyInterpreterState_GET();
171-
if (!has_space_for_executor(code, src)) {
170+
if (!has_space_for_executor(code, start)) {
172171
return 0;
173172
}
174173
_PyOptimizerObject *opt = interp->optimizer;
175174
_PyExecutorObject *executor = NULL;
176-
/* Start optimizing at the destination to guarantee forward progress */
177-
int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
175+
int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
178176
if (err <= 0) {
179177
assert(executor == NULL);
180178
return err;
181179
}
182-
int index = get_index_for_executor(code, src);
180+
int index = get_index_for_executor(code, start);
183181
if (index < 0) {
184182
/* Out of memory. Don't raise and assume that the
185183
* error will show up elsewhere.
@@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
190188
Py_DECREF(executor);
191189
return 0;
192190
}
193-
insert_executor(code, src, index, executor);
191+
insert_executor(code, start, index, executor);
194192
Py_DECREF(executor);
195193
return 1;
196194
}
@@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
316314
#define CONFIDENCE_RANGE 1000
317315
#define CONFIDENCE_CUTOFF 333
318316

319-
/* Returns 1 on success,
320-
* 0 if it failed to produce a worthwhile trace,
321-
* and -1 on an error.
322-
*/
323-
static int
324-
translate_bytecode_to_trace(
325-
PyCodeObject *code,
326-
_Py_CODEUNIT *instr,
327-
_PyUOpInstruction *trace,
328-
int buffer_size,
329-
_PyBloomFilter *dependencies)
330-
{
331-
PyCodeObject *initial_code = code;
332-
_Py_BloomFilter_Add(dependencies, initial_code);
333-
_Py_CODEUNIT *initial_instr = instr;
334-
int trace_length = 0;
335-
int max_length = buffer_size;
336-
struct {
337-
PyCodeObject *code;
338-
_Py_CODEUNIT *instr;
339-
} trace_stack[TRACE_STACK_SIZE];
340-
int trace_stack_depth = 0;
341-
int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
342-
343-
#ifdef Py_DEBUG
344-
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
345-
int lltrace = 0;
346-
if (python_lltrace != NULL && *python_lltrace >= '0') {
347-
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
348-
}
349-
#endif
350-
351317
#ifdef Py_DEBUG
352318
#define DPRINTF(level, ...) \
353319
if (lltrace >= (level)) { printf(__VA_ARGS__); }
@@ -403,13 +369,47 @@ translate_bytecode_to_trace(
403369
code = trace_stack[trace_stack_depth].code; \
404370
instr = trace_stack[trace_stack_depth].instr;
405371

372+
/* Returns 1 on success,
373+
* 0 if it failed to produce a worthwhile trace,
374+
* and -1 on an error.
375+
*/
376+
static int
377+
translate_bytecode_to_trace(
378+
PyCodeObject *code,
379+
_Py_CODEUNIT *instr,
380+
_PyUOpInstruction *trace,
381+
int buffer_size,
382+
_PyBloomFilter *dependencies)
383+
{
384+
bool progress_needed = true;
385+
PyCodeObject *initial_code = code;
386+
_Py_BloomFilter_Add(dependencies, initial_code);
387+
_Py_CODEUNIT *initial_instr = instr;
388+
int trace_length = 0;
389+
int max_length = buffer_size;
390+
struct {
391+
PyCodeObject *code;
392+
_Py_CODEUNIT *instr;
393+
} trace_stack[TRACE_STACK_SIZE];
394+
int trace_stack_depth = 0;
395+
int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
396+
397+
#ifdef Py_DEBUG
398+
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
399+
int lltrace = 0;
400+
if (python_lltrace != NULL && *python_lltrace >= '0') {
401+
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
402+
}
403+
#endif
404+
406405
DPRINTF(4,
407406
"Optimizing %s (%s:%d) at byte offset %d\n",
408407
PyUnicode_AsUTF8(code->co_qualname),
409408
PyUnicode_AsUTF8(code->co_filename),
410409
code->co_firstlineno,
411410
2 * INSTR_IP(initial_instr, code));
412411
uint32_t target = 0;
412+
413413
top: // Jump here after _PUSH_FRAME or likely branches
414414
for (;;) {
415415
target = INSTR_IP(instr, code);
@@ -421,6 +421,15 @@ translate_bytecode_to_trace(
421421
uint32_t oparg = instr->op.arg;
422422
uint32_t extended = 0;
423423

424+
if (opcode == ENTER_EXECUTOR) {
425+
assert(oparg < 256);
426+
_PyExecutorObject *executor =
427+
(_PyExecutorObject *)code->co_executors->executors[oparg];
428+
opcode = executor->vm_data.opcode;
429+
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
430+
oparg = executor->vm_data.oparg;
431+
}
432+
424433
if (opcode == EXTENDED_ARG) {
425434
instr++;
426435
extended = 1;
@@ -431,13 +440,23 @@ translate_bytecode_to_trace(
431440
goto done;
432441
}
433442
}
434-
435-
if (opcode == ENTER_EXECUTOR) {
436-
_PyExecutorObject *executor =
437-
(_PyExecutorObject *)code->co_executors->executors[oparg&255];
438-
opcode = executor->vm_data.opcode;
439-
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
440-
oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
443+
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
444+
445+
/* Special case the first instruction,
446+
* so that we can guarantee forward progress */
447+
if (progress_needed) {
448+
progress_needed = false;
449+
if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
450+
instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
451+
initial_instr = instr;
452+
continue;
453+
}
454+
else {
455+
if (OPCODE_HAS_DEOPT(opcode)) {
456+
opcode = _PyOpcode_Deopt[opcode];
457+
}
458+
assert(!OPCODE_HAS_DEOPT(opcode));
459+
}
441460
}
442461

443462
switch (opcode) {
@@ -480,7 +499,9 @@ translate_bytecode_to_trace(
480499
case JUMP_BACKWARD:
481500
case JUMP_BACKWARD_NO_INTERRUPT:
482501
{
483-
if (instr + 2 - oparg == initial_instr && code == initial_code) {
502+
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
503+
if (target == initial_instr) {
504+
/* We have looped round to the start */
484505
RESERVE(1);
485506
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
486507
}
@@ -641,35 +662,33 @@ translate_bytecode_to_trace(
641662
}
642663
assert(code == initial_code);
643664
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
644-
if (trace_length > 4) {
645-
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
646-
DPRINTF(1,
647-
"Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
648-
PyUnicode_AsUTF8(code->co_qualname),
649-
PyUnicode_AsUTF8(code->co_filename),
650-
code->co_firstlineno,
651-
2 * INSTR_IP(initial_instr, code),
652-
trace_length);
653-
OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
654-
return 1;
655-
}
656-
else {
665+
if (progress_needed || trace_length < 5) {
657666
OPT_STAT_INC(trace_too_short);
658667
DPRINTF(4,
659668
"No trace for %s (%s:%d) at byte offset %d\n",
660669
PyUnicode_AsUTF8(code->co_qualname),
661670
PyUnicode_AsUTF8(code->co_filename),
662671
code->co_firstlineno,
663672
2 * INSTR_IP(initial_instr, code));
673+
return 0;
664674
}
665-
return 0;
675+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
676+
DPRINTF(1,
677+
"Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
678+
PyUnicode_AsUTF8(code->co_qualname),
679+
PyUnicode_AsUTF8(code->co_filename),
680+
code->co_firstlineno,
681+
2 * INSTR_IP(initial_instr, code),
682+
trace_length);
683+
OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
684+
return 1;
685+
}
666686

667687
#undef RESERVE
668688
#undef RESERVE_RAW
669689
#undef INSTR_IP
670690
#undef ADD_TO_TRACE
671691
#undef DPRINTF
672-
}
673692

674693
#define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
675694
#define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
@@ -854,10 +873,20 @@ counter_optimize(
854873
int Py_UNUSED(curr_stackentries)
855874
)
856875
{
876+
int oparg = instr->op.arg;
877+
while (instr->op.code == EXTENDED_ARG) {
878+
instr++;
879+
oparg = (oparg << 8) | instr->op.arg;
880+
}
881+
if (instr->op.code != JUMP_BACKWARD) {
882+
/* Counter optimizer can only handle backward edges */
883+
return 0;
884+
}
885+
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
857886
_PyUOpInstruction buffer[3] = {
858887
{ .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
859888
{ .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
860-
{ .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) }
889+
{ .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
861890
};
862891
_PyBloomFilter empty;
863892
_Py_BloomFilter_Init(&empty);

0 commit comments

Comments
 (0)