Skip to content

Commit 8deb8bc

Browse files
authored
gh-112287: Speed up Tier 2 (uop) interpreter a little (#112286)
This makes the Tier 2 interpreter a little faster. By my measurements the speedup is about 3%, though I hesitate to claim an exact number.

This starts by doubling the trace size limit (to 512), making it more likely that loops fit in a trace.

The rest of the approach is to only load `oparg` and `operand` in cases that use them. The code generator knows when these are used.

For `oparg`, it will conditionally emit

```
oparg = CURRENT_OPARG();
```

at the top of the case block. (The `oparg` variable may be referenced multiple times by the instruction's code block, so it must be in a variable.)

For `operand`, it will use `CURRENT_OPERAND()` directly instead of referencing the `operand` variable, which no longer exists. (There is only one place where this will be used.)
1 parent c4c6321 commit 8deb8bc

File tree

7 files changed

+131
-33
lines changed

7 files changed

+131
-33
lines changed

Include/internal/pycore_uops.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ extern "C" {
1010

1111
#include "pycore_frame.h" // _PyInterpreterFrame
1212

13-
#define _Py_UOP_MAX_TRACE_LENGTH 256
13+
#define _Py_UOP_MAX_TRACE_LENGTH 512
1414

1515
typedef struct {
1616
uint16_t opcode;
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Slightly optimize the Tier 2 (uop) interpreter by only loading ``oparg`` and
2+
``operand`` when needed. Also double the trace size limit again, to 512 this
3+
time.

Python/ceval.c

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -994,21 +994,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
994994

995995
OPT_STAT_INC(traces_executed);
996996
_PyUOpInstruction *next_uop = current_executor->trace;
997-
uint64_t operand;
998997
#ifdef Py_STATS
999998
uint64_t trace_uop_execution_counter = 0;
1000999
#endif
10011000

10021001
for (;;) {
10031002
opcode = next_uop->opcode;
1004-
oparg = next_uop->oparg;
1005-
operand = next_uop->operand;
10061003
DPRINTF(3,
10071004
"%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n",
10081005
(int)(next_uop - current_executor->trace),
10091006
_PyUopName(opcode),
1010-
oparg,
1011-
operand,
1007+
next_uop->oparg,
1008+
next_uop->operand,
10121009
next_uop->target,
10131010
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
10141011
next_uop++;
@@ -1025,8 +1022,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
10251022
default:
10261023
#ifdef Py_DEBUG
10271024
{
1028-
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 "\n",
1029-
opcode, oparg, operand);
1025+
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 " @ %d\n",
1026+
opcode, next_uop[-1].oparg, next_uop[-1].operand,
1027+
(int)(next_uop - current_executor->trace - 1));
10301028
Py_FatalError("Unknown uop");
10311029
}
10321030
#else
@@ -1055,7 +1053,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
10551053
STACK_SHRINK(1);
10561054
error_tier_two:
10571055
DPRINTF(2, "Error: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n",
1058-
opcode, _PyUopName(opcode), oparg, operand, next_uop[-1].target,
1056+
opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
10591057
(int)(next_uop - current_executor->trace - 1));
10601058
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
10611059
frame->return_offset = 0; // Don't leave this random
@@ -1068,7 +1066,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
10681066
// On DEOPT_IF we just repeat the last instruction.
10691067
// This presumes nothing was popped from the stack (nor pushed).
10701068
DPRINTF(2, "DEOPT: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n",
1071-
opcode, _PyUopName(opcode), oparg, operand, next_uop[-1].target,
1069+
opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
10721070
(int)(next_uop - current_executor->trace - 1));
10731071
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
10741072
UOP_STAT_INC(opcode, miss);

Python/ceval_macros.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,3 +397,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame);
397397
#define GOTO_TIER_TWO() goto enter_tier_two;
398398

399399
#define GOTO_TIER_ONE() goto exit_trace;
400+
401+
#define CURRENT_OPARG() (next_uop[-1].oparg)
402+
403+
#define CURRENT_OPERAND() (next_uop[-1].operand)

0 commit comments

Comments
 (0)