Skip to content

bpo-45885: Specialize COMPARE_OP #29734

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Dec 3, 2021
3 changes: 3 additions & 0 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,9 @@ PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
and where the hash values are equal (i.e. a very probable match) */
PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);

/* Equality check for two exact str objects (both arguments must satisfy
   PyUnicode_CheckExact). Returns 1 if the strings are equal, 0 if they are
   not, and -1 on failure. */
PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *);

PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *);
PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);

Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ typedef struct {
uint16_t defaults_len;
} _PyCallCache;


/* Add specialized versions of entries to this union.
*
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
Expand Down Expand Up @@ -272,6 +273,7 @@ int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT
int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins);
void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
SpecializedCacheEntry *cache);
/* Specialization hook for COMPARE_OP: called by the COMPARE_OP_ADAPTIVE
   handler in Python/ceval.c with the two operands, the instruction itself
   and its cache entry. */
void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);

#define PRINT_SPECIALIZATION_STATS 0
#define PRINT_SPECIALIZATION_STATS_DETAILED 0
Expand Down
78 changes: 41 additions & 37 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ def jabs_op(name, op):
"BINARY_OP_MULTIPLY_FLOAT",
"BINARY_OP_SUBTRACT_INT",
"BINARY_OP_SUBTRACT_FLOAT",
"COMPARE_OP_ADAPTIVE",
"COMPARE_OP_FLOAT_JUMP",
"COMPARE_OP_INT_JUMP",
"COMPARE_OP_STR_JUMP",
"BINARY_SUBSCR_ADAPTIVE",
"BINARY_SUBSCR_GETITEM",
"BINARY_SUBSCR_LIST_INT",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Specialized the ``COMPARE_OP`` opcode using the PEP 659 machinery.
14 changes: 14 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -11168,6 +11168,20 @@ unicode_compare_eq(PyObject *str1, PyObject *str2)
return (cmp == 0);
}

/* Test two exact str objects for equality.

   Returns 1 when the strings are equal (trivially so when both arguments
   are the same object), 0 when they differ, and -1 if PyUnicode_READY()
   fails for either argument. */
int
_PyUnicode_Equal(PyObject *str1, PyObject *str2)
{
    assert(PyUnicode_CheckExact(str1));
    assert(PyUnicode_CheckExact(str2));

    if (str1 != str2) {
        /* Both operands must be ready before their contents can be compared;
           str2 is only examined once str1 is known to be ready. */
        if (PyUnicode_READY(str1) != 0) {
            return -1;
        }
        if (PyUnicode_READY(str2) != 0) {
            return -1;
        }
        return unicode_compare_eq(str1, str2);
    }

    /* Identical objects are equal without inspecting their contents. */
    return 1;
}


int
PyUnicode_Compare(PyObject *left, PyObject *right)
Expand Down
122 changes: 122 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -3670,6 +3670,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
}

TARGET(COMPARE_OP) {
PREDICTED(COMPARE_OP);
STAT_INC(COMPARE_OP, unquickened);
assert(oparg <= Py_GE);
PyObject *right = POP();
PyObject *left = TOP();
Expand All @@ -3684,6 +3686,125 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DISPATCH();
}

/* Adaptive form of COMPARE_OP. When the cache counter has reached zero it
 * hands the operands to _Py_Specialize_CompareOp(); otherwise it decrements
 * the counter and executes the generic COMPARE_OP. */
TARGET(COMPARE_OP_ADAPTIVE) {
assert(cframe.use_tracing == 0);
SpecializedCacheEntry *cache = GET_CACHE();
if (cache->adaptive.counter == 0) {
// Peek at the operands without popping them; the stack is left untouched.
PyObject *right = TOP();
PyObject *left = SECOND();
// Step back one code unit before specializing -- presumably so that
// DISPATCH() re-executes this (possibly rewritten) instruction.
next_instr--;
_Py_Specialize_CompareOp(left, right, next_instr, cache);
DISPATCH();
}
else {
STAT_INC(COMPARE_OP, deferred);
cache->adaptive.counter--;
// The generic handler needs the comparison kind, which is kept in the cache.
oparg = cache->adaptive.original_oparg;
// COMPARE_OP increments the "unquickened" stat on entry; cancel that
// out in advance since this path is not an unquickened execution.
STAT_DEC(COMPARE_OP, unquickened);
JUMP_TO_INSTRUCTION(COMPARE_OP);
}
}

/* Specialized COMPARE_OP for two exact floats, fused with the conditional
 * jump that follows it. Deoptimizes to the generic COMPARE_OP (via DEOPT_IF)
 * when either operand is not an exact float or is NaN. */
TARGET(COMPARE_OP_FLOAT_JUMP) {
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false)
SpecializedCacheEntry *caches = GET_CACHE();
// Bit mask over the comparison outcome; tested below with 1 << (sign + 1).
int when_to_jump_mask = caches[0].adaptive.index;
PyObject *right = TOP();
PyObject *left = SECOND();
DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP);
DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP);
double dleft = PyFloat_AS_DOUBLE(left);
double dright = PyFloat_AS_DOUBLE(right);
// sign is -1, 0 or +1 for less-than, equal, greater-than. If either operand
// is NaN both comparisons are false and sign would be 0 (i.e. look like
// equality), hence the isnan() deopt checks.
int sign = (dleft > dright) - (dleft < dright);
DEOPT_IF(isnan(dleft), COMPARE_OP);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potentially we could use the fact that nans are not equal to everything including themselves to add a fourth bit to the mask. Probably leave that for another PR as the maths starts getting a bit convoluted.

DEOPT_IF(isnan(dright), COMPARE_OP);
STAT_INC(COMPARE_OP, hit);
// NOTE(review): NEXTOPARG() presumably loads opcode/oparg of the following
// instruction, which the assert below requires to be a POP_JUMP_IF_*.
NEXTOPARG();
STACK_SHRINK(2);
Py_DECREF(left);
Py_DECREF(right);
assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
// Mask bit 0 = less-than, bit 1 = equal, bit 2 = greater-than.
int jump = (1 << (sign + 1)) & when_to_jump_mask;
if (!jump) {
// Not taken: advance past the jump instruction (presumably the one just decoded).
next_instr++;
NOTRACE_DISPATCH();
}
else {
JUMPTO(oparg);
CHECK_EVAL_BREAKER();
NOTRACE_DISPATCH();
}
}

/* Specialized COMPARE_OP for two exact ints whose Py_SIZE is -1, 0 or 1,
 * fused with the conditional jump that follows it. Deoptimizes to the
 * generic COMPARE_OP (via DEOPT_IF) for any other operands. */
TARGET(COMPARE_OP_INT_JUMP) {
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false)
SpecializedCacheEntry *caches = GET_CACHE();
// Bit mask over the comparison outcome; tested below with 1 << (sign + 1).
int when_to_jump_mask = caches[0].adaptive.index;
PyObject *right = TOP();
PyObject *left = SECOND();
DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP);
DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP);
// Single unsigned range check: passes only when Py_SIZE is -1, 0 or 1
// (the assert below restates this as Py_ABS(Py_SIZE(x)) <= 1).
DEOPT_IF((size_t)(Py_SIZE(left) + 1) > 2, COMPARE_OP);
DEOPT_IF((size_t)(Py_SIZE(right) + 1) > 2, COMPARE_OP);
STAT_INC(COMPARE_OP, hit);
assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1);
// Py_SIZE (-1, 0 or 1) times the first digit gives the signed value;
// a size of 0 yields 0 whatever ob_digit[0] holds.
Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0];
Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0];
// sign is -1, 0 or +1 for less-than, equal, greater-than.
int sign = (ileft > iright) - (ileft < iright);
// NOTE(review): NEXTOPARG() presumably loads opcode/oparg of the following
// instruction, which the assert below requires to be a POP_JUMP_IF_*.
NEXTOPARG();
STACK_SHRINK(2);
Py_DECREF(left);
Py_DECREF(right);
assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
// Mask bit 0 = less-than, bit 1 = equal, bit 2 = greater-than.
int jump = (1 << (sign + 1)) & when_to_jump_mask;
if (!jump) {
// Not taken: advance past the jump instruction (presumably the one just decoded).
next_instr++;
NOTRACE_DISPATCH();
}
else {
JUMPTO(oparg);
CHECK_EVAL_BREAKER();
NOTRACE_DISPATCH();
}
}

/* Specialized COMPARE_OP for == / != on two exact str objects, fused with
 * the conditional jump that follows it. Deoptimizes to the generic
 * COMPARE_OP (via DEOPT_IF) when either operand is not an exact str. */
TARGET(COMPARE_OP_STR_JUMP) {
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false)
SpecializedCacheEntry *caches = GET_CACHE();
// 0 or 1 (asserted below); XORed with the equality result to decide the jump.
int invert = caches[0].adaptive.index;
PyObject *right = TOP();
PyObject *left = SECOND();
DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP);
DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP);
STAT_INC(COMPARE_OP, hit);
// 1 if equal, 0 if not, negative on failure.
int res = _PyUnicode_Equal(left, right);
if (res < 0) {
// The operands have not been popped yet at this point.
goto error;
}
// Only equality and inequality comparisons are handled by this specialization.
assert(caches[0].adaptive.original_oparg == Py_EQ ||
caches[0].adaptive.original_oparg == Py_NE);
// NOTE(review): NEXTOPARG() presumably loads opcode/oparg of the following
// instruction, which the assert below requires to be a POP_JUMP_IF_*.
NEXTOPARG();
assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
STACK_SHRINK(2);
Py_DECREF(left);
Py_DECREF(right);
assert(res == 0 || res == 1);
assert(invert == 0 || invert == 1);
// Jump when the equality result and the invert flag differ.
int jump = res ^ invert;
if (!jump) {
// Not taken: advance past the jump instruction (presumably the one just decoded).
next_instr++;
NOTRACE_DISPATCH();
}
else {
JUMPTO(oparg);
CHECK_EVAL_BREAKER();
NOTRACE_DISPATCH();
}
}

TARGET(IS_OP) {
PyObject *right = POP();
PyObject *left = TOP();
Expand Down Expand Up @@ -4970,6 +5091,7 @@ MISS_WITH_CACHE(LOAD_GLOBAL)
MISS_WITH_CACHE(LOAD_METHOD)
MISS_WITH_CACHE(CALL_FUNCTION)
MISS_WITH_CACHE(BINARY_OP)
MISS_WITH_CACHE(COMPARE_OP)
MISS_WITH_CACHE(BINARY_SUBSCR)
MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)

Expand Down
46 changes: 23 additions & 23 deletions Python/opcode_targets.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading