Skip to content

bpo-46939: Specialize calls to Python classes #31707

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ struct _typeobject {
* by code other than the specializer and interpreter. */
struct _specialization_cache {
PyObject *getitem;
/* The type's __init__ as found by _PyType_Lookup() when a call to the class
 * was specialized; only recorded when it is a plain Python function.
 * Read back by PRECALL_PY_CLASS.
 * NOTE(review): the specializer appears to store this without taking a new
 * reference, i.e. as a borrowed pointer guarded by the type version tag --
 * confirm. */
PyObject *init;
};

/* The *real* layout of a type object when allocated on the heap */
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ typedef struct _PyInterpreterFrame {
PyFrameState f_state; /* What state the frame is in */
bool is_entry; // Whether this is the "root" frame for the current _PyCFrame.
bool is_generator;
PyObject *self; /* Borrowed reference used by __init__ frames to return self in RETURN_VALUE */
PyObject *localsplus[1];
} _PyInterpreterFrame;

Expand Down Expand Up @@ -118,6 +119,7 @@ _PyFrame_InitializeSpecials(
frame->f_state = FRAME_CREATED;
frame->is_entry = false;
frame->is_generator = false;
frame->self = NULL;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to reviewers: self is stored on the frame rather than in a cache or in call_shape so that nested calls made from inside __init__ don't overwrite it (and so that we can tell which frame a given self belongs to). Consider the following code:

class Tokenizer:
    def __init__(self):
        self.__next() # Kaboom!
    def __next(self):
        pass

for _ in range(10):
 print(Tokenizer())

}

/* Gets the pointer to the locals array
Expand Down
29 changes: 15 additions & 14 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ def jabs_op(name, op, entries=0):
"PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST",
"PRECALL_BOUND_METHOD",
"PRECALL_PYFUNC",
"PRECALL_PY_CLASS",
"RESUME_QUICK",
"STORE_ATTR_ADAPTIVE",
"STORE_ATTR_INSTANCE_VALUE",
Expand Down
6 changes: 3 additions & 3 deletions Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,7 @@ class C(object): pass
def func():
return sys._getframe()
x = func()
check(x, size('3Pi3c7P2ic??2P'))
check(x, size('3Pi3c7P2ic??3P'))
# function
def func(): pass
check(func, size('14Pi'))
Expand All @@ -1414,7 +1414,7 @@ def bar(cls):
check(bar, size('PP'))
# generator
def get_gen(): yield 1
check(get_gen(), size('P2P4P4c7P2ic??P'))
check(get_gen(), size('P2P4P4c7P2ic??2P'))
# iterator
check(iter('abc'), size('lP'))
# callable-iterator
Expand Down Expand Up @@ -1506,7 +1506,7 @@ def delx(self): del self.__x
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P'
'1P' # Specializer cache
'2P' # Specializer cache
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Calls to Python classes are now specialized. Creating objects from Python
classes should now be faster. Patch by Ken Jin.
73 changes: 73 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1587,6 +1587,11 @@ pop_frame(PyThreadState *tstate, _PyInterpreterFrame *frame)
*/
typedef struct {
/* Borrowed reference. Reset by CALL instructions. */
PyObject *kwnames;
/* Set by PRECALL_PY_CLASS after it has rewritten a class call into a call
 * to the class's Python __init__. __init__ returns None, but the class call
 * must evaluate to the new instance, so this flag tells the following CALL
 * to record self on the newly pushed __init__ frame (frame->self), where
 * RETURN_VALUE later consumes it. Cleared by CALL once consumed, and on the
 * error path.
 */
bool init_pass_self;
} CallShape;

static inline bool
Expand Down Expand Up @@ -1618,6 +1623,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyCFrame cframe;
CallShape call_shape;
call_shape.kwnames = NULL; // Borrowed reference. Reset by CALL instructions.
call_shape.init_pass_self = 0;

/* WARNING: Because the _PyCFrame lives on the C stack,
* but can be accessed from a heap allocated object (tstate)
Expand Down Expand Up @@ -2386,6 +2392,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

TARGET(RETURN_VALUE) {
PyObject *retval = POP();
if (frame->self != NULL) {
if (Py_IsNone(retval)) {
Py_SETREF(retval, frame->self);
frame->self = NULL;
}
/* A non-None return value is left untouched so that a bad-practice
__init__ returning something other than None still raises an error.
That error is raised later by the interpreter. */
else {
Py_CLEAR(frame->self);
}
}
assert(EMPTY());
frame->f_state = FRAME_RETURNED;
_PyFrame_SetStackPointer(frame, stack_pointer);
Expand Down Expand Up @@ -4617,6 +4635,44 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DISPATCH();
}

/* Specialized PRECALL for calling a Python class whose tp_new is
 * PyBaseObject_Type.tp_new and whose __init__ is the Python function cached
 * in the type's _spec_cache. It allocates the instance here, rewrites the
 * stack so that the following CALL invokes init with self as first argument,
 * and sets call_shape.init_pass_self so the __init__ frame remembers self.
 * Deoptimizes to PRECALL whenever a guard fails; jumps to error if building
 * the argument tuple or allocating the instance fails. */
TARGET(PRECALL_PY_CLASS) {
SpecializedCacheEntry *cache = GET_CACHE();
_PyAdaptiveEntry *cache0 = &cache[0].adaptive;
_PyCallCache *cache1 = &cache[-1].call;
int original_oparg = cache->adaptive.original_oparg;
/* A non-NULL slot beneath the callable marks a method-style call, which
 * this specialization does not handle. */
int is_method = (PEEK(original_oparg + 2) != NULL);
DEOPT_IF(is_method, PRECALL);
PyObject *cls = PEEK(original_oparg + 1);
DEOPT_IF(!PyType_Check(cls), PRECALL);
PyTypeObject *cls_t = (PyTypeObject *)cls;
/* Guard: the type's version tag must still match the one the specializer
 * recorded, otherwise the cached init may be stale. */
DEOPT_IF(cls_t->tp_version_tag != cache0->version, PRECALL);
assert(cls_t->tp_flags & Py_TPFLAGS_HEAPTYPE);
PyObject *init = ((PyHeapTypeObject *)cls_t)->_spec_cache.init;
assert(PyFunction_Check(init));
/* Guard: the cached __init__ function itself must be unchanged. */
DEOPT_IF(((PyFunctionObject *)(init))->func_version != cache1->func_version, PRECALL);
/* Guard: instance creation must still go through object's tp_new. */
DEOPT_IF(cls_t->tp_new != PyBaseObject_Type.tp_new, PRECALL);
STAT_INC(PRECALL, hit);

/* tp_new takes its positional arguments as a tuple, so pack the
 * original_oparg stack entries into a temporary one. */
PyObject *args = _PyTuple_FromArray(&PEEK(original_oparg), original_oparg);
if (args == NULL) {
goto error;
}
/* NOTE(review): call_shape.kwnames is passed in tp_new's keyword-arguments
 * position, and args is built from all original_oparg entries -- confirm
 * this is what tp_new should receive when keyword arguments are present. */
PyObject *self = PyBaseObject_Type.tp_new(cls_t, args, call_shape.kwnames);
Py_DECREF(args);
if (self == NULL) {
goto error;
}
/* Rewrite the stack from [NULL, cls, args...] to [init, self, args...].
 * The stack slot needs its own reference to init; the reference the stack
 * held on cls is released once its slot has been overwritten. */
Py_INCREF(init);
PEEK(original_oparg+1) = self;
PEEK(original_oparg+2) = init;
Py_DECREF(cls);

/* Ask the following CALL to stash self on the __init__ frame, for use in
 * RETURN_VALUE later */
assert(call_shape.init_pass_self == false);
call_shape.init_pass_self = true;
DISPATCH();
}

TARGET(KW_NAMES) {
assert(call_shape.kwnames == NULL);
assert(oparg < PyTuple_GET_SIZE(consts));
Expand Down Expand Up @@ -4651,6 +4707,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
cframe.current_frame = frame = new_frame;
if (call_shape.init_pass_self) {
assert(frame->self == NULL);
frame->self = Py_NewRef(frame->localsplus[0]);
call_shape.init_pass_self = false;
}
CALL_STAT_INC(inlined_py_calls);
goto start_frame;
}
Expand Down Expand Up @@ -4762,6 +4823,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
if (call_shape.init_pass_self) {
assert(frame->self == NULL);
frame->self = Py_NewRef(frame->localsplus[0]);
call_shape.init_pass_self = false;
}
goto start_frame;
}

Expand Down Expand Up @@ -4803,6 +4869,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
if (call_shape.init_pass_self) {
assert(frame->self == NULL);
frame->self = Py_NewRef(frame->localsplus[0]);
call_shape.init_pass_self = false;
}
goto start_frame;
}

Expand Down Expand Up @@ -5617,6 +5688,7 @@ MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)

error:
call_shape.kwnames = NULL;
call_shape.init_pass_self = false;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to reviewers: We don't set frame->self = NULL here because doing so would discard self whenever an exception is raised and then handled inside __init__. For example, consider this:

class A:
  def __init__(self):
    try:
      A.a # Kaboom!
    except AttributeError:
      pass

for _ in range(10):
  print(A())

/* Double-check exception status. */
#ifdef NDEBUG
if (!_PyErr_Occurred(tstate)) {
Expand Down Expand Up @@ -5658,6 +5730,7 @@ MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)
assert(STACK_LEVEL() == 0);
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->f_state = FRAME_RAISED;
Py_CLEAR(frame->self);
TRACE_FUNCTION_UNWIND();
DTRACE_FUNCTION_EXIT();
goto exit_unwind;
Expand Down
14 changes: 7 additions & 7 deletions Python/opcode_targets.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 37 additions & 10 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -591,15 +591,16 @@ initial_counter_value(void) {
#define SPEC_FAIL_CALL_BAD_CALL_FLAGS 17
#define SPEC_FAIL_CALL_CLASS 18
#define SPEC_FAIL_CALL_PYTHON_CLASS 19
#define SPEC_FAIL_CALL_METHOD_DESCRIPTOR 20
#define SPEC_FAIL_CALL_BOUND_METHOD 21
#define SPEC_FAIL_CALL_STR 22
#define SPEC_FAIL_CALL_CLASS_NO_VECTORCALL 23
#define SPEC_FAIL_CALL_CLASS_MUTABLE 24
#define SPEC_FAIL_CALL_KWNAMES 25
#define SPEC_FAIL_CALL_METHOD_WRAPPER 26
#define SPEC_FAIL_CALL_OPERATOR_WRAPPER 27
#define SPEC_FAIL_CALL_PYFUNCTION 28
#define SPEC_FAIL_CALL_PYTHON_CLASS_NON_PY_INIT 20
#define SPEC_FAIL_CALL_METHOD_DESCRIPTOR 21
#define SPEC_FAIL_CALL_BOUND_METHOD 22
#define SPEC_FAIL_CALL_STR 23
#define SPEC_FAIL_CALL_CLASS_NO_VECTORCALL 24
#define SPEC_FAIL_CALL_CLASS_MUTABLE 25
#define SPEC_FAIL_CALL_KWNAMES 26
#define SPEC_FAIL_CALL_METHOD_WRAPPER 27
#define SPEC_FAIL_CALL_OPERATOR_WRAPPER 28
#define SPEC_FAIL_CALL_PYFUNCTION 29

/* COMPARE_OP */
#define SPEC_FAIL_COMPARE_OP_DIFFERENT_TYPES 12
Expand Down Expand Up @@ -1523,7 +1524,33 @@ specialize_class_call(
assert(_Py_OPCODE(*instr) == PRECALL_ADAPTIVE);
PyTypeObject *tp = _PyType_CAST(callable);
if (tp->tp_new == PyBaseObject_Type.tp_new) {
SPECIALIZATION_FAIL(PRECALL, SPEC_FAIL_CALL_PYTHON_CLASS);
_PyAdaptiveEntry *cache0 = &cache[0].adaptive;
_PyCallCache *cache1 = &cache[-1].call;
PyObject *descriptor = _PyType_Lookup(tp, &_Py_ID(__init__));
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
return -1;
}
PyFunctionObject *func = (PyFunctionObject *)descriptor;
PyCodeObject *fcode = (PyCodeObject *)func->func_code;
int kind = function_kind(fcode);
if (kind != SIMPLE_FUNCTION) {
SPECIALIZATION_FAIL(PRECALL, kind);
return -1;
}
assert(tp->tp_version_tag != 0);
cache0->version = tp->tp_version_tag;
int version = _PyFunction_GetVersionForCurrentState(func);
if (version == 0 || version != (uint16_t)version) {
SPECIALIZATION_FAIL(PRECALL, SPEC_FAIL_OUT_OF_VERSIONS);
return -1;
}
cache1->func_version = version;
((PyHeapTypeObject *)tp)->_spec_cache.init = descriptor;
*instr = _Py_MAKECODEUNIT(PRECALL_PY_CLASS, _Py_OPARG(*instr));
return 0;
}
SPECIALIZATION_FAIL(PRECALL, SPEC_FAIL_CALL_PYTHON_CLASS_NON_PY_INIT);
return -1;
}
if (tp->tp_flags & Py_TPFLAGS_IMMUTABLETYPE) {
Expand Down