Skip to content

Commit 5aa707d

Browse files
authored
[mypyc] Use METH_FASTCALL with "__call__" (#9946)
Allocate a vectorcall function pointer as a struct field for native classes that include `__call__`, including nested functions. This lets us use METH_FASTCALL wrapper functions with `__call__` methods. See https://www.python.org/dev/peps/pep-0590/ for details of why we jump through these hoops. This makes the `nested_func` microbenchmark about 1.5x faster. Follow-up to #9894.
1 parent: 5d2ea16 · commit: 5aa707d

File tree

5 files changed: 43 additions and 11 deletions.

mypyc/codegen/emitclass.py

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
"""Code generation for native classes and related wrappers."""
22

3-
43
from typing import Optional, List, Tuple, Dict, Callable, Mapping, Set
4+
55
from mypy.ordered_dict import OrderedDict
66

7-
from mypyc.common import PREFIX, NATIVE_PREFIX, REG_PREFIX, USE_FASTCALL
7+
from mypyc.common import PREFIX, NATIVE_PREFIX, REG_PREFIX, USE_FASTCALL, USE_VECTORCALL
88
from mypyc.codegen.emit import Emitter, HeaderDeclaration
99
from mypyc.codegen.emitfunc import native_function_header
1010
from mypyc.codegen.emitwrapper import (
@@ -35,7 +35,7 @@ def wrapper_slot(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
3535

3636
SLOT_DEFS = {
3737
'__init__': ('tp_init', lambda c, t, e: generate_init_for_class(c, t, e)),
38-
'__call__': ('tp_call', wrapper_slot),
38+
'__call__': ('tp_call', lambda c, t, e: generate_call_wrapper(c, t, e)),
3939
'__str__': ('tp_str', native_slot),
4040
'__repr__': ('tp_repr', native_slot),
4141
'__next__': ('tp_iternext', native_slot),
@@ -71,6 +71,15 @@ def wrapper_slot(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
7171
}
7272

7373

74+
def generate_call_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
75+
if USE_VECTORCALL:
76+
# Use vectorcall wrapper if supported (PEP 590).
77+
return 'PyVectorcall_Call'
78+
else:
79+
# On older Pythons use the legacy wrapper.
80+
return wrapper_slot(cl, fn, emitter)
81+
82+
7483
def generate_slots(cl: ClassIR, table: SlotTable, emitter: Emitter) -> Dict[str, str]:
7584
fields = OrderedDict() # type: Dict[str, str]
7685
# Sort for determinism on Python 3.5
@@ -241,6 +250,10 @@ def emit_line() -> None:
241250
flags = ['Py_TPFLAGS_DEFAULT', 'Py_TPFLAGS_HEAPTYPE', 'Py_TPFLAGS_BASETYPE']
242251
if generate_full:
243252
flags.append('Py_TPFLAGS_HAVE_GC')
253+
if cl.has_method('__call__') and USE_VECTORCALL:
254+
fields['tp_vectorcall_offset'] = 'offsetof({}, vectorcall)'.format(
255+
cl.struct_name(emitter.names))
256+
flags.append('_Py_TPFLAGS_HAVE_VECTORCALL')
244257
fields['tp_flags'] = ' | '.join(flags)
245258

246259
emitter.emit_line("static PyTypeObject {}_template_ = {{".format(emitter.type_struct_name(cl)))
@@ -277,6 +290,8 @@ def generate_object_struct(cl: ClassIR, emitter: Emitter) -> None:
277290
lines += ['typedef struct {',
278291
'PyObject_HEAD',
279292
'CPyVTableItem *vtable;']
293+
if cl.has_method('__call__') and USE_VECTORCALL:
294+
lines.append('vectorcallfunc vectorcall;')
280295
for base in reversed(cl.base_mro):
281296
if not base.is_trait:
282297
for attr, rtype in base.attributes.items():
@@ -451,6 +466,10 @@ def generate_setup_for_class(cl: ClassIR,
451466
else:
452467
emitter.emit_line('self->vtable = {};'.format(vtable_name))
453468

469+
if cl.has_method('__call__') and USE_VECTORCALL:
470+
name = cl.method_decl('__call__').cname(emitter.names)
471+
emitter.emit_line('self->vectorcall = {}{};'.format(PREFIX, name))
472+
454473
for base in reversed(cl.base_mro):
455474
for attr, rtype in base.attributes.items():
456475
emitter.emit_line('self->{} = {};'.format(

mypyc/codegen/emitmodule.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
from mypyc.irbuild.mapper import Mapper
2525
from mypyc.common import (
2626
PREFIX, TOP_LEVEL_NAME, INT_PREFIX, MODULE_PREFIX, RUNTIME_C_FILES, USE_FASTCALL,
27-
shared_lib_name,
27+
USE_VECTORCALL, shared_lib_name,
2828
)
2929
from mypyc.codegen.cstring import encode_as_c_string, encode_bytes_as_c_string
3030
from mypyc.codegen.emit import EmitterContext, Emitter, HeaderDeclaration
@@ -1071,5 +1071,10 @@ def visit(item: T) -> None:
10711071

10721072

10731073
def is_fastcall_supported(fn: FuncIR) -> bool:
1074-
# TODO: Support METH_FASTCALL for all methods.
1075-
return USE_FASTCALL and (fn.class_name is None or fn.name not in ('__init__', '__call__'))
1074+
if fn.class_name is not None:
1075+
if fn.name == '__call__':
1076+
# We can use vectorcalls (PEP 590) when supported
1077+
return USE_VECTORCALL
1078+
# TODO: Support fastcall for __init__.
1079+
return USE_FASTCALL and fn.name != '__init__'
1080+
return USE_FASTCALL

mypyc/codegen/emitwrapper.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414

1515
from mypy.nodes import ARG_POS, ARG_OPT, ARG_NAMED_OPT, ARG_NAMED, ARG_STAR, ARG_STAR2
1616

17-
from mypyc.common import PREFIX, NATIVE_PREFIX, DUNDER_PREFIX
17+
from mypyc.common import PREFIX, NATIVE_PREFIX, DUNDER_PREFIX, USE_VECTORCALL
1818
from mypyc.codegen.emit import Emitter
1919
from mypyc.ir.rtypes import (
2020
RType, is_object_rprimitive, is_int_rprimitive, is_bool_rprimitive, object_rprimitive
@@ -157,9 +157,13 @@ def generate_wrapper_function(fn: FuncIR,
157157
arg_ptrs += ['&obj_{}'.format(groups[ARG_STAR2][0].name) if groups[ARG_STAR2] else 'NULL']
158158
arg_ptrs += ['&obj_{}'.format(arg.name) for arg in reordered_args]
159159

160+
if fn.name == '__call__' and USE_VECTORCALL:
161+
nargs = 'PyVectorcall_NARGS(nargs)'
162+
else:
163+
nargs = 'nargs'
160164
emitter.emit_lines(
161-
'if (!CPyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser{})) {{'.format(
162-
''.join(', ' + n for n in arg_ptrs)),
165+
'if (!CPyArg_ParseStackAndKeywords(args, {}, kwnames, &parser{})) {{'.format(
166+
nargs, ''.join(', ' + n for n in arg_ptrs)),
163167
'return NULL;',
164168
'}')
165169
traceback_code = generate_traceback_code(fn, emitter, source_path, module_name)

mypyc/common.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,11 @@
5050
MAX_LITERAL_SHORT_INT = (sys.maxsize >> 1 if not IS_MIXED_32_64_BIT_BUILD
5151
else 2**30 - 1) # type: Final
5252

53-
# We can use faster wrapper functions on Python 3.7+ (fastcall/vectorcall).
54-
USE_FASTCALL = sys.version_info >= (3, 7)
53+
# We can use METH_FASTCALL faster wrapper functions on Python 3.7+.
54+
USE_FASTCALL = sys.version_info >= (3, 7) # type: Final
55+
56+
# We can use vectorcalls on Python 3.8+ (PEP 590).
57+
USE_VECTORCALL = sys.version_info >= (3, 8) # type: Final
5558

5659
# Runtime C library files
5760
RUNTIME_C_FILES = [

mypyc/irbuild/callable_class.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,7 @@ def add_call_to_callable_class(builder: IRBuilder,
9797
call_fn_ir = FuncIR(call_fn_decl, args, blocks,
9898
fn_info.fitem.line, traceback_name=fn_info.fitem.name)
9999
fn_info.callable_class.ir.methods['__call__'] = call_fn_ir
100+
fn_info.callable_class.ir.method_decls['__call__'] = call_fn_decl
100101
return call_fn_ir
101102

102103

0 commit comments

Comments
 (0)