Skip to content

Commit 7c1f2a6

Browse files
committed
Simplify and speed up interpreter for f-strings. Split FORMAT_VALUE opcode into CONVERT_VALUE, FORMAT_SIMPLE and FORMAT_WITH_SPEC.
Compiler can then emit optimal sequence for each format expression.
1 parent 7ecd342 commit 7c1f2a6

File tree

13 files changed

+5022
-5006
lines changed

13 files changed

+5022
-5006
lines changed

Include/opcode.h

Lines changed: 20 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/dis.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
1717
classmethod, staticmethod, type)
1818

19-
FORMAT_VALUE = opmap['FORMAT_VALUE']
19+
CONVERT_VALUE = opmap['CONVERT_VALUE']
2020
FORMAT_VALUE_CONVERTERS = (
2121
(None, ''),
2222
(str, 'str'),
@@ -415,13 +415,9 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
415415
elif op in hascompare:
416416
argval = cmp_op[arg]
417417
argrepr = argval
418-
elif op == FORMAT_VALUE:
419-
argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
420-
argval = (argval, bool(arg & 0x4))
421-
if argval[1]:
422-
if argrepr:
423-
argrepr += ', '
424-
argrepr += 'with format'
418+
elif op == CONVERT_VALUE:
419+
argval = (None, str, repr, ascii)[arg]
420+
argrepr = ('', 'str', 'repr', 'ascii')[arg]
425421
elif op == MAKE_FUNCTION:
426422
argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS)
427423
if arg & (1<<i))

Lib/importlib/_bootstrap_external.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ def _write_atomic(path, data, mode=0o666):
330330
# this might affect the first line number #32911)
331331
# Python 3.8a1 3400 (move frame block handling to compiler #17611)
332332
# Python 3.8a1 3401 (add END_ASYNC_FOR #33041)
333+
# Python 3.8a1 3402 (more efficient bytecodes for f-strings #33092)
333334
# Python 3.8a1 3410 (PEP570 Python Positional-Only Parameters #36540)
334335
# Python 3.8b2 3411 (Reverse evaluation order of key: value in dict
335336
# comprehensions #35224)

Lib/opcode.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,11 @@ def jabs_op(name, op):
9090

9191
def_op('POP_EXCEPT_AND_RERAISE', 37)
9292

93+
def_op('FORMAT_SIMPLE', 40)
94+
def_op('FORMAT_WITH_SPEC', 41)
95+
9396
def_op('WITH_EXCEPT_START', 49)
97+
9498
def_op('GET_AITER', 50)
9599
def_op('GET_ANEXT', 51)
96100
def_op('BEFORE_ASYNC_WITH', 52)
@@ -202,10 +206,11 @@ def jabs_op(name, op):
202206
def_op('MAP_ADD', 147)
203207
def_op('LOAD_CLASSDEREF', 148)
204208
hasfree.append(148)
209+
def_op('CONVERT_VALUE', 149)
205210

206211
def_op('MATCH_CLASS', 152)
207212

208-
def_op('FORMAT_VALUE', 155)
213+
209214
def_op('BUILD_CONST_KEY_MAP', 156)
210215
def_op('BUILD_STRING', 157)
211216

Lib/test/test_dis.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -334,20 +334,22 @@ def _fstring(a, b, c, d):
334334

335335
dis_fstring = """\
336336
%3d 0 LOAD_FAST 0 (a)
337-
2 FORMAT_VALUE 0
337+
2 FORMAT_SIMPLE
338338
4 LOAD_CONST 1 (' ')
339339
6 LOAD_FAST 1 (b)
340340
8 LOAD_CONST 2 ('4')
341-
10 FORMAT_VALUE 4 (with format)
341+
10 FORMAT_WITH_SPEC
342342
12 LOAD_CONST 1 (' ')
343343
14 LOAD_FAST 2 (c)
344-
16 FORMAT_VALUE 2 (repr)
345-
18 LOAD_CONST 1 (' ')
346-
20 LOAD_FAST 3 (d)
347-
22 LOAD_CONST 2 ('4')
348-
24 FORMAT_VALUE 6 (repr, with format)
349-
26 BUILD_STRING 7
350-
28 RETURN_VALUE
344+
16 CONVERT_VALUE 2 (repr)
345+
18 FORMAT_SIMPLE
346+
20 LOAD_CONST 1 (' ')
347+
22 LOAD_FAST 3 (d)
348+
24 CONVERT_VALUE 2 (repr)
349+
26 LOAD_CONST 2 ('4')
350+
28 FORMAT_WITH_SPEC
351+
30 BUILD_STRING 7
352+
32 RETURN_VALUE
351353
""" % (_fstring.__code__.co_firstlineno + 1,)
352354

353355
def _tryfinally(a, b):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Simplify and speed up the interpreter for f-strings. Remove the ``FORMAT_VALUE``
2+
opcode. Add the ``CONVERT_VALUE``, ``FORMAT_SIMPLE`` and ``FORMAT_WITH_SPEC`` opcodes.
3+
The compiler emits a more efficient sequence for each format expression.

Programs/test_frozenmain.h

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/ceval.c

Lines changed: 41 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,14 @@ static void format_kwargs_error(PyThreadState *, PyObject *func, PyObject *kwarg
9999
static void format_awaitable_error(PyThreadState *, PyTypeObject *, int, int);
100100
static int get_exception_handler(PyCodeObject *, int, int*, int*, int*);
101101

102+
typedef PyObject *(*convertion_func_ptr)(PyObject *);
103+
104+
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
105+
[FVC_STR] = PyObject_Str,
106+
[FVC_REPR] = PyObject_Repr,
107+
[FVC_ASCII] = PyObject_ASCII
108+
};
109+
102110
#define NAME_ERROR_MSG \
103111
"name '%.200s' is not defined"
104112
#define UNBOUNDLOCAL_ERROR_MSG \
@@ -4346,7 +4354,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
43464354
oparg = cache->adaptive.original_oparg;
43474355
STAT_DEC(LOAD_METHOD, unquickened);
43484356
JUMP_TO_INSTRUCTION(LOAD_METHOD);
4349-
}
4357+
}
43504358
}
43514359

43524360
TARGET(LOAD_METHOD_CACHED): {
@@ -4364,7 +4372,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
43644372
assert(cache1->tp_version != 0);
43654373
assert(self_cls->tp_dictoffset >= 0);
43664374
assert(Py_TYPE(self_cls)->tp_dictoffset > 0);
4367-
4375+
43684376
// inline version of _PyObject_GetDictPtr for offset >= 0
43694377
PyObject *dict = self_cls->tp_dictoffset != 0 ?
43704378
*(PyObject **) ((char *)self + self_cls->tp_dictoffset) : NULL;
@@ -4627,62 +4635,47 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
46274635
DISPATCH();
46284636
}
46294637

4630-
TARGET(FORMAT_VALUE): {
4631-
/* Handles f-string value formatting. */
4638+
TARGET(CONVERT_VALUE): {
46324639
PyObject *result;
4633-
PyObject *fmt_spec;
4634-
PyObject *value;
4635-
PyObject *(*conv_fn)(PyObject *);
4636-
int which_conversion = oparg & FVC_MASK;
4637-
int have_fmt_spec = (oparg & FVS_MASK) == FVS_HAVE_SPEC;
4638-
4639-
fmt_spec = have_fmt_spec ? POP() : NULL;
4640-
value = POP();
4641-
4642-
/* See if any conversion is specified. */
4643-
switch (which_conversion) {
4644-
case FVC_NONE: conv_fn = NULL; break;
4645-
case FVC_STR: conv_fn = PyObject_Str; break;
4646-
case FVC_REPR: conv_fn = PyObject_Repr; break;
4647-
case FVC_ASCII: conv_fn = PyObject_ASCII; break;
4648-
default:
4649-
_PyErr_Format(tstate, PyExc_SystemError,
4650-
"unexpected conversion flag %d",
4651-
which_conversion);
4640+
PyObject *value = POP();
4641+
convertion_func_ptr conv_fn;
4642+
assert(oparg >= FVC_STR && oparg <= FVC_ASCII);
4643+
conv_fn = CONVERSION_FUNCTIONS[oparg];
4644+
result = conv_fn(value);
4645+
Py_DECREF(value);
4646+
if (result == NULL) {
46524647
goto error;
46534648
}
4649+
PUSH(result);
4650+
DISPATCH();
4651+
}
46544652

4655-
/* If there's a conversion function, call it and replace
4656-
value with that result. Otherwise, just use value,
4657-
without conversion. */
4658-
if (conv_fn != NULL) {
4659-
result = conv_fn(value);
4660-
Py_DECREF(value);
4653+
TARGET(FORMAT_SIMPLE): {
4654+
PyObject *value = TOP();
4655+
/* If value is a unicode object, then we know the result
4656+
* of format(value) is value itself. */
4657+
if (!PyUnicode_CheckExact(value)) {
4658+
PyObject *result = PyObject_Format(value, NULL);
46614659
if (result == NULL) {
4662-
Py_XDECREF(fmt_spec);
46634660
goto error;
46644661
}
4665-
value = result;
4666-
}
4667-
4668-
/* If value is a unicode object, and there's no fmt_spec,
4669-
then we know the result of format(value) is value
4670-
itself. In that case, skip calling format(). I plan to
4671-
move this optimization in to PyObject_Format()
4672-
itself. */
4673-
if (PyUnicode_CheckExact(value) && fmt_spec == NULL) {
4674-
/* Do nothing, just transfer ownership to result. */
4675-
result = value;
4676-
} else {
4677-
/* Actually call format(). */
4678-
result = PyObject_Format(value, fmt_spec);
4662+
SET_TOP(result);
46794663
Py_DECREF(value);
4680-
Py_XDECREF(fmt_spec);
4681-
if (result == NULL) {
4682-
goto error;
4683-
}
46844664
}
4665+
DISPATCH();
4666+
}
46854667

4668+
TARGET(FORMAT_WITH_SPEC): {
4669+
PyObject *fmt_spec = POP();
4670+
PyObject *value = POP();
4671+
PyObject *result;
4672+
/* Call format(). */
4673+
result = PyObject_Format(value, fmt_spec);
4674+
Py_DECREF(value);
4675+
Py_DECREF(fmt_spec);
4676+
if (result == NULL) {
4677+
goto error;
4678+
}
46864679
PUSH(result);
46874680
DISPATCH();
46884681
}

Python/compile.c

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,10 +1231,12 @@ stack_effect(int opcode, int oparg, int jump)
12311231
return 0;
12321232
case END_ASYNC_FOR:
12331233
return -4;
1234-
case FORMAT_VALUE:
1235-
/* If there's a fmt_spec on the stack, we go from 2->1,
1236-
else 1->1. */
1237-
return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0;
1234+
case CONVERT_VALUE:
1235+
return 0;
1236+
case FORMAT_WITH_SPEC:
1237+
return -1;
1238+
case FORMAT_SIMPLE:
1239+
return 0;
12381240
case LOAD_METHOD:
12391241
return 1;
12401242
case LOAD_ASSERTION_ERROR:
@@ -4486,26 +4488,26 @@ compiler_formatted_value(struct compiler *c, expr_ty e)
44864488

44874489
/* The expression to be formatted. */
44884490
VISIT(c, expr, e->v.FormattedValue.value);
4489-
4490-
switch (conversion) {
4491-
case 's': oparg = FVC_STR; break;
4492-
case 'r': oparg = FVC_REPR; break;
4493-
case 'a': oparg = FVC_ASCII; break;
4494-
case -1: oparg = FVC_NONE; break;
4495-
default:
4491+
if (e->v.FormattedValue.conversion != -1) {
4492+
switch (e->v.FormattedValue.conversion) {
4493+
case 's': oparg = FVC_STR; break;
4494+
case 'r': oparg = FVC_REPR; break;
4495+
case 'a': oparg = FVC_ASCII; break;
4496+
default:
44964497
PyErr_Format(PyExc_SystemError,
44974498
"Unrecognized conversion character %d", conversion);
4498-
return 0;
4499+
return 0;
4500+
}
4501+
ADDOP_I(c, CONVERT_VALUE, oparg);
44994502
}
45004503
if (e->v.FormattedValue.format_spec) {
4501-
/* Evaluate the format spec, and update our opcode arg. */
4504+
/* Evaluate the format spec, and emit format opcode. */
45024505
VISIT(c, expr, e->v.FormattedValue.format_spec);
4503-
oparg |= FVS_HAVE_SPEC;
4506+
ADDOP(c, FORMAT_WITH_SPEC);
4507+
} else {
4508+
ADDOP(c, FORMAT_SIMPLE);
45044509
}
45054510

4506-
/* And push our opcode and oparg */
4507-
ADDOP_I(c, FORMAT_VALUE, oparg);
4508-
45094511
return 1;
45104512
}
45114513

0 commit comments

Comments
 (0)