Skip to content

gh-95382: Improve performance of json encoder with indent #118105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
c1cfcf5
Fix docstring of _json.Encoder
eendebakpt Apr 18, 2024
30f2e72
Implement indent for _json
eendebakpt Apr 18, 2024
1da39f3
refactor
eendebakpt Apr 18, 2024
0d85551
update comments
eendebakpt Apr 19, 2024
cc02a13
fix decrefs
eendebakpt Apr 19, 2024
a444701
Apply suggestions from code review
eendebakpt Apr 20, 2024
8d4d48a
Merge branch 'main' into json_c_indent
JelleZijlstra Apr 21, 2024
ed989b8
review comments
eendebakpt Apr 21, 2024
5df567b
whitespace
eendebakpt Apr 21, 2024
5fdc279
whitespace
eendebakpt Apr 21, 2024
1ec550f
Merge branch 'main' into json_c_indent
eendebakpt Apr 21, 2024
5e47a41
address review comments
eendebakpt Apr 22, 2024
eefc508
Merge branch 'main' into json_c_indent
eendebakpt Apr 22, 2024
a7f4bc6
pep7
eendebakpt Apr 22, 2024
35601c7
pep7
eendebakpt Apr 22, 2024
311b7df
Update Modules/_json.c
eendebakpt Apr 24, 2024
ed2c806
Update Modules/_json.c
eendebakpt Apr 24, 2024
2faf554
Merge branch 'main' into json_c_indent
eendebakpt Apr 24, 2024
a407b84
review comments
eendebakpt Apr 25, 2024
ac86ee4
Update Modules/_json.c
eendebakpt Apr 25, 2024
3b55d64
review comments
eendebakpt Apr 25, 2024
bb4ff43
rename newline_indent variables
eendebakpt Apr 25, 2024
9ef9332
code style
eendebakpt Apr 25, 2024
ed029a6
rename variable
eendebakpt Apr 25, 2024
36e3313
Update Modules/_json.c
eendebakpt May 3, 2024
b69d08e
Merge branch 'main' into json_c_indent
eendebakpt May 3, 2024
e78ff6a
📜🤖 Added by blurb_it.
blurb-it[bot] May 3, 2024
5c40126
Update Misc/NEWS.d/next/Core and Builtins/2024-05-03-18-01-26.gh-issu…
eendebakpt May 3, 2024
a43f2f2
update news entry
eendebakpt May 3, 2024
f2b0c06
Merge branch 'main' into json_c_indent
eendebakpt May 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions Lib/json/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,15 +244,18 @@ def floatstr(o, allow_nan=self.allow_nan,
return text


if (_one_shot and c_make_encoder is not None
and self.indent is None):
if self.indent is None or isinstance(self.indent, str):
indent = self.indent
else:
indent = ' ' * self.indent
if _one_shot and c_make_encoder is not None:
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
markers, self.default, _encoder, indent,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan)
else:
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
markers, self.default, _encoder, indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot)
return _iterencode(o, 0)
Expand All @@ -272,9 +275,6 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_intstr=int.__repr__,
):

if _indent is not None and not isinstance(_indent, str):
_indent = ' ' * _indent

def _iterencode_list(lst, _current_indent_level):
if not lst:
yield '[]'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve performance of :func:`json.dumps` and :func:`json.dump` when using the argument *indent*. Depending on the data, encoding with
:func:`json.dumps` and *indent* can be up to 2 to 3 times faster.
136 changes: 96 additions & 40 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self);
static int
encoder_clear(PyEncoderObject *self);
static int
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level);
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
static int
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level);
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
static int
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level);
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
static PyObject *
_encoded_const(PyObject *obj);
static void
Expand Down Expand Up @@ -1251,6 +1251,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return (PyObject *)s;
}

/* Build the string written before each item at a given nesting depth:
 * a newline followed by `indent` repeated `indent_level` times.
 *
 * Returns a new reference, or NULL if creating the newline string fails.
 * When the repeat or the append fails, PyUnicode_AppendAndDel() is relied
 * on to clear the partial result, so NULL is returned in that case too.
 */
static PyObject *
_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
{
    PyObject *result = PyUnicode_FromOrdinal('\n');

    /* Nothing to append on allocation failure or at nesting depth zero. */
    if (result == NULL || indent_level == 0) {
        return result;
    }

    /* `padding` may be NULL here; PyUnicode_AppendAndDel() accepts that,
     * drops its reference to `padding`, and leaves `result` NULL on error. */
    PyObject *padding = PySequence_Repeat(indent, indent_level);
    PyUnicode_AppendAndDel(&result, padding);
    return result;
}

static PyObject *
encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
{
Expand All @@ -1267,10 +1278,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
_PyUnicodeWriter_Init(&writer);
writer.overallocate = 1;

if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
PyObject *newline_indent = NULL;
if (self->indent != Py_None) {
newline_indent = _create_newline_indent(self->indent, indent_level);
if (newline_indent == NULL) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
}
if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
_PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(newline_indent);
return NULL;
}
Py_XDECREF(newline_indent);

result = PyTuple_New(1);
if (result == NULL ||
Expand Down Expand Up @@ -1358,7 +1379,7 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)

static int
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *obj, Py_ssize_t indent_level)
PyObject *obj, PyObject *newline_indent)
{
/* Encode Python object obj to a JSON term */
PyObject *newobj;
Expand Down Expand Up @@ -1394,14 +1415,14 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1;
rv = encoder_listencode_list(s, writer, obj, indent_level);
rv = encoder_listencode_list(s, writer, obj, newline_indent);
_Py_LeaveRecursiveCall();
return rv;
}
else if (PyDict_Check(obj)) {
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1;
rv = encoder_listencode_dict(s, writer, obj, indent_level);
rv = encoder_listencode_dict(s, writer, obj, newline_indent);
_Py_LeaveRecursiveCall();
return rv;
}
Expand Down Expand Up @@ -1435,7 +1456,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
Py_XDECREF(ident);
return -1;
}
rv = encoder_listencode_obj(s, writer, newobj, indent_level);
rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
_Py_LeaveRecursiveCall();

Py_DECREF(newobj);
Expand All @@ -1456,7 +1477,9 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,

static int
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
PyObject *key, PyObject *value, Py_ssize_t indent_level)
PyObject *key, PyObject *value,
PyObject *newline_indent,
PyObject *item_separator)
{
PyObject *keystr = NULL;
PyObject *encoded;
Expand Down Expand Up @@ -1493,7 +1516,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
*first = false;
}
else {
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
Py_DECREF(keystr);
return -1;
}
Expand All @@ -1511,21 +1534,23 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
return -1;
}
if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
return -1;
}
return 0;
}

static int
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *dct, Py_ssize_t indent_level)
PyObject *dct, PyObject *newline_indent)
{
/* Encode Python dict dct to a JSON term */
PyObject *ident = NULL;
PyObject *items = NULL;
PyObject *key, *value;
bool first = true;
PyObject *new_newline_indent = NULL;
PyObject *separator_indent = NULL;

if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
Expand All @@ -1549,14 +1574,21 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteChar(writer, '{'))
goto bail;

PyObject *current_item_separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) {
/* TODO: DOES NOT RUN */
indent_level += 1;
/*
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
separator = _item_separator + newline_indent
buf += newline_indent
*/
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
if (new_newline_indent == NULL) {
goto bail;
}
separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
if (separator_indent == NULL) {
goto bail;
}
// update item separator with a borrowed reference
current_item_separator = separator_indent;
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
goto bail;
}
}

if (s->sort_keys || !PyDict_CheckExact(dct)) {
Expand All @@ -1574,15 +1606,19 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,

key = PyTuple_GET_ITEM(item, 0);
value = PyTuple_GET_ITEM(item, 1);
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
if (encoder_encode_key_value(s, writer, &first, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail;
}
Py_CLEAR(items);

} else {
Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) {
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
if (encoder_encode_key_value(s, writer, &first, key, value,
new_newline_indent,
current_item_separator) < 0)
goto bail;
}
}
Expand All @@ -1592,29 +1628,36 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
goto bail;
Py_CLEAR(ident);
}
/* TODO DOES NOT RUN; dead code
if (s->indent != Py_None) {
indent_level -= 1;
Py_CLEAR(new_newline_indent);
Py_CLEAR(separator_indent);

if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
goto bail;
}
}

yield '\n' + (' ' * (_indent * _current_indent_level))
}*/
if (_PyUnicodeWriter_WriteChar(writer, '}'))
goto bail;
return 0;

bail:
Py_XDECREF(items);
Py_XDECREF(ident);
Py_XDECREF(separator_indent);
Py_XDECREF(new_newline_indent);
return -1;
}

static int
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *seq, Py_ssize_t indent_level)
PyObject *seq, PyObject *newline_indent)
{
PyObject *ident = NULL;
PyObject *s_fast = NULL;
Py_ssize_t i;
PyObject *new_newline_indent = NULL;
PyObject *separator_indent = NULL;

ident = NULL;
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
Expand Down Expand Up @@ -1643,22 +1686,31 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,

if (_PyUnicodeWriter_WriteChar(writer, '['))
goto bail;

PyObject *separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) {
/* TODO: DOES NOT RUN */
indent_level += 1;
/*
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
separator = _item_separator + newline_indent
buf += newline_indent
*/
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
if (new_newline_indent == NULL) {
goto bail;
}

if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
goto bail;
}

separator_indent = PyUnicode_Concat(separator, new_newline_indent);
if (separator_indent == NULL) {
goto bail;
}
separator = separator_indent; // assign separator with borrowed reference
}
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
if (i) {
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail;
}
if (encoder_listencode_obj(s, writer, obj, indent_level))
if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
goto bail;
}
if (ident != NULL) {
Expand All @@ -1667,12 +1719,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
Py_CLEAR(ident);
}

/* TODO: DOES NOT RUN
if (s->indent != Py_None) {
indent_level -= 1;
Py_CLEAR(new_newline_indent);
Py_CLEAR(separator_indent);
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
goto bail;
}
}

yield '\n' + (' ' * (_indent * _current_indent_level))
}*/
if (_PyUnicodeWriter_WriteChar(writer, ']'))
goto bail;
Py_DECREF(s_fast);
Expand All @@ -1681,6 +1735,8 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
bail:
Py_XDECREF(ident);
Py_DECREF(s_fast);
Py_XDECREF(separator_indent);
Py_XDECREF(new_newline_indent);
return -1;
}

Expand Down Expand Up @@ -1721,7 +1777,7 @@ encoder_clear(PyEncoderObject *self)
return 0;
}

PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
PyDoc_STRVAR(encoder_doc, "Encoder(markers, default, encoder, indent, key_separator, item_separator, sort_keys, skipkeys, allow_nan)");

static PyType_Slot PyEncoderType_slots[] = {
{Py_tp_doc, (void *)encoder_doc},
Expand Down
Loading