From c7b5cc26fb5dc84bcbd3603cef9289df1a6e7366 Mon Sep 17 00:00:00 2001 From: Aivars Kalvans Date: Wed, 27 Jul 2022 23:09:58 +0300 Subject: [PATCH 1/9] Fastpath for encoding dict to JSON When sorting of keys is not requested and we are encoding a dict, use PyDict_Next to iterate over keys and values. Leave the old path with PyMapping_Items that creates a list of key-value tuples for cases when sorting is requested. Don't use mapping items and iterator: we check and know we are using a dict and list. --- Modules/_json.c | 162 +++++++++++++++++++++++++----------------------- 1 file changed, 84 insertions(+), 78 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 7ea84efdb911bd..d25ec2c99d068b 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -12,6 +12,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_EnterRecursiveCall() #include "structmember.h" // PyMemberDef +#include <stdbool.h> // bool typedef struct _PyScannerObject { @@ -1491,17 +1492,73 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, } } +static int +encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, + PyObject *key, PyObject *value, Py_ssize_t indent_level) +{ + PyObject *keystr = NULL; + PyObject *encoded; + + if (PyUnicode_Check(key)) { + Py_INCREF(key); + keystr = key; + } + else if (PyFloat_Check(key)) { + keystr = encoder_encode_float(s, key); + } + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyLong_Check because + True and False are also 1 and 0.*/ + keystr = _encoded_const(key); + } + else if (PyLong_Check(key)) { + keystr = PyLong_Type.tp_repr(key); + } + else if (s->skipkeys) { + return 0; + } + else { + PyErr_Format(PyExc_TypeError, + "keys must be str, int, float, bool or None, " + "not %.100s", Py_TYPE(key)->tp_name); + return -1; + } + + if (keystr == NULL) + return -1; + + if (*first) { + *first = false; + } else { + if (_PyUnicodeWriter_WriteStr(writer, s->item_separator)) {
Py_DECREF(keystr); + return -1; + } + } + + encoded = encoder_encode_string(s, keystr); + Py_DECREF(keystr); + if (encoded == NULL) + return -1; + + if (_steal_accumulate(writer, encoded) < 0) + return -1; + if (_PyUnicodeWriter_WriteStr(writer, s->key_separator)) + return -1; + if (encoder_listencode_obj(s, writer, value, indent_level)) + return -1; + return 0; +} + static int encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level) { /* Encode Python dict dct a JSON term */ - PyObject *kstr = NULL; PyObject *ident = NULL; - PyObject *it = NULL; - PyObject *items; - PyObject *item = NULL; - Py_ssize_t idx; + PyObject *items = NULL; + PyObject *key, *value; + bool first = true; if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); @@ -1535,84 +1592,35 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, */ } - items = PyMapping_Items(dct); - if (items == NULL) - goto bail; - if (s->sort_keys && PyList_Sort(items) < 0) { - Py_DECREF(items); - goto bail; - } - it = PyObject_GetIter(items); - Py_DECREF(items); - if (it == NULL) - goto bail; - idx = 0; - while ((item = PyIter_Next(it)) != NULL) { - PyObject *encoded, *key, *value; - if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { - PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + if (s->sort_keys) { + Py_ssize_t i; + + items = PyDict_Items(dct); + if (items == NULL || PyList_Sort(items) < 0) goto bail; - } - key = PyTuple_GET_ITEM(item, 0); - if (PyUnicode_Check(key)) { - Py_INCREF(key); - kstr = key; - } - else if (PyFloat_Check(key)) { - kstr = encoder_encode_float(s, key); - if (kstr == NULL) - goto bail; - } - else if (key == Py_True || key == Py_False || key == Py_None) { - /* This must come before the PyLong_Check because - True and False are also 1 and 0.*/ - kstr = _encoded_const(key); - if (kstr == NULL) - goto bail; - } - else if (PyLong_Check(key)) { - kstr 
= PyLong_Type.tp_repr(key); - if (kstr == NULL) { + + for (i = 0; i < PyList_GET_SIZE(items); i++) { + PyObject *item = PyList_GET_ITEM(items, i); + + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); goto bail; } - } - else if (s->skipkeys) { - Py_DECREF(item); - continue; - } - else { - PyErr_Format(PyExc_TypeError, - "keys must be str, int, float, bool or None, " - "not %.100s", Py_TYPE(key)->tp_name); - goto bail; - } - if (idx) { - if (_PyUnicodeWriter_WriteStr(writer, s->item_separator)) + key = PyTuple_GET_ITEM(item, 0); + value = PyTuple_GET_ITEM(item, 1); + if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0) goto bail; } + Py_CLEAR(items); - encoded = encoder_encode_string(s, kstr); - Py_CLEAR(kstr); - if (encoded == NULL) - goto bail; - if (_PyUnicodeWriter_WriteStr(writer, encoded)) { - Py_DECREF(encoded); - goto bail; + } else { + Py_ssize_t pos = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0) + goto bail; } - Py_DECREF(encoded); - if (_PyUnicodeWriter_WriteStr(writer, s->key_separator)) - goto bail; - - value = PyTuple_GET_ITEM(item, 1); - if (encoder_listencode_obj(s, writer, value, indent_level)) - goto bail; - idx += 1; - Py_DECREF(item); } - if (PyErr_Occurred()) - goto bail; - Py_CLEAR(it); if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) @@ -1625,19 +1633,17 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, yield '\n' + (' ' * (_indent * _current_indent_level)) }*/ + if (_PyUnicodeWriter_WriteChar(writer, '}')) goto bail; return 0; bail: - Py_XDECREF(it); - Py_XDECREF(item); - Py_XDECREF(kstr); + Py_XDECREF(items); Py_XDECREF(ident); return -1; } - static int encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level) From 9137b1ba8d0675ed53a802c43a1c88e95625f6b5 Mon Sep 17 
00:00:00 2001 From: Aivars Kalvans Date: Thu, 28 Jul 2022 17:14:46 +0300 Subject: [PATCH 2/9] NEWS --- .../next/Library/2022-07-28-17-14-38.gh-issue-95385.6YlsDI.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-07-28-17-14-38.gh-issue-95385.6YlsDI.rst diff --git a/Misc/NEWS.d/next/Library/2022-07-28-17-14-38.gh-issue-95385.6YlsDI.rst b/Misc/NEWS.d/next/Library/2022-07-28-17-14-38.gh-issue-95385.6YlsDI.rst new file mode 100644 index 00000000000000..89fa9c2b27664d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-07-28-17-14-38.gh-issue-95385.6YlsDI.rst @@ -0,0 +1 @@ +Faster ``json.dumps()`` when sorting of keys is not requested (default). From 751e042c0b7d65dd438425e73b6e79994ead7bc1 Mon Sep 17 00:00:00 2001 From: Aivars Kalvans Date: Wed, 3 Aug 2022 20:34:10 +0300 Subject: [PATCH 3/9] explicit checks --- Modules/_json.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index d25ec2c99d068b..08e46a99d3d75d 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1543,9 +1543,9 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir if (_steal_accumulate(writer, encoded) < 0) return -1; - if (_PyUnicodeWriter_WriteStr(writer, s->key_separator)) + if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) return -1; - if (encoder_listencode_obj(s, writer, value, indent_level)) + if (encoder_listencode_obj(s, writer, value, indent_level) < 0) return -1; return 0; } From 8ae1d21876974ed9c1a472f6132f47e51310c3ca Mon Sep 17 00:00:00 2001 From: Aivars Kalvans Date: Thu, 4 Aug 2022 12:36:40 +0300 Subject: [PATCH 4/9] smaller diff --- Modules/_json.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_json.c b/Modules/_json.c index 08e46a99d3d75d..557f9cd845e1f6 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1595,7 +1595,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, if (s->sort_keys) { 
Py_ssize_t i; - items = PyDict_Items(dct); + items = PyMapping_Items(dct); if (items == NULL || PyList_Sort(items) < 0) goto bail; From 767729545f64def55b154099879348a8acc0be61 Mon Sep 17 00:00:00 2001 From: Aivars Kalvans Date: Fri, 5 Aug 2022 09:35:27 +0300 Subject: [PATCH 5/9] Use PyDict_Items --- Modules/_json.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_json.c b/Modules/_json.c index 557f9cd845e1f6..08e46a99d3d75d 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1595,7 +1595,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, if (s->sort_keys) { Py_ssize_t i; - items = PyMapping_Items(dct); + items = PyDict_Items(dct); if (items == NULL || PyList_Sort(items) < 0) goto bail; From a21f77af310136d8a0ddd699dd45faf9a480da51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aivars=20Kalv=C4=81ns?= Date: Fri, 5 Aug 2022 23:03:07 +0300 Subject: [PATCH 6/9] Update Modules/_json.c Co-authored-by: Dong-hee Na --- Modules/_json.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/_json.c b/Modules/_json.c index 08e46a99d3d75d..0fd545688b019b 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1633,7 +1633,6 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, yield '\n' + (' ' * (_indent * _current_indent_level)) }*/ - if (_PyUnicodeWriter_WriteChar(writer, '}')) goto bail; return 0; From 4734d70eac3909d321b4cc655aa533b7978082d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aivars=20Kalv=C4=81ns?= Date: Fri, 5 Aug 2022 23:03:23 +0300 Subject: [PATCH 7/9] Update Modules/_json.c Co-authored-by: Dong-hee Na --- Modules/_json.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/_json.c b/Modules/_json.c index 0fd545688b019b..cdaac31c02c595 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1593,7 +1593,6 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } if (s->sort_keys) { - Py_ssize_t i; items = PyDict_Items(dct); if (items == NULL || PyList_Sort(items) < 0) From 
79e68801834855cb2a9be7687881f199da346233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aivars=20Kalv=C4=81ns?= Date: Fri, 5 Aug 2022 23:03:34 +0300 Subject: [PATCH 8/9] Update Modules/_json.c Co-authored-by: Dong-hee Na --- Modules/_json.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_json.c b/Modules/_json.c index cdaac31c02c595..a295b206648f93 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1598,7 +1598,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, if (items == NULL || PyList_Sort(items) < 0) goto bail; - for (i = 0; i < PyList_GET_SIZE(items); i++) { + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { PyObject *item = PyList_GET_ITEM(items, i); if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { From fda2ee70d884079bf3dcd6fc8371bb22e0644469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aivars=20Kalv=C4=81ns?= Date: Fri, 5 Aug 2022 23:03:59 +0300 Subject: [PATCH 9/9] Update Modules/_json.c Co-authored-by: Dong-hee Na --- Modules/_json.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index a295b206648f93..1c39b46937d792 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1524,13 +1524,15 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir return -1; } - if (keystr == NULL) + if (keystr == NULL) { return -1; + } if (*first) { *first = false; - } else { - if (_PyUnicodeWriter_WriteStr(writer, s->item_separator)) { + } + else { + if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) { Py_DECREF(keystr); return -1; } @@ -1538,15 +1540,19 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir encoded = encoder_encode_string(s, keystr); Py_DECREF(keystr); - if (encoded == NULL) + if (encoded == NULL) { return -1; + } - if (_steal_accumulate(writer, encoded) < 0) + if (_steal_accumulate(writer, encoded) < 0) { return -1; - if 
(_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) + } + if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; - if (encoder_listencode_obj(s, writer, value, indent_level) < 0) + } + if (encoder_listencode_obj(s, writer, value, indent_level) < 0) { return -1; + } return 0; }