Skip to content

Commit 2e915fd

Browse files
pythongh-95382: Use cache for indentations in the JSON encoder
1 parent 05adfbb commit 2e915fd

File tree

1 file changed

+106
-68
lines changed

1 file changed

+106
-68
lines changed

Modules/_json.c

Lines changed: 106 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self);
8585
static int
8686
encoder_clear(PyEncoderObject *self);
8787
static int
88-
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
88+
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache);
8989
static int
90-
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
90+
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache);
9191
static int
92-
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
92+
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache);
9393
static PyObject *
9494
_encoded_const(PyObject *obj);
9595
static void
@@ -1252,14 +1252,81 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12521252
}
12531253

12541254
static PyObject *
1255-
_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
1255+
_create_indent_cache(PyEncoderObject *s, Py_ssize_t indent_level)
12561256
{
12571257
PyObject *newline_indent = PyUnicode_FromOrdinal('\n');
12581258
if (newline_indent != NULL && indent_level) {
12591259
PyUnicode_AppendAndDel(&newline_indent,
1260-
PySequence_Repeat(indent, indent_level));
1260+
PySequence_Repeat(s->indent, indent_level));
12611261
}
1262-
return newline_indent;
1262+
if (newline_indent == NULL) {
1263+
return NULL;
1264+
}
1265+
PyObject *indent_cache = PyList_New(2);
1266+
if (indent_cache == NULL) {
1267+
Py_XDECREF(newline_indent);
1268+
return NULL;
1269+
}
1270+
PyList_SET_ITEM(indent_cache, 0, newline_indent);
1271+
PyList_SET_ITEM(indent_cache, 1, Py_NewRef(Py_None)); // not used
1272+
return indent_cache;
1273+
}
1274+
1275+
static int
1276+
update_newline_indent(PyEncoderObject *s,
1277+
Py_ssize_t indent_level, PyObject *indent_cache)
1278+
{
1279+
assert(indent_level * 2 == PyList_GET_SIZE(indent_cache));
1280+
assert(indent_level > 0);
1281+
PyObject *newline_indent = PyList_GET_ITEM(indent_cache, (indent_level - 1)*2);
1282+
newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1283+
if (newline_indent == NULL) {
1284+
return -1;
1285+
}
1286+
if (PyList_Append(indent_cache, newline_indent) < 0) {
1287+
Py_DECREF(newline_indent);
1288+
return -1;
1289+
}
1290+
PyObject *separator_indent = PyUnicode_Concat(s->item_separator, newline_indent);
1291+
Py_DECREF(newline_indent);
1292+
if (PyList_Append(indent_cache, separator_indent) < 0) {
1293+
Py_DECREF(separator_indent);
1294+
return -1;
1295+
}
1296+
Py_DECREF(separator_indent);
1297+
return 0;
1298+
}
1299+
1300+
static PyObject *
1301+
do_indent(PyEncoderObject *s, _PyUnicodeWriter *writer,
1302+
Py_ssize_t indent_level, PyObject *indent_cache)
1303+
{
1304+
assert(indent_level > 0);
1305+
assert(s->indent != Py_None);
1306+
PyObject *newline_indent;
1307+
if (indent_level * 2 == PyList_GET_SIZE(indent_cache)) {
1308+
if (update_newline_indent(s, indent_level, indent_cache) < 0) {
1309+
return NULL;
1310+
}
1311+
}
1312+
assert(indent_level * 2 <= PyList_GET_SIZE(indent_cache) - 2);
1313+
1314+
newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2);
1315+
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1316+
return NULL;
1317+
}
1318+
return PyList_GET_ITEM(indent_cache, indent_level * 2 + 1);
1319+
}
1320+
1321+
static int
1322+
do_dedent(PyEncoderObject *s, _PyUnicodeWriter *writer,
1323+
Py_ssize_t indent_level, PyObject *indent_cache)
1324+
{
1325+
assert(indent_level >= 0);
1326+
assert(indent_level * 2 <= PyList_GET_SIZE(indent_cache) - 4);
1327+
assert(s->indent != Py_None);
1328+
PyObject *newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2);
1329+
return _PyUnicodeWriter_WriteStr(writer, newline_indent);
12631330
}
12641331

12651332
static PyObject *
@@ -1278,20 +1345,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
12781345
_PyUnicodeWriter_Init(&writer);
12791346
writer.overallocate = 1;
12801347

1281-
PyObject *newline_indent = NULL;
1348+
PyObject *indent_cache = NULL;
12821349
if (self->indent != Py_None) {
1283-
newline_indent = _create_newline_indent(self->indent, indent_level);
1284-
if (newline_indent == NULL) {
1350+
indent_cache = _create_indent_cache(self, indent_level);
1351+
if (indent_cache == NULL) {
12851352
_PyUnicodeWriter_Dealloc(&writer);
12861353
return NULL;
12871354
}
12881355
}
1289-
if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
1356+
if (encoder_listencode_obj(self, &writer, obj, 0, indent_cache)) {
12901357
_PyUnicodeWriter_Dealloc(&writer);
1291-
Py_XDECREF(newline_indent);
1358+
Py_XDECREF(indent_cache);
12921359
return NULL;
12931360
}
1294-
Py_XDECREF(newline_indent);
1361+
Py_XDECREF(indent_cache);
12951362

12961363
result = PyTuple_New(1);
12971364
if (result == NULL ||
@@ -1379,7 +1446,8 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
13791446

13801447
static int
13811448
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
1382-
PyObject *obj, PyObject *newline_indent)
1449+
PyObject *obj,
1450+
Py_ssize_t indent_level, PyObject *indent_cache)
13831451
{
13841452
/* Encode Python object obj to a JSON term */
13851453
PyObject *newobj;
@@ -1415,14 +1483,14 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
14151483
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
14161484
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
14171485
return -1;
1418-
rv = encoder_listencode_list(s, writer, obj, newline_indent);
1486+
rv = encoder_listencode_list(s, writer, obj, indent_level, indent_cache);
14191487
_Py_LeaveRecursiveCall();
14201488
return rv;
14211489
}
14221490
else if (PyDict_Check(obj)) {
14231491
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
14241492
return -1;
1425-
rv = encoder_listencode_dict(s, writer, obj, newline_indent);
1493+
rv = encoder_listencode_dict(s, writer, obj, indent_level, indent_cache);
14261494
_Py_LeaveRecursiveCall();
14271495
return rv;
14281496
}
@@ -1456,7 +1524,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
14561524
Py_XDECREF(ident);
14571525
return -1;
14581526
}
1459-
rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
1527+
rv = encoder_listencode_obj(s, writer, newobj, indent_level, indent_cache);
14601528
_Py_LeaveRecursiveCall();
14611529

14621530
Py_DECREF(newobj);
@@ -1478,7 +1546,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
14781546
static int
14791547
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
14801548
PyObject *key, PyObject *value,
1481-
PyObject *newline_indent,
1549+
Py_ssize_t indent_level, PyObject *indent_cache,
14821550
PyObject *item_separator)
14831551
{
14841552
PyObject *keystr = NULL;
@@ -1534,23 +1602,22 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
15341602
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
15351603
return -1;
15361604
}
1537-
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
1605+
if (encoder_listencode_obj(s, writer, value, indent_level, indent_cache) < 0) {
15381606
return -1;
15391607
}
15401608
return 0;
15411609
}
15421610

15431611
static int
15441612
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
1545-
PyObject *dct, PyObject *newline_indent)
1613+
PyObject *dct,
1614+
Py_ssize_t indent_level, PyObject *indent_cache)
15461615
{
15471616
/* Encode Python dict dct a JSON term */
15481617
PyObject *ident = NULL;
15491618
PyObject *items = NULL;
15501619
PyObject *key, *value;
15511620
bool first = true;
1552-
PyObject *new_newline_indent = NULL;
1553-
PyObject *separator_indent = NULL;
15541621

15551622
if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
15561623
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
@@ -1574,19 +1641,11 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
15741641
if (_PyUnicodeWriter_WriteChar(writer, '{'))
15751642
goto bail;
15761643

1577-
PyObject *current_item_separator = s->item_separator; // borrowed reference
1644+
PyObject *separator = s->item_separator; // borrowed reference
15781645
if (s->indent != Py_None) {
1579-
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1580-
if (new_newline_indent == NULL) {
1581-
goto bail;
1582-
}
1583-
separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
1584-
if (separator_indent == NULL) {
1585-
goto bail;
1586-
}
1587-
// update item separator with a borrowed reference
1588-
current_item_separator = separator_indent;
1589-
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1646+
indent_level++;
1647+
separator = do_indent(s, writer, indent_level, indent_cache);
1648+
if (separator == NULL) {
15901649
goto bail;
15911650
}
15921651
}
@@ -1607,8 +1666,8 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
16071666
key = PyTuple_GET_ITEM(item, 0);
16081667
value = PyTuple_GET_ITEM(item, 1);
16091668
if (encoder_encode_key_value(s, writer, &first, key, value,
1610-
new_newline_indent,
1611-
current_item_separator) < 0)
1669+
indent_level, indent_cache,
1670+
separator) < 0)
16121671
goto bail;
16131672
}
16141673
Py_CLEAR(items);
@@ -1617,8 +1676,8 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
16171676
Py_ssize_t pos = 0;
16181677
while (PyDict_Next(dct, &pos, &key, &value)) {
16191678
if (encoder_encode_key_value(s, writer, &first, key, value,
1620-
new_newline_indent,
1621-
current_item_separator) < 0)
1679+
indent_level, indent_cache,
1680+
separator) < 0)
16221681
goto bail;
16231682
}
16241683
}
@@ -1629,12 +1688,8 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
16291688
Py_CLEAR(ident);
16301689
}
16311690
if (s->indent != Py_None) {
1632-
Py_CLEAR(new_newline_indent);
1633-
Py_CLEAR(separator_indent);
1634-
1635-
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1636-
goto bail;
1637-
}
1691+
indent_level--;
1692+
do_dedent(s, writer, indent_level, indent_cache);
16381693
}
16391694

16401695
if (_PyUnicodeWriter_WriteChar(writer, '}'))
@@ -1644,20 +1699,17 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
16441699
bail:
16451700
Py_XDECREF(items);
16461701
Py_XDECREF(ident);
1647-
Py_XDECREF(separator_indent);
1648-
Py_XDECREF(new_newline_indent);
16491702
return -1;
16501703
}
16511704

16521705
static int
16531706
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
1654-
PyObject *seq, PyObject *newline_indent)
1707+
PyObject *seq,
1708+
Py_ssize_t indent_level, PyObject *indent_cache)
16551709
{
16561710
PyObject *ident = NULL;
16571711
PyObject *s_fast = NULL;
16581712
Py_ssize_t i;
1659-
PyObject *new_newline_indent = NULL;
1660-
PyObject *separator_indent = NULL;
16611713

16621714
ident = NULL;
16631715
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
@@ -1689,28 +1741,19 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
16891741

16901742
PyObject *separator = s->item_separator; // borrowed reference
16911743
if (s->indent != Py_None) {
1692-
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1693-
if (new_newline_indent == NULL) {
1694-
goto bail;
1695-
}
1696-
1697-
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1744+
indent_level++;
1745+
separator = do_indent(s, writer, indent_level, indent_cache);
1746+
if (separator == NULL) {
16981747
goto bail;
16991748
}
1700-
1701-
separator_indent = PyUnicode_Concat(separator, new_newline_indent);
1702-
if (separator_indent == NULL) {
1703-
goto bail;
1704-
}
1705-
separator = separator_indent; // assign separator with borrowed reference
17061749
}
17071750
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
17081751
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
17091752
if (i) {
17101753
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
17111754
goto bail;
17121755
}
1713-
if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
1756+
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache))
17141757
goto bail;
17151758
}
17161759
if (ident != NULL) {
@@ -1720,11 +1763,8 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
17201763
}
17211764

17221765
if (s->indent != Py_None) {
1723-
Py_CLEAR(new_newline_indent);
1724-
Py_CLEAR(separator_indent);
1725-
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1726-
goto bail;
1727-
}
1766+
indent_level--;
1767+
do_dedent(s, writer, indent_level, indent_cache);
17281768
}
17291769

17301770
if (_PyUnicodeWriter_WriteChar(writer, ']'))
@@ -1735,8 +1775,6 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
17351775
bail:
17361776
Py_XDECREF(ident);
17371777
Py_DECREF(s_fast);
1738-
Py_XDECREF(separator_indent);
1739-
Py_XDECREF(new_newline_indent);
17401778
return -1;
17411779
}
17421780

0 commit comments

Comments
 (0)