From afd9ea42c60d1396742ad348cd7ac508277fcb1e Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 12 Apr 2022 09:53:14 -0700 Subject: [PATCH 1/5] json: make JSON scanner thread safe --- Modules/_json.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 41495e2012f152..f945d44a06137a 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -24,7 +24,6 @@ typedef struct _PyScannerObject { PyObject *parse_float; PyObject *parse_int; PyObject *parse_constant; - PyObject *memo; } PyScannerObject; static PyMemberDef scanner_members[] = { @@ -70,7 +69,7 @@ ascii_escape_unicode(PyObject *pystr); static PyObject * py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr); static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +scan_once_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); static PyObject * @@ -631,7 +630,6 @@ scanner_traverse(PyScannerObject *self, visitproc visit, void *arg) Py_VISIT(self->parse_float); Py_VISIT(self->parse_int); Py_VISIT(self->parse_constant); - Py_VISIT(self->memo); return 0; } @@ -643,12 +641,11 @@ scanner_clear(PyScannerObject *self) Py_CLEAR(self->parse_float); Py_CLEAR(self->parse_int); Py_CLEAR(self->parse_constant); - Py_CLEAR(self->memo); return 0; } static PyObject * -_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +_parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON object from PyUnicode pystr. idx is the index of the first character after the opening curly brace. @@ -693,7 +690,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx); if (key == NULL) goto bail; - memokey = PyDict_SetDefault(s->memo, key, key); + memokey = PyDict_SetDefault(memo, key, key); if (memokey == NULL) { goto bail; } @@ -710,7 +707,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); + val = scan_once_unicode(s, memo, pystr, idx, &next_idx); if (val == NULL) goto bail; @@ -774,7 +771,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss } static PyObject * -_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON array from PyUnicode pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -805,7 +802,7 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi while (1) { /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); + val = scan_once_unicode(s, memo, pystr, idx, &next_idx); if (val == NULL) goto bail; @@ -986,7 +983,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +scan_once_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read one JSON term (of any kind) from PyUnicode pystr. idx is the index of the first character of the term @@ -1022,7 +1019,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ if (_Py_EnterRecursiveCall(" while decoding a JSON object " "from a unicode string")) return NULL; - res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + res = _parse_object_unicode(s, memo, pystr, idx + 1, next_idx_ptr); _Py_LeaveRecursiveCall(); return res; case '[': @@ -1030,7 +1027,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ if (_Py_EnterRecursiveCall(" while decoding a JSON array " "from a unicode string")) return NULL; - res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + res = _parse_array_unicode(s, memo, pystr, idx + 1, next_idx_ptr); _Py_LeaveRecursiveCall(); return res; case 'n': @@ -1106,16 +1103,19 @@ scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx)) return NULL; - if (PyUnicode_Check(pystr)) { - rval = scan_once_unicode(self, pystr, idx, &next_idx); - } - else { + if (!PyUnicode_Check(pystr)) { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", Py_TYPE(pystr)->tp_name); return NULL; } - PyDict_Clear(self->memo); + + PyObject *memo = PyDict_New(); + if (memo == NULL) { + return NULL; + } + rval = scan_once_unicode(self, memo, pystr, idx, &next_idx); + Py_DECREF(memo); if (rval == NULL) return NULL; return _build_rval_index_tuple(rval, next_idx); @@ -1137,10 +1137,6 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } - s->memo = PyDict_New(); - if (s->memo == NULL) - goto bail; - /* All of these will fail "gracefully" so we don't need to verify them */ strict = PyObject_GetAttrString(ctx, "strict"); if (strict == NULL) From da7758aae8ec54219f3931d2a5716fa232d5244d Mon Sep 17 00:00:00 2001 From: AN Long Date: Sun, 12 Nov 2023 21:39:50 +0800 Subject: [PATCH 2/5] Add new entry --- .../next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst diff --git a/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst b/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst new file mode 100644 index 00000000000000..21852b57a4c198 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst @@ -0,0 +1 @@ +Make the ``memo`` dict thread safe without the GIL in mod:`json`. From 79f26fa096098737b6d700fbc9cd957ecc69dcd8 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sun, 12 Nov 2023 21:47:51 +0800 Subject: [PATCH 3/5] fix new entry --- .../next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst b/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst index 21852b57a4c198..e2deeb32ecc0c9 100644 --- a/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst +++ b/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst @@ -1 +1 @@ -Make the ``memo`` dict thread safe without the GIL in mod:`json`. +Make the ``memo`` dict thread safe without the GIL in :mod:`json`. From 3846ef24c268ad1bf8a0211e7c1ce40c10c92923 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sun, 12 Nov 2023 22:11:50 +0800 Subject: [PATCH 4/5] fix indentation --- Modules/_json.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index f945d44a06137a..0b1bfe34ad9304 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1105,8 +1105,8 @@ scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds) if (!PyUnicode_Check(pystr)) { PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); return NULL; } From fe5e1d01a20816cd860f87befab6f4a0e715c116 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 13 Nov 2023 13:33:42 +0900 Subject: [PATCH 5/5] Delete Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst --- .../next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst diff --git a/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst b/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst deleted file mode 100644 index e2deeb32ecc0c9..00000000000000 --- a/Misc/NEWS.d/next/Library/2023-11-12-21-39-36.gh-issue-111928.SiYBqt.rst +++ /dev/null @@ -1 +0,0 @@ -Make the ``memo`` dict thread safe without the GIL in :mod:`json`.