diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b194f20c3c433..6a40f6b251bb5 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -216,6 +216,7 @@ Other enhancements (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`). - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) +- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`) - The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 05c3ae4096ad5..8d04874b4c9bf 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -154,6 +154,8 @@ enum JSTYPES { JT_ARRAY, // Array structure JT_OBJECT, // Key/Value structure JT_INVALID, // Internal, do not return nor expect + JT_POS_INF, // Positive infinity + JT_NEG_INF, // Negative infinity }; typedef void * JSOBJ; @@ -290,6 +292,8 @@ typedef struct __JSONObjectDecoder { JSOBJ (*newTrue)(void *prv); JSOBJ (*newFalse)(void *prv); JSOBJ (*newNull)(void *prv); + JSOBJ (*newPosInf)(void *prv); + JSOBJ (*newNegInf)(void *prv); JSOBJ (*newObject)(void *prv, void *decoder); JSOBJ (*endObject)(void *prv, JSOBJ obj); JSOBJ (*newArray)(void *prv, void *decoder); diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index 26b00c0cacd31..4eb18ee13d70b 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -127,9 +127,16 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { JSUINT64 overflowLimit = LLONG_MAX; - if (*(offset) == '-') { + if (*(offset) == 'I') { + goto DECODE_INF; + } else if (*(offset) == 'N') { + goto DECODE_NAN; + } else if (*(offset) == '-') { offset++; intNeg = -1; + if (*(offset) == 'I') { + goto DECODE_INF; + } overflowLimit = LLONG_MIN; } @@ -281,6 +288,48 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { } } +DECODE_NAN: + offset++; + if (*(offset++) != 'a') goto SET_NAN_ERROR; + if (*(offset++) != 'N') goto SET_NAN_ERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SET_NAN_ERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'NaN'"); + +DECODE_INF: + offset++; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'f') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 't') goto SET_INF_ERROR; + if (*(offset++) != 'y') goto SET_INF_ERROR; + + ds->start = offset; + + if (intNeg == 1) { + ds->lastType = JT_POS_INF; + return ds->dec->newPosInf(ds->prv); + } else { + ds->lastType = JT_NEG_INF; + return ds->dec->newNegInf(ds->prv); + } + +SET_INF_ERROR: + if (intNeg == 1) { + const char *msg = "Unexpected character found when decoding 'Infinity'"; + return SetError(ds, -1, msg); + } else { + const char *msg = "Unexpected character found when decoding '-Infinity'"; + return SetError(ds, -1, msg); + } + + BREAK_EXP_LOOP: // FIXME: Check for arithmetic overflow here ds->lastType = JT_DOUBLE; @@ -1070,6 +1119,8 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) { case '7': case '8': case '9': + case 'I': + case 'N': case '-': return decode_numeric(ds); diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index 7a2e5a584443a..b2fc788478864 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -459,6 +459,10 @@ JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; } JSOBJ Object_newNull(void *prv) { Py_RETURN_NONE; } +JSOBJ Object_newPosInf(void *prv) { return PyFloat_FromDouble(Py_HUGE_VAL); } + +JSOBJ Object_newNegInf(void *prv) { return PyFloat_FromDouble(-Py_HUGE_VAL); } + JSOBJ Object_newObject(void *prv, void *decoder) { return PyDict_New(); } JSOBJ Object_endObject(void *prv, JSOBJ obj) { return obj; } @@ -502,10 +506,11 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { JSONObjectDecoder dec = { Object_newString, Object_objectAddKey, Object_arrayAddItem, Object_newTrue, Object_newFalse, Object_newNull, - Object_newObject, Object_endObject, Object_newArray, - Object_endArray, Object_newInteger, Object_newLong, - Object_newDouble, Object_releaseObject, PyObject_Malloc, - PyObject_Free, PyObject_Realloc}; + Object_newPosInf, Object_newNegInf, Object_newObject, + Object_endObject, Object_newArray, Object_endArray, + Object_newInteger, Object_newLong, Object_newDouble, + Object_releaseObject, PyObject_Malloc, PyObject_Free, + PyObject_Realloc}; dec.preciseFloat = 0; dec.prv = NULL; diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 6e27b79458faf..3068261b9b3a5 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1597,3 +1597,12 @@ def test_json_indent_all_orients(self, orient, expected): def test_json_negative_indent_raises(self): with pytest.raises(ValueError, match="must be a nonnegative integer"): pd.DataFrame().to_json(indent=-1) + + def test_emca_262_nan_inf_support(self): + # GH 12213 + data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]' + result = pd.read_json(data) + expected = pd.DataFrame( + ["a", np.nan, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"] + ) + tm.assert_frame_equal(result, expected)