Skip to content

JSON Support for parsing NaN, Infinity and -Infinity #30295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ Other enhancements
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`)
- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
- DataFrame constructor preserves `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/_libs/src/ujson/lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ enum JSTYPES {
JT_ARRAY, // Array structure
JT_OBJECT, // Key/Value structure
JT_INVALID, // Internal, do not return nor expect
JT_POS_INF, // Positive infinity
JT_NEG_INF, // Negative infinity
};

typedef void * JSOBJ;
Expand Down Expand Up @@ -290,6 +292,8 @@ typedef struct __JSONObjectDecoder {
JSOBJ (*newTrue)(void *prv);
JSOBJ (*newFalse)(void *prv);
JSOBJ (*newNull)(void *prv);
JSOBJ (*newPosInf)(void *prv);
JSOBJ (*newNegInf)(void *prv);
JSOBJ (*newObject)(void *prv, void *decoder);
JSOBJ (*endObject)(void *prv, JSOBJ obj);
JSOBJ (*newArray)(void *prv, void *decoder);
Expand Down
53 changes: 52 additions & 1 deletion pandas/_libs/src/ujson/lib/ultrajsondec.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,16 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) {

JSUINT64 overflowLimit = LLONG_MAX;

if (*(offset) == '-') {
if (*(offset) == 'I') {
goto DECODE_INF;
} else if (*(offset) == 'N') {
goto DECODE_NAN;
} else if (*(offset) == '-') {
offset++;
intNeg = -1;
if (*(offset) == 'I') {
goto DECODE_INF;
}
overflowLimit = LLONG_MIN;
}

Expand Down Expand Up @@ -281,6 +288,48 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) {
}
}

DECODE_NAN:
offset++;
if (*(offset++) != 'a') goto SET_NAN_ERROR;
if (*(offset++) != 'N') goto SET_NAN_ERROR;

ds->lastType = JT_NULL;
ds->start = offset;
return ds->dec->newNull(ds->prv);

SET_NAN_ERROR:
return SetError(ds, -1, "Unexpected character found when decoding 'NaN'");

DECODE_INF:
offset++;
if (*(offset++) != 'n') goto SET_INF_ERROR;
if (*(offset++) != 'f') goto SET_INF_ERROR;
if (*(offset++) != 'i') goto SET_INF_ERROR;
if (*(offset++) != 'n') goto SET_INF_ERROR;
if (*(offset++) != 'i') goto SET_INF_ERROR;
if (*(offset++) != 't') goto SET_INF_ERROR;
if (*(offset++) != 'y') goto SET_INF_ERROR;

ds->start = offset;

if (intNeg == 1) {
ds->lastType = JT_POS_INF;
return ds->dec->newPosInf(ds->prv);
} else {
ds->lastType = JT_NEG_INF;
return ds->dec->newNegInf(ds->prv);
}

SET_INF_ERROR:
if (intNeg == 1) {
const char *msg = "Unexpected character found when decoding 'Infinity'";
return SetError(ds, -1, msg);
} else {
const char *msg = "Unexpected character found when decoding '-Infinity'";
return SetError(ds, -1, msg);
}


BREAK_EXP_LOOP:
// FIXME: Check for arithmetic overflow here
ds->lastType = JT_DOUBLE;
Expand Down Expand Up @@ -1070,6 +1119,8 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) {
case '7':
case '8':
case '9':
case 'I':
case 'N':
case '-':
return decode_numeric(ds);

Expand Down
13 changes: 9 additions & 4 deletions pandas/_libs/src/ujson/python/JSONtoObj.c
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,10 @@ JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; }

/*
 * Decoder callback that produces the Python object for a null value.
 * Returns Python's None through Py_RETURN_NONE; `prv` is not used.
 */
JSOBJ Object_newNull(void *prv) {
    (void)prv;
    Py_RETURN_NONE;
}

/*
 * Decoder callback that produces the Python object for positive infinity.
 * Returns the result of PyFloat_FromDouble(Py_HUGE_VAL); `prv` is not used.
 */
JSOBJ Object_newPosInf(void *prv) {
    const double positive_value = Py_HUGE_VAL;

    (void)prv;
    return PyFloat_FromDouble(positive_value);
}

/*
 * Decoder callback that produces the Python object for negative infinity.
 * Returns the result of PyFloat_FromDouble(-Py_HUGE_VAL); `prv` is not used.
 */
JSOBJ Object_newNegInf(void *prv) {
    const double negative_value = -Py_HUGE_VAL;

    (void)prv;
    return PyFloat_FromDouble(negative_value);
}

/*
 * Decoder callback that creates the container for a key/value structure.
 * Returns the result of PyDict_New() unchanged; neither `prv` nor `decoder`
 * is used.
 */
JSOBJ Object_newObject(void *prv, void *decoder) {
    (void)prv;
    (void)decoder;
    return PyDict_New();
}

/*
 * Decoder callback invoked with a finished object.
 * Hands `obj` back untouched; `prv` is not used.
 */
JSOBJ Object_endObject(void *prv, JSOBJ obj) {
    (void)prv;
    return obj;
}
Expand Down Expand Up @@ -502,10 +506,11 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
JSONObjectDecoder dec = {
Object_newString, Object_objectAddKey, Object_arrayAddItem,
Object_newTrue, Object_newFalse, Object_newNull,
Object_newObject, Object_endObject, Object_newArray,
Object_endArray, Object_newInteger, Object_newLong,
Object_newDouble, Object_releaseObject, PyObject_Malloc,
PyObject_Free, PyObject_Realloc};
Object_newPosInf, Object_newNegInf, Object_newObject,
Object_endObject, Object_newArray, Object_endArray,
Object_newInteger, Object_newLong, Object_newDouble,
Object_releaseObject, PyObject_Malloc, PyObject_Free,
PyObject_Realloc};

dec.preciseFloat = 0;
dec.prv = NULL;
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1597,3 +1597,12 @@ def test_json_indent_all_orients(self, orient, expected):
def test_json_negative_indent_raises(self):
# Calling to_json on an empty DataFrame with indent=-1 must raise a
# ValueError whose message matches "must be a nonnegative integer".
with pytest.raises(ValueError, match="must be a nonnegative integer"):
pd.DataFrame().to_json(indent=-1)

def test_emca_262_nan_inf_support(self):
# GH 12213
# Bare NaN, Infinity and -Infinity tokens in the JSON input are expected to
# come back as np.nan, np.inf and -np.inf, while the quoted forms
# ("NaN", "Infinity", "-Infinity") are expected to stay plain strings.
# NOTE(review): "emca" in the test name looks like a typo for "ecma" --
# confirm before renaming.
data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
result = pd.read_json(data)
expected = pd.DataFrame(
["a", np.nan, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"]
)
tm.assert_frame_equal(result, expected)