diff --git a/doc/source/io.rst b/doc/source/io.rst index 36e4f264fb923..cc51fbd1e30ab 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1501,45 +1501,34 @@ Fallback Behavior If the JSON serializer cannot handle the container contents directly it will fallback in the following manner: -- if a ``toDict`` method is defined by the unrecognised object then that - will be called and its returned ``dict`` will be JSON serialized. -- if a ``default_handler`` has been passed to ``to_json`` that will - be called to convert the object. -- otherwise an attempt is made to convert the object to a ``dict`` by - parsing its contents. However if the object is complex this will often fail - with an ``OverflowError``. +- if the dtype is unsupported (e.g. ``np.complex``) then the ``default_handler``, if provided, will be called + for each value, otherwise an exception is raised. -Your best bet when encountering ``OverflowError`` during serialization -is to specify a ``default_handler``. For example ``timedelta`` can cause -problems: +- if an object is unsupported it will attempt the following: -.. ipython:: python - :suppress: - from datetime import timedelta - dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42]) + * check if the object has defined a ``toDict`` method and call it. + A ``toDict`` method should return a ``dict`` which will then be JSON serialized. -.. code-block:: ipython + * invoke the ``default_handler`` if one was provided. - In [141]: from datetime import timedelta + * convert the object to a ``dict`` by traversing its contents. However this will often fail + with an ``OverflowError`` or give unexpected results. - In [142]: dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42]) +In general the best approach for unsupported objects or dtypes is to provide a ``default_handler``. +For example: + +.. 
code-block:: python - In [143]: dftd.to_json() + DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json() # raises - --------------------------------------------------------------------------- - OverflowError Traceback (most recent call last) - OverflowError: Maximum recursion level reached + RuntimeError: Unhandled numpy dtype 15 -which can be dealt with by specifying a simple ``default_handler``: +can be dealt with by specifying a simple ``default_handler``: .. ipython:: python - dftd.to_json(default_handler=str) - - def my_handler(obj): - return obj.total_seconds() - dftd.to_json(default_handler=my_handler) + DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json(default_handler=str) .. _io.json_reader: diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 3bf0ce52498a6..8866b00bba0a0 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -462,6 +462,8 @@ Bug Fixes - Bug in ``.loc`` with out-of-bounds in a large indexer would raise ``IndexError`` rather than ``KeyError`` (:issue:`12527`) - Bug in resampling when using a ``TimedeltaIndex`` and ``.asfreq()``, would previously not include the final fencepost (:issue:`12926`) +- Bug in ``DataFrame.to_json`` with unsupported ``dtype`` not passed to default handler (:issue:`12554`). 
+ - Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`) - Bug in ``GroupBy.first()``, ``.last()`` returns incorrect row when ``TimeGrouper`` is used (:issue:`7453`) diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index 70fef01c0a3ea..8c89a50446c23 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -809,17 +809,48 @@ def test_mixed_timedelta_datetime(self): def test_default_handler(self): value = object() - frame = DataFrame({'a': ['a', value]}) - expected = frame.applymap(str) + frame = DataFrame({'a': [7, value]}) + expected = DataFrame({'a': [7, str(value)]}) result = pd.read_json(frame.to_json(default_handler=str)) assert_frame_equal(expected, result, check_index_type=False) + def test_default_handler_indirect(self): + from pandas.io.json import dumps + + def default(obj): + if isinstance(obj, complex): + return [('mathjs', 'Complex'), + ('re', obj.real), + ('im', obj.imag)] + return str(obj) + df_list = [9, DataFrame({'a': [1, 'STR', complex(4, -5)], + 'b': [float('nan'), None, 'N/A']}, + columns=['a', 'b'])] + expected = ('[9,[[1,null],["STR",null],[[["mathjs","Complex"],' + '["re",4.0],["im",-5.0]],"N\\/A"]]]') + self.assertEqual(expected, dumps(df_list, default_handler=default, + orient="values")) + + def test_default_handler_numpy_unsupported_dtype(self): + # GH12554 to_json raises 'Unhandled numpy dtype 15' + df = DataFrame({'a': [1, 2.3, complex(4, -5)], + 'b': [float('nan'), None, complex(1.2, 0)]}, + columns=['a', 'b']) + expected = ('[["(1+0j)","(nan+0j)"],' + '["(2.3+0j)","(nan+0j)"],' + '["(4-5j)","(1.2+0j)"]]') + self.assertEqual(expected, df.to_json(default_handler=str, + orient="values")) + def test_default_handler_raises(self): def my_handler_raises(obj): raise TypeError("raisin") self.assertRaises(TypeError, DataFrame({'a': [1, 2, object()]}).to_json, default_handler=my_handler_raises) + self.assertRaises(TypeError, + DataFrame({'a': 
[1, 2, complex(4, -5)]}).to_json, + default_handler=my_handler_raises) def test_categorical(self): # GH4377 df.to_json segfaults with non-ndarray blocks diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c index 2f8ac0077d92e..46ae623ae88a7 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/src/ujson/python/objToJSON.c @@ -636,10 +636,6 @@ static int NpyTypeToJSONType(PyObject* obj, JSONTypeContext* tc, int npyType, vo } PRINTMARK(); - PyErr_Format ( - PyExc_RuntimeError, - "Unhandled numpy dtype %d", - npyType); return JT_INVALID; } @@ -791,6 +787,7 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) Py_INCREF(obj); ((PyObjectEncoder*) tc->encoder)->npyType = PyArray_TYPE(npyarr->array); ((PyObjectEncoder*) tc->encoder)->npyValue = npyarr->dataptr; + ((PyObjectEncoder*) tc->encoder)->npyCtxtPassthru = npyarr; } else { @@ -1917,6 +1914,26 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in return ret; } +void Object_invokeDefaultHandler(PyObject *obj, PyObjectEncoder *enc) +{ + PyObject *tmpObj = NULL; + PRINTMARK(); + tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL); + if (!PyErr_Occurred()) + { + if (tmpObj == NULL) + { + PyErr_SetString(PyExc_TypeError, "Failed to execute default handler"); + } + else + { + encode (tmpObj, (JSONObjectEncoder*) enc, NULL, 0); + } + } + Py_XDECREF(tmpObj); + return; +} + void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc) { PyObject *obj, *exc, *toDictFunc, *tmpObj, *values; @@ -1942,6 +1959,24 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc) PRINTMARK(); tc->prv = &(enc->basicTypeContext); tc->type = NpyTypeToJSONType(obj, tc, enc->npyType, enc->npyValue); + + if (tc->type == JT_INVALID) + { + if(enc->defaultHandler) + { + enc->npyType = -1; + PRINTMARK(); + Object_invokeDefaultHandler(enc->npyCtxtPassthru->getitem(enc->npyValue, enc->npyCtxtPassthru->array), enc); + } + else + { + PyErr_Format ( + 
PyExc_RuntimeError, + "Unhandled numpy dtype %d", + enc->npyType); + } + } + enc->npyCtxtPassthru = NULL; enc->npyType = -1; return; } @@ -2528,18 +2563,7 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc) if (enc->defaultHandler) { - PRINTMARK(); - tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL); - if (tmpObj == NULL || PyErr_Occurred()) - { - if (!PyErr_Occurred()) - { - PyErr_SetString(PyExc_TypeError, "Failed to execute default handler"); - } - goto INVALID; - } - encode (tmpObj, (JSONObjectEncoder*) enc, NULL, 0); - Py_DECREF(tmpObj); + Object_invokeDefaultHandler(obj, enc); goto INVALID; }