diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index c78e27f098f13..c379d1a2a160d 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -35,7 +35,7 @@ Other enhancements when using the ``pyarrow`` engine. It is currently not yet supported when converting back to pandas (so it will become an integer or float dtype depending on the presence of missing data). (:issue:`28368`) -- +- :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) .. _whatsnew_1000.api_breaking: @@ -194,6 +194,7 @@ I/O - Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`) - Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. (:issue:`25099`) - Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) +- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`) Plotting ^^^^^^^^ diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index ee6e7081bf00e..05c3ae4096ad5 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -244,6 +244,10 @@ typedef struct __JSONObjectEncoder { If true, '<', '>', and '&' characters will be encoded as \u003c, \u003e, and \u0026, respectively. If false, no special encoding will be used. 
*/ int encodeHTMLChars; + /* + Number of spaces per indentation level; 0 disables pretty-printing */ + int indent; + /* Set to an error message if error occurred */ const char *errorMsg; diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c index d5b379bee585b..51c9b9244ecfc 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c +++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -728,6 +728,22 @@ FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin, while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux; } +void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc) +{ + if (enc->indent > 0) Buffer_AppendCharUnchecked(enc, '\n'); +} + +// This function could be refactored to only accept enc as an argument, +// but this is a straight vendor from ujson source +void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value) +{ + int i; + if (enc->indent > 0) + while (value-- > 0) + for (i = 0; i < enc->indent; i++) + Buffer_AppendCharUnchecked(enc, ' '); +} + void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) { char *wstr; JSUINT32 uvalue = (value < 0) ? 
-value : value; @@ -960,6 +976,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, enc->iterBegin(obj, &tc); Buffer_AppendCharUnchecked(enc, '['); + Buffer_AppendIndentNewlineUnchecked (enc); while (enc->iterNext(obj, &tc)) { if (count > 0) { @@ -967,17 +984,20 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, #ifndef JSON_NO_EXTRA_WHITESPACE Buffer_AppendCharUnchecked(buffer, ' '); #endif + Buffer_AppendIndentNewlineUnchecked (enc); } iterObj = enc->iterGetValue(obj, &tc); enc->level++; + Buffer_AppendIndentUnchecked (enc, enc->level); encode(iterObj, enc, NULL, 0); count++; } enc->iterEnd(obj, &tc); - Buffer_Reserve(enc, 2); + Buffer_AppendIndentNewlineUnchecked (enc); + Buffer_AppendIndentUnchecked (enc, enc->level); Buffer_AppendCharUnchecked(enc, ']'); break; } @@ -987,6 +1007,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, enc->iterBegin(obj, &tc); Buffer_AppendCharUnchecked(enc, '{'); + Buffer_AppendIndentNewlineUnchecked (enc); while (enc->iterNext(obj, &tc)) { if (count > 0) { @@ -994,18 +1015,21 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, #ifndef JSON_NO_EXTRA_WHITESPACE Buffer_AppendCharUnchecked(enc, ' '); #endif + Buffer_AppendIndentNewlineUnchecked (enc); } iterObj = enc->iterGetValue(obj, &tc); objName = enc->iterGetName(obj, &tc, &szlen); enc->level++; + Buffer_AppendIndentUnchecked (enc, enc->level); encode(iterObj, enc, objName, szlen); count++; } enc->iterEnd(obj, &tc); - Buffer_Reserve(enc, 2); + Buffer_AppendIndentNewlineUnchecked (enc); + Buffer_AppendIndentUnchecked (enc, enc->level); Buffer_AppendCharUnchecked(enc, '}'); break; } diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index dc9b906c8d76c..22c42acea0150 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -2373,10 +2373,16 @@ char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { } 
PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { - "obj", "ensure_ascii", "double_precision", "encode_html_chars", - "orient", "date_unit", "iso_dates", "default_handler", - NULL}; + static char *kwlist[] = {"obj", + "ensure_ascii", + "double_precision", + "encode_html_chars", + "orient", + "date_unit", + "iso_dates", + "default_handler", + "indent", + NULL}; char buffer[65536]; char *ret; @@ -2389,6 +2395,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { char *sdateFormat = NULL; PyObject *oisoDates = 0; PyObject *odefHandler = 0; + int indent = 0; PyObjectEncoder pyEncoder = {{ Object_beginTypeContext, @@ -2410,6 +2417,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { idoublePrecision, 1, // forceAscii 0, // encodeHTMLChars + 0, // indent }}; JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder; @@ -2434,10 +2442,10 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { PRINTMARK(); - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOO", kwlist, &oinput, - &oensureAscii, &idoublePrecision, + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, + &oinput, &oensureAscii, &idoublePrecision, &oencodeHTMLChars, &sOrient, &sdateFormat, - &oisoDates, &odefHandler)) { + &oisoDates, &odefHandler, &indent)) { return NULL; } @@ -2503,6 +2511,8 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { pyEncoder.defaultHandler = odefHandler; } + encoder->indent = indent; + pyEncoder.originalOutputFormat = pyEncoder.outputFormat; PRINTMARK(); ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer)); diff --git a/pandas/_typing.py b/pandas/_typing.py index de9fb5b944186..f1429d7b48410 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -25,7 +25,7 @@ FilePathOrBuffer = Union[str, Path, IO[AnyStr]] FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame") -Scalar = Union[str, int, float] +Scalar 
= Union[str, int, float, bool] Axis = Union[str, int] Ordered = Optional[bool] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eb8eae7034f39..0dadbe443f153 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8,6 +8,7 @@ import re from textwrap import dedent from typing import ( + Any, Callable, Dict, FrozenSet, @@ -61,7 +62,7 @@ from pandas.core.dtypes.missing import isna, notna import pandas as pd -from pandas._typing import Dtype, FilePathOrBuffer +from pandas._typing import Dtype, FilePathOrBuffer, Scalar from pandas.core import missing, nanops import pandas.core.algorithms as algos from pandas.core.base import PandasObject, SelectionMixin @@ -2249,17 +2250,18 @@ def to_excel( def to_json( self, - path_or_buf=None, - orient=None, - date_format=None, - double_precision=10, - force_ascii=True, - date_unit="ms", - default_handler=None, - lines=False, - compression="infer", - index=True, - ): + path_or_buf: Optional[FilePathOrBuffer] = None, + orient: Optional[str] = None, + date_format: Optional[str] = None, + double_precision: int = 10, + force_ascii: bool_t = True, + date_unit: str = "ms", + default_handler: Optional[Callable[[Any], Union[Scalar, List, Dict]]] = None, + lines: bool_t = False, + compression: Optional[str] = "infer", + index: bool_t = True, + indent: Optional[int] = None, + ) -> Optional[str]: """ Convert the object to a JSON string. @@ -2339,6 +2341,11 @@ def to_json( .. versionadded:: 0.23.0 + indent : int, optional + Length of whitespace used to indent each record. + + .. versionadded:: 1.0.0 + Returns ------- None or str @@ -2349,6 +2356,13 @@ def to_json( -------- read_json + Notes + ----- + The behavior of ``indent=0`` differs from the stdlib, which does not + indent the output but does insert newlines. Currently, ``indent=0`` + and the default ``indent=None`` are equivalent in pandas, though this + may change in a future release. 
+ Examples -------- @@ -2399,6 +2413,10 @@ def to_json( date_format = "iso" elif date_format is None: date_format = "epoch" + + config.is_nonnegative_int(indent) + indent = indent or 0 + return json.to_json( path_or_buf=path_or_buf, obj=self, @@ -2411,6 +2429,7 @@ def to_json( lines=lines, compression=compression, index=index, + indent=indent, ) def to_hdf(self, path_or_buf, key, **kwargs): diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index a00499287ac8f..73f4985e201f1 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1,6 +1,8 @@ +from collections import OrderedDict from io import StringIO from itertools import islice import os +from typing import Any, Callable, Dict, List, Optional, Type, Union import numpy as np @@ -11,6 +13,7 @@ from pandas.core.dtypes.common import ensure_str, is_period_dtype from pandas import DataFrame, MultiIndex, Series, isna, to_datetime +from pandas._typing import Scalar from pandas.core.reshape.concat import concat from pandas.io.common import ( @@ -31,20 +34,23 @@ TABLE_SCHEMA_VERSION = "0.20.0" +Serializable = Union[Scalar, List, Dict] + # interface to/from def to_json( path_or_buf, obj, - orient=None, - date_format="epoch", - double_precision=10, - force_ascii=True, - date_unit="ms", - default_handler=None, - lines=False, - compression="infer", - index=True, + orient: Optional[str] = None, + date_format: str = "epoch", + double_precision: int = 10, + force_ascii: bool = True, + date_unit: str = "ms", + default_handler: Optional[Callable[[Any], Serializable]] = None, + lines: bool = False, + compression: Optional[str] = "infer", + index: bool = True, + indent: int = 0, ): if not index and orient not in ["split", "table"]: @@ -59,7 +65,7 @@ def to_json( if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") if orient == "table" and isinstance(obj, DataFrame): - writer = JSONTableWriter + writer = JSONTableWriter # type: Type["Writer"] elif isinstance(obj, 
Series): writer = SeriesWriter elif isinstance(obj, DataFrame): @@ -76,6 +82,7 @@ def to_json( date_unit=date_unit, default_handler=default_handler, index=index, + indent=indent, ).write() if lines: @@ -97,18 +104,19 @@ class Writer: def __init__( self, obj, - orient, - date_format, - double_precision, - ensure_ascii, - date_unit, - index, - default_handler=None, + orient: Optional[str], + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Optional[Callable[[Any], Serializable]] = None, + indent: int = 0, ): self.obj = obj if orient is None: - orient = self._default_orient + orient = self._default_orient # type: ignore self.orient = orient self.date_format = date_format @@ -117,6 +125,7 @@ def __init__( self.date_unit = date_unit self.default_handler = default_handler self.index = index + self.indent = indent self.is_copy = None self._format_axes() @@ -133,17 +142,19 @@ def write(self): self.date_unit, self.date_format == "iso", self.default_handler, + self.indent, ) def _write( self, obj, - orient, - double_precision, - ensure_ascii, - date_unit, - iso_dates, - default_handler, + orient: Optional[str], + double_precision: int, + ensure_ascii: bool, + date_unit: str, + iso_dates: bool, + default_handler: Optional[Callable[[Any], Serializable]], + indent: int, ): return dumps( obj, @@ -153,6 +164,7 @@ def _write( date_unit=date_unit, iso_dates=iso_dates, default_handler=default_handler, + indent=indent, ) @@ -169,12 +181,13 @@ def _format_axes(self): def _write( self, obj, - orient, - double_precision, - ensure_ascii, - date_unit, - iso_dates, - default_handler, + orient: Optional[str], + double_precision: int, + ensure_ascii: bool, + date_unit: str, + iso_dates: bool, + default_handler: Optional[Callable[[Any], Serializable]], + indent: int, ): if not self.index and orient == "split": obj = {"name": obj.name, "data": obj.values} @@ -186,6 +199,7 @@ def _write( date_unit, iso_dates, default_handler, + 
indent, ) @@ -214,12 +228,13 @@ def _format_axes(self): def _write( self, obj, - orient, - double_precision, - ensure_ascii, - date_unit, - iso_dates, - default_handler, + orient: Optional[str], + double_precision: int, + ensure_ascii: bool, + date_unit: str, + iso_dates: bool, + default_handler: Optional[Callable[[Any], Serializable]], + indent: int, ): if not self.index and orient == "split": obj = obj.to_dict(orient="split") @@ -232,6 +247,7 @@ def _write( date_unit, iso_dates, default_handler, + indent, ) @@ -241,13 +257,14 @@ class JSONTableWriter(FrameWriter): def __init__( self, obj, - orient, - date_format, - double_precision, - ensure_ascii, - date_unit, - index, - default_handler=None, + orient: Optional[str], + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Optional[Callable[[Any], Serializable]] = None, + indent: int = 0, ): """ Adds a `schema` attribute with the Table Schema, resets @@ -255,6 +272,7 @@ def __init__( to know what the index is, forces orient to records, and forces date_format to 'iso'. 
""" + super().__init__( obj, orient, @@ -264,6 +282,7 @@ def __init__( date_unit, index, default_handler=default_handler, + indent=indent, ) if date_format != "iso": @@ -315,19 +334,20 @@ def _write( date_unit, iso_dates, default_handler, + indent, ): - data = super()._write( - obj, + table_obj = OrderedDict((("schema", self.schema), ("data", obj))) + serialized = super()._write( + table_obj, orient, double_precision, ensure_ascii, date_unit, iso_dates, default_handler, + indent, ) - serialized = '{{"schema": {schema}, "data": {data}}}'.format( - schema=dumps(self.schema), data=data - ) + return serialized diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index b2fc9ec217ca6..569e299860614 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas.compat import PY35 + from pandas.core.dtypes.dtypes import CategoricalDtype, DatetimeTZDtype, PeriodDtype import pandas as pd @@ -20,6 +22,14 @@ ) +def assert_results_equal(result, expected): + """Helper function for comparing deserialized JSON with Py35 compat.""" + if PY35: + assert sorted(result.items()) == sorted(expected.items()) + else: + assert result == expected + + class TestBuildSchema: def setup_method(self, method): self.df = DataFrame( @@ -234,7 +244,8 @@ def test_build_series(self): ), ] ) - assert result == expected + + assert_results_equal(result, expected) def test_to_json(self): df = self.df.copy() @@ -323,7 +334,8 @@ def test_to_json(self): ), ] expected = OrderedDict([("schema", schema), ("data", data)]) - assert result == expected + + assert_results_equal(result, expected) def test_to_json_float_index(self): data = pd.Series(1, index=[1.0, 2.0]) @@ -352,7 +364,8 @@ def test_to_json_float_index(self): ), ] ) - assert result == expected + + assert_results_equal(result, expected) def test_to_json_period_index(self): idx = 
pd.period_range("2016", freq="Q-JAN", periods=2) @@ -372,7 +385,8 @@ def test_to_json_period_index(self): OrderedDict([("index", "2016-02-01T00:00:00.000Z"), ("values", 1)]), ] expected = OrderedDict([("schema", schema), ("data", data)]) - assert result == expected + + assert_results_equal(result, expected) def test_to_json_categorical_index(self): data = pd.Series(1, pd.CategoricalIndex(["a", "b"])) @@ -406,7 +420,8 @@ def test_to_json_categorical_index(self): ), ] ) - assert result == expected + + assert_results_equal(result, expected) def test_date_format_raises(self): with pytest.raises(ValueError): @@ -542,7 +557,8 @@ def test_categorical(self): ), ] ) - assert result == expected + + assert_results_equal(result, expected) @pytest.mark.parametrize( "idx,nm,prop", @@ -596,7 +612,8 @@ def test_timestamp_in_columns(self): ) result = df.to_json(orient="table") js = json.loads(result) - assert js["schema"]["fields"][1]["name"] == 1451606400000 + assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z" + # TODO - below expectation is not correct; see GH 28256 assert js["schema"]["fields"][2]["name"] == 10000 @pytest.mark.parametrize( diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9842a706f43d7..5c7cc0f8b1943 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import is_platform_32bit +from pandas.compat import PY35, is_platform_32bit import pandas.util._test_decorators as td import pandas as pd @@ -1647,3 +1647,156 @@ def test_tuple_labels(self, orient, expected): df = pd.DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")]) result = df.to_json(orient=orient) assert result == expected + + @pytest.mark.parametrize("indent", [1, 2, 4]) + def test_to_json_indent(self, indent): + # GH 12004 + df = pd.DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) + + result = df.to_json(indent=indent) 
+ spaces = " " * indent + expected = """{{ +{spaces}"a":{{ +{spaces}{spaces}"0":"foo", +{spaces}{spaces}"1":"baz" +{spaces}}}, +{spaces}"b":{{ +{spaces}{spaces}"0":"bar", +{spaces}{spaces}"1":"qux" +{spaces}}} +}}""".format( + spaces=spaces + ) + + assert result == expected + + @pytest.mark.parametrize( + "orient,expected", + [ + ( + "split", + """{ + "columns":[ + "a", + "b" + ], + "index":[ + 0, + 1 + ], + "data":[ + [ + "foo", + "bar" + ], + [ + "baz", + "qux" + ] + ] +}""", + ), + ( + "records", + """[ + { + "a":"foo", + "b":"bar" + }, + { + "a":"baz", + "b":"qux" + } +]""", + ), + ( + "index", + """{ + "0":{ + "a":"foo", + "b":"bar" + }, + "1":{ + "a":"baz", + "b":"qux" + } +}""", + ), + ( + "columns", + """{ + "a":{ + "0":"foo", + "1":"baz" + }, + "b":{ + "0":"bar", + "1":"qux" + } +}""", + ), + ( + "values", + """[ + [ + "foo", + "bar" + ], + [ + "baz", + "qux" + ] +]""", + ), + ( + "table", + """{ + "schema":{ + "fields":[ + { + "name":"index", + "type":"integer" + }, + { + "name":"a", + "type":"string" + }, + { + "name":"b", + "type":"string" + } + ], + "primaryKey":[ + "index" + ], + "pandas_version":"0.20.0" + }, + "data":[ + { + "index":0, + "a":"foo", + "b":"bar" + }, + { + "index":1, + "a":"baz", + "b":"qux" + } + ] +}""", + ), + ], + ) + def test_json_indent_all_orients(self, orient, expected): + # GH 12004 + df = pd.DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) + result = df.to_json(orient=orient, indent=4) + + if PY35: + assert json.loads(result) == json.loads(expected) + else: + assert result == expected + + def test_json_negative_indent_raises(self): + with pytest.raises(ValueError, match="must be a nonnegative integer"): + pd.DataFrame().to_json(indent=-1)