Skip to content

ENH: Add index parameter to to_json #18591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 10, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ Other Enhancements
- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`).
- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
- :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
- :func:`DataFrame.to_json` and :func:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)

.. _whatsnew_0220.api_breaking:

Expand Down
13 changes: 11 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1603,7 +1603,8 @@ def _repr_latex_(self):

def to_json(self, path_or_buf=None, orient=None, date_format=None,
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None, lines=False, compression=None):
default_handler=None, lines=False, compression=None,
index=True):
"""
Convert the object to a JSON string.

Expand Down Expand Up @@ -1671,6 +1672,13 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,

.. versionadded:: 0.21.0

index : boolean, default True
Whether to include the index values in the JSON string. Not
including the index (``index=False``) is only supported when
orient is 'split' or 'table'.

.. versionadded:: 0.22.0

Returns
-------
same type as input object with filtered info axis
Expand Down Expand Up @@ -1723,7 +1731,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
double_precision=double_precision,
force_ascii=force_ascii, date_unit=date_unit,
default_handler=default_handler,
lines=lines, compression=compression)
lines=lines, compression=compression,
index=index)

def to_hdf(self, path_or_buf, key, **kwargs):
"""Write the contained data to an HDF5 file using HDFStore.
Expand Down
78 changes: 60 additions & 18 deletions pandas/io/json/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@
# interface to/from
def to_json(path_or_buf, obj, orient=None, date_format='epoch',
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None, lines=False, compression=None):
default_handler=None, lines=False, compression=None,
index=True):

if not index and orient not in ['split', 'table']:
raise ValueError("'index=False' is only valid when 'orient' is "
"'split' or 'table'")

path_or_buf = _stringify_path(path_or_buf)
if lines and orient != 'records':
Expand All @@ -49,7 +54,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
s = writer(
obj, orient=orient, date_format=date_format,
double_precision=double_precision, ensure_ascii=force_ascii,
date_unit=date_unit, default_handler=default_handler).write()
date_unit=date_unit, default_handler=default_handler,
index=index).write()

if lines:
s = _convert_to_line_delimits(s)
Expand All @@ -69,7 +75,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
class Writer(object):

def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit, default_handler=None):
ensure_ascii, date_unit, index, default_handler=None):
self.obj = obj

if orient is None:
Expand All @@ -81,6 +87,7 @@ def __init__(self, obj, orient, date_format, double_precision,
self.ensure_ascii = ensure_ascii
self.date_unit = date_unit
self.default_handler = default_handler
self.index = index

self.is_copy = None
self._format_axes()
Expand All @@ -89,14 +96,20 @@ def _format_axes(self):
raise AbstractMethodError(self)

def write(self):
return self._write(self.obj, self.orient, self.double_precision,
self.ensure_ascii, self.date_unit,
self.date_format == 'iso', self.default_handler)

def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
return dumps(
self.obj,
orient=self.orient,
double_precision=self.double_precision,
ensure_ascii=self.ensure_ascii,
date_unit=self.date_unit,
iso_dates=self.date_format == 'iso',
default_handler=self.default_handler
obj,
orient=orient,
double_precision=double_precision,
ensure_ascii=ensure_ascii,
date_unit=date_unit,
iso_dates=iso_dates,
default_handler=default_handler
)


Expand All @@ -108,6 +121,15 @@ def _format_axes(self):
raise ValueError("Series index must be unique for orient="
"'{orient}'".format(orient=self.orient))

def _write(self, obj, orient, double_precision, ensure_ascii,
           date_unit, iso_dates, default_handler):
    """
    Serialize a Series, leaving out the index when requested.

    When ``self.index`` is falsy and ``orient`` is ``'split'``, the Series
    is replaced by a plain dict holding only its ``name`` and ``data``, so
    no ``index`` entry is handed on. In every other case the object is
    passed through unchanged. Serialization itself is delegated to the
    parent class ``_write``.
    """
    omit_index = not self.index and orient == 'split'
    payload = {"name": obj.name, "data": obj.values} if omit_index else obj

    parent = super(SeriesWriter, self)
    return parent._write(payload, orient, double_precision, ensure_ascii,
                         date_unit, iso_dates, default_handler)


class FrameWriter(Writer):
_default_orient = 'columns'
Expand All @@ -123,12 +145,22 @@ def _format_axes(self):
raise ValueError("DataFrame columns must be unique for orient="
"'{orient}'.".format(orient=self.orient))

def _write(self, obj, orient, double_precision, ensure_ascii,
           date_unit, iso_dates, default_handler):
    """
    Serialize a DataFrame, leaving out the index when requested.

    When ``self.index`` is falsy and ``orient`` is ``'split'``, the frame
    is converted with ``to_dict(orient='split')`` and the ``"index"`` entry
    is removed before serialization. In every other case the object is
    passed through unchanged. Serialization itself is delegated to the
    parent class ``_write``.
    """
    omit_index = not self.index and orient == 'split'
    if omit_index:
        split_form = obj.to_dict(orient='split')
        # Drop the index entry so it never reaches the serializer.
        split_form.pop("index")
        obj = split_form

    parent = super(FrameWriter, self)
    return parent._write(obj, orient, double_precision, ensure_ascii,
                         date_unit, iso_dates, default_handler)


class JSONTableWriter(FrameWriter):
_default_orient = 'records'

def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit, default_handler=None):
ensure_ascii, date_unit, index, default_handler=None):
"""
Adds a `schema` attribute with the Table Schema, resets
the index (can't do in caller, because the schema inference needs
Expand All @@ -137,7 +169,7 @@ def __init__(self, obj, orient, date_format, double_precision,
"""
super(JSONTableWriter, self).__init__(
obj, orient, date_format, double_precision, ensure_ascii,
date_unit, default_handler=default_handler)
date_unit, index, default_handler=default_handler)

if date_format != 'iso':
msg = ("Trying to write with `orient='table'` and "
Expand All @@ -146,7 +178,7 @@ def __init__(self, obj, orient, date_format, double_precision,
.format(fmt=date_format))
raise ValueError(msg)

self.schema = build_table_schema(obj)
self.schema = build_table_schema(obj, index=self.index)

# NotImplemented on a column MultiIndex
if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
Expand All @@ -168,14 +200,24 @@ def __init__(self, obj, orient, date_format, double_precision,
if is_period_dtype(obj.index):
obj.index = obj.index.to_timestamp()

self.obj = obj.reset_index()
# exclude index from obj if index=False
if not self.index:
self.obj = obj.reset_index(drop=True)
else:
self.obj = obj.reset_index(drop=False)
self.date_format = 'iso'
self.orient = 'records'

def write(self):
data = super(JSONTableWriter, self).write()
self.index = index

def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
data = super(JSONTableWriter, self)._write(obj, orient,
double_precision,
ensure_ascii, date_unit,
iso_dates,
default_handler)
serialized = '{{"schema": {schema}, "data": {data}}}'.format(
schema=dumps(self.schema), data=data)
schema=dumps(self.schema), data=data)
return serialized


Expand Down
62 changes: 62 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
read_json, compat)
from datetime import timedelta
import pandas as pd
import json

from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
assert_series_equal, network,
Expand Down Expand Up @@ -1147,3 +1148,64 @@ def test_data_frame_size_after_to_json(self):
size_after = df.memory_usage(index=True, deep=True).sum()

assert size_before == size_after

@pytest.mark.parametrize('data, expected', [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice!

(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'),
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
index=[['a', 'b'], ['c', 'd']]),
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
(Series([1, 2, 3], name='A'),
{'name': 'A', 'data': [1, 2, 3]}),
(Series([1, 2, 3], name='A').rename_axis('foo'),
{'name': 'A', 'data': [1, 2, 3]}),
(Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]),
{'name': 'A', 'data': [1, 2]}),
])
def test_index_false_to_json_split(self, data, expected):
    # GH 17394
    # With orient='split' and index=False the decoded JSON must equal the
    # expected mapping for each parametrized object.
    serialized = data.to_json(orient='split', index=False)
    decoded = json.loads(serialized)

    assert decoded == expected

@pytest.mark.parametrize('data', [
    DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
    DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'),
    DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
              index=[['a', 'b'], ['c', 'd']]),
    Series([1, 2, 3], name='A'),
    Series([1, 2, 3], name='A').rename_axis('foo'),
    Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]),
])
def test_index_false_to_json_table(self, data):
    # GH 17394
    # With orient='table' and index=False the output must pair the
    # index-free table schema with the plain record list.
    decoded = json.loads(data.to_json(orient='table', index=False))

    schema = pd.io.json.build_table_schema(data, index=False)
    records = DataFrame(data).to_dict(orient='records')

    assert decoded == {'schema': schema, 'data': records}

@pytest.mark.parametrize('orient', ['records', 'index', 'columns', 'values'])
def test_index_false_error_to_json(self, orient):
    # GH 17394
    # index=False combined with any of these orients must raise ValueError
    # with the documented message.
    frame = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
    expected_msg = ("'index=False' is only valid when 'orient' is "
                    "'split' or 'table'")

    with tm.assert_raises_regex(ValueError, expected_msg):
        frame.to_json(orient=orient, index=False)