Skip to content

Commit 2efd67f

Browse files
reidy-p authored and jorisvandenbossche committed
ENH: Add index parameter to to_json (#18591)
1 parent 371649b commit 2efd67f

File tree

4 files changed

+134
-20
lines changed

4 files changed

+134
-20
lines changed

doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ Other Enhancements
136136
- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`).
137137
- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
138138
- :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
139+
- :func:`DataFrame.to_json` and :func:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
139140

140141
.. _whatsnew_0220.api_breaking:
141142

pandas/core/generic.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -1603,7 +1603,8 @@ def _repr_latex_(self):
16031603

16041604
def to_json(self, path_or_buf=None, orient=None, date_format=None,
16051605
double_precision=10, force_ascii=True, date_unit='ms',
1606-
default_handler=None, lines=False, compression=None):
1606+
default_handler=None, lines=False, compression=None,
1607+
index=True):
16071608
"""
16081609
Convert the object to a JSON string.
16091610
@@ -1671,6 +1672,13 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
16711672
16721673
.. versionadded:: 0.21.0
16731674
1675+
index : boolean, default True
1676+
Whether to include the index values in the JSON string. Not
1677+
including the index (``index=False``) is only supported when
1678+
orient is 'split' or 'table'.
1679+
1680+
.. versionadded:: 0.22.0
1681+
16741682
Returns
16751683
-------
16761684
same type as input object with filtered info axis
@@ -1723,7 +1731,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
17231731
double_precision=double_precision,
17241732
force_ascii=force_ascii, date_unit=date_unit,
17251733
default_handler=default_handler,
1726-
lines=lines, compression=compression)
1734+
lines=lines, compression=compression,
1735+
index=index)
17271736

17281737
def to_hdf(self, path_or_buf, key, **kwargs):
17291738
"""Write the contained data to an HDF5 file using HDFStore.

pandas/io/json/json.py

+60-18
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,12 @@
2828
# interface to/from
2929
def to_json(path_or_buf, obj, orient=None, date_format='epoch',
3030
double_precision=10, force_ascii=True, date_unit='ms',
31-
default_handler=None, lines=False, compression=None):
31+
default_handler=None, lines=False, compression=None,
32+
index=True):
33+
34+
if not index and orient not in ['split', 'table']:
35+
raise ValueError("'index=False' is only valid when 'orient' is "
36+
"'split' or 'table'")
3237

3338
path_or_buf = _stringify_path(path_or_buf)
3439
if lines and orient != 'records':
@@ -49,7 +54,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
4954
s = writer(
5055
obj, orient=orient, date_format=date_format,
5156
double_precision=double_precision, ensure_ascii=force_ascii,
52-
date_unit=date_unit, default_handler=default_handler).write()
57+
date_unit=date_unit, default_handler=default_handler,
58+
index=index).write()
5359

5460
if lines:
5561
s = _convert_to_line_delimits(s)
@@ -69,7 +75,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
6975
class Writer(object):
7076

7177
def __init__(self, obj, orient, date_format, double_precision,
72-
ensure_ascii, date_unit, default_handler=None):
78+
ensure_ascii, date_unit, index, default_handler=None):
7379
self.obj = obj
7480

7581
if orient is None:
@@ -81,6 +87,7 @@ def __init__(self, obj, orient, date_format, double_precision,
8187
self.ensure_ascii = ensure_ascii
8288
self.date_unit = date_unit
8389
self.default_handler = default_handler
90+
self.index = index
8491

8592
self.is_copy = None
8693
self._format_axes()
@@ -89,14 +96,20 @@ def _format_axes(self):
8996
raise AbstractMethodError(self)
9097

9198
def write(self):
99+
return self._write(self.obj, self.orient, self.double_precision,
100+
self.ensure_ascii, self.date_unit,
101+
self.date_format == 'iso', self.default_handler)
102+
103+
def _write(self, obj, orient, double_precision, ensure_ascii,
104+
date_unit, iso_dates, default_handler):
92105
return dumps(
93-
self.obj,
94-
orient=self.orient,
95-
double_precision=self.double_precision,
96-
ensure_ascii=self.ensure_ascii,
97-
date_unit=self.date_unit,
98-
iso_dates=self.date_format == 'iso',
99-
default_handler=self.default_handler
106+
obj,
107+
orient=orient,
108+
double_precision=double_precision,
109+
ensure_ascii=ensure_ascii,
110+
date_unit=date_unit,
111+
iso_dates=iso_dates,
112+
default_handler=default_handler
100113
)
101114

102115

@@ -108,6 +121,15 @@ def _format_axes(self):
108121
raise ValueError("Series index must be unique for orient="
109122
"'{orient}'".format(orient=self.orient))
110123

124+
def _write(self, obj, orient, double_precision, ensure_ascii,
125+
date_unit, iso_dates, default_handler):
126+
if not self.index and orient == 'split':
127+
obj = {"name": obj.name, "data": obj.values}
128+
return super(SeriesWriter, self)._write(obj, orient,
129+
double_precision,
130+
ensure_ascii, date_unit,
131+
iso_dates, default_handler)
132+
111133

112134
class FrameWriter(Writer):
113135
_default_orient = 'columns'
@@ -123,12 +145,22 @@ def _format_axes(self):
123145
raise ValueError("DataFrame columns must be unique for orient="
124146
"'{orient}'.".format(orient=self.orient))
125147

148+
def _write(self, obj, orient, double_precision, ensure_ascii,
149+
date_unit, iso_dates, default_handler):
150+
if not self.index and orient == 'split':
151+
obj = obj.to_dict(orient='split')
152+
del obj["index"]
153+
return super(FrameWriter, self)._write(obj, orient,
154+
double_precision,
155+
ensure_ascii, date_unit,
156+
iso_dates, default_handler)
157+
126158

127159
class JSONTableWriter(FrameWriter):
128160
_default_orient = 'records'
129161

130162
def __init__(self, obj, orient, date_format, double_precision,
131-
ensure_ascii, date_unit, default_handler=None):
163+
ensure_ascii, date_unit, index, default_handler=None):
132164
"""
133165
Adds a `schema` attribute with the Table Schema, resets
134166
the index (can't do in caller, because the schema inference needs
@@ -137,7 +169,7 @@ def __init__(self, obj, orient, date_format, double_precision,
137169
"""
138170
super(JSONTableWriter, self).__init__(
139171
obj, orient, date_format, double_precision, ensure_ascii,
140-
date_unit, default_handler=default_handler)
172+
date_unit, index, default_handler=default_handler)
141173

142174
if date_format != 'iso':
143175
msg = ("Trying to write with `orient='table'` and "
@@ -146,7 +178,7 @@ def __init__(self, obj, orient, date_format, double_precision,
146178
.format(fmt=date_format))
147179
raise ValueError(msg)
148180

149-
self.schema = build_table_schema(obj)
181+
self.schema = build_table_schema(obj, index=self.index)
150182

151183
# NotImplemented on a column MultiIndex
152184
if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
@@ -168,14 +200,24 @@ def __init__(self, obj, orient, date_format, double_precision,
168200
if is_period_dtype(obj.index):
169201
obj.index = obj.index.to_timestamp()
170202

171-
self.obj = obj.reset_index()
203+
# exclude index from obj if index=False
204+
if not self.index:
205+
self.obj = obj.reset_index(drop=True)
206+
else:
207+
self.obj = obj.reset_index(drop=False)
172208
self.date_format = 'iso'
173209
self.orient = 'records'
174-
175-
def write(self):
176-
data = super(JSONTableWriter, self).write()
210+
self.index = index
211+
212+
def _write(self, obj, orient, double_precision, ensure_ascii,
213+
date_unit, iso_dates, default_handler):
214+
data = super(JSONTableWriter, self)._write(obj, orient,
215+
double_precision,
216+
ensure_ascii, date_unit,
217+
iso_dates,
218+
default_handler)
177219
serialized = '{{"schema": {schema}, "data": {data}}}'.format(
178-
schema=dumps(self.schema), data=data)
220+
schema=dumps(self.schema), data=data)
179221
return serialized
180222

181223

pandas/tests/io/json/test_pandas.py

+62
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
read_json, compat)
1010
from datetime import timedelta
1111
import pandas as pd
12+
import json
1213

1314
from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
1415
assert_series_equal, network,
@@ -1147,3 +1148,64 @@ def test_data_frame_size_after_to_json(self):
11471148
size_after = df.memory_usage(index=True, deep=True).sum()
11481149

11491150
assert size_before == size_after
1151+
1152+
@pytest.mark.parametrize('data, expected', [
1153+
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
1154+
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
1155+
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'),
1156+
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
1157+
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
1158+
index=[['a', 'b'], ['c', 'd']]),
1159+
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
1160+
(Series([1, 2, 3], name='A'),
1161+
{'name': 'A', 'data': [1, 2, 3]}),
1162+
(Series([1, 2, 3], name='A').rename_axis('foo'),
1163+
{'name': 'A', 'data': [1, 2, 3]}),
1164+
(Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]),
1165+
{'name': 'A', 'data': [1, 2]}),
1166+
])
1167+
def test_index_false_to_json_split(self, data, expected):
1168+
# GH 17394
1169+
# Testing index=False in to_json with orient='split'
1170+
1171+
result = data.to_json(orient='split', index=False)
1172+
result = json.loads(result)
1173+
1174+
assert result == expected
1175+
1176+
@pytest.mark.parametrize('data', [
1177+
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])),
1178+
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo')),
1179+
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
1180+
index=[['a', 'b'], ['c', 'd']])),
1181+
(Series([1, 2, 3], name='A')),
1182+
(Series([1, 2, 3], name='A').rename_axis('foo')),
1183+
(Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']])),
1184+
])
1185+
def test_index_false_to_json_table(self, data):
1186+
# GH 17394
1187+
# Testing index=False in to_json with orient='table'
1188+
1189+
result = data.to_json(orient='table', index=False)
1190+
result = json.loads(result)
1191+
1192+
expected = {
1193+
'schema': pd.io.json.build_table_schema(data, index=False),
1194+
'data': DataFrame(data).to_dict(orient='records')
1195+
}
1196+
1197+
assert result == expected
1198+
1199+
@pytest.mark.parametrize('orient', [
1200+
'records', 'index', 'columns', 'values'
1201+
])
1202+
def test_index_false_error_to_json(self, orient):
1203+
# GH 17394
1204+
# Testing error message from to_json with index=False
1205+
1206+
df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
1207+
1208+
with tm.assert_raises_regex(ValueError, "'index=False' is only "
1209+
"valid when 'orient' is "
1210+
"'split' or 'table'"):
1211+
df.to_json(orient=orient, index=False)

0 commit comments

Comments
 (0)