EHN: Add index parameter to to_json (#18591)

reidy-p · jorisvandenbossche · commit 2efd67f379de · 2017-12-10T16:26:28.000+01:00
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -136,6 +136,7 @@ Other Enhancements
 - :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`).
 - :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
 - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
+- :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
 
 .. _whatsnew_0220.api_breaking:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1603,7 +1603,8 @@ def _repr_latex_(self):
 
     def to_json(self, path_or_buf=None, orient=None, date_format=None,
                 double_precision=10, force_ascii=True, date_unit='ms',
-                default_handler=None, lines=False, compression=None):
+                default_handler=None, lines=False, compression=None,
+                index=True):
         """
         Convert the object to a JSON string.
 
@@ -1671,6 +1672,13 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
 
             .. versionadded:: 0.21.0
 
+        index : boolean, default True
+            Whether to include the index values in the JSON string. Not
+            including the index (``index=False``) is only supported when
+            orient is 'split' or 'table'.
+
+            .. versionadded:: 0.22.0
+
         Returns
         -------
         same type as input object with filtered info axis
@@ -1723,7 +1731,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
                             double_precision=double_precision,
                             force_ascii=force_ascii, date_unit=date_unit,
                             default_handler=default_handler,
-                            lines=lines, compression=compression)
+                            lines=lines, compression=compression,
+                            index=index)
 
     def to_hdf(self, path_or_buf, key, **kwargs):
         """Write the contained data to an HDF5 file using HDFStore.
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
@@ -28,7 +28,12 @@
 # interface to/from
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms',
-            default_handler=None, lines=False, compression=None):
+            default_handler=None, lines=False, compression=None,
+            index=True):
+
+    if not index and orient not in ['split', 'table']:
+        raise ValueError("'index=False' is only valid when 'orient' is "
+                         "'split' or 'table'")
 
     path_or_buf = _stringify_path(path_or_buf)
     if lines and orient != 'records':
@@ -49,7 +54,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
     s = writer(
         obj, orient=orient, date_format=date_format,
         double_precision=double_precision, ensure_ascii=force_ascii,
-        date_unit=date_unit, default_handler=default_handler).write()
+        date_unit=date_unit, default_handler=default_handler,
+        index=index).write()
 
     if lines:
         s = _convert_to_line_delimits(s)
@@ -69,7 +75,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
 class Writer(object):
 
     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None):
         self.obj = obj
 
         if orient is None:
@@ -81,6 +87,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         self.ensure_ascii = ensure_ascii
         self.date_unit = date_unit
         self.default_handler = default_handler
+        self.index = index
 
         self.is_copy = None
         self._format_axes()
@@ -89,14 +96,20 @@ def _format_axes(self):
         raise AbstractMethodError(self)
 
     def write(self):
+        return self._write(self.obj, self.orient, self.double_precision,
+                           self.ensure_ascii, self.date_unit,
+                           self.date_format == 'iso', self.default_handler)
+
+    def _write(self, obj, orient, double_precision, ensure_ascii,
+               date_unit, iso_dates, default_handler):
         return dumps(
-            self.obj,
-            orient=self.orient,
-            double_precision=self.double_precision,
-            ensure_ascii=self.ensure_ascii,
-            date_unit=self.date_unit,
-            iso_dates=self.date_format == 'iso',
-            default_handler=self.default_handler
+            obj,
+            orient=orient,
+            double_precision=double_precision,
+            ensure_ascii=ensure_ascii,
+            date_unit=date_unit,
+            iso_dates=iso_dates,
+            default_handler=default_handler
         )
 
 
@@ -108,6 +121,15 @@ def _format_axes(self):
             raise ValueError("Series index must be unique for orient="
                              "'{orient}'".format(orient=self.orient))
 
+    def _write(self, obj, orient, double_precision, ensure_ascii,
+               date_unit, iso_dates, default_handler):
+        if not self.index and orient == 'split':
+            obj = {"name": obj.name, "data": obj.values}
+        return super(SeriesWriter, self)._write(obj, orient,
+                                                double_precision,
+                                                ensure_ascii, date_unit,
+                                                iso_dates, default_handler)
+
 
 class FrameWriter(Writer):
     _default_orient = 'columns'
@@ -123,12 +145,22 @@ def _format_axes(self):
             raise ValueError("DataFrame columns must be unique for orient="
                              "'{orient}'.".format(orient=self.orient))
 
+    def _write(self, obj, orient, double_precision, ensure_ascii,
+               date_unit, iso_dates, default_handler):
+        if not self.index and orient == 'split':
+            obj = obj.to_dict(orient='split')
+            del obj["index"]
+        return super(FrameWriter, self)._write(obj, orient,
+                                               double_precision,
+                                               ensure_ascii, date_unit,
+                                               iso_dates, default_handler)
+
 
 class JSONTableWriter(FrameWriter):
     _default_orient = 'records'
 
     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None):
         """
         Adds a `schema` attribut with the Table Schema, resets
         the index (can't do in caller, because the schema inference needs
@@ -137,7 +169,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         """
         super(JSONTableWriter, self).__init__(
             obj, orient, date_format, double_precision, ensure_ascii,
-            date_unit, default_handler=default_handler)
+            date_unit, index, default_handler=default_handler)
 
         if date_format != 'iso':
             msg = ("Trying to write with `orient='table'` and "
@@ -146,7 +178,7 @@ def __init__(self, obj, orient, date_format, double_precision,
                    .format(fmt=date_format))
             raise ValueError(msg)
 
-        self.schema = build_table_schema(obj)
+        self.schema = build_table_schema(obj, index=self.index)
 
         # NotImplementd on a column MultiIndex
         if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
@@ -168,14 +200,24 @@ def __init__(self, obj, orient, date_format, double_precision,
         if is_period_dtype(obj.index):
             obj.index = obj.index.to_timestamp()
 
-        self.obj = obj.reset_index()
+        # exclude index from obj if index=False
+        if not self.index:
+            self.obj = obj.reset_index(drop=True)
+        else:
+            self.obj = obj.reset_index(drop=False)
         self.date_format = 'iso'
         self.orient = 'records'
-
-    def write(self):
-        data = super(JSONTableWriter, self).write()
+        self.index = index
+
+    def _write(self, obj, orient, double_precision, ensure_ascii,
+               date_unit, iso_dates, default_handler):
+        data = super(JSONTableWriter, self)._write(obj, orient,
+                                                   double_precision,
+                                                   ensure_ascii, date_unit,
+                                                   iso_dates,
+                                                   default_handler)
         serialized = '{{"schema": {schema}, "data": {data}}}'.format(
-            schema=dumps(self.schema), data=data)
+                     schema=dumps(self.schema), data=data)
         return serialized
 
 
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -9,6 +9,7 @@
                     read_json, compat)
 from datetime import timedelta
 import pandas as pd
+import json
 
 from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
                                  assert_series_equal, network,
@@ -1147,3 +1148,64 @@ def test_data_frame_size_after_to_json(self):
         size_after = df.memory_usage(index=True, deep=True).sum()
 
         assert size_before == size_after
+
+    @pytest.mark.parametrize('data, expected', [
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
+            {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'),
+            {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
+                   index=[['a', 'b'], ['c', 'd']]),
+            {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
+        (Series([1, 2, 3], name='A'),
+            {'name': 'A', 'data': [1, 2, 3]}),
+        (Series([1, 2, 3], name='A').rename_axis('foo'),
+            {'name': 'A', 'data': [1, 2, 3]}),
+        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]),
+            {'name': 'A', 'data': [1, 2]}),
+    ])
+    def test_index_false_to_json_split(self, data, expected):
+        # GH 17394
+        # Testing index=False in to_json with orient='split'
+
+        result = data.to_json(orient='split', index=False)
+        result = json.loads(result)
+
+        assert result == expected
+
+    @pytest.mark.parametrize('data', [
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])),
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo')),
+        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
+                   index=[['a', 'b'], ['c', 'd']])),
+        (Series([1, 2, 3], name='A')),
+        (Series([1, 2, 3], name='A').rename_axis('foo')),
+        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']])),
+    ])
+    def test_index_false_to_json_table(self, data):
+        # GH 17394
+        # Testing index=False in to_json with orient='table'
+
+        result = data.to_json(orient='table', index=False)
+        result = json.loads(result)
+
+        expected = {
+            'schema': pd.io.json.build_table_schema(data, index=False),
+            'data': DataFrame(data).to_dict(orient='records')
+        }
+
+        assert result == expected
+
+    @pytest.mark.parametrize('orient', [
+        'records', 'index', 'columns', 'values'
+    ])
+    def test_index_false_error_to_json(self, orient):
+        # GH 17394
+        # Testing error message from to_json with index=False
+
+        df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
+
+        with tm.assert_raises_regex(ValueError, "'index=False' is only "
+                                                "valid when 'orient' is "
+                                                "'split' or 'table'"):
+            df.to_json(orient=orient, index=False)