From f7351411b4699c28d5e5ad7e6436841079c27b14 Mon Sep 17 00:00:00 2001
From: Kieran O'Mahony
Date: Sat, 27 Jul 2013 12:43:47 +1000
Subject: [PATCH] BUG: to_json should raise exception for non-unique index / columns (#4359)

---
 pandas/io/json.py                        | 16 ++++++++++
 pandas/io/tests/test_json/test_pandas.py | 39 ++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/pandas/io/json.py b/pandas/io/json.py
index fff4d0085b18a..d3bea36b57e77 100644
--- a/pandas/io/json.py
+++ b/pandas/io/json.py
@@ -78,6 +78,9 @@ class SeriesWriter(Writer):
     _default_orient = 'index'

     def _format_axes(self):
+        if not self.obj.index.is_unique and self.orient == 'index':
+            raise ValueError("Series index must be unique for orient="
+                             "'%s'" % self.orient)
         if self._needs_to_date(self.obj.index):
             self.copy_if_needed()
             self.obj.index = self._format_to_date(self.obj.index.to_series())
@@ -97,6 +100,15 @@ class FrameWriter(Writer):

     def _format_axes(self):
         """ try to axes if they are datelike """
+        if not self.obj.index.is_unique and self.orient in (
+                'index', 'columns'):
+            raise ValueError("DataFrame index must be unique for orient="
+                             "'%s'." % self.orient)
+        if not self.obj.columns.is_unique and self.orient in (
+                'index', 'columns', 'records'):
+            raise ValueError("DataFrame columns must be unique for orient="
+                             "'%s'." % self.orient)
+
         if self.orient == 'columns':
             axis = 'index'
         elif self.orient == 'index':
@@ -134,10 +146,14 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
         Series :
           default is 'index'
           allowed values are: {'split','records','index'}
+          The Series index must be unique for orient 'index'.

         DataFrame :
           default is 'columns'
           allowed values are: {'split','records','index','columns','values'}
+          The DataFrame index must be unique for orients 'index' and 'columns'.
+          The DataFrame columns must be unique for orients 'index', 'columns',
+          and 'records'.

         The format of the JSON string
           split : dict like {index -> [index], columns -> [columns], data -> [values]}
diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py
index dfa46189974f2..21fae9a50c7dd 100644
--- a/pandas/io/tests/test_json/test_pandas.py
+++ b/pandas/io/tests/test_json/test_pandas.py
@@ -54,6 +54,34 @@ def setUp(self):
         self.tsframe = _tsframe.copy()
         self.mixed_frame = _mixed_frame.copy()

+    def test_frame_non_unique_index(self):
+        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
+                       columns=['x', 'y'])
+
+        self.assertRaises(ValueError, df.to_json, orient='index')
+        self.assertRaises(ValueError, df.to_json, orient='columns')
+
+        assert_frame_equal(
+            df, read_json(df.to_json(orient='split'), orient='split'))
+        unser = read_json(df.to_json(orient='records'), orient='records')
+        self.assert_(df.columns.equals(unser.columns))
+        np.testing.assert_equal(df.values, unser.values)
+        unser = read_json(df.to_json(orient='values'), orient='values')
+        np.testing.assert_equal(df.values, unser.values)
+
+    def test_frame_non_unique_columns(self):
+        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
+                       columns=['x', 'x'])
+
+        self.assertRaises(ValueError, df.to_json, orient='index')
+        self.assertRaises(ValueError, df.to_json, orient='columns')
+        self.assertRaises(ValueError, df.to_json, orient='records')
+
+        assert_frame_equal(df, read_json(df.to_json(orient='split'),
+                                         orient='split', dtype=False))
+        unser = read_json(df.to_json(orient='values'), orient='values')
+        np.testing.assert_equal(df.values, unser.values)
+
     def test_frame_from_json_to_json(self):

         def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_dtype=True, raise_ok=None):
@@ -236,6 +264,17 @@ def test_frame_to_json_except(self):
         df = DataFrame([1, 2, 3])
         self.assertRaises(ValueError, df.to_json, orient="garbage")

+    def test_series_non_unique_index(self):
+        s = Series(['a', 'b'], index=[1, 1])
+
+        self.assertRaises(ValueError, s.to_json, orient='index')
+
+        assert_series_equal(s, read_json(s.to_json(orient='split'),
+                                         orient='split', typ='series'))
+        unser = read_json(s.to_json(orient='records'),
+                          orient='records', typ='series')
+        np.testing.assert_equal(s.values, unser.values)
+
     def test_series_from_json_to_json(self):

         def _check_orient(series, orient, dtype=None, numpy=False):