From 81647253d0a94e29821b9f8d6b0151200b3453c5 Mon Sep 17 00:00:00 2001 From: jameswinegar Date: Sat, 22 Sep 2018 20:24:16 -0500 Subject: [PATCH 01/12] Raise ValueError on DataFrame.to_dict when orient=index and index is not unique --- pandas/core/frame.py | 7 +++++-- pandas/tests/frame/test_convert_to.py | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 81d5c112885ec..5f01e4260f969 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1163,8 +1163,11 @@ def to_dict(self, orient='dict', into=dict): for k, v in zip(self.columns, np.atleast_1d(row))) for row in self.values] elif orient.lower().startswith('i'): - return into_c((t[0], dict(zip(self.columns, t[1:]))) - for t in self.itertuples()) + if np.unique(self.index).size == len(self.index): + return into_c((t[0], dict(zip(self.columns, t[1:]))) + for t in self.itertuples()) + else: + raise ValueError("DataFrame index must be unique for orient='index'.") else: raise ValueError("orient '{o}' not understood".format(o=orient)) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index a0e23d256c25b..82e4cec58bcba 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -71,6 +71,10 @@ def test_to_dict_timestamp(self): tm.assert_dict_equal(test_data_mixed.to_dict(orient='split'), expected_split_mixed) + def test_to_dict_index_not_unique_with_index_orient(self): + df = DataFrame({'A': [0, 1], 'A': [1,2]}) + pytest.raises(ValueError, df.to_dict, orient='index') + def test_to_dict_invalid_orient(self): df = DataFrame({'A': [0, 1]}) pytest.raises(ValueError, df.to_dict, orient='xinvalid') From 616db8974a11937e6ce3d556ff3dafbc3c2092e0 Mon Sep 17 00:00:00 2001 From: jameswinegar Date: Sat, 22 Sep 2018 20:57:37 -0500 Subject: [PATCH 02/12] change to numpy based size and use methods. 
Fix test to create dataframe with non-unique index --- pandas/core/frame.py | 2 +- pandas/tests/frame/test_convert_to.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5f01e4260f969..b052e9e2d9543 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1163,7 +1163,7 @@ def to_dict(self, orient='dict', into=dict): for k, v in zip(self.columns, np.atleast_1d(row))) for row in self.values] elif orient.lower().startswith('i'): - if np.unique(self.index).size == len(self.index): + if df.index.unique().size == df.index.size: return into_c((t[0], dict(zip(self.columns, t[1:]))) for t in self.itertuples()) else: diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 82e4cec58bcba..2972ad014c325 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -72,7 +72,7 @@ def test_to_dict_timestamp(self): expected_split_mixed) def test_to_dict_index_not_unique_with_index_orient(self): - df = DataFrame({'A': [0, 1], 'A': [1,2]}) + df = DataFrame({'a': [1, 2],'b': [0.5, 0.75]}, index=['A', 'A']) pytest.raises(ValueError, df.to_dict, orient='index') def test_to_dict_invalid_orient(self): From 9949fb7d2791b209861da6580839bf2f7729d40d Mon Sep 17 00:00:00 2001 From: jameswinegar Date: Sat, 22 Sep 2018 21:00:38 -0500 Subject: [PATCH 03/12] fix pep8 --- pandas/core/frame.py | 4 +++- pandas/tests/frame/test_convert_to.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b052e9e2d9543..9e5d41217021c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1167,7 +1167,9 @@ def to_dict(self, orient='dict', into=dict): return into_c((t[0], dict(zip(self.columns, t[1:]))) for t in self.itertuples()) else: - raise ValueError("DataFrame index must be unique for orient='index'.") + raise ValueError( + "DataFrame index must be unique for orient='index'." 
+ ) else: raise ValueError("orient '{o}' not understood".format(o=orient)) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 2972ad014c325..185b36e465f11 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -72,7 +72,7 @@ def test_to_dict_timestamp(self): expected_split_mixed) def test_to_dict_index_not_unique_with_index_orient(self): - df = DataFrame({'a': [1, 2],'b': [0.5, 0.75]}, index=['A', 'A']) + df = DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) pytest.raises(ValueError, df.to_dict, orient='index') def test_to_dict_invalid_orient(self): From 8522e13de38f0b22a6994a6951a08e6a9aae887a Mon Sep 17 00:00:00 2001 From: jameswinegar Date: Sat, 22 Sep 2018 21:17:05 -0500 Subject: [PATCH 04/12] fix reference to dataframe as self --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9e5d41217021c..7a15edb16441e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1163,7 +1163,7 @@ def to_dict(self, orient='dict', into=dict): for k, v in zip(self.columns, np.atleast_1d(row))) for row in self.values] elif orient.lower().startswith('i'): - if df.index.unique().size == df.index.size: + if self.index.unique().size == self.index.size: return into_c((t[0], dict(zip(self.columns, t[1:]))) for t in self.itertuples()) else: From df58b9ddc20487a101b2977f4ea8c8e1d310668c Mon Sep 17 00:00:00 2001 From: jameswinegar Date: Sun, 23 Sep 2018 10:39:09 -0500 Subject: [PATCH 05/12] adjust for @jreback comments --- pandas/core/frame.py | 7 +++---- pandas/tests/frame/test_convert_to.py | 2 ++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7a15edb16441e..922ae376ef4a6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1163,13 +1163,12 @@ def to_dict(self, orient='dict', into=dict): for k, v in zip(self.columns, 
np.atleast_1d(row))) for row in self.values] elif orient.lower().startswith('i'): - if self.index.unique().size == self.index.size: - return into_c((t[0], dict(zip(self.columns, t[1:]))) - for t in self.itertuples()) - else: + if not self.index.is_unique: raise ValueError( "DataFrame index must be unique for orient='index'." ) + return into_c((t[0], dict(zip(self.columns, t[1:]))) + for t in self.itertuples()) else: raise ValueError("orient '{o}' not understood".format(o=orient)) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 185b36e465f11..61fe9d12c173c 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -72,6 +72,8 @@ def test_to_dict_timestamp(self): expected_split_mixed) def test_to_dict_index_not_unique_with_index_orient(self): + # GH22801 + # Data loss when indexes are not unique. Raise ValueError. df = DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) pytest.raises(ValueError, df.to_dict, orient='index') From 51e42da7befa49286b45e5f455093ef49200d6ca Mon Sep 17 00:00:00 2001 From: jameswinegar Date: Sun, 23 Sep 2018 10:57:30 -0500 Subject: [PATCH 06/12] add documentation and get origin to avoid merge conflict --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 618d7454c67fe..335bba027877e 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -191,6 +191,7 @@ Other Enhancements - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). +- :func:`~DataFrame.to_dict` with orient on index and a non-unique index will raise a ValueError instead of losing data (:issue:`22801`). .. 
_whatsnew_0240.api_breaking: Backwards incompatible API changes From 0a49ffcb3266186d43f8eccfc3f94d63ae96fb41 Mon Sep 17 00:00:00 2001 From: jameswinegar Date: Sun, 23 Sep 2018 11:05:24 -0500 Subject: [PATCH 07/12] move documentation and reword for clarity and consistency --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 335bba027877e..caca8aeb5f829 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -191,7 +191,6 @@ Other Enhancements - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). -- :func:`~DataFrame.to_dict` with orient on index and a non-unique index will raise a ValueError instead of losing data (:issue:`22801`). .. 
_whatsnew_0240.api_breaking: Backwards incompatible API changes @@ -803,6 +802,7 @@ Reshaping - Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`) - Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) - Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`) +- Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with ``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`) Build Changes ^^^^^^^^^^^^^ From 6eb2e5aa2ece8145565e8ddb475b5b423890713e Mon Sep 17 00:00:00 2001 From: James Winegar Date: Tue, 25 Sep 2018 16:41:30 -0500 Subject: [PATCH 08/12] Update v0.24.0.txt --- doc/source/whatsnew/v0.24.0.txt | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index bae7a97be60b4..d87dd8e91206a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -368,6 +368,37 @@ is the case with :attr:`Period.end_time`, for example p.end_time +.. _whatsnew_0240.api_breaking.frame_to_dict_index_orient: + +Raise ValueError in ``DataFrame.to_dict(orient='index')`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with +``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`) + +Previous Behavior: + +Calling :func:`DataFrame.to_dict` resulted in data loss when ``orient='index'`` +for non-unique indexes + +.. 
code-block:: ipython + + In [2]: df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) + + In [3]: df.to_dict(orient='index') + Out [3]: {'A': {'a': 2.0, 'b': 0.75}} + +Current Behavior: + +Calling :func:`DataFrame.to_dict` will now result in a ValueError being raised +when ``orient='index'`` for non-unique index values + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) + + df.to_dict(orient='index') + .. _whatsnew_0240.api.datetimelike.normalize: Tick DateOffset Normalize Restrictions @@ -804,7 +835,7 @@ Reshaping - Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) - Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`) - Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`) -- Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with ``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`) +- Build Changes ^^^^^^^^^^^^^ From fb2eff0c7428dd260f6756a08042b76c7d049083 Mon Sep 17 00:00:00 2001 From: jameswinegar Date: Sun, 7 Oct 2018 20:04:59 -0500 Subject: [PATCH 09/12] less verbose whatsnew --- doc/source/whatsnew/v0.24.0.txt | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6ced0148cf5fe..6100f6c7530e3 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -381,29 +381,6 @@ Raise ValueError in ``DataFrame.to_dict(orient='index')`` Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with ``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`) -Previous 
Behavior: - -Calling :func:`DataFrame.to_dict` resulted in data loss when ``orient='index'`` -for non-unique indexes - -.. code-block:: ipython - - In [2]: df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) - - In [3]: df.to_dict(orient='index') - Out [3]: {'A': {'a': 2.0, 'b': 0.75}} - -Current Behavior: - -Calling :func:`DataFrame.to_dict` will now result in a ValueError being raised -when ``orient='index'`` for non-unique index values - -.. ipython:: python - - df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) - - df.to_dict(orient='index') - .. _whatsnew_0240.api.datetimelike.normalize: Tick DateOffset Normalize Restrictions From 7a4240eff440bfa9acfd0952b726f2df16c775c1 Mon Sep 17 00:00:00 2001 From: James Winegar Date: Wed, 10 Oct 2018 17:53:11 -0500 Subject: [PATCH 10/12] Update v0.24.0.txt Add example that will raise value error. --- doc/source/whatsnew/v0.24.0.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6100f6c7530e3..46cd58d27f4d4 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -381,6 +381,11 @@ Raise ValueError in ``DataFrame.to_dict(orient='index')`` Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with ``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`) +.. ipython:: python + df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) + + df.to_dict(orient='index') + .. 
_whatsnew_0240.api.datetimelike.normalize: Tick DateOffset Normalize Restrictions From a5e89457216f382b6938081a3751cbae17ed75a1 Mon Sep 17 00:00:00 2001 From: James Winegar Date: Wed, 10 Oct 2018 20:27:50 -0500 Subject: [PATCH 11/12] Mark whatsnew to_dict example as :okexcept: and display df --- doc/source/whatsnew/v0.24.0.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 46cd58d27f4d4..e6456aa5bb976 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -382,9 +382,12 @@ Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with ``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`) .. ipython:: python +:okexcept: + df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) - - df.to_dict(orient='index') + df + + df.to_dict(orient='index') .. _whatsnew_0240.api.datetimelike.normalize: Tick DateOffset Normalize Restrictions From 6aec0749fe531940b8e57ccf4b0299d5937a6450 Mon Sep 17 00:00:00 2001 From: James Winegar Date: Wed, 10 Oct 2018 21:52:29 -0500 Subject: [PATCH 12/12] Indent :okexcept: option under the whatsnew ipython directive --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e6456aa5bb976..d4be2f60a9e7a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -382,7 +382,7 @@ Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with ``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`) .. ipython:: python -:okexcept: + :okexcept: df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) df