From d339f4f6f2ec3e8e6910e639c4fccad574094fb4 Mon Sep 17 00:00:00 2001 From: Rahul Sathanapalli Date: Mon, 12 Oct 2020 14:07:23 +0530 Subject: [PATCH 1/9] ENH: Add dtypes property for MultiIndex --- pandas/core/indexes/multi.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 41f046a7f5f8a..2063f8179273a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -699,6 +699,14 @@ def array(self): "'MultiIndex.to_numpy()' to get a NumPy array of tuples." ) + @cache_readonly + def dtypes(self): + """ + Return the dtypes as a Series for the underlying MultiIndex + """ + from pandas import Series + return Series({l.name: l.dtype for l in self.levels}) + @property def shape(self): """ From 689d55835f7510db2859ca4f4d7e11f7aa06f64e Mon Sep 17 00:00:00 2001 From: Rahul Sathanapalli Date: Mon, 12 Oct 2020 15:09:12 +0530 Subject: [PATCH 2/9] Add whatsnew, test and run black --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/multi.py | 1 + pandas/tests/indexes/multi/test_get_set.py | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d08e8e009811a..3180b9546815f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -191,6 +191,7 @@ Other enhancements - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`) - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`) - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) +- Added :meth:`MultiIndex.dtypes` (:issue:`37062`) .. _whatsnew_120.api_breaking.python: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 2063f8179273a..bbe90bac4fbdf 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -705,6 +705,7 @@ def dtypes(self): Return the dtypes as a Series for the underlying MultiIndex """ from pandas import Series + return Series({l.name: l.dtype for l in self.levels}) @property diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index b9132f429905d..3f6f26536aa9c 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -27,6 +27,11 @@ def test_get_level_number_integer(idx): idx._get_level_number("fourth") +def test_get_dtypes(idx): + expected = pd.Series({"first": np.dtype("O"), "second": np.dtype("O")}) + assert expected.equals(idx.dtypes) + + def test_set_name_methods(idx, index_names): # so long as these are synonyms, we don't need to test set_names assert idx.rename == idx.set_names From de56f049f05e63f661268db914c3241b31ba87bf Mon Sep 17 00:00:00 2001 From: Rahul Sathanapalli Date: Wed, 14 Oct 2020 10:56:35 +0530 Subject: [PATCH 3/9] Update test and add entry in reference/indexing.rst --- doc/source/reference/indexing.rst | 1 + pandas/tests/indexes/multi/conftest.py | 12 ++++++++++++ pandas/tests/indexes/multi/test_get_set.py | 15 ++++++++++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index ba12c19763605..beff25b8dd986 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -290,6 +290,7 @@ MultiIndex properties MultiIndex.codes MultiIndex.nlevels MultiIndex.levshape + MultiIndex.dtypes MultiIndex components ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 67ebfcddf6c2d..72945372d6773 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -24,6 +24,18 @@ def idx(): return mi +@pytest.fixture +def idx_multitype(): + # a MultiIndex with several dtypes + first_axis = [1, 2, 3] + second_axis = list("abc") + third_axis = pd.date_range("20200101", periods=2, tz="UTC") + mi = pd.MultiIndex.from_product( + [first_axis, second_axis, third_axis], names=["int", "string", "dt"] + ) + return mi + + @pytest.fixture def idx_dup(): # compare tests/indexes/multi/conftest.py diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 3f6f26536aa9c..2419d41b03d65 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -3,6 +3,8 @@ import pandas as pd from pandas import CategoricalIndex, MultiIndex +from pandas.core.dtypes.dtypes import DatetimeTZDtype as DateTimeTZDtype + import pandas._testing as tm @@ -27,9 +29,16 @@ def test_get_level_number_integer(idx): idx._get_level_number("fourth") -def test_get_dtypes(idx): - expected = pd.Series({"first": np.dtype("O"), "second": np.dtype("O")}) - assert expected.equals(idx.dtypes) +def test_get_dtypes(idx_multitype): + # Test MultiIndex.dtypes (GH-37062) + expected = pd.Series( + { + "int": np.dtype("int64"), + "string": np.dtype("O"), + "dt": DateTimeTZDtype(tz="utc"), + } + ) + assert expected.equals(idx_multitype.dtypes) def test_set_name_methods(idx, index_names): From 6302290d416bb811ad92d237e5b574fdb3bc2983 Mon Sep 17 00:00:00 2001 From: Rahul Sathanapalli Date: Wed, 14 Oct 2020 11:22:55 +0530 Subject: [PATCH 4/9] Run isort --- pandas/tests/indexes/multi/test_get_set.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 2419d41b03d65..f27d17ac538fd 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -1,10 +1,10 @@ import numpy as np import pytest -import pandas as pd -from pandas import CategoricalIndex, MultiIndex from pandas.core.dtypes.dtypes import DatetimeTZDtype as DateTimeTZDtype +import pandas as pd +from pandas import CategoricalIndex, MultiIndex import pandas._testing as tm From 7e39db28275844bf08bc5816ab8c65c0a8ea86b3 Mon Sep 17 00:00:00 2001 From: Rahul Sathanapalli Date: Thu, 5 Nov 2020 01:14:37 +0530 Subject: [PATCH 5/9] Fix inconsistent use of pd namespace --- pandas/tests/indexes/multi/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index cd4ff52f2c683..9f10350626962 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -30,7 +30,7 @@ def idx_multitype(): first_axis = [1, 2, 3] second_axis = list("abc") third_axis = pd.date_range("20200101", periods=2, tz="UTC") - mi = pd.MultiIndex.from_product( + mi = MultiIndex.from_product( [first_axis, second_axis, third_axis], names=["int", "string", "dt"] ) return mi From eb526ae3d4982064c04b14096175143759af5b2e Mon Sep 17 00:00:00 2001 From: Rahul Sathanapalli Date: Sun, 29 Nov 2020 13:11:44 +0530 Subject: [PATCH 6/9] Remove ambiguous var name --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a92f4697e2fd5..c5c6a46c8293e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -708,7 +708,7 @@ def dtypes(self): """ from pandas import Series - return Series({l.name: l.dtype for l in self.levels}) + return Series({level.name: level.dtype for level in self.levels}) @property def shape(self) -> Shape: From 03aa868410ecda05acf9d5260db9eac407cd46d4 Mon Sep 17 00:00:00 2001 From: Rahul Sathanapalli Date: Tue, 8 Dec 2020 11:58:37 +0530 Subject: [PATCH 7/9] Minor code quality fixes --- pandas/core/indexes/multi.py | 2 +- pandas/tests/indexes/multi/conftest.py | 6 ++---- pandas/tests/indexes/multi/test_get_set.py | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c5c6a46c8293e..71a7a3265b46d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -702,7 +702,7 @@ def array(self): ) @cache_readonly - def dtypes(self): + def dtypes(self) -> "Series": """ Return the dtypes as a Series for the underlying MultiIndex """ diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 9f10350626962..f101312ae1579 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -27,11 +27,9 @@ def idx(): @pytest.fixture def idx_multitype(): # a MultiIndex with several dtypes - first_axis = [1, 2, 3] - second_axis = list("abc") - third_axis = pd.date_range("20200101", periods=2, tz="UTC") mi = MultiIndex.from_product( - [first_axis, second_axis, third_axis], names=["int", "string", "dt"] + [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")], + names=["int", "string", "dt"], ) return mi diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index d34c3494515ff..1c4dbe51ae682 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -30,7 +30,7 @@ def test_get_level_number_integer(idx): def test_get_dtypes(idx_multitype): - # Test MultiIndex.dtypes (GH-37062) + # Test MultiIndex.dtypes (# Gh37062) expected = pd.Series( { "int": np.dtype("int64"), @@ -38,7 +38,7 @@ def test_get_dtypes(idx_multitype): "dt": DateTimeTZDtype(tz="utc"), } ) - assert expected.equals(idx_multitype.dtypes) + tm.assert_series_equal(expected, idx_multitype.dtypes) def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data): From fe6d4019e8b99c838d518e66f2aca3e02f3823a2 Mon Sep 17 00:00:00 2001 From: Rahul Sathanapalli Date: Tue, 8 Dec 2020 23:41:18 +0530 Subject: [PATCH 8/9] Moved line to v1.3.0 --- doc/source/whatsnew/v1.2.0.rst | 1 - doc/source/whatsnew/v1.3.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 276ba92dc41d7..4294871b56bcb 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -286,7 +286,6 @@ Other enhancements - Calling a NumPy ufunc on a ``DataFrame`` with extension types now preserves the extension types when possible (:issue:`23743`). - Calling a binary-input NumPy ufunc on multiple ``DataFrame`` objects now aligns, matching the behavior of binary operations and ufuncs on ``Series`` (:issue:`23743`). - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) -- Added :meth:`MultiIndex.dtypes` (:issue:`37062`) - :meth:`DataFrame.to_parquet` now supports :class:`MultiIndex` for columns in parquet format (:issue:`34777`) - :func:`read_parquet` gained a ``use_nullable_dtypes=True`` option to use nullable dtypes that use ``pd.NA`` as missing value indicator where possible diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b40f012f034b6..e62c7a0073dcf 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -19,7 +19,7 @@ Enhancements Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- Added :meth:`MultiIndex.dtypes` (:issue:`37062`) - .. --------------------------------------------------------------------------- From a083de956313ce6046df5a232fcc3fb5c0536500 Mon Sep 17 00:00:00 2001 From: Rahul Sathanapalli Date: Tue, 8 Dec 2020 23:53:00 +0530 Subject: [PATCH 9/9] Remove alias and fixture --- pandas/tests/indexes/multi/conftest.py | 10 ---------- pandas/tests/indexes/multi/test_get_set.py | 10 +++++++--- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index f101312ae1579..a77af84ee1ed0 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -24,16 +24,6 @@ def idx(): return mi -@pytest.fixture -def idx_multitype(): - # a MultiIndex with several dtypes - mi = MultiIndex.from_product( - [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")], - names=["int", "string", "dt"], - ) - return mi - - @pytest.fixture def idx_dup(): # compare tests/indexes/multi/conftest.py diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 1c4dbe51ae682..83cebf90623fe 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.core.dtypes.dtypes import DatetimeTZDtype as DateTimeTZDtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd from pandas import CategoricalIndex, MultiIndex @@ -29,13 +29,17 @@ def test_get_level_number_integer(idx): idx._get_level_number("fourth") -def test_get_dtypes(idx_multitype): +def test_get_dtypes(): # Test MultiIndex.dtypes (# Gh37062) + idx_multitype = MultiIndex.from_product( + [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")], + names=["int", "string", "dt"], + ) expected = pd.Series( { "int": np.dtype("int64"), "string": np.dtype("O"), - "dt": DateTimeTZDtype(tz="utc"), + "dt": DatetimeTZDtype(tz="utc"), } ) tm.assert_series_equal(expected, idx_multitype.dtypes)