From 02dd36a5c0884b36632fbcd289c53b92285f29e8 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 23 Nov 2021 12:48:46 -0800 Subject: [PATCH 1/4] BUG: DataFrame(EA2D) --- pandas/core/internals/construction.py | 2 +- pandas/tests/extension/base/dim2.py | 7 +++++++ pandas/tests/frame/test_constructors.py | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index a766f8321a641..c6fb293b3452a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -294,7 +294,7 @@ def ndarray_to_mgr( if is_1d_only_ea_dtype(vdtype) or isinstance(dtype, ExtensionDtype): # GH#19157 - if isinstance(values, np.ndarray) and values.ndim > 1: + if isinstance(values, (np.ndarray, ExtensionArray)) and values.ndim > 1: # GH#12513 a EA dtype passed with a 2D array, split into # multiple EAs that view the values values = [values[:, n] for n in range(values.shape[1])] diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index b4a817cbc37ec..1ac06d4c6932f 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -14,6 +14,13 @@ class Dim2CompatTests(BaseExtensionTests): + def test_frame_from_2d_array(self, data): + arr2d = data.repeat(2).reshape(-1, 2) + + df = pd.DataFrame(arr2d) + expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]}) + self.assert_frame_equal(df, expected) + def test_swapaxes(self, data): arr2d = data.repeat(2).reshape(-1, 2) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index fc5bffab118af..7347640fc05a7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -70,6 +70,14 @@ class TestDataFrameConstructors: + def test_constructor_from_2d_datetimearray(self): + dti = date_range("2016-01-01", periods=6, tz="US/Pacific") + dta = dti._data.reshape(3, 2) + + df = DataFrame(dta) + 
expected = DataFrame({0: dta[:, 0], 1: dta[:, 1]}) + tm.assert_frame_equal(df, expected) + def test_constructor_dict_with_tzaware_scalar(self): # GH#42505 dt = Timestamp("2019-11-03 01:00:00-0700").tz_convert("America/Los_Angeles") From 6ca443036b25a8b010312a43b2fcb4a9c572d275 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 23 Nov 2021 14:50:26 -0800 Subject: [PATCH 2/4] mypy fixup --- pandas/core/internals/construction.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index c6fb293b3452a..77f3db0d09df5 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -297,7 +297,12 @@ def ndarray_to_mgr( if isinstance(values, (np.ndarray, ExtensionArray)) and values.ndim > 1: # GH#12513 a EA dtype passed with a 2D array, split into # multiple EAs that view the values - values = [values[:, n] for n in range(values.shape[1])] + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[slice, int]" + values = [ + values[:, n] # type: ignore[call-overload] + for n in range(values.shape[1]) + ] else: values = [values] From c220a71ed2a087cd79490563ec8f8fda928682b2 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 23 Nov 2021 13:21:37 -0800 Subject: [PATCH 3/4] BUG: Series[object].unstack preserve object dtype --- pandas/core/reshape/reshape.py | 4 +++- pandas/tests/extension/base/reshaping.py | 22 +++++---------------- pandas/tests/series/methods/test_unstack.py | 12 +++++++++++ 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 6c6b14653df75..a4f2c5c87ff49 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -213,7 +213,9 @@ def get_result(self, values, value_columns, fill_value): columns = self.get_new_columns(value_columns) index = self.new_index - return self.constructor(values, 
index=index, columns=columns) + return self.constructor( + values, index=index, columns=columns, dtype=values.dtype + ) def get_new_values(self, values, fill_value=None): diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 8f241679d5108..b7bb4c95372cc 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -3,12 +3,6 @@ import numpy as np import pytest -from pandas.core.dtypes.common import ( - is_datetime64tz_dtype, - is_interval_dtype, - is_period_dtype, -) - import pandas as pd from pandas.api.extensions import ExtensionArray from pandas.core.internals import ExtensionBlock @@ -327,17 +321,11 @@ def test_unstack(self, data, index, obj): expected = ser.astype(object).unstack( level=level, fill_value=data.dtype.na_value ) - if obj == "series": - # TODO: special cases belong in dtype-specific tests - if is_datetime64tz_dtype(data.dtype): - assert expected.dtypes.apply(is_datetime64tz_dtype).all() - expected = expected.astype(object) - if is_period_dtype(data.dtype): - assert expected.dtypes.apply(is_period_dtype).all() - expected = expected.astype(object) - if is_interval_dtype(data.dtype): - assert expected.dtypes.apply(is_interval_dtype).all() - expected = expected.astype(object) + if obj == "series" and not isinstance(ser.dtype, pd.SparseDtype): + # GH#34457 SparseArray.astype(object) gives Sparse[object] + # instead of np.dtype(object) + assert (expected.dtypes == object).all() + result = result.astype(object) self.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py index 6f8f6d638dd56..23b068214dd91 100644 --- a/pandas/tests/series/methods/test_unstack.py +++ b/pandas/tests/series/methods/test_unstack.py @@ -10,6 +10,18 @@ import pandas._testing as tm +def test_unstack_preserves_object(): + mi = MultiIndex.from_product([["bar", "foo"], ["one", "two"]]) + + ser = 
Series(np.arange(4.0), index=mi, dtype=object) + + res1 = ser.unstack() + assert (res1.dtypes == object).all() + + res2 = ser.unstack(level=0) + assert (res2.dtypes == object).all() + + def test_unstack(): index = MultiIndex( levels=[["bar", "foo"], ["one", "three", "two"]], From c58b5e2ed60125ae74dc7a6ff929cde285768813 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 23 Nov 2021 18:21:34 -0800 Subject: [PATCH 4/4] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 7270227e113da..55c149d757811 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -703,6 +703,7 @@ Reshaping - Bug in :meth:`DataFrame.append` failing to retain ``index.name`` when appending a list of :class:`Series` objects (:issue:`44109`) - Fixed metadata propagation in :meth:`Dataframe.apply` method, consequently fixing the same issue for :meth:`Dataframe.transform`, :meth:`Dataframe.nunique` and :meth:`Dataframe.mode` (:issue:`28283`) - Bug in :meth:`DataFrame.stack` with ``ExtensionDtype`` columns incorrectly raising (:issue:`43561`) +- Bug in :meth:`Series.unstack` with object dtype doing unwanted type inference on resulting columns (:issue:`44595`) - Sparse