From 00284c05c1194798a51716c3004b098270f23b44 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 21 Feb 2024 12:06:53 -0800 Subject: [PATCH 1/3] API: avoid passing manager to subclass __init__ --- pandas/core/frame.py | 34 ++++++++++++++++++++-------------- pandas/core/generic.py | 1 + pandas/core/series.py | 34 +++++++++++++++++++--------------- 3 files changed, 40 insertions(+), 29 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index abb043361483c..fecbb20fed69f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -650,25 +650,31 @@ def _constructor(self) -> type[DataFrame]: return DataFrame def _constructor_from_mgr(self, mgr, axes) -> DataFrame: - if self._constructor is DataFrame: - # we are pandas.DataFrame (or a subclass that doesn't override _constructor) - return DataFrame._from_mgr(mgr, axes=axes) - else: - assert axes is mgr.axes - return self._constructor(mgr) + df = DataFrame._from_mgr(mgr, axes=axes) - _constructor_sliced: Callable[..., Series] = Series + if type(self) is DataFrame: + # This would also work `if self._constructor is DataFrame`, but + # this check is slightly faster, benefiting the most-common case. + return df + + # We assume that the subclass __init__ knows how to handle a + # pd.DataFrame object. + return self._constructor(df) - def _sliced_from_mgr(self, mgr, axes) -> Series: - return Series._from_mgr(mgr, axes) + _constructor_sliced: Callable[..., Series] = Series def _constructor_sliced_from_mgr(self, mgr, axes) -> Series: - if self._constructor_sliced is Series: - ser = self._sliced_from_mgr(mgr, axes) - ser._name = None # caller is responsible for setting real name + ser = Series._from_mgr(mgr, axes) + ser._name = None # caller is responsible for setting real name + + if type(self) is DataFrame: + # This would also work `if self._constructor_sliced is Series`, but + # this check is slightly faster, benefiting the most-common case. return ser - assert axes is mgr.axes - return self._constructor_sliced(mgr) + + # We assume that the subclass __init__ knows how to handle a + # pd.Series object. + return self._constructor_sliced(ser) # ---------------------------------------------------------------------- # Constructors diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 668672ed05f72..04e5415fcefcf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -288,6 +288,7 @@ def _init_mgr( mgr = mgr.astype(dtype=dtype) return mgr + @final @classmethod def _from_mgr(cls, mgr: Manager, axes: list[Index]) -> Self: """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 5956fa59528a7..409e49be8fbb4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -575,14 +575,17 @@ def _constructor(self) -> type[Series]: return Series def _constructor_from_mgr(self, mgr, axes): - if self._constructor is Series: - # we are pandas.Series (or a subclass that doesn't override _constructor) - ser = Series._from_mgr(mgr, axes=axes) - ser._name = None # caller is responsible for setting real name + ser = Series._from_mgr(mgr, axes=axes) + ser._name = None # caller is responsible for setting real name + + if type(self) is Series: + # This would also work `if self._constructor is Series`, but + # this check is slightly faster, benefiting the most-common case. return ser - else: - assert axes is mgr.axes - return self._constructor(mgr) + + # We assume that the subclass __init__ knows how to handle a + # pd.Series object. + return self._constructor(ser) @property def _constructor_expanddim(self) -> Callable[..., DataFrame]: @@ -594,18 +597,19 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]: return DataFrame - def _expanddim_from_mgr(self, mgr, axes) -> DataFrame: + def _constructor_expanddim_from_mgr(self, mgr, axes): from pandas.core.frame import DataFrame - return DataFrame._from_mgr(mgr, axes=mgr.axes) + df = DataFrame._from_mgr(mgr, axes=mgr.axes) - def _constructor_expanddim_from_mgr(self, mgr, axes): - from pandas.core.frame import DataFrame + if type(self) is Series: + # This would also work `if self._constructor_expanddim is DataFrame`, + # but this check is slightly faster, benefiting the most-common case. + return df - if self._constructor_expanddim is DataFrame: - return self._expanddim_from_mgr(mgr, axes) - assert axes is mgr.axes - return self._constructor_expanddim(mgr) + # We assume that the subclass __init__ knows how to handle a + # pd.DataFrame object. + return self._constructor_expanddim(df) # types @property From 227134ea05e3abdfcf565af322443f461796179f Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 21 Feb 2024 12:11:10 -0800 Subject: [PATCH 2/3] shim for geopandas --- pandas/core/frame.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fecbb20fed69f..8833777b48b63 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -657,6 +657,11 @@ def _constructor_from_mgr(self, mgr, axes) -> DataFrame: # this check is slightly faster, benefiting the most-common case. return df + elif type(self).__name__ == "GeoDataFrame": + # Shim until geopandas can override their _constructor_from_mgr + # bc they have different behavior for Managers than for DataFrames + return self._constructor(mgr) + # We assume that the subclass __init__ knows how to handle a # pd.DataFrame object. return self._constructor(df) From 7cee7986153ee5cfdf5fbc1d048f5bb080f662ee Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 12 Mar 2024 09:56:08 -0700 Subject: [PATCH 3/3] add test --- pandas/tests/frame/test_subclass.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 316216e69a587..355953eac9d51 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -16,6 +16,17 @@ class TestDataFrameSubclassing: + def test_no_warning_on_mgr(self): + # GH#57032 + df = tm.SubclassedDataFrame( + {"X": [1, 2, 3], "Y": [1, 2, 3]}, index=["a", "b", "c"] + ) + with tm.assert_produces_warning(None): + # df.isna() goes through _constructor_from_mgr, which we want to + # *not* pass a Manager do __init__ + df.isna() + df["X"].isna() + def test_frame_subclassing_and_slicing(self): # Subclass frame and ensure it returns the right class on slicing it # In reference to PR 9632