From 913b56ff9b5dbaaaf93d4dfa89d889ad60e3049a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 25 Mar 2024 18:40:17 -0700 Subject: [PATCH 1/4] Backport PR #57553: API: avoid passing Manager to subclass __init__ --- pandas/core/frame.py | 39 ++++++++++++++++++----------- pandas/core/generic.py | 1 + pandas/core/series.py | 34 ++++++++++++++----------- pandas/tests/frame/test_subclass.py | 11 ++++++++ 4 files changed, 56 insertions(+), 29 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5c510d98596df..f06ab1da77da2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -656,26 +656,37 @@ class DataFrame(NDFrame, OpsMixin): def _constructor(self) -> Callable[..., DataFrame]: return DataFrame - def _constructor_from_mgr(self, mgr, axes): - if self._constructor is DataFrame: - # we are pandas.DataFrame (or a subclass that doesn't override _constructor) - return DataFrame._from_mgr(mgr, axes=axes) - else: - assert axes is mgr.axes + def _constructor_from_mgr(self, mgr, axes) -> DataFrame: + df = DataFrame._from_mgr(mgr, axes=axes) + + if type(self) is DataFrame: + # This would also work `if self._constructor is DataFrame`, but + # this check is slightly faster, benefiting the most-common case. + return df + + elif type(self).__name__ == "GeoDataFrame": + # Shim until geopandas can override their _constructor_from_mgr + # bc they have different behavior for Managers than for DataFrames return self._constructor(mgr) + # We assume that the subclass __init__ knows how to handle a + # pd.DataFrame object. + return self._constructor(df) + _constructor_sliced: Callable[..., Series] = Series - def _sliced_from_mgr(self, mgr, axes) -> Series: - return Series._from_mgr(mgr, axes) + def _constructor_sliced_from_mgr(self, mgr, axes) -> Series: + ser = Series._from_mgr(mgr, axes) + ser._name = None # caller is responsible for setting real name - def _constructor_sliced_from_mgr(self, mgr, axes): - if self._constructor_sliced is Series: - ser = self._sliced_from_mgr(mgr, axes) - ser._name = None # caller is responsible for setting real name + if type(self) is DataFrame: + # This would also work `if self._constructor_sliced is Series`, but + # this check is slightly faster, benefiting the most-common case. return ser - assert axes is mgr.axes - return self._constructor_sliced(mgr) + + # We assume that the subclass __init__ knows how to handle a + # pd.Series object. + return self._constructor_sliced(ser) # ---------------------------------------------------------------------- # Constructors diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2a86f75badecd..796357355fef4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -336,6 +336,7 @@ def _as_manager(self, typ: str, copy: bool_t = True) -> Self: # fastpath of passing a manager doesn't check the option/manager class return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) + @final @classmethod def _from_mgr(cls, mgr: Manager, axes: list[Index]) -> Self: """ diff --git a/pandas/core/series.py b/pandas/core/series.py index c1782206d4b67..6fd019656d207 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -662,14 +662,17 @@ def _constructor(self) -> Callable[..., Series]: return Series def _constructor_from_mgr(self, mgr, axes): - if self._constructor is Series: - # we are pandas.Series (or a subclass that doesn't override _constructor) - ser = Series._from_mgr(mgr, axes=axes) - ser._name = None # caller is responsible for setting real name + ser = Series._from_mgr(mgr, axes=axes) + ser._name = None # caller is responsible for setting real name + + if type(self) is Series: + # This would also work `if self._constructor is Series`, but + # this check is slightly faster, benefiting the most-common case. return ser - else: - assert axes is mgr.axes - return self._constructor(mgr) + + # We assume that the subclass __init__ knows how to handle a + # pd.Series object. + return self._constructor(ser) @property def _constructor_expanddim(self) -> Callable[..., DataFrame]: @@ -681,18 +684,19 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]: return DataFrame - def _expanddim_from_mgr(self, mgr, axes) -> DataFrame: + def _constructor_expanddim_from_mgr(self, mgr, axes): from pandas.core.frame import DataFrame - return DataFrame._from_mgr(mgr, axes=mgr.axes) + df = DataFrame._from_mgr(mgr, axes=mgr.axes) - def _constructor_expanddim_from_mgr(self, mgr, axes): - from pandas.core.frame import DataFrame + if type(self) is Series: + # This would also work `if self._constructor_expanddim is DataFrame`, + # but this check is slightly faster, benefiting the most-common case. + return df - if self._constructor_expanddim is DataFrame: - return self._expanddim_from_mgr(mgr, axes) - assert axes is mgr.axes - return self._constructor_expanddim(mgr) + # We assume that the subclass __init__ knows how to handle a + # pd.DataFrame object. + return self._constructor_expanddim(df) # types @property diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index ef78ae62cb4d6..855b58229cbdb 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -26,6 +26,17 @@ def _constructor(self): class TestDataFrameSubclassing: + def test_no_warning_on_mgr(self): + # GH#57032 + df = tm.SubclassedDataFrame( + {"X": [1, 2, 3], "Y": [1, 2, 3]}, index=["a", "b", "c"] + ) + with tm.assert_produces_warning(None): + # df.isna() goes through _constructor_from_mgr, which we want to + # *not* pass a Manager do __init__ + df.isna() + df["X"].isna() + def test_frame_subclassing_and_slicing(self): # Subclass frame and ensure it returns the right class on slicing it # In reference to PR 9632 From b2a94cc9daf21ece207a50e395292fc011b20efb Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 26 Mar 2024 10:57:19 -0700 Subject: [PATCH 2/4] whatsnew, type ignores --- doc/source/whatsnew/v2.2.2.rst | 2 +- pandas/core/frame.py | 6 ++++-- pandas/core/resample.py | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 54084abab7817..ef332027ee9f5 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -15,7 +15,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`) -- +- Avoid issuing a spurious ``DeprecationWarning`` when a custom :class:`DataFrame` or :class:`Series` subclass method is called (:issue:`57553`) .. --------------------------------------------------------------------------- .. _whatsnew_222.bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f06ab1da77da2..afcd4d014316e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1414,7 +1414,8 @@ def _get_values_for_csv( na_rep=na_rep, quoting=quoting, ) - return self._constructor_from_mgr(mgr, axes=mgr.axes) + # error: Incompatible return value type (got "DataFrame", expected "Self") + return self._constructor_from_mgr(mgr, axes=mgr.axes) # type: ignore[return-value] # ---------------------------------------------------------------------- @@ -5088,7 +5089,8 @@ def predicate(arr: ArrayLike) -> bool: return True mgr = self._mgr._get_data_subset(predicate).copy(deep=None) - return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self) + # error: Incompatible return value type (got "DataFrame", expected "Self") + return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self) # type: ignore[return-value] def insert( self, diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2d430ef4dcff6..0dd808a0ab296 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2548,7 +2548,8 @@ def _take_new_index( if axis == 1: raise NotImplementedError("axis 1 is not supported") new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) - return obj._constructor_from_mgr(new_mgr, axes=new_mgr.axes) + # error: Incompatible return value type (got "DataFrame", expected "NDFrameT") + return obj._constructor_from_mgr(new_mgr, axes=new_mgr.axes) # type: ignore[return-value] else: raise ValueError("'obj' should be either a Series or a DataFrame") From 653c53c4763621bf69ed01b4ef14fc2f8d66a4e6 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 28 Mar 2024 13:17:03 -0700 Subject: [PATCH 3/4] merge 2.2.2 file from main --- doc/source/whatsnew/v2.2.2.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index ef332027ee9f5..0dac3660c76b2 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -16,14 +16,17 @@ Fixed regressions - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`) - Avoid issuing a spurious ``DeprecationWarning`` when a custom :class:`DataFrame` or :class:`Series` subclass method is called (:issue:`57553`) +- Fixed regression in precision of :func:`to_datetime` with string and ``unit`` input (:issue:`57051`) .. --------------------------------------------------------------------------- .. _whatsnew_222.bug_fixes: Bug fixes ~~~~~~~~~ +- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the column's type was nullable boolean (:issue:`55332`) - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) +- :meth:`DataFrame.to_sql` was failing to find the right table when using the schema argument (:issue:`57539`) .. --------------------------------------------------------------------------- .. _whatsnew_222.other: From bea3c2009b3bf7fcc263c3a8102324e17e11f6fd Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 29 Mar 2024 14:46:22 -0700 Subject: [PATCH 4/4] rebase on 2.2.x whatsnew --- doc/source/whatsnew/v2.2.2.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 0dac3660c76b2..54084abab7817 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -15,18 +15,15 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`) -- Avoid issuing a spurious ``DeprecationWarning`` when a custom :class:`DataFrame` or :class:`Series` subclass method is called (:issue:`57553`) -- Fixed regression in precision of :func:`to_datetime` with string and ``unit`` input (:issue:`57051`) +- .. --------------------------------------------------------------------------- .. _whatsnew_222.bug_fixes: Bug fixes ~~~~~~~~~ -- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the column's type was nullable boolean (:issue:`55332`) - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) -- :meth:`DataFrame.to_sql` was failing to find the right table when using the schema argument (:issue:`57539`) .. --------------------------------------------------------------------------- .. _whatsnew_222.other: