diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index c7286616672b9..1d52a5595472b 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -488,3 +488,49 @@ registers the default "matplotlib" backend as follows. More information on how to implement a third-party plotting backend can be found at https://github.com/pandas-dev/pandas/blob/main/pandas/plotting/__init__.py#L1. + +.. _extending.pandas_priority: + +Arithmetic with 3rd party types +------------------------------- + +In order to control how arithmetic works between a custom type and a pandas type, +implement ``__pandas_priority__``. Similar to numpy's ``__array_priority__`` +semantics, arithmetic methods on :class:`DataFrame`, :class:`Series`, and :class:`Index` +objects will delegate to ``other`` if it has an attribute ``__pandas_priority__`` with a higher value. + +By default, pandas objects try to operate with other objects, even if they are not types known to pandas: + +.. code-block:: python + + >>> pd.Series([1, 2]) + [10, 20] + 0 11 + 1 22 + dtype: int64 + +In the example above, if ``[10, 20]`` were a custom type that can be understood as a list, pandas objects would still operate with it in the same way. + +In some cases, it is useful to delegate the operation to the other type. For example, suppose I implement a +custom list object, and I want the result of adding my custom list to a pandas :class:`Series` to be an instance of my list +and not a :class:`Series` as seen in the previous example. This is now possible by defining the ``__pandas_priority__`` attribute +of my custom list, and setting it to a higher value than the priority of the pandas objects I want to operate with. + +The ``__pandas_priority__`` values of :class:`DataFrame`, :class:`Series`, and :class:`Index` are ``4000``, ``3000``, and ``2000`` respectively. The base ``ExtensionArray.__pandas_priority__`` is ``1000``. + +.. 
code-block:: python + + class CustomList(list): + __pandas_priority__ = 5000 + + def __radd__(self, other): + # return `self` and not the addition for simplicity + return self + + custom = CustomList() + series = pd.Series([1, 2, 3]) + + # Series refuses to add custom, since it's an unknown type with higher priority + assert series.__add__(custom) is NotImplemented + + # This will cause the custom class `__radd__` to be used instead + assert series + custom is custom diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 7e8403c94ceef..ccf76f0fbc7fd 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -28,6 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ +- Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`) - :meth:`MultiIndex.sort_values` now supports ``na_position`` (:issue:`51612`) - :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`) - Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index bbfe9b9bbb6c7..8c269244c37ce 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -235,6 +235,12 @@ class ExtensionArray: # Don't override this. _typ = "extension" + # similar to __array_priority__, positions ExtensionArray after Index, + # Series, and DataFrame. EA subclasses may override to choose which EA + # subclass takes priority. If overriding, the value should always be + # strictly less than 2000 to be below Index.__pandas_priority__. 
+ __pandas_priority__ = 1000 + # ------------------------------------------------------------------------ # Constructors # ------------------------------------------------------------------------ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 70019030da182..bc28cc425a412 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -634,6 +634,10 @@ class DataFrame(NDFrame, OpsMixin): _hidden_attrs: frozenset[str] = NDFrame._hidden_attrs | frozenset([]) _mgr: BlockManager | ArrayManager + # similar to __array_priority__, positions DataFrame before Series, Index, + # and ExtensionArray. Should NOT be overridden by subclasses. + __pandas_priority__ = 4000 + @property def _constructor(self) -> Callable[..., DataFrame]: return DataFrame diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 620823b9703ab..2f658006cf93f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -351,6 +351,10 @@ class Index(IndexOpsMixin, PandasObject): # To hand over control to subclasses _join_precedence = 1 + # similar to __array_priority__, positions Index after Series and DataFrame + # but before ExtensionArray. Should NOT be overridden by subclasses. 
+ __pandas_priority__ = 2000 + # Cython methods; see github.com/cython/cython/issues/2647 # for why we need to wrap these instead of making them class attributes # Moreover, cython will choose the appropriate-dtyped sub-function diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index 01fb9aa17fc48..f8f53310e773b 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -14,7 +14,6 @@ from pandas._libs.missing import is_matching_na from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCIndex, ABCSeries, ) @@ -75,10 +74,10 @@ def new_method(self, other): # For comparison ops, Index does *not* defer to Series pass else: - for cls in [ABCDataFrame, ABCSeries, ABCIndex]: - if isinstance(self, cls): - break - if isinstance(other, cls): + prio = getattr(other, "__pandas_priority__", None) + if prio is not None: + if prio > self.__pandas_priority__: + # e.g. other is DataFrame while self is Index/Series/EA return NotImplemented other = item_from_zerodim(other) diff --git a/pandas/core/series.py b/pandas/core/series.py index e8d6491e43007..58cd42eaa7ca3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -352,6 +352,10 @@ class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] base.IndexOpsMixin._hidden_attrs | NDFrame._hidden_attrs | frozenset([]) ) + # similar to __array_priority__, positions Series after DataFrame + # but before Index and ExtensionArray. Should NOT be overridden by subclasses. 
+ __pandas_priority__ = 3000 + # Override cache_readonly bc Series is mutable # error: Incompatible types in assignment (expression has type "property", # base class "IndexOpsMixin" defined the type as "Callable[[IndexOpsMixin], bool]") diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index b17dce234043c..a97676578c079 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -267,3 +267,19 @@ def test_frame_setitem_dask_array_into_new_col(): tm.assert_frame_equal(result, expected) finally: pd.set_option("compute.use_numexpr", olduse) + + +def test_pandas_priority(): + # GH#48347 + + class MyClass: + __pandas_priority__ = 5000 + + def __radd__(self, other): + return self + + left = MyClass() + right = Series(range(3)) + + assert right.__add__(left) is NotImplemented + assert right + left is left