diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e2521cedb64cc..372dfa0f8ad42 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -859,7 +859,7 @@ Other - Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) - Fixed bug in :func:`assert_series_equal` when comparing a datetime-like array with an equivalent non extension dtype array (:issue:`37609`) - Bug in :func:`.is_bool_dtype` would raise when passed a valid string such as ``"boolean"`` (:issue:`38386`) - +- Fixed regression in logical operators raising ``ValueError`` when columns of :class:`DataFrame` are a :class:`CategoricalIndex` with unused categories (:issue:`38367`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index d8b5dba424cbf..7b14a5c636abe 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -309,11 +309,11 @@ def should_reindex_frame_op( if fill_value is None and level is None and axis is default_axis: # TODO: any other cases we should handle here? - cols = left.columns.intersection(right.columns) # Intersection is always unique so we have to check the unique columns left_uniques = left.columns.unique() right_uniques = right.columns.unique() + cols = left_uniques.intersection(right_uniques) if len(cols) and not (cols.equals(left_uniques) and cols.equals(right_uniques)): # TODO: is there a shortcut available when len(cols) == 0? return True diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py index efabc666993ee..dca12c632a418 100644 --- a/pandas/tests/frame/test_logical_ops.py +++ b/pandas/tests/frame/test_logical_ops.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import CategoricalIndex, DataFrame, Interval, Series, isnull import pandas._testing as tm @@ -162,3 +162,24 @@ def test_logical_with_nas(self): result = d["a"].fillna(False, downcast=False) | d["b"] expected = Series([True, True]) tm.assert_series_equal(result, expected) + + def test_logical_ops_categorical_columns(self): + # GH#38367 + intervals = [Interval(1, 2), Interval(3, 4)] + data = DataFrame( + [[1, np.nan], [2, np.nan]], + columns=CategoricalIndex( + intervals, categories=intervals + [Interval(5, 6)] + ), + ) + mask = DataFrame( + [[False, False], [False, False]], columns=data.columns, dtype=bool + ) + result = mask | isnull(data) + expected = DataFrame( + [[False, True], [False, True]], + columns=CategoricalIndex( + intervals, categories=intervals + [Interval(5, 6)] + ), + ) + tm.assert_frame_equal(result, expected)