@@ -1693,9 +1693,8 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
1693
1693
# Indexing on codes is more efficient if categories are the same,
1694
1694
# so we can apply some optimizations based on the degree of
1695
1695
# dtype-matching.
1696
- codes = recode_for_categories (
1697
- target .codes , target .categories , self .categories , copy = False
1698
- )
1696
+ cat = self ._encode_with_my_categories (target )
1697
+ codes = cat ._codes
1699
1698
else :
1700
1699
codes = self .categories .get_indexer (target )
1701
1700
@@ -1867,8 +1866,8 @@ def _validate_setitem_value(self, value):
1867
1866
"without identical categories"
1868
1867
)
1869
1868
# is_dtype_equal implies categories_match_up_to_permutation
1870
- new_codes = self ._validate_listlike (value )
1871
- value = Categorical . from_codes ( new_codes , dtype = self . dtype )
1869
+ value = self ._encode_with_my_categories (value )
1870
+ return value . _codes
1872
1871
1873
1872
# wrap scalars and hashable-listlikes in list
1874
1873
rvalue = value if not is_hashable (value ) else [value ]
@@ -2100,8 +2099,8 @@ def equals(self, other: object) -> bool:
2100
2099
if not isinstance (other , Categorical ):
2101
2100
return False
2102
2101
elif self ._categories_match_up_to_permutation (other ):
2103
- other_codes = self ._validate_listlike (other )
2104
- return np .array_equal (self ._codes , other_codes )
2102
+ other = self ._encode_with_my_categories (other )
2103
+ return np .array_equal (self ._codes , other . _codes )
2105
2104
return False
2106
2105
2107
2106
@classmethod
@@ -2112,6 +2111,23 @@ def _concat_same_type(self, to_concat):
2112
2111
2113
2112
# ------------------------------------------------------------------
2114
2113
2114
def _encode_with_my_categories(self, other: "Categorical") -> "Categorical":
    """
    Express another Categorical in terms of this Categorical's categories.

    The codes of ``other`` are passed through ``recode_for_categories``,
    mapping from ``other.categories`` to ``self.categories``, and the
    resulting codes are wrapped with ``self._from_backing_data``.

    Parameters
    ----------
    other : Categorical
        The categorical whose codes should be re-encoded.

    Returns
    -------
    Categorical
        Whatever ``self._from_backing_data`` builds from the recoded codes.

    Notes
    -----
    No validation happens here: the caller is expected to have already
    confirmed ``self._categories_match_up_to_permutation(other)``.
    """
    source_codes = other.codes
    source_categories = other.categories
    target_categories = self.categories

    # Working on the codes (rather than the values) is the cheaper path
    # when both sides share categories; copy=False is forwarded unchanged
    # so the helper decides whether a fresh array is needed.
    recoded = recode_for_categories(
        source_codes,
        source_categories,
        target_categories,
        copy=False,
    )
    return self._from_backing_data(recoded)
2130
+
2115
2131
def _categories_match_up_to_permutation (self , other : "Categorical" ) -> bool :
2116
2132
"""
2117
2133
Returns True if categoricals are the same dtype
0 commit comments