From 0d7a41ba7820ebbe130f5753963376e844cbf54b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 5 Nov 2020 07:16:05 -0800
Subject: [PATCH 1/2] REF: implement Categorical.encode_with_my_categories

---
 pandas/core/arrays/categorical.py | 30 +++++++++++++++++++++++-------
 pandas/core/dtypes/concat.py      |  2 +-
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 626fb495dec03..e5a5718d96cbe 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1694,9 +1694,8 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
             # Indexing on codes is more efficient if categories are the same,
             #  so we can apply some optimizations based on the degree of
             #  dtype-matching.
-            codes = recode_for_categories(
-                target.codes, target.categories, self.categories, copy=False
-            )
+            cat = self.encode_with_my_categories(target)
+            codes = cat._codes
         else:
             codes = self.categories.get_indexer(target)
 
@@ -1868,8 +1867,8 @@ def _validate_setitem_value(self, value):
                     "without identical categories"
                 )
             # is_dtype_equal implies categories_match_up_to_permutation
-            new_codes = self._validate_listlike(value)
-            value = Categorical.from_codes(new_codes, dtype=self.dtype)
+            value = self.encode_with_my_categories(value)
+            return value._codes
 
         # wrap scalars and hashable-listlikes in list
         rvalue = value if not is_hashable(value) else [value]
@@ -2101,8 +2100,8 @@ def equals(self, other: object) -> bool:
         if not isinstance(other, Categorical):
             return False
         elif self._categories_match_up_to_permutation(other):
-            other_codes = self._validate_listlike(other)
-            return np.array_equal(self._codes, other_codes)
+            other = self.encode_with_my_categories(other)
+            return np.array_equal(self._codes, other._codes)
         return False
 
     @classmethod
@@ -2113,6 +2112,23 @@ def _concat_same_type(self, to_concat):
 
     # ------------------------------------------------------------------
 
+    def encode_with_my_categories(self, other: "Categorical") -> "Categorical":
+        """
+        Re-encode another categorical using this Categorical's categories.
+
+        Notes
+        -----
+        This assumes we have already checked
+        self._categories_match_up_to_permutation(other).
+        """
+        # Categories are assumed to match up to permutation (see Notes), so
+        #  remapping other's codes onto self.categories is more efficient
+        #  than looking up other's values in self.categories.
+        codes = recode_for_categories(
+            other.codes, other.categories, self.categories, copy=False
+        )
+        return self._from_backing_data(codes)
+
     def _categories_match_up_to_permutation(self, other: "Categorical") -> bool:
         """
         Returns True if categoricals are the same dtype
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 99dc01ef421d1..11f8ed342fe2c 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -301,7 +301,7 @@ def _maybe_unwrap(x):
         categories = first.categories
         ordered = first.ordered
 
-        all_codes = [first._validate_listlike(x) for x in to_union]
+        all_codes = [first.encode_with_my_categories(x)._codes for x in to_union]
         new_codes = np.concatenate(all_codes)
 
         if sort_categories and not ignore_order and ordered:

From 46635294b5daf864ce4e80e2fdffeb2344481b29 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 7 Nov 2020 18:38:07 -0800
Subject: [PATCH 2/2] privatize

---
 pandas/core/arrays/categorical.py | 8 ++++----
 pandas/core/dtypes/concat.py      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 61d6ceade25b7..87a049c77dc32 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1693,7 +1693,7 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
             # Indexing on codes is more efficient if categories are the same,
             #  so we can apply some optimizations based on the degree of
             #  dtype-matching.
-            cat = self.encode_with_my_categories(target)
+            cat = self._encode_with_my_categories(target)
             codes = cat._codes
         else:
             codes = self.categories.get_indexer(target)
@@ -1866,7 +1866,7 @@ def _validate_setitem_value(self, value):
                     "without identical categories"
                 )
             # is_dtype_equal implies categories_match_up_to_permutation
-            value = self.encode_with_my_categories(value)
+            value = self._encode_with_my_categories(value)
             return value._codes
 
         # wrap scalars and hashable-listlikes in list
@@ -2099,7 +2099,7 @@ def equals(self, other: object) -> bool:
         if not isinstance(other, Categorical):
             return False
         elif self._categories_match_up_to_permutation(other):
-            other = self.encode_with_my_categories(other)
+            other = self._encode_with_my_categories(other)
             return np.array_equal(self._codes, other._codes)
         return False
 
@@ -2111,7 +2111,7 @@ def _concat_same_type(self, to_concat):
 
     # ------------------------------------------------------------------
 
-    def encode_with_my_categories(self, other: "Categorical") -> "Categorical":
+    def _encode_with_my_categories(self, other: "Categorical") -> "Categorical":
         """
         Re-encode another categorical using this Categorical's categories.
 
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 11f8ed342fe2c..a38d9cbad0d64 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -301,7 +301,7 @@ def _maybe_unwrap(x):
         categories = first.categories
         ordered = first.ordered
 
-        all_codes = [first.encode_with_my_categories(x)._codes for x in to_union]
+        all_codes = [first._encode_with_my_categories(x)._codes for x in to_union]
         new_codes = np.concatenate(all_codes)
 
         if sort_categories and not ignore_order and ordered: