pandas-dev · simonjayhawkins · Nov 28, 2020 · Nov 29, 2020 · Nov 29, 2020 · Nov 29, 2020
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -46,11 +46,13 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.generic import (
+    ABCDatetimeArray,
     ABCExtensionArray,
     ABCIndex,
     ABCIndexClass,
     ABCMultiIndex,
     ABCSeries,
+    ABCTimedeltaArray,
 )
 from pandas.core.dtypes.missing import isna, na_value_for_dtype
 
@@ -191,8 +193,15 @@ def _reconstruct_data(
     -------
     ExtensionArray or np.ndarray
     """
-    if is_extension_array_dtype(dtype):
-        values = dtype.construct_array_type()._from_sequence(values)
+    if isinstance(values, ABCExtensionArray) and values.dtype == dtype:
+        # Catch DatetimeArray/TimedeltaArray
+        return values
+    elif is_extension_array_dtype(dtype):
+        cls = dtype.construct_array_type()
+        if isinstance(values, cls) and values.dtype == dtype:
+            return values
+
+        values = cls._from_sequence(values)
     elif is_bool_dtype(dtype):
         values = values.astype(dtype, copy=False)
 
@@ -654,6 +663,8 @@ def factorize(
 
     values = _ensure_arraylike(values)
     original = values
+    if not isinstance(values, ABCMultiIndex):
+        values = extract_array(values, extract_numpy=True)
 
     # GH35667, if na_sentinel=None, we will not dropna NaNs from the uniques
     # of values, assign na_sentinel=-1 to replace code value for NaN.
@@ -662,6 +673,19 @@ def factorize(
         na_sentinel = -1
         dropna = False
 
+    if (
+        isinstance(values, (ABCDatetimeArray, ABCTimedeltaArray))
+        and values.freq is not None
+    ):
+        codes, uniques = values.factorize(sort=sort)
+        if isinstance(original, ABCIndexClass):
+            uniques = original._shallow_copy(uniques, name=None)
+        elif isinstance(original, ABCSeries):
+            from pandas import Index
+
+            uniques = Index(uniques)
+        return codes, uniques
+
     if is_extension_array_dtype(values.dtype):
         values = extract_array(values)
         codes, uniques = values.factorize(na_sentinel=na_sentinel)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -1660,6 +1660,20 @@ def mean(self, skipna=True):
         # Don't have to worry about NA `result`, since no NA went in.
         return self._box_func(result)
 
+    # --------------------------------------------------------------
+
+    def factorize(self, na_sentinel=-1, sort: bool = False):
+        if self.freq is not None:
+            # We must be unique, so can short-circuit (and retain freq)
+            codes = np.arange(len(self), dtype=np.intp)
+            uniques = self.copy()  # TODO: copy or view?
+            if sort and self.freq.n < 0:
+                codes = codes[::-1]
+                uniques = uniques[::-1]
+            return codes, uniques
+        # FIXME: shouldn't get here; we are ignoring sort
+        return super().factorize(na_sentinel=na_sentinel)
+
 
 DatetimeLikeArrayMixin._add_comparison_ops()
 

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -48,6 +48,7 @@
 
 import pandas.core.algorithms as algos
 from pandas.core.arrays import datetimelike as dtl
+from pandas.core.arrays.base import ExtensionArray
 import pandas.core.common as com
 
 
@@ -766,6 +767,9 @@ def _check_timedeltalike_freq_compat(self, other):
 
         raise raise_on_incompatible(self, other)
 
+    def factorize(self, na_sentinel=-1):
+        return ExtensionArray.factorize(self, na_sentinel=na_sentinel)
+
 
 def raise_on_incompatible(left, right):
     """

diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -271,10 +271,12 @@ def test_factorize(self):
         arr, idx = idx1.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         arr, idx = idx1.factorize(sort=True)
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         # tz must be preserved
         idx1 = idx1.tz_localize("Asia/Tokyo")
@@ -283,6 +285,7 @@ def test_factorize(self):
         arr, idx = idx1.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         idx2 = pd.DatetimeIndex(
             ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
@@ -293,49 +296,65 @@ def test_factorize(self):
         arr, idx = idx2.factorize(sort=True)
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
         exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
         arr, idx = idx2.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
-        # freq must be preserved
+    def test_factorize_preserves_freq(self):
+        # GH#38120 freq should be preserved
         idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo")
         exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
+
         arr, idx = idx3.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
 
-    def test_factorize_tz(self, tz_naive_fixture):
+        arr, idx = pd.factorize(idx3)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
+
+    def test_factorize_tz(self, tz_naive_fixture, index_or_series):
         tz = tz_naive_fixture
         # GH#13750
         base = pd.date_range("2016-11-05", freq="H", periods=100, tz=tz)
         idx = base.repeat(5)
 
         exp_arr = np.arange(100, dtype=np.intp).repeat(5)
 
-        for obj in [idx, pd.Series(idx)]:
-            arr, res = obj.factorize()
-            tm.assert_numpy_array_equal(arr, exp_arr)
-            expected = base._with_freq(None)
-            tm.assert_index_equal(res, expected)
+        obj = index_or_series(idx)
 
-    def test_factorize_dst(self):
-        # GH 13750
-        idx = pd.date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern")
-
-        for obj in [idx, pd.Series(idx)]:
-            arr, res = obj.factorize()
-            tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
-            tm.assert_index_equal(res, idx)
-
-        idx = pd.date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern")
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        expected = base._with_freq(None)
+        tm.assert_index_equal(res, expected)
+        assert res.freq == expected.freq
 
-        for obj in [idx, pd.Series(idx)]:
-            arr, res = obj.factorize()
-            tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
-            tm.assert_index_equal(res, idx)
+    def test_factorize_dst(self, index_or_series):
+        # GH 13750
+        idx = date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern")
+        obj = index_or_series(idx)
+
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
+        tm.assert_index_equal(res, idx)
+        if index_or_series is Index:
+            assert res.freq == idx.freq
+
+        idx = date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern")
+        obj = index_or_series(idx)
+
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
+        tm.assert_index_equal(res, idx)
+        if index_or_series is Index:
+            assert res.freq == idx.freq
 
     @pytest.mark.parametrize(
         "arr, expected",

diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -75,17 +75,26 @@ def test_factorize(self):
         arr, idx = idx1.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         arr, idx = idx1.factorize(sort=True)
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
-        # freq must be preserved
+    def test_factorize_preserves_freq(self):
+        # GH#38120 freq should be preserved
         idx3 = timedelta_range("1 day", periods=4, freq="s")
         exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
         arr, idx = idx3.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
+
+        arr, idx = pd.factorize(idx3)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
 
     def test_sort_values(self):
 

diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py
@@ -91,3 +91,13 @@ def test_multiindex_get_loc_list_raises(self):
         msg = "unhashable type"
         with pytest.raises(TypeError, match=msg):
             idx.get_loc([])
+
+    def test_multiindex_with_datatime_level_preserves_freq(self):
+        # https://github.com/pandas-dev/pandas/issues/35563
+        idx = Index(range(2), name="A")
+        dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
+        mi = MultiIndex.from_product([idx, dti])
+        df = DataFrame(np.random.randn(14, 2), index=mi)
+        result = df.loc[0].index
+        tm.assert_index_equal(result, dti)
+        assert result.freq == dti.freq
diff --git a/pandas/tests/window/common.py b/pandas/tests/window/common.py
@@ -12,7 +12,6 @@ def get_result(obj, obj2=None):
     result = result.loc[(slice(None), 1), 5]
     result.index = result.index.droplevel(1)
     expected = get_result(frame[1], frame[5])
-    expected.index = expected.index._with_freq(None)
     tm.assert_series_equal(result, expected, check_names=False)