From 12004a0dd121b3650a78266c545e8150355f3329 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 13 Feb 2023 19:03:35 -0800
Subject: [PATCH 1/3] BUG: GroupBy.quantile with datetimelike and NaT

---
 pandas/core/groupby/groupby.py        | 23 ++++++++++++++++-------
 pandas/tests/groupby/test_quantile.py | 21 +++++++++++++++++++++
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 763494666d870..65f05bf185478 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -73,7 +73,6 @@ class providing the base-class of operations.
 from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
 from pandas.core.dtypes.common import (
     is_bool_dtype,
-    is_datetime64_dtype,
     is_float_dtype,
     is_hashable,
     is_integer,
@@ -81,7 +80,7 @@ class providing the base-class of operations.
     is_numeric_dtype,
     is_object_dtype,
     is_scalar,
-    is_timedelta64_dtype,
+    needs_i8_conversion,
 )
 from pandas.core.dtypes.missing import (
     isna,
@@ -3192,12 +3191,11 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
                 inference = np.dtype(np.int64)
             elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray):
                 out = vals.to_numpy(dtype=float, na_value=np.nan)
-            elif is_datetime64_dtype(vals.dtype):
+            elif needs_i8_conversion(vals.dtype):
                 inference = vals.dtype
-                out = np.asarray(vals).astype(float)
-            elif is_timedelta64_dtype(vals.dtype):
-                inference = vals.dtype
-                out = np.asarray(vals).astype(float)
+                # In this case we need to delay the casting until after the
+                #  np.lexsort below.
+                return vals, inference
             elif isinstance(vals, ExtensionArray) and is_float_dtype(vals):
                 inference = np.dtype(np.float64)
                 out = vals.to_numpy(dtype=float, na_value=np.nan)
@@ -3236,6 +3234,10 @@ def post_processor(
                     is_integer_dtype(inference)
                     and interpolation in {"linear", "midpoint"}
                 ):
+                    if needs_i8_conversion(inference):
+                        vals = vals.astype("i8").view(orig_vals._ndarray.dtype)
+                        return orig_vals._from_backing_data(vals)
+
                     assert isinstance(inference, np.dtype)  # for mypy
                     return vals.astype(inference)
 
@@ -3272,6 +3274,8 @@ def blk_func(values: ArrayLike) -> ArrayLike:
                 mask = isna(values)
                 result_mask = None
 
+            is_datetimelike = needs_i8_conversion(values.dtype)
+
             vals, inference = pre_processor(values)
 
             ncols = 1
@@ -3289,6 +3293,11 @@ def blk_func(values: ArrayLike) -> ArrayLike:
             order = (vals, shaped_labels)
             sort_arr = np.lexsort(order).astype(np.intp, copy=False)
 
+            if is_datetimelike:
+                # This casting needs to happen after the lexsort in order
+                #  to ensure that NaTs are placed at the end and not the front
+                vals = vals.view("i8").astype(np.float64)
+
             if vals.ndim == 1:
                 # Ea is always 1d
                 func(
diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py
index 8cba3a8afdfae..949acf0c4b6af 100644
--- a/pandas/tests/groupby/test_quantile.py
+++ b/pandas/tests/groupby/test_quantile.py
@@ -445,3 +445,24 @@ def test_timestamp_groupby_quantile():
     )
 
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_quantile_dt64tz_period():
+    dti = pd.date_range("2016-01-01", periods=1000)
+    ser = pd.Series(dti)
+    df = ser.to_frame()
+    df[1] = dti.tz_localize("US/Pacific")
+    df[2] = dti.to_period("D")
+    df[3] = dti - dti[0]
+    df.iloc[-1] = pd.NaT
+
+    by = np.tile(np.arange(5), 200)
+    gb = df.groupby(by)
+
+    result = gb.quantile(0.5)
+
+    # Check that we match the group-by-group result
+    exp = {i: df.iloc[i::5].quantile(0.5) for i in range(5)}
+    expected = DataFrame(exp).T
+
+    tm.assert_frame_equal(result, expected)

From 8ec3d1cd2dd27ac9b21f3cb313c66c2032593d36 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 13 Feb 2023 19:05:54 -0800
Subject: [PATCH 2/3] GH refs

---
 doc/source/whatsnew/v2.0.0.rst        | 2 ++
 pandas/tests/groupby/test_quantile.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index d1b965e64e43b..bd6e2608f97ae 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1186,6 +1186,8 @@ Datetimelike
 - Bug in :func:`DataFrame.from_records` when given a :class:`DataFrame` input with timezone-aware datetime64 columns incorrectly dropping the timezone-awareness (:issue:`51162`)
 - Bug in :func:`to_datetime` was raising ``decimal.InvalidOperation`` when parsing date strings with ``errors='coerce'`` (:issue:`51084`)
 - Bug in :func:`to_datetime` with both ``unit`` and ``origin`` specified returning incorrect results (:issue:`42624`)
+- Bug in :meth:`GroupBy.quantile` with datetime or timedelta dtypes giving incorrect results for groups containing ``NaT`` (:issue:`51373`)
+- Bug in :meth:`GroupBy.quantile` incorrectly raising with :class:`PeriodDtype` or :class:`DatetimeTZDtype` (:issue:`51373`)
 -
 
 Timedelta
diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py
index 949acf0c4b6af..4c5011f8c683d 100644
--- a/pandas/tests/groupby/test_quantile.py
+++ b/pandas/tests/groupby/test_quantile.py
@@ -448,6 +448,7 @@ def test_timestamp_groupby_quantile():
 
 
 def test_groupby_quantile_dt64tz_period():
+    # GH#51373
     dti = pd.date_range("2016-01-01", periods=1000)
     ser = pd.Series(dti)
     df = ser.to_frame()

From a6ff14464efa8b142805772f618f0968d7ef8228 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 13 Feb 2023 20:34:08 -0800
Subject: [PATCH 3/3] mypy, 32bit fixups

---
 pandas/core/groupby/groupby.py        | 18 +++++++++++++++---
 pandas/tests/groupby/test_quantile.py |  1 +
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 65f05bf185478..b25c767db42ff 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3195,7 +3195,11 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
                 inference = vals.dtype
                 # In this case we need to delay the casting until after the
                 #  np.lexsort below.
-                return vals, inference
+                # error: Incompatible return value type (got
+                # "Tuple[Union[ExtensionArray, ndarray[Any, Any]], Union[Any,
+                # ExtensionDtype]]", expected "Tuple[ndarray[Any, Any],
+                # Optional[Union[dtype[Any], ExtensionDtype]]]")
+                return vals, inference  # type: ignore[return-value]
             elif isinstance(vals, ExtensionArray) and is_float_dtype(vals):
                 inference = np.dtype(np.float64)
                 out = vals.to_numpy(dtype=float, na_value=np.nan)
@@ -3235,8 +3239,16 @@ def post_processor(
                     and interpolation in {"linear", "midpoint"}
                 ):
                     if needs_i8_conversion(inference):
-                        vals = vals.astype("i8").view(orig_vals._ndarray.dtype)
-                        return orig_vals._from_backing_data(vals)
+                        # error: Item "ExtensionArray" of "Union[ExtensionArray,
+                        # ndarray[Any, Any]]" has no attribute "_ndarray"
+                        vals = vals.astype("i8").view(
+                            orig_vals._ndarray.dtype  # type: ignore[union-attr]
+                        )
+                        # error: Item "ExtensionArray" of "Union[ExtensionArray,
+                        # ndarray[Any, Any]]" has no attribute "_from_backing_data"
+                        return orig_vals._from_backing_data(  # type: ignore[union-attr]
+                            vals
+                        )
 
                     assert isinstance(inference, np.dtype)  # for mypy
                     return vals.astype(inference)
diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py
index 4c5011f8c683d..79354e550d3f6 100644
--- a/pandas/tests/groupby/test_quantile.py
+++ b/pandas/tests/groupby/test_quantile.py
@@ -465,5 +465,6 @@ def test_groupby_quantile_dt64tz_period():
     # Check that we match the group-by-group result
     exp = {i: df.iloc[i::5].quantile(0.5) for i in range(5)}
     expected = DataFrame(exp).T
+    expected.index = expected.index.astype(np.int_)
 
     tm.assert_frame_equal(result, expected)