From cdd1a85abb5523eff9cda38b6bb828084202f5f6 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 24 Jul 2023 13:25:48 -0700
Subject: [PATCH 1/3] ENH: Implement interchange protocol for DatetimeTZDtype

---
 doc/source/whatsnew/v2.1.0.rst            |  1 +
 pandas/core/interchange/column.py         | 13 +++++++++++--
 pandas/core/interchange/from_dataframe.py |  8 ++++----
 pandas/core/interchange/utils.py          |  9 ++++++---
 pandas/tests/interchange/test_impl.py     | 11 +++++++++++
 5 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 65f5328189eeb..f2e015d0b72e7 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -148,6 +148,7 @@ Other enhancements
 - Adding ``engine_kwargs`` parameter to :meth:`DataFrame.read_excel` (:issue:`52214`)
 - Classes that are useful for type-hinting have been added to the public API in the new submodule ``pandas.api.typing`` (:issue:`48577`)
 - Implemented :attr:`Series.dt.is_month_start`, :attr:`Series.dt.is_month_end`, :attr:`Series.dt.is_year_start`, :attr:`Series.dt.is_year_end`, :attr:`Series.dt.is_quarter_start`, :attr:`Series.dt.is_quarter_end`, :attr:`Series.dt.is_days_in_month`, :attr:`Series.dt.unit`, :meth:`Series.dt.is_normalize`, :meth:`Series.dt.day_name`, :meth:`Series.dt.month_name`, :meth:`Series.dt.tz_convert` for :class:`ArrowDtype` with ``pyarrow.timestamp`` (:issue:`52388`, :issue:`51718`)
+- Implemented :func:`api.interchange.from_dataframe` for :class:`DatetimeTZDtype` (:issue:`54239`)
 - Implemented ``__from_arrow__`` on :class:`DatetimeTZDtype`. (:issue:`52201`)
 - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`)
 - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py
index ff4ff487e23ea..5fc2a821d5dbd 100644
--- a/pandas/core/interchange/column.py
+++ b/pandas/core/interchange/column.py
@@ -9,7 +9,10 @@
 from pandas.errors import NoBufferPresent
 from pandas.util._decorators import cache_readonly
 
-from pandas.core.dtypes.dtypes import ArrowDtype
+from pandas.core.dtypes.dtypes import (
+    ArrowDtype,
+    DatetimeTZDtype,
+)
 
 import pandas as pd
 from pandas.api.types import is_string_dtype
@@ -138,6 +141,8 @@ def _dtype_from_pandasdtype(self, dtype) -> tuple[DtypeKind, int, str, str]:
             raise ValueError(f"Data type {dtype} not supported by interchange protocol")
         if isinstance(dtype, ArrowDtype):
             byteorder = dtype.numpy_dtype.byteorder
+        elif isinstance(dtype, DatetimeTZDtype):
+            byteorder = dtype.base.byteorder
         else:
             byteorder = dtype.byteorder
 
@@ -269,7 +274,11 @@ def _get_data_buffer(
             DtypeKind.BOOL,
             DtypeKind.DATETIME,
         ):
-            buffer = PandasBuffer(self._col.to_numpy(), allow_copy=self._allow_copy)
+            if self.dtype[0] == DtypeKind.DATETIME and len(self.dtype[2]) > 4:
+                np_arr = self._col.dt.tz_convert(None).to_numpy()
+            else:
+                np_arr = self._col.to_numpy()
+            buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
             dtype = self.dtype
         elif self.dtype[0] == DtypeKind.CATEGORICAL:
             codes = self._col.values._codes
diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py
index 0fe92f9b1be50..def6d4bac82b5 100644
--- a/pandas/core/interchange/from_dataframe.py
+++ b/pandas/core/interchange/from_dataframe.py
@@ -325,20 +325,20 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
     return np.asarray(str_list, dtype="object"), buffers
 
 
-def parse_datetime_format_str(format_str, data):
+def parse_datetime_format_str(format_str, data) -> pd.Series | np.ndarray:
     """Parse datetime `format_str` to interpret the `data`."""
     # timestamp 'ts{unit}:tz'
     timestamp_meta = re.match(r"ts([smun]):(.*)", format_str)
     if timestamp_meta:
         unit, tz = timestamp_meta.group(1), timestamp_meta.group(2)
-        if tz != "":
-            raise NotImplementedError("Timezones are not supported yet")
         if unit != "s":
             # the format string describes only a first letter of the unit, so
             # add one extra letter to convert the unit to numpy-style:
             # 'm' -> 'ms', 'u' -> 'us', 'n' -> 'ns'
             unit += "s"
         data = data.astype(f"datetime64[{unit}]")
+        if tz != "":
+            data = pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(tz)
         return data
 
     # date 'td{Days/Ms}'
@@ -358,7 +358,7 @@ def parse_datetime_format_str(format_str, data):
     raise NotImplementedError(f"DateTime kind is not supported: {format_str}")
 
 
-def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
+def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any]:
     """
     Convert a column holding DateTime data to a NumPy array.
 
diff --git a/pandas/core/interchange/utils.py b/pandas/core/interchange/utils.py
index 46103d0d9e8f1..4ac063080e62d 100644
--- a/pandas/core/interchange/utils.py
+++ b/pandas/core/interchange/utils.py
@@ -4,7 +4,6 @@
 
 from __future__ import annotations
 
-import re
 import typing
 
 import numpy as np
@@ -14,6 +13,7 @@
 from pandas.core.dtypes.dtypes import (
     ArrowDtype,
     CategoricalDtype,
+    DatetimeTZDtype,
 )
 
 if typing.TYPE_CHECKING:
@@ -134,10 +134,13 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:
 
     if lib.is_np_dtype(dtype, "M"):
         # Selecting the first char of resolution string:
-        # dtype.str -> '<M8[ns]'
-        resolution = re.findall(r"\[(.*)\]", dtype.str)[0][:1]
+        # dtype.str -> '<M8[ns]' -> 'n'
+        resolution = np.datetime_data(dtype)[0][0]
         return ArrowCTypes.TIMESTAMP.format(resolution=resolution, tz="")
 
+    elif isinstance(dtype, DatetimeTZDtype):
+        return ArrowCTypes.TIMESTAMP.format(resolution=dtype.unit[0], tz=dtype.tz)
+
     raise NotImplementedError(
         f"Conversion of {dtype} to Arrow C format string is not implemented."
     )
diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py
index 5fce4f162d71f..bfb0eceaa0ca1 100644
--- a/pandas/tests/interchange/test_impl.py
+++ b/pandas/tests/interchange/test_impl.py
@@ -284,3 +284,14 @@ def test_empty_pyarrow(data):
     arrow_df = pa_from_dataframe(expected)
     result = from_dataframe(arrow_df)
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("tz", ["UTC", "US/Pacific"])
+@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
+def test_datetimetzdtype(tz, unit):
+    # GH 54239
+    tz_data = (
+        pd.date_range("2018-01-01", periods=5, freq="D").tz_localize(tz).as_unit(unit)
+    )
+    df = pd.DataFrame({"ts_tz": tz_data})
+    tm.assert_frame_equal(df, from_dataframe(df.__dataframe__()))

From bb0b2f0c285ec059211e054b76852677fd922df8 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 24 Jul 2023 17:23:36 -0700
Subject: [PATCH 2/3] Add type ignores

---
 pandas/core/interchange/column.py         | 2 +-
 pandas/core/interchange/from_dataframe.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py
index 5fc2a821d5dbd..c49f8d58cf073 100644
--- a/pandas/core/interchange/column.py
+++ b/pandas/core/interchange/column.py
@@ -142,7 +142,7 @@ def _dtype_from_pandasdtype(self, dtype) -> tuple[DtypeKind, int, str, str]:
         if isinstance(dtype, ArrowDtype):
             byteorder = dtype.numpy_dtype.byteorder
         elif isinstance(dtype, DatetimeTZDtype):
-            byteorder = dtype.base.byteorder
+            byteorder = dtype.base.byteorder  # type: ignore[union-attr]
         else:
             byteorder = dtype.byteorder
 
diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py
index def6d4bac82b5..d3aece6e63798 100644
--- a/pandas/core/interchange/from_dataframe.py
+++ b/pandas/core/interchange/from_dataframe.py
@@ -389,7 +389,7 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any
         length=col.size(),
     )
 
-    data = parse_datetime_format_str(format_str, data)
+    data = parse_datetime_format_str(format_str, data)  # type: ignore[assignment]
     data = set_nulls(data, col, buffers["validity"])
     return data, buffers
 

From b8b6005592b1b1b00145914a9d965b7f2abb589d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 25 Jul 2023 10:44:47 -0700
Subject: [PATCH 3/3] Add comment

---
 pandas/core/interchange/column.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py
index c49f8d58cf073..acfbc5d9e6c62 100644
--- a/pandas/core/interchange/column.py
+++ b/pandas/core/interchange/column.py
@@ -274,6 +274,8 @@ def _get_data_buffer(
             DtypeKind.BOOL,
             DtypeKind.DATETIME,
         ):
+            # self.dtype[2] is an ArrowCTypes.TIMESTAMP where the tz will make
+            # it longer than 4 characters
             if self.dtype[0] == DtypeKind.DATETIME and len(self.dtype[2]) > 4:
                 np_arr = self._col.dt.tz_convert(None).to_numpy()
             else: