From 678b3a72328ce5a101a8e49ffa7006bfe80cb751 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 20 Sep 2022 15:27:32 -0700 Subject: [PATCH 1/2] ENH: unique/factorize preserve non-nano --- doc/source/whatsnew/v1.6.0.rst | 2 +- pandas/core/algorithms.py | 10 ---------- pandas/tests/test_algos.py | 8 +++----- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index ae062ca30a9fa..0e871e7a3cdf5 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -115,7 +115,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ -- +- :func:`factorize` and :func:`unique` preserve the original dtype when passed numpy timedelta64 or datetime64 with non-nanosecond resolution (:issue:`??`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a43b82380fe20..ff7081802077b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -41,7 +41,6 @@ from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, infer_dtype_from_array, - sanitize_to_nanoseconds, ) from pandas.core.dtypes.common import ( ensure_float64, @@ -51,7 +50,6 @@ is_bool_dtype, is_categorical_dtype, is_complex_dtype, - is_datetime64_dtype, is_extension_array_dtype, is_float_dtype, is_integer, @@ -61,7 +59,6 @@ is_object_dtype, is_scalar, is_signed_integer_dtype, - is_timedelta64_dtype, needs_i8_conversion, ) from pandas.core.dtypes.concat import concat_compat @@ -184,8 +181,6 @@ def _ensure_data(values: ArrayLike) -> np.ndarray: # datetimelike elif needs_i8_conversion(values.dtype): - if isinstance(values, np.ndarray): - values = sanitize_to_nanoseconds(values) npvalues = values.view("i8") npvalues = cast(np.ndarray, npvalues) return npvalues @@ -223,11 +218,6 @@ def _reconstruct_data( values = cls._from_sequence(values, dtype=dtype) else: - if is_datetime64_dtype(dtype): - dtype = np.dtype("datetime64[ns]") - elif is_timedelta64_dtype(dtype): - dtype = np.dtype("timedelta64[ns]") - values = values.astype(dtype, copy=False) return values diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 80271c13cd35d..e62cbeacb21d1 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -342,9 +342,7 @@ def test_datetime64_factorize(self, writable): data = np.array([np.datetime64("2020-01-01T00:00:00.000")]) data.setflags(write=writable) expected_codes = np.array([0], dtype=np.intp) - expected_uniques = np.array( - ["2020-01-01T00:00:00.000000000"], dtype="datetime64[ns]" - ) + expected_uniques = np.array(["2020-01-01T00:00:00.000"], dtype="datetime64[ms]") codes, uniques = pd.factorize(data) tm.assert_numpy_array_equal(codes, expected_codes) @@ -609,13 +607,13 @@ def test_datetime64_dtype_array_returned(self): def test_datetime_non_ns(self): a = np.array(["2000", "2000", "2001"], dtype="datetime64[s]") result = pd.unique(a) - expected = np.array(["2000", "2001"], dtype="datetime64[ns]") + expected = a[1:] tm.assert_numpy_array_equal(result, expected) def test_timedelta_non_ns(self): a = np.array(["2000", "2000", "2001"], dtype="timedelta64[s]") result = pd.unique(a) - expected = np.array([2000000000000, 2001000000000], dtype="timedelta64[ns]") + expected = a[1:] tm.assert_numpy_array_equal(result, expected) def test_timedelta64_dtype_array_returned(self): From 1b18b522b904a9603794515b4a1235dc3f361088 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 Oct 2022 16:45:33 -0700 Subject: [PATCH 2/2] remove sanitize_to_nanoseconds --- pandas/core/dtypes/cast.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9830d22f3e2e5..cacf3da6f2749 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -30,7 +30,6 @@ OutOfBoundsTimedelta, Timedelta, Timestamp, - astype_overflowsafe, get_supported_reso, get_unit_from_dtype, is_supported_unit, @@ -52,7 +51,6 @@ from pandas.core.dtypes.astype import astype_nansafe from pandas.core.dtypes.common import ( - DT64NS_DTYPE, TD64NS_DTYPE, ensure_int8, ensure_int16, @@ -1433,23 +1431,6 @@ def maybe_cast_to_datetime( return cast(ArrayLike, value) -def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarray: - """ - Safely convert non-nanosecond datetime64 or timedelta64 values to nanosecond. - """ - dtype = values.dtype - if dtype.kind == "M" and dtype != DT64NS_DTYPE: - values = astype_overflowsafe(values, dtype=DT64NS_DTYPE) - - elif dtype.kind == "m" and dtype != TD64NS_DTYPE: - values = astype_overflowsafe(values, dtype=TD64NS_DTYPE) - - elif copy: - values = values.copy() - - return values - - def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: """ Convert dtypes with granularity less than nanosecond to nanosecond