From 8eb4b6c18d9e5cc9e169ad8a8935b1cfb829b815 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Mar 2020 19:07:41 -0800 Subject: [PATCH 1/5] ENH: IntegerArray.astype(dt64) --- pandas/_libs/tslib.pyx | 14 ++++---------- pandas/core/arrays/integer.py | 3 +++ pandas/core/tools/datetimes.py | 12 +++++------- pandas/tests/arrays/test_integer.py | 9 +++++++++ 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b78b623bfa187..561a2bddb93a8 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -14,7 +14,7 @@ PyDateTime_IMPORT cimport numpy as cnp -from numpy cimport float64_t, int64_t, ndarray +from numpy cimport float64_t, int64_t, ndarray, uint8_t import numpy as np cnp.import_array() @@ -351,7 +351,6 @@ def format_array_from_datetime( def array_with_unit_to_datetime( ndarray values, - ndarray mask, object unit, str errors='coerce' ): @@ -373,8 +372,6 @@ def array_with_unit_to_datetime( ---------- values : ndarray of object Date-like objects to convert. - mask : boolean ndarray - Not-a-time mask for non-nullable integer types conversion, can be None. unit : object Time unit to use during conversion. errors : str, default 'raise' @@ -395,6 +392,7 @@ def array_with_unit_to_datetime( bint need_to_iterate = True ndarray[int64_t] iresult ndarray[object] oresult + ndarray[uint8_t] mask object tz = None assert is_ignore or is_coerce or is_raise @@ -404,9 +402,6 @@ def array_with_unit_to_datetime( result = values.astype('M8[ns]') else: result, tz = array_to_datetime(values.astype(object), errors=errors) - if mask is not None: - iresult = result.view('i8') - iresult[mask] = NPY_NAT return result, tz m = cast_from_unit(None, unit) @@ -419,9 +414,8 @@ def array_with_unit_to_datetime( if values.dtype.kind == "i": # Note: this condition makes the casting="same_kind" redundant iresult = values.astype('i8', casting='same_kind', copy=False) - # If no mask, fill mask by comparing to NPY_NAT constant - if mask is None: - mask = iresult == NPY_NAT + # fill by comparing to NPY_NAT constant + mask = iresult == NPY_NAT iresult[mask] = 0 fvalues = iresult.astype('f8') * m need_to_iterate = False diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e2b66b1a006e4..fb33840ad757c 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -13,6 +13,7 @@ from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( is_bool_dtype, + is_datetime64_dtype, is_float, is_float_dtype, is_integer, @@ -469,6 +470,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if is_float_dtype(dtype): # In astype, we consider dtype=float to also mean na_value=np.nan kwargs = dict(na_value=np.nan) + elif is_datetime64_dtype(dtype): + kwargs = dict(na_value=np.datetime64("NaT")) else: kwargs = {} diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 5580146b37d25..c32b4d81c0988 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -323,15 +323,13 @@ def _convert_listlike_datetimes( # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime # because it expects an ndarray argument if isinstance(arg, IntegerArray): - # Explicitly pass NaT mask to array_with_unit_to_datetime - mask = arg.isna() - arg = arg._ndarray_values + result = arg.astype(f"datetime64[{unit}]") + tz_parsed = None else: - mask = None - result, tz_parsed = tslib.array_with_unit_to_datetime( - arg, mask, unit, errors=errors - ) + result, tz_parsed = tslib.array_with_unit_to_datetime( + arg, unit, errors=errors + ) if errors == "ignore": from pandas import Index diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 0a5a2362bd290..70a029bd74bda 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -633,6 +633,15 @@ def test_astype_specific_casting(self, dtype): expected = pd.Series([1, 2, 3, None], dtype=dtype) tm.assert_series_equal(result, expected) + def test_astype_dt64(self): + # GH#32435 + arr = pd.array([1, 2, 3, pd.NA]) * 10 ** 9 + + result = arr.astype("datetime64[ns]") + + expected = np.array([1, 2, 3, "NaT"], dtype="M8[s]").astype("M8[ns]") + tm.assert_numpy_array_equal(result, expected) + def test_construct_cast_invalid(self, dtype): msg = "cannot safely" From 9eb798072dd1944595e81afd6d50f777a4b605e8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Mar 2020 16:21:36 -0700 Subject: [PATCH 2/5] typo fixup --- pandas/tests/frame/test_dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 713d8f3ceeedb..d1a7917bd127b 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -505,7 +505,7 @@ def test_df_where_change_dtype(self): @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) - def test_astype_from_datetimelike_to_objectt(self, dtype, unit): + def test_astype_from_datetimelike_to_object(self, dtype, unit): # tests astype to object dtype # gh-19223 / gh-12425 dtype = f"{dtype}[{unit}]" From ec217f9822fefe0039c2fcd6aa6535119710a332 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 15:18:57 -0700 Subject: [PATCH 3/5] troubleshoot windows builds --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 561a2bddb93a8..b073f66fde6c4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -392,7 +392,7 @@ def array_with_unit_to_datetime( bint need_to_iterate = True ndarray[int64_t] iresult ndarray[object] oresult - ndarray[uint8_t] mask + ndarray[uint8_t, ndim=1, cast=True] mask object tz = None assert is_ignore or is_coerce or is_raise From 09202f37565db4aab4ce8e3650d56a4bb352d00a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Mar 2020 12:22:21 -0700 Subject: [PATCH 4/5] revert to pre-30241 --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b073f66fde6c4..94e757624c136 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -392,7 +392,7 @@ def array_with_unit_to_datetime( bint need_to_iterate = True ndarray[int64_t] iresult ndarray[object] oresult - ndarray[uint8_t, ndim=1, cast=True] mask + ndarray mask object tz = None assert is_ignore or is_coerce or is_raise From 87dc843ecd99b4d28598a5255673c39d74d7b0c5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 12 Mar 2020 10:50:28 -0700 Subject: [PATCH 5/5] add tests --- pandas/tests/arrays/test_array.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index f42b16cf18f20..ad6e6e4a98057 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -222,6 +222,8 @@ def test_array_copy(): # integer ([1, 2], IntegerArray._from_sequence([1, 2])), ([1, None], IntegerArray._from_sequence([1, None])), + ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA])), + ([1, np.nan], IntegerArray._from_sequence([1, np.nan])), # string (["a", "b"], StringArray._from_sequence(["a", "b"])), (["a", None], StringArray._from_sequence(["a", None])),