From c0e93b77c29576290bfc0086d595a898043a90f6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 6 Sep 2020 18:58:32 +0200 Subject: [PATCH] Backport PR #36115: REGR: append tz-aware DataFrame with tz-naive values --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/core/dtypes/concat.py | 6 ++++-- pandas/core/internals/concat.py | 8 ++++++-- pandas/tests/reshape/test_concat.py | 17 +++++++++++++++++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index dea3cb6f567ac..0d3c0b203a10d 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`) - Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`) +- Fix regression in :meth:`DataFrame.append` mixing tz-aware and tz-naive datetime columns (:issue:`35460`) - Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - Regression where :meth:`MultiIndex.get_loc` would return a slice spanning the full index when passed an empty list (:issue:`35878`) - Fix regression in invalid cache after an indexing operation; this can manifest when setting which does not update the data (:issue:`35521`) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 9902016475b22..dd005752a4832 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -148,15 +148,17 @@ def is_nonempty(x) -> bool: any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat) if any_ea: + # we ignore axis here, as internally concatting with EAs is always + # for axis=0 if not single_dtype: target_dtype = find_common_type([x.dtype for x in to_concat]) to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat] - if isinstance(to_concat[0], ExtensionArray) and axis == 0: + if isinstance(to_concat[0], ExtensionArray): cls = type(to_concat[0]) return cls._concat_same_type(to_concat) else: - return np.concatenate(to_concat, axis=axis) + return np.concatenate(to_concat) elif _contains_datetime or "timedelta" in typs: return concat_datetime(to_concat, axis=axis, typs=typs) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 2c0d4931a7bf2..5d06cb4c48ac3 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -23,7 +23,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algos -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import DatetimeArray, ExtensionArray from pandas.core.internals.blocks import make_block from pandas.core.internals.managers import BlockManager @@ -334,9 +334,13 @@ def _concatenate_join_units(join_units, concat_axis, copy): # the non-EA values are 2D arrays with shape (1, n) to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat] concat_values = concat_compat(to_concat, axis=0) - if not isinstance(concat_values, ExtensionArray): + if not isinstance(concat_values, ExtensionArray) or ( + isinstance(concat_values, DatetimeArray) and concat_values.tz is None + ): # if the result of concat is not an EA but an ndarray, reshape to # 2D to put it a non-EA Block + # special case DatetimeArray, which *is* an EA, but is put in a + # consolidated 2D block concat_values = np.atleast_2d(concat_values) else: concat_values = concat_compat(to_concat, axis=concat_axis,) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 0159fabd04d59..53d3a35328b68 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1110,6 +1110,23 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): result = df.append([s, s], ignore_index=True) tm.assert_frame_equal(result, expected) + def test_append_empty_tz_frame_with_datetime64ns(self): + # https://github.com/pandas-dev/pandas/issues/35460 + df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + + # pd.NaT gets inferred as tz-naive, so append result is tz-naive + result = df.append({"a": pd.NaT}, ignore_index=True) + expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") + tm.assert_frame_equal(result, expected) + + # also test with typed value to append + df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + result = df.append( + pd.Series({"a": pd.NaT}, dtype="datetime64[ns]"), ignore_index=True + ) + expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") + tm.assert_frame_equal(result, expected) + class TestConcatenate: def test_concat_copy(self):