diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0ab75355291f6..96c15d4626142 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -142,6 +142,7 @@ Other API changes - The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`) - When creating a :class:`Series` with a object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`) - :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`) +- Passing a sequence containing both ``datetime`` objects and ``date`` objects to the :class:`Series` constructor will return ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`) - Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`) - diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 188b531b2b469..56fdbfccacc55 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1640,8 +1640,11 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]: return "interval" return "mixed" - if seen_date and not (seen_datetime or seen_timedelta): - return "date" + if seen_date: + if not seen_datetime and not seen_timedelta: + return "date" + return "mixed" + elif seen_datetime and not seen_timedelta: return "datetime" elif seen_timedelta and not seen_datetime: @@ -2570,10 +2573,15 @@ def 
maybe_convert_objects(ndarray[object] objects, if seen.datetimetz_: if is_datetime_with_singletz_array(objects): from pandas import DatetimeIndex - dti = DatetimeIndex(objects) - # unbox to DatetimeArray - return dti._data + try: + dti = DatetimeIndex(objects) + except OutOfBoundsDatetime: + # e.g. test_to_datetime_cache_coerce_50_lines_outofbounds + pass + else: + # unbox to DatetimeArray + return dti._data seen.object_ = True elif seen.datetime_: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 54fa9629fecd4..f1c7e5b5fae42 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1224,71 +1224,37 @@ def maybe_infer_to_datetimelike( v = np.array(value, copy=False) - shape = v.shape if v.ndim != 1: v = v.ravel() if not len(v): return value - def try_datetime(v: np.ndarray) -> np.ndarray | DatetimeArray: - # Coerce to datetime64, datetime64tz, or in corner cases - # object[datetimes] - from pandas.core.arrays.datetimes import sequence_to_datetimes - - try: - dta = sequence_to_datetimes(v) - except (ValueError, OutOfBoundsDatetime): - # ValueError for e.g. mixed tzs - # GH#19761 we may have mixed timezones, in which cast 'dta' is - # an ndarray[object]. Only 1 test - # relies on this behavior, see GH#40111 - return v.reshape(shape) - else: - return dta.reshape(shape) - - def try_timedelta(v: np.ndarray) -> np.ndarray: - # safe coerce to timedelta64 - - # will try first with a string & object conversion - try: - # bc we know v.dtype == object, this is equivalent to - # `np.asarray(to_timedelta(v))`, but using a lower-level API that - # does not require a circular import. 
- td_values = array_to_timedelta64(v).view("m8[ns]") - except OutOfBoundsTimedelta: - return v.reshape(shape) - else: - return td_values.reshape(shape) - - # TODO: this is _almost_ equivalent to lib.maybe_convert_objects, - # the main differences are described in GH#49340 and GH#49341 - # and maybe_convert_objects doesn't catch OutOfBoundsDatetime inferred_type = lib.infer_datetimelike_array(ensure_object(v)) - if inferred_type in ["period", "interval"]: + if inferred_type in ["period", "interval", "timedelta", "datetime"]: # Incompatible return value type (got "Union[ExtensionArray, ndarray]", # expected "Union[ndarray, DatetimeArray, TimedeltaArray, PeriodArray, # IntervalArray]") return lib.maybe_convert_objects( # type: ignore[return-value] - v, convert_period=True, convert_interval=True + v, + convert_period=True, + convert_interval=True, + convert_timedelta=True, + convert_datetime=True, + dtype_if_all_nat=np.dtype("M8[ns]"), ) - if inferred_type == "datetime": - # Incompatible types in assignment (expression has type - # "Union[ndarray[Any, Any], DatetimeArray]", variable has type - # "ndarray[Any, Any]") - value = try_datetime(v) # type: ignore[assignment] - elif inferred_type == "timedelta": - value = try_timedelta(v) elif inferred_type == "nat": # if all NaT, return as datetime # only reached if we have at least 1 NaT and the rest (NaT or None or np.nan) + # This is slightly different from what we'd get with maybe_convert_objects, + # which only converts if all-NaT + from pandas.core.arrays.datetimes import sequence_to_datetimes - # Incompatible types in assignment (expression has type - # "Union[ndarray[Any, Any], DatetimeArray]", variable has type - # "ndarray[Any, Any]") - value = try_datetime(v) # type: ignore[assignment] + # Incompatible types in assignment (expression has type "DatetimeArray", + # variable has type "ndarray[Any, Any]") + value = sequence_to_datetimes(v) # type: ignore[assignment] assert value.dtype == "M8[ns]" return value diff 
--git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index e1d16fed73a88..1dab8682ce887 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1340,7 +1340,6 @@ def test_infer_dtype_period_with_na(self, na_value): Timestamp("20170612", tz="US/Eastern"), Timestamp("20170311", tz="US/Eastern"), ], - [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")], [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)], ], @@ -1348,11 +1347,19 @@ def test_infer_dtype_period_with_na(self, na_value): def test_infer_datetimelike_array_datetime(self, data): assert lib.infer_datetimelike_array(data) == "datetime" + def test_infer_datetimelike_array_date_mixed(self): + # GH49341 pre-2.0 these were inferred as "datetime" and "timedelta", + # respectively + data = [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")] + assert lib.infer_datetimelike_array(data) == "mixed" + + data = ([timedelta(2017, 6, 12), date(2017, 3, 11)],) + assert lib.infer_datetimelike_array(data) == "mixed" + @pytest.mark.parametrize( "data", [ [timedelta(2017, 6, 12), timedelta(2017, 3, 11)], - [timedelta(2017, 6, 12), date(2017, 3, 11)], [np.timedelta64(2017, "D"), np.timedelta64(6, "s")], [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)], ], diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 1ab04daca60b7..b6e326271ec7d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -52,6 +52,20 @@ class TestSeriesConstructors: + def test_infer_with_date_and_datetime(self): + # GH#49341 pre-2.0 we inferred datetime-and-date to datetime64, which + # was inconsistent with Index behavior + ts = Timestamp(2016, 1, 1) + vals = [ts.to_pydatetime(), ts.date()] + + ser = Series(vals) + expected = Series(vals, dtype=object) + tm.assert_series_equal(ser, expected) + + idx 
= Index(vals) + expected = Index(vals, dtype=object) + tm.assert_index_equal(idx, expected) + def test_unparseable_strings_with_dt64_dtype(self): # pre-2.0 these would be silently ignored and come back with object dtype vals = ["aa"]