Skip to content

Commit d9cea17

Browse files
committed
ENH/DEPR: infer date objects to date[pyarrow] dtype
1 parent 285b197 commit d9cea17

34 files changed

+380
-138
lines changed

pandas/_libs/lib.pyx

+36
Original file line numberDiff line numberDiff line change
@@ -1262,6 +1262,7 @@ cdef class Seen:
12621262
bint period_ # seen_period
12631263
bint interval_ # seen_interval
12641264
bint time_
1265+
bint date_
12651266

12661267
def __cinit__(self, bint coerce_numeric=False):
12671268
"""
@@ -1289,6 +1290,7 @@ cdef class Seen:
12891290
self.period_ = False
12901291
self.interval_ = False
12911292
self.time_ = False
1293+
self.date_ = False
12921294
self.coerce_numeric = coerce_numeric
12931295

12941296
cdef bint check_uint64_conflict(self) except -1:
@@ -2549,6 +2551,11 @@ def maybe_convert_objects(ndarray[object] objects,
25492551
else:
25502552
seen.object_ = True
25512553
break
2554+
elif PyDate_Check(val):
2555+
if convert_non_numeric:
2556+
seen.date_ = True
2557+
else:
2558+
seen.object_ = True
25522559
elif is_period_object(val):
25532560
if convert_non_numeric:
25542561
seen.period_ = True
@@ -2672,6 +2679,35 @@ def maybe_convert_objects(ndarray[object] objects,
26722679

26732680
seen.object_ = True
26742681

2682+
elif seen.date_:
2683+
if is_date_array(objects, skipna=True):
2684+
opt = get_option("future.infer_date")
2685+
if opt is True:
2686+
import pyarrow as pa
2687+
2688+
from pandas.core.arrays.arrow import ArrowDtype
2689+
2690+
obj = pa.array(objects)
2691+
dtype = ArrowDtype(obj.type)
2692+
return dtype.construct_array_type()(obj)
2693+
elif opt is False:
2694+
# explicitly set to keep the old behavior and avoid the warning
2695+
pass
2696+
else:
2697+
from pandas.util._exceptions import find_stack_level
2698+
warnings.warn(
2699+
"Pandas type inference with a sequence of `datetime.date` "
2700+
"objects is deprecated. In a future version, this will give "
2701+
"date32[pyarrow] dtype, which will require pyarrow to be "
2702+
"installed. To opt in to the new behavior immediately set "
2703+
"`pd.set_option('future.infer_date', True)`. To keep the "
2704+
"old behavior pass `dtype=object`.",
2705+
FutureWarning,
2706+
stacklevel=find_stack_level(),
2707+
)
2708+
2709+
seen.object_ = True
2710+
26752711
elif seen.nat_:
26762712
if not seen.object_ and not seen.numeric_ and not seen.bool_:
26772713
# all NaT, None, or nan (at least one NaT)

pandas/core/config_init.py

+9
Original file line numberDiff line numberDiff line change
@@ -892,3 +892,12 @@ def register_converter_cb(key) -> None:
892892
"(at which point this option will be deprecated).",
893893
validator=is_one_of_factory([True, False, None]),
894894
)
895+
896+
cf.register_option(
897+
"future.infer_date",
898+
None,
899+
"Whether to infer sequence of datetime.date objects as pyarrow date "
900+
"dtype, which will be the default in pandas 3.0 "
901+
"(at which point this option will be deprecated).",
902+
validator=is_one_of_factory([True, False, None]),
903+
)

pandas/core/construction.py

+24
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,30 @@ def array(
389389
stacklevel=find_stack_level(),
390390
)
391391

392+
elif inferred_dtype == "date":
393+
opt = get_option("future.infer_date")
394+
395+
if opt is True:
396+
import pyarrow as pa
397+
398+
obj = pa.array(data)
399+
dtype = ArrowDtype(obj.type)
400+
return dtype.construct_array_type()(obj)
401+
elif opt is False:
402+
# explicitly set to keep the old behavior and avoid the warning
403+
pass
404+
else:
405+
warnings.warn(
406+
"Pandas type inference with a sequence of `datetime.date` "
407+
"objects is deprecated. In a future version, this will give "
408+
"date32[pyarrow] dtype, which will require pyarrow to be "
409+
"installed. To opt in to the new behavior immediately set "
410+
"`pd.set_option('future.infer_date', True)`. To keep the "
411+
"old behavior pass `dtype=object`.",
412+
FutureWarning,
413+
stacklevel=find_stack_level(),
414+
)
415+
392416
# Pandas overrides NumPy for
393417
# 1. datetime64[ns,us,ms,s]
394418
# 2. timedelta64[ns,us,ms,s]

pandas/tests/arrays/categorical/test_constructors.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,12 @@ def test_constructor_date_objects(self):
369369
# we don't cast date objects to timestamps, matching Index constructor
370370
v = date.today()
371371

372-
cat = Categorical([v, v])
372+
msg = (
373+
"Pandas type inference with a sequence of `datetime.date` "
374+
"objects is deprecated"
375+
)
376+
with tm.assert_produces_warning(FutureWarning, match=msg):
377+
cat = Categorical([v, v])
373378
assert cat.categories.dtype == object
374379
assert type(cat.categories[0]) is date
375380

pandas/tests/dtypes/test_inference.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1059,7 +1059,7 @@ def test_maybe_convert_objects_time(self, future):
10591059

10601060
with pd.option_context("future.infer_time", future):
10611061
with tm.assert_produces_warning(warn, match=msg):
1062-
out = lib.maybe_convert_objects(objs, convert_time=True)
1062+
out = lib.maybe_convert_objects(objs, convert_non_numeric=True)
10631063
with tm.assert_produces_warning(warn, match=msg):
10641064
ser = Series(objs)
10651065
with tm.assert_produces_warning(warn, match=msg):
@@ -1566,7 +1566,7 @@ def test_other_dtypes_for_array(self, func):
15661566

15671567
def test_date(self):
15681568
dates = [date(2012, 1, day) for day in range(1, 20)]
1569-
index = Index(dates)
1569+
index = Index(dates, dtype=object)
15701570
assert index.inferred_type == "date"
15711571

15721572
dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan]

pandas/tests/extension/test_arrow.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,13 @@ def test_stack(self, data, columns):
730730
# FIXME: need to avoid doing inference when calling frame._constructor
731731
# in _stack_multi_columns
732732
warn = FutureWarning
733+
if pa.types.is_date(pa_dtype):
734+
# FIXME: need to avoid doing inference when calling frame._constructor
735+
# in _stack_multi_columns
736+
warn = FutureWarning
737+
warn_msg = (
738+
"Pandas type inference with a sequence of `datetime.date` objects"
739+
)
733740

734741
with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False):
735742
super().test_stack(data, columns)
@@ -798,9 +805,9 @@ def test_invert(self, data, request):
798805
class TestBaseMethods(base.BaseMethodsTests):
799806
def test_hash_pandas_object_works(self, data, as_frame):
800807
pa_dtype = data.dtype.pyarrow_dtype
801-
warn_msg = "Pandas type inference with a sequence of `datetime.time`"
808+
warn_msg = "Pandas type inference with a sequence of `datetime.(time|date)`"
802809
warn = None
803-
if pa.types.is_time(pa_dtype):
810+
if pa.types.is_time(pa_dtype) or pa.types.is_date(pa_dtype):
804811
# TODO(#48964) This warning will be avoided by implementing
805812
# ArrowExtensionArray.hash_pandas_object
806813
warn = FutureWarning
@@ -1688,7 +1695,15 @@ def test_pickle_roundtrip(data):
16881695

16891696
def test_astype_from_non_pyarrow(data):
16901697
# GH49795
1691-
pd_array = data._pa_array.to_pandas().array
1698+
msg = (
1699+
"Pandas type inference with a sequence of `datetime.date` objects is deprecated"
1700+
)
1701+
warn = None
1702+
if pa.types.is_date(data.dtype.pyarrow_dtype):
1703+
warn = FutureWarning
1704+
1705+
with tm.assert_produces_warning(warn, match=msg):
1706+
pd_array = data._pa_array.to_pandas().array
16921707
result = pd_array.astype(data.dtype)
16931708
assert not isinstance(pd_array.dtype, ArrowDtype)
16941709
assert isinstance(result.dtype, ArrowDtype)

pandas/tests/frame/methods/test_asfreq.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,12 @@ def test_asfreq_with_date_object_index(self, frame_or_series):
186186
ts = frame_or_series(np.random.randn(20), index=rng)
187187

188188
ts2 = ts.copy()
189-
ts2.index = [x.date() for x in ts2.index]
189+
msg = (
190+
"Pandas type inference with a sequence of `datetime.date` "
191+
"objects is deprecated"
192+
)
193+
with tm.assert_produces_warning(FutureWarning, match=msg):
194+
ts2.index = [x.date() for x in ts2.index]
190195

191196
result = ts2.asfreq("4H", method="ffill")
192197
expected = ts.asfreq("4H", method="ffill")

pandas/tests/frame/methods/test_join.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -510,16 +510,26 @@ def test_join_multiindex_dates(self):
510510
# GH 33692
511511
date = pd.Timestamp(2000, 1, 1).date()
512512

513-
df1_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
513+
msg = (
514+
"Pandas type inference with a sequence of `datetime.date` "
515+
"objects is deprecated"
516+
)
517+
with tm.assert_produces_warning(FutureWarning, match=msg):
518+
df1_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
514519
df1 = DataFrame({"col1": [0]}, index=df1_index)
515-
df2_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
520+
with tm.assert_produces_warning(FutureWarning, match=msg):
521+
df2_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
516522
df2 = DataFrame({"col2": [0]}, index=df2_index)
517-
df3_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
523+
with tm.assert_produces_warning(FutureWarning, match=msg):
524+
df3_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
518525
df3 = DataFrame({"col3": [0]}, index=df3_index)
519526

520527
result = df1.join([df2, df3])
521528

522-
expected_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
529+
with tm.assert_produces_warning(FutureWarning, match=msg):
530+
expected_index = MultiIndex.from_tuples(
531+
[(0, date)], names=["index_0", "date"]
532+
)
523533
expected = DataFrame(
524534
{"col1": [0], "col2": [0], "col3": [0]}, index=expected_index
525535
)

pandas/tests/frame/methods/test_reindex.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,9 @@ def test_reindex_date_fill_value(self):
185185
ts = df.iloc[0, 0]
186186
fv = ts.date()
187187

188-
res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv)
188+
msg = "type inference with a sequence of `datetime.date` objects is deprecated"
189+
with tm.assert_produces_warning(FutureWarning, match=msg):
190+
res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv)
189191

190192
expected = DataFrame(
191193
{"A": df["A"].tolist() + [fv], "B": df["B"].tolist() + [fv], "C": [fv] * 4},

pandas/tests/frame/test_constructors.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -1895,7 +1895,12 @@ def test_constructor_with_datetimes2(self):
18951895
datetimes = [ts.to_pydatetime() for ts in ind]
18961896
dates = [ts.date() for ts in ind]
18971897
df = DataFrame(datetimes, columns=["datetimes"])
1898-
df["dates"] = dates
1898+
msg = (
1899+
"Pandas type inference with a sequence of `datetime.date` "
1900+
"objects is deprecated"
1901+
)
1902+
with tm.assert_produces_warning(FutureWarning, match=msg):
1903+
df["dates"] = dates
18991904
result = df.dtypes
19001905
expected = Series(
19011906
[np.dtype("datetime64[ns]"), np.dtype("object")],
@@ -2361,7 +2366,12 @@ def test_datetime_date_tuple_columns_from_dict(self):
23612366
# GH 10863
23622367
v = date.today()
23632368
tup = v, v
2364-
result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup])
2369+
msg = (
2370+
"Pandas type inference with a sequence of `datetime.date` "
2371+
"objects is deprecated"
2372+
)
2373+
with tm.assert_produces_warning(FutureWarning, match=msg):
2374+
result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup])
23652375
expected = DataFrame([0, 1, 2], columns=Index(Series([tup])))
23662376
tm.assert_frame_equal(result, expected)
23672377

pandas/tests/groupby/aggregate/test_other.py

+27-21
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,22 @@ def test_agg_datetimes_mixed():
6868
for row in data
6969
]
7070

71-
df2 = DataFrame(
72-
{
73-
"key": [x[0] for x in data],
74-
"date": [x[1] for x in data],
75-
"value": [x[2] for x in data],
76-
}
77-
)
71+
msg = "Pandas type inference with a sequence of `datetime.date` objects"
72+
with tm.assert_produces_warning(FutureWarning, match=msg):
73+
df2 = DataFrame(
74+
{
75+
"key": [x[0] for x in data],
76+
"date": [x[1] for x in data],
77+
"value": [x[2] for x in data],
78+
}
79+
)
7880

7981
df1["weights"] = df1["value"] / df1["value"].sum()
8082
gb1 = df1.groupby("date").aggregate(np.sum)
8183

8284
df2["weights"] = df1["value"] / df1["value"].sum()
83-
gb2 = df2.groupby("date").aggregate(np.sum)
85+
with tm.assert_produces_warning(FutureWarning, match=msg):
86+
gb2 = df2.groupby("date").aggregate(np.sum)
8487

8588
assert len(gb1) == len(gb2)
8689

@@ -367,22 +370,25 @@ def test_agg_consistency():
367370
def P1(a):
368371
return np.percentile(a.dropna(), q=1)
369372

370-
df = DataFrame(
371-
{
372-
"col1": [1, 2, 3, 4],
373-
"col2": [10, 25, 26, 31],
374-
"date": [
375-
dt.date(2013, 2, 10),
376-
dt.date(2013, 2, 10),
377-
dt.date(2013, 2, 11),
378-
dt.date(2013, 2, 11),
379-
],
380-
}
381-
)
373+
msg = "Pandas type inference with a sequence of `datetime.date` objects"
374+
with tm.assert_produces_warning(FutureWarning, match=msg):
375+
df = DataFrame(
376+
{
377+
"col1": [1, 2, 3, 4],
378+
"col2": [10, 25, 26, 31],
379+
"date": [
380+
dt.date(2013, 2, 10),
381+
dt.date(2013, 2, 10),
382+
dt.date(2013, 2, 11),
383+
dt.date(2013, 2, 11),
384+
],
385+
}
386+
)
382387

383388
g = df.groupby("date")
384389

385-
expected = g.agg([P1])
390+
with tm.assert_produces_warning(FutureWarning, match=msg):
391+
expected = g.agg([P1])
386392
expected.columns = expected.columns.levels[0]
387393

388394
result = g.agg(P1)

0 commit comments

Comments
 (0)