diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index f6e90a3341424..ca3d7e1d3d013 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -336,6 +336,7 @@ Other Deprecations - Deprecated the 'include_start' and 'include_end' arguments in :meth:`DataFrame.between_time`; in a future version passing 'include_start' or 'include_end' will raise (:issue:`40245`) - Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`) - Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`) +- Deprecated the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of the ``inclusive`` argument; in a future version passing ``closed`` will raise (:issue:`40245`) - Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`) .. --------------------------------------------------------------------------- @@ -379,6 +380,7 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`) - :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. 
``31-12-2012``) (:issue:`12585`) +- Bug in :meth:`date_range` and :meth:`bdate_range` not returning the right bound when ``start`` equals ``end`` and the range is closed on only one side (:issue:`43394`) - Timedelta diff --git a/pandas/conftest.py b/pandas/conftest.py index 90709381d1e16..44b805c632723 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -242,6 +242,14 @@ def keep(request): return request.param +@pytest.fixture(params=["both", "neither", "left", "right"]) +def inclusive_endpoints_fixture(request): + """ + Fixture for trying all interval 'inclusive' parameters. + """ + return request.param + + @pytest.fixture(params=["left", "right", "both", "neither"]) def closed(request): """ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 053318e629e69..d9f9c07a4f645 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -39,7 +39,7 @@ ) from pandas._typing import npt from pandas.errors import PerformanceWarning -from pandas.util._validators import validate_endpoints +from pandas.util._validators import validate_inclusive from pandas.core.dtypes.cast import astype_dt64_to_dt64tz from pandas.core.dtypes.common import ( @@ -394,7 +394,7 @@ def _generate_range( normalize=False, ambiguous="raise", nonexistent="raise", - closed=None, + inclusive="both", ): periods = dtl.validate_periods(periods) @@ -417,7 +417,7 @@ def _generate_range( if start is NaT or end is NaT: raise ValueError("Neither `start` nor `end` can be NaT") - left_closed, right_closed = validate_endpoints(closed) + left_inclusive, right_inclusive = validate_inclusive(inclusive) start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) tz = _infer_tz_from_endpoints(start, end, tz) @@ -477,10 +477,15 @@ def _generate_range( arr = arr.astype("M8[ns]", copy=False) index = cls._simple_new(arr, freq=None, dtype=dtype) - if not left_closed and len(index) and index[0] == start: - index = index[1:] - if not right_closed and len(index) and 
index[-1] == end: - index = index[:-1] + if start == end: + if not left_inclusive and not right_inclusive: + index = index[1:-1] + else: + if not left_inclusive or not right_inclusive: + if not left_inclusive and len(index) and index[0] == start: + index = index[1:] + if not right_inclusive and len(index) and index[-1] == end: + index = index[:-1] dtype = tz_to_dtype(tz) return cls._simple_new(index._ndarray, freq=freq, dtype=dtype) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index fbbe6606ba522..d556466554ea4 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -881,7 +881,8 @@ def date_range( tz=None, normalize: bool = False, name: Hashable = None, - closed=None, + closed: str | None | lib.NoDefault = lib.no_default, + inclusive: str | None = None, **kwargs, ) -> DatetimeIndex: """ @@ -919,6 +920,14 @@ def date_range( closed : {None, 'left', 'right'}, optional Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None, the default). + + .. deprecated:: 1.4.0 + Argument `closed` has been deprecated to standardize boundary inputs. + Use `inclusive` instead, to set each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; Whether to set each bound as closed or open. + + .. versionadded:: 1.4.0 **kwargs For compatibility. Has no effect on the result. 
@@ -1029,6 +1038,28 @@ def date_range( DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') """ + if inclusive is not None and not isinstance(closed, lib.NoDefault): + raise ValueError( + "Deprecated argument `closed` cannot be passed" + "if argument `inclusive` is not None" + ) + elif not isinstance(closed, lib.NoDefault): + warnings.warn( + "Argument `closed` is deprecated in favor of `inclusive`.", + FutureWarning, + stacklevel=2, + ) + if closed is None: + inclusive = "both" + elif closed in ("left", "right"): + inclusive = closed + else: + raise ValueError( + "Argument `closed` has to be either 'left', 'right' or None" + ) + elif inclusive is None: + inclusive = "both" + if freq is None and com.any_none(periods, start, end): freq = "D" @@ -1039,7 +1070,7 @@ def date_range( freq=freq, tz=tz, normalize=normalize, - closed=closed, + inclusive=inclusive, **kwargs, ) return DatetimeIndex._simple_new(dtarr, name=name) @@ -1055,7 +1086,8 @@ def bdate_range( name: Hashable = None, weekmask=None, holidays=None, - closed=None, + closed: lib.NoDefault = lib.no_default, + inclusive: str | None = None, **kwargs, ) -> DatetimeIndex: """ @@ -1090,6 +1122,14 @@ def bdate_range( closed : str, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None). + + .. deprecated:: 1.4.0 + Argument `closed` has been deprecated to standardize boundary inputs. + Use `inclusive` instead, to set each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; Whether to set each bound as closed or open. + + .. versionadded:: 1.4.0 **kwargs For compatibility. Has no effect on the result. 
@@ -1143,6 +1183,7 @@ def bdate_range( normalize=normalize, name=name, closed=closed, + inclusive=inclusive, **kwargs, ) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 92f81ff3a00fa..3729f921f59ad 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -9,11 +9,6 @@ import pandas._testing as tm -@pytest.fixture(params=["both", "neither", "left", "right"]) -def inclusive_endpoints_fixture(request): - return request.param - - @pytest.fixture def float_frame_with_na(): """ diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index e9451770054c0..d4b21633309db 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -100,7 +100,7 @@ def test_groupby_with_timegrouper(self): expected = DataFrame( {"Quantity": 0}, index=date_range( - "20130901", "20131205", freq="5D", name="Date", closed="left" + "20130901", "20131205", freq="5D", name="Date", inclusive="left" ), ) expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64") diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 03cfeb245c11d..7559d7ce645e0 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -39,6 +39,34 @@ START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) +def _get_expected_range( + begin_to_match, + end_to_match, + both_range, + inclusive_endpoints, +): + """Helper to get expected range from a both inclusive range""" + left_match = begin_to_match == both_range[0] + right_match = end_to_match == both_range[-1] + + if inclusive_endpoints == "left" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints == "right" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints == "neither" and left_match and right_match: + expected_range = both_range[1:-1] + elif 
inclusive_endpoints == "neither" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints == "neither" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints == "both": + expected_range = both_range[:] + else: + expected_range = both_range[:] + + return expected_range + + class TestTimestampEquivDateRange: # Older tests in TestTimeSeries constructed their `stamp` objects # using `date_range` instead of the `Timestamp` constructor. @@ -544,89 +572,114 @@ def test_range_tz_dateutil(self): assert dr[2] == end @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) - def test_range_closed(self, freq): + def test_range_closed(self, freq, inclusive_endpoints_fixture): begin = datetime(2011, 1, 1) end = datetime(2014, 1, 1) - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) - expected_left = left - expected_right = right - - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] + result_range = date_range( + begin, end, inclusive=inclusive_endpoints_fixture, freq=freq + ) + both_range = date_range(begin, end, inclusive="both", freq=freq) + expected_range = _get_expected_range( + begin, end, both_range, inclusive_endpoints_fixture + ) - tm.assert_index_equal(expected_left, left) - tm.assert_index_equal(expected_right, right) + tm.assert_index_equal(expected_range, result_range) - def test_range_closed_with_tz_aware_start_end(self): + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_closed_with_tz_aware_start_end( + self, freq, inclusive_endpoints_fixture + ): # GH12409, GH12684 begin = Timestamp("2011/1/1", tz="US/Eastern") end = Timestamp("2014/1/1", tz="US/Eastern") - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, 
end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) - expected_left = left - expected_right = right - - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] + result_range = date_range( + begin, end, inclusive=inclusive_endpoints_fixture, freq=freq + ) + both_range = date_range(begin, end, inclusive="both", freq=freq) + expected_range = _get_expected_range( + begin, + end, + both_range, + inclusive_endpoints_fixture, + ) - tm.assert_index_equal(expected_left, left) - tm.assert_index_equal(expected_right, right) + tm.assert_index_equal(expected_range, result_range) + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_with_tz_closed_with_tz_aware_start_end( + self, freq, inclusive_endpoints_fixture + ): begin = Timestamp("2011/1/1") end = Timestamp("2014/1/1") begintz = Timestamp("2011/1/1", tz="US/Eastern") endtz = Timestamp("2014/1/1", tz="US/Eastern") - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq, tz="US/Eastern") - left = date_range(begin, end, closed="left", freq=freq, tz="US/Eastern") - right = date_range(begin, end, closed="right", freq=freq, tz="US/Eastern") - expected_left = left - expected_right = right - - if endtz == closed[-1]: - expected_left = closed[:-1] - if begintz == closed[0]: - expected_right = closed[1:] + result_range = date_range( + begin, + end, + inclusive=inclusive_endpoints_fixture, + freq=freq, + tz="US/Eastern", + ) + both_range = date_range( + begin, end, inclusive="both", freq=freq, tz="US/Eastern" + ) + expected_range = _get_expected_range( + begintz, + endtz, + both_range, + inclusive_endpoints_fixture, + ) - tm.assert_index_equal(expected_left, left) - tm.assert_index_equal(expected_right, right) + tm.assert_index_equal(expected_range, result_range) - @pytest.mark.parametrize("closed", ["right", "left", None]) - def 
test_range_closed_boundary(self, closed): + def test_range_closed_boundary(self, inclusive_endpoints_fixture): # GH#11804 right_boundary = date_range( - "2015-09-12", "2015-12-01", freq="QS-MAR", closed=closed + "2015-09-12", + "2015-12-01", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, ) left_boundary = date_range( - "2015-09-01", "2015-09-12", freq="QS-MAR", closed=closed + "2015-09-01", + "2015-09-12", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, ) both_boundary = date_range( - "2015-09-01", "2015-12-01", freq="QS-MAR", closed=closed + "2015-09-01", + "2015-12-01", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, ) - expected_right = expected_left = expected_both = both_boundary + neither_boundary = date_range( + "2015-09-11", + "2015-09-12", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, + ) + + expected_right = both_boundary + expected_left = both_boundary + expected_both = both_boundary - if closed == "right": + if inclusive_endpoints_fixture == "right": expected_left = both_boundary[1:] - if closed == "left": + elif inclusive_endpoints_fixture == "left": expected_right = both_boundary[:-1] - if closed is None: + elif inclusive_endpoints_fixture == "both": expected_right = both_boundary[1:] expected_left = both_boundary[:-1] + expected_neither = both_boundary[1:-1] + tm.assert_index_equal(right_boundary, expected_right) tm.assert_index_equal(left_boundary, expected_left) tm.assert_index_equal(both_boundary, expected_both) + tm.assert_index_equal(neither_boundary, expected_neither) def test_years_only(self): # GH 6961 @@ -679,6 +732,22 @@ def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture): ] tm.assert_index_equal(result, expected) + def test_range_where_start_equal_end(self, inclusive_endpoints_fixture): + # GH 43394 + start = "2021-09-02" + end = "2021-09-02" + result = date_range( + start=start, end=end, freq="D", inclusive=inclusive_endpoints_fixture + ) + + both_range = 
date_range(start=start, end=end, freq="D", inclusive="both") + if inclusive_endpoints_fixture == "neither": + expected = both_range[1:-1] + elif inclusive_endpoints_fixture in ("left", "right", "both"): + expected = both_range[:] + + tm.assert_index_equal(result, expected) + class TestDateRangeTZ: """Tests for date_range with timezones""" @@ -867,12 +936,12 @@ def test_daterange_bug_456(self): result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) - @pytest.mark.parametrize("closed", ["left", "right"]) - def test_bdays_and_open_boundaries(self, closed): + @pytest.mark.parametrize("inclusive", ["left", "right", "neither", "both"]) + def test_bdays_and_open_boundaries(self, inclusive): # GH 6673 start = "2018-07-21" # Saturday end = "2018-07-29" # Sunday - result = date_range(start, end, freq="B", closed=closed) + result = date_range(start, end, freq="B", inclusive=inclusive) bday_start = "2018-07-23" # Monday bday_end = "2018-07-27" # Friday @@ -1018,7 +1087,7 @@ def test_all_custom_freq(self, freq): def test_range_with_millisecond_resolution(self, start_end): # https://github.com/pandas-dev/pandas/issues/24110 start, end = start_end - result = date_range(start=start, end=end, periods=2, closed="left") + result = date_range(start=start, end=end, periods=2, inclusive="left") expected = DatetimeIndex([start]) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b886eab89cf37..25c0625d1d790 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1796,7 +1796,7 @@ def test_loc_setitem_with_expansion_and_existing_dst(self): start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") - idx = date_range(start, end, closed="left", freq="H") + idx = date_range(start, end, inclusive="left", freq="H") assert ts not in idx # i.e. 
result.loc setitem is with-expansion result = DataFrame(index=idx, columns=["value"]) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index a6491952375a4..70d37f83c7f0c 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -53,7 +53,7 @@ def test_asfreq(self, series_and_frame, freq, kind): else: start = obj.index[0].to_timestamp(how="start") end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start") - new_index = date_range(start=start, end=end, freq=freq, closed="left") + new_index = date_range(start=start, end=end, freq=freq, inclusive="left") expected = obj.to_timestamp().reindex(new_index).to_period(freq) result = obj.resample(freq, kind=kind).asfreq() tm.assert_almost_equal(result, expected) @@ -335,7 +335,7 @@ def test_resample_nonexistent_time_bin_edge(self): freq="D", tz="America/Sao_Paulo", nonexistent="shift_forward", - closed="left", + inclusive="left", ) tm.assert_index_equal(result.index, expected)