From 203733eee09a7aaaecd8ce45cf7fd80b273bead0 Mon Sep 17 00:00:00 2001 From: Zheyuan Chen Date: Sat, 11 Sep 2021 01:27:31 +0800 Subject: [PATCH 1/9] Change date_range inclusive --- doc/source/whatsnew/v1.4.0.rst | 2 + pandas/core/arrays/datetimelike.py | 65 +++++++++++ pandas/core/arrays/datetimes.py | 22 +++- pandas/core/indexes/datetimes.py | 43 ++++++- .../indexes/datetimes/test_date_range.py | 110 ++++++++++++------ pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/resample/test_period_index.py | 2 +- 7 files changed, 201 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 8d081ed8068aa..f6a5b4246de15 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -299,6 +299,7 @@ Other Deprecations - Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`) - Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`) - +- Deprecated the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`) .. --------------------------------------------------------------------------- @@ -340,6 +341,7 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`) - :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. 
``31-12-2012``) (:issue:`12585`) +- Bug in :meth:`date_range` and :meth:`bdate_range` do not return right bound when ``start`` = ``end`` and set is closed on one side (:issue:`43394`) - Timedelta diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 36e7a090214ed..7ef6fa04f5bcc 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1823,6 +1823,71 @@ def validate_periods(periods): return periods +def validate_inclusiveness(inclusive): + """ + Check that the `inclusive` argument is among {"both", "neither", "left", "right"}. + + Parameters + ---------- + inclusive : {"both", "neither", "left", "right"} + + Returns + ------- + left_inclusive : bool + right_inclusive : bool + + Raises + ------ + ValueError : if argument is not among valid values + """ + left_right_inclusive: tuple[bool, bool] | None = { + "both": (True, True), + "left": (True, False), + "right": (False, True), + "neither": (False, False), + }.get(inclusive) + + if left_right_inclusive is None: + raise ValueError( + "Inclusive has to be either 'both', 'neither', 'left', 'right'" + ) + left_inclusive, right_inclusive = left_right_inclusive + return left_inclusive, right_inclusive + + +def validate_endpoints(closed): + """ + Check that the `closed` argument is among [None, "left", "right"] + + Parameters + ---------- + closed : {None, "left", "right"} + + Returns + ------- + left_closed : bool + right_closed : bool + + Raises + ------ + ValueError : if argument is not among valid values + """ + left_closed = False + right_closed = False + + if closed is None: + left_closed = True + right_closed = True + elif closed == "left": + left_closed = True + elif closed == "right": + right_closed = True + else: + raise ValueError("Closed has to be either 'left', 'right' or None") + + return left_closed, right_closed + + def validate_inferred_freq(freq, inferred_freq, freq_infer): """ If the user passes a freq and another freq is inferred 
from passed data, diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 053318e629e69..28a3ad59dc1d9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -394,7 +394,7 @@ def _generate_range( normalize=False, ambiguous="raise", nonexistent="raise", - closed=None, + inclusive="both", ): periods = dtl.validate_periods(periods) @@ -417,7 +417,7 @@ def _generate_range( if start is NaT or end is NaT: raise ValueError("Neither `start` nor `end` can be NaT") - left_closed, right_closed = validate_endpoints(closed) + left_inclusive, right_inclusive = dtl.validate_inclusiveness(inclusive) start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) tz = _infer_tz_from_endpoints(start, end, tz) @@ -477,10 +477,20 @@ def _generate_range( arr = arr.astype("M8[ns]", copy=False) index = cls._simple_new(arr, freq=None, dtype=dtype) - if not left_closed and len(index) and index[0] == start: - index = index[1:] - if not right_closed and len(index) and index[-1] == end: - index = index[:-1] + # do not remove when one side is inclusive + # and removing would leave index empty + to_remove_any = not ( + (left_inclusive or right_inclusive) + and len(index) == 1 + and start == index[0] + and start == end + ) + + if to_remove_any: + if (not left_inclusive) and len(index) and index[0] == start: + index = index[1:] + if (not right_inclusive) and len(index) and index[-1] == end: + index = index[:-1] dtype = tz_to_dtype(tz) return cls._simple_new(index._ndarray, freq=freq, dtype=dtype) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index fbbe6606ba522..5547bd9b15570 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -881,7 +881,8 @@ def date_range( tz=None, normalize: bool = False, name: Hashable = None, - closed=None, + closed: bool | lib.NoDefault = lib.no_default, + inclusive: str | None = None, **kwargs, ) -> DatetimeIndex: """ @@ -919,6 
+920,12 @@ def date_range( closed : {None, 'left', 'right'}, optional Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None, the default). + .. deprecated:: 1.4.0 + Argument `closed` have been deprecated + to standardize boundary inputs. Use `inclusive` instead, to set + each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; Whether to set each bound as closed or open. **kwargs For compatibility. Has no effect on the result. @@ -1029,6 +1036,28 @@ def date_range( DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') """ + if inclusive is not None and closed is not lib.no_default: + raise ValueError( + "Deprecated argument `closed` cannot be passed" + "if argument `inclusive` is not None" + ) + elif closed is not lib.no_default: + warnings.warn( + "Argument `closed` is deprecated in favor of `inclusive`.", + FutureWarning, + stacklevel=2, + ) + if closed is None: + inclusive = "both" + elif closed in ("left", "right"): + inclusive = closed + else: + raise ValueError( + "Argument `closed` has to be either 'left', 'right' or None" + ) + elif inclusive is None: + inclusive = "both" + if freq is None and com.any_none(periods, start, end): freq = "D" @@ -1039,7 +1068,7 @@ def date_range( freq=freq, tz=tz, normalize=normalize, - closed=closed, + inclusive=inclusive, **kwargs, ) return DatetimeIndex._simple_new(dtarr, name=name) @@ -1055,7 +1084,8 @@ def bdate_range( name: Hashable = None, weekmask=None, holidays=None, - closed=None, + closed: lib.NoDefault = lib.no_default, + inclusive: str | None = None, **kwargs, ) -> DatetimeIndex: """ @@ -1090,6 +1120,12 @@ def bdate_range( closed : str, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None). + .. deprecated:: 1.4.0 + Argument `closed` have been deprecated + to standardize boundary inputs. 
Use `inclusive` instead, to set + each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; Whether to set each bound as closed or open. **kwargs For compatibility. Has no effect on the result. @@ -1143,6 +1179,7 @@ def bdate_range( normalize=normalize, name=name, closed=closed, + inclusive=inclusive, **kwargs, ) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 03cfeb245c11d..4d2a714e1905d 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -548,19 +548,25 @@ def test_range_closed(self, freq): begin = datetime(2011, 1, 1) end = datetime(2014, 1, 1) - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) + both = date_range(begin, end, inclusive="both", freq=freq) + left = date_range(begin, end, inclusive="left", freq=freq) + right = date_range(begin, end, inclusive="right", freq=freq) + neither = date_range(begin, end, inclusive="neither", freq=freq) + expected_left = left expected_right = right + expected_neither = neither - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] + if end == both[-1]: + expected_left = both[:-1] + if begin == both[0]: + expected_right = both[1:] + if end == both[-1] and begin == both[0]: + expected_neither = both[1:-1] tm.assert_index_equal(expected_left, left) tm.assert_index_equal(expected_right, right) + tm.assert_index_equal(expected_neither, neither) def test_range_closed_with_tz_aware_start_end(self): # GH12409, GH12684 @@ -568,19 +574,25 @@ def test_range_closed_with_tz_aware_start_end(self): end = Timestamp("2014/1/1", tz="US/Eastern") for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq) - left = 
date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) + both = date_range(begin, end, inclusive="both", freq=freq) + left = date_range(begin, end, inclusive="left", freq=freq) + right = date_range(begin, end, inclusive="right", freq=freq) + neither = date_range(begin, end, inclusive="neither", freq=freq) + expected_left = left expected_right = right + expected_neither = neither - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] + if end == both[-1]: + expected_left = both[:-1] + if begin == both[0]: + expected_right = both[1:] + if end == both[-1] and begin == both[0]: + expected_neither = both[1:-1] tm.assert_index_equal(expected_left, left) tm.assert_index_equal(expected_right, right) + tm.assert_index_equal(expected_neither, neither) begin = Timestamp("2011/1/1") end = Timestamp("2014/1/1") @@ -588,45 +600,64 @@ def test_range_closed_with_tz_aware_start_end(self): endtz = Timestamp("2014/1/1", tz="US/Eastern") for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq, tz="US/Eastern") - left = date_range(begin, end, closed="left", freq=freq, tz="US/Eastern") - right = date_range(begin, end, closed="right", freq=freq, tz="US/Eastern") + both = date_range(begin, end, inclusive="both", freq=freq, tz="US/Eastern") + left = date_range(begin, end, inclusive="left", freq=freq, tz="US/Eastern") + right = date_range( + begin, end, inclusive="right", freq=freq, tz="US/Eastern" + ) + neither = date_range( + begin, end, inclusive="neither", freq=freq, tz="US/Eastern" + ) + expected_left = left expected_right = right + expected_neither = neither - if endtz == closed[-1]: - expected_left = closed[:-1] - if begintz == closed[0]: - expected_right = closed[1:] + if endtz == both[-1]: + expected_left = both[:-1] + if begintz == both[0]: + expected_right = both[1:] + if begintz == both[0] and endtz == both[-1]: + 
expected_neither = both[1:-1] tm.assert_index_equal(expected_left, left) tm.assert_index_equal(expected_right, right) + tm.assert_index_equal(expected_neither, neither) - @pytest.mark.parametrize("closed", ["right", "left", None]) - def test_range_closed_boundary(self, closed): + @pytest.mark.parametrize("inclusive", ["right", "left", "both", "neither"]) + def test_range_closed_boundary(self, inclusive): # GH#11804 right_boundary = date_range( - "2015-09-12", "2015-12-01", freq="QS-MAR", closed=closed + "2015-09-12", "2015-12-01", freq="QS-MAR", inclusive=inclusive ) left_boundary = date_range( - "2015-09-01", "2015-09-12", freq="QS-MAR", closed=closed + "2015-09-01", "2015-09-12", freq="QS-MAR", inclusive=inclusive ) both_boundary = date_range( - "2015-09-01", "2015-12-01", freq="QS-MAR", closed=closed + "2015-09-01", "2015-12-01", freq="QS-MAR", inclusive=inclusive ) - expected_right = expected_left = expected_both = both_boundary + neither_boundary = date_range( + "2015-09-11", "2015-09-12", freq="QS-MAR", inclusive=inclusive + ) + + expected_right = both_boundary + expected_left = both_boundary + expected_both = both_boundary - if closed == "right": + if inclusive == "right": expected_left = both_boundary[1:] - if closed == "left": + elif inclusive == "left": expected_right = both_boundary[:-1] - if closed is None: + elif inclusive == "both": expected_right = both_boundary[1:] expected_left = both_boundary[:-1] + expected_neither = both_boundary[1:-1] + tm.assert_index_equal(right_boundary, expected_right) tm.assert_index_equal(left_boundary, expected_left) tm.assert_index_equal(both_boundary, expected_both) + tm.assert_index_equal(neither_boundary, expected_neither) def test_years_only(self): # GH 6961 @@ -679,6 +710,17 @@ def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture): ] tm.assert_index_equal(result, expected) + def test_range_where_start_equal_end(self): + # GH 43394 + start = "2021-09-02" + end = "2021-09-02" + right_result = 
date_range(start=start, end=end, freq="D", inclusive="right") + left_result = date_range(start=start, end=end, freq="D", inclusive="left") + expected = date_range(start=start, end=end, freq="D", inclusive="both") + + tm.assert_index_equal(right_result, expected) + tm.assert_index_equal(left_result, expected) + class TestDateRangeTZ: """Tests for date_range with timezones""" @@ -867,12 +909,12 @@ def test_daterange_bug_456(self): result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) - @pytest.mark.parametrize("closed", ["left", "right"]) - def test_bdays_and_open_boundaries(self, closed): + @pytest.mark.parametrize("inclusive", ["left", "right", "neither", "both"]) + def test_bdays_and_open_boundaries(self, inclusive): # GH 6673 start = "2018-07-21" # Saturday end = "2018-07-29" # Sunday - result = date_range(start, end, freq="B", closed=closed) + result = date_range(start, end, freq="B", inclusive=inclusive) bday_start = "2018-07-23" # Monday bday_end = "2018-07-27" # Friday @@ -1018,7 +1060,7 @@ def test_all_custom_freq(self, freq): def test_range_with_millisecond_resolution(self, start_end): # https://github.com/pandas-dev/pandas/issues/24110 start, end = start_end - result = date_range(start=start, end=end, periods=2, closed="left") + result = date_range(start=start, end=end, periods=2, inclusive="left") expected = DatetimeIndex([start]) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b886eab89cf37..25c0625d1d790 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1796,7 +1796,7 @@ def test_loc_setitem_with_expansion_and_existing_dst(self): start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") - idx = date_range(start, end, closed="left", freq="H") + idx = date_range(start, end, inclusive="left", 
freq="H") assert ts not in idx # i.e. result.loc setitem is with-expansion result = DataFrame(index=idx, columns=["value"]) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index a6491952375a4..5654cac5e5462 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -53,7 +53,7 @@ def test_asfreq(self, series_and_frame, freq, kind): else: start = obj.index[0].to_timestamp(how="start") end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start") - new_index = date_range(start=start, end=end, freq=freq, closed="left") + new_index = date_range(start=start, end=end, freq=freq, inclusive="left") expected = obj.to_timestamp().reindex(new_index).to_period(freq) result = obj.resample(freq, kind=kind).asfreq() tm.assert_almost_equal(result, expected) From 05def2263b0f98cc01924f7d3ac861bda02a4ce7 Mon Sep 17 00:00:00 2001 From: Zheyuan Chen Date: Tue, 14 Sep 2021 23:32:49 +0800 Subject: [PATCH 2/9] to use validate_inclusive in util --- pandas/core/arrays/datetimelike.py | 65 ------------------------------ pandas/core/arrays/datetimes.py | 4 +- 2 files changed, 2 insertions(+), 67 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7ef6fa04f5bcc..36e7a090214ed 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1823,71 +1823,6 @@ def validate_periods(periods): return periods -def validate_inclusiveness(inclusive): - """ - Check that the `inclusive` argument is among {"both", "neither", "left", "right"}. 
- - Parameters - ---------- - inclusive : {"both", "neither", "left", "right"} - - Returns - ------- - left_inclusive : bool - right_inclusive : bool - - Raises - ------ - ValueError : if argument is not among valid values - """ - left_right_inclusive: tuple[bool, bool] | None = { - "both": (True, True), - "left": (True, False), - "right": (False, True), - "neither": (False, False), - }.get(inclusive) - - if left_right_inclusive is None: - raise ValueError( - "Inclusive has to be either 'both', 'neither', 'left', 'right'" - ) - left_inclusive, right_inclusive = left_right_inclusive - return left_inclusive, right_inclusive - - -def validate_endpoints(closed): - """ - Check that the `closed` argument is among [None, "left", "right"] - - Parameters - ---------- - closed : {None, "left", "right"} - - Returns - ------- - left_closed : bool - right_closed : bool - - Raises - ------ - ValueError : if argument is not among valid values - """ - left_closed = False - right_closed = False - - if closed is None: - left_closed = True - right_closed = True - elif closed == "left": - left_closed = True - elif closed == "right": - right_closed = True - else: - raise ValueError("Closed has to be either 'left', 'right' or None") - - return left_closed, right_closed - - def validate_inferred_freq(freq, inferred_freq, freq_infer): """ If the user passes a freq and another freq is inferred from passed data, diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 28a3ad59dc1d9..90612fa92fc89 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -39,7 +39,7 @@ ) from pandas._typing import npt from pandas.errors import PerformanceWarning -from pandas.util._validators import validate_endpoints +from pandas.util._validators import validate_inclusive from pandas.core.dtypes.cast import astype_dt64_to_dt64tz from pandas.core.dtypes.common import ( @@ -417,7 +417,7 @@ def _generate_range( if start is NaT or end is NaT: raise 
ValueError("Neither `start` nor `end` can be NaT") - left_inclusive, right_inclusive = dtl.validate_inclusiveness(inclusive) + left_inclusive, right_inclusive = validate_inclusive(inclusive) start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) tz = _infer_tz_from_endpoints(start, end, tz) From be085ddd2614c11b02c868e0b4f4c3adf85724f5 Mon Sep 17 00:00:00 2001 From: Zheyuan Chen Date: Tue, 14 Sep 2021 23:39:32 +0800 Subject: [PATCH 3/9] typing + docstring --- pandas/core/indexes/datetimes.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5547bd9b15570..caeacea3986ad 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -881,7 +881,7 @@ def date_range( tz=None, normalize: bool = False, name: Hashable = None, - closed: bool | lib.NoDefault = lib.no_default, + closed: str | None | lib.NoDefault = lib.no_default, inclusive: str | None = None, **kwargs, ) -> DatetimeIndex: @@ -920,10 +920,10 @@ def date_range( closed : {None, 'left', 'right'}, optional Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None, the default). + .. deprecated:: 1.4.0 - Argument `closed` have been deprecated - to standardize boundary inputs. Use `inclusive` instead, to set - each bound as closed or open. + Argument `closed` has been deprecated to standardize boundary inputs. + Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. **kwargs @@ -1120,10 +1120,10 @@ def bdate_range( closed : str, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None). + .. deprecated:: 1.4.0 - Argument `closed` have been deprecated - to standardize boundary inputs. 
Use `inclusive` instead, to set - each bound as closed or open. + Argument `closed` has been deprecated to standardize boundary inputs. + Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. **kwargs From ff0649c9698636109ab6a98ac128761c49ed4f27 Mon Sep 17 00:00:00 2001 From: Zheyuan Chen Date: Tue, 14 Sep 2021 23:52:42 +0800 Subject: [PATCH 4/9] Move inclusive_endpoints_fixture to outer conftest --- pandas/conftest.py | 5 +++++ pandas/tests/frame/conftest.py | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 90709381d1e16..99787e25ffbfe 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -299,6 +299,11 @@ def nselect_method(request): return request.param +@pytest.fixture(params=["both", "neither", "left", "right"]) +def inclusive_endpoints_fixture(request): + return request.param + + # ---------------------------------------------------------------- # Missing values & co. 
# ---------------------------------------------------------------- diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 92f81ff3a00fa..3729f921f59ad 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -9,11 +9,6 @@ import pandas._testing as tm -@pytest.fixture(params=["both", "neither", "left", "right"]) -def inclusive_endpoints_fixture(request): - return request.param - - @pytest.fixture def float_frame_with_na(): """ From 67015ff9d00f35b33d3eb87d0cd5a0ec403a1b9a Mon Sep 17 00:00:00 2001 From: Zheyuan Chen Date: Wed, 15 Sep 2021 01:52:17 +0800 Subject: [PATCH 5/9] Rework Tests --- pandas/core/arrays/datetimes.py | 2 +- .../indexes/datetimes/test_date_range.py | 199 +++++++++++------- 2 files changed, 120 insertions(+), 81 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 90612fa92fc89..d96dcb1e5aa83 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -481,7 +481,7 @@ def _generate_range( # and removing would leave index empty to_remove_any = not ( (left_inclusive or right_inclusive) - and len(index) == 1 + and len(index) and start == index[0] and start == end ) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 4d2a714e1905d..ef1c19da0a7c9 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -544,111 +544,145 @@ def test_range_tz_dateutil(self): assert dr[2] == end @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) - def test_range_closed(self, freq): + def test_range_closed(self, freq, inclusive_endpoints_fixture): begin = datetime(2011, 1, 1) end = datetime(2014, 1, 1) - both = date_range(begin, end, inclusive="both", freq=freq) - left = date_range(begin, end, inclusive="left", freq=freq) - right = date_range(begin, end, inclusive="right", freq=freq) - neither = 
date_range(begin, end, inclusive="neither", freq=freq) - - expected_left = left - expected_right = right - expected_neither = neither - - if end == both[-1]: - expected_left = both[:-1] - if begin == both[0]: - expected_right = both[1:] - if end == both[-1] and begin == both[0]: - expected_neither = both[1:-1] - - tm.assert_index_equal(expected_left, left) - tm.assert_index_equal(expected_right, right) - tm.assert_index_equal(expected_neither, neither) + result_range = date_range( + begin, end, inclusive=inclusive_endpoints_fixture, freq=freq + ) + both_range = date_range(begin, end, inclusive="both", freq=freq) + + left_match = begin == both_range[0] + right_match = end == both_range[-1] + + # Scenarios where datetimes are to be removed + if inclusive_endpoints_fixture == "left" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints_fixture == "right" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints_fixture == "neither" and left_match and right_match: + expected_range = both_range[1:-1] + elif inclusive_endpoints_fixture == "neither" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints_fixture == "neither" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints_fixture == "both": + expected_range = both_range[:] + else: + expected_range = both_range[:] + + tm.assert_index_equal(expected_range, result_range) - def test_range_closed_with_tz_aware_start_end(self): + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_closed_with_tz_aware_start_end( + self, freq, inclusive_endpoints_fixture + ): # GH12409, GH12684 begin = Timestamp("2011/1/1", tz="US/Eastern") end = Timestamp("2014/1/1", tz="US/Eastern") - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - both = date_range(begin, end, inclusive="both", freq=freq) - left = date_range(begin, end, inclusive="left", freq=freq) - right = date_range(begin, end, inclusive="right", freq=freq) 
- neither = date_range(begin, end, inclusive="neither", freq=freq) - - expected_left = left - expected_right = right - expected_neither = neither - - if end == both[-1]: - expected_left = both[:-1] - if begin == both[0]: - expected_right = both[1:] - if end == both[-1] and begin == both[0]: - expected_neither = both[1:-1] - - tm.assert_index_equal(expected_left, left) - tm.assert_index_equal(expected_right, right) - tm.assert_index_equal(expected_neither, neither) + result_range = date_range( + begin, end, inclusive=inclusive_endpoints_fixture, freq=freq + ) + both_range = date_range(begin, end, inclusive="both", freq=freq) + + left_match = begin == both_range[0] + right_match = end == both_range[-1] + + if inclusive_endpoints_fixture == "left" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints_fixture == "right" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints_fixture == "neither" and left_match and right_match: + expected_range = both_range[1:-1] + elif inclusive_endpoints_fixture == "neither" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints_fixture == "neither" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints_fixture == "both": + expected_range = both_range[:] + else: + expected_range = both_range[:] + + tm.assert_index_equal(expected_range, result_range) + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_with_tz_closed_with_tz_aware_start_end( + self, freq, inclusive_endpoints_fixture + ): begin = Timestamp("2011/1/1") end = Timestamp("2014/1/1") begintz = Timestamp("2011/1/1", tz="US/Eastern") endtz = Timestamp("2014/1/1", tz="US/Eastern") - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - both = date_range(begin, end, inclusive="both", freq=freq, tz="US/Eastern") - left = date_range(begin, end, inclusive="left", freq=freq, tz="US/Eastern") - right = date_range( - begin, end, inclusive="right", freq=freq, 
tz="US/Eastern" - ) - neither = date_range( - begin, end, inclusive="neither", freq=freq, tz="US/Eastern" - ) - - expected_left = left - expected_right = right - expected_neither = neither - - if endtz == both[-1]: - expected_left = both[:-1] - if begintz == both[0]: - expected_right = both[1:] - if begintz == both[0] and endtz == both[-1]: - expected_neither = both[1:-1] - - tm.assert_index_equal(expected_left, left) - tm.assert_index_equal(expected_right, right) - tm.assert_index_equal(expected_neither, neither) + result_range = date_range( + begin, + end, + inclusive=inclusive_endpoints_fixture, + freq=freq, + tz="US/Eastern", + ) + both_range = date_range( + begin, end, inclusive="both", freq=freq, tz="US/Eastern" + ) - @pytest.mark.parametrize("inclusive", ["right", "left", "both", "neither"]) - def test_range_closed_boundary(self, inclusive): + left_match = begintz == both_range[0] + right_match = endtz == both_range[-1] + + if inclusive_endpoints_fixture == "left" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints_fixture == "right" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints_fixture == "neither" and left_match and right_match: + expected_range = both_range[1:-1] + elif inclusive_endpoints_fixture == "neither" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints_fixture == "neither" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints_fixture == "both": + expected_range = both_range[:] + else: + expected_range = both_range[:] + + tm.assert_index_equal(expected_range, result_range) + + def test_range_closed_boundary(self, inclusive_endpoints_fixture): # GH#11804 right_boundary = date_range( - "2015-09-12", "2015-12-01", freq="QS-MAR", inclusive=inclusive + "2015-09-12", + "2015-12-01", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, ) left_boundary = date_range( - "2015-09-01", "2015-09-12", freq="QS-MAR", inclusive=inclusive + 
"2015-09-01", + "2015-09-12", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, ) both_boundary = date_range( - "2015-09-01", "2015-12-01", freq="QS-MAR", inclusive=inclusive + "2015-09-01", + "2015-12-01", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, ) neither_boundary = date_range( - "2015-09-11", "2015-09-12", freq="QS-MAR", inclusive=inclusive + "2015-09-11", + "2015-09-12", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, ) expected_right = both_boundary expected_left = both_boundary expected_both = both_boundary - if inclusive == "right": + if inclusive_endpoints_fixture == "right": expected_left = both_boundary[1:] - elif inclusive == "left": + elif inclusive_endpoints_fixture == "left": expected_right = both_boundary[:-1] - elif inclusive == "both": + elif inclusive_endpoints_fixture == "both": expected_right = both_boundary[1:] expected_left = both_boundary[:-1] @@ -710,16 +744,21 @@ def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture): ] tm.assert_index_equal(result, expected) - def test_range_where_start_equal_end(self): + def test_range_where_start_equal_end(self, inclusive_endpoints_fixture): # GH 43394 start = "2021-09-02" end = "2021-09-02" - right_result = date_range(start=start, end=end, freq="D", inclusive="right") - left_result = date_range(start=start, end=end, freq="D", inclusive="left") - expected = date_range(start=start, end=end, freq="D", inclusive="both") + result = date_range( + start=start, end=end, freq="D", inclusive=inclusive_endpoints_fixture + ) - tm.assert_index_equal(right_result, expected) - tm.assert_index_equal(left_result, expected) + both_range = date_range(start=start, end=end, freq="D", inclusive="both") + if inclusive_endpoints_fixture == "neither": + expected = both_range[1:-1] + elif inclusive_endpoints_fixture in ("left", "right", "both"): + expected = date_range(start=start, end=end, freq="D", inclusive="both") + + tm.assert_index_equal(result, expected) class 
TestDateRangeTZ: From e80329f475b4def492af25837ff09bc22340fbf2 Mon Sep 17 00:00:00 2001 From: Zheyuan Chen Date: Wed, 15 Sep 2021 02:08:24 +0800 Subject: [PATCH 6/9] remove line --- pandas/tests/indexes/datetimes/test_date_range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index ef1c19da0a7c9..ec7de57590ee3 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -756,7 +756,7 @@ def test_range_where_start_equal_end(self, inclusive_endpoints_fixture): if inclusive_endpoints_fixture == "neither": expected = both_range[1:-1] elif inclusive_endpoints_fixture in ("left", "right", "both"): - expected = date_range(start=start, end=end, freq="D", inclusive="both") + expected = both_range[:] tm.assert_index_equal(result, expected) From 245c50e451e670de2701ade237c169b21b33a462 Mon Sep 17 00:00:00 2001 From: Zheyuan Chen Date: Thu, 16 Sep 2021 00:13:05 +0800 Subject: [PATCH 7/9] Amend logic + factoring of tests --- pandas/conftest.py | 13 ++- pandas/core/arrays/datetimes.py | 23 ++--- pandas/core/indexes/datetimes.py | 8 +- .../indexes/datetimes/test_date_range.py | 98 ++++++++----------- 4 files changed, 66 insertions(+), 76 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 99787e25ffbfe..44b805c632723 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -242,6 +242,14 @@ def keep(request): return request.param +@pytest.fixture(params=["both", "neither", "left", "right"]) +def inclusive_endpoints_fixture(request): + """ + Fixture for trying all interval 'inclusive' parameters. 
+ """ + return request.param + + @pytest.fixture(params=["left", "right", "both", "neither"]) def closed(request): """ @@ -299,11 +307,6 @@ def nselect_method(request): return request.param -@pytest.fixture(params=["both", "neither", "left", "right"]) -def inclusive_endpoints_fixture(request): - return request.param - - # ---------------------------------------------------------------- # Missing values & co. # ---------------------------------------------------------------- diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d96dcb1e5aa83..d9f9c07a4f645 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -477,20 +477,15 @@ def _generate_range( arr = arr.astype("M8[ns]", copy=False) index = cls._simple_new(arr, freq=None, dtype=dtype) - # do not remove when one side is inclusive - # and removing would leave index empty - to_remove_any = not ( - (left_inclusive or right_inclusive) - and len(index) - and start == index[0] - and start == end - ) - - if to_remove_any: - if (not left_inclusive) and len(index) and index[0] == start: - index = index[1:] - if (not right_inclusive) and len(index) and index[-1] == end: - index = index[:-1] + if start == end: + if not left_inclusive and not right_inclusive: + index = index[1:-1] + else: + if not left_inclusive or not right_inclusive: + if not left_inclusive and len(index) and index[0] == start: + index = index[1:] + if not right_inclusive and len(index) and index[-1] == end: + index = index[:-1] dtype = tz_to_dtype(tz) return cls._simple_new(index._ndarray, freq=freq, dtype=dtype) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index caeacea3986ad..d1aa89ac3c5fa 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -926,6 +926,8 @@ def date_range( Use `inclusive` instead, to set each bound as closed or open. 
inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. + + .. versionadded:: 1.4.0 **kwargs For compatibility. Has no effect on the result. @@ -1036,12 +1038,12 @@ def date_range( DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') """ - if inclusive is not None and closed is not lib.no_default: + if inclusive is not None and not isinstance(closed, lib.NoDefault): raise ValueError( "Deprecated argument `closed` cannot be passed" "if argument `inclusive` is not None" ) - elif closed is not lib.no_default: + elif not isinstance(closed, lib.NoDefault): warnings.warn( "Argument `closed` is deprecated in favor of `inclusive`.", FutureWarning, @@ -1126,6 +1128,8 @@ def bdate_range( Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. + + .. versionadded:: 1.4.0 **kwargs For compatibility. Has no effect on the result. 
diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index ec7de57590ee3..7559d7ce645e0 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -39,6 +39,34 @@ START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) +def _get_expected_range( + begin_to_match, + end_to_match, + both_range, + inclusive_endpoints, +): + """Helper to get expected range from a both inclusive range""" + left_match = begin_to_match == both_range[0] + right_match = end_to_match == both_range[-1] + + if inclusive_endpoints == "left" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints == "right" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints == "neither" and left_match and right_match: + expected_range = both_range[1:-1] + elif inclusive_endpoints == "neither" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints == "neither" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints == "both": + expected_range = both_range[:] + else: + expected_range = both_range[:] + + return expected_range + + class TestTimestampEquivDateRange: # Older tests in TestTimeSeries constructed their `stamp` objects # using `date_range` instead of the `Timestamp` constructor. 
@@ -552,25 +580,9 @@ def test_range_closed(self, freq, inclusive_endpoints_fixture): begin, end, inclusive=inclusive_endpoints_fixture, freq=freq ) both_range = date_range(begin, end, inclusive="both", freq=freq) - - left_match = begin == both_range[0] - right_match = end == both_range[-1] - - # Scenarios where datetimes are to be removed - if inclusive_endpoints_fixture == "left" and right_match: - expected_range = both_range[:-1] - elif inclusive_endpoints_fixture == "right" and left_match: - expected_range = both_range[1:] - elif inclusive_endpoints_fixture == "neither" and left_match and right_match: - expected_range = both_range[1:-1] - elif inclusive_endpoints_fixture == "neither" and right_match: - expected_range = both_range[:-1] - elif inclusive_endpoints_fixture == "neither" and left_match: - expected_range = both_range[1:] - elif inclusive_endpoints_fixture == "both": - expected_range = both_range[:] - else: - expected_range = both_range[:] + expected_range = _get_expected_range( + begin, end, both_range, inclusive_endpoints_fixture + ) tm.assert_index_equal(expected_range, result_range) @@ -586,24 +598,12 @@ def test_range_closed_with_tz_aware_start_end( begin, end, inclusive=inclusive_endpoints_fixture, freq=freq ) both_range = date_range(begin, end, inclusive="both", freq=freq) - - left_match = begin == both_range[0] - right_match = end == both_range[-1] - - if inclusive_endpoints_fixture == "left" and right_match: - expected_range = both_range[:-1] - elif inclusive_endpoints_fixture == "right" and left_match: - expected_range = both_range[1:] - elif inclusive_endpoints_fixture == "neither" and left_match and right_match: - expected_range = both_range[1:-1] - elif inclusive_endpoints_fixture == "neither" and right_match: - expected_range = both_range[:-1] - elif inclusive_endpoints_fixture == "neither" and left_match: - expected_range = both_range[1:] - elif inclusive_endpoints_fixture == "both": - expected_range = both_range[:] - else: - 
expected_range = both_range[:] + expected_range = _get_expected_range( + begin, + end, + both_range, + inclusive_endpoints_fixture, + ) tm.assert_index_equal(expected_range, result_range) @@ -626,24 +626,12 @@ def test_range_with_tz_closed_with_tz_aware_start_end( both_range = date_range( begin, end, inclusive="both", freq=freq, tz="US/Eastern" ) - - left_match = begintz == both_range[0] - right_match = endtz == both_range[-1] - - if inclusive_endpoints_fixture == "left" and right_match: - expected_range = both_range[:-1] - elif inclusive_endpoints_fixture == "right" and left_match: - expected_range = both_range[1:] - elif inclusive_endpoints_fixture == "neither" and left_match and right_match: - expected_range = both_range[1:-1] - elif inclusive_endpoints_fixture == "neither" and right_match: - expected_range = both_range[:-1] - elif inclusive_endpoints_fixture == "neither" and left_match: - expected_range = both_range[1:] - elif inclusive_endpoints_fixture == "both": - expected_range = both_range[:] - else: - expected_range = both_range[:] + expected_range = _get_expected_range( + begintz, + endtz, + both_range, + inclusive_endpoints_fixture, + ) tm.assert_index_equal(expected_range, result_range) From 2c5d65d8f5ad539a26dd074fdef77725077066e7 Mon Sep 17 00:00:00 2001 From: Zheyuan Chen Date: Thu, 16 Sep 2021 01:10:09 +0800 Subject: [PATCH 8/9] closed -> inclusive in tests --- pandas/tests/groupby/test_timegrouper.py | 2 +- pandas/tests/resample/test_period_index.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index e9451770054c0..d4b21633309db 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -100,7 +100,7 @@ def test_groupby_with_timegrouper(self): expected = DataFrame( {"Quantity": 0}, index=date_range( - "20130901", "20131205", freq="5D", name="Date", closed="left" + "20130901", "20131205", freq="5D", 
name="Date", inclusive="left" ), ) expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64") diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 5654cac5e5462..70d37f83c7f0c 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -335,7 +335,7 @@ def test_resample_nonexistent_time_bin_edge(self): freq="D", tz="America/Sao_Paulo", nonexistent="shift_forward", - closed="left", + inclusive="left", ) tm.assert_index_equal(result.index, expected) From 4b85743916be05492350c23f45dfa2ce1694a081 Mon Sep 17 00:00:00 2001 From: Zheyuan Chen Date: Thu, 16 Sep 2021 02:04:58 +0800 Subject: [PATCH 9/9] docstring grammar --- pandas/core/indexes/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d1aa89ac3c5fa..d556466554ea4 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -922,7 +922,7 @@ def date_range( the 'left', 'right', or both sides (None, the default). .. deprecated:: 1.4.0 - Argument `closed` have been deprecated to standardize boundary inputs. + Argument `closed` has been deprecated to standardize boundary inputs. Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. @@ -1124,7 +1124,7 @@ def bdate_range( the 'left', 'right', or both sides (None). .. deprecated:: 1.4.0 - Argument `closed` have been deprecated to standardize boundary inputs. + Argument `closed` has been deprecated to standardize boundary inputs. Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open.