From 398bf7d73ad019a3a9c611660acccde301fd3945 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sun, 12 Jan 2020 00:26:53 +0800 Subject: [PATCH 01/11] BUG: Series rolling count ignores min_periods (GH26996) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/window/rolling.py | 4 +++- pandas/tests/window/test_expanding.py | 7 +++++++ pandas/tests/window/test_rolling.py | 7 +++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 5f79accc5c679..d2ef90945a842 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1112,6 +1112,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f612826132fd7..a89991dbf14af 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1185,6 +1185,8 @@ def count(self): window = self._get_window() window = min(window, len(obj)) if not self.center else window + min_periods = self.min_periods if self.min_periods is not None else 0 + min_periods = min(min_periods, len(obj)) if not self.center else min_periods results = [] for b in blocks: @@ -1192,7 +1194,7 @@ def count(self): result = self._constructor( result, window=window, - min_periods=0, + min_periods=min_periods, center=self.center, axis=self.axis, closed=self.closed, diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index fc4bd50f25c73..58ad20e473560 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -113,3 +113,10 @@ def test_expanding_axis(self, axis_frame): result = df.expanding(3, axis=axis_frame).sum() tm.assert_frame_equal(result, expected) + + +def test_expanding_count_with_min_periods(): + # GH 26996 + result = Series(range(5)).expanding(min_periods=3).count() + expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 04fab93b71c4a..26606973f5210 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -426,3 +426,10 @@ def test_min_periods1(): result = df["a"].rolling(3, center=True, min_periods=1).max() expected = pd.Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a") tm.assert_series_equal(result, expected) + + +def test_rolling_count_with_min_periods(): + # GH 26996 + result = Series(range(5)).rolling(3, min_periods=3).count() + expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_series_equal(result, expected) From 47f4e61f27ab28a2f2bbece5f3597272214bfcb6 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 17 Jan 2020 23:53:34 +0800 Subject: [PATCH 02/11] ENH: handles min_periods argument in rolling.count (GH26996) --- pandas/core/window/rolling.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 43dd9a911603b..c79394a79974b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1185,8 +1185,14 @@ def count(self): window = self._get_window() window = min(window, len(obj)) if not self.center else window - min_periods = self.min_periods if self.min_periods is not None else 0 - min_periods = min(min_periods, len(obj)) if not self.center else min_periods + + # We set the default value min_periods to be 0 because count method + # is meant to count NAs, we don't want it by default requires all + # values in the window to be valid to produce a valid count + min_periods = 0 if self.min_periods is None else self.min_periods + + # this is required as window is mutate above + min_periods = min(min_periods, window) results = [] for b in blocks: From df2a3e926b1dc7f1642d67e7d1bddb5af5482c32 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sat, 18 Jan 2020 23:52:20 +0800 Subject: [PATCH 03/11] DOC: moved whatsnew to V1.1.0 --- doc/source/whatsnew/v1.0.0.rst | 1 - doc/source/whatsnew/v1.1.0.rst | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ed508b609fc09..fa562838c8f7c 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1112,7 +1112,6 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) -- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) Reshaping ^^^^^^^^^ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index b5a7b19f160a4..e18a6331751e5 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -133,9 +133,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- -- - +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) Reshaping ^^^^^^^^^ From bfe10f08696d0073db27bac898d6629fbef09f5d Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 21 Jan 2020 10:46:12 +0800 Subject: [PATCH 04/11] TST: added more tests (GH26996) --- doc/source/whatsnew/v1.0.0.rst | 1 + doc/source/whatsnew/v1.1.0.rst | 3 ++- pandas/tests/window/test_rolling.py | 36 +++++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3bd86bb02155f..00bfe5af828dc 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1131,6 +1131,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) Reshaping ^^^^^^^^^ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a979b12c18f16..2711db0e60eaa 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -133,7 +133,8 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) +- +- Reshaping ^^^^^^^^^ diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 719809dc8cfd6..b1ba19e4b7688 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -448,8 +448,36 @@ def test_min_periods1(): tm.assert_series_equal(result, expected) -def test_rolling_count_with_min_periods(): +@pytest.mark.parametrize("test_series", [True, False]) +def test_rolling_count_with_min_periods(test_series): # GH 26996 - result = Series(range(5)).rolling(3, min_periods=3).count() - expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0]) - tm.assert_series_equal(result, expected) + if test_series: + result = Series(range(5)).rolling(3, min_periods=3).count() + expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_series_equal(result, expected) + else: + result = DataFrame(range(5)).rolling(3, min_periods=3).count() + expected = DataFrame([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("test_series", [True, False]) +def test_rolling_count_default_min_periods_with_null_values(test_series): + # GH 26996 + # We need rolling count to have default min_periods=0, + # as the method is meant to count how many non-null values, + # we want to by default produce a valid count even if + # there are very few valid entries in the window + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] + + if test_series: + ser = Series(values) + result = ser.rolling(3).count() + expected = Series(expected_counts) + tm.assert_series_equal(result, expected) + else: + df = DataFrame(values) + result = df.rolling(3).count() + expected = DataFrame(expected_counts) + tm.assert_frame_equal(result, expected) From c0046b6f55c68ffc9d63bf8624fc1e926a4b00df Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 21 Jan 2020 23:11:18 +0800 Subject: [PATCH 05/11] BUG: updated rolling and expanding count for consistency (GH26996) Updated the behavior of rolling and expanding count so that it becomes consistent with all other rolling and expanding functions. Also updated many test cases to reflect this change of behavior. --- pandas/core/window/rolling.py | 17 +++--- .../window/moments/test_moments_expanding.py | 6 +-- .../window/moments/test_moments_rolling.py | 52 +++++++++++++------ pandas/tests/window/test_api.py | 4 +- pandas/tests/window/test_dtypes.py | 8 +-- pandas/tests/window/test_rolling.py | 4 +- 6 files changed, 55 insertions(+), 36 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c79394a79974b..33a9405786050 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1186,13 +1186,10 @@ def count(self): window = self._get_window() window = min(window, len(obj)) if not self.center else window - # We set the default value min_periods to be 0 because count method - # is meant to count NAs, we don't want it by default requires all - # values in the window to be valid to produce a valid count - min_periods = 0 if self.min_periods is None else self.min_periods - - # this is required as window is mutate above - min_periods = min(min_periods, window) + min_periods = self.min_periods + if min_periods is not None and not self.center: + # this is required as window is mutated above + min_periods = min(min_periods, window) results = [] for b in blocks: @@ -1665,7 +1662,11 @@ def _get_cov(X, Y): mean = lambda x: x.rolling( window, self.min_periods, center=self.center ).mean(**kwargs) - count = (X + Y).rolling(window=window, center=self.center).count(**kwargs) + count = ( + (X + Y) + .rolling(window=window, min_periods=0, center=self.center) + .count(**kwargs) + ) bias_adj = count / (count - ddof) return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj diff --git a/pandas/tests/window/moments/test_moments_expanding.py b/pandas/tests/window/moments/test_moments_expanding.py index 322082187f531..9dfaecee9caeb 100644 --- a/pandas/tests/window/moments/test_moments_expanding.py +++ b/pandas/tests/window/moments/test_moments_expanding.py @@ -40,9 +40,9 @@ def test_expanding_corr(self): tm.assert_almost_equal(rolling_result, result) def test_expanding_count(self): - result = self.series.expanding().count() + result = self.series.expanding(min_periods=0).count() tm.assert_almost_equal( - result, self.series.rolling(window=len(self.series)).count() + result, self.series.rolling(window=len(self.series), min_periods=0).count() ) def test_expanding_quantile(self): @@ -369,7 +369,7 @@ def test_expanding_consistency(self, min_periods): ) self._test_moments_consistency( min_periods=min_periods, - count=lambda x: x.expanding().count(), + count=lambda x: x.expanding(min_periods=min_periods).count(), mean=lambda x: x.expanding(min_periods=min_periods).mean(), corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 9acb4ffcb40b8..2de3c25b6d78e 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -777,8 +777,8 @@ def get_result(obj, window, min_periods=None, center=False): series_result = get_result(series, window=win, min_periods=minp) frame_result = get_result(frame, window=win, min_periods=minp) else: - series_result = get_result(series, window=win) - frame_result = get_result(frame, window=win) + series_result = get_result(series, window=win, min_periods=0) + frame_result = get_result(frame, window=win, min_periods=0) last_date = series_result.index[-1] prev_date = last_date - 24 * offsets.BDay() @@ -851,10 +851,11 @@ def get_result(obj, window, min_periods=None, center=False): pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 )[9:].reset_index(drop=True) else: - result = get_result(obj, 20, center=True) - expected = get_result(pd.concat([obj, Series([np.NaN] * 9)]), 20)[ - 9: - ].reset_index(drop=True) + result = get_result(obj, 20, min_periods=0, center=True) + print(result) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 + )[9:].reset_index(drop=True) tm.assert_series_equal(result, expected) @@ -893,21 +894,27 @@ def get_result(obj, window, min_periods=None, center=False): else: series_xp = ( get_result( - self.series.reindex(list(self.series.index) + s), window=25 + self.series.reindex(list(self.series.index) + s), + window=25, + min_periods=0, ) .shift(-12) .reindex(self.series.index) ) frame_xp = ( get_result( - self.frame.reindex(list(self.frame.index) + s), window=25 + self.frame.reindex(list(self.frame.index) + s), + window=25, + min_periods=0, ) .shift(-12) .reindex(self.frame.index) ) - series_rs = get_result(self.series, window=25, center=True) - frame_rs = get_result(self.frame, window=25, center=True) + series_rs = get_result( + self.series, window=25, min_periods=0, center=True + ) + frame_rs = get_result(self.frame, window=25, min_periods=0, center=True) if fill_value is not None: series_xp = series_xp.fillna(fill_value) @@ -964,7 +971,11 @@ def test_rolling_consistency(self, window, min_periods, center): self._test_moments_consistency_is_constant( min_periods=min_periods, - count=lambda x: (x.rolling(window=window, center=center).count()), + count=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + ), mean=lambda x: ( x.rolling( window=window, min_periods=min_periods, center=center @@ -989,19 +1000,26 @@ def test_rolling_consistency(self, window, min_periods, center): ).var(ddof=0) ), var_debiasing_factors=lambda x: ( - x.rolling(window=window, center=center) + x.rolling(window=window, min_periods=min_periods, center=center) .count() .divide( - (x.rolling(window=window, center=center).count() - 1.0).replace( - 0.0, np.nan - ) + ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + - 1.0 + ).replace(0.0, np.nan) ) ), ) self._test_moments_consistency( min_periods=min_periods, - count=lambda x: (x.rolling(window=window, center=center).count()), + count=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + ), mean=lambda x: ( x.rolling( window=window, min_periods=min_periods, center=center @@ -1071,7 +1089,7 @@ def test_rolling_consistency(self, window, min_periods, center): if name == "count": rolling_f_result = rolling_f() rolling_apply_f_result = x.rolling( - window=window, min_periods=0, center=center + window=window, min_periods=min_periods, center=center ).apply(func=f, raw=True) else: if name in ["cov", "corr"]: diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 5e70e13209de5..680237db0535b 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -237,10 +237,10 @@ def test_count_nonnumeric_types(self): columns=cols, ) - result = df.rolling(window=2).count() + result = df.rolling(window=2, min_periods=0).count() tm.assert_frame_equal(result, expected) - result = df.rolling(1).count() + result = df.rolling(1, min_periods=0).count() expected = df.notna().astype(float) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index b1c9b66ab09d3..35f93b1262f59 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -34,7 +34,7 @@ class Dtype: def get_expects(self): expects = { "sr1": { - "count": Series([1, 2, 2, 2, 2], dtype="float64"), + "count": Series([np.nan, 2, 2, 2, 2], dtype="float64"), "max": Series([np.nan, 1, 2, 3, 4], dtype="float64"), "min": Series([np.nan, 0, 1, 2, 3], dtype="float64"), "sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"), @@ -44,7 +44,7 @@ def get_expects(self): "median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), }, "sr2": { - "count": Series([1, 2, 2, 2, 2], dtype="float64"), + "count": Series([np.nan, 2, 2, 2, 2], dtype="float64"), "max": Series([np.nan, 10, 8, 6, 4], dtype="float64"), "min": Series([np.nan, 8, 6, 4, 2], dtype="float64"), "sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"), @@ -54,7 +54,7 @@ def get_expects(self): "median": Series([np.nan, 9, 7, 5, 3], dtype="float64"), }, "sr3": { - "count": Series([1, 2, 2, 1, 1], dtype="float64"), + "count": Series([np.nan, 2, 2, 1, 1], dtype="float64"), "max": Series([np.nan, 1, 2, np.nan, np.nan], dtype="float64"), "min": Series([np.nan, 0, 1, np.nan, np.nan], dtype="float64"), "sum": Series([np.nan, 1, 3, np.nan, np.nan], dtype="float64"), @@ -67,7 +67,7 @@ def get_expects(self): }, "df": { "count": DataFrame( - {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, + {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, dtype="float64", ), "max": DataFrame( diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index b1ba19e4b7688..47429741164f3 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -344,7 +344,7 @@ def test_rolling_axis_count(self, axis_frame): else: expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) - result = df.rolling(2, axis=axis_frame).count() + result = df.rolling(2, axis=axis_frame, min_periods=0).count() tm.assert_frame_equal(result, expected) def test_readonly_array(self): @@ -469,7 +469,7 @@ def test_rolling_count_default_min_periods_with_null_values(test_series): # we want to by default produce a valid count even if # there are very few valid entries in the window values = [1, 2, 3, np.nan, 4, 5, 6] - expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] + expected_counts = [np.nan, np.nan, 3.0, 2.0, 2.0, 2.0, 3.0] if test_series: ser = Series(values) From 8807ba16b356a829dee6a86e2865407d9b0492b0 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 22 Jan 2020 23:07:40 +0800 Subject: [PATCH 06/11] CLN: further cleaned code (GH26996) --- pandas/core/window/rolling.py | 13 ++----------- pandas/tests/window/moments/test_moments_rolling.py | 4 ++-- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 33a9405786050..08cbc5064d2db 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1182,22 +1182,13 @@ class _Rolling_and_Expanding(_Rolling): def count(self): blocks, obj = self._create_blocks() - - window = self._get_window() - window = min(window, len(obj)) if not self.center else window - - min_periods = self.min_periods - if min_periods is not None and not self.center: - # this is required as window is mutated above - min_periods = min(min_periods, window) - results = [] for b in blocks: result = b.notna().astype(int) result = self._constructor( result, - window=window, - min_periods=min_periods, + window=self._get_window(), + min_periods=self.min_periods, center=self.center, axis=self.axis, closed=self.closed, diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 2de3c25b6d78e..83e4ee25558b5 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -835,8 +835,8 @@ def get_result(obj, window, min_periods=None, center=False): nan_mask = ~nan_mask tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) else: - result = get_result(self.series, len(self.series) + 1) - expected = get_result(self.series, len(self.series)) + result = get_result(self.series, len(self.series) + 1, min_periods=0) + expected = get_result(self.series, len(self.series), min_periods=0) nan_mask = isna(result) tm.assert_series_equal(nan_mask, isna(expected)) From bcce1291eaf4a7ebd085d23dd0e0cd6c9ded2c3f Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 24 Jan 2020 14:20:15 +0800 Subject: [PATCH 07/11] TST: cleaned up tests (GH26996) --- pandas/tests/window/test_rolling.py | 32 +++++++++-------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 47429741164f3..1f4289bf6fad9 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -448,21 +448,16 @@ def test_min_periods1(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("test_series", [True, False]) -def test_rolling_count_with_min_periods(test_series): +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_count_with_min_periods(constructor): # GH 26996 - if test_series: - result = Series(range(5)).rolling(3, min_periods=3).count() - expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0]) - tm.assert_series_equal(result, expected) - else: - result = DataFrame(range(5)).rolling(3, min_periods=3).count() - expected = DataFrame([np.nan, np.nan, 3.0, 3.0, 3.0]) - tm.assert_frame_equal(result, expected) + result = constructor(range(5)).rolling(3, min_periods=3).count() + expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_equal(result, expected) -@pytest.mark.parametrize("test_series", [True, False]) -def test_rolling_count_default_min_periods_with_null_values(test_series): +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_count_default_min_periods_with_null_values(constructor): # GH 26996 # We need rolling count to have default min_periods=0, # as the method is meant to count how many non-null values, @@ -471,13 +466,6 @@ def test_rolling_count_default_min_periods_with_null_values(test_series): values = [1, 2, 3, np.nan, 4, 5, 6] expected_counts = [np.nan, np.nan, 3.0, 2.0, 2.0, 2.0, 3.0] - if test_series: - ser = Series(values) - result = ser.rolling(3).count() - expected = Series(expected_counts) - tm.assert_series_equal(result, expected) - else: - df = DataFrame(values) - result = df.rolling(3).count() - expected = DataFrame(expected_counts) - tm.assert_frame_equal(result, expected) + result = constructor(values).rolling(3).count() + expected = constructor(expected_counts) + tm.assert_equal(result, expected) From 1a4352b5bcd4aa0eb01e25db21114bc66f434573 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 24 Jan 2020 15:28:38 +0800 Subject: [PATCH 08/11] BUG: changed min_periods default to 0 for rolling and expanding (GH26996) --- pandas/_libs/tslibs/timestamps.pyx | 3 +++ pandas/core/window/rolling.py | 2 +- pandas/tests/window/test_dtypes.py | 8 ++++---- pandas/tests/window/test_expanding.py | 20 ++++++++++++++++---- pandas/tests/window/test_rolling.py | 6 +----- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 36566b55e74ad..82ce99bbae285 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -375,11 +375,13 @@ class Timestamp(_Timestamp): # Mixing pydatetime positional and keyword arguments is forbidden! cdef _TSObject ts + print('haha') _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] if tzinfo is not None: + print('tzinfo not None') if not PyTZInfo_Check(tzinfo): # tzinfo must be a datetime.tzinfo object, GH#17690 raise TypeError( @@ -392,6 +394,7 @@ class Timestamp(_Timestamp): tz, tzinfo = tzinfo, None if isinstance(ts_input, str): + print('ts_input is str') # User passed a date string to parse. # Check that the user didn't also pass a date attribute kwarg. if any(arg is not None for arg in _date_attributes): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 08cbc5064d2db..fc0431d01cba7 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1188,7 +1188,7 @@ def count(self): result = self._constructor( result, window=self._get_window(), - min_periods=self.min_periods, + min_periods=self.min_periods if self.min_periods else 0, center=self.center, axis=self.axis, closed=self.closed, diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 35f93b1262f59..b1c9b66ab09d3 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -34,7 +34,7 @@ class Dtype: def get_expects(self): expects = { "sr1": { - "count": Series([np.nan, 2, 2, 2, 2], dtype="float64"), + "count": Series([1, 2, 2, 2, 2], dtype="float64"), "max": Series([np.nan, 1, 2, 3, 4], dtype="float64"), "min": Series([np.nan, 0, 1, 2, 3], dtype="float64"), "sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"), @@ -44,7 +44,7 @@ def get_expects(self): "median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), }, "sr2": { - "count": Series([np.nan, 2, 2, 2, 2], dtype="float64"), + "count": Series([1, 2, 2, 2, 2], dtype="float64"), "max": Series([np.nan, 10, 8, 6, 4], dtype="float64"), "min": Series([np.nan, 8, 6, 4, 2], dtype="float64"), "sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"), @@ -54,7 +54,7 @@ def get_expects(self): "median": Series([np.nan, 9, 7, 5, 3], dtype="float64"), }, "sr3": { - "count": Series([np.nan, 2, 2, 1, 1], dtype="float64"), + "count": Series([1, 2, 2, 1, 1], dtype="float64"), "max": Series([np.nan, 1, 2, np.nan, np.nan], dtype="float64"), "min": Series([np.nan, 0, 1, np.nan, np.nan], dtype="float64"), "sum": Series([np.nan, 1, 3, np.nan, np.nan], dtype="float64"), @@ -67,7 +67,7 @@ def get_expects(self): }, "df": { "count": DataFrame( - {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, + {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, dtype="float64", ), "max": DataFrame( diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 58ad20e473560..6b6367fd80b26 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -115,8 +115,20 @@ def test_expanding_axis(self, axis_frame): tm.assert_frame_equal(result, expected) -def test_expanding_count_with_min_periods(): +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_expanding_count_with_min_periods(constructor): # GH 26996 - result = Series(range(5)).expanding(min_periods=3).count() - expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0]) - tm.assert_series_equal(result, expected) + result = constructor(range(5)).expanding(min_periods=3).count() + expected = constructor([np.nan, np.nan, 3.0, 4.0, 5.0]) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_expanding_count_default_min_periods_with_null_values(constructor): + # GH 26996 + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0] + + result = constructor(values).expanding().count() + expected = constructor(expected_counts) + tm.assert_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 1f4289bf6fad9..ab2c7fcb7a0dc 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -459,12 +459,8 @@ def test_rolling_count_with_min_periods(constructor): @pytest.mark.parametrize("constructor", [Series, DataFrame]) def test_rolling_count_default_min_periods_with_null_values(constructor): # GH 26996 - # We need rolling count to have default min_periods=0, - # as the method is meant to count how many non-null values, - # we want to by default produce a valid count even if - # there are very few valid entries in the window values = [1, 2, 3, np.nan, 4, 5, 6] - expected_counts = [np.nan, np.nan, 3.0, 2.0, 2.0, 2.0, 3.0] + expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] result = constructor(values).rolling(3).count() expected = constructor(expected_counts) From 9fa453174bcd03f7a57d39b406cdef674a8269bb Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 24 Jan 2020 15:30:38 +0800 Subject: [PATCH 09/11] BUG: reverted non-relevant changes accidentally added (GH26996) --- pandas/_libs/tslibs/timestamps.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 82ce99bbae285..36566b55e74ad 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -375,13 +375,11 @@ class Timestamp(_Timestamp): # Mixing pydatetime positional and keyword arguments is forbidden! cdef _TSObject ts - print('haha') _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] if tzinfo is not None: - print('tzinfo not None') if not PyTZInfo_Check(tzinfo): # tzinfo must be a datetime.tzinfo object, GH#17690 raise TypeError( @@ -394,7 +392,6 @@ class Timestamp(_Timestamp): tz, tzinfo = tzinfo, None if isinstance(ts_input, str): - print('ts_input is str') # User passed a date string to parse. # Check that the user didn't also pass a date attribute kwarg. if any(arg is not None for arg in _date_attributes): From 987c033eb45889ccf83eb1590afb31d44012a14d Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sat, 25 Jan 2020 09:40:38 +0800 Subject: [PATCH 10/11] BUG: small change in whatsnew (GH26996) --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 55c975a23f7ba..b06ed684cd525 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1132,7 +1132,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) -- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument where ``min_periods`` was ignored (:issue:`26996`) Reshaping ^^^^^^^^^ From a00532a8cd1b73c854e40819a7ca5dd2fa48fd25 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sun, 26 Jan 2020 00:50:10 +0800 Subject: [PATCH 11/11] CLN: slight cleanup (GH26996) --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index fc0431d01cba7..580c7cc0554d0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1188,7 +1188,7 @@ def count(self): result = self._constructor( result, window=self._get_window(), - min_periods=self.min_periods if self.min_periods else 0, + min_periods=self.min_periods or 0, center=self.center, axis=self.axis, closed=self.closed,