Skip to content

Commit c0046b6

Browse files
committed
BUG: updated rolling and expanding count for consistency (GH26996)
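Updated rolling and expanding count so that it behaves consistently with all other rolling and expanding functions: count no longer overrides an unset min_periods with 0, so callers that relied on the old default must now pass min_periods=0 explicitly (as the internal covariance helper now does). Also updated the affected test cases to reflect this change in behavior.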
1 parent be0926b commit c0046b6

File tree

6 files changed

+55
-36
lines changed

6 files changed

+55
-36
lines changed

pandas/core/window/rolling.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -1186,13 +1186,10 @@ def count(self):
11861186
window = self._get_window()
11871187
window = min(window, len(obj)) if not self.center else window
11881188

1189-
# We set the default value min_periods to be 0 because count method
1190-
# is meant to count NAs, we don't want it by default requires all
1191-
# values in the window to be valid to produce a valid count
1192-
min_periods = 0 if self.min_periods is None else self.min_periods
1193-
1194-
# this is required as window is mutate above
1195-
min_periods = min(min_periods, window)
1189+
min_periods = self.min_periods
1190+
if min_periods is not None and not self.center:
1191+
# this is required as window is mutated above
1192+
min_periods = min(min_periods, window)
11961193

11971194
results = []
11981195
for b in blocks:
@@ -1665,7 +1662,11 @@ def _get_cov(X, Y):
16651662
mean = lambda x: x.rolling(
16661663
window, self.min_periods, center=self.center
16671664
).mean(**kwargs)
1668-
count = (X + Y).rolling(window=window, center=self.center).count(**kwargs)
1665+
count = (
1666+
(X + Y)
1667+
.rolling(window=window, min_periods=0, center=self.center)
1668+
.count(**kwargs)
1669+
)
16691670
bias_adj = count / (count - ddof)
16701671
return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
16711672

pandas/tests/window/moments/test_moments_expanding.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ def test_expanding_corr(self):
4040
tm.assert_almost_equal(rolling_result, result)
4141

4242
def test_expanding_count(self):
43-
result = self.series.expanding().count()
43+
result = self.series.expanding(min_periods=0).count()
4444
tm.assert_almost_equal(
45-
result, self.series.rolling(window=len(self.series)).count()
45+
result, self.series.rolling(window=len(self.series), min_periods=0).count()
4646
)
4747

4848
def test_expanding_quantile(self):
@@ -369,7 +369,7 @@ def test_expanding_consistency(self, min_periods):
369369
)
370370
self._test_moments_consistency(
371371
min_periods=min_periods,
372-
count=lambda x: x.expanding().count(),
372+
count=lambda x: x.expanding(min_periods=min_periods).count(),
373373
mean=lambda x: x.expanding(min_periods=min_periods).mean(),
374374
corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y),
375375
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),

pandas/tests/window/moments/test_moments_rolling.py

+35-17
Original file line numberDiff line numberDiff line change
@@ -777,8 +777,8 @@ def get_result(obj, window, min_periods=None, center=False):
777777
series_result = get_result(series, window=win, min_periods=minp)
778778
frame_result = get_result(frame, window=win, min_periods=minp)
779779
else:
780-
series_result = get_result(series, window=win)
781-
frame_result = get_result(frame, window=win)
780+
series_result = get_result(series, window=win, min_periods=0)
781+
frame_result = get_result(frame, window=win, min_periods=0)
782782

783783
last_date = series_result.index[-1]
784784
prev_date = last_date - 24 * offsets.BDay()
@@ -851,10 +851,11 @@ def get_result(obj, window, min_periods=None, center=False):
851851
pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15
852852
)[9:].reset_index(drop=True)
853853
else:
854-
result = get_result(obj, 20, center=True)
855-
expected = get_result(pd.concat([obj, Series([np.NaN] * 9)]), 20)[
856-
9:
857-
].reset_index(drop=True)
854+
result = get_result(obj, 20, min_periods=0, center=True)
855+
print(result)
856+
expected = get_result(
857+
pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0
858+
)[9:].reset_index(drop=True)
858859

859860
tm.assert_series_equal(result, expected)
860861

@@ -893,21 +894,27 @@ def get_result(obj, window, min_periods=None, center=False):
893894
else:
894895
series_xp = (
895896
get_result(
896-
self.series.reindex(list(self.series.index) + s), window=25
897+
self.series.reindex(list(self.series.index) + s),
898+
window=25,
899+
min_periods=0,
897900
)
898901
.shift(-12)
899902
.reindex(self.series.index)
900903
)
901904
frame_xp = (
902905
get_result(
903-
self.frame.reindex(list(self.frame.index) + s), window=25
906+
self.frame.reindex(list(self.frame.index) + s),
907+
window=25,
908+
min_periods=0,
904909
)
905910
.shift(-12)
906911
.reindex(self.frame.index)
907912
)
908913

909-
series_rs = get_result(self.series, window=25, center=True)
910-
frame_rs = get_result(self.frame, window=25, center=True)
914+
series_rs = get_result(
915+
self.series, window=25, min_periods=0, center=True
916+
)
917+
frame_rs = get_result(self.frame, window=25, min_periods=0, center=True)
911918

912919
if fill_value is not None:
913920
series_xp = series_xp.fillna(fill_value)
@@ -964,7 +971,11 @@ def test_rolling_consistency(self, window, min_periods, center):
964971

965972
self._test_moments_consistency_is_constant(
966973
min_periods=min_periods,
967-
count=lambda x: (x.rolling(window=window, center=center).count()),
974+
count=lambda x: (
975+
x.rolling(
976+
window=window, min_periods=min_periods, center=center
977+
).count()
978+
),
968979
mean=lambda x: (
969980
x.rolling(
970981
window=window, min_periods=min_periods, center=center
@@ -989,19 +1000,26 @@ def test_rolling_consistency(self, window, min_periods, center):
9891000
).var(ddof=0)
9901001
),
9911002
var_debiasing_factors=lambda x: (
992-
x.rolling(window=window, center=center)
1003+
x.rolling(window=window, min_periods=min_periods, center=center)
9931004
.count()
9941005
.divide(
995-
(x.rolling(window=window, center=center).count() - 1.0).replace(
996-
0.0, np.nan
997-
)
1006+
(
1007+
x.rolling(
1008+
window=window, min_periods=min_periods, center=center
1009+
).count()
1010+
- 1.0
1011+
).replace(0.0, np.nan)
9981012
)
9991013
),
10001014
)
10011015

10021016
self._test_moments_consistency(
10031017
min_periods=min_periods,
1004-
count=lambda x: (x.rolling(window=window, center=center).count()),
1018+
count=lambda x: (
1019+
x.rolling(
1020+
window=window, min_periods=min_periods, center=center
1021+
).count()
1022+
),
10051023
mean=lambda x: (
10061024
x.rolling(
10071025
window=window, min_periods=min_periods, center=center
@@ -1071,7 +1089,7 @@ def test_rolling_consistency(self, window, min_periods, center):
10711089
if name == "count":
10721090
rolling_f_result = rolling_f()
10731091
rolling_apply_f_result = x.rolling(
1074-
window=window, min_periods=0, center=center
1092+
window=window, min_periods=min_periods, center=center
10751093
).apply(func=f, raw=True)
10761094
else:
10771095
if name in ["cov", "corr"]:

pandas/tests/window/test_api.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,10 @@ def test_count_nonnumeric_types(self):
237237
columns=cols,
238238
)
239239

240-
result = df.rolling(window=2).count()
240+
result = df.rolling(window=2, min_periods=0).count()
241241
tm.assert_frame_equal(result, expected)
242242

243-
result = df.rolling(1).count()
243+
result = df.rolling(1, min_periods=0).count()
244244
expected = df.notna().astype(float)
245245
tm.assert_frame_equal(result, expected)
246246

pandas/tests/window/test_dtypes.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class Dtype:
3434
def get_expects(self):
3535
expects = {
3636
"sr1": {
37-
"count": Series([1, 2, 2, 2, 2], dtype="float64"),
37+
"count": Series([np.nan, 2, 2, 2, 2], dtype="float64"),
3838
"max": Series([np.nan, 1, 2, 3, 4], dtype="float64"),
3939
"min": Series([np.nan, 0, 1, 2, 3], dtype="float64"),
4040
"sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"),
@@ -44,7 +44,7 @@ def get_expects(self):
4444
"median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"),
4545
},
4646
"sr2": {
47-
"count": Series([1, 2, 2, 2, 2], dtype="float64"),
47+
"count": Series([np.nan, 2, 2, 2, 2], dtype="float64"),
4848
"max": Series([np.nan, 10, 8, 6, 4], dtype="float64"),
4949
"min": Series([np.nan, 8, 6, 4, 2], dtype="float64"),
5050
"sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"),
@@ -54,7 +54,7 @@ def get_expects(self):
5454
"median": Series([np.nan, 9, 7, 5, 3], dtype="float64"),
5555
},
5656
"sr3": {
57-
"count": Series([1, 2, 2, 1, 1], dtype="float64"),
57+
"count": Series([np.nan, 2, 2, 1, 1], dtype="float64"),
5858
"max": Series([np.nan, 1, 2, np.nan, np.nan], dtype="float64"),
5959
"min": Series([np.nan, 0, 1, np.nan, np.nan], dtype="float64"),
6060
"sum": Series([np.nan, 1, 3, np.nan, np.nan], dtype="float64"),
@@ -67,7 +67,7 @@ def get_expects(self):
6767
},
6868
"df": {
6969
"count": DataFrame(
70-
{0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])},
70+
{0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])},
7171
dtype="float64",
7272
),
7373
"max": DataFrame(

pandas/tests/window/test_rolling.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ def test_rolling_axis_count(self, axis_frame):
344344
else:
345345
expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]})
346346

347-
result = df.rolling(2, axis=axis_frame).count()
347+
result = df.rolling(2, axis=axis_frame, min_periods=0).count()
348348
tm.assert_frame_equal(result, expected)
349349

350350
def test_readonly_array(self):
@@ -469,7 +469,7 @@ def test_rolling_count_default_min_periods_with_null_values(test_series):
469469
# we want to by default produce a valid count even if
470470
# there are very few valid entries in the window
471471
values = [1, 2, 3, np.nan, 4, 5, 6]
472-
expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0]
472+
expected_counts = [np.nan, np.nan, 3.0, 2.0, 2.0, 2.0, 3.0]
473473

474474
if test_series:
475475
ser = Series(values)

0 commit comments

Comments
 (0)