Skip to content

TST: Move some consistency expanding tests to test_expanding #44163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
362 changes: 1 addition & 361 deletions pandas/tests/window/moments/test_moments_consistency_expanding.py
Original file line number Diff line number Diff line change
@@ -1,171 +1,10 @@
import numpy as np
import pytest

from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
isna,
notna,
)
from pandas import Series
import pandas._testing as tm


def test_expanding_corr(series):
A = series.dropna()
B = (A + np.random.randn(len(A)))[:-5]

result = A.expanding().corr(B)

rolling_result = A.rolling(window=len(A), min_periods=1).corr(B)

tm.assert_almost_equal(rolling_result, result)


def test_expanding_count(series):
result = series.expanding(min_periods=0).count()
tm.assert_almost_equal(
result, series.rolling(window=len(series), min_periods=0).count()
)


def test_expanding_quantile(series):
result = series.expanding().quantile(0.5)

rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5)

tm.assert_almost_equal(result, rolling_result)


def test_expanding_cov(series):
A = series
B = (A + np.random.randn(len(A)))[:-5]

result = A.expanding().cov(B)

rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)

tm.assert_almost_equal(rolling_result, result)


def test_expanding_cov_pairwise(frame):
result = frame.expanding().cov()

rolling_result = frame.rolling(window=len(frame), min_periods=1).cov()

tm.assert_frame_equal(result, rolling_result)


def test_expanding_corr_pairwise(frame):
result = frame.expanding().corr()

rolling_result = frame.rolling(window=len(frame), min_periods=1).corr()
tm.assert_frame_equal(result, rolling_result)


@pytest.mark.parametrize(
"func,static_comp",
[("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
ids=["sum", "mean", "max", "min"],
)
def test_expanding_func(func, static_comp, frame_or_series):
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
result = getattr(data.expanding(min_periods=1, axis=0), func)()
assert isinstance(result, frame_or_series)

if frame_or_series is Series:
tm.assert_almost_equal(result[10], static_comp(data[:11]))
else:
tm.assert_series_equal(
result.iloc[10], static_comp(data[:11]), check_names=False
)


@pytest.mark.parametrize(
"func,static_comp",
[("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
ids=["sum", "mean", "max", "min"],
)
def test_expanding_min_periods(func, static_comp):
ser = Series(np.random.randn(50))

result = getattr(ser.expanding(min_periods=30, axis=0), func)()
assert result[:29].isna().all()
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))

# min_periods is working correctly
result = getattr(ser.expanding(min_periods=15, axis=0), func)()
assert isna(result.iloc[13])
assert notna(result.iloc[14])

ser2 = Series(np.random.randn(20))
result = getattr(ser2.expanding(min_periods=5, axis=0), func)()
assert isna(result[3])
assert notna(result[4])

# min_periods=0
result0 = getattr(ser.expanding(min_periods=0, axis=0), func)()
result1 = getattr(ser.expanding(min_periods=1, axis=0), func)()
tm.assert_almost_equal(result0, result1)

result = getattr(ser.expanding(min_periods=1, axis=0), func)()
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))


def test_expanding_apply(engine_and_raw, frame_or_series):
engine, raw = engine_and_raw
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
result = data.expanding(min_periods=1).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
assert isinstance(result, frame_or_series)

if frame_or_series is Series:
tm.assert_almost_equal(result[9], np.mean(data[:11]))
else:
tm.assert_series_equal(result.iloc[9], np.mean(data[:11]), check_names=False)


def test_expanding_min_periods_apply(engine_and_raw):
engine, raw = engine_and_raw
ser = Series(np.random.randn(50))

result = ser.expanding(min_periods=30).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
assert result[:29].isna().all()
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))

# min_periods is working correctly
result = ser.expanding(min_periods=15).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
assert isna(result.iloc[13])
assert notna(result.iloc[14])

ser2 = Series(np.random.randn(20))
result = ser2.expanding(min_periods=5).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
assert isna(result[3])
assert notna(result[4])

# min_periods=0
result0 = ser.expanding(min_periods=0).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
result1 = ser.expanding(min_periods=1).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
tm.assert_almost_equal(result0, result1)

result = ser.expanding(min_periods=1).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))


@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum])
def test_expanding_apply_consistency_sum_nans(consistency_data, min_periods, f):
Expand Down Expand Up @@ -334,202 +173,3 @@ def test_expanding_consistency_var_debiasing_factors(consistency_data, min_perio
x.expanding().count() - 1.0
).replace(0.0, np.nan)
tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)


@pytest.mark.parametrize(
"f",
[
lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)),
lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)),
],
)
def test_moment_functions_zero_length_pairwise(f):

df1 = DataFrame()
df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
df2["a"] = df2["a"].astype("float64")

df1_expected = DataFrame(
index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([])
)
df2_expected = DataFrame(
index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
columns=Index(["a"], name="foo"),
dtype="float64",
)

df1_result = f(df1)
tm.assert_frame_equal(df1_result, df1_expected)

df2_result = f(df2)
tm.assert_frame_equal(df2_result, df2_expected)


@pytest.mark.parametrize(
"f",
[
lambda x: x.expanding().count(),
lambda x: x.expanding(min_periods=5).cov(x, pairwise=False),
lambda x: x.expanding(min_periods=5).corr(x, pairwise=False),
lambda x: x.expanding(min_periods=5).max(),
lambda x: x.expanding(min_periods=5).min(),
lambda x: x.expanding(min_periods=5).sum(),
lambda x: x.expanding(min_periods=5).mean(),
lambda x: x.expanding(min_periods=5).std(),
lambda x: x.expanding(min_periods=5).var(),
lambda x: x.expanding(min_periods=5).skew(),
lambda x: x.expanding(min_periods=5).kurt(),
lambda x: x.expanding(min_periods=5).quantile(0.5),
lambda x: x.expanding(min_periods=5).median(),
lambda x: x.expanding(min_periods=5).apply(sum, raw=False),
lambda x: x.expanding(min_periods=5).apply(sum, raw=True),
],
)
def test_moment_functions_zero_length(f):
# GH 8056
s = Series(dtype=np.float64)
s_expected = s
df1 = DataFrame()
df1_expected = df1
df2 = DataFrame(columns=["a"])
df2["a"] = df2["a"].astype("float64")
df2_expected = df2

s_result = f(s)
tm.assert_series_equal(s_result, s_expected)

df1_result = f(df1)
tm.assert_frame_equal(df1_result, df1_expected)

df2_result = f(df2)
tm.assert_frame_equal(df2_result, df2_expected)


def test_expanding_apply_empty_series(engine_and_raw):
engine, raw = engine_and_raw
ser = Series([], dtype=np.float64)
tm.assert_series_equal(
ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine)
)


def test_expanding_apply_min_periods_0(engine_and_raw):
# GH 8080
engine, raw = engine_and_raw
s = Series([None, None, None])
result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine)
expected = Series([1.0, 2.0, 3.0])
tm.assert_series_equal(result, expected)


def test_expanding_cov_diff_index():
# GH 7512
s1 = Series([1, 2, 3], index=[0, 1, 2])
s2 = Series([1, 3], index=[0, 2])
result = s1.expanding().cov(s2)
expected = Series([None, None, 2.0])
tm.assert_series_equal(result, expected)

s2a = Series([1, None, 3], index=[0, 1, 2])
result = s1.expanding().cov(s2a)
tm.assert_series_equal(result, expected)

s1 = Series([7, 8, 10], index=[0, 1, 3])
s2 = Series([7, 9, 10], index=[0, 2, 3])
result = s1.expanding().cov(s2)
expected = Series([None, None, None, 4.5])
tm.assert_series_equal(result, expected)


def test_expanding_corr_diff_index():
# GH 7512
s1 = Series([1, 2, 3], index=[0, 1, 2])
s2 = Series([1, 3], index=[0, 2])
result = s1.expanding().corr(s2)
expected = Series([None, None, 1.0])
tm.assert_series_equal(result, expected)

s2a = Series([1, None, 3], index=[0, 1, 2])
result = s1.expanding().corr(s2a)
tm.assert_series_equal(result, expected)

s1 = Series([7, 8, 10], index=[0, 1, 3])
s2 = Series([7, 9, 10], index=[0, 2, 3])
result = s1.expanding().corr(s2)
expected = Series([None, None, None, 1.0])
tm.assert_series_equal(result, expected)


def test_expanding_cov_pairwise_diff_length():
# GH 7512
df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo"))
df1a = DataFrame(
[[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo")
)
df2 = DataFrame(
[[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo")
)
df2a = DataFrame(
[[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo")
)
# TODO: xref gh-15826
# .loc is not preserving the names
result1 = df1.expanding().cov(df2, pairwise=True).loc[2]
result2 = df1.expanding().cov(df2a, pairwise=True).loc[2]
result3 = df1a.expanding().cov(df2, pairwise=True).loc[2]
result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2]
expected = DataFrame(
[[-3.0, -6.0], [-5.0, -10.0]],
columns=Index(["A", "B"], name="foo"),
index=Index(["X", "Y"], name="foo"),
)
tm.assert_frame_equal(result1, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)
tm.assert_frame_equal(result4, expected)


def test_expanding_corr_pairwise_diff_length():
# GH 7512
df1 = DataFrame(
[[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar")
)
df1a = DataFrame(
[[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"]
)
df2 = DataFrame(
[[5, 6], [None, None], [2, 1]],
columns=["X", "Y"],
index=Index(range(3), name="bar"),
)
df2a = DataFrame(
[[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"]
)
result1 = df1.expanding().corr(df2, pairwise=True).loc[2]
result2 = df1.expanding().corr(df2a, pairwise=True).loc[2]
result3 = df1a.expanding().corr(df2, pairwise=True).loc[2]
result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2]
expected = DataFrame(
[[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"])
)
tm.assert_frame_equal(result1, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)
tm.assert_frame_equal(result4, expected)


def test_expanding_apply_args_kwargs(engine_and_raw):
def mean_w_arg(x, const):
return np.mean(x) + const

engine, raw = engine_and_raw

df = DataFrame(np.random.rand(20, 3))

expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0

result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,))
tm.assert_frame_equal(result, expected)

result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20})
tm.assert_frame_equal(result, expected)
Loading