From 416a0bc4deb7dcfae32b09b45eea624f883b2598 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 23 Oct 2021 20:01:48 -0700 Subject: [PATCH] TST: Move some consistency expanding tests to test_expanding --- .../test_moments_consistency_expanding.py | 362 +----------------- pandas/tests/window/test_expanding.py | 357 +++++++++++++++++ 2 files changed, 358 insertions(+), 361 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py index df3e79fb79eca..d0fe7bf9fc2d2 100644 --- a/pandas/tests/window/moments/test_moments_consistency_expanding.py +++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py @@ -1,171 +1,10 @@ import numpy as np import pytest -from pandas import ( - DataFrame, - Index, - MultiIndex, - Series, - isna, - notna, -) +from pandas import Series import pandas._testing as tm -def test_expanding_corr(series): - A = series.dropna() - B = (A + np.random.randn(len(A)))[:-5] - - result = A.expanding().corr(B) - - rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) - - tm.assert_almost_equal(rolling_result, result) - - -def test_expanding_count(series): - result = series.expanding(min_periods=0).count() - tm.assert_almost_equal( - result, series.rolling(window=len(series), min_periods=0).count() - ) - - -def test_expanding_quantile(series): - result = series.expanding().quantile(0.5) - - rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5) - - tm.assert_almost_equal(result, rolling_result) - - -def test_expanding_cov(series): - A = series - B = (A + np.random.randn(len(A)))[:-5] - - result = A.expanding().cov(B) - - rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) - - tm.assert_almost_equal(rolling_result, result) - - -def test_expanding_cov_pairwise(frame): - result = frame.expanding().cov() - - rolling_result = frame.rolling(window=len(frame), min_periods=1).cov() - - tm.assert_frame_equal(result, rolling_result) - - -def test_expanding_corr_pairwise(frame): - result = frame.expanding().corr() - - rolling_result = frame.rolling(window=len(frame), min_periods=1).corr() - tm.assert_frame_equal(result, rolling_result) - - -@pytest.mark.parametrize( - "func,static_comp", - [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], - ids=["sum", "mean", "max", "min"], -) -def test_expanding_func(func, static_comp, frame_or_series): - data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) - result = getattr(data.expanding(min_periods=1, axis=0), func)() - assert isinstance(result, frame_or_series) - - if frame_or_series is Series: - tm.assert_almost_equal(result[10], static_comp(data[:11])) - else: - tm.assert_series_equal( - result.iloc[10], static_comp(data[:11]), check_names=False - ) - - -@pytest.mark.parametrize( - "func,static_comp", - [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], - ids=["sum", "mean", "max", "min"], -) -def test_expanding_min_periods(func, static_comp): - ser = Series(np.random.randn(50)) - - result = getattr(ser.expanding(min_periods=30, axis=0), func)() - assert result[:29].isna().all() - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) - - # min_periods is working correctly - result = getattr(ser.expanding(min_periods=15, axis=0), func)() - assert isna(result.iloc[13]) - assert notna(result.iloc[14]) - - ser2 = Series(np.random.randn(20)) - result = getattr(ser2.expanding(min_periods=5, axis=0), func)() - assert isna(result[3]) - assert notna(result[4]) - - # min_periods=0 - result0 = getattr(ser.expanding(min_periods=0, axis=0), func)() - result1 = getattr(ser.expanding(min_periods=1, axis=0), func)() - tm.assert_almost_equal(result0, result1) - - result = getattr(ser.expanding(min_periods=1, axis=0), func)() - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) - - -def test_expanding_apply(engine_and_raw, frame_or_series): - engine, raw = engine_and_raw - data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) - result = data.expanding(min_periods=1).apply( - lambda x: x.mean(), raw=raw, engine=engine - ) - assert isinstance(result, frame_or_series) - - if frame_or_series is Series: - tm.assert_almost_equal(result[9], np.mean(data[:11])) - else: - tm.assert_series_equal(result.iloc[9], np.mean(data[:11]), check_names=False) - - -def test_expanding_min_periods_apply(engine_and_raw): - engine, raw = engine_and_raw - ser = Series(np.random.randn(50)) - - result = ser.expanding(min_periods=30).apply( - lambda x: x.mean(), raw=raw, engine=engine - ) - assert result[:29].isna().all() - tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) - - # min_periods is working correctly - result = ser.expanding(min_periods=15).apply( - lambda x: x.mean(), raw=raw, engine=engine - ) - assert isna(result.iloc[13]) - assert notna(result.iloc[14]) - - ser2 = Series(np.random.randn(20)) - result = ser2.expanding(min_periods=5).apply( - lambda x: x.mean(), raw=raw, engine=engine - ) - assert isna(result[3]) - assert notna(result[4]) - - # min_periods=0 - result0 = ser.expanding(min_periods=0).apply( - lambda x: x.mean(), raw=raw, engine=engine - ) - result1 = ser.expanding(min_periods=1).apply( - lambda x: x.mean(), raw=raw, engine=engine - ) - tm.assert_almost_equal(result0, result1) - - result = ser.expanding(min_periods=1).apply( - lambda x: x.mean(), raw=raw, engine=engine - ) - tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) - - @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) @pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum]) def test_expanding_apply_consistency_sum_nans(consistency_data, min_periods, f): @@ -334,202 +173,3 @@ def test_expanding_consistency_var_debiasing_factors(consistency_data, min_perio x.expanding().count() - 1.0 ).replace(0.0, np.nan) tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) - - -@pytest.mark.parametrize( - "f", - [ - lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)), - lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)), - ], -) -def test_moment_functions_zero_length_pairwise(f): - - df1 = DataFrame() - df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) - df2["a"] = df2["a"].astype("float64") - - df1_expected = DataFrame( - index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) - ) - df2_expected = DataFrame( - index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), - columns=Index(["a"], name="foo"), - dtype="float64", - ) - - df1_result = f(df1) - tm.assert_frame_equal(df1_result, df1_expected) - - df2_result = f(df2) - tm.assert_frame_equal(df2_result, df2_expected) - - -@pytest.mark.parametrize( - "f", - [ - lambda x: x.expanding().count(), - lambda x: x.expanding(min_periods=5).cov(x, pairwise=False), - lambda x: x.expanding(min_periods=5).corr(x, pairwise=False), - lambda x: x.expanding(min_periods=5).max(), - lambda x: x.expanding(min_periods=5).min(), - lambda x: x.expanding(min_periods=5).sum(), - lambda x: x.expanding(min_periods=5).mean(), - lambda x: x.expanding(min_periods=5).std(), - lambda x: x.expanding(min_periods=5).var(), - lambda x: x.expanding(min_periods=5).skew(), - lambda x: x.expanding(min_periods=5).kurt(), - lambda x: x.expanding(min_periods=5).quantile(0.5), - lambda x: x.expanding(min_periods=5).median(), - lambda x: x.expanding(min_periods=5).apply(sum, raw=False), - lambda x: x.expanding(min_periods=5).apply(sum, raw=True), - ], -) -def test_moment_functions_zero_length(f): - # GH 8056 - s = Series(dtype=np.float64) - s_expected = s - df1 = DataFrame() - df1_expected = df1 - df2 = DataFrame(columns=["a"]) - df2["a"] = df2["a"].astype("float64") - df2_expected = df2 - - s_result = f(s) - tm.assert_series_equal(s_result, s_expected) - - df1_result = f(df1) - tm.assert_frame_equal(df1_result, df1_expected) - - df2_result = f(df2) - tm.assert_frame_equal(df2_result, df2_expected) - - -def test_expanding_apply_empty_series(engine_and_raw): - engine, raw = engine_and_raw - ser = Series([], dtype=np.float64) - tm.assert_series_equal( - ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine) - ) - - -def test_expanding_apply_min_periods_0(engine_and_raw): - # GH 8080 - engine, raw = engine_and_raw - s = Series([None, None, None]) - result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine) - expected = Series([1.0, 2.0, 3.0]) - tm.assert_series_equal(result, expected) - - -def test_expanding_cov_diff_index(): - # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) - result = s1.expanding().cov(s2) - expected = Series([None, None, 2.0]) - tm.assert_series_equal(result, expected) - - s2a = Series([1, None, 3], index=[0, 1, 2]) - result = s1.expanding().cov(s2a) - tm.assert_series_equal(result, expected) - - s1 = Series([7, 8, 10], index=[0, 1, 3]) - s2 = Series([7, 9, 10], index=[0, 2, 3]) - result = s1.expanding().cov(s2) - expected = Series([None, None, None, 4.5]) - tm.assert_series_equal(result, expected) - - -def test_expanding_corr_diff_index(): - # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) - result = s1.expanding().corr(s2) - expected = Series([None, None, 1.0]) - tm.assert_series_equal(result, expected) - - s2a = Series([1, None, 3], index=[0, 1, 2]) - result = s1.expanding().corr(s2a) - tm.assert_series_equal(result, expected) - - s1 = Series([7, 8, 10], index=[0, 1, 3]) - s2 = Series([7, 9, 10], index=[0, 2, 3]) - result = s1.expanding().corr(s2) - expected = Series([None, None, None, 1.0]) - tm.assert_series_equal(result, expected) - - -def test_expanding_cov_pairwise_diff_length(): - # GH 7512 - df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo")) - df1a = DataFrame( - [[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo") - ) - df2 = DataFrame( - [[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo") - ) - df2a = DataFrame( - [[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo") - ) - # TODO: xref gh-15826 - # .loc is not preserving the names - result1 = df1.expanding().cov(df2, pairwise=True).loc[2] - result2 = df1.expanding().cov(df2a, pairwise=True).loc[2] - result3 = df1a.expanding().cov(df2, pairwise=True).loc[2] - result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2] - expected = DataFrame( - [[-3.0, -6.0], [-5.0, -10.0]], - columns=Index(["A", "B"], name="foo"), - index=Index(["X", "Y"], name="foo"), - ) - tm.assert_frame_equal(result1, expected) - tm.assert_frame_equal(result2, expected) - tm.assert_frame_equal(result3, expected) - tm.assert_frame_equal(result4, expected) - - -def test_expanding_corr_pairwise_diff_length(): - # GH 7512 - df1 = DataFrame( - [[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar") - ) - df1a = DataFrame( - [[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"] - ) - df2 = DataFrame( - [[5, 6], [None, None], [2, 1]], - columns=["X", "Y"], - index=Index(range(3), name="bar"), - ) - df2a = DataFrame( - [[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"] - ) - result1 = df1.expanding().corr(df2, pairwise=True).loc[2] - result2 = df1.expanding().corr(df2a, pairwise=True).loc[2] - result3 = df1a.expanding().corr(df2, pairwise=True).loc[2] - result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2] - expected = DataFrame( - [[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"]) - ) - tm.assert_frame_equal(result1, expected) - tm.assert_frame_equal(result2, expected) - tm.assert_frame_equal(result3, expected) - tm.assert_frame_equal(result4, expected) - - -def test_expanding_apply_args_kwargs(engine_and_raw): - def mean_w_arg(x, const): - return np.mean(x) + const - - engine, raw = engine_and_raw - - df = DataFrame(np.random.rand(20, 3)) - - expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0 - - result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,)) - tm.assert_frame_equal(result, expected) - - result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20}) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 680ac3654222a..ad43a02724960 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -6,7 +6,11 @@ from pandas import ( DataFrame, DatetimeIndex, + Index, + MultiIndex, Series, + isna, + notna, ) import pandas._testing as tm from pandas.core.window import Expanding @@ -288,3 +292,356 @@ def test_rank(window, method, pct, ascending, test_data): result = ser.expanding(window).rank(method=method, pct=pct, ascending=ascending) tm.assert_series_equal(result, expected) + + +def test_expanding_corr(series): + A = series.dropna() + B = (A + np.random.randn(len(A)))[:-5] + + result = A.expanding().corr(B) + + rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) + + tm.assert_almost_equal(rolling_result, result) + + +def test_expanding_count(series): + result = series.expanding(min_periods=0).count() + tm.assert_almost_equal( + result, series.rolling(window=len(series), min_periods=0).count() + ) + + +def test_expanding_quantile(series): + result = series.expanding().quantile(0.5) + + rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5) + + tm.assert_almost_equal(result, rolling_result) + + +def test_expanding_cov(series): + A = series + B = (A + np.random.randn(len(A)))[:-5] + + result = A.expanding().cov(B) + + rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) + + tm.assert_almost_equal(rolling_result, result) + + +def test_expanding_cov_pairwise(frame): + result = frame.expanding().cov() + + rolling_result = frame.rolling(window=len(frame), min_periods=1).cov() + + tm.assert_frame_equal(result, rolling_result) + + +def test_expanding_corr_pairwise(frame): + result = frame.expanding().corr() + + rolling_result = frame.rolling(window=len(frame), min_periods=1).corr() + tm.assert_frame_equal(result, rolling_result) + + +@pytest.mark.parametrize( + "func,static_comp", + [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], + ids=["sum", "mean", "max", "min"], +) +def test_expanding_func(func, static_comp, frame_or_series): + data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) + result = getattr(data.expanding(min_periods=1, axis=0), func)() + assert isinstance(result, frame_or_series) + + if frame_or_series is Series: + tm.assert_almost_equal(result[10], static_comp(data[:11])) + else: + tm.assert_series_equal( + result.iloc[10], static_comp(data[:11]), check_names=False + ) + + +@pytest.mark.parametrize( + "func,static_comp", + [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], + ids=["sum", "mean", "max", "min"], +) +def test_expanding_min_periods(func, static_comp): + ser = Series(np.random.randn(50)) + + result = getattr(ser.expanding(min_periods=30, axis=0), func)() + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) + + # min_periods is working correctly + result = getattr(ser.expanding(min_periods=15, axis=0), func)() + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) + + ser2 = Series(np.random.randn(20)) + result = getattr(ser2.expanding(min_periods=5, axis=0), func)() + assert isna(result[3]) + assert notna(result[4]) + + # min_periods=0 + result0 = getattr(ser.expanding(min_periods=0, axis=0), func)() + result1 = getattr(ser.expanding(min_periods=1, axis=0), func)() + tm.assert_almost_equal(result0, result1) + + result = getattr(ser.expanding(min_periods=1, axis=0), func)() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) + + +def test_expanding_apply(engine_and_raw, frame_or_series): + engine, raw = engine_and_raw + data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) + result = data.expanding(min_periods=1).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + assert isinstance(result, frame_or_series) + + if frame_or_series is Series: + tm.assert_almost_equal(result[9], np.mean(data[:11])) + else: + tm.assert_series_equal(result.iloc[9], np.mean(data[:11]), check_names=False) + + +def test_expanding_min_periods_apply(engine_and_raw): + engine, raw = engine_and_raw + ser = Series(np.random.randn(50)) + + result = ser.expanding(min_periods=30).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) + + # min_periods is working correctly + result = ser.expanding(min_periods=15).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) + + ser2 = Series(np.random.randn(20)) + result = ser2.expanding(min_periods=5).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + assert isna(result[3]) + assert notna(result[4]) + + # min_periods=0 + result0 = ser.expanding(min_periods=0).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + result1 = ser.expanding(min_periods=1).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + tm.assert_almost_equal(result0, result1) + + result = ser.expanding(min_periods=1).apply( + lambda x: x.mean(), raw=raw, engine=engine + ) + tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) + + +@pytest.mark.parametrize( + "f", + [ + lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)), + lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)), + ], +) +def test_moment_functions_zero_length_pairwise(f): + + df1 = DataFrame() + df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) + df2["a"] = df2["a"].astype("float64") + + df1_expected = DataFrame( + index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) + ) + df2_expected = DataFrame( + index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), + columns=Index(["a"], name="foo"), + dtype="float64", + ) + + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) + + +@pytest.mark.parametrize( + "f", + [ + lambda x: x.expanding().count(), + lambda x: x.expanding(min_periods=5).cov(x, pairwise=False), + lambda x: x.expanding(min_periods=5).corr(x, pairwise=False), + lambda x: x.expanding(min_periods=5).max(), + lambda x: x.expanding(min_periods=5).min(), + lambda x: x.expanding(min_periods=5).sum(), + lambda x: x.expanding(min_periods=5).mean(), + lambda x: x.expanding(min_periods=5).std(), + lambda x: x.expanding(min_periods=5).var(), + lambda x: x.expanding(min_periods=5).skew(), + lambda x: x.expanding(min_periods=5).kurt(), + lambda x: x.expanding(min_periods=5).quantile(0.5), + lambda x: x.expanding(min_periods=5).median(), + lambda x: x.expanding(min_periods=5).apply(sum, raw=False), + lambda x: x.expanding(min_periods=5).apply(sum, raw=True), + ], +) +def test_moment_functions_zero_length(f): + # GH 8056 + s = Series(dtype=np.float64) + s_expected = s + df1 = DataFrame() + df1_expected = df1 + df2 = DataFrame(columns=["a"]) + df2["a"] = df2["a"].astype("float64") + df2_expected = df2 + + s_result = f(s) + tm.assert_series_equal(s_result, s_expected) + + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) + + +def test_expanding_apply_empty_series(engine_and_raw): + engine, raw = engine_and_raw + ser = Series([], dtype=np.float64) + tm.assert_series_equal( + ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine) + ) + + +def test_expanding_apply_min_periods_0(engine_and_raw): + # GH 8080 + engine, raw = engine_and_raw + s = Series([None, None, None]) + result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine) + expected = Series([1.0, 2.0, 3.0]) + tm.assert_series_equal(result, expected) + + +def test_expanding_cov_diff_index(): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = s1.expanding().cov(s2) + expected = Series([None, None, 2.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.expanding().cov(s2a) + tm.assert_series_equal(result, expected) + + s1 = Series([7, 8, 10], index=[0, 1, 3]) + s2 = Series([7, 9, 10], index=[0, 2, 3]) + result = s1.expanding().cov(s2) + expected = Series([None, None, None, 4.5]) + tm.assert_series_equal(result, expected) + + +def test_expanding_corr_diff_index(): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = s1.expanding().corr(s2) + expected = Series([None, None, 1.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.expanding().corr(s2a) + tm.assert_series_equal(result, expected) + + s1 = Series([7, 8, 10], index=[0, 1, 3]) + s2 = Series([7, 9, 10], index=[0, 2, 3]) + result = s1.expanding().corr(s2) + expected = Series([None, None, None, 1.0]) + tm.assert_series_equal(result, expected) + + +def test_expanding_cov_pairwise_diff_length(): + # GH 7512 + df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo")) + df1a = DataFrame( + [[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo") + ) + df2 = DataFrame( + [[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo") + ) + df2a = DataFrame( + [[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo") + ) + # TODO: xref gh-15826 + # .loc is not preserving the names + result1 = df1.expanding().cov(df2, pairwise=True).loc[2] + result2 = df1.expanding().cov(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().cov(df2, pairwise=True).loc[2] + result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2] + expected = DataFrame( + [[-3.0, -6.0], [-5.0, -10.0]], + columns=Index(["A", "B"], name="foo"), + index=Index(["X", "Y"], name="foo"), + ) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected) + tm.assert_frame_equal(result4, expected) + + +def test_expanding_corr_pairwise_diff_length(): + # GH 7512 + df1 = DataFrame( + [[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar") + ) + df1a = DataFrame( + [[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"] + ) + df2 = DataFrame( + [[5, 6], [None, None], [2, 1]], + columns=["X", "Y"], + index=Index(range(3), name="bar"), + ) + df2a = DataFrame( + [[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"] + ) + result1 = df1.expanding().corr(df2, pairwise=True).loc[2] + result2 = df1.expanding().corr(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().corr(df2, pairwise=True).loc[2] + result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2] + expected = DataFrame( + [[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"]) + ) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected) + tm.assert_frame_equal(result4, expected) + + +def test_expanding_apply_args_kwargs(engine_and_raw): + def mean_w_arg(x, const): + return np.mean(x) + const + + engine, raw = engine_and_raw + + df = DataFrame(np.random.rand(20, 3)) + + expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0 + + result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,)) + tm.assert_frame_equal(result, expected) + + result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20}) + tm.assert_frame_equal(result, expected)