From fea788a5a765a87e70625a5b016cf6b81e9ea6d0 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 27 Feb 2021 12:47:56 -0500 Subject: [PATCH 1/3] CLN/TST: normalize test_frame_apply --- pandas/tests/apply/test_frame_apply.py | 158 +++++++++++++------------ 1 file changed, 81 insertions(+), 77 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 3532040a2fd7b..90e380326e026 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -38,17 +38,24 @@ def int_frame_const_col(): def test_apply(float_frame): with np.errstate(all="ignore"): # ufunc - applied = float_frame.apply(np.sqrt) - tm.assert_series_equal(np.sqrt(float_frame["A"]), applied["A"]) + result = np.sqrt(float_frame["A"]) + expected = float_frame.apply(np.sqrt)["A"] + tm.assert_series_equal(result, expected) # aggregator - applied = float_frame.apply(np.mean) - assert applied["A"] == np.mean(float_frame["A"]) + result = float_frame.apply(np.mean)["A"] + expected = np.mean(float_frame["A"]) + assert result == expected d = float_frame.index[0] - applied = float_frame.apply(np.mean, axis=1) - assert applied[d] == np.mean(float_frame.xs(d)) - assert applied.index is float_frame.index # want this + result = float_frame.apply(np.mean, axis=1) + expected = np.mean(float_frame.xs(d)) + assert result[d] == expected + assert result.index is float_frame.index + + # GH 9573 + df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) + result = df.apply(lambda ts: ts.astype("category")) # invalid axis df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) @@ -56,44 +63,40 @@ def test_apply(float_frame): with pytest.raises(ValueError, match=msg): df.apply(lambda x: x, 2) - # GH 9573 - df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) - df = df.apply(lambda ts: ts.astype("category")) - - assert df.shape == (4, 2) - assert isinstance(df["c0"].dtype, CategoricalDtype) - assert isinstance(df["c1"].dtype, CategoricalDtype) + assert result.shape == (4, 2) + assert isinstance(result["c0"].dtype, CategoricalDtype) + assert isinstance(result["c1"].dtype, CategoricalDtype) def test_apply_axis1_with_ea(): - # GH#36785 - df = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]}) - result = df.apply(lambda x: x, axis=1) - tm.assert_frame_equal(result, df) + # GH 36785 + expected = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]}) + result = expected.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) def test_apply_mixed_datetimelike(): # mixed datetimelike # GH 7778 - df = DataFrame( + expected = DataFrame( { "A": date_range("20130101", periods=3), "B": pd.to_timedelta(np.arange(3), unit="s"), } ) - result = df.apply(lambda x: x, axis=1) - tm.assert_frame_equal(result, df) + result = expected.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) def test_apply_empty(float_frame): # empty empty_frame = DataFrame() - applied = empty_frame.apply(np.sqrt) - assert applied.empty + result = empty_frame.apply(np.sqrt) + assert result.empty - applied = empty_frame.apply(np.mean) - assert applied.empty + result = empty_frame.apply(np.mean) + assert result.empty no_rows = float_frame[:0] result = no_rows.apply(lambda x: x.mean()) @@ -108,7 +111,7 @@ def test_apply_empty(float_frame): # GH 2476 expected = DataFrame(index=["a"]) result = expected.apply(lambda x: x["a"], axis=1) - tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(result, expected) def test_apply_with_reduce_empty(): @@ -285,14 +288,13 @@ def _assert_raw(x): float_frame.apply(_assert_raw, raw=True) float_frame.apply(_assert_raw, axis=1, raw=True) - result0 = float_frame.apply(np.mean, raw=True) - result1 = float_frame.apply(np.mean, axis=1, raw=True) - - expected0 = float_frame.apply(lambda x: x.values.mean()) - expected1 = float_frame.apply(lambda x: x.values.mean(), axis=1) + result = float_frame.apply(np.mean, raw=True) + expected = float_frame.apply(lambda x: x.values.mean()) + tm.assert_series_equal(result, expected) - tm.assert_series_equal(result0, expected0) - tm.assert_series_equal(result1, expected1) + result = float_frame.apply(np.mean, axis=1, raw=True) + expected = float_frame.apply(lambda x: x.values.mean(), axis=1) + tm.assert_series_equal(result, expected) # no reduction result = float_frame.apply(lambda x: x * 2, raw=True) @@ -306,8 +308,9 @@ def _assert_raw(x): def test_apply_axis1(float_frame): d = float_frame.index[0] - tapplied = float_frame.apply(np.mean, axis=1) - assert tapplied[d] == np.mean(float_frame.xs(d)) + result = float_frame.apply(np.mean, axis=1)[d] + expected = np.mean(float_frame.xs(d)) + assert result == expected def test_apply_mixed_dtype_corner(): @@ -401,27 +404,25 @@ def test_apply_reduce_to_dict(): # GH 25196 37544 data = DataFrame([[1, 2], [3, 4]], columns=["c0", "c1"], index=["i0", "i1"]) - result0 = data.apply(dict, axis=0) - expected0 = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns) - tm.assert_series_equal(result0, expected0) + result = data.apply(dict, axis=0) + expected = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns) + tm.assert_series_equal(result, expected) - result1 = data.apply(dict, axis=1) - expected1 = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index) - tm.assert_series_equal(result1, expected1) + result = data.apply(dict, axis=1) + expected = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index) + tm.assert_series_equal(result, expected) def test_apply_differently_indexed(): df = DataFrame(np.random.randn(20, 10)) - result0 = df.apply(Series.describe, axis=0) - expected0 = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) - tm.assert_frame_equal(result0, expected0) + result = df.apply(Series.describe, axis=0) + expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) + tm.assert_frame_equal(result, expected) - result1 = df.apply(Series.describe, axis=1) - expected1 = DataFrame( - {i: v.describe() for i, v in df.T.items()}, columns=df.index - ).T - tm.assert_frame_equal(result1, expected1) + result = df.apply(Series.describe, axis=1) + expected = DataFrame({i: v.describe() for i, v in df.T.items()}, columns=df.index).T + tm.assert_frame_equal(result, expected) def test_apply_modify_traceback(): @@ -525,7 +526,7 @@ def f(r): def test_apply_convert_objects(): - data = DataFrame( + expected = DataFrame( { "A": [ "foo", @@ -572,8 +573,8 @@ def test_apply_convert_objects(): } ) - result = data.apply(lambda x: x, axis=1) - tm.assert_frame_equal(result._convert(datetime=True), data) + result = expected.apply(lambda x: x, axis=1)._convert(datetime=True) + tm.assert_frame_equal(result, expected) def test_apply_attach_name(float_frame): @@ -635,17 +636,17 @@ def test_applymap(float_frame): float_frame.applymap(type) # GH 465: function returning tuples - result = float_frame.applymap(lambda x: (x, x)) - assert isinstance(result["A"][0], tuple) + result = float_frame.applymap(lambda x: (x, x))["A"][0] + assert isinstance(result, tuple) # GH 2909: object conversion to float in constructor? df = DataFrame(data=[1, "a"]) - result = df.applymap(lambda x: x) - assert result.dtypes[0] == object + result = df.applymap(lambda x: x).dtypes[0] + assert result == object df = DataFrame(data=[1.0, "a"]) - result = df.applymap(lambda x: x) - assert result.dtypes[0] == object + result = df.applymap(lambda x: x).dtypes[0] + assert result == object # GH 2786 df = DataFrame(np.random.random((3, 4))) @@ -672,10 +673,10 @@ def test_applymap(float_frame): DataFrame(index=list("ABC")), DataFrame({"A": [], "B": [], "C": []}), ] - for frame in empty_frames: + for expected in empty_frames: for func in [round, lambda x: x]: - result = frame.applymap(func) - tm.assert_frame_equal(result, frame) + result = expected.applymap(func) + tm.assert_frame_equal(result, expected) def test_applymap_na_ignore(float_frame): @@ -743,7 +744,8 @@ def test_frame_apply_dont_convert_datetime64(): df = df.applymap(lambda x: x + BDay()) df = df.applymap(lambda x: x + BDay()) - assert df.x1.dtype == "M8[ns]" + result = df.x1.dtype + assert result == "M8[ns]" def test_apply_non_numpy_dtype(): @@ -786,16 +788,18 @@ def apply_list(row): def test_apply_noreduction_tzaware_object(): - # https://github.com/pandas-dev/pandas/issues/31505 - df = DataFrame({"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]") - result = df.apply(lambda x: x) - tm.assert_frame_equal(result, df) - result = df.apply(lambda x: x.copy()) - tm.assert_frame_equal(result, df) + # GH 31505 + expected = DataFrame( + {"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]" + ) + result = expected.apply(lambda x: x) + tm.assert_frame_equal(result, expected) + result = expected.apply(lambda x: x.copy()) + tm.assert_frame_equal(result, expected) def test_apply_function_runs_once(): - # https://github.com/pandas-dev/pandas/issues/30815 + # GH 30815 df = DataFrame({"a": [1, 2, 3]}) names = [] # Save row names function is applied to @@ -815,7 +819,7 @@ def non_reducing_function(row): def test_apply_raw_function_runs_once(): - # https://github.com/pandas-dev/pandas/issues/34506 + # GH 34506 df = DataFrame({"a": [1, 2, 3]}) values = [] # Save row values function is applied to @@ -885,11 +889,11 @@ def test_infer_row_shape(): # GH 17437 # if row shape is changing, infer it df = DataFrame(np.random.rand(10, 2)) - result = df.apply(np.fft.fft, axis=0) - assert result.shape == (10, 2) + result = df.apply(np.fft.fft, axis=0).shape + assert result == (10, 2) - result = df.apply(np.fft.rfft, axis=0) - assert result.shape == (6, 2) + result = df.apply(np.fft.rfft, axis=0).shape + assert result == (6, 2) def test_with_dictlike_columns(): @@ -1604,7 +1608,7 @@ def test_apply_dtype(col): def test_apply_mutating(): - # GH#35462 case where applied func pins a new BlockManager to a row + # GH 35462 case where applied func pins a new BlockManager to a row df = DataFrame({"a": range(100), "b": range(100, 200)}) def func(row): @@ -1623,7 +1627,7 @@ def func(row): def test_apply_empty_list_reduce(): - # GH#35683 get columns correct + # GH 35683 get columns correct df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], columns=["a", "b"]) result = df.apply(lambda x: [], result_type="reduce") @@ -1643,7 +1647,7 @@ def test_apply_no_suffix_index(): def test_apply_raw_returns_string(): - # https://github.com/pandas-dev/pandas/issues/35940 + # GH 35940 df = DataFrame({"A": ["aa", "bbb"]}) result = df.apply(lambda x: x[0], axis=1, raw=True) expected = Series(["aa", "bbb"]) From b11f1a1a3c7ada8e2eebb98521d1dcc4795c902c Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 27 Feb 2021 15:09:39 -0500 Subject: [PATCH 2/3] regroup test --- pandas/tests/apply/test_frame_apply.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 90e380326e026..45fc4d648f752 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -53,16 +53,16 @@ def test_apply(float_frame): assert result[d] == expected assert result.index is float_frame.index - # GH 9573 - df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) - result = df.apply(lambda ts: ts.astype("category")) - # invalid axis df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) msg = "No axis named 2 for object type DataFrame" with pytest.raises(ValueError, match=msg): df.apply(lambda x: x, 2) + # GH 9573 + df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) + result = df.apply(lambda ts: ts.astype("category")) + assert result.shape == (4, 2) assert isinstance(result["c0"].dtype, CategoricalDtype) assert isinstance(result["c1"].dtype, CategoricalDtype) From 9d095bdf61c2967e7c6227c90ec27b9e7f1bfec3 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 2 Mar 2021 16:51:59 -0500 Subject: [PATCH 3/3] Revert changes to GH #s --- pandas/tests/apply/test_frame_apply.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 45fc4d648f752..eb4aeea5e424a 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -69,7 +69,7 @@ def test_apply(float_frame): def test_apply_axis1_with_ea(): - # GH 36785 + # GH#36785 expected = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]}) result = expected.apply(lambda x: x, axis=1) tm.assert_frame_equal(result, expected) @@ -788,7 +788,7 @@ def apply_list(row): def test_apply_noreduction_tzaware_object(): - # GH 31505 + # https://github.com/pandas-dev/pandas/issues/31505 expected = DataFrame( {"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]" ) @@ -799,7 +799,7 @@ def test_apply_noreduction_tzaware_object(): def test_apply_function_runs_once(): - # GH 30815 + # https://github.com/pandas-dev/pandas/issues/30815 df = DataFrame({"a": [1, 2, 3]}) names = [] # Save row names function is applied to @@ -819,7 +819,7 @@ def non_reducing_function(row): def test_apply_raw_function_runs_once(): - # GH 34506 + # https://github.com/pandas-dev/pandas/issues/34506 df = DataFrame({"a": [1, 2, 3]}) values = [] # Save row values function is applied to @@ -1608,7 +1608,7 @@ def test_apply_dtype(col): def test_apply_mutating(): - # GH 35462 case where applied func pins a new BlockManager to a row + # GH#35462 case where applied func pins a new BlockManager to a row df = DataFrame({"a": range(100), "b": range(100, 200)}) def func(row): @@ -1627,7 +1627,7 @@ def func(row): def test_apply_empty_list_reduce(): - # GH 35683 get columns correct + # GH#35683 get columns correct df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], columns=["a", "b"]) result = df.apply(lambda x: [], result_type="reduce") @@ -1647,7 +1647,7 @@ def test_apply_no_suffix_index(): def test_apply_raw_returns_string(): - # GH 35940 + # https://github.com/pandas-dev/pandas/issues/35940 df = DataFrame({"A": ["aa", "bbb"]}) result = df.apply(lambda x: x[0], axis=1, raw=True) expected = Series(["aa", "bbb"])