From 183bb42b900fc286dde303acc5b6ae27015367a9 Mon Sep 17 00:00:00 2001 From: Numan Date: Thu, 2 Mar 2023 17:29:57 +0100 Subject: [PATCH 1/5] Added tests for std calculation on groups with numeric_only option --- pandas/tests/groupby/test_groupby.py | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e225ff5a0fa43..15754b9e75ea7 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2837,3 +2837,37 @@ def test_obj_with_exclusions_duplicate_columns(): result = gb._obj_with_exclusions expected = df.take([0, 2, 3], axis=1) tm.assert_frame_equal(result, expected) + + +def test_groupby_numeric_only_std_no_result(): + # GH 51080 + dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}] + df = DataFrame(dicts_non_numeric) + dfgb = df.groupby("a") + result = dfgb.std(numeric_only=True) + + assert result.empty + + +def test_groupby_std_raises_error(): + # GH 51080 + dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}] + df = DataFrame(dicts_non_numeric) + dfgb = df.groupby("a") + with pytest.raises(ValueError, match="could not convert string to float: 'bar'"): + dfgb.std(numeric_only=True) + + +def test_groupby_numeric_only_mixed_data_std(): + # GH 51080 + dicts = [ + {"a": "foo", "b": "bar"}, + {"a": "car", "b": "dar"}, + {"a": 10, "b": 20}, + ] + df = DataFrame(dicts) + dfgb = df.groupby("a", as_index=False) + result = dfgb.std(numeric_only=True) + expected = DataFrame([10, "car", "foo"], columns=["a"]) + + tm.assert_frame_equal(result, expected) From 618efb181cddb680c68521b00d0c1b7dfbcd9f74 Mon Sep 17 00:00:00 2001 From: Numan Date: Thu, 2 Mar 2023 17:29:57 +0100 Subject: [PATCH 2/5] Added tests for std calculation on groups with numeric_only option --- pandas/tests/groupby/test_groupby.py | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 97e88a8545aa5..34ab2c913f30e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2843,3 +2843,37 @@ def test_obj_with_exclusions_duplicate_columns(): result = gb._obj_with_exclusions expected = df.take([0, 2, 3], axis=1) tm.assert_frame_equal(result, expected) + + +def test_groupby_numeric_only_std_no_result(): + # GH 51080 + dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}] + df = DataFrame(dicts_non_numeric) + dfgb = df.groupby("a") + result = dfgb.std(numeric_only=True) + + assert result.empty + + +def test_groupby_std_raises_error(): + # GH 51080 + dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}] + df = DataFrame(dicts_non_numeric) + dfgb = df.groupby("a") + with pytest.raises(ValueError, match="could not convert string to float: 'bar'"): + dfgb.std(numeric_only=True) + + +def test_groupby_numeric_only_mixed_data_std(): + # GH 51080 + dicts = [ + {"a": "foo", "b": "bar"}, + {"a": "car", "b": "dar"}, + {"a": 10, "b": 20}, + ] + df = DataFrame(dicts) + dfgb = df.groupby("a", as_index=False) + result = dfgb.std(numeric_only=True) + expected = DataFrame([10, "car", "foo"], columns=["a"]) + + tm.assert_frame_equal(result, expected) From d75cdcb141a0619c04030d88d8ae52f4a71e65e5 Mon Sep 17 00:00:00 2001 From: Numan Date: Fri, 3 Mar 2023 16:37:34 +0100 Subject: [PATCH 3/5] removed unnecessary test and adjusted documentation for numeric_only --- pandas/core/groupby/groupby.py | 8 ++++---- pandas/tests/groupby/test_groupby.py | 17 +---------------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 55e14bc11246b..e5f0cf6b1dbae 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1915,7 +1915,7 @@ def std( .. versionadded:: 1.4.0 numeric_only : bool, default False - Include only `float`, `int` or `boolean` data. + Include only `float`, `int` or `boolean` columns. .. versionadded:: 1.5.0 @@ -1998,7 +1998,7 @@ def var( .. versionadded:: 1.4.0 numeric_only : bool, default False - Include only `float`, `int` or `boolean` data. + Include only `float`, `int` or `boolean` columns. .. versionadded:: 1.5.0 @@ -2167,7 +2167,7 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): Degrees of freedom. numeric_only : bool, default False - Include only `float`, `int` or `boolean` data. + Include only `float`, `int` or `boolean` columns. .. versionadded:: 1.5.0 @@ -3093,7 +3093,7 @@ def quantile( interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} Method to use when the desired quantile falls between two points. numeric_only : bool, default False - Include only `float`, `int` or `boolean` data. + Include only `float`, `int` or `boolean` columns. .. versionadded:: 1.5.0 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 34ab2c913f30e..37ef7e088d680 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2861,19 +2861,4 @@ def test_groupby_std_raises_error(): df = DataFrame(dicts_non_numeric) dfgb = df.groupby("a") with pytest.raises(ValueError, match="could not convert string to float: 'bar'"): - dfgb.std(numeric_only=True) - - -def test_groupby_numeric_only_mixed_data_std(): - # GH 51080 - dicts = [ - {"a": "foo", "b": "bar"}, - {"a": "car", "b": "dar"}, - {"a": 10, "b": 20}, - ] - df = DataFrame(dicts) - dfgb = df.groupby("a", as_index=False) - result = dfgb.std(numeric_only=True) - expected = DataFrame([10, "car", "foo"], columns=["a"]) - - tm.assert_frame_equal(result, expected) + dfgb.std() From 10edb72e0197453c14af51eb39c068bc6910c558 Mon Sep 17 00:00:00 2001 From: Numan Date: Sun, 5 Mar 2023 01:43:44 +0100 Subject: [PATCH 4/5] Parameterized the tests and removed doc changes --- pandas/core/groupby/groupby.py | 6 +++--- pandas/tests/groupby/test_groupby.py | 25 ++++++++++++------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e5f0cf6b1dbae..1d56bc6baf52a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1915,7 +1915,7 @@ def std( .. versionadded:: 1.4.0 numeric_only : bool, default False - Include only `float`, `int` or `boolean` columns. + Include only `float`, `int` or `boolean` data. .. versionadded:: 1.5.0 @@ -1998,7 +1998,7 @@ def var( .. versionadded:: 1.4.0 numeric_only : bool, default False - Include only `float`, `int` or `boolean` columns. + Include only `float`, `int` or `boolean` data. .. versionadded:: 1.5.0 @@ -2167,7 +2167,7 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): Degrees of freedom. numeric_only : bool, default False - Include only `float`, `int` or `boolean` columns. + Include only `float`, `int` or `boolean` data. .. versionadded:: 1.5.0 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 37ef7e088d680..d74745b36b74c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2845,20 +2845,19 @@ def test_obj_with_exclusions_duplicate_columns(): tm.assert_frame_equal(result, expected) -def test_groupby_numeric_only_std_no_result(): +@pytest.mark.parametrize("numeric_only", [True, False]) +def test_groupby_numeric_only_std_no_result(numeric_only): # GH 51080 dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}] df = DataFrame(dicts_non_numeric) - dfgb = df.groupby("a") - result = dfgb.std(numeric_only=True) - - assert result.empty + dfgb = df.groupby("a", as_index=False, sort=False) - -def test_groupby_std_raises_error(): - # GH 51080 - dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}] - df = DataFrame(dicts_non_numeric) - dfgb = df.groupby("a") - with pytest.raises(ValueError, match="could not convert string to float: 'bar'"): - dfgb.std() + if numeric_only: + result = dfgb.std(numeric_only=True) + expected_df = DataFrame(["foo", "car"], columns=["a"]) + tm.assert_frame_equal(result, expected_df) + else: + with pytest.raises( + ValueError, match="could not convert string to float: 'bar'" + ): + dfgb.std(numeric_only=numeric_only) From 1390fa4606cc44c9b3a4e46af253e9106ccacd05 Mon Sep 17 00:00:00 2001 From: Numan Date: Sun, 5 Mar 2023 01:45:42 +0100 Subject: [PATCH 5/5] revert missed doc --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 94e8b1fa4e415..457352564f255 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3094,7 +3094,7 @@ def quantile( interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} Method to use when the desired quantile falls between two points. numeric_only : bool, default False - Include only `float`, `int` or `boolean` columns. + Include only `float`, `int` or `boolean` data. .. versionadded:: 1.5.0