From 41e0f074a42cd7e2404830d4fcf2528e84867d00 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:15:13 -0400 Subject: [PATCH 1/5] TST: add test for last method on dataframe grouped by on boolean column (#46409) --- pandas/tests/frame/methods/test_dtypes.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py index 31592f987f04d..d0355adbce6ff 100644 --- a/pandas/tests/frame/methods/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -79,6 +79,24 @@ def test_dtypes_are_correct_after_column_slice(self): Series({"a": np.float_, "b": np.float_, "c": np.float_}), ) + def test_dtypes_are_correct_after_groupby_last(self): + # GH46409 + df1 = DataFrame( + {"id": [1, 2, 3, 4], "test": [True, pd.NA, pd.NA, False]} + ).convert_dtypes() + + df2 = DataFrame( + {"id": [1, 2, 3, 4], "test": [True, pd.NA, True, False]} + ).convert_dtypes() + + grouped1 = df1.groupby("id") + last1 = grouped1.last() + grouped2 = df2.groupby("id") + last2 = grouped2.last() + + assert last1.test.dtype == pd.BooleanDtype() + assert last2.test.dtype == pd.BooleanDtype() + def test_dtypes_gh8722(self, float_string_frame): float_string_frame["bool"] = float_string_frame["A"] > 0 result = float_string_frame.dtypes From 02c79d64f1fcd8a87d753571a7389d7c3d5e73a7 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Sun, 17 Jul 2022 17:45:32 -0400 Subject: [PATCH 2/5] TST: add test for last method on dataframe grouped by on boolean column (#46409) --- pandas/tests/frame/methods/test_dtypes.py | 33 ++++++++++++----------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py index d0355adbce6ff..3d9eec8de283b 100644 --- a/pandas/tests/frame/methods/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py 
@@ -1,6 +1,7 @@ from datetime import timedelta import numpy as np +import pytest from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -79,23 +80,23 @@ def test_dtypes_are_correct_after_column_slice(self): Series({"a": np.float_, "b": np.float_, "c": np.float_}), ) - def test_dtypes_are_correct_after_groupby_last(self): + @pytest.mark.parametrize( + "data", + [ + DataFrame( + {"id": [1, 2, 3, 4], "test": [True, pd.NA, pd.NA, False]} + ).convert_dtypes(), + DataFrame( + {"id": [1, 2, 3, 4], "test": [True, pd.NA, True, False]} + ).convert_dtypes(), + ], + ) + def test_dtypes_are_correct_after_groupby_last(self, data): # GH46409 - df1 = DataFrame( - {"id": [1, 2, 3, 4], "test": [True, pd.NA, pd.NA, False]} - ).convert_dtypes() - - df2 = DataFrame( - {"id": [1, 2, 3, 4], "test": [True, pd.NA, True, False]} - ).convert_dtypes() - - grouped1 = df1.groupby("id") - last1 = grouped1.last() - grouped2 = df2.groupby("id") - last2 = grouped2.last() - - assert last1.test.dtype == pd.BooleanDtype() - assert last2.test.dtype == pd.BooleanDtype() + result = data.groupby("id").last().test + expected = data.set_index("id").test + assert result.dtype == pd.BooleanDtype() + tm.assert_series_equal(expected, result) def test_dtypes_gh8722(self, float_string_frame): float_string_frame["bool"] = float_string_frame["A"] > 0 From 841bb6e0f6cfe3e58b81e71815b7cf1e0c357f7b Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:33:14 -0400 Subject: [PATCH 3/5] TST: add test for last method on dataframe grouped by on boolean column (#46409) --- pandas/tests/frame/methods/test_dtypes.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py index 3d9eec8de283b..856e58a2eb2d2 100644 --- a/pandas/tests/frame/methods/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -83,16 +83,13 @@ def
test_dtypes_are_correct_after_column_slice(self): @pytest.mark.parametrize( "data", [ - DataFrame( - {"id": [1, 2, 3, 4], "test": [True, pd.NA, pd.NA, False]} - ).convert_dtypes(), - DataFrame( - {"id": [1, 2, 3, 4], "test": [True, pd.NA, True, False]} - ).convert_dtypes(), + DataFrame({"id": [1, 2, 3, 4], "test": [True, pd.NA, pd.NA, False]}), + DataFrame({"id": [1, 2, 3, 4], "test": [True, pd.NA, True, False]}), ], ) def test_dtypes_are_correct_after_groupby_last(self, data): # GH46409 + data = data.convert_dtypes() result = data.groupby("id").last().test expected = data.set_index("id").test assert result.dtype == pd.BooleanDtype() From ed3a044ab3e6438a399a45f09e5658267236e2b7 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:47:45 -0400 Subject: [PATCH 4/5] TST: add test for last method on dataframe grouped by on boolean column (#46409) --- pandas/tests/frame/methods/test_dtypes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py index 856e58a2eb2d2..c9f6d2847ca09 100644 --- a/pandas/tests/frame/methods/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -83,15 +83,15 @@ def test_dtypes_are_correct_after_column_slice(self): @pytest.mark.parametrize( "data", [ - DataFrame({"id": [1, 2, 3, 4], "test": [True, pd.NA, pd.NA, False]}), - DataFrame({"id": [1, 2, 3, 4], "test": [True, pd.NA, True, False]}), + [True, pd.NA, pd.NA, False], + [True, pd.NA, True, False], ], ) def test_dtypes_are_correct_after_groupby_last(self, data): # GH46409 - data = data.convert_dtypes() - result = data.groupby("id").last().test - expected = data.set_index("id").test + df = DataFrame({"id": [1, 2, 3, 4], "test": data}).convert_dtypes() + result = df.groupby("id").last().test + expected = df.set_index("id").test assert result.dtype == pd.BooleanDtype() tm.assert_series_equal(expected, result) From
12046887473bd8badb6e20de6f2233256ac716eb Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:59:48 -0400 Subject: [PATCH 5/5] TST: add test for last method on dataframe grouped by on boolean column (#46409) --- pandas/tests/frame/methods/test_dtypes.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py index c9f6d2847ca09..87e6ed5b1b135 100644 --- a/pandas/tests/frame/methods/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -82,14 +82,13 @@ def test_dtypes_are_correct_after_column_slice(self): @pytest.mark.parametrize( "data", - [ - [True, pd.NA, pd.NA, False], - [True, pd.NA, True, False], - ], + [pd.NA, True], ) def test_dtypes_are_correct_after_groupby_last(self, data): # GH46409 - df = DataFrame({"id": [1, 2, 3, 4], "test": data}).convert_dtypes() + df = DataFrame( + {"id": [1, 2, 3, 4], "test": [True, pd.NA, data, False]} + ).convert_dtypes() result = df.groupby("id").last().test expected = df.set_index("id").test assert result.dtype == pd.BooleanDtype()