From a9a0a19b98ec0dd0bc36f6c67a42002d0435a928 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Sun, 9 Aug 2020 00:33:56 +0100
Subject: [PATCH 01/13] first pass at a post function

---
 pandas/core/groupby/groupby.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 4597afeeaddbf..862acbaa6408a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -733,7 +733,7 @@ def pipe(self, func, *args, **kwargs):

     plot = property(GroupByPlot)

-    def _make_wrapper(self, name):
+    def _make_wrapper(self, name):
         assert name in self._apply_allowlist

         with _group_selection_context(self):
@@ -1789,7 +1789,14 @@ def _fill(self, direction, limit=None):
         if limit is None:
             limit = -1

-        return self._get_cythonized_result(
+        def _post(values, inferences):
+            if not self.dropna:
+                return values
+            mask = DataFrame(self.grouper.codes).eq(-1).any()
+            values[mask] = np.nan
+            return values
+
+        res = self._get_cythonized_result(
             "group_fillna_indexer",
             numeric_only=False,
             needs_mask=True,
@@ -1797,7 +1804,10 @@ def _fill(self, direction, limit=None):
             result_is_index=True,
             direction=direction,
             limit=limit,
+            post_processing=_post,
         )
+
+        return res

     @Substitution(name="groupby")
     def pad(self, limit=None):
         """
@@ -2532,7 +2542,7 @@ def _get_cythonized_result(
         # error_msg is "" on an frame/series with no rows or columns
         if len(output) == 0 and error_msg != "":
             raise TypeError(error_msg)
-
+
         if aggregate:
             return self._wrap_aggregated_output(output)
         else:

From e6f30220ad789270ec45f3da0b74de62ccbe1a6c Mon Sep 17 00:00:00 2001
From: smithto1
Date: Sun, 23 Aug 2020 22:03:59 +0100
Subject: [PATCH 02/13] mask.any()

---
 pandas/core/groupby/groupby.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 862acbaa6408a..573e3dab98f0c 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1793,7 +1793,8 @@ def _post(values, inferences):
             if not self.dropna:
                 return values
             mask = DataFrame(self.grouper.codes).eq(-1).any()
-            values[mask] = np.nan
+            if mask.any():
+                values[mask] = np.nan
             return values

         res = self._get_cythonized_result(
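The behaviour this series targets (GH 34725) is easiest to see on a small frame. The snippet below is an illustration written for this write-up, not part of the patches: with dropna=True (the default) rows whose group key is NaN belong to no group, so a groupby ffill should hand back NaN for them, while dropna=False treats the NaN keys as a group of their own and fills within it.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        {
            "key": ["a", "a", "a", np.nan, np.nan, np.nan],
            "val": [1.0, np.nan, np.nan, 2.0, np.nan, np.nan],
        }
    )

    # dropna=True: the NaN-keyed rows are not assigned to any group, so the
    # transform should return NaN for that whole block instead of filling it.
    print(df.groupby("key").ffill())

    # dropna=False: the NaN keys form their own group, so 2.0 propagates
    # forward within rows 3-5.
    print(df.groupby("key", dropna=False).ffill())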
From 61c5915334e2140c4a8f1d6d588dae17e97d1b65 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Mon, 24 Aug 2020 01:12:43 +0100
Subject: [PATCH 03/13] wrote test

---
 pandas/core/groupby/groupby.py                |  6 ++-
 .../tests/groupby/transform/test_transform.py | 45 +++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 1ee70b2c8560f..3aa2ab9cb111b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1818,7 +1818,11 @@ def _post(values, inferences):
                 return values
             mask = DataFrame(self.grouper.codes).eq(-1).any()
             if mask.any():
-                values[mask] = np.nan
+                try:
+                    values[mask] = np.nan
+                except ValueError:
+                    values[mask] = np.datetime64('NaT')
+
             return values

         res = self._get_cythonized_result(
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index c09f35526a6bf..6af9622288fd4 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -976,6 +976,51 @@ def test_ffill_bfill_non_unique_multilevel(func, expected_status):
     tm.assert_series_equal(result, expected)


+@pytest.mark.parametrize('dropna', [True, False])
+@pytest.mark.parametrize('limit', [None, 1])
+@pytest.mark.parametrize('method', ['ffill', 'bfill', 'pad'])
+@pytest.mark.parametrize('by', ['grp1', ['grp1'], ['grp1', 'grp2']])
+@pytest.mark.parametrize('has_nan', [[], ['grp1'], ['grp1', 'grp2']])
+def test_pad_handles_nan_groups(dropna, limit, method, by, has_nan):
+
+    rows = pd.DataFrame({
+        'int' : pd.array([1,2], dtype='Int64'),
+        'float' : [.1, .2],
+        'bool' : pd.array([True, False], dtype='bool'),
+        'date' : [pd.Timestamp(2010,1,1), pd.Timestamp(2020,2,2)],
+        'period' : pd.array([pd.Period('2010-01'), pd.Period('2020-2')], dtype='period[M]'),
+        'obj' : ['hello', 'world'],
+        'cat' : pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']),
+        })
+
+    ridx = pd.Series([None]*10)
+    ridx[2] = 0
+    ridx[7] = 1
+    df = rows.reindex(ridx).reset_index(drop=True)
+
+    grps = pd.Series(['good']*5 + ['bad']*5)
+    if type(by) is list:
+        grps = pd.concat([grps]*len(by), axis=1)
+    df[by] = grps
+
+    by = [by] if type(by) is not list else by
+    has_nan = list(set(has_nan).intersection(set(by)))
+    df[has_nan] = df[has_nan].replace('bad', np.nan)
+
+    grouped = df.groupby(by=by, dropna=dropna)
+    result = getattr(grouped, method)(limit=limit)
+
+    if dropna and (len(has_nan) > 0):
+        ridx[7] = None
+    lim = 2 if limit is None else limit
+
+    ridx = getattr(ridx, method)(limit=lim)
+    expected = rows.reindex(ridx).reset_index(drop=True)
+
+    tm.assert_frame_equal(result, expected)
+
+
+
 @pytest.mark.parametrize("func", [np.any, np.all])
 def test_any_all_np_func(func):
     # GH 20653

From edd10e540006762c9ea852b4820049725eacdb16 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Thu, 1 Oct 2020 22:25:16 +0100
Subject: [PATCH 04/13] clean up func. comments on test

---
 pandas/core/groupby/groupby.py                | 23 +++---
 .../tests/groupby/transform/test_transform.py | 77 ++++++++++++-------
 2 files changed, 62 insertions(+), 38 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 6cd7785b66d7f..3edc66792be7b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1847,18 +1847,17 @@ def _fill(self, direction, limit=None):
         if limit is None:
             limit = -1

-        def _post(values, inferences):
+        def _nan_group_gets_nan_values(values, *args):
             if not self.dropna:
                 return values
-            mask = DataFrame(self.grouper.codes).eq(-1).any()
-            if mask.any():
-                try:
-                    values[mask] = np.nan
-                except ValueError:
-                    values[mask] = np.datetime64('NaT')
-
+            in_nan_group = DataFrame(self.grouper.codes).eq(-1).any()
+            if in_nan_group.any():
+                filler = {np.datetime64: np.datetime64("NaT")}.get(
+                    values.dtype.type, np.nan
+                )
+                values[in_nan_group] = filler
             return values
-
+
         res = self._get_cythonized_result(
             "group_fillna_indexer",
             numeric_only=False,
@@ -1867,9 +1866,9 @@ def _post(values, inferences):
             result_is_index=True,
             direction=direction,
             limit=limit,
-            post_processing=_post,
+            post_processing=_nan_group_gets_nan_values,
         )
-
+
         return res

     @Substitution(name="groupby")
@@ -2605,7 +2604,7 @@ def _get_cythonized_result(
         # error_msg is "" on an frame/series with no rows or columns
         if len(output) == 0 and error_msg != "":
             raise TypeError(error_msg)
-
+
         if aggregate:
             return self._wrap_aggregated_output(output, index=self.grouper.result_index)
         else:
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index c8314f5d6d14d..77e9f9e1efe3f 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -977,51 +977,76 @@ def test_ffill_bfill_non_unique_multilevel(func, expected_status):
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize('dropna', [True, False])
-@pytest.mark.parametrize('limit', [None, 1])
-@pytest.mark.parametrize('method', ['ffill', 'bfill', 'pad'])
-@pytest.mark.parametrize('by', ['grp1', ['grp1'], ['grp1', 'grp2']])
-@pytest.mark.parametrize('has_nan', [[], ['grp1'], ['grp1', 'grp2']])
+@pytest.mark.parametrize("dropna", [True, False])
+@pytest.mark.parametrize("limit", [None, 1])
+@pytest.mark.parametrize("method", ["ffill", "bfill", "pad", "backfill"])
+@pytest.mark.parametrize("by", ["grp1", ["grp1"], ["grp1", "grp2"]])
+@pytest.mark.parametrize("has_nan", [[], ["grp1"], ["grp1", "grp2"]])
 def test_pad_handles_nan_groups(dropna, limit, method, by, has_nan):
-
-    rows = pd.DataFrame({
-        'int' : pd.array([1,2], dtype='Int64'),
-        'float' : [.1, .2],
-        'bool' : pd.array([True, False], dtype='bool'),
-        'date' : [pd.Timestamp(2010,1,1), pd.Timestamp(2020,2,2)],
-        'period' : pd.array([pd.Period('2010-01'), pd.Period('2020-2')], dtype='period[M]'),
-        'obj' : ['hello', 'world'],
-        'cat' : pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']),
-        })
-
-    ridx = pd.Series([None]*10)
+    # GH 34725
+
+    # Create two rows with many different dytypes. The first row will be in
+    # the 'good' group which never has a nan in the grouping column(s). The
+    # second row will be in the 'bad' grouping which sometimes has a nan in
+    # the group column(s).
+    rows = pd.DataFrame(
+        {
+            "int": pd.array([1, 2], dtype="Int64"),
+            "float": [0.1, 0.2],
+            "bool": pd.array([True, False], dtype="bool"),
+            "date": [pd.Timestamp(2010, 1, 1), pd.Timestamp(2020, 2, 2)],
+            "period": pd.array(
+                [pd.Period("2010-01"), pd.Period("2020-2")], dtype="period[M]"
+            ),
+            "obj": ["hello", "world"],
+            "cat": pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
+        }
+    )
+
+    # Put those rows into a 10-row dataframe at rows 2 and 7. This will
+    # allows us to ffill and bfill the rows and confirm that our method is
+    # behaving as expected
+    ridx = pd.Series([None] * 10)
     ridx[2] = 0
     ridx[7] = 1
     df = rows.reindex(ridx).reset_index(drop=True)
-
-    grps = pd.Series(['good']*5 + ['bad']*5)
+
+    # Add the grouping column(s).
+    grps = pd.Series(["good"] * 5 + ["bad"] * 5)
     if type(by) is list:
-        grps = pd.concat([grps]*len(by), axis=1)
+        grps = pd.concat([grps] * len(by), axis=1)
     df[by] = grps
-
+
+    # Our 'has_nan' arg sometimes lists more columns than we are actually
+    # grouping by (our 'by' arg), i.e. has_nan=['grp1', 'grp2'] when
+    # by=['grp1']. We can just reduce 'has_nan' to its intersection with 'by'.
     by = [by] if type(by) is not list else by
     has_nan = list(set(has_nan).intersection(set(by)))
-    df[has_nan] = df[has_nan].replace('bad', np.nan)
-
+
+    # For the colunms that are in 'has_nan' replace 'bad' with 'nan'
+    df[has_nan] = df[has_nan].replace("bad", np.nan)
+
     grouped = df.groupby(by=by, dropna=dropna)
     result = getattr(grouped, method)(limit=limit)
-
+
+    # If dropna=True and 'bad' has been replaced by 'nan', then the second
+    # 5 rows will all be nan, which is what we want: the nan group contains
+    # only nan values
     if dropna and (len(has_nan) > 0):
         ridx[7] = None
+
+    # To get our expected/benchmark output, we ffill/bfill the rows directly
+    # (not via a groupby), so we don't want limit=None for this part. With 5
+    # rows per group and the value rows in positions 2&7, we ffill/bfill
+    # with limit=2. If we use limit=None rows 2&7 will ffill/bfill into the
+    # other group
     lim = 2 if limit is None else limit
-
     ridx = getattr(ridx, method)(limit=lim)
     expected = rows.reindex(ridx).reset_index(drop=True)

     tm.assert_frame_equal(result, expected)


-
 @pytest.mark.parametrize("func", [np.any, np.all])
 def test_any_all_np_func(func):
     # GH 20653
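The post-processing helper in the patch above leans on the convention that missing group keys are encoded with the sentinel -1 in the grouper's codes, the same sentinel the later cython change tests against. A small factorize call makes the convention visible; it is an illustration written for this note, not taken from the patches:

    import numpy as np
    import pandas as pd

    codes, uniques = pd.factorize(np.array(["good", np.nan, "bad"], dtype=object))
    # codes is array([ 0, -1,  1]): the NaN key gets code -1, which is what
    # DataFrame(self.grouper.codes).eq(-1) picks up in the helper above.
    print(codes, uniques)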
From 8447d264b40e9d383803fdeb71dedef97dd4fb81 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Thu, 1 Oct 2020 22:31:57 +0100
Subject: [PATCH 05/13] whatsnew

---
 doc/source/whatsnew/v1.1.3.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst
index 15777abcb8084..02c6534328d86 100644
--- a/doc/source/whatsnew/v1.1.3.rst
+++ b/doc/source/whatsnew/v1.1.3.rst
@@ -58,6 +58,7 @@ Bug fixes
 - Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`)
 - Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`)
 - Bug in :func:`cut` raising a ``ValueError`` when passed a :class:`Series` of labels with ``ordered=False`` (:issue:`36603`)
+- Bug in :meth:`DataFrameGroupBy.ffill` where a ``NaN`` group would return foward-filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`)

 .. ---------------------------------------------------------------------------


From 09a64da645402b26202f2afec1514d58eb26a38b Mon Sep 17 00:00:00 2001
From: smithto1
Date: Tue, 6 Oct 2020 06:57:57 +0100
Subject: [PATCH 06/13] implemented cython fix in group_fillna_indexer rather than post-processing

---
 pandas/_libs/groupby.pyx       |  5 ++++-
 pandas/core/groupby/groupby.py | 13 +------------
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index a83634aad3ce2..09d876c55c594 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -344,7 +344,7 @@ def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
 @cython.boundscheck(False)
 def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                          ndarray[uint8_t] mask, object direction,
-                         int64_t limit):
+                         int64_t limit, bint dropna):
     """
     Indexes how to fill values forwards or backwards within a group.

@@ -389,6 +389,9 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
             else:  # reset items when not missing
                 filled_vals = 0
                 curr_fill_idx = idx
+
+            if dropna and labels[idx] == -1:
+                curr_fill_idx = -1

             out[idx] = curr_fill_idx

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 3edc66792be7b..5f1824d552ef8 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1847,17 +1847,6 @@ def _fill(self, direction, limit=None):
         if limit is None:
             limit = -1

-        def _nan_group_gets_nan_values(values, *args):
-            if not self.dropna:
-                return values
-            in_nan_group = DataFrame(self.grouper.codes).eq(-1).any()
-            if in_nan_group.any():
-                filler = {np.datetime64: np.datetime64("NaT")}.get(
-                    values.dtype.type, np.nan
-                )
-                values[in_nan_group] = filler
-            return values
-
         res = self._get_cythonized_result(
             "group_fillna_indexer",
             numeric_only=False,
@@ -1866,7 +1855,7 @@ def _nan_group_gets_nan_values(values, *args):
             result_is_index=True,
             direction=direction,
             limit=limit,
-            post_processing=_nan_group_gets_nan_values,
+            dropna=self.dropna
         )

         return res
From 694e63cacdcc6b2ec64fe97f91566c7ee2d922b2 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Tue, 6 Oct 2020 07:56:50 +0100
Subject: [PATCH 07/13] simplified test

---
 pandas/core/groupby/groupby.py                |  2 +-
 .../tests/groupby/transform/test_transform.py | 88 ++++++-------------
 2 files changed, 27 insertions(+), 63 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 5f1824d552ef8..75c3763461312 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1855,7 +1855,7 @@ def _fill(self, direction, limit=None):
             result_is_index=True,
             direction=direction,
             limit=limit,
-            dropna=self.dropna
+            dropna=self.dropna,
         )

         return res
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 77e9f9e1efe3f..45149da3d72c8 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -977,76 +977,40 @@ def test_ffill_bfill_non_unique_multilevel(func, expected_status):
     tm.assert_series_equal(result, expected)


+@pytest.mark.parametrize("method", ["ffill", "bfill"])
 @pytest.mark.parametrize("dropna", [True, False])
-@pytest.mark.parametrize("limit", [None, 1])
-@pytest.mark.parametrize("method", ["ffill", "bfill", "pad", "backfill"])
-@pytest.mark.parametrize("by", ["grp1", ["grp1"], ["grp1", "grp2"]])
-@pytest.mark.parametrize("has_nan", [[], ["grp1"], ["grp1", "grp2"]])
-def test_pad_handles_nan_groups(dropna, limit, method, by, has_nan):
+@pytest.mark.parametrize("has_nan_group", [True, False])
+def test_ffill_handles_nan_groups(dropna, method, has_nan_group):
     # GH 34725

-    # Create two rows with many different dytypes. The first row will be in
-    # the 'good' group which never has a nan in the grouping column(s). The
-    # second row will be in the 'bad' grouping which sometimes has a nan in
-    # the group column(s).
-    rows = pd.DataFrame(
-        {
-            "int": pd.array([1, 2], dtype="Int64"),
-            "float": [0.1, 0.2],
-            "bool": pd.array([True, False], dtype="bool"),
-            "date": [pd.Timestamp(2010, 1, 1), pd.Timestamp(2020, 2, 2)],
-            "period": pd.array(
-                [pd.Period("2010-01"), pd.Period("2020-2")], dtype="period[M]"
-            ),
-            "obj": ["hello", "world"],
-            "cat": pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
-        }
-    )
+    df_without_nan_rows = pd.DataFrame([(1, 0.1), (2, 0.2)])
+
+    ridx = [-1, 0, -1, -1, 1, -1]
+    df = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
+
+    group_b = np.nan if has_nan_group else "b"
+    df["group_col"] = pd.Series(["a"] * 3 + [group_b] * 3)
+
+    grouped = df.groupby(by="group_col", dropna=dropna)
+    result = getattr(grouped, method)(limit=None)
+
+    expected_rows = {
+        ("ffill", True, True): [-1, 0, 0, -1, -1, -1],
+        ("ffill", True, False): [-1, 0, 0, -1, 1, 1],
+        ("ffill", False, True): [-1, 0, 0, -1, 1, 1],
+        ("ffill", False, False): [-1, 0, 0, -1, 1, 1],
+        ("bfill", True, True): [0, 0, -1, -1, -1, -1],
+        ("bfill", True, False): [0, 0, -1, 1, 1, -1],
+        ("bfill", False, True): [0, 0, -1, 1, 1, -1],
+        ("bfill", False, False): [0, 0, -1, 1, 1, -1],
+    }

-    # Put those rows into a 10-row dataframe at rows 2 and 7. This will
-    # allows us to ffill and bfill the rows and confirm that our method is
-    # behaving as expected
-    ridx = pd.Series([None] * 10)
-    ridx[2] = 0
-    ridx[7] = 1
-    df = rows.reindex(ridx).reset_index(drop=True)
-
-    # Add the grouping column(s).
-    grps = pd.Series(["good"] * 5 + ["bad"] * 5)
-    if type(by) is list:
-        grps = pd.concat([grps] * len(by), axis=1)
-    df[by] = grps
-
-    # Our 'has_nan' arg sometimes lists more columns than we are actually
-    # grouping by (our 'by' arg), i.e. has_nan=['grp1', 'grp2'] when
-    # by=['grp1']. We can just reduce 'has_nan' to its intersection with 'by'.
-    by = [by] if type(by) is not list else by
-    has_nan = list(set(has_nan).intersection(set(by)))
-
-    # For the colunms that are in 'has_nan' replace 'bad' with 'nan'
-    df[has_nan] = df[has_nan].replace("bad", np.nan)
-
-    grouped = df.groupby(by=by, dropna=dropna)
-    result = getattr(grouped, method)(limit=limit)
-
-    # If dropna=True and 'bad' has been replaced by 'nan', then the second
-    # 5 rows will all be nan, which is what we want: the nan group contains
-    # only nan values
-    if dropna and (len(has_nan) > 0):
-        ridx[7] = None
-
-    # To get our expected/benchmark output, we ffill/bfill the rows directly
-    # (not via a groupby), so we don't want limit=None for this part. With 5
-    # rows per group and the value rows in positions 2&7, we ffill/bfill
-    # with limit=2. If we use limit=None rows 2&7 will ffill/bfill into the
-    # other group
-    lim = 2 if limit is None else limit
-    ridx = getattr(ridx, method)(limit=lim)
-    expected = rows.reindex(ridx).reset_index(drop=True)
+    ridx = expected_rows.get((method, dropna, has_nan_group))
+    expected = df_without_nan_rows.reindex(ridx).reset_index(drop=True)

     tm.assert_frame_equal(result, expected)


 @pytest.mark.parametrize("func", [np.any, np.all])
 def test_any_all_np_func(func):
     # GH 20653

From bb886099edb3c5e850c1474fbcb642d881b9fd36 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Tue, 6 Oct 2020 08:01:11 +0100
Subject: [PATCH 08/13] whatsnew and docstring

---
 doc/source/whatsnew/v1.1.3.rst | 1 -
 doc/source/whatsnew/v1.2.0.rst | 1 +
 pandas/_libs/groupby.pyx       | 1 +
 3 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst
index 02c6534328d86..15777abcb8084 100644
--- a/doc/source/whatsnew/v1.1.3.rst
+++ b/doc/source/whatsnew/v1.1.3.rst
@@ -58,7 +58,6 @@ Bug fixes
 - Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`)
 - Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`)
 - Bug in :func:`cut` raising a ``ValueError`` when passed a :class:`Series` of labels with ``ordered=False`` (:issue:`36603`)
-- Bug in :meth:`DataFrameGroupBy.ffill` where a ``NaN`` group would return foward-filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`)

 .. ---------------------------------------------------------------------------

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 016e8d90e7d21..6d338e6717035 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -404,6 +404,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
 - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
 - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`)
+- Bug in :meth:`DataFrameGroupBy.ffill` where a ``NaN`` group would return foward-filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`)

 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 09d876c55c594..17a557818ee00 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -358,6 +358,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
     direction : {'ffill', 'bfill'}
         Direction for fill to be applied (forwards or backwards, respectively)
     limit : Consecutive values to fill before stopping, or -1 for no limit
+    dropna : Flag to indicate if NaN groups should return all NaN values

     Notes
     -----
From 11c4c9a12ed2a5a08c551a33e5adb17b2f73e846 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Tue, 6 Oct 2020 08:03:36 +0100
Subject: [PATCH 09/13] cleaning up

---
 pandas/core/groupby/groupby.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 75c3763461312..b8e7af18e0f2b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1847,7 +1847,7 @@ def _fill(self, direction, limit=None):
         if limit is None:
             limit = -1

-        res = self._get_cythonized_result(
+        return self._get_cythonized_result(
             "group_fillna_indexer",
             numeric_only=False,
             needs_mask=True,
@@ -1858,8 +1858,6 @@ def _fill(self, direction, limit=None):
             dropna=self.dropna,
         )
-
-        return res

     @Substitution(name="groupby")
     def pad(self, limit=None):
         """

From 6f4fa32aedbfc3e9942b4ad326ff1f61bceed3f3 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Tue, 6 Oct 2020 09:29:04 +0100
Subject: [PATCH 10/13] whitespace

---
 pandas/_libs/groupby.pyx | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 17a557818ee00..386267422fd0b 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -390,7 +390,6 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
             else:  # reset items when not missing
                 filled_vals = 0
                 curr_fill_idx = idx
-
             if dropna and labels[idx] == -1:
                 curr_fill_idx = -1
             out[idx] = curr_fill_idx

From 3fcce3bb5a16827d1fd184d12b3c077c861ed6ef Mon Sep 17 00:00:00 2001
From: smithto1
Date: Wed, 7 Oct 2020 14:45:01 +0100
Subject: [PATCH 11/13] addressing comments

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 pandas/_libs/groupby.pyx       | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 03fd019447925..673d656c730ab 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -439,7 +439,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`)
 - Bug in :meth:`Rolling.count` returned ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in window (:issue:`35579`)
 - Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`)
-- Bug in :meth:`DataFrameGroupBy.ffill` where a ``NaN`` group would return foward-filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`)
+- Bug in :meth:`DataFrameGroupBy.ffill` and :meth:`DataFrameGroupBy.bfill` where a ``NaN`` group would return filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`)

 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 386267422fd0b..1c887135ced0e 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -382,7 +382,9 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
     with nogil:
         for i in range(N):
             idx = sorted_labels[i]
-            if mask[idx] == 1:  # is missing
+            if dropna and labels[idx] == -1: # nan-group gets nan-values
+                curr_fill_idx = -1
+            elif mask[idx] == 1:  # is missing
                 # Stop filling once we've hit the limit
                 if filled_vals >= limit and limit != -1:
                     curr_fill_idx = -1
@@ -390,8 +392,6 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
             else:  # reset items when not missing
                 filled_vals = 0
                 curr_fill_idx = idx
-            if dropna and labels[idx] == -1:
-                curr_fill_idx = -1

             out[idx] = curr_fill_idx


From 434cef59db0ac8a355ce45508ec9dcec3573c739 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Wed, 7 Oct 2020 15:10:01 +0100
Subject: [PATCH 12/13] fixing pre-commit check

---
 pandas/_libs/groupby.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 1c887135ced0e..5a958d5e0bd3c 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -382,7 +382,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
     with nogil:
         for i in range(N):
             idx = sorted_labels[i]
-            if dropna and labels[idx] == -1: # nan-group gets nan-values
+            if dropna and labels[idx] == -1:  # nan-group gets nan-values
                 curr_fill_idx = -1
             elif mask[idx] == 1:  # is missing
                 # Stop filling once we've hit the limit
                 if filled_vals >= limit and limit != -1:
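At this point the cython loop has reached its finished shape. Below is a rough pure-Python sketch of the same fill logic for illustration; the function name, the construction of sorted_labels and the per-group reset at the end are filled in from the surrounding library function rather than from the hunks above, so treat it as an approximation, not the exact library code.

    import numpy as np

    def fillna_indexer(labels, mask, direction, limit, dropna):
        # labels: group codes per row (-1 marks a NaN key); mask: 1 where the
        # value is missing. Returns, for each row, the positional index to
        # take the filled value from, or -1 to leave the row as NaN.
        # limit == -1 means "no limit", matching how _fill maps limit=None.
        N = len(labels)
        sorted_labels = np.argsort(labels, kind="mergesort")
        if direction == "bfill":
            sorted_labels = sorted_labels[::-1]
        out = np.empty(N, dtype=np.int64)
        curr_fill_idx = -1
        filled_vals = 0
        for i in range(N):
            idx = sorted_labels[i]
            if dropna and labels[idx] == -1:  # NaN group: never fill
                curr_fill_idx = -1
            elif mask[idx] == 1:  # missing value: reuse the last seen index
                if filled_vals >= limit and limit != -1:
                    curr_fill_idx = -1
                filled_vals += 1
            else:  # non-missing value: remember it and reset the counter
                filled_vals = 0
                curr_fill_idx = idx
            out[idx] = curr_fill_idx
            # reset state when the next sorted row belongs to a different group
            if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]:
                curr_fill_idx = -1
                filled_vals = 0
        return out

    # Example: labels [0, 0, -1], mask [0, 1, 1], ffill, no limit, dropna=True
    # gives [0, 0, -1]: row 1 fills from row 0, the NaN-keyed row stays unfilled.
    print(fillna_indexer([0, 0, -1], [0, 1, 1], "ffill", -1, True))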
From 84abae0c43b8ebea3c1584a71a2e6a7ae9fd8e79 Mon Sep 17 00:00:00 2001
From: smithto1
Date: Thu, 8 Oct 2020 10:41:13 +0100
Subject: [PATCH 13/13] move test to test_missing,py

---
 pandas/tests/groupby/test_missing.py          | 34 +++++++++++++++++++
 .../tests/groupby/transform/test_transform.py | 34 -------------------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py
index 116aed9935694..70d8dfc20822a 100644
--- a/pandas/tests/groupby/test_missing.py
+++ b/pandas/tests/groupby/test_missing.py
@@ -82,3 +82,37 @@ def test_fill_consistency():
     expected = df.groupby(level=0, axis=0).fillna(method="ffill")
     result = df.T.groupby(level=0, axis=1).fillna(method="ffill").T
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("method", ["ffill", "bfill"])
+@pytest.mark.parametrize("dropna", [True, False])
+@pytest.mark.parametrize("has_nan_group", [True, False])
+def test_ffill_handles_nan_groups(dropna, method, has_nan_group):
+    # GH 34725
+
+    df_without_nan_rows = pd.DataFrame([(1, 0.1), (2, 0.2)])
+
+    ridx = [-1, 0, -1, -1, 1, -1]
+    df = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
+
+    group_b = np.nan if has_nan_group else "b"
+    df["group_col"] = pd.Series(["a"] * 3 + [group_b] * 3)
+
+    grouped = df.groupby(by="group_col", dropna=dropna)
+    result = getattr(grouped, method)(limit=None)
+
+    expected_rows = {
+        ("ffill", True, True): [-1, 0, 0, -1, -1, -1],
+        ("ffill", True, False): [-1, 0, 0, -1, 1, 1],
+        ("ffill", False, True): [-1, 0, 0, -1, 1, 1],
+        ("ffill", False, False): [-1, 0, 0, -1, 1, 1],
+        ("bfill", True, True): [0, 0, -1, -1, -1, -1],
+        ("bfill", True, False): [0, 0, -1, 1, 1, -1],
+        ("bfill", False, True): [0, 0, -1, 1, 1, -1],
+        ("bfill", False, False): [0, 0, -1, 1, 1, -1],
+    }
+
+    ridx = expected_rows.get((method, dropna, has_nan_group))
+    expected = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
+
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 45149da3d72c8..97be039e16ebb 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -977,40 +977,6 @@ def test_ffill_bfill_non_unique_multilevel(func, expected_status):
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize("method", ["ffill", "bfill"])
-@pytest.mark.parametrize("dropna", [True, False])
-@pytest.mark.parametrize("has_nan_group", [True, False])
-def test_ffill_handles_nan_groups(dropna, method, has_nan_group):
-    # GH 34725
-
-    df_without_nan_rows = pd.DataFrame([(1, 0.1), (2, 0.2)])
-
-    ridx = [-1, 0, -1, -1, 1, -1]
-    df = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
-
-    group_b = np.nan if has_nan_group else "b"
-    df["group_col"] = pd.Series(["a"] * 3 + [group_b] * 3)
-
-    grouped = df.groupby(by="group_col", dropna=dropna)
-    result = getattr(grouped, method)(limit=None)
-
-    expected_rows = {
-        ("ffill", True, True): [-1, 0, 0, -1, -1, -1],
-        ("ffill", True, False): [-1, 0, 0, -1, 1, 1],
-        ("ffill", False, True): [-1, 0, 0, -1, 1, 1],
-        ("ffill", False, False): [-1, 0, 0, -1, 1, 1],
-        ("bfill", True, True): [0, 0, -1, -1, -1, -1],
-        ("bfill", True, False): [0, 0, -1, 1, 1, -1],
-        ("bfill", False, True): [0, 0, -1, 1, 1, -1],
-        ("bfill", False, False): [0, 0, -1, 1, 1, -1],
-    }
-
-    ridx = expected_rows.get((method, dropna, has_nan_group))
-    expected = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
-
-    tm.assert_frame_equal(result, expected)
-
-
 @pytest.mark.parametrize("func", [np.any, np.all])
 def test_any_all_np_func(func):
     # GH 20653
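As a closing illustration of what the relocated test asserts, the sketch below mirrors its fixture for one parameter combination; it is written for this note, not an excerpt from the patch. With method="ffill", dropna=True and a NaN group key, the whole NaN-keyed block comes back as NaN.

    import numpy as np
    import pandas as pd

    df_without_nan_rows = pd.DataFrame([(1, 0.1), (2, 0.2)])
    df = df_without_nan_rows.reindex([-1, 0, -1, -1, 1, -1]).reset_index(drop=True)
    df["group_col"] = pd.Series(["a"] * 3 + [np.nan] * 3)

    result = df.groupby(by="group_col", dropna=True).ffill()
    # Matches expected_rows[("ffill", True, True)] == [-1, 0, 0, -1, -1, -1]:
    # row 2 forward-fills from row 1, and every NaN-keyed row comes back as NaN.
    print(result)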