From 42e864c525dfb4a0dd8e46b32ce3a53907b54fc3 Mon Sep 17 00:00:00 2001
From: Anna Daglis <ana.daglis@farfetch.com>
Date: Thu, 21 Jan 2021 18:23:55 +0000
Subject: [PATCH 1/4] BUG: SeriesGroupBy.value_counts raising error on an empty
 series

---
 doc/source/whatsnew/v1.3.0.rst            |  1 +
 pandas/core/groupby/generic.py            |  4 ++
 pandas/tests/groupby/test_value_counts.py | 87 ++++++++++++++---------
 3 files changed, 59 insertions(+), 33 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index cbbba84da6ae6..973f95ebb3510 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -340,6 +340,7 @@ Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug in :meth:`SeriesGroupBy.value_counts` where unobserved categories in a grouped categorical series were not tallied (:issue:`38672`)
+- Bug in :meth:`SeriesGroupBy.value_counts` where an error was raised on an empty series (:issue:`39172`)
 - Bug in :meth:`.GroupBy.indices` would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`)
 - Fixed bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` causing loss of precision through using Kahan summation (:issue:`38778`)
 - Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`)
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 50dbfe2596a77..ec59cdb475da9 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -731,10 +731,14 @@ def apply_series_value_counts():
 
         # group boundaries are where group ids change
         idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
+        if len(ids) == 0:
+            idx = ids
 
         # new values are where sorted labels change
         lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
         inc = np.r_[True, lchanges]
+        if len(lchanges) == 0:
+            inc = lchanges
         inc[idx] = True  # group boundaries are also new values
         out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts
 
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index afb648d8527ca..881073f116d2a 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -59,39 +59,39 @@ def seed_df(seed_nans, n, m):
             ids.append(f"{k}-{n}-{m}")
 
 
-@pytest.mark.slow
-@pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids)
-@pytest.mark.parametrize("isort", [True, False])
-@pytest.mark.parametrize("normalize", [True, False])
-@pytest.mark.parametrize("sort", [True, False])
-@pytest.mark.parametrize("ascending", [True, False])
-@pytest.mark.parametrize("dropna", [True, False])
-def test_series_groupby_value_counts(
-    df, keys, bins, n, m, isort, normalize, sort, ascending, dropna
-):
-    def rebuild_index(df):
-        arr = list(map(df.index.get_level_values, range(df.index.nlevels)))
-        df.index = MultiIndex.from_arrays(arr, names=df.index.names)
-        return df
-
-    kwargs = {
-        "normalize": normalize,
-        "sort": sort,
-        "ascending": ascending,
-        "dropna": dropna,
-        "bins": bins,
-    }
-
-    gr = df.groupby(keys, sort=isort)
-    left = gr["3rd"].value_counts(**kwargs)
-
-    gr = df.groupby(keys, sort=isort)
-    right = gr["3rd"].apply(Series.value_counts, **kwargs)
-    right.index.names = right.index.names[:-1] + ["3rd"]
-
-    # have to sort on index because of unstable sort on values
-    left, right = map(rebuild_index, (left, right))  # xref GH9212
-    tm.assert_series_equal(left.sort_index(), right.sort_index())
+# @pytest.mark.slow
+# @pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids)
+# @pytest.mark.parametrize("isort", [True, False])
+# @pytest.mark.parametrize("normalize", [True, False])
+# @pytest.mark.parametrize("sort", [True, False])
+# @pytest.mark.parametrize("ascending", [True, False])
+# @pytest.mark.parametrize("dropna", [True, False])
+# def test_series_groupby_value_counts(
+#     df, keys, bins, n, m, isort, normalize, sort, ascending, dropna
+# ):
+#     def rebuild_index(df):
+#         arr = list(map(df.index.get_level_values, range(df.index.nlevels)))
+#         df.index = MultiIndex.from_arrays(arr, names=df.index.names)
+#         return df
+#
+#     kwargs = {
+#         "normalize": normalize,
+#         "sort": sort,
+#         "ascending": ascending,
+#         "dropna": dropna,
+#         "bins": bins,
+#     }
+#
+#     gr = df.groupby(keys, sort=isort)
+#     left = gr["3rd"].value_counts(**kwargs)
+#
+#     gr = df.groupby(keys, sort=isort)
+#     right = gr["3rd"].apply(Series.value_counts, **kwargs)
+#     right.index.names = right.index.names[:-1] + ["3rd"]
+#
+#     # have to sort on index because of unstable sort on values
+#     left, right = map(rebuild_index, (left, right))  # xref GH9212
+#     tm.assert_series_equal(left.sort_index(), right.sort_index())
 
 
 def test_series_groupby_value_counts_with_grouper():
@@ -122,6 +122,27 @@ def test_series_groupby_value_counts_with_grouper():
     tm.assert_series_equal(result, expected)
 
 
+def test_series_groupby_value_counts_empty():
+    # GH39172
+    df = DataFrame(columns=["A", "B"])
+    dfg = df.groupby("A")
+
+    result = dfg["B"].value_counts()
+    expected = Series([], name="B", dtype=result.dtype)
+    expected.index = MultiIndex.from_arrays([[]] * 2, names=["A", "B"])
+
+    tm.assert_series_equal(result, expected)
+
+    df = DataFrame(columns=["A", "B", "C"])
+    dfg = df.groupby(["A", "B"])
+
+    result = dfg["C"].value_counts()
+    expected = Series([], name="C", dtype=result.dtype)
+    expected.index = MultiIndex.from_arrays([[]] * 3, names=["A", "B", "C"])
+
+    tm.assert_series_equal(result, expected)
+
+
 def test_series_groupby_value_counts_on_categorical():
     # GH38672
 

From 466a100eeb73f8085da1b96c495f173dce150ec9 Mon Sep 17 00:00:00 2001
From: Anna Daglis <ana.daglis@farfetch.com>
Date: Thu, 21 Jan 2021 19:05:08 +0000
Subject: [PATCH 2/4] TST: Re-enable test_series_groupby_value_counts

---
 pandas/tests/groupby/test_value_counts.py | 66 +++++++++++------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 881073f116d2a..8bb07b7163f2e 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -59,39 +59,39 @@ def seed_df(seed_nans, n, m):
             ids.append(f"{k}-{n}-{m}")
 
 
-# @pytest.mark.slow
-# @pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids)
-# @pytest.mark.parametrize("isort", [True, False])
-# @pytest.mark.parametrize("normalize", [True, False])
-# @pytest.mark.parametrize("sort", [True, False])
-# @pytest.mark.parametrize("ascending", [True, False])
-# @pytest.mark.parametrize("dropna", [True, False])
-# def test_series_groupby_value_counts(
-#     df, keys, bins, n, m, isort, normalize, sort, ascending, dropna
-# ):
-#     def rebuild_index(df):
-#         arr = list(map(df.index.get_level_values, range(df.index.nlevels)))
-#         df.index = MultiIndex.from_arrays(arr, names=df.index.names)
-#         return df
-#
-#     kwargs = {
-#         "normalize": normalize,
-#         "sort": sort,
-#         "ascending": ascending,
-#         "dropna": dropna,
-#         "bins": bins,
-#     }
-#
-#     gr = df.groupby(keys, sort=isort)
-#     left = gr["3rd"].value_counts(**kwargs)
-#
-#     gr = df.groupby(keys, sort=isort)
-#     right = gr["3rd"].apply(Series.value_counts, **kwargs)
-#     right.index.names = right.index.names[:-1] + ["3rd"]
-#
-#     # have to sort on index because of unstable sort on values
-#     left, right = map(rebuild_index, (left, right))  # xref GH9212
-#     tm.assert_series_equal(left.sort_index(), right.sort_index())
+@pytest.mark.slow
+@pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids)
+@pytest.mark.parametrize("isort", [True, False])
+@pytest.mark.parametrize("normalize", [True, False])
+@pytest.mark.parametrize("sort", [True, False])
+@pytest.mark.parametrize("ascending", [True, False])
+@pytest.mark.parametrize("dropna", [True, False])
+def test_series_groupby_value_counts(
+    df, keys, bins, n, m, isort, normalize, sort, ascending, dropna
+):
+    def rebuild_index(df):
+        arr = list(map(df.index.get_level_values, range(df.index.nlevels)))
+        df.index = MultiIndex.from_arrays(arr, names=df.index.names)
+        return df
+
+    kwargs = {
+        "normalize": normalize,
+        "sort": sort,
+        "ascending": ascending,
+        "dropna": dropna,
+        "bins": bins,
+    }
+
+    gr = df.groupby(keys, sort=isort)
+    left = gr["3rd"].value_counts(**kwargs)
+
+    gr = df.groupby(keys, sort=isort)
+    right = gr["3rd"].apply(Series.value_counts, **kwargs)
+    right.index.names = right.index.names[:-1] + ["3rd"]
+
+    # have to sort on index because of unstable sort on values
+    left, right = map(rebuild_index, (left, right))  # xref GH9212
+    tm.assert_series_equal(left.sort_index(), right.sort_index())
 
 
 def test_series_groupby_value_counts_with_grouper():

From a16f99d8127e421db3e5558e814385fbcdc9dad6 Mon Sep 17 00:00:00 2001
From: Anna Daglis <ana.daglis@farfetch.com>
Date: Thu, 21 Jan 2021 20:40:00 +0000
Subject: [PATCH 3/4] Trigger (empty commit)


From ee9ac9bb8b8e992ea9bf0905314a7eaa2fb870be Mon Sep 17 00:00:00 2001
From: Anna Daglis <ana.daglis@farfetch.com>
Date: Fri, 22 Jan 2021 19:39:02 +0000
Subject: [PATCH 4/4] Make sure the data type of idx is correct

---
 pandas/core/groupby/generic.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index ec59cdb475da9..812b1470be9a7 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -730,14 +730,15 @@ def apply_series_value_counts():
         ids, lab = ids[sorter], lab[sorter]
 
         # group boundaries are where group ids change
-        idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
-        if len(ids) == 0:
-            idx = ids
+        idchanges = 1 + np.nonzero(ids[1:] != ids[:-1])[0]
+        idx = np.r_[0, idchanges]
+        if not len(ids):
+            idx = idchanges
 
         # new values are where sorted labels change
         lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
         inc = np.r_[True, lchanges]
-        if len(lchanges) == 0:
+        if not len(lchanges):
             inc = lchanges
         inc[idx] = True  # group boundaries are also new values
         out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts