Skip to content

Commit c3f9055

Browse files
committed
COMPAT: Pandas 0.23 duplicate names in MI
Pandas 0.23 disallows duplicate names in a MultiIndex. This commit adjusts a test that relied on duplicate names being allowed, and fixes `groupby().nunique`, which produced duplicate names as a by-product. Closes dask#3039; xref pandas-dev/pandas#18882.
1 parent a165346 commit c3f9055

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

dask/dataframe/groupby.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,18 @@ def _nunique_df_chunk(df, *index, **kwargs):
289289
return grouped
290290

291291

292+
def _drop_duplicates_rename(df):
    """Drop duplicate rows from ``df`` and clear every index level name.

    Intended to be passed to ``groupby(...).apply(...)``: the returned
    frame has an unnamed index (one ``None`` per index level).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame for a single group.

    Returns
    -------
    pandas.DataFrame
        ``df`` without duplicate rows and with all index names set to
        ``None``.
    """
    # Avoid duplicate index labels in a groupby().apply() context
    # https://github.com/dask/dask/issues/3039
    # https://github.com/pandas-dev/pandas/pull/18882
    names = [None] * df.index.nlevels
    result = df.drop_duplicates().rename_axis(names)
    return result
299+
300+
292301
def _nunique_df_combine(df, levels):
293-
result = df.groupby(level=levels, sort=False).apply(pd.DataFrame.drop_duplicates)
302+
result = df.groupby(level=levels,
303+
sort=False).apply(_drop_duplicates_rename)
294304

295305
if isinstance(levels, list):
296306
result.index = pd.MultiIndex.from_arrays([

dask/dataframe/tests/test_groupby.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,9 @@ def test_groupby_on_index(get):
160160
def func(df):
161161
return df.assign(b=df.b - df.b.mean())
162162

163+
def func2(df):
164+
return df[['b']] - df[['b']].mean()
165+
163166
with dask.set_options(get=get):
164167
with pytest.warns(None):
165168
assert_eq(ddf.groupby('a').apply(func),
@@ -168,8 +171,8 @@ def func(df):
168171
assert_eq(ddf.groupby('a').apply(func).set_index('a'),
169172
pdf.groupby('a').apply(func).set_index('a'))
170173

171-
assert_eq(pdf2.groupby(pdf2.index).apply(func),
172-
ddf2.groupby(ddf2.index).apply(func))
174+
assert_eq(pdf2.groupby(pdf2.index).apply(func2),
175+
ddf2.groupby(ddf2.index).apply(func2))
173176

174177

175178
def test_groupby_multilevel_getitem():

0 commit comments

Comments
 (0)