Skip to content

Commit da9169d

Browse files
committed
REF: Compute correct result_index upfront in groupby
1 parent 32b8cd2 commit da9169d

File tree

13 files changed

+259
-390
lines changed

13 files changed

+259
-390
lines changed

pandas/core/groupby/generic.py

+16 -8
Original file line number | Diff line number | Diff line change
@@ -416,7 +416,6 @@ def _wrap_applied_output(
416416
# GH #823 #24880
417417
index = self.grouper.result_index
418418
res_df = self.obj._constructor_expanddim(values, index=index)
419-
res_df = self._reindex_output(res_df)
420419
# if self.observed is False,
421420
# keep all-NaN rows created while re-indexing
422421
res_ser = res_df.stack(future_stack=True)
@@ -442,7 +441,7 @@ def _wrap_applied_output(
442441
if not self.as_index:
443442
result = self._insert_inaxis_grouper(result)
444443
result.index = default_index(len(result))
445-
return self._reindex_output(result)
444+
return result
446445

447446
def _aggregate_named(self, func, *args, **kwargs):
448447
# Note: this is very similar to _aggregate_series_pure_python,
@@ -672,7 +671,7 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
672671
2023-02-01 1
673672
Freq: MS, dtype: int64
674673
"""
675-
ids, _, _ = self.grouper.group_info
674+
ids, _ = self.grouper.group_info
676675

677676
val = self.obj._values
678677

@@ -721,7 +720,7 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
721720
if not self.as_index:
722721
result = self._insert_inaxis_grouper(result)
723722
result.index = default_index(len(result))
724-
return self._reindex_output(result, fill_value=0)
723+
return result
725724

726725
@doc(Series.describe)
727726
def describe(self, percentiles=None, include=None, exclude=None) -> Series:
@@ -749,7 +748,7 @@ def value_counts(
749748
from pandas.core.reshape.merge import get_join_indexers
750749
from pandas.core.reshape.tile import cut
751750

752-
ids, _, _ = self.grouper.group_info
751+
ids, _ = self.grouper.group_info
753752
val = self.obj._values
754753

755754
index_names = self.grouper.names + [self.obj.name]
@@ -819,9 +818,18 @@ def value_counts(
819818
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
820819

821820
# multi-index components
822-
codes = self.grouper.reconstructed_codes
821+
if isinstance(self.grouper.result_index, MultiIndex):
822+
codes = list(self.grouper.result_index.codes)
823+
else:
824+
codes = [
825+
algorithms.factorize(
826+
self.grouper.result_index,
827+
sort=self.grouper._sort,
828+
use_na_sentinel=self.grouper.dropna,
829+
)[0]
830+
]
823831
codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
824-
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
832+
levels = self.grouper.levels + [lev]
825833

826834
if dropna:
827835
mask = codes[-1] != -1
@@ -1686,7 +1694,7 @@ def _wrap_applied_output_series(
16861694
if not self.as_index:
16871695
result = self._insert_inaxis_grouper(result)
16881696

1689-
return self._reindex_output(result)
1697+
return result
16901698

16911699
def _cython_transform(
16921700
self,

0 commit comments

Comments
 (0)