Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,11 +777,7 @@ def apply_series_value_counts():
# multi-index components
codes = self.grouper.reconstructed_codes
codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
# error: List item 0 has incompatible type "Union[ndarray, Any]";
# expected "Index"
levels = [ping.group_index for ping in self.grouper.groupings] + [
lev # type: ignore[list-item]
]
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
names = self.grouper.names + [self._selection_name]

if dropna:
Expand Down
93 changes: 60 additions & 33 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,9 @@ class Grouping:
* groups : dict of {group -> label_list}
"""

_codes: np.ndarray | None = None
_group_index: Index | None = None

def __init__(
self,
index: Index,
Expand All @@ -462,6 +465,8 @@ def __init__(
self.in_axis = in_axis
self.dropna = dropna

self._passed_categorical = False

# right place for this?
if isinstance(grouper, (Series, Index)) and name is None:
self.name = grouper.name
Expand All @@ -472,20 +477,16 @@ def __init__(
# we have a single grouper which may be a myriad of things,
# some of which are dependent on the passing in level

if level is not None:
if not isinstance(level, int):
if level not in index.names:
raise AssertionError(f"Level {level} not in index")
level = index.names.index(level)

ilevel = self._ilevel
if ilevel is not None:
if self.name is None:
self.name = index.names[level]
self.name = index.names[ilevel]

(
self.grouper,
self.grouper, # Index
self._codes,
self._group_index,
) = index._get_grouper_for_level(self.grouper, level)
) = index._get_grouper_for_level(self.grouper, ilevel)

# a passed Grouper like, directly get the grouper in the same way
# as single grouper groupby, use the group_info to get codes
Expand All @@ -509,37 +510,24 @@ def __init__(
if self.grouper is None and self.name is not None and self.obj is not None:
self.grouper = self.obj[self.name]

if self.grouper.ndim > 1:
# i.e. DataFrame case reachable if columns non-unique
t = self.name or str(type(self.grouper))
raise ValueError(f"Grouper for '{t}' not 1-dimensional")

elif isinstance(self.grouper, (list, tuple)):
self.grouper = com.asarray_tuplesafe(self.grouper)

# a passed Categorical
elif is_categorical_dtype(self.grouper):
self._passed_categorical = True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why don't you just make this a property of the grouper itself?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what you're suggesting is something like

@property
def _passed_categorical(self):
    return is_categorical_dtype(self.grouper)

That is the longer-term goal, but at the moment we treat directly-passed Categoricals differently from Categoricals derived within __init__, so that property wouldn't be quite the same as this variable. I expect that the different treatment is unintentional, but I need to track that down to be sure.


self.grouper, self.all_grouper = recode_for_groupby(
self.grouper, self.sort, observed
)
categories = self.grouper.categories

# we make a CategoricalIndex out of the cat grouper
# preserving the categories / ordered attributes
self._codes = self.grouper.codes
if observed:
codes = algorithms.unique1d(self.grouper.codes)
codes = codes[codes != -1]
if sort or self.grouper.ordered:
codes = np.sort(codes)
else:
codes = np.arange(len(categories))

self._group_index = CategoricalIndex(
Categorical.from_codes(
codes=codes, categories=categories, ordered=self.grouper.ordered
),
name=self.name,
)

# we are done
if isinstance(self.grouper, Grouping):
elif isinstance(self.grouper, Grouping):
self.grouper = self.grouper.grouper

# no level passed
Expand Down Expand Up @@ -577,8 +565,20 @@ def __repr__(self) -> str:
def __iter__(self):
return iter(self.indices)

_codes: np.ndarray | None = None
_group_index: Index | None = None
@cache_readonly
def _ilevel(self) -> int | None:
    """
    Positional form of ``self.level``.

    Returns ``None`` when no level was given, and the level itself when it
    is already an integer.  Any other value is treated as a level name and
    looked up in ``self.index.names``.

    Raises
    ------
    AssertionError
        If the level name is not among ``self.index.names``.
    """
    level = self.level
    if level is None or isinstance(level, int):
        # nothing to translate: either no level, or already a position
        return level

    names = self.index.names
    if level not in names:
        raise AssertionError(f"Level {level} not in index")
    return names.index(level)

@property
def ngroups(self) -> int:
Expand All @@ -595,6 +595,12 @@ def indices(self):

@property
def codes(self) -> np.ndarray:
if self._passed_categorical:
# we make a CategoricalIndex out of the cat grouper
# preserving the categories / ordered attributes
cat = self.grouper
return cat.codes

if self._codes is None:
self._make_codes()
# error: Incompatible return value type (got "Optional[ndarray]",
Expand All @@ -605,12 +611,33 @@ def codes(self) -> np.ndarray:
def result_index(self) -> Index:
    """
    Return the index used to label the groups in the result.

    When ``self.all_grouper`` is ``None`` this is simply
    ``self.group_index``.  Otherwise the group index is handed to
    ``recode_from_groupby`` together with ``self.all_grouper`` and
    ``self.sort``, and that function's result is returned.
    """
    if self.all_grouper is None:
        return self.group_index

    group_idx = self.group_index
    # the recode path is only valid for a categorical group index
    assert isinstance(group_idx, CategoricalIndex)
    return recode_from_groupby(self.all_grouper, self.sort, group_idx)

@property
@cache_readonly
def group_index(self) -> Index:
if self._passed_categorical:
# we make a CategoricalIndex out of the cat grouper
# preserving the categories / ordered attributes
cat = self.grouper
categories = cat.categories

if self.observed:
codes = algorithms.unique1d(cat.codes)
codes = codes[codes != -1]
if self.sort or cat.ordered:
codes = np.sort(codes)
else:
codes = np.arange(len(categories))

return CategoricalIndex(
Categorical.from_codes(
codes=codes, categories=categories, ordered=cat.ordered
),
name=self.name,
)

if self._group_index is None:
self._make_codes()
assert self._group_index is not None
Expand Down