1010import numpy as np
1111
1212from pandas ._typing import (
13+ ArrayLike ,
1314 FrameOrSeries ,
1415 final ,
1516)
@@ -587,20 +588,23 @@ def indices(self):
587588
588589 @property
589590 def codes (self ) -> np .ndarray :
590- if self ._passed_categorical :
591- # we make a CategoricalIndex out of the cat grouper
592- # preserving the categories / ordered attributes
593- cat = self .grouper
594- return cat .codes
591+ if self ._codes is not None :
592+ # _codes is set in __init__ for MultiIndex cases
593+ return self ._codes
595594
596- if self ._codes is None :
597- self ._make_codes ()
598- # error: Incompatible return value type (got "Optional[ndarray]",
599- # expected "ndarray")
600- return self ._codes # type: ignore[return-value]
595+ return self ._codes_and_uniques [0 ]
596+
597+ @cache_readonly
598+ def group_arraylike (self ) -> ArrayLike :
599+ """
600+ Analogous to result_index, but holding an ArrayLike to ensure
601+ we can retain ExtensionDtypes.
602+ """
603+ return self ._codes_and_uniques [1 ]
601604
602605 @cache_readonly
603606 def result_index (self ) -> Index :
607+ # TODO: what's the difference between result_index vs group_index?
604608 if self .all_grouper is not None :
605609 group_idx = self .group_index
606610 assert isinstance (group_idx , CategoricalIndex )
@@ -609,40 +613,37 @@ def result_index(self) -> Index:
609613
610614 @cache_readonly
611615 def group_index (self ) -> Index :
616+ if self ._group_index is not None :
617+ # _group_index is set in __init__ for MultiIndex cases
618+ return self ._group_index
619+ uniques = self .group_arraylike
620+ return Index (uniques , name = self .name )
621+
622+ @cache_readonly
623+ def _codes_and_uniques (self ) -> tuple [np .ndarray , ArrayLike ]:
612624 if self ._passed_categorical :
613625 # we make a CategoricalIndex out of the cat grouper
614626 # preserving the categories / ordered attributes
615627 cat = self .grouper
616628 categories = cat .categories
617629
618630 if self .observed :
619- codes = algorithms .unique1d (cat .codes )
620- codes = codes [ codes != - 1 ]
631+ ucodes = algorithms .unique1d (cat .codes )
632+ ucodes = ucodes [ ucodes != - 1 ]
621633 if self .sort or cat .ordered :
622- codes = np .sort (codes )
634+ ucodes = np .sort (ucodes )
623635 else :
624- codes = np .arange (len (categories ))
636+ ucodes = np .arange (len (categories ))
625637
626- return CategoricalIndex (
627- Categorical .from_codes (
628- codes = codes , categories = categories , ordered = cat .ordered
629- ),
630- name = self .name ,
638+ uniques = Categorical .from_codes (
639+ codes = ucodes , categories = categories , ordered = cat .ordered
631640 )
641+ return cat .codes , uniques
632642
633- if self ._group_index is None :
634- self ._make_codes ()
635- assert self ._group_index is not None
636- return self ._group_index
637-
638- def _make_codes (self ) -> None :
639- if self ._codes is not None and self ._group_index is not None :
640- return
641-
642- # we have a list of groupers
643- if isinstance (self .grouper , ops .BaseGrouper ):
643+ elif isinstance (self .grouper , ops .BaseGrouper ):
644+ # we have a list of groupers
644645 codes = self .grouper .codes_info
645- uniques = self .grouper .result_index
646+ uniques = self .grouper .result_arraylike
646647 else :
647648 # GH35667, replace dropna=False with na_sentinel=None
648649 if not self .dropna :
@@ -652,9 +653,7 @@ def _make_codes(self) -> None:
652653 codes , uniques = algorithms .factorize (
653654 self .grouper , sort = self .sort , na_sentinel = na_sentinel
654655 )
655- uniques = Index (uniques , name = self .name )
656- self ._codes = codes
657- self ._group_index = uniques
656+ return codes , uniques
658657
659658 @cache_readonly
660659 def groups (self ) -> dict [Hashable , np .ndarray ]:
0 commit comments