10
10
import numpy as np
11
11
12
12
from pandas ._typing import (
13
+ ArrayLike ,
13
14
FrameOrSeries ,
14
15
final ,
15
16
)
@@ -587,20 +588,23 @@ def indices(self):
587
588
588
589
@property
589
590
def codes (self ) -> np .ndarray :
590
- if self ._passed_categorical :
591
- # we make a CategoricalIndex out of the cat grouper
592
- # preserving the categories / ordered attributes
593
- cat = self .grouper
594
- return cat .codes
591
+ if self ._codes is not None :
592
+ # _codes is set in __init__ for MultiIndex cases
593
+ return self ._codes
595
594
596
- if self ._codes is None :
597
- self ._make_codes ()
598
- # error: Incompatible return value type (got "Optional[ndarray]",
599
- # expected "ndarray")
600
- return self ._codes # type: ignore[return-value]
595
+ return self ._codes_and_uniques [0 ]
596
+
597
+ @cache_readonly
598
+ def group_arraylike (self ) -> ArrayLike :
599
+ """
600
+ Analogous to result_index, but holding an ArrayLike to ensure
601
+ we can retain ExtensionDtypes.
602
+ """
603
+ return self ._codes_and_uniques [1 ]
601
604
602
605
@cache_readonly
603
606
def result_index (self ) -> Index :
607
+ # TODO: what's the difference between result_index vs group_index?
604
608
if self .all_grouper is not None :
605
609
group_idx = self .group_index
606
610
assert isinstance (group_idx , CategoricalIndex )
@@ -609,40 +613,37 @@ def result_index(self) -> Index:
609
613
610
614
@cache_readonly
611
615
def group_index (self ) -> Index :
616
+ if self ._group_index is not None :
617
+ # _group_index is set in __init__ for MultiIndex cases
618
+ return self ._group_index
619
+ uniques = self .group_arraylike
620
+ return Index (uniques , name = self .name )
621
+
622
+ @cache_readonly
623
+ def _codes_and_uniques (self ) -> tuple [np .ndarray , ArrayLike ]:
612
624
if self ._passed_categorical :
613
625
# we make a CategoricalIndex out of the cat grouper
614
626
# preserving the categories / ordered attributes
615
627
cat = self .grouper
616
628
categories = cat .categories
617
629
618
630
if self .observed :
619
- codes = algorithms .unique1d (cat .codes )
620
- codes = codes [ codes != - 1 ]
631
+ ucodes = algorithms .unique1d (cat .codes )
632
+ ucodes = ucodes [ ucodes != - 1 ]
621
633
if self .sort or cat .ordered :
622
- codes = np .sort (codes )
634
+ ucodes = np .sort (ucodes )
623
635
else :
624
- codes = np .arange (len (categories ))
636
+ ucodes = np .arange (len (categories ))
625
637
626
- return CategoricalIndex (
627
- Categorical .from_codes (
628
- codes = codes , categories = categories , ordered = cat .ordered
629
- ),
630
- name = self .name ,
638
+ uniques = Categorical .from_codes (
639
+ codes = ucodes , categories = categories , ordered = cat .ordered
631
640
)
641
+ return cat .codes , uniques
632
642
633
- if self ._group_index is None :
634
- self ._make_codes ()
635
- assert self ._group_index is not None
636
- return self ._group_index
637
-
638
- def _make_codes (self ) -> None :
639
- if self ._codes is not None and self ._group_index is not None :
640
- return
641
-
642
- # we have a list of groupers
643
- if isinstance (self .grouper , ops .BaseGrouper ):
643
+ elif isinstance (self .grouper , ops .BaseGrouper ):
644
+ # we have a list of groupers
644
645
codes = self .grouper .codes_info
645
- uniques = self .grouper .result_index
646
+ uniques = self .grouper .result_arraylike
646
647
else :
647
648
# GH35667, replace dropna=False with na_sentinel=None
648
649
if not self .dropna :
@@ -652,9 +653,7 @@ def _make_codes(self) -> None:
652
653
codes , uniques = algorithms .factorize (
653
654
self .grouper , sort = self .sort , na_sentinel = na_sentinel
654
655
)
655
- uniques = Index (uniques , name = self .name )
656
- self ._codes = codes
657
- self ._group_index = uniques
656
+ return codes , uniques
658
657
659
658
@cache_readonly
660
659
def groups (self ) -> dict [Hashable , np .ndarray ]:
0 commit comments