From e7e2444f03419442d84d8a84797fef9518613007 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 27 Mar 2020 16:24:24 -0700
Subject: [PATCH] CLN: avoid internals, some misc cleanups

---
 pandas/core/groupby/grouper.py |  3 ++-
 pandas/core/indexes/period.py  |  1 +
 pandas/core/indexing.py        |  9 ++++++---
 pandas/core/series.py          | 15 ++++-----------
 pandas/io/formats/info.py      |  3 ++-
 5 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 21e171f937de8..2f50845fda4dc 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -561,7 +561,8 @@ def get_grouper(
     # if the actual grouper should be obj[key]
     def is_in_axis(key) -> bool:
         if not _is_label_like(key):
-            items = obj._data.items
+            # items -> .columns for DataFrame, .index for Series
+            items = obj.axes[-1]
             try:
                 items.get_loc(key)
             except (KeyError, TypeError, InvalidIndexError):
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 68d7e8dd384f0..0646acab57081 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -149,6 +149,7 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
     _infer_as_myclass = True
 
     _data: PeriodArray
+    freq: DateOffset
 
     _engine_type = libindex.PeriodEngine
     _supports_partial_string_indexing = True
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 8038bba8b6448..14c5c8ea011c3 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1,4 +1,4 @@
-from typing import Hashable, List, Tuple, Union
+from typing import TYPE_CHECKING, Hashable, List, Tuple, Union
 
 import numpy as np
 
@@ -29,6 +29,9 @@
 )
 from pandas.core.indexes.api import Index, InvalidIndexError
 
+if TYPE_CHECKING:
+    from pandas import DataFrame  # noqa:F401
+
 # "null slice"
 _NS = slice(None, None)
 
@@ -2108,7 +2111,7 @@ def _tuplify(ndim: int, loc: Hashable) -> Tuple[Union[Hashable, slice], ...]:
     return tuple(_tup)
 
 
-def convert_to_index_sliceable(obj, key):
+def convert_to_index_sliceable(obj: "DataFrame", key):
     """
     If we are index sliceable, then return my slicer, otherwise return None.
     """
@@ -2119,7 +2122,7 @@ def convert_to_index_sliceable(obj, key):
     elif isinstance(key, str):
 
         # we are an actual column
-        if key in obj._data.items:
+        if key in obj.columns:
             return None
 
         # We might have a datetimelike string that we can translate to a
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 39e1178a3a5c3..9e6c65c437df6 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -74,7 +74,7 @@
     sanitize_array,
 )
 from pandas.core.generic import NDFrame
-from pandas.core.indexers import maybe_convert_indices, unpack_1tuple
+from pandas.core.indexers import unpack_1tuple
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
 from pandas.core.indexes.api import (
     Float64Index,
@@ -435,7 +435,8 @@ def dtypes(self) -> DtypeObj:
         """
         Return the dtype object of the underlying data.
         """
-        return self._data.dtype
+        # DataFrame compatibility
+        return self.dtype
 
     @property
     def name(self) -> Label:
@@ -828,15 +829,7 @@ def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
 
         indices = ensure_platform_int(indices)
         new_index = self.index.take(indices)
-
-        if is_categorical_dtype(self):
-            # https://github.com/pandas-dev/pandas/issues/20664
-            # TODO: remove when the default Categorical.take behavior changes
-            indices = maybe_convert_indices(indices, len(self._get_axis(axis)))
-            kwargs = {"allow_fill": False}
-        else:
-            kwargs = {}
-        new_values = self._values.take(indices, **kwargs)
+        new_values = self._values.take(indices)
 
         return self._constructor(
             new_values, index=new_index, fastpath=True
diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
index 0c08065f55273..1fbc321160120 100644
--- a/pandas/io/formats/info.py
+++ b/pandas/io/formats/info.py
@@ -265,7 +265,8 @@ def _sizeof_fmt(num, size_qualifier):
         else:
             _verbose_repr()
 
-    counts = data._data.get_dtype_counts()
+    # groupby dtype.name to collect e.g. Categorical columns
+    counts = data.dtypes.value_counts().groupby(lambda x: x.name).sum()
     dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
     lines.append(f"dtypes: {', '.join(dtypes)}")