From cd6787dd36525e54149c728fd03b37f974bf25d8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Mar 2020 14:09:03 -0800 Subject: [PATCH 1/2] Avoid _ndarray_values in MultiIndex --- pandas/core/indexes/multi.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c1efa512f326a..b5d8139015638 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -18,7 +18,7 @@ from pandas._libs import algos as libalgos, index as libindex, lib from pandas._libs.hashtable import duplicated_int64 -from pandas._typing import AnyArrayLike, ArrayLike, Scalar +from pandas._typing import AnyArrayLike, Scalar from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly @@ -52,6 +52,7 @@ ensure_index, ) from pandas.core.indexes.frozen import FrozenList +from pandas.core.indexes.numeric import Int64Index import pandas.core.missing as missing from pandas.core.sorting import ( get_group_index, @@ -2949,7 +2950,7 @@ def get_locs(self, seq): n = len(self) indexer = None - def _convert_to_indexer(r): + def _convert_to_indexer(r) -> Int64Index: # return an indexer if isinstance(r, slice): m = np.zeros(n, dtype=bool) @@ -3026,13 +3027,16 @@ def _update_indexer(idxr, indexer=indexer): if indexer is None: return np.array([], dtype=np.int64) + assert isinstance(indexer, Int64Index), type(indexer) indexer = self._reorder_indexer(seq, indexer) - return indexer._ndarray_values + return indexer._values def _reorder_indexer( - self, seq: Tuple[Union[Scalar, Iterable, AnyArrayLike], ...], indexer: ArrayLike - ) -> ArrayLike: + self, + seq: Tuple[Union[Scalar, Iterable, AnyArrayLike], ...], + indexer: Int64Index, + ) -> Int64Index: """ Reorder an indexer of a MultiIndex (self) so that the label are in the same order as given in seq @@ -3139,8 +3143,8 @@ def equals(self, other) -> bool: if self.nlevels != other.nlevels: return False - other_vals = com.values_from_object(ensure_index(other)) - return array_equivalent(self._ndarray_values, other_vals) + other_vals = com.values_from_object(other) + return array_equivalent(self._values, other_vals) if self.nlevels != other.nlevels: return False @@ -3232,7 +3236,7 @@ def union(self, other, sort=None): # TODO: Index.union returns other when `len(self)` is 0. uniq_tuples = lib.fast_unique_multiple( - [self._ndarray_values, other._ndarray_values], sort=sort + [self._values, other._ndarray_values], sort=sort ) return MultiIndex.from_arrays( @@ -3267,7 +3271,7 @@ def intersection(self, other, sort=False): if self.equals(other): return self - lvals = self._ndarray_values + lvals = self._values rvals = other._ndarray_values uniq_tuples = None # flag whether _inner_indexer was succesful From d8a9fc12b7abbf5ff56f93a3c05020fb163b95c9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Mar 2020 16:30:17 -0800 Subject: [PATCH 2/2] Avoid .values, _ndarray_values --- pandas/core/indexes/multi.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b5d8139015638..c22b289bb4017 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1181,7 +1181,7 @@ def _format_native_types(self, na_rep="nan", **kwargs): sortorder=self.sortorder, verify_integrity=False, ) - return mi.values + return mi._values def format( self, @@ -1420,7 +1420,7 @@ def is_monotonic_increasing(self) -> bool: except TypeError: # we have mixed types and np.lexsort is not happy - return Index(self.values).is_monotonic + return Index(self._values).is_monotonic @cache_readonly def is_monotonic_decreasing(self) -> bool: @@ -1613,7 +1613,7 @@ def to_flat_index(self): ('bar', 'baz'), ('bar', 'qux')], dtype='object') """ - return Index(self.values, tupleize_cols=False) + return Index(self._values, tupleize_cols=False) @property def is_all_dates(self) -> bool: @@ -1915,7 +1915,7 @@ def append(self, other): arrays.append(label.append(appended)) return MultiIndex.from_arrays(arrays, names=self.names) - to_concat = (self.values,) + tuple(k._values for k in other) + to_concat = (self._values,) + tuple(k._values for k in other) new_tuples = np.concatenate(to_concat) # if all(isinstance(x, MultiIndex) for x in other): @@ -1925,7 +1925,7 @@ def append(self, other): return Index(new_tuples) def argsort(self, *args, **kwargs) -> np.ndarray: - return self.values.argsort(*args, **kwargs) + return self._values.argsort(*args, **kwargs) @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) def repeat(self, repeats, axis=None): @@ -2369,7 +2369,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): # let's instead try with a straight Index if method is None: - return Index(self.values).get_indexer( + return Index(self._values).get_indexer( target, method=method, limit=limit, tolerance=tolerance ) @@ -2832,7 +2832,8 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): mapper = Series(indexer) indexer = codes.take(ensure_platform_int(indexer)) result = Series(Index(indexer).isin(r).nonzero()[0]) - m = result.map(mapper)._ndarray_values + m = result.map(mapper) + m = np.asarray(m) else: m = np.zeros(len(codes), dtype=bool) @@ -3346,7 +3347,7 @@ def difference(self, other, sort=None): indexer = indexer.take((indexer != -1).nonzero()[0]) label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) - difference = this.values.take(label_diff) + difference = this._values.take(label_diff) if sort is None: difference = sorted(difference) @@ -3363,7 +3364,8 @@ def difference(self, other, sort=None): def _convert_can_do_setop(self, other): result_names = self.names - if not hasattr(other, "names"): + if not isinstance(other, Index): + if len(other) == 0: other = MultiIndex( levels=[[]] * self.nlevels, @@ -3460,8 +3462,8 @@ def _wrap_joined_index(self, joined, other): @Appender(Index.isin.__doc__) def isin(self, values, level=None): if level is None: - values = MultiIndex.from_tuples(values, names=self.names).values - return algos.isin(self.values, values) + values = MultiIndex.from_tuples(values, names=self.names)._values + return algos.isin(self._values, values) else: num = self._get_level_number(level) levs = self.get_level_values(num)