From f09053abb0cfc07222f413598e9420478da7f945 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 9 Oct 2021 20:58:39 -0700 Subject: [PATCH 1/4] PERF: Index.insert --- pandas/core/indexes/base.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2ff9b3973a526..f23704cbf7706 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6327,13 +6327,23 @@ def insert(self, loc: int, item) -> Index: dtype = self._find_common_type_compat(item) return self.astype(dtype).insert(loc, item) - arr = np.asarray(self) + arr = self._values + + if arr.dtype != object or not isinstance( + item, (tuple, np.datetime64, np.timedelta64) + ): + # with object-dtype we need to worry about numpy incorrectly casting + # dt64/td64 to integer, also about treating tuples as sequences + # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550 + new_values = np.concatenate([arr[:loc], [item], arr[loc:]]) + + else: + new_values = np.concatenate([arr[:loc], [None], arr[loc:]]) + new_values[loc] = item - # Use constructor to ensure we get tuples cast correctly. # Use self._constructor instead of Index to retain NumericIndex GH#43921 - item = self._constructor([item], dtype=self.dtype)._values - idx = np.concatenate((arr[:loc], item, arr[loc:])) - return self._constructor._with_infer(idx, name=self.name) + # TODO(2.0) can use Index instead of self._constructor + return self._constructor._with_infer(new_values, name=self.name) def drop(self, labels, errors: str_t = "raise") -> Index: """ From 2017e7c3f7a48ff0de58af9f49097d2783e004a7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 10 Oct 2021 08:03:10 -0700 Subject: [PATCH 2/4] 32bit compat --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f23704cbf7706..3870b9f6f5039 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6335,7 +6335,7 @@ def insert(self, loc: int, item) -> Index: # with object-dtype we need to worry about numpy incorrectly casting # dt64/td64 to integer, also about treating tuples as sequences # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550 - new_values = np.concatenate([arr[:loc], [item], arr[loc:]]) + new_values = np.concatenate([arr[:loc], [item], arr[loc:]], dtype=arr.dtype) else: new_values = np.concatenate([arr[:loc], [None], arr[loc:]]) From 7fc271b5f370f49d1071a580ea3c2f44ee7d946d Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 10 Oct 2021 11:19:41 -0700 Subject: [PATCH 3/4] troubleshoot build --- pandas/core/indexes/base.py | 5 +++-- pandas/core/indexes/range.py | 14 ++++++++++++++ pandas/tests/indexes/common.py | 2 +- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3870b9f6f5039..b36aaeecb5046 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6335,10 +6335,11 @@ def insert(self, loc: int, item) -> Index: # with object-dtype we need to worry about numpy incorrectly casting # dt64/td64 to integer, also about treating tuples as sequences # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550 - new_values = np.concatenate([arr[:loc], [item], arr[loc:]], dtype=arr.dtype) + casted = arr.dtype.type(item) + new_values = np.insert(arr, loc, casted) else: - new_values = np.concatenate([arr[:loc], [None], arr[loc:]]) + new_values = np.insert(arr, loc, None) new_values[loc] = item # Use self._constructor instead of Index to retain NumericIndex GH#43921 diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 51d9f15390789..e32f0e7c18ae7 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -425,6 +425,20 @@ def _get_indexer( # -------------------------------------------------------------------- + def insert(self, loc, item): + if len(self) and (is_integer(item) or is_float(item)): + # We can retain RangeIndex is inserting at the beginning or end + rng = self._range + if loc == 0 and item == self[0] - self.step: + new_rng = range(rng.start - rng.step, rng.stop, rng.step) + return type(self)._simple_new(new_rng, name=self.name) + + elif loc == len(self) and item == self[-1] + self.step: + new_rng = range(rng.start, rng.stop + rng.step, rng.step) + return type(self)._simple_new(new_rng, name=self.name) + + return super().insert(loc, item) + def repeat(self, repeats, axis=None) -> Int64Index: return self._int64index.repeat(repeats, axis=axis) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 7e43664c6b3de..ac29009bbfb42 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -805,7 +805,7 @@ def test_insert_non_na(self, simple_index): cls = Int64Index expected = cls([index[0]] + list(index), dtype=index.dtype) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) def test_insert_na(self, nulls_fixture, simple_index): # GH 18295 (test missing) From a29a8aad3a6e0d0dab2da1ed3d79649da013ca83 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 10 Oct 2021 11:26:52 -0700 Subject: [PATCH 4/4] revert accidental commit --- pandas/core/indexes/range.py | 61 ++++++---------------------------- pandas/tests/indexes/common.py | 2 +- 2 files changed, 11 insertions(+), 52 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e32f0e7c18ae7..4003165a7ddc6 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -26,7 +26,6 @@ cache_readonly, doc, ) -from pandas.util._exceptions import rewrite_exception from pandas.core.dtypes.common import ( ensure_platform_int, @@ -189,17 +188,6 @@ def _data(self) -> np.ndarray: """ return np.arange(self.start, self.stop, self.step, dtype=np.int64) - @cache_readonly - def _cached_int64index(self) -> Int64Index: - return Int64Index._simple_new(self._data, name=self.name) - - @property - def _int64index(self) -> Int64Index: - # wrap _cached_int64index so we can be sure its name matches self.name - res = self._cached_int64index - res._name = self._name - return res - def _get_data_as_items(self): """return a list of tuples of start, stop, step""" rng = self._range @@ -425,38 +413,6 @@ def _get_indexer( # -------------------------------------------------------------------- - def insert(self, loc, item): - if len(self) and (is_integer(item) or is_float(item)): - # We can retain RangeIndex is inserting at the beginning or end - rng = self._range - if loc == 0 and item == self[0] - self.step: - new_rng = range(rng.start - rng.step, rng.stop, rng.step) - return type(self)._simple_new(new_rng, name=self.name) - - elif loc == len(self) and item == self[-1] + self.step: - new_rng = range(rng.start, rng.stop + rng.step, rng.step) - return type(self)._simple_new(new_rng, name=self.name) - - return super().insert(loc, item) - - def repeat(self, repeats, axis=None) -> Int64Index: - return self._int64index.repeat(repeats, axis=axis) - - def delete(self, loc) -> Int64Index: # type: ignore[override] - return self._int64index.delete(loc) - - def take( - self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs - ) -> Int64Index: - with rewrite_exception("Int64Index", type(self).__name__): - return self._int64index.take( - indices, - axis=axis, - allow_fill=allow_fill, - fill_value=fill_value, - **kwargs, - ) - def tolist(self) -> list[int]: return list(self._range) @@ -697,7 +653,8 @@ def _union(self, other: Index, sort): and (end_s - step_o <= end_o) ): return type(self)(start_r, end_r + step_o, step_o) - return self._int64index._union(other, sort=sort) + + return super()._union(other, sort=sort) def _difference(self, other, sort=None): # optimized set operation if we have another RangeIndex @@ -871,7 +828,8 @@ def __floordiv__(self, other): start = self.start // other new_range = range(start, start + 1, 1) return self._simple_new(new_range, name=self.name) - return self._int64index // other + + return super().__floordiv__(other) # -------------------------------------------------------------------- # Reductions @@ -905,21 +863,22 @@ def _arith_method(self, other, op): elif isinstance(other, (timedelta, np.timedelta64)): # GH#19333 is_integer evaluated True on timedelta64, # so we need to catch these explicitly - return op(self._int64index, other) + return super()._arith_method(other, op) elif is_timedelta64_dtype(other): # Must be an np.ndarray; GH#22390 - return op(self._int64index, other) + return super()._arith_method(other, op) if op in [ operator.pow, ops.rpow, operator.mod, ops.rmod, + operator.floordiv, ops.rfloordiv, divmod, ops.rdivmod, ]: - return op(self._int64index, other) + return super()._arith_method(other, op) step: Callable | None = None if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]: @@ -960,5 +919,5 @@ def _arith_method(self, other, op): except (ValueError, TypeError, ZeroDivisionError): # Defer to Int64Index implementation - return op(self._int64index, other) - # TODO: Do attrs get handled reliably? + # test_arithmetic_explicit_conversions + return super()._arith_method(other, op) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index ac29009bbfb42..7e43664c6b3de 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -805,7 +805,7 @@ def test_insert_non_na(self, simple_index): cls = Int64Index expected = cls([index[0]] + list(index), dtype=index.dtype) - tm.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected) def test_insert_na(self, nulls_fixture, simple_index): # GH 18295 (test missing)