Skip to content

PERF: custom ops for RangeIndex.[all|any|__contains__] #26617

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ Performance Improvements
- Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is
int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
- Improved performance when slicing :class:`RangeIndex` (:issue:`26565`)
- Improved performance of slicing and other selected operations on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`)
- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
- Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`)
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4013,11 +4013,7 @@ def __contains__(self, key):

@Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
def contains(self, key):
hash(key)
try:
return key in self._engine
except (TypeError, ValueError):
return False
return key in self

def __hash__(self):
    """Refuse to hash: always raise ``TypeError`` naming the object's class."""
    cls_name = type(self).__name__
    raise TypeError("unhashable type: %r" % cls_name)
Expand Down
16 changes: 15 additions & 1 deletion pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import timedelta
import operator
from sys import getsizeof
from typing import Union
import warnings

import numpy as np
Expand Down Expand Up @@ -334,6 +335,14 @@ def is_monotonic_decreasing(self):
def has_duplicates(self):
    """Always return ``False``."""
    return False

def __contains__(self, key: Union[int, np.integer]) -> bool:
    """
    Return whether ``key`` is a member of ``self._range``.

    Unhashable keys raise ``TypeError``. Keys for which
    ``ensure_python_int`` raises ``TypeError`` are reported as absent.
    """
    # Called only for its side effect: an unhashable key raises here,
    # before any conversion is attempted.
    hash(key)
    try:
        as_int = ensure_python_int(key)
    except TypeError:
        return False
    else:
        return as_int in self._range

@Appender(_index_shared_docs['get_loc'])
def get_loc(self, key, method=None, tolerance=None):
if is_integer(key) and method is None and tolerance is None:
Expand Down Expand Up @@ -640,6 +649,12 @@ def __floordiv__(self, other):
return self._simple_new(start, start + 1, 1, name=self.name)
return self._int64index // other

def all(self) -> bool:
    """Return ``True`` unless ``self._range`` contains the value 0."""
    has_zero = 0 in self._range
    return not has_zero

def any(self) -> bool:
    """Return ``True`` if ``self._range`` yields at least one non-zero value."""
    # Stop at the first truthy (non-zero) element.
    for value in self._range:
        if value:
            return True
    return False

@classmethod
def _add_numeric_methods_binary(cls):
""" add in numeric methods, specialized to RangeIndex """
Expand Down Expand Up @@ -725,4 +740,3 @@ def _evaluate_numeric_binop(self, other):


RangeIndex._add_numeric_methods()
RangeIndex._add_logical_methods()
25 changes: 21 additions & 4 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,9 @@ def test_dtype(self):
assert self.index.dtype == np.int64

def test_cached_data(self):
# GH 26565
# Calling RangeIndex._data caches an int64 array of the same length as
# self at self._cached_data.
# This tests whether _cached_data is being set by various operations.
# GH 26565, GH 26617
# Calling RangeIndex._data caches an int64 array of the same length as
# self at self._cached_data. This test checks whether _cached_data has
# been set.
idx = RangeIndex(0, 100, 10)

assert idx._cached_data is None
Expand All @@ -262,6 +261,24 @@ def test_cached_data(self):
idx.get_loc(20)
assert idx._cached_data is None

90 in idx
assert idx._cached_data is None

91 in idx
assert idx._cached_data is None

idx.contains(90)
assert idx._cached_data is None

idx.contains(91)
assert idx._cached_data is None

idx.all()
assert idx._cached_data is None

idx.any()
assert idx._cached_data is None

df = pd.DataFrame({'a': range(10)}, index=idx)

df.loc[50]
Expand Down