Skip to content

PERF: use uniqueness_check from monotonic check when possible #14270

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1404,6 +1404,7 @@ Performance Improvements
- Improved performance of datetime string parsing in ``DatetimeIndex`` (:issue:`13692`)
- Improved performance of hashing ``Period`` (:issue:`12817`)
- Improved performance of ``factorize`` of datetime with timezone (:issue:`13750`)
- Improved performance by lazily creating indexing hashtables on larger Indexes (:issue:`14266`)


.. _whatsnew_0190.bug_fixes:
Expand All @@ -1422,7 +1423,6 @@ Bug Fixes
- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`)
- Bug in ``Categorical.from_codes()`` where an unhelpful error was raised when an invalid ``ordered`` parameter was passed in (:issue:`14058`)
- Bug in ``Series`` construction from a tuple of integers on Windows not returning default dtype (int64) (:issue:`13646`)

- Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`)
- Bug in ``.to_records()`` when index name is a unicode string (:issue:`13172`)

Expand Down
29 changes: 24 additions & 5 deletions pandas/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ cdef class IndexEngine:

cdef:
bint unique, monotonic_inc, monotonic_dec
bint initialized, monotonic_check
bint initialized, monotonic_check, unique_check

def __init__(self, vgetter, n):
self.vgetter = vgetter
Expand All @@ -91,6 +91,7 @@ cdef class IndexEngine:

self.initialized = 0
self.monotonic_check = 0
self.unique_check = 0

self.unique = 0
self.monotonic_inc = 0
Expand Down Expand Up @@ -177,8 +178,8 @@ cdef class IndexEngine:
return left
else:
return slice(left, right)
else:
return self._maybe_get_bool_indexer(val)

return self._maybe_get_bool_indexer(val)

cdef _maybe_get_bool_indexer(self, object val):
cdef:
Expand Down Expand Up @@ -215,6 +216,7 @@ cdef class IndexEngine:
if not self.initialized:
self.initialize()

self.unique_check = 1
return self.unique == 1

property is_monotonic_increasing:
Expand All @@ -234,16 +236,24 @@ cdef class IndexEngine:
return self.monotonic_dec == 1

cdef inline _do_monotonic_check(self):
cdef object is_unique
try:
values = self._get_index_values()
self.monotonic_inc, self.monotonic_dec = \
self.monotonic_inc, self.monotonic_dec, is_unique = \
self._call_monotonic(values)
except TypeError:
self.monotonic_inc = 0
self.monotonic_dec = 0
is_unique = 0

self.monotonic_check = 1

# we can only be sure of uniqueness if is_unique=1
if is_unique:
self.initialized = 1
self.unique = 1
self.unique_check = 1

cdef _get_index_values(self):
return self.vgetter()

Expand All @@ -257,6 +267,10 @@ cdef class IndexEngine:
hash(val)

cdef inline _ensure_mapping_populated(self):
# need to reset if we previously set
# ``initialized`` from the monotonic check
if self.unique_check:
self.initialized = 0
if not self.initialized:
self.initialize()

Expand All @@ -274,6 +288,12 @@ cdef class IndexEngine:
def clear_mapping(self):
self.mapping = None
self.initialized = 0
self.monotonic_check = 0
self.unique_check = 0

self.unique = 0
self.monotonic_inc = 0
self.monotonic_dec = 0

def get_indexer(self, values):
self._ensure_mapping_populated()
Expand Down Expand Up @@ -537,7 +557,6 @@ cdef class DatetimeEngine(Int64Engine):
raise TypeError

# Welcome to the spaghetti factory

if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
val = _to_i8(val)
Expand Down
Loading