Skip to content

REF: simplify PeriodIndex.get_loc #31598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Feb 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
8e17545
REF: make PeriodIndex.get_loc conform to DTI/TDI.get_loc
jbrockmendel Jan 29, 2020
357fb8f
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Jan 29, 2020
fdbf814
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Jan 30, 2020
d46780a
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Jan 31, 2020
a857fa6
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Jan 31, 2020
2de7966
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Feb 1, 2020
17c4329
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Feb 2, 2020
20063e1
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Feb 3, 2020
71ce7d0
rebase fixup
jbrockmendel Feb 3, 2020
8bced71
Fix convert_tolerance
jbrockmendel Feb 3, 2020
0fb548d
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Feb 3, 2020
dca8c7c
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Feb 3, 2020
15e15ef
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Feb 5, 2020
356b64c
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Feb 7, 2020
2dc116f
Merge branch 'master' of https://github.com/pandas-dev/pandas into Pe…
jbrockmendel Feb 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cnp.import_array()

cimport pandas._libs.util as util

from pandas._libs.tslibs import Period
from pandas._libs.tslibs.nattype cimport c_NaT as NaT
from pandas._libs.tslibs.c_timestamp cimport _Timestamp

Expand Down Expand Up @@ -466,6 +467,28 @@ cdef class TimedeltaEngine(DatetimeEngine):

cdef class PeriodEngine(Int64Engine):

# Convert a scalar key into the int64 used for engine lookups:
# NaT yields its ``.value``, a Period yields its ``.ordinal``, and any
# other object raises TypeError carrying that object.
cdef int64_t _unbox_scalar(self, scalar) except? -1:
if scalar is NaT:
return scalar.value
if isinstance(scalar, Period):
# NB: we assume that we have the correct freq here.
# TODO: potential optimization by checking for _Period?
return scalar.ordinal
# Neither NaT nor a Period: reject, attaching the offending object.
raise TypeError(scalar)

# Look up ``val`` in the engine: unbox it to an int64 with
# ``_unbox_scalar`` and delegate to ``Int64Engine.get_loc``. A TypeError
# raised while unboxing is re-raised as ``KeyError(val)``.
cpdef get_loc(self, object val):
# NB: the caller is responsible for ensuring that we are called
# with either a Period or NaT
cdef:
int64_t conv

try:
conv = self._unbox_scalar(val)
except TypeError:
# A key that cannot be unboxed is reported as a missing key.
raise KeyError(val)

return Int64Engine.get_loc(self, conv)

# Return the result of the superclass ``vgetter()`` call viewed as "i8".
cdef _get_index_values(self):
return super(PeriodEngine, self).vgetter().view("i8")

Expand Down
25 changes: 8 additions & 17 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,10 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):

if tolerance is not None:
tolerance = self._convert_tolerance(tolerance, target)
if self_index is not self:
# convert tolerance to i8
tolerance = self._maybe_convert_timedelta(tolerance)

return Index.get_indexer(self_index, target, method, limit, tolerance)

@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
Expand Down Expand Up @@ -504,6 +508,7 @@ def get_loc(self, key, method=None, tolerance=None):
TypeError
If key is listlike or otherwise not hashable.
"""
orig_key = key

if not is_scalar(key):
raise InvalidIndexError(key)
Expand Down Expand Up @@ -545,20 +550,12 @@ def get_loc(self, key, method=None, tolerance=None):
key = Period(key, freq=self.freq)
except ValueError:
# we cannot construct the Period
raise KeyError(key)
raise KeyError(orig_key)

ordinal = self._data._unbox_scalar(key)
try:
return self._engine.get_loc(ordinal)
return Index.get_loc(self, key, method, tolerance)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this changes perf? the reason is that we have already created an _int64index, but now we are creating something new?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but now we are creating something new?

I don't think we're creating a new object here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the engine itself is (maybe) being created again

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess on the first call, but on the flip side the _int64index may not have been created yet

except KeyError:

try:
if tolerance is not None:
tolerance = self._convert_tolerance(tolerance, np.asarray(key))
return self._int64index.get_loc(ordinal, method, tolerance)

except KeyError:
raise KeyError(key)
raise KeyError(orig_key)

def _maybe_cast_slice_bound(self, label, side: str, kind: str):
"""
Expand Down Expand Up @@ -625,12 +622,6 @@ def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True
except KeyError:
raise KeyError(key)

# Convert ``tolerance`` with DatetimeIndexOpsMixin._convert_tolerance, check
# its size against ``target``, and return it passed through
# ``self._maybe_convert_timedelta``.
def _convert_tolerance(self, tolerance, target):
tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance, target)
# A tolerance with more than one element must match the target's size.
if target.size != tolerance.size and tolerance.size > 1:
raise ValueError("list-like tolerance size must match target index size")
return self._maybe_convert_timedelta(tolerance)

def insert(self, loc, item):
if not isinstance(item, Period) or self.freq != item.freq:
return self.astype(object).insert(loc, item)
Expand Down