-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Add dtype parameter to IntervalIndex constructors and deprecate from_intervals #19339
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
""" define the IntervalIndex """ | ||
|
||
import numpy as np | ||
import warnings | ||
|
||
from pandas.core.dtypes.missing import notna, isna | ||
from pandas.core.dtypes.generic import ABCDatetimeIndex, ABCPeriodIndex | ||
|
@@ -151,6 +152,10 @@ class IntervalIndex(IntervalMixin, Index): | |
Name to be stored in the index. | ||
copy : boolean, default False | ||
Copy the meta-data | ||
dtype : dtype or None, default None | ||
If None, dtype will be inferred | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a version added There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
..versionadded:: 0.23.0 | ||
|
||
Attributes | ||
---------- | ||
|
@@ -167,7 +172,6 @@ class IntervalIndex(IntervalMixin, Index): | |
from_arrays | ||
from_tuples | ||
from_breaks | ||
from_intervals | ||
contains | ||
|
||
Examples | ||
|
@@ -181,8 +185,7 @@ class IntervalIndex(IntervalMixin, Index): | |
|
||
It may also be constructed using one of the constructor | ||
methods: :meth:`IntervalIndex.from_arrays`, | ||
:meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_intervals` | ||
and :meth:`IntervalIndex.from_tuples`. | ||
:meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`. | ||
|
||
See further examples in the doc strings of ``interval_range`` and the | ||
mentioned constructor methods. | ||
|
@@ -211,8 +214,7 @@ class IntervalIndex(IntervalMixin, Index): | |
|
||
_mask = None | ||
|
||
def __new__(cls, data, closed=None, | ||
name=None, copy=False, dtype=None, | ||
def __new__(cls, data, closed=None, name=None, copy=False, dtype=None, | ||
fastpath=False, verify_integrity=True): | ||
|
||
if fastpath: | ||
|
@@ -245,19 +247,28 @@ def __new__(cls, data, closed=None, | |
|
||
closed = closed or infer_closed | ||
|
||
return cls._simple_new(left, right, closed, name, | ||
copy=copy, verify_integrity=verify_integrity) | ||
return cls._simple_new(left, right, closed, name, copy=copy, | ||
dtype=dtype, verify_integrity=verify_integrity) | ||
|
||
@classmethod | ||
def _simple_new(cls, left, right, closed=None, name=None, | ||
copy=False, verify_integrity=True): | ||
def _simple_new(cls, left, right, closed=None, name=None, copy=False, | ||
dtype=None, verify_integrity=True): | ||
result = IntervalMixin.__new__(cls) | ||
|
||
if closed is None: | ||
closed = 'right' | ||
closed = closed or 'right' | ||
left = _ensure_index(left, copy=copy) | ||
right = _ensure_index(right, copy=copy) | ||
|
||
if dtype is not None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a 1-liner here, noting that this must be an interval dtype There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
# GH 19262: dtype must be an IntervalDtype to override inferred | ||
dtype = pandas_dtype(dtype) | ||
if not is_interval_dtype(dtype): | ||
msg = 'dtype must be an IntervalDtype, got {dtype}' | ||
raise TypeError(msg.format(dtype=dtype)) | ||
elif dtype.subtype is not None: | ||
left = left.astype(dtype.subtype) | ||
right = right.astype(dtype.subtype) | ||
|
||
# coerce dtypes to match if needed | ||
if is_float_dtype(left) and is_integer_dtype(right): | ||
right = right.astype(left.dtype) | ||
|
@@ -304,7 +315,7 @@ def _shallow_copy(self, left=None, right=None, **kwargs): | |
# only single value passed, could be an IntervalIndex | ||
# or array of Intervals | ||
if not isinstance(left, IntervalIndex): | ||
left = type(self).from_intervals(left) | ||
left = self._constructor(left) | ||
|
||
left, right = left.left, left.right | ||
else: | ||
|
@@ -322,7 +333,7 @@ def _validate(self): | |
Verify that the IntervalIndex is valid. | ||
""" | ||
if self.closed not in _VALID_CLOSED: | ||
raise ValueError("invalid options for 'closed': {closed}" | ||
raise ValueError("invalid option for 'closed': {closed}" | ||
.format(closed=self.closed)) | ||
if len(self.left) != len(self.right): | ||
raise ValueError('left and right must have the same length') | ||
|
@@ -356,7 +367,7 @@ def _engine(self): | |
|
||
@property | ||
def _constructor(self): | ||
return type(self).from_intervals | ||
return type(self) | ||
|
||
def __contains__(self, key): | ||
""" | ||
|
@@ -402,7 +413,8 @@ def contains(self, key): | |
return False | ||
|
||
@classmethod | ||
def from_breaks(cls, breaks, closed='right', name=None, copy=False): | ||
def from_breaks(cls, breaks, closed='right', name=None, copy=False, | ||
dtype=None): | ||
""" | ||
Construct an IntervalIndex from an array of splits | ||
|
||
|
@@ -417,6 +429,10 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): | |
Name to be stored in the index. | ||
copy : boolean, default False | ||
copy the data | ||
dtype : dtype or None, default None | ||
If None, dtype will be inferred | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. vesionadded There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
..versionadded:: 0.23.0 | ||
|
||
Examples | ||
-------- | ||
|
@@ -430,18 +446,17 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): | |
interval_range : Function to create a fixed frequency IntervalIndex | ||
IntervalIndex.from_arrays : Construct an IntervalIndex from a left and | ||
right array | ||
IntervalIndex.from_intervals : Construct an IntervalIndex from an array | ||
of Interval objects | ||
IntervalIndex.from_tuples : Construct an IntervalIndex from a | ||
list/array of tuples | ||
""" | ||
breaks = maybe_convert_platform_interval(breaks) | ||
|
||
return cls.from_arrays(breaks[:-1], breaks[1:], closed, | ||
name=name, copy=copy) | ||
name=name, copy=copy, dtype=dtype) | ||
|
||
@classmethod | ||
def from_arrays(cls, left, right, closed='right', name=None, copy=False): | ||
def from_arrays(cls, left, right, closed='right', name=None, copy=False, | ||
dtype=None): | ||
""" | ||
Construct an IntervalIndex from a a left and right array | ||
|
||
|
@@ -458,6 +473,10 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): | |
Name to be stored in the index. | ||
copy : boolean, default False | ||
copy the data | ||
dtype : dtype or None, default None | ||
If None, dtype will be inferred | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
..versionadded:: 0.23.0 | ||
|
||
Examples | ||
-------- | ||
|
@@ -471,22 +490,23 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): | |
interval_range : Function to create a fixed frequency IntervalIndex | ||
IntervalIndex.from_breaks : Construct an IntervalIndex from an array of | ||
splits | ||
IntervalIndex.from_intervals : Construct an IntervalIndex from an array | ||
of Interval objects | ||
IntervalIndex.from_tuples : Construct an IntervalIndex from a | ||
list/array of tuples | ||
""" | ||
left = maybe_convert_platform_interval(left) | ||
right = maybe_convert_platform_interval(right) | ||
|
||
return cls._simple_new(left, right, closed, name=name, | ||
copy=copy, verify_integrity=True) | ||
return cls._simple_new(left, right, closed, name=name, copy=copy, | ||
dtype=dtype, verify_integrity=True) | ||
|
||
@classmethod | ||
def from_intervals(cls, data, name=None, copy=False): | ||
def from_intervals(cls, data, closed=None, name=None, copy=False, | ||
dtype=None): | ||
""" | ||
Construct an IntervalIndex from a 1d array of Interval objects | ||
|
||
.. deprecated:: 0.23.0 | ||
|
||
Parameters | ||
---------- | ||
data : array-like (1-dimensional) | ||
|
@@ -496,6 +516,10 @@ def from_intervals(cls, data, name=None, copy=False): | |
Name to be stored in the index. | ||
copy : boolean, default False | ||
by-default copy the data, this is compat only and ignored | ||
dtype : dtype or None, default None | ||
If None, dtype will be inferred | ||
|
||
..versionadded:: 0.23.0 | ||
|
||
Examples | ||
-------- | ||
|
@@ -521,16 +545,14 @@ def from_intervals(cls, data, name=None, copy=False): | |
IntervalIndex.from_tuples : Construct an IntervalIndex from a | ||
list/array of tuples | ||
""" | ||
if isinstance(data, IntervalIndex): | ||
left, right, closed = data.left, data.right, data.closed | ||
name = name or data.name | ||
else: | ||
data = maybe_convert_platform_interval(data) | ||
left, right, closed = intervals_to_interval_bounds(data) | ||
return cls.from_arrays(left, right, closed, name=name, copy=False) | ||
msg = ('IntervalIndex.from_intervals is deprecated and will be ' | ||
'removed in a future version; use IntervalIndex(...) instead') | ||
warnings.warn(msg, FutureWarning, stacklevel=2) | ||
return cls(data, closed=closed, name=name, copy=copy, dtype=dtype) | ||
|
||
@classmethod | ||
def from_tuples(cls, data, closed='right', name=None, copy=False): | ||
def from_tuples(cls, data, closed='right', name=None, copy=False, | ||
dtype=None): | ||
""" | ||
Construct an IntervalIndex from a list/array of tuples | ||
|
||
|
@@ -545,10 +567,14 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): | |
Name to be stored in the index. | ||
copy : boolean, default False | ||
by-default copy the data, this is compat only and ignored | ||
dtype : dtype or None, default None | ||
If None, dtype will be inferred | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
..versionadded:: 0.23.0 | ||
|
||
Examples | ||
-------- | ||
>>> pd.IntervalIndex.from_tuples([(0, 1), (1,2)]) | ||
>>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) | ||
IntervalIndex([(0, 1], (1, 2]], | ||
closed='right', dtype='interval[int64]') | ||
|
||
|
@@ -559,8 +585,6 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): | |
right array | ||
IntervalIndex.from_breaks : Construct an IntervalIndex from an array of | ||
splits | ||
IntervalIndex.from_intervals : Construct an IntervalIndex from an array | ||
of Interval objects | ||
""" | ||
if len(data): | ||
left, right = [], [] | ||
|
@@ -571,15 +595,22 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): | |
if isna(d): | ||
lhs = rhs = np.nan | ||
else: | ||
lhs, rhs = d | ||
try: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a 1-liner here showing the format of th expected input (list-of-tuples) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
# need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...] | ||
lhs, rhs = d | ||
except ValueError: | ||
msg = ('IntervalIndex.from_tuples requires tuples of ' | ||
'length 2, got {tpl}').format(tpl=d) | ||
raise ValueError(msg) | ||
except TypeError: | ||
msg = ('IntervalIndex.from_tuples received an invalid ' | ||
'item, {tpl}').format(tpl=d) | ||
raise TypeError(msg) | ||
left.append(lhs) | ||
right.append(rhs) | ||
|
||
# TODO | ||
# if we have nulls and we previous had *only* | ||
# integer data, then we have changed the dtype | ||
|
||
return cls.from_arrays(left, right, closed, name=name, copy=False) | ||
return cls.from_arrays(left, right, closed, name=name, copy=False, | ||
dtype=dtype) | ||
|
||
def to_tuples(self, na_tuple=True): | ||
""" | ||
|
@@ -921,7 +952,7 @@ def get_loc(self, key, method=None): | |
Examples | ||
--------- | ||
>>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) | ||
>>> index = pd.IntervalIndex.from_intervals([i1, i2]) | ||
>>> index = pd.IntervalIndex([i1, i2]) | ||
>>> index.get_loc(1) | ||
0 | ||
|
||
|
@@ -937,7 +968,7 @@ def get_loc(self, key, method=None): | |
relevant intervals. | ||
|
||
>>> i3 = pd.Interval(0, 2) | ||
>>> overlapping_index = pd.IntervalIndex.from_intervals([i2, i3]) | ||
>>> overlapping_index = pd.IntervalIndex([i2, i3]) | ||
>>> overlapping_index.get_loc(1.5) | ||
array([0, 1], dtype=int64) | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be removed from api.rst now, or should I wait until
from_intervals
is actually deleted from the codebase and not just deprecated?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes this is fine