From 921376a8a778c00d05eaa10d3a279650abc9cde5 Mon Sep 17 00:00:00 2001 From: jschendel Date: Sun, 21 Jan 2018 20:43:01 -0700 Subject: [PATCH 1/3] ENH: Add dtype parameter to IntervalIndex constructors and deprecate from_intervals --- doc/source/api.rst | 1 - doc/source/whatsnew/v0.23.0.txt | 4 +- pandas/core/indexes/base.py | 6 +- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/interval.py | 100 +++-- pandas/core/reshape/tile.py | 3 +- pandas/tests/categorical/test_constructors.py | 4 +- .../indexes/interval/test_construction.py | 342 ++++++++++++++++++ .../tests/indexes/interval/test_interval.py | 247 +------------ pandas/tests/indexes/test_category.py | 2 +- pandas/tests/reshape/test_tile.py | 9 +- 11 files changed, 419 insertions(+), 301 deletions(-) create mode 100644 pandas/tests/indexes/interval/test_construction.py diff --git a/doc/source/api.rst b/doc/source/api.rst index 88419df1880ec..ddd09327935ce 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1617,7 +1617,6 @@ IntervalIndex Components IntervalIndex.from_arrays IntervalIndex.from_tuples IntervalIndex.from_breaks - IntervalIndex.from_intervals IntervalIndex.contains IntervalIndex.left IntervalIndex.right diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 71492154419fb..4dde76dee46a5 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -207,9 +207,8 @@ Other Enhancements :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas to register custom accessors like ``.cat`` on pandas objects. See :ref:`Registering Custom Accessors ` for more (:issue:`14781`). - - - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) +- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) .. 
_whatsnew_0230.api_breaking: @@ -329,6 +328,7 @@ Deprecations - ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`). - :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). +- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) .. _whatsnew_0230.prior_deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f67e6eae27001..906f9b99d9ed5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -200,7 +200,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # interval if is_interval_dtype(data) or is_interval_dtype(dtype): from .interval import IntervalIndex - return IntervalIndex(data, dtype=dtype, name=name, copy=copy) + return IntervalIndex(data, dtype=dtype, name=name, copy=copy, + **kwargs) # index-like elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -313,8 +314,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, return Float64Index(subarr, copy=copy, name=name) elif inferred == 'interval': from .interval import IntervalIndex - return IntervalIndex.from_intervals(subarr, name=name, - copy=copy) + return IntervalIndex(subarr, name=name, copy=copy) elif inferred == 'boolean': # don't support boolean explicitly ATM pass diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2d4655d84dca8..2c7be2b21f959 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -341,7 +341,7 @@ def __array__(self, dtype=None): def astype(self, dtype, copy=True): if is_interval_dtype(dtype): from pandas import IntervalIndex - return IntervalIndex.from_intervals(np.array(self)) + return IntervalIndex(np.array(self)) elif is_categorical_dtype(dtype): # GH 18630 dtype = 
self.dtype._update_dtype(dtype) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 18fb71b490592..fa58a2e263f6c 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1,6 +1,7 @@ """ define the IntervalIndex """ import numpy as np +import warnings from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.generic import ABCDatetimeIndex, ABCPeriodIndex @@ -151,6 +152,8 @@ class IntervalIndex(IntervalMixin, Index): Name to be stored in the index. copy : boolean, default False Copy the meta-data + dtype : dtype or None, default None + If None, dtype will be inferred Attributes ---------- @@ -167,7 +170,6 @@ class IntervalIndex(IntervalMixin, Index): from_arrays from_tuples from_breaks - from_intervals contains Examples @@ -181,8 +183,7 @@ class IntervalIndex(IntervalMixin, Index): It may also be constructed using one of the constructor methods: :meth:`IntervalIndex.from_arrays`, - :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_intervals` - and :meth:`IntervalIndex.from_tuples`. + :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`. See further examples in the doc strings of ``interval_range`` and the mentioned constructor methods. 
@@ -211,8 +212,7 @@ class IntervalIndex(IntervalMixin, Index): _mask = None - def __new__(cls, data, closed=None, - name=None, copy=False, dtype=None, + def __new__(cls, data, closed=None, name=None, copy=False, dtype=None, fastpath=False, verify_integrity=True): if fastpath: @@ -245,19 +245,28 @@ def __new__(cls, data, closed=None, closed = closed or infer_closed - return cls._simple_new(left, right, closed, name, - copy=copy, verify_integrity=verify_integrity) + return cls._simple_new(left, right, closed, name, copy=copy, + dtype=dtype, verify_integrity=verify_integrity) @classmethod - def _simple_new(cls, left, right, closed=None, name=None, - copy=False, verify_integrity=True): + def _simple_new(cls, left, right, closed=None, name=None, copy=False, + dtype=None, verify_integrity=True): result = IntervalMixin.__new__(cls) - if closed is None: - closed = 'right' + closed = closed or 'right' left = _ensure_index(left, copy=copy) right = _ensure_index(right, copy=copy) + if dtype is not None: + # GH 19262 + dtype = pandas_dtype(dtype) + if not is_interval_dtype(dtype): + msg = 'dtype must be an IntervalDtype, got {dtype}' + raise TypeError(msg.format(dtype=dtype)) + elif dtype.subtype is not None: + left = left.astype(dtype.subtype) + right = right.astype(dtype.subtype) + # coerce dtypes to match if needed if is_float_dtype(left) and is_integer_dtype(right): right = right.astype(left.dtype) @@ -304,7 +313,7 @@ def _shallow_copy(self, left=None, right=None, **kwargs): # only single value passed, could be an IntervalIndex # or array of Intervals if not isinstance(left, IntervalIndex): - left = type(self).from_intervals(left) + left = self._constructor(left) left, right = left.left, left.right else: @@ -322,7 +331,7 @@ def _validate(self): Verify that the IntervalIndex is valid. 
""" if self.closed not in _VALID_CLOSED: - raise ValueError("invalid options for 'closed': {closed}" + raise ValueError("invalid option for 'closed': {closed}" .format(closed=self.closed)) if len(self.left) != len(self.right): raise ValueError('left and right must have the same length') @@ -356,7 +365,7 @@ def _engine(self): @property def _constructor(self): - return type(self).from_intervals + return type(self) def __contains__(self, key): """ @@ -402,7 +411,8 @@ def contains(self, key): return False @classmethod - def from_breaks(cls, breaks, closed='right', name=None, copy=False): + def from_breaks(cls, breaks, closed='right', name=None, copy=False, + dtype=None): """ Construct an IntervalIndex from an array of splits @@ -417,6 +427,8 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): Name to be stored in the index. copy : boolean, default False copy the data + dtype : dtype or None, default None + If None, dtype will be inferred Examples -------- @@ -430,18 +442,17 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): interval_range : Function to create a fixed frequency IntervalIndex IntervalIndex.from_arrays : Construct an IntervalIndex from a left and right array - IntervalIndex.from_intervals : Construct an IntervalIndex from an array - of Interval objects IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ breaks = maybe_convert_platform_interval(breaks) return cls.from_arrays(breaks[:-1], breaks[1:], closed, - name=name, copy=copy) + name=name, copy=copy, dtype=dtype) @classmethod - def from_arrays(cls, left, right, closed='right', name=None, copy=False): + def from_arrays(cls, left, right, closed='right', name=None, copy=False, + dtype=None): """ Construct an IntervalIndex from a a left and right array @@ -458,6 +469,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): Name to be stored in the index. 
copy : boolean, default False copy the data + dtype : dtype or None, default None + If None, dtype will be inferred Examples -------- @@ -471,22 +484,23 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): interval_range : Function to create a fixed frequency IntervalIndex IntervalIndex.from_breaks : Construct an IntervalIndex from an array of splits - IntervalIndex.from_intervals : Construct an IntervalIndex from an array - of Interval objects IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ left = maybe_convert_platform_interval(left) right = maybe_convert_platform_interval(right) - return cls._simple_new(left, right, closed, name=name, - copy=copy, verify_integrity=True) + return cls._simple_new(left, right, closed, name=name, copy=copy, + dtype=dtype, verify_integrity=True) @classmethod - def from_intervals(cls, data, name=None, copy=False): + def from_intervals(cls, data, closed=None, name=None, copy=False, + dtype=None): """ Construct an IntervalIndex from a 1d array of Interval objects + .. deprecated:: 0.23.0 + Parameters ---------- data : array-like (1-dimensional) @@ -496,6 +510,8 @@ def from_intervals(cls, data, name=None, copy=False): Name to be stored in the index. 
copy : boolean, default False by-default copy the data, this is compat only and ignored + dtype : dtype or None, default None + If None, dtype will be inferred Examples -------- @@ -521,16 +537,14 @@ def from_intervals(cls, data, name=None, copy=False): IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ - if isinstance(data, IntervalIndex): - left, right, closed = data.left, data.right, data.closed - name = name or data.name - else: - data = maybe_convert_platform_interval(data) - left, right, closed = intervals_to_interval_bounds(data) - return cls.from_arrays(left, right, closed, name=name, copy=False) + msg = ('IntervalIndex.from_intervals is deprecated and will be ' + 'removed in a future version; use IntervalIndex(...) instead') + warnings.warn(msg, FutureWarning, stacklevel=2) + return cls(data, closed=closed, name=name, copy=copy, dtype=dtype) @classmethod - def from_tuples(cls, data, closed='right', name=None, copy=False): + def from_tuples(cls, data, closed='right', name=None, copy=False, + dtype=None): """ Construct an IntervalIndex from a list/array of tuples @@ -545,6 +559,8 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): Name to be stored in the index. 
copy : boolean, default False by-default copy the data, this is compat only and ignored + dtype : dtype or None, default None + If None, dtype will be inferred Examples -------- @@ -559,8 +575,6 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): right array IntervalIndex.from_breaks : Construct an IntervalIndex from an array of splits - IntervalIndex.from_intervals : Construct an IntervalIndex from an array - of Interval objects """ if len(data): left, right = [], [] @@ -571,7 +585,16 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): if isna(d): lhs = rhs = np.nan else: - lhs, rhs = d + try: + lhs, rhs = d + except ValueError: + msg = ('IntervalIndex.from_tuples requires tuples of ' + 'length 2, got {tpl}').format(tpl=d) + raise ValueError(msg) + except TypeError: + msg = ('IntervalIndex.from_tuples received an invalid ' + 'item, {tpl}').format(tpl=d) + raise TypeError(msg) left.append(lhs) right.append(rhs) @@ -579,7 +602,8 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): # if we have nulls and we previous had *only* # integer data, then we have changed the dtype - return cls.from_arrays(left, right, closed, name=name, copy=False) + return cls.from_arrays(left, right, closed, name=name, copy=False, + dtype=dtype) def to_tuples(self, na_tuple=True): """ @@ -921,7 +945,7 @@ def get_loc(self, key, method=None): Examples --------- >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) - >>> index = pd.IntervalIndex.from_intervals([i1, i2]) + >>> index = pd.IntervalIndex([i1, i2]) >>> index.get_loc(1) 0 @@ -937,7 +961,7 @@ def get_loc(self, key, method=None): relevant intervals. 
>>> i3 = pd.Interval(0, 2) - >>> overlapping_index = pd.IntervalIndex.from_intervals([i2, i3]) + >>> overlapping_index = pd.IntervalIndex([i2, i3]) >>> overlapping_index.get_loc(1.5) array([0, 1], dtype=int64) """ diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 2adf17a227a59..777f08bd9db2b 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -348,8 +348,7 @@ def _format_labels(bins, precision, right=True, # account that we are all right closed v = adjust(labels[0].left) - i = IntervalIndex.from_intervals( - [Interval(v, labels[0].right, closed='right')]) + i = IntervalIndex([Interval(v, labels[0].right, closed='right')]) labels = i.append(labels[1:]) return labels diff --git a/pandas/tests/categorical/test_constructors.py b/pandas/tests/categorical/test_constructors.py index abea7e9a0e0b4..b29d75bed5c6f 100644 --- a/pandas/tests/categorical/test_constructors.py +++ b/pandas/tests/categorical/test_constructors.py @@ -76,9 +76,7 @@ def test_constructor_unsortable(self): def test_constructor_interval(self): result = Categorical([Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True) - ii = IntervalIndex.from_intervals([Interval(1, 2), - Interval(2, 3), - Interval(3, 6)]) + ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) exp = Categorical(ii, ordered=True) tm.assert_categorical_equal(result, exp) tm.assert_index_equal(result.categories, ii) diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py new file mode 100644 index 0000000000000..34fa3b26d294a --- /dev/null +++ b/pandas/tests/indexes/interval/test_construction.py @@ -0,0 +1,342 @@ +from __future__ import division + +import pytest +import numpy as np +from functools import partial + +from pandas import ( + Interval, IntervalIndex, Index, Int64Index, Float64Index, Categorical, + date_range, timedelta_range, period_range, notna) +from pandas.compat import lzip +from 
pandas.core.dtypes.dtypes import IntervalDtype +import pandas.core.common as com +import pandas.util.testing as tm + + +@pytest.fixture(params=['left', 'right', 'both', 'neither']) +def closed(request): + return request.param + + +@pytest.fixture(params=[None, 'foo']) +def name(request): + return request.param + + +class Base(object): + """ + Common tests for all variations of IntervalIndex construction. Input data + to be supplied in breaks format, then converted by the superclass method + get_kwargs_from_breaks to the expected format. + """ + + @pytest.mark.parametrize('breaks', [ + [3, 14, 15, 92, 653], + np.arange(10, dtype='int64'), + Int64Index(range(-10, 11)), + Float64Index(np.arange(20, 30, 0.5)), + date_range('20180101', periods=10), + date_range('20180101', periods=10, tz='US/Eastern'), + timedelta_range('1 day', periods=10)]) + def test_constructor(self, constructor, breaks, closed, name): + result_kwargs = self.get_kwargs_from_breaks(breaks, closed) + result = constructor(closed=closed, name=name, **result_kwargs) + + assert result.closed == closed + assert result.name == name + assert result.dtype.subtype == getattr(breaks, 'dtype', 'int64') + tm.assert_index_equal(result.left, Index(breaks[:-1])) + tm.assert_index_equal(result.right, Index(breaks[1:])) + + @pytest.mark.parametrize('breaks, subtype', [ + (Int64Index([0, 1, 2, 3, 4]), 'float64'), + (Int64Index([0, 1, 2, 3, 4]), 'datetime64[ns]'), + (Int64Index([0, 1, 2, 3, 4]), 'timedelta64[ns]'), + (Float64Index([0, 1, 2, 3, 4]), 'int64'), + (date_range('2017-01-01', periods=5), 'int64'), + (timedelta_range('1 day', periods=5), 'int64')]) + def test_constructor_dtype(self, constructor, breaks, subtype): + # GH 19262: conversion via dtype parameter + expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) + expected = constructor(**expected_kwargs) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + iv_dtype = IntervalDtype(subtype) + for dtype in (iv_dtype, str(iv_dtype)): + result 
= constructor(dtype=dtype, **result_kwargs) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('breaks', [ + [np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) + def test_constructor_nan(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_subtype = np.float64 + expected_values = np.array(breaks[:-1], dtype=object) + + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(result.values, expected_values) + + @pytest.mark.parametrize('breaks', [ + [], + np.array([], dtype='int64'), + np.array([], dtype='float64'), + np.array([], dtype='datetime64[ns]'), + np.array([], dtype='timedelta64[ns]')]) + def test_constructor_empty(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_values = np.array([], dtype=object) + expected_subtype = getattr(breaks, 'dtype', np.int64) + + assert result.empty + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(result.values, expected_values) + + @pytest.mark.parametrize('breaks', [ + tuple('0123456789'), + list('abcdefghij'), + np.array(list('abcdefghij'), dtype=object), + np.array(list('abcdefghij'), dtype=' with value 0 " + "is not an interval") + with tm.assert_raises_regex(TypeError, msg): + constructor([0, 1]) + + +class TestFromIntervals(TestClassConstructors): + """ + Tests for IntervalIndex.from_intervals, which is deprecated in favor of the + IntervalIndex constructor. Same tests as the IntervalIndex constructor, + plus deprecation test. Should only need to delete this class when removed. 
+ """ + + @pytest.fixture + def constructor(self): + def from_intervals_ignore_warnings(*args, **kwargs): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + return IntervalIndex.from_intervals(*args, **kwargs) + return from_intervals_ignore_warnings + + def test_deprecated(self): + ivs = [Interval(0, 1), Interval(1, 2)] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + IntervalIndex.from_intervals(ivs) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 0e509c241fe51..71a6f78125004 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -4,7 +4,7 @@ import numpy as np from pandas import ( Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp, - Timedelta, date_range, timedelta_range, Categorical) + Timedelta, date_range, timedelta_range) from pandas.compat import lzip import pandas.core.common as com from pandas.tests.indexes.common import Base @@ -40,249 +40,6 @@ def create_index_with_nan(self, closed='right'): np.where(mask, np.arange(10), np.nan), np.where(mask, np.arange(1, 11), np.nan), closed=closed) - @pytest.mark.parametrize('data', [ - Index([0, 1, 2, 3, 4]), - date_range('2017-01-01', periods=5), - date_range('2017-01-01', periods=5, tz='US/Eastern'), - timedelta_range('1 day', periods=5)]) - def test_constructors(self, data, closed, name): - left, right = data[:-1], data[1:] - ivs = [Interval(l, r, closed=closed) for l, r in lzip(left, right)] - expected = IntervalIndex._simple_new( - left=left, right=right, closed=closed, name=name) - - # validate expected - assert expected.closed == closed - assert expected.name == name - assert expected.dtype.subtype == data.dtype - tm.assert_index_equal(expected.left, data[:-1]) - tm.assert_index_equal(expected.right, data[1:]) - - # validated constructors - result = IntervalIndex(ivs, name=name) - tm.assert_index_equal(result, 
expected) - - result = IntervalIndex.from_intervals(ivs, name=name) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_breaks(data, closed=closed, name=name) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_arrays( - left, right, closed=closed, name=name) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_tuples( - lzip(left, right), closed=closed, name=name) - tm.assert_index_equal(result, expected) - - result = Index(ivs, name=name) - assert isinstance(result, IntervalIndex) - tm.assert_index_equal(result, expected) - - # idempotent - tm.assert_index_equal(Index(expected), expected) - tm.assert_index_equal(IntervalIndex(expected), expected) - - result = IntervalIndex.from_intervals(expected) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_intervals( - expected.values, name=expected.name) - tm.assert_index_equal(result, expected) - - left, right = expected.left, expected.right - result = IntervalIndex.from_arrays( - left, right, closed=expected.closed, name=expected.name) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_tuples( - expected.to_tuples(), closed=expected.closed, name=expected.name) - tm.assert_index_equal(result, expected) - - breaks = expected.left.tolist() + [expected.right[-1]] - result = IntervalIndex.from_breaks( - breaks, closed=expected.closed, name=expected.name) - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize('data', [[np.nan], [np.nan] * 2, [np.nan] * 50]) - def test_constructors_nan(self, closed, data): - # GH 18421 - expected_values = np.array(data, dtype=object) - expected_idx = IntervalIndex(data, closed=closed) - - # validate the expected index - assert expected_idx.closed == closed - tm.assert_numpy_array_equal(expected_idx.values, expected_values) - - result = IntervalIndex.from_tuples(data, closed=closed) - tm.assert_index_equal(result, expected_idx) - 
tm.assert_numpy_array_equal(result.values, expected_values) - - result = IntervalIndex.from_breaks([np.nan] + data, closed=closed) - tm.assert_index_equal(result, expected_idx) - tm.assert_numpy_array_equal(result.values, expected_values) - - result = IntervalIndex.from_arrays(data, data, closed=closed) - tm.assert_index_equal(result, expected_idx) - tm.assert_numpy_array_equal(result.values, expected_values) - - if closed == 'right': - # Can't specify closed for IntervalIndex.from_intervals - result = IntervalIndex.from_intervals(data) - tm.assert_index_equal(result, expected_idx) - tm.assert_numpy_array_equal(result.values, expected_values) - - @pytest.mark.parametrize('data', [ - [], - np.array([], dtype='int64'), - np.array([], dtype='float64'), - np.array([], dtype='datetime64[ns]')]) - def test_constructors_empty(self, data, closed): - # GH 18421 - expected_dtype = getattr(data, 'dtype', np.int64) - expected_values = np.array([], dtype=object) - expected_index = IntervalIndex(data, closed=closed) - - # validate the expected index - assert expected_index.empty - assert expected_index.closed == closed - assert expected_index.dtype.subtype == expected_dtype - tm.assert_numpy_array_equal(expected_index.values, expected_values) - - result = IntervalIndex.from_tuples(data, closed=closed) - tm.assert_index_equal(result, expected_index) - tm.assert_numpy_array_equal(result.values, expected_values) - - result = IntervalIndex.from_breaks(data, closed=closed) - tm.assert_index_equal(result, expected_index) - tm.assert_numpy_array_equal(result.values, expected_values) - - result = IntervalIndex.from_arrays(data, data, closed=closed) - tm.assert_index_equal(result, expected_index) - tm.assert_numpy_array_equal(result.values, expected_values) - - if closed == 'right': - # Can't specify closed for IntervalIndex.from_intervals - result = IntervalIndex.from_intervals(data) - tm.assert_index_equal(result, expected_index) - tm.assert_numpy_array_equal(result.values, 
expected_values) - - def test_constructors_errors(self): - - # scalar - msg = (r'IntervalIndex\(...\) must be called with a collection of ' - 'some kind, 5 was passed') - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex(5) - - # not an interval - msg = ("type <(class|type) 'numpy.int64'> with value 0 " - "is not an interval") - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex([0, 1]) - - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex.from_intervals([0, 1]) - - # invalid closed - msg = "invalid options for 'closed': invalid" - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_arrays([0, 1], [1, 2], closed='invalid') - - # mismatched closed within intervals - msg = 'intervals must all be closed on the same side' - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_intervals([Interval(0, 1), - Interval(1, 2, closed='left')]) - - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex([Interval(0, 1), Interval(2, 3, closed='left')]) - - with tm.assert_raises_regex(ValueError, msg): - Index([Interval(0, 1), Interval(2, 3, closed='left')]) - - # mismatched closed inferred from intervals vs constructor. 
- msg = 'conflicting values for closed' - with tm.assert_raises_regex(ValueError, msg): - iv = [Interval(0, 1, closed='both'), Interval(1, 2, closed='both')] - IntervalIndex(iv, closed='neither') - - # no point in nesting periods in an IntervalIndex - msg = 'Period dtypes are not supported, use a PeriodIndex instead' - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_breaks( - pd.period_range('2000-01-01', periods=3)) - - # decreasing breaks/arrays - msg = 'left side of interval must be <= right side' - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_breaks(range(10, -1, -1)) - - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1)) - - # GH 19016: categorical data - data = Categorical(list('01234abcde'), ordered=True) - msg = ('category, object, and string subtypes are not supported ' - 'for IntervalIndex') - - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex.from_breaks(data) - - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex.from_arrays(data[:-1], data[1:]) - - @pytest.mark.parametrize('data', [ - tuple('0123456789'), - list('abcdefghij'), - np.array(list('abcdefghij'), dtype=object), - np.array(list('abcdefghij'), dtype=' Date: Mon, 22 Jan 2018 19:12:30 -0700 Subject: [PATCH 2/3] review edits --- pandas/core/indexes/interval.py | 14 ++++++++++---- pandas/tests/indexes/interval/test_construction.py | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index fa58a2e263f6c..1a7e41d6211c3 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -155,6 +155,8 @@ class IntervalIndex(IntervalMixin, Index): dtype : dtype or None, default None If None, dtype will be inferred + .. versionadded:: 0.23.0 + Attributes ---------- left @@ -430,6 +432,8 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False, dtype : dtype or None, 
default None If None, dtype will be inferred + .. versionadded:: 0.23.0 + Examples -------- >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) @@ -472,6 +476,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False, dtype : dtype or None, default None If None, dtype will be inferred + .. versionadded:: 0.23.0 + Examples -------- >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) @@ -513,6 +519,8 @@ def from_intervals(cls, data, closed=None, name=None, copy=False, dtype : dtype or None, default None If None, dtype will be inferred + .. versionadded:: 0.23.0 + Examples -------- >>> pd.IntervalIndex.from_intervals([pd.Interval(0, 1), @@ -562,6 +570,8 @@ def from_tuples(cls, data, closed='right', name=None, copy=False, dtype : dtype or None, default None If None, dtype will be inferred + .. versionadded:: 0.23.0 + Examples -------- >>> pd.IntervalIndex.from_tuples([(0, 1), (1,2)]) @@ -598,10 +608,6 @@ def from_tuples(cls, data, closed='right', name=None, copy=False, left.append(lhs) right.append(rhs) - # TODO - # if we have nulls and we previous had *only* - # integer data, then we have changed the dtype - return cls.from_arrays(left, right, closed, name=name, copy=False, dtype=dtype) diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index 34fa3b26d294a..5fdf92dcb2044 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -26,7 +26,7 @@ def name(request): class Base(object): """ Common tests for all variations of IntervalIndex construction. Input data - to be supplied in breaks format, then converted by the superclass method + to be supplied in breaks format, then converted by the subclass method get_kwargs_from_breaks to the expected format. 
""" From da2eac2a022c044677b537a903acfa9fc532717a Mon Sep 17 00:00:00 2001 From: jschendel Date: Wed, 24 Jan 2018 01:00:09 -0700 Subject: [PATCH 3/3] additional review edits --- pandas/core/indexes/base.py | 3 ++- pandas/core/indexes/interval.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 906f9b99d9ed5..74c6abeb0ad12 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -200,8 +200,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # interval if is_interval_dtype(data) or is_interval_dtype(dtype): from .interval import IntervalIndex + closed = kwargs.get('closed', None) return IntervalIndex(data, dtype=dtype, name=name, copy=copy, - **kwargs) + closed=closed) # index-like elif isinstance(data, (np.ndarray, Index, ABCSeries)): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1a7e41d6211c3..232770e582763 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -260,7 +260,7 @@ def _simple_new(cls, left, right, closed=None, name=None, copy=False, right = _ensure_index(right, copy=copy) if dtype is not None: - # GH 19262 + # GH 19262: dtype must be an IntervalDtype to override inferred dtype = pandas_dtype(dtype) if not is_interval_dtype(dtype): msg = 'dtype must be an IntervalDtype, got {dtype}' @@ -574,7 +574,7 @@ def from_tuples(cls, data, closed='right', name=None, copy=False, Examples -------- - >>> pd.IntervalIndex.from_tuples([(0, 1), (1,2)]) + >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) IntervalIndex([(0, 1], (1, 2]], closed='right', dtype='interval[int64]') @@ -596,6 +596,7 @@ def from_tuples(cls, data, closed='right', name=None, copy=False, lhs = rhs = np.nan else: try: + # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...] lhs, rhs = d except ValueError: msg = ('IntervalIndex.from_tuples requires tuples of '