diff --git a/pandas/core/api.py b/pandas/core/api.py index fde9bc77c4bd9..103fe740cfa36 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -8,7 +8,7 @@ from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper from pandas.core.format import set_eng_float_format -from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex +from pandas.core.index import Index, CategoricalIndex, Int64Index, RangeIndex, Float64Index, MultiIndex from pandas.core.series import Series, TimeSeries from pandas.core.frame import DataFrame diff --git a/pandas/core/common.py b/pandas/core/common.py index 3d23aeff942dc..878b1af078d4d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2498,6 +2498,11 @@ def is_integer_dtype(arr_or_dtype): not issubclass(tipo, (np.datetime64, np.timedelta64))) +def is_int64_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.int64) + + def is_int_or_datetime_dtype(arr_or_dtype): tipo = _get_dtype_type(arr_or_dtype) return (issubclass(tipo, np.integer) or diff --git a/pandas/core/index.py b/pandas/core/index.py index 8b650fea9b440..f5278ccc9cf6e 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -7,6 +7,7 @@ from pandas.compat import range, zip, lrange, lzip, u, reduce, filter, map from pandas import compat import numpy as np +from math import ceil, floor from sys import getsizeof import pandas.tslib as tslib @@ -21,7 +22,7 @@ from pandas.core.common import (isnull, array_equivalent, is_dtype_equal, is_object_dtype, _values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype, ABCSeries, ABCCategorical, _ensure_object, _ensure_int64, is_bool_indexer, - is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype) + is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype, is_int64_dtype) from pandas.core.config import get_option from pandas.io.common import PerformanceWarning @@ -107,13 +108,36 @@ class Index(IndexOpsMixin, PandasObject): def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): + # RangeIndex pass-through + # Index(start, stop, ...) --> RangeIndex(start, stop, ...) + if isinstance(data, int): + if dtype is None and copy == False: + copy = None + range_constructor = True + elif isinstance(dtype, int): + range_constructor = True + if copy == False: + copy = None + elif isinstance(copy, int): + range_constructor = True + else: + range_constructor = False + + if range_constructor: + return RangeIndex(data, dtype, copy, name) + + # no class inference! if fastpath: return cls._simple_new(data, name) from pandas.tseries.period import PeriodIndex if isinstance(data, (np.ndarray, Index, ABCSeries)): - if issubclass(data.dtype.type, np.datetime64): + if (isinstance(data, RangeIndex) and + (dtype is None or is_int64_dtype(dtype))): + # copy passed-in RangeIndex + return data.copy(name=name) + elif issubclass(data.dtype.type, np.datetime64): from pandas.tseries.index import DatetimeIndex result = DatetimeIndex(data, copy=copy, name=name, **kwargs) if dtype is not None and _o_dtype == dtype: @@ -3299,6 +3323,415 @@ def _wrap_joined_index(self, joined, other): Int64Index._add_logical_methods() +class RangeIndex(Int64Index): + + """ + Immutable Index implementing an monotonic range. RangeIndex is a + memory-saving special case of `Int64Index` limited to representing + monotonic ranges. + + Parameters + ---------- + start : int (default: 0) + stop : int (default: 0) + step : int (default: 1) + name : object, optional + Name to be stored in the index + """ + + _typ = 'rangeindex' + _engine_type = _index.Int64Engine + _attributes = ['name', 'start', 'stop', 'step'] + + def __new__(cls, start=None, stop=None, step=None, name=None, fastpath=False): + if fastpath: + return cls._simple_new(start, stop, step, name=name) + + # RangeIndex() constructor + if start is None and stop is None and step is None: + return cls._simple_new(0, 0, 1, name=name) + + # sort the arguments depending on which are provided + if step is None: + step = 1 + if stop is None: + stop = start + start = 0 + + # check validity of inputs + start = cls._ensure_int(start) + stop = cls._ensure_int(stop) + step = cls._ensure_int(step) + if step == 0: + raise ValueError("Step must not be zero") + + return cls._simple_new(start, stop, step, name) + + @classmethod + def _simple_new(cls, start, stop, step, name=None): + result = object.__new__(cls) + result._start = start + result._stop = stop + result._step = step + result.name = name + return result + + @classmethod + def _ensure_int(cls, value): + try: + int_value = int(value) + if int_value != value: + raise Exception + except Exception: + raise TypeError("Need to pass integral values") + return int_value + + @property + def _data(self): + return np.arange(self.start, self.stop, self.step, dtype=np.int64) + + @property + def dtype(self): + return np.dtype(np.int64) + + @property + def start(self): + return self._start + + @start.setter + def start(self, value): + self._start = self._ensure_int(value) + + @property + def stop(self): + return self._stop + + @stop.setter + def stop(self, value): + self._stop = self._ensure_int(value) + + @property + def step(self): + return self._step + + @step.setter + def step(self, value): + self._step = self._ensure_int(value) + + @cache_readonly(allow_setting=True) + def is_unique(self): + """ return if the index has unique values """ + return True + + @property + def has_duplicates(self): + return not self.is_unique + + def tolist(self): + return list(range(self.start, self.stop, self.step)) + + def _shallow_copy(self, values=None, **kwargs): + """ create a new Index, don't copy the data, use the same object attributes + with passed in attributes taking precedence """ + if values is None: + return RangeIndex(self.start, self.stop, self.step, + name=self.name, fastpath=True) + else: + name = kwargs.get('name', self.name) + return Int64Index(self.values, name=name, copy=False)._shallow_copy(values, **kwargs) + + def copy(self, names=None, name=None, dtype=None, deep=False): + """ + Make a copy of this object. Name and dtype sets those attributes on + the new object. + + Parameters + ---------- + name : string, optional + dtype : numpy dtype or pandas type + + Returns + ------- + copy : Index + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + """ + if dtype is not None and not is_int64_dtype(dtype): + return super(RangeIndex, self).copy(names, name, dtype, deep) + + if name is None: + name = self.name + return RangeIndex(self.start, self.stop, self.step, name, fastpath=True) + + def argsort(self, *args, **kwargs): + """ + return an ndarray indexer of the underlying data + + See also + -------- + numpy.ndarray.argsort + """ + return self._data.argsort(*args, **kwargs) + + def __repr__(self): + attrs = [('start', default_pprint(self.start)), + ('stop', default_pprint(self.stop)), + ('step', default_pprint(self.step)), + ('name', default_pprint(self.name))] + + prepr = u(", ").join([u("%s=%s") % (k, v) + for k, v in attrs]) + res = u("%s(%s)") % (self.__class__.__name__, prepr) + + if not compat.PY3: + # needs to be str in Python 2 + encoding = get_option('display.encoding') + res = res.encode(encoding) + return res + + def __unicode__(self): + """ + Return a string representation for this object. + + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. + """ + if self.start != 0 or self.step != 1: + start = u('%s, ') % default_pprint(self.start) + else: + start = u('') + stop = default_pprint(self.stop) + step = u('') if self.step == 1 else u(', %s') % default_pprint(self.step) + if self.name is None: + name = u('') + else: + name = u(', name=%s') % default_pprint(self.name) + + res = u("%s(%s%s%s%s)") % (self.__class__.__name__, + start, stop, step, name) + return res + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if self.is_(other): + return True + + elif isinstance(other, RangeIndex): + return (self.start == other.start and + self.stop == other.stop and + self.step == other.step) + + try: + return array_equivalent(_values_from_object(self), + _values_from_object(other)) + except TypeError: + # e.g. fails in numpy 1.6 with DatetimeIndex #1681 + return False + + def __reduce__(self): + d = self._get_attributes_dict() + return _new_Index, (self.__class__, d), None + + def view(self, cls=None): + if cls is None or is_int64_dtype(cls): + return self + else: + result = self._shallow_copy() + if isinstance(result, Index): + result._id = self._id + return result + + def intersection(self, other): + """ + Form the intersection of two Index objects. Sortedness of the result is + not guaranteed + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + intersection : Index + """ + if not isinstance(other, RangeIndex): + return super(RangeIndex, self).intersection(other) + + # check whether intervals intersect + # deals with in- and decreasing ranges + int_low = max(min(self.start, self.stop+1), + min(other.start, other.stop+1)) + int_high = min(max(self.stop, self.start+1), + max(other.stop, other.start+1)) + if int_high <= int_low: + return RangeIndex() + + ### Method hint: linear Diophantine equation + # solve intersection + # perf: for identical step sizes, could use cheaper alternative + gcd, s, t = self._extended_gcd(self.step, other.step) + + # check whether element sets intersect + if (self.start - other.start) % gcd: + return RangeIndex() + + # calculate parameters for the RangeIndex describing the intersection + # disregarding the lower bounds + tmp_start = self.start + (other.start-self.start)*self.step//gcd*s + new_step = self.step * other.step // gcd + new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True) + + # adjust index to limiting interval + new_index.start = new_index._min_fitting_element(int_low) + return new_index + + def _min_fitting_element(self, lower_limit): + """Returns the value of the smallest element greater than the limit""" + round = ceil if self.step > 0 else floor + no_steps = round( (float(lower_limit)-self.start) / self.step ) + return self.start + self.step * no_steps + + def _max_fitting_element(self, upper_limit): + """Returns the value of the largest element smaller than the limit""" + round = floor if self.step > 0 else ceil + no_steps = round( (float(upper_limit)-self.start) / self.step ) + return self.start + self.step * no_steps + + def _extended_gcd(self, a, b): + """ + Extended Euclidean algorithms to solve Bezout's identity: + a*x + b*y = gcd(x, y) + Finds one particular solution for x, y: s, t + Returns: gcd, s, t + """ + s, old_s = 0, 1 + t, old_t = 1, 0 + r, old_r = b, a + while r: + quotient = old_r // r + old_r, r = r, old_r - quotient * r + old_s, s = s, old_s - quotient * s + old_t, t = t, old_t - quotient * t + return old_r, old_s, old_t + + def union(self, other): + """ + Form the union of two Index objects and sorts if possible + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + union : Index + """ + # note: could return a RangeIndex in some circumstances + return Int64Index(self.values, copy=False).union(other) + + def join(self, other, how='left', level=None, return_indexers=False): + """ + *this is an internal non-public method* + + Compute join_index and indexers to conform data + structures to the new index. + + Parameters + ---------- + other : Index + how : {'left', 'right', 'inner', 'outer'} + level : int or level name, default None + return_indexers : boolean, default False + + Returns + ------- + join_index, (left_indexer, right_indexer) + """ + if how == 'outer' and self is not other: + # note: could return RangeIndex in more circumstances + return Int64Index(self.values, copy=False).join(other, how, level, return_indexers) + + return super(RangeIndex, self).join(other, how, level, return_indexers) + + def _mul(self, other): + "__mul__() implementation" + try: + int_input = other == int(other) + if int_input: + other = int(other) + except Exception: + int_input = False + + if int_input == True and other != 0: + return RangeIndex(self.start*other, self.stop*other, self.step*other, + fastpath=True) + else: + return super(RangeIndex, self).__mul__(other) + + def __len__(self): + """ + return the length of the RangeIndex + """ + return (self.stop-self.start) // self.step + + @property + def size(self): + return len(self) + + def __getitem__(self, key): + """ + Conserve RangeIndex type for scalar and slice keys. + """ + super_getitem = super(RangeIndex, self).__getitem__ + + if np.isscalar(key): + n = int(key) + if n != key: + return super_getitem(key) + if n < 0: + n = len(self) + key + if n < 0 or n > len(self)-1: + raise IndexError('index %d is out of bounds for axis 0 with size %d' % (key, len(self))) + return self.start + n * self.step + + if isinstance(key, slice): + # complete missing slice information + n_start = 0 if key.start is None else key.start + n_stop = len(self)+1 if key.stop is None else key.stop + n_step = 1 if key.step is None else key.step + + # delegate non-integer slices + if (n_start != int(n_start) and + n_stop != int(n_stop) and + n_step != int(n_step)): + return super_getitem(key) + + # deal with index wrap-around + n_start = len(self)+n_start if n_start < 0 else n_start + n_stop = len(self)+n_stop if n_stop < 0 else n_stop + + + # convert indexes to values + start = self.start + self.step * n_start + stop = self.start + self.step * n_stop + 1 + step = self.step * n_step + + stop = min(stop, self.stop) + return RangeIndex(start, stop, step, self.name, fastpath=True) + + # fall back to Int64Index + return super_getitem(key) + +RangeIndex._add_numeric_methods() +RangeIndex.__mul__ = RangeIndex.__rmul__ = RangeIndex._mul +RangeIndex._add_logical_methods() + + class Float64Index(NumericIndex): """ diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 3c9dbd2e48cb6..bb0e6abf93d59 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -13,7 +13,7 @@ from numpy.testing import assert_array_equal from pandas import (period_range, date_range, Categorical, Series, - Index, Float64Index, Int64Index, MultiIndex, + Index, Float64Index, Int64Index, RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex) from pandas.core.index import InvalidIndexError, NumericIndex from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, @@ -297,10 +297,6 @@ def test_constructor(self): # arr = np.array(5.) # self.assertRaises(Exception, arr.view, Index) - def test_constructor_corner(self): - # corner case - self.assertRaises(TypeError, Index, 0) - def test_constructor_from_series(self): expected = DatetimeIndex([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]) @@ -2447,6 +2443,567 @@ def test_slice_keep_name(self): idx = Int64Index([1, 2], name='asdf') self.assertEqual(idx.name, idx[1:].name) +class TestRangeIndex(Numeric, tm.TestCase): + _holder = RangeIndex + ### what does the following do? + #_multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index = RangeIndex(0, 20, 2)) + self.setup_indices() + + def create_index(self): + return RangeIndex(5) + + def test_too_many_names(self): + def testit(): + self.index.names = ["roger", "harold"] + assertRaisesRegexp(ValueError, "^Length", testit) + + def test_constructor(self): + index = RangeIndex(5) + expected = np.arange(5, dtype=np.int64) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex(1, 5) + expected = np.arange(1, 5, dtype=np.int64) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex(1, 5, 2) + expected = np.arange(1, 5, 2, dtype=np.int64) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex() + expected = np.empty(0, dtype=np.int64) + self.assert_numpy_array_equal(index, expected) + + def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = RangeIndex(1, 5) + self.assertEqual(index.values.dtype, np.int64) + self.assertTrue(index.equals(arr)) + + # non-int raise Exception + self.assertRaises(TypeError, RangeIndex, '1', '10', '1') + self.assertRaises(TypeError, RangeIndex, 1.1, 10.2, 1.3) + + # iterable raise Exception + self.assertRaises(TypeError, RangeIndex, iter([-5, 0, 1, 2])) + + def test_copy(self): + i = RangeIndex(5, name='Foo') + i_copy = i.copy() + self.assertEqual(i_copy.start, 0) + self.assertEqual(i_copy.stop, 5) + self.assertEqual(i_copy.step, 1) + self.assertEqual(i_copy.name, 'Foo') + + def test_view(self): + super(TestRangeIndex, self).test_view() + + i = RangeIndex(name='Foo') + i_view = i.view() + self.assertEqual(i_view.name, 'Foo') + + i_view = i.view('i8') + tm.assert_index_equal(i, i_view) + + i_view = i.view(RangeIndex) + tm.assert_index_equal(i, i_view) + + def test_index_constructor(self): + arr = Index(5) + tm.assert_isinstance(arr, RangeIndex) + + def test_dtype(self): + self.assertEqual(self.index.dtype, np.int64) + + def test_is_monotonic(self): + self.assertTrue(self.index.is_monotonic) + self.assertTrue(self.index.is_monotonic_increasing) + self.assertFalse(self.index.is_monotonic_decreasing) + + index = RangeIndex(4, 0, -1) + self.assertFalse(index.is_monotonic) + self.assertTrue(index.is_monotonic_decreasing) + + index = RangeIndex(1, 2) + self.assertTrue(index.is_monotonic) + self.assertTrue(index.is_monotonic_increasing) + self.assertTrue(index.is_monotonic_decreasing) + + def test_equals(self): + same_values = Index(self.index, dtype=object) + self.assertTrue(self.index.equals(same_values)) + self.assertTrue(same_values.equals(self.index)) + + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + + def test_identical(self): + i = Index(self.index.copy()) + self.assertTrue(i.identical(self.index)) + + same_values_different_type = Index(i, dtype=object) + self.assertFalse(i.identical(same_values_different_type)) + + i = self.index.copy(dtype=object) + i = i.rename('foo') + same_values = Index(i, dtype=object) + self.assertTrue(same_values.identical(self.index.copy(dtype=object))) + + self.assertFalse(i.identical(self.index)) + self.assertTrue(Index(same_values, name='foo', dtype=object + ).identical(i)) + + self.assertFalse( + self.index.copy(dtype=object) + .identical(self.index.copy(dtype='int64'))) + + def test_get_indexer(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_pad(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='pad') + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_backfill(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='backfill') + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) + self.assert_numpy_array_equal(indexer, expected) + + def test_join_outer(self): + ### join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]) + elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1], + dtype=np.int64) + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], + dtype=np.int64) + + tm.assert_isinstance(res, Int64Index) + self.assertFalse(isinstance(res, RangeIndex)) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + ### join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + tm.assert_isinstance(res, Int64Index) + self.assertFalse(isinstance(res, RangeIndex)) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_inner(self): + ### Join with non-RangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([16, 18]) + elidx = np.array([8, 9]) + eridx = np.array([9, 7]) + + tm.assert_isinstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + ### Join two RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + tm.assert_isinstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + + def test_join_left(self): + ### Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + eres = self.index + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], + dtype=np.int64) + + tm.assert_isinstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + ### Join withRangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + + tm.assert_isinstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + ### Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], + dtype=np.int64) + + tm.assert_isinstance(other, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + ### Join withRangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + + tm.assert_isinstance(other, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + def test_join_non_int_index(self): + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = self.index.join(other, how='outer') + outer2 = other.join(self.index, how='outer') + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, + 16, 18], dtype=object) + self.assertTrue(outer.equals(outer2)) + self.assertTrue(outer.equals(expected)) + + inner = self.index.join(other, how='inner') + inner2 = other.join(self.index, how='inner') + expected = Index([6, 8, 10], dtype=object) + self.assertTrue(inner.equals(inner2)) + self.assertTrue(inner.equals(expected)) + + left = self.index.join(other, how='left') + self.assertTrue(left.equals(self.index)) + + left2 = other.join(self.index, how='left') + self.assertTrue(left2.equals(other)) + + right = self.index.join(other, how='right') + self.assertTrue(right.equals(other)) + + right2 = other.join(self.index, how='right') + self.assertTrue(right2.equals(self.index)) + + def test_join_non_unique(self): + other = Index([4, 4, 3, 3]) + + res, lidx, ridx = self.index.join(other, return_indexers=True) + + eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64) + eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.int64) + + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_self(self): + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = self.index.join(self.index, how=kind) + self.assertIs(self.index, joined) + + def test_intersection(self): + ### intersect with Int64Index + other = Index(np.arange(1, 6)) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + result = other.intersection(self.index) + expected = np.sort(np.asarray(np.intersect1d(self.index.values, + other.values))) + self.assert_numpy_array_equal(result, expected) + + ### intersect with increasing RangeIndex + other = Index(1, 6) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + ### intersect with decreasing RangeIndex + other = Index(5, 0, -1) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + self.assertEqual(len(res), 0) + + def test_union_noncomparable(self): + from datetime import datetime, timedelta + # corner case, non-Int64Index + now = datetime.now() + other = Index([now + timedelta(i) for i in range(4)], dtype=object) + result = self.index.union(other) + expected = np.concatenate((self.index, other)) + self.assert_numpy_array_equal(result, expected) + + result = other.union(self.index) + expected = np.concatenate((other, self.index)) + self.assert_numpy_array_equal(result, expected) + + def test_cant_or_shouldnt_cast(self): + # can't + self.assertRaises(TypeError, RangeIndex, 'foo', 'bar', 'baz') + + # shouldn't + self.assertRaises(TypeError, RangeIndex, '0', '1', '2') + + def test_view_Index(self): + self.index.view(Index) + + def test_prevent_casting(self): + result = self.index.astype('O') + self.assertEqual(result.dtype, np.object_) + + def test_take_preserve_name(self): + index = RangeIndex(1, 5, name='foo') + taken = index.take([3, 0, 1]) + self.assertEqual(index.name, taken.name) + + def test_int_name_format(self): + from pandas import Series, DataFrame + index = Index(3, name=0) + s = Series(lrange(3), index) + df = DataFrame(lrange(3), index=index) + repr(s) + repr(df) + + def test_print_unicode_columns(self): + df = pd.DataFrame( + {u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + def test_repr_roundtrip(self): + tm.assert_index_equal(eval(repr(self.index)), self.index) + + def test_unicode_string_with_unicode(self): + idx = Index(1000) + + if compat.PY3: + str(idx) + else: + compat.text_type(idx) + + def test_bytestring_with_unicode(self): + idx = Index(1000) + if compat.PY3: + bytes(idx) + else: + str(idx) + + def test_slice_keep_name(self): + idx = RangeIndex(1, 2, name='asdf') + self.assertEqual(idx.name, idx[1:].name) + + def test_numeric_compat(self): + idx = RangeIndex(5) + didx = Index(np.arange(5,dtype='int64')**2) + + # note: special cases of the following could return RangeIndex + # see _mul() example + + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + result = idx * idx + tm.assert_index_equal(result, didx) + + result = idx / 1 + tm.assert_index_equal(result, idx) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5,dtype='int64') + tm.assert_index_equal(result, Index(np.arange(5,dtype='int64')*5)) + + result = idx * np.arange(5,dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5,dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5,dtype='float64')+0.1) + tm.assert_index_equal(result, + Float64Index(np.arange(5,dtype='float64')*(np.arange(5,dtype='float64')+0.1))) + + # invalid + self.assertRaises(TypeError, lambda : idx * date_range('20130101',periods=5)) + self.assertRaises(ValueError, lambda : idx * self._holder(3)) + self.assertRaises(ValueError, lambda : idx * np.array([1,2])) + + def test_explicit_conversions(self): + + # GH 8608 + # add/sub are overriden explicity for Float/Int Index + idx = RangeIndex(5) + + # float conversions + arr = np.arange(5,dtype='int64')*3.2 + expected = Float64Index(arr) + fidx = idx * 3.2 + tm.assert_index_equal(fidx,expected) + fidx = 3.2 * idx + tm.assert_index_equal(fidx,expected) + + # interops with numpy arrays + expected = Float64Index(arr) + a = np.zeros(5,dtype='float64') + result = fidx - a + tm.assert_index_equal(result,expected) + + expected = Float64Index(-arr) + a = np.zeros(5,dtype='float64') + result = a - fidx + tm.assert_index_equal(result,expected) + + def test_duplicates(self): + # RangeIndex has no duplicates by definition + pass + + def test_ufunc_compat(self): + idx = RangeIndex(5) + result = np.sin(idx) + expected = Float64Index(np.sin(np.arange(5,dtype='int64'))) + tm.assert_index_equal(result, expected) + + def test_extended_gcd(self): + result = self.index._extended_gcd(6, 10) + self.assertEqual(result[0], result[1]*6 + result[2]*10) + self.assertEqual(2, result[0]) + + result = self.index._extended_gcd(10, 6) + self.assertEqual(2, result[1]*10 + result[2]*6) + self.assertEqual(2, result[0]) + + def test_min_fitting_element(self): + result = RangeIndex(0, 20, 2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(1, 6)._min_fitting_element(1) + self.assertEqual(1, result) + + result = RangeIndex(18, -2, -2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(5, 0, -1)._min_fitting_element(1) + self.assertEqual(1, result) + + def test_max_fitting_element(self): + result = RangeIndex(0, 20, 2)._max_fitting_element(17) + self.assertEqual(16, result) + + result = RangeIndex(1, 6)._max_fitting_element(4) + self.assertEqual(4, result) + + result = RangeIndex(18, -2, -2)._max_fitting_element(17) + self.assertEqual(16, result) + + result = RangeIndex(5, 0, -1)._max_fitting_element(4) + self.assertEqual(4, result) + + def test_pickle_compat_construction(self): + # RangeIndex() is a valid constructor + pass + + def test_slice_specialised(self): + # scalar indexing + res = self.index[1] + expected = 2 + self.assertEqual(res, expected) + + res = self.index[-1] + expected = 18 + self.assertEqual(res, expected) + + ### slicing + # slice value completion + index = self.index[:] + expected = self.index + self.assert_numpy_array_equal(index, expected) + + # positive slice values + index = self.index[7:10:2] + expected = np.array([14, 18]) + self.assert_numpy_array_equal(index, expected) + + # negative slice values + index = self.index[-1:-5:-2] + expected = np.array([18, 14]) + self.assert_numpy_array_equal(index, expected) + + # stop overshoot + index = self.index[2:100:4] + expected = np.array([4, 12]) + self.assert_numpy_array_equal(index, expected) + + def test_len_specialised(self): + # TODO: How to test that len is specialised rather than calling + # the parent classes __len__() (which is slow)? + pass + + def test_size_specialised(self): + # TODO: How to test that size is specialised rather than calling + # the parent classes size property (which is slow)? + pass + class DatetimeLike(Base): def test_view(self):