From 9739b2f8889aa7278aa9c90474c5f921a282439c Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Thu, 7 Nov 2013 22:46:11 -0500 Subject: [PATCH 01/13] Start of RangeIndex implementation --- pandas/core/range.py | 219 +++++++++++++++++++++ pandas/tests/test_range.py | 384 +++++++++++++++++++++++++++++++++++++ 2 files changed, 603 insertions(+) create mode 100644 pandas/core/range.py create mode 100644 pandas/tests/test_range.py diff --git a/pandas/core/range.py b/pandas/core/range.py new file mode 100644 index 0000000000000..4f9f6058cacaa --- /dev/null +++ b/pandas/core/range.py @@ -0,0 +1,219 @@ +import pandas as pd +import numpy as np + + +class RangeIndex(object): + """Represents a range with left-open interval. + + Parameters + ---------- + left, right : int + start and end of range (i.e., ``s[left:right]``). If left > right, + assumes reversed (i.e., ``s[left:right:-1]``) + """ + # My thinking on this structure: + # - from a 'set theoretic' standpoint, order doesn't matter, so ascending + # vs. descending is generally an implementation detail + # - left, right are necessary for comparison, but making start and end + # separate makes it much easier to work with + # - prohibiting empty RangeIndex helps simplify edge cases considerably. + # - From pandas' perspective, RangeIndex should behave *exactly* the same + # as an Int64Index. (except for ops where it can yield more RangeIndexes) + # - supporting steps might be possible, but less simple (and less clear + # that it would be useful to pandas proper, given that you'd have to know + # ahead of time that you could convert to a stepped range). Plus, only + # helpful when you specifically have a consistent step + # - Certain operations with RangeIndex could benefit from allowing nested + # ranges, i.e. the union of RangeIndex(10, 5) and RangeIndex(3, 7) could be + # something like: [RangeIndex(10,5), RangeIndex(3, 7)] and then could just + # iterate over that. But that's for after all of this starts working. 
+ # - It would be nice if groupby() accepted an Index + # - It might be valuable to cache the values property of a RangeIndex, but + # I'm not totally convinced that's the best strategy (yet!), unless RangeIndex + # could *replace* itself on its parent. (tradeoff between instantiation time and + # ability to gc values when they aren't needed anymore) + # TODO: Block setting of start and end + def __new__(cls, left, right): + if left == right: + return pd.Index([], dtype='int64') + else: + return object.__new__(cls) + def __init__(self, left, right): + # shouldn't happen + if left == right: + raise ValueError("Can't have empty range") + + l, r = left, right + left, right = int(left), int(right) + + if left != l or right != r: + raise ValueError("Need to pass integral values") + + self.left = left + self.right = right + self.ascending = left < right + if self.ascending: + self.start, self.end = left, right + self.step = 1 + else: + self.start, self.end = right, left + self.step = -1 + + @property + def values(self): + return np.arange(self.left, self.right, (1 if self.ascending else -1), dtype='int64') + + def union(self, other): + """Union this with another RangeIndex. 
Always returns ascending RangeIndex.""" + if not isinstance(other, RangeIndex): + raise NotImplementedError("Other not range index") + + if not self._overlaps(other): + return self.values | other.values + + start = min(self.start, other.start) + end = max(self.end, other.end) + return RangeIndex(start, end) + + def intersection(self, other): + if not isinstance(other, RangeIndex): + raise NotImplementedError("Other not range index") + # not overlapping or start touches end or vice versa + if not self._overlaps(other) or (self.start == other.end) or (self.end == other.start): + return pd.Index([], dtype='int64') + else: + return RangeIndex(max(self.start, other.start), min(self.end, other.end)) + + def view(self, other): + return self + + def difference(self, other): + if not isinstance(other, RangeIndex): + raise NotImplementedError("Other not range index") + + if not self._overlaps(other) or self.start == other.end or self.end == other.start: + return self.view() + # completely contained + elif self.start >= other.start and self.end <= other.end: + return pd.Index([], dtype='int64') + elif self.start < other.start: + return RangeIndex(self.start, other.start) + # starts within other [because must overlap] + elif self.start > other.start: + assert other.end > self.end, (self, other) + return RangeIndex(other.end, self.end) + assert False, "Shouldn't get to here" + + @property + def empty(self): + return False + + def all(self): + return True + + def any(self): + return True + + def __array__(self): + return self.values + + def __or__(self, other): + return self.union(other) + + + __add__ = __or__ + + def __and__(self, other): + return self.intersection(other) + + def __sub__(self, other): + return self.difference(other) + + def equals(self, other): + return self.left == other.left and self.right == other.right + + def _overlaps(self, other): + # cheers to Ned Batchelder on this + # only overlaps if each ranges' end is beyond or *at* the other ranges' start. 
+ # touching does not count as overlapping + return other.end > self.start and self.end > other.start + + # # starts before or on other's start and ends after or on other's start + # return ((self.start <= other.start and self.end >= other.start) or + # # starts within other + # (self.start > other.start and self.start <= other.end)) + def nonzero(self): + if self.start > 0: + return np.arange(len(self)) + else: + # need to skip when self is zero + res = range(len(self)) + res.pop(0 - self.start * self.step) + return np.array(res) + + def __contains__(self, val): + # can only hold integers + try: + val = int(val) + except (TypeError, ValueError): + return False + + if val != val: + return False + + return self.start <= val < self.end + + def __iter__(self): + return iter(xrange(self.left, self.right, self.step)) + + def __len__(self): + return self.end - self.start + + def __repr__(self): + # TODO: Either change to Int64Repr OR to RangeIndex(left, right) + return "RangeIndex(%r)" % (dict(left=self.left, right=self.right, start=self.start, end=self.end, ascending=self.ascending)) + + def get_indexer(self, arr, method=None): + arr = np.asarray(arr, dtype='int64') + indexer = arr - self.start + if not self.ascending: + indexer = (len(self) - 1) - indexer + indexer[(indexer < 0) | (indexer >= len(self) )] = -1 + return indexer + + def get_loc(self, val): + if val in self: + return val - (self.start if self.ascending else self.end) + else: + return -1 + + def __getitem__(self, val): + if isinstance(val, slice): + if slice.step not in (1, -1): + return self.values[val] + if slice.start >= 0 and slice.end >= 0: + start = slice.start if slice.start is None or slice.start > self.start else self.start + end = slice.end if slice.end is None or slice.end < self.end else self.end + + if self.step != slice.step: + start, end = end, start + + return RangeIndex(start, end) + else: + if 0 <= val < len(self): + return self.left + val * self.step + elif -len(self) <= val < 0: + 
return self.right + val * self.step + else: + raise IndexError("%d out of range" % val) + + def __bool__(self): + raise ValueError("The truth value of an array is ambiguous...") # blah blah blah + + __nonzero__ = __bool__ + + def slice_locs(self, start=None, end=None): + pass + + def get_indexer_non_unique(self, arr): + return self.get_indexer(self, arr), np.array([], dtype='int64') diff --git a/pandas/tests/test_range.py b/pandas/tests/test_range.py new file mode 100644 index 0000000000000..ccef4b6c8235f --- /dev/null +++ b/pandas/tests/test_range.py @@ -0,0 +1,384 @@ +import numpy as np +import pandas.util.testing as tm +from pandas.core.index import Index +from pandas.core.range import RangeIndex + + +def assert_almost_equal(a, b): + try: + tm.assert_almost_equal(a, b) + except: + print(a, b) + raise + + +def knownfail(f): + def wrapper(): + try: + f() + except Exception as e: + print("%s: KNOWN FAILURE: %r" % (f.__name__, e)) + else: + raise AssertionError("Known failure passed! %s" % f.__name__) + return wrapper + + +class self(object): + + """Fake for tests!""" + + @staticmethod + def assertEquals(a, b): + assert a == b, "%r != %r" % (a, b) + + assertEqual = assertEquals + + @staticmethod + def assertRaises(exc, f, *args, **kwargs): + try: + f(*args, **kwargs) + except exc: + return True + else: + raise AssertionError( + "Expected exception of type %s to be raised!" 
% exc) + + +def test_basic(): + assert not RangeIndex(1, 0).ascending + assert RangeIndex(0, 100).ascending + r = RangeIndex(10, 5) + assert r.start == 5 + assert r.stop == 10 + assert r.left == 10 + assert r.right == 5 + assert r.step == -1 + r2 = RangeIndex(5, 10) + assert r2.start == 5 + assert r2.stop == 10 + assert r2.left == 5 + assert r2.right == 10 + assert r2.step == 1 + + for i in range(5, 10): + assert i in r, i + assert i in r2, i + + r3 = RangeIndex(-10, -9) + assert r3.start == -10 + assert r3.stop == -9 + assert r3.left == -10 + assert r3.right == -9 + assert r3.ascending + assert r3.step == 1 + r4 = RangeIndex(-8, -15) + assert r4.start == -15 + assert r4.stop == -8 + assert r4.left == -8 + assert r4.right == -15 + assert not r4.ascending + assert r4.step == -1 + assert np.array_equal(RangeIndex(5, 5), Index([], dtype='int64')) + + +def test_as_array(): + # __array__ + assert np.array_equal(RangeIndex(1, 0), np.array([1])) + assert np.array_equal(RangeIndex(0, 100), np.arange(0, 100)) + assert np.array_equal(RangeIndex(1, 0).values, np.array([1])) + assert np.array_equal(RangeIndex(0, 100).values, np.arange(0, 100)) + + +def test_combinations(): + r1 = RangeIndex(1, 10) + r2 = RangeIndex(5, 10) + assert r1._overlaps(r2) + assert r2._overlaps(r1) + + # union and intersection - underlying methods + assert r1.intersection(r2).equals(RangeIndex(5, 10)) + assert r2.intersection(r1).equals(r1.intersection(r2)) + assert (r1 & r2).equals(RangeIndex(5, 10)) + assert (r2 & r1).equals((r1 & r2)) + assert r1.union(r2).equals(r2.union(r1)) + assert r1.union(r2).equals(r1) + # union and intersection - with infix operators + assert (r1 + r2).equals((r2 + r1)) + assert (r1 + r2).equals(r1) + assert (r1 | r2).equals((r2 | r1)) + assert (r1 | r2).equals(r1) + + # difference - underlying method + assert r1.difference(r2).equals(RangeIndex(1, 5)) + assert r2.difference(r1).equals(Index([], dtype='int64')) + assert r1.difference(r1).equals(Index([], dtype='int64')) + 
assert r2.difference(r2).equals(Index([], dtype='int64')) + # difference - with infix operator + assert (r1 - r2).equals(RangeIndex(1, 5)) + assert (r2 - r1).equals(Index([], dtype='int64')) + assert (r1 - r1).equals(Index([], dtype='int64')) + assert (r2 - r2).equals(Index([], dtype='int64')) + +# +# basic container ops +# + +lrange = lambda *args: list(range(*args)) + + +def test_getitem_and_iter(): + r1 = RangeIndex(-10, -5) + r2 = RangeIndex(8, 5) + r3 = RangeIndex(0, 10) + r4 = RangeIndex(-5, 5) + r5 = RangeIndex(3, -15) + pairs = [(r1, lrange(-10, -5)), + (r2, lrange(8, 5, -1)), + (r3, lrange(0, 10)), + (r4, lrange(-5, 5)), + (r5, lrange(3, -15, -1))] + + for ind, rng in pairs: + try: + assert len(ind) == len(rng) + for i in range(len(rng)): + self.assertEqual(ind[i], rng[i]) + self.assertEqual(ind[-i], rng[-i]) + except: + print(i, ind, ind[i]) + print(i, rng, rng[i]) + raise + # basic __iter__ test + assert_almost_equal(list(ind), rng) + assert np.array_equal(ind.values, np.array(list(rng))) + + cases = 10 + for ind in zip(*pairs)[0]: + length = len(ind) + # edges + self.assertRaises(IndexError, lambda: ind[length]) + self.assertRaises(IndexError, lambda: ind[-length - 1]) + for _ in range(cases): + i = np.random.randint(1, 100) + self.assertRaises(IndexError, lambda: ind[length + i]) + self.assertRaises(IndexError, lambda: ind[-length - 1 - i]) + + +def test_slicing(): + r1 = RangeIndex(-10, -5) + r2 = RangeIndex(8, 5) + r3 = RangeIndex(0, 10) + r4 = RangeIndex(-3, 3) + r5 = RangeIndex(3, -2) + pairs = [(r1, lrange(-10, -5)), # can remove later + (r2, np.arange(8, 5, -1)), + (r3, lrange(0, 10)), # can remove later + (r4, np.arange(-3, 3)), + (r5, np.arange(3, -2, -1))] + # TODO: This is incredibly slow - pick something smaller to work with + for ind, rng in pairs: + assert_almost_equal(ind[:], rng[:]) + for i, j in [(i, j) for i in range(len(rng)) for j in range(len(rng)) + if i >= j]: + assert_almost_equal(ind[i:], rng[i:]) + assert_almost_equal(ind[:i], 
rng[:i]) + assert_almost_equal(ind[-i:], rng[-i:]) + assert_almost_equal(ind[:-i], rng[:-i]) + + assert_almost_equal(ind[i:j], rng[i:j]) + assert_almost_equal(ind[i:-j], rng[i:-j]) + assert_almost_equal(ind[-i:-j], rng[-i:-j]) + assert_almost_equal(ind[-i:j], rng[-i:j]) + + assert_almost_equal(ind[j:i], rng[j:i]) + assert_almost_equal(ind[j:-i], rng[j:-i]) + assert_almost_equal(ind[-j:-i], rng[-j:-i]) + assert_almost_equal(ind[-j:i], rng[-j:i]) + # in range + # - forward + # - reversed + # totally out of range + # - forward/reversed below + # - forward/reversed above + # partial in range + # - forward/reversed with low + # - forward/reversed with high + # [:] yields (shallow copy of) self + # Empty slice yields Index([], dtype='int64') + pass + + +def test_slicing_with_step(): + # [::-1] yields self but reversed + pass + + +def test_immutable(): + # setitem + # setslice + pass + + +# +# PandasObject properties +# + + +def test_copy_and_view(): + # shallow / deep copy should be same + pass + + +def test_is__continuity(): + # is should work on views/copies + # is should not work with two separately constructed indices + # is should be False when reversed or sliced + pass + + +def test_equals(): + # should work on views/copies + # should be equal when separately constructed + # should not be equal when reversed/reduced/etc + pass + + +def test_error_on_bool(): + self.assertRaises(ValueError, bool, RangeIndex(1, 5)) + self.assertRaises(ValueError, bool, RangeIndex(-10, -9)) + self.assertRaises(ValueError, bool, RangeIndex(1, 2)) + + +# +# indexing ops +# +@knownfail +def test_get_indexer(): + idx1 = RangeIndex(1, 5) + # TODO: Consider supporting steps + idx2 = Index([2, 4, 6]) + idx3 = Index([1, 6, 7, 1, 2]) + + r1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, [1, 3, -1]) + + r1 = idx1.get_indexer(idx3) + assert_almost_equal(r1, np.array([0, -1, -1, 0, 1])) + + r1 = idx1.get_indexer(idx3, method='pad') + assert_almost_equal(r1, np.array([0, 3, 3, -1, -1])) + + 
rffill1 = idx1.get_indexer(idx3, method='ffill') + assert_almost_equal(r1, rffill1) + + r1 = idx1.get_indexer(idx3, method='backfill') + assert_almost_equal(r1, np.array([0, -1, -1, 0, 1])) + + rbfill1 = idx1.get_indexer(idx3, method='bfill') + assert_almost_equal(r1, rbfill1) + + # r1 = idx3.get_indexer(idx1, method='pad') + # assert_almost_equal(r1, [0, 0, 0, 0, 0]) + + # rffill1 = idx3.get_indexer(idx1, method='ffill') + + # r1 = idx3.get_indexer(idx1, method='backfill') + # assert_almost_equal(r1, [0, -1, -1, -1, -1]) + + # rbfill1 = idx3.get_indexer(idx1, method='bfill') + # assert_almost_equal(r1, rbfill1) + + +@knownfail +def test_range_index_from_range(): + def assert_fails(inpt): + res = RangeIndex.possibly_convert_array(inpt) + assert res is None, "Expected %r to return None" % inpt + + def assert_converts(inpt, expected): + res = RangeIndex.possibly_convert_array(inpt) + assert expected.equals(res), "With input %r, %r != %r" % (inpt, res, + expected) + assert_converts(range(5), RangeIndex(0, 5)) + assert_fails([1, 3, 7, 5]) + assert_fails([4, 10, 11, 13]) + assert_converts(np.arange(50, 40, -1), RangeIndex(50, 40)) + assert_converts([0], RangeIndex(0, 1)) + assert_fails([]) + + # dupe values + assert_fails([10, 9, 8, 7, 10]) + assert_fails([1, 2, 3, 4, 5, 7]) + + # should not try to convert dtype (caller responsibility) + arr = np.arange(5, 15) + assert_converts(arr, RangeIndex(5, 15)) + assert_fails(arr.astype(float)) + + # works with resort + assert_fails([-10, -5, -6, -7, -2, -3, -4, -8, -9]) + assert_fails([9, 8, 5, 7, 6]) + + # possibilities that *won't* work now but could in the future + # (i.e., nested ranges, steps) + assert_fails([15, 13, 11, 9, 7, 5]) + assert_fails([1, 2, 3, 8, 9, 10]) + assert_fails([2, 4, 6, 8, 10, 12]) + + +def test_nonzero(): + r1 = RangeIndex(0, 5) + a1 = np.arange(0, 5) + assert_almost_equal(r1.nonzero(), a1.nonzero()) + r2 = RangeIndex(5, 0) + a2 = np.arange(5, 0, -1) + assert_almost_equal(r2.nonzero(), a2.nonzero()) + 
+ +def test_get_loc(): + pass + + +def test_groupby(): + pass + + +@knownfail +def test_slice_locs(): + idx = RangeIndex(0, 11) + n = len(idx) + + self.assertEquals(idx.slice_locs(start=2), (2, n)) + self.assertEquals(idx.slice_locs(start=3), (3, n)) + self.assertEquals(idx.slice_locs(3, 8), (3, 8)) + self.assertEquals(idx.slice_locs(5, 10), (3, n)) + self.assertEquals(idx.slice_locs(end=8), (0, 8)) + self.assertEquals(idx.slice_locs(end=9), (0, 9)) + + # WHAAA? + # idx2 = idx[::-1] + idx2 = RangeIndex(5, 1) + self.assertRaises(KeyError, idx2.slice_locs, 8, 2) + self.assertRaises(KeyError, idx2.slice_locs, 7, 3) + +# +# Index inference +# + + +@knownfail +def test_sorted_index_yields_range(): + ind = Index(range(10)) + assert isinstance(ind, RangeIndex) + assert ind.equals(RangeIndex(0, 10)) + ind = Index(range(15, -1, -1)), + assert isinstance(ind, RangeIndex) + assert ind.equals(RangeIndex(15, -1)) + ind = Index([1, 3, 5, 7]) + assert not isinstance(ind, RangeIndex) + ind = Index(range(5) + [6]) + assert not isinstance(ind, RangeIndex) + ind = Index([1, 3, 2, 4, 5]) + assert not isinstance(ind, RangeIndex) + ind = Index(np.arange(0, 10).astype(float)) + assert not isinstance(ind, RangeIndex) From 26b4e5c93d1b70057a151741089878b36e53664c Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Fri, 22 Nov 2013 12:51:26 -0500 Subject: [PATCH 02/13] More work on RangeIndex (with cleaned up tests) --- pandas/core/range.py | 19 +- pandas/tests/test_range.py | 393 ++++++++++++++++++------------------- 2 files changed, 211 insertions(+), 201 deletions(-) diff --git a/pandas/core/range.py b/pandas/core/range.py index 4f9f6058cacaa..bc7cb2cab27cd 100644 --- a/pandas/core/range.py +++ b/pandas/core/range.py @@ -1,5 +1,8 @@ import pandas as pd import numpy as np +import pandas.lib as lib +import pandas.algos as _algos +from pandas.core.index import Int64Index class RangeIndex(object): @@ -11,6 +14,7 @@ class RangeIndex(object): start and end of range (i.e., ``s[left:right]``). 
If left > right, assumes reversed (i.e., ``s[left:right:-1]``) """ + _groupby = _algos.groupby_int64 # My thinking on this structure: # - from a 'set theoretic' standpoint, order doesn't matter, so ascending # vs. descending is generally an implementation detail @@ -38,6 +42,7 @@ def __new__(cls, left, right): return pd.Index([], dtype='int64') else: return object.__new__(cls) + def __init__(self, left, right): # shouldn't happen if left == right: @@ -61,7 +66,11 @@ def __init__(self, left, right): @property def values(self): - return np.arange(self.left, self.right, (1 if self.ascending else -1), dtype='int64') + if self.ascending: + vals = np.arange(self.start, self.stop, 1, dtype='int64') + else: + vals = np.arange(self.stop, self.start, -1, dtype='int64') + return vals def union(self, other): """Union this with another RangeIndex. Always returns ascending RangeIndex.""" @@ -154,11 +163,13 @@ def nonzero(self): def __contains__(self, val): # can only hold integers try: + v = val val = int(val) except (TypeError, ValueError): return False - if val != val: + # pd.isnull(val)? 
+ if v != val or val != val: return False return self.start <= val < self.end @@ -171,7 +182,7 @@ def __len__(self): def __repr__(self): # TODO: Either change to Int64Repr OR to RangeIndex(left, right) - return "RangeIndex(%r)" % (dict(left=self.left, right=self.right, start=self.start, end=self.end, ascending=self.ascending)) + return "RangeIndex(%r)" % (dict(start=self.start, end=self.end, ascending=self.ascending)) def get_indexer(self, arr, method=None): arr = np.asarray(arr, dtype='int64') @@ -191,6 +202,8 @@ def __getitem__(self, val): if isinstance(val, slice): if slice.step not in (1, -1): return self.values[val] + if slice.start is None or slice.start > self.start: + start = slice.start if slice.start >= 0 and slice.end >= 0: start = slice.start if slice.start is None or slice.start > self.start else self.start end = slice.end if slice.end is None or slice.end < self.end else self.end diff --git a/pandas/tests/test_range.py b/pandas/tests/test_range.py index ccef4b6c8235f..494a3540a6355 100644 --- a/pandas/tests/test_range.py +++ b/pandas/tests/test_range.py @@ -1,8 +1,11 @@ +import unittest + import numpy as np import pandas.util.testing as tm from pandas.core.index import Index from pandas.core.range import RangeIndex +lrange = lambda *args: list(range(*args)) def assert_almost_equal(a, b): try: @@ -44,209 +47,203 @@ def assertRaises(exc, f, *args, **kwargs): "Expected exception of type %s to be raised!" 
% exc) -def test_basic(): - assert not RangeIndex(1, 0).ascending - assert RangeIndex(0, 100).ascending - r = RangeIndex(10, 5) - assert r.start == 5 - assert r.stop == 10 - assert r.left == 10 - assert r.right == 5 - assert r.step == -1 - r2 = RangeIndex(5, 10) - assert r2.start == 5 - assert r2.stop == 10 - assert r2.left == 5 - assert r2.right == 10 - assert r2.step == 1 - - for i in range(5, 10): - assert i in r, i - assert i in r2, i - - r3 = RangeIndex(-10, -9) - assert r3.start == -10 - assert r3.stop == -9 - assert r3.left == -10 - assert r3.right == -9 - assert r3.ascending - assert r3.step == 1 - r4 = RangeIndex(-8, -15) - assert r4.start == -15 - assert r4.stop == -8 - assert r4.left == -8 - assert r4.right == -15 - assert not r4.ascending - assert r4.step == -1 - assert np.array_equal(RangeIndex(5, 5), Index([], dtype='int64')) - - -def test_as_array(): - # __array__ - assert np.array_equal(RangeIndex(1, 0), np.array([1])) - assert np.array_equal(RangeIndex(0, 100), np.arange(0, 100)) - assert np.array_equal(RangeIndex(1, 0).values, np.array([1])) - assert np.array_equal(RangeIndex(0, 100).values, np.arange(0, 100)) - - -def test_combinations(): - r1 = RangeIndex(1, 10) - r2 = RangeIndex(5, 10) - assert r1._overlaps(r2) - assert r2._overlaps(r1) - - # union and intersection - underlying methods - assert r1.intersection(r2).equals(RangeIndex(5, 10)) - assert r2.intersection(r1).equals(r1.intersection(r2)) - assert (r1 & r2).equals(RangeIndex(5, 10)) - assert (r2 & r1).equals((r1 & r2)) - assert r1.union(r2).equals(r2.union(r1)) - assert r1.union(r2).equals(r1) - # union and intersection - with infix operators - assert (r1 + r2).equals((r2 + r1)) - assert (r1 + r2).equals(r1) - assert (r1 | r2).equals((r2 | r1)) - assert (r1 | r2).equals(r1) - - # difference - underlying method - assert r1.difference(r2).equals(RangeIndex(1, 5)) - assert r2.difference(r1).equals(Index([], dtype='int64')) - assert r1.difference(r1).equals(Index([], dtype='int64')) - assert 
r2.difference(r2).equals(Index([], dtype='int64')) - # difference - with infix operator - assert (r1 - r2).equals(RangeIndex(1, 5)) - assert (r2 - r1).equals(Index([], dtype='int64')) - assert (r1 - r1).equals(Index([], dtype='int64')) - assert (r2 - r2).equals(Index([], dtype='int64')) - -# -# basic container ops -# - -lrange = lambda *args: list(range(*args)) - - -def test_getitem_and_iter(): - r1 = RangeIndex(-10, -5) - r2 = RangeIndex(8, 5) - r3 = RangeIndex(0, 10) - r4 = RangeIndex(-5, 5) - r5 = RangeIndex(3, -15) - pairs = [(r1, lrange(-10, -5)), - (r2, lrange(8, 5, -1)), - (r3, lrange(0, 10)), - (r4, lrange(-5, 5)), - (r5, lrange(3, -15, -1))] - - for ind, rng in pairs: - try: - assert len(ind) == len(rng) - for i in range(len(rng)): - self.assertEqual(ind[i], rng[i]) - self.assertEqual(ind[-i], rng[-i]) - except: - print(i, ind, ind[i]) - print(i, rng, rng[i]) - raise - # basic __iter__ test - assert_almost_equal(list(ind), rng) - assert np.array_equal(ind.values, np.array(list(rng))) - - cases = 10 - for ind in zip(*pairs)[0]: - length = len(ind) - # edges - self.assertRaises(IndexError, lambda: ind[length]) - self.assertRaises(IndexError, lambda: ind[-length - 1]) - for _ in range(cases): - i = np.random.randint(1, 100) - self.assertRaises(IndexError, lambda: ind[length + i]) - self.assertRaises(IndexError, lambda: ind[-length - 1 - i]) - - -def test_slicing(): - r1 = RangeIndex(-10, -5) - r2 = RangeIndex(8, 5) - r3 = RangeIndex(0, 10) - r4 = RangeIndex(-3, 3) - r5 = RangeIndex(3, -2) - pairs = [(r1, lrange(-10, -5)), # can remove later - (r2, np.arange(8, 5, -1)), - (r3, lrange(0, 10)), # can remove later - (r4, np.arange(-3, 3)), - (r5, np.arange(3, -2, -1))] - # TODO: This is incredibly slow - pick something smaller to work with - for ind, rng in pairs: - assert_almost_equal(ind[:], rng[:]) - for i, j in [(i, j) for i in range(len(rng)) for j in range(len(rng)) - if i >= j]: - assert_almost_equal(ind[i:], rng[i:]) - assert_almost_equal(ind[:i], 
rng[:i]) - assert_almost_equal(ind[-i:], rng[-i:]) - assert_almost_equal(ind[:-i], rng[:-i]) - - assert_almost_equal(ind[i:j], rng[i:j]) - assert_almost_equal(ind[i:-j], rng[i:-j]) - assert_almost_equal(ind[-i:-j], rng[-i:-j]) - assert_almost_equal(ind[-i:j], rng[-i:j]) - - assert_almost_equal(ind[j:i], rng[j:i]) - assert_almost_equal(ind[j:-i], rng[j:-i]) - assert_almost_equal(ind[-j:-i], rng[-j:-i]) - assert_almost_equal(ind[-j:i], rng[-j:i]) - # in range - # - forward - # - reversed - # totally out of range - # - forward/reversed below - # - forward/reversed above - # partial in range - # - forward/reversed with low - # - forward/reversed with high - # [:] yields (shallow copy of) self - # Empty slice yields Index([], dtype='int64') - pass - - -def test_slicing_with_step(): - # [::-1] yields self but reversed - pass - - -def test_immutable(): - # setitem - # setslice - pass - +class TestRangeIndex(unittest.TestCase): + + def test_basic(self): + self.assert_(not RangeIndex(1, 0).ascending) + self.assert_(RangeIndex(0, 100).ascending) + # make sure conditions work correctly + # descending + r = RangeIndex(10, 5) + self.assertEqual(r.start, 5) + self.assertEqual(r.stop, 10) + self.assertEqual(r.left, 10) + self.assertEqual(r.right, 5) + self.assertEqual(r.step, -1) + + # ascending + r2 = RangeIndex(5, 10) + self.assertEqual(r2.start, 5) + self.assertEqual(r2.stop, 10) + self.assertEqual(r2.left, 5) + self.assertEqual(r2.right, 10) + self.assertEqual(r2.step, 1) + + # negative values + r3 = RangeIndex(-10, -9) + self.assertEqual(r3.start, -10) + self.assertEqual(r3.stop, -9) + self.assert_(r3.ascending) + self.assertEqual(r3.step, 1) + + r4 = RangeIndex(-8, -15) + self.assertEqual(r4.start, -15) + self.assertEqual(r4.stop, -8) + self.assert_(not r4.ascending) + self.assertEqual(r4.step, -1) + + def test_contains(self): + + r = RangeIndex(10, 5) + r2 = RangeIndex(5, 10) + for i in range(5, 10): + self.assert_(i in r) + self.assert_(i in r2) + + def test_empty(self): 
+ assert np.array_equal(RangeIndex(5, 5), Index([], dtype='int64')) + + def test_asarray(self): + # __array__ + self.assert_(np.array_equal(np.asarray(RangeIndex(1, 0)), + np.array([1]))) + self.assert_(np.array_equal(np.asarray(RangeIndex(0, 100)), + np.arange(0, 100))) + self.assert_(np.array_equal(RangeIndex(1, 0).values, np.array([1]))) + self.assert_(np.array_equal(RangeIndex(0, 100).values, + np.arange(0, 100))) + + def test_set_ops(self): + r1 = RangeIndex(1, 10) + r2 = RangeIndex(5, 10) + self.assert_(r1._overlaps(r2)) + self.assert_(r2._overlaps(r1)) + # union and intersection - underlying methods) + self.assert_(r1.intersection(r2).equals(RangeIndex(5, 10))) + self.assert_(r2.intersection(r1).equals(r1.intersection(r2))) + self.assert_((r1 & r2).equals(RangeIndex(5, 10))) + self.assert_((r2 & r1).equals((r1 & r2))) + self.assert_(r1.union(r2).equals(r2.union(r1))) + self.assert_(r1.union(r2).equals(r1)) + # union and intersection - with infix operators) + self.assert_((r1 + r2).equals((r2 + r1))) + self.assert_((r1 + r2).equals(r1)) + self.assert_((r1 | r2).equals((r2 | r1))) + self.assert_((r1 | r2).equals(r1)) + + # difference - underlying method) + self.assert_(r1.difference(r2).equals(RangeIndex(1, 5))) + self.assert_(r2.difference(r1).equals(Index([], dtype='int64'))) + self.assert_(r1.difference(r1).equals(Index([], dtype='int64'))) + self.assert_(r2.difference(r2).equals(Index([], dtype='int64'))) + # difference - with infix operator) + self.assert_((r1 - r2).equals(RangeIndex(1, 5))) + self.assert_((r2 - r1).equals(Index([], dtype='int64'))) + self.assert_((r1 - r1).equals(Index([], dtype='int64'))) + self.assert_((r2 - r2).equals(Index([], dtype='int64'))) + + def test_getitem_and_iter(self): + # basic container ops + pairs = [(RangeIndex(-10, -5), lrange(-10, -5)), + (RangeIndex(8, 5), lrange(8, 5, -1)), + (RangeIndex(0, 10), lrange(0, 10)), + (RangeIndex(-5, 5), lrange(-5, 5)), + (RangeIndex(3, -15), lrange(3, -15, -1))] + + for ind, rng in 
pairs: + try: + self.assertEqual(len(ind), len(rng)) + for i in range(len(rng)): + self.assertEqual(ind[i], rng[i]) + self.assertEqual(ind[-i], rng[-i]) + except: + print(i, ind, ind[i]) + print(i, rng, rng[i]) + raise + # basic __iter__ test + assert_almost_equal(list(ind), rng) + assert np.array_equal(ind.values, np.array(list(rng))) + + cases = 10 + for ind in zip(*pairs)[0]: + length = len(ind) + # edges + self.assertRaises(IndexError, lambda: ind[length]) + self.assertRaises(IndexError, lambda: ind[-length - 1]) + for _ in range(cases): + i = np.random.randint(1, 100) + self.assertRaises(IndexError, lambda: ind[length + i]) + self.assertRaises(IndexError, lambda: ind[-length - 1 - i]) + + def test_slicing(self): + pairs = [(RangeIndex(-10, -5), lrange(-10, -5)), # can remove later + (RangeIndex(8, 5), np.arange(8, 5, -1)), + (RangeIndex(0, 10), lrange(0, 10)), # can remove later + (RangeIndex(-3, 3), np.arange(-3, 3)), + (RangeIndex(3, -2), np.arange(3, -2, -1))] + # TODO: This is incredibly slow - pick something smaller to work with + for ind, rng in pairs: + assert_almost_equal(ind[:], rng[:]) + for i, j in [(i, j) for i in range(len(rng)) + for j in range(len(rng)) if i >= j]: + assert_almost_equal(ind[i:], rng[i:]) + assert_almost_equal(ind[:i], rng[:i]) + assert_almost_equal(ind[-i:], rng[-i:]) + assert_almost_equal(ind[:-i], rng[:-i]) + + assert_almost_equal(ind[i:j], rng[i:j]) + assert_almost_equal(ind[i:-j], rng[i:-j]) + assert_almost_equal(ind[-i:-j], rng[-i:-j]) + assert_almost_equal(ind[-i:j], rng[-i:j]) + + assert_almost_equal(ind[j:i], rng[j:i]) + assert_almost_equal(ind[j:-i], rng[j:-i]) + assert_almost_equal(ind[-j:-i], rng[-j:-i]) + assert_almost_equal(ind[-j:i], rng[-j:i]) + # in range + # - forward + # - reversed + # totally out of range + # - forward/reversed below + # - forward/reversed above + # partial in range + # - forward/reversed with low + # - forward/reversed with high + # [:] yields (shallow copy of) self + # Empty slice yields 
Index([], dtype='int64') + pass + + def test_slicing_with_step_of_1(self): + # [::-1] yields self but reversed + r1 = RangeIndex(-5, 5) + r2 = RangeIndex(20, 10) + self.assert_(r1[::-1].equals(RangeIndex(5, -5))) + self.assert_(r2[::-1].equals(RangeIndex(10, 20))) + self.assert_(r1[::1].equals(r1)) + self.assert_(r2[::1].equals(r2)) + + def test_slicing_with_other_steps(self): + pass + + def test_immutable(self): + # setitem + # setslice + pass # # PandasObject properties # - -def test_copy_and_view(): - # shallow / deep copy should be same - pass - - -def test_is__continuity(): - # is should work on views/copies - # is should not work with two separately constructed indices - # is should be False when reversed or sliced - pass - - -def test_equals(): - # should work on views/copies - # should be equal when separately constructed - # should not be equal when reversed/reduced/etc - pass - - -def test_error_on_bool(): - self.assertRaises(ValueError, bool, RangeIndex(1, 5)) - self.assertRaises(ValueError, bool, RangeIndex(-10, -9)) - self.assertRaises(ValueError, bool, RangeIndex(1, 2)) + def test_copy_and_view(self): + # shallow / deep copy should be same + pass + + def test_is__continuity(): + # is should work on views/copies + # is should not work with two separately constructed indices + # is should be False when reversed or sliced + pass + + def test_equals(): + # should work on views/copies + # should be equal when separately constructed + # should not be equal when reversed/reduced/etc + pass + + def test_error_on_bool(self): + self.assertRaises(ValueError, bool, RangeIndex(1, 5)) + self.assertRaises(ValueError, bool, RangeIndex(-10, -9)) + self.assertRaises(ValueError, bool, RangeIndex(1, 2)) # From c93f263b8826d6fa931cd961274f06417a5ab9d9 Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Sun, 22 Dec 2013 19:29:39 -0500 Subject: [PATCH 03/13] add note on slice locs for non-monotonically increasing Index objects --- pandas/tests/test_range.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_range.py b/pandas/tests/test_range.py index 494a3540a6355..2aa6079e5c930 100644 --- a/pandas/tests/test_range.py +++ b/pandas/tests/test_range.py @@ -351,9 +351,9 @@ def test_slice_locs(): self.assertEquals(idx.slice_locs(5, 10), (3, n)) self.assertEquals(idx.slice_locs(end=8), (0, 8)) self.assertEquals(idx.slice_locs(end=9), (0, 9)) + # monotonic *increasing* indexes allow slice_locs that aren't in the Index + self.assertEquals(idx.slice_locs(-5, 50), (0, 11)) - # WHAAA? - # idx2 = idx[::-1] idx2 = RangeIndex(5, 1) self.assertRaises(KeyError, idx2.slice_locs, 8, 2) self.assertRaises(KeyError, idx2.slice_locs, 7, 3) From 9f34a97f15ef3dbaa7bab8ed9a1577eb79b3be31 Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Sun, 22 Dec 2013 21:21:10 -0500 Subject: [PATCH 04/13] Flesh out RangeIndex test cases (including translating many of the Int64Index tests to be RangeIndex tests instead) --- pandas/tests/test_range.py | 749 +++++++++++++++++++++++++++++-------- 1 file changed, 595 insertions(+), 154 deletions(-) diff --git a/pandas/tests/test_range.py b/pandas/tests/test_range.py index 2aa6079e5c930..4d87b957072d0 100644 --- a/pandas/tests/test_range.py +++ b/pandas/tests/test_range.py @@ -2,8 +2,9 @@ import numpy as np import pandas.util.testing as tm -from pandas.core.index import Index +from pandas.core.index import Index, Int64Index from pandas.core.range import RangeIndex +import pandas.compat as compat lrange = lambda *args: list(range(*args)) @@ -57,16 +58,16 @@ def test_basic(self): r = RangeIndex(10, 5) self.assertEqual(r.start, 5) self.assertEqual(r.stop, 10) - self.assertEqual(r.left, 10) - self.assertEqual(r.right, 5) + # self.assertEqual(r.left, 10) + # self.assertEqual(r.right, 5) self.assertEqual(r.step, -1) # ascending r2 = RangeIndex(5, 10) self.assertEqual(r2.start, 5) self.assertEqual(r2.stop, 10) - self.assertEqual(r2.left, 5) - self.assertEqual(r2.right, 10) + # 
self.assertEqual(r2.left, 5) + # self.assertEqual(r2.right, 10) self.assertEqual(r2.step, 1) # negative values @@ -82,6 +83,17 @@ def test_basic(self): self.assert_(not r4.ascending) self.assertEqual(r4.step, -1) + def test_bad_input(self): + with tm.assertRaisesRegexp(TypeError, 'Must be integer'): + RangeIndex(0, 1.25) + + with tm.assertRaisesRegexp(TypeError, 'invalid literal'): + RangeIndex(0, 'a') + + with tm.assertRaisesRegexp(TypeError, 'Must be integer'): + RangeIndex('0', '5') + + def test_contains(self): r = RangeIndex(10, 5) @@ -190,27 +202,22 @@ def test_slicing(self): assert_almost_equal(ind[j:-i], rng[j:-i]) assert_almost_equal(ind[-j:-i], rng[-j:-i]) assert_almost_equal(ind[-j:i], rng[-j:i]) - # in range - # - forward - # - reversed - # totally out of range - # - forward/reversed below - # - forward/reversed above - # partial in range - # - forward/reversed with low - # - forward/reversed with high - # [:] yields (shallow copy of) self - # Empty slice yields Index([], dtype='int64') - pass + assert_almost_equal(ind[0:0], Index([], dtype='int64')) + assert_almost_equal(ind[8:8], Index([], dtype='int64')) def test_slicing_with_step_of_1(self): # [::-1] yields self but reversed - r1 = RangeIndex(-5, 5) - r2 = RangeIndex(20, 10) - self.assert_(r1[::-1].equals(RangeIndex(5, -5))) - self.assert_(r2[::-1].equals(RangeIndex(10, 20))) - self.assert_(r1[::1].equals(r1)) - self.assert_(r2[::1].equals(r2)) + rng1 = RangeIndex(-5, 5) + rev1 = rng1[::-1] + self.assertEqual(list(rev1), list(range(4, -6, -1))) + self.assert_(rev1.equals(RangeIndex(4, -6))) + self.assert_(rev1.equals(Index(np.arange(4, -6, -1)))) + + rng2 = RangeIndex(20, 10) + rev2 = rng2[::-1] + self.assertEqual(list(rev2), list(range(11, 21, 1))) + self.assert_(rev2.equals(RangeIndex(11, 21))) + self.assert_(rev2.equals(Index(np.arange(11, 21, 1)))) def test_slicing_with_other_steps(self): pass @@ -228,13 +235,13 @@ def test_copy_and_view(self): # shallow / deep copy should be same pass - def 
test_is__continuity(): + def test_is__continuity(self): # is should work on views/copies # is should not work with two separately constructed indices # is should be False when reversed or sliced pass - def test_equals(): + def test_equals(self): # should work on views/copies # should be equal when separately constructed # should not be equal when reversed/reduced/etc @@ -245,137 +252,571 @@ def test_error_on_bool(self): self.assertRaises(ValueError, bool, RangeIndex(-10, -9)) self.assertRaises(ValueError, bool, RangeIndex(1, 2)) + def test_all_and_any(self): + zero_only = [RangeIndex(0, 1), RangeIndex(0, -1)] + assert not any(x.any() for x in zero_only) + assert not any(x.all() for x in zero_only) + assert RangeIndex(5, 10).any() + assert RangeIndex(5, 10).all() + assert not RangeIndex(-5, 5).all() + assert RangeIndex(-5, 5).any() + assert RangeIndex(-3, -1).any() + assert not RangeIndex(-3, 1).all() + assert RangeIndex(-3, 0).all() + + # + # indexing ops + # + def test_get_indexer(self): + idx1 = RangeIndex(1, 5) + # TODO: Consider supporting steps + idx2 = Index([2, 4, 6]) + idx3 = Index([1, 6, 7, 1, 2]) + + r1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, [1, 3, -1]) + + r1 = idx1.get_indexer(idx3) + assert_almost_equal(r1, np.array([0, -1, -1, 0, 1])) + + r1 = idx1.get_indexer(idx3, method='pad') + assert_almost_equal(r1, np.array([0, 3, 3, -1, -1])) + + rffill1 = idx1.get_indexer(idx3, method='ffill') + assert_almost_equal(r1, rffill1) + + r1 = idx1.get_indexer(idx3, method='backfill') + assert_almost_equal(r1, np.array([0, -1, -1, 0, 1])) + + rbfill1 = idx1.get_indexer(idx3, method='bfill') + assert_almost_equal(r1, rbfill1) + + # r1 = idx3.get_indexer(idx1, method='pad') + # assert_almost_equal(r1, [0, 0, 0, 0, 0]) + + # rffill1 = idx3.get_indexer(idx1, method='ffill') + + # r1 = idx3.get_indexer(idx1, method='backfill') + # assert_almost_equal(r1, [0, -1, -1, -1, -1]) + + # rbfill1 = idx3.get_indexer(idx1, method='bfill') + # assert_almost_equal(r1, 
rbfill1) + + def test_range_index_from_range(self): + def assert_fails(inpt): + res = RangeIndex.possibly_convert_array(inpt) + assert res is None, "Expected %r to return None" % inpt + + def assert_converts(inpt, expected): + res = RangeIndex.possibly_convert_array(inpt) + assert expected.equals(res), "With input %r, %r != %r" % (inpt, res, + expected) + assert_converts(range(5), RangeIndex(0, 5)) + assert_fails([1, 3, 7, 5]) + assert_fails([4, 10, 11, 13]) + assert_converts(np.arange(50, 40, -1), RangeIndex(50, 40)) + assert_converts([0], RangeIndex(0, 1)) + assert_fails([]) + + # dupe values + assert_fails([10, 9, 8, 7, 10]) + assert_fails([1, 2, 3, 4, 5, 7]) + + # should not try to convert dtype (caller responsibility) + arr = np.arange(5, 15) + assert_converts(arr, RangeIndex(5, 15)) + assert_fails(arr.astype(float)) + + # works with resort + assert_fails([-10, -5, -6, -7, -2, -3, -4, -8, -9]) + assert_fails([9, 8, 5, 7, 6]) + + # possibilities that *won't* work now but could in the future + # (i.e., nested ranges, steps) + assert_fails([15, 13, 11, 9, 7, 5]) + assert_fails([1, 2, 3, 8, 9, 10]) + assert_fails([2, 4, 6, 8, 10, 12]) + + def test_nonzero(self): + r1 = RangeIndex(0, 5) + a1 = np.arange(0, 5) + assert_almost_equal(r1.nonzero(), a1.nonzero()) + r2 = RangeIndex(5, 0) + a2 = np.arange(5, 0, -1) + assert_almost_equal(r2.nonzero(), a2.nonzero()) + assert_almost_equal(RangeIndex(-10, -5).nonzero(), + np.arange(-10, -5).nonzero()) + + def test_get_loc(self): + pass -# -# indexing ops -# -@knownfail -def test_get_indexer(): - idx1 = RangeIndex(1, 5) - # TODO: Consider supporting steps - idx2 = Index([2, 4, 6]) - idx3 = Index([1, 6, 7, 1, 2]) - - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, [1, 3, -1]) - - r1 = idx1.get_indexer(idx3) - assert_almost_equal(r1, np.array([0, -1, -1, 0, 1])) - - r1 = idx1.get_indexer(idx3, method='pad') - assert_almost_equal(r1, np.array([0, 3, 3, -1, -1])) - - rffill1 = idx1.get_indexer(idx3, method='ffill') - 
assert_almost_equal(r1, rffill1) - - r1 = idx1.get_indexer(idx3, method='backfill') - assert_almost_equal(r1, np.array([0, -1, -1, 0, 1])) - - rbfill1 = idx1.get_indexer(idx3, method='bfill') - assert_almost_equal(r1, rbfill1) - - # r1 = idx3.get_indexer(idx1, method='pad') - # assert_almost_equal(r1, [0, 0, 0, 0, 0]) - - # rffill1 = idx3.get_indexer(idx1, method='ffill') - - # r1 = idx3.get_indexer(idx1, method='backfill') - # assert_almost_equal(r1, [0, -1, -1, -1, -1]) - - # rbfill1 = idx3.get_indexer(idx1, method='bfill') - # assert_almost_equal(r1, rbfill1) - - -@knownfail -def test_range_index_from_range(): - def assert_fails(inpt): - res = RangeIndex.possibly_convert_array(inpt) - assert res is None, "Expected %r to return None" % inpt - - def assert_converts(inpt, expected): - res = RangeIndex.possibly_convert_array(inpt) - assert expected.equals(res), "With input %r, %r != %r" % (inpt, res, - expected) - assert_converts(range(5), RangeIndex(0, 5)) - assert_fails([1, 3, 7, 5]) - assert_fails([4, 10, 11, 13]) - assert_converts(np.arange(50, 40, -1), RangeIndex(50, 40)) - assert_converts([0], RangeIndex(0, 1)) - assert_fails([]) - - # dupe values - assert_fails([10, 9, 8, 7, 10]) - assert_fails([1, 2, 3, 4, 5, 7]) - - # should not try to convert dtype (caller responsibility) - arr = np.arange(5, 15) - assert_converts(arr, RangeIndex(5, 15)) - assert_fails(arr.astype(float)) - - # works with resort - assert_fails([-10, -5, -6, -7, -2, -3, -4, -8, -9]) - assert_fails([9, 8, 5, 7, 6]) - - # possibilities that *won't* work now but could in the future - # (i.e., nested ranges, steps) - assert_fails([15, 13, 11, 9, 7, 5]) - assert_fails([1, 2, 3, 8, 9, 10]) - assert_fails([2, 4, 6, 8, 10, 12]) - - -def test_nonzero(): - r1 = RangeIndex(0, 5) - a1 = np.arange(0, 5) - assert_almost_equal(r1.nonzero(), a1.nonzero()) - r2 = RangeIndex(5, 0) - a2 = np.arange(5, 0, -1) - assert_almost_equal(r2.nonzero(), a2.nonzero()) - - -def test_get_loc(): - pass - - -def 
test_groupby(): - pass - - -@knownfail -def test_slice_locs(): - idx = RangeIndex(0, 11) - n = len(idx) - - self.assertEquals(idx.slice_locs(start=2), (2, n)) - self.assertEquals(idx.slice_locs(start=3), (3, n)) - self.assertEquals(idx.slice_locs(3, 8), (3, 8)) - self.assertEquals(idx.slice_locs(5, 10), (3, n)) - self.assertEquals(idx.slice_locs(end=8), (0, 8)) - self.assertEquals(idx.slice_locs(end=9), (0, 9)) - # monotonic *increasing* indexes allow slice_locs that aren't in the Index - self.assertEquals(idx.slice_locs(-5, 50), (0, 11)) + def test_groupby(self): + pass - idx2 = RangeIndex(5, 1) - self.assertRaises(KeyError, idx2.slice_locs, 8, 2) - self.assertRaises(KeyError, idx2.slice_locs, 7, 3) + def test_slice_locs(self): + idx = RangeIndex(0, 11) + n = len(idx) + + self.assertEquals(idx.slice_locs(start=2), (2, n)) + self.assertEquals(idx.slice_locs(start=3), (3, n)) + self.assertEquals(idx.slice_locs(3, 8), (3, 8)) + self.assertEquals(idx.slice_locs(5, 10), (3, n)) + self.assertEquals(idx.slice_locs(end=8), (0, 8)) + self.assertEquals(idx.slice_locs(end=9), (0, 9)) + # monotonic *increasing* indexes allow slice_locs that aren't in the Index + self.assertEquals(idx.slice_locs(-5, 50), (0, 11)) + self.assertRaises(KeyError, lambda : idx[::-1].slice_locs(-5, 50)) + + idx2 = RangeIndex(5, 1) + self.assertRaises(KeyError, idx2.slice_locs, 8, 2) + self.assertRaises(KeyError, idx2.slice_locs, 7, 3) + + # + # Index inference + # + + @knownfail + def test_sorted_index_yields_range(self): + ind = Index(range(10)) + assert isinstance(ind, RangeIndex) + assert ind.equals(RangeIndex(0, 10)) + ind = Index(range(15, -1, -1)), + assert isinstance(ind, RangeIndex) + assert ind.equals(RangeIndex(15, -1)) + ind = Index([1, 3, 5, 7]) + assert not isinstance(ind, RangeIndex) + ind = Index(range(5) + [6]) + assert not isinstance(ind, RangeIndex) + ind = Index([1, 3, 2, 4, 5]) + assert not isinstance(ind, RangeIndex) + ind = Index(np.arange(0, 10).astype(float)) + assert not 
isinstance(ind, RangeIndex) + + +class TestRangeIndexInt64Compat(tm.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + self.index = RangeIndex(10, 20) + + def test_too_many_names(self): + with tm.assertRaisesRegexp(ValueError, "^Length"): + self.index.names = ["roger", "harold"] + + def test_constructor(self): + # TODO: Fill this in + raise AssertionError("Decide what to do here!") + # scalar raise Exception + self.assertRaises(TypeError, RangeIndex, 5) + + def test_basic_properties(self): + self.assertTrue(self.index.is_unique) + self.assertTrue(self.index.is_monotonic) + self.assertEqual(self.index.dtype, np.int64) + + def test_basic_functions(self): + self.assertTrue(self.index.is_numeric()) + self.assertTrue(self.index.is_integer()) + self.assertTrue(self.index.holds_integer()) + self.assertFalse(self.index.is_mixed()) + self.assertFalse(self.index.is_floating()) + + self.assertEqual(self.index.nlevels, 1) + self.assertEqual(self.index.inferred_type, 'integer') + self.assertEqual(self.get_duplicates(), []) + + def test_hash_error(self): + with tm.assertRaisesRegexp(TypeError, + "unhashable type: %r" % + type(self.index).__name__): + hash(self.index) + + def test_copy(self): + i = RangeIndex(0, 1, name='Foo') + i_copy = i.copy() + self.assert_(i_copy.name == 'Foo') + + def test_view(self): + i = RangeIndex(0, 1, name='Foo') + i_view = i.view() + self.assert_(i_view.name == 'Foo') + + def test_dtype(self): + self.assert_(self.index.dtype == np.int64) + + def test_is_monotonic(self): + # monotonic is monotonically *increasing* + self.assertTrue(RangeIndex(0, 5).is_monotonic) + self.assertFalse(RangeIndex(5, 0).is_monotonic) + self.assertFalse(RangeIndex(-5, 5)[::-1].is_monotonic) + # TODO: If you have empty Index, need to match regular Index + # self.assertTrue(RangeIndex(0, 0).is_monotonic) + + def test_equals(self): + same_values = Index(self.index, dtype=object) + self.assert_(self.index.equals(same_values)) + 
self.assert_(same_values.equals(self.index)) + + def test_identical(self): + i = self.index.copy() + same_values = RangeIndex(i.left, i.right, i.step, name=i.name) + self.assert_(i.identical(same_values)) + int64_values = Int64Index(list(i), name=i.name) + self.assertFalse(i.identical(int64_values)) + self.assertFalse(int64_values.identical(i)) + + i = self.index.copy() + i = i.rename('foo') + same_values = RangeIndex(i.left, i.right, i.step) + # no name passed through constructor + self.assert_(same_values.identical(self.index)) + self.assertFalse(i.identical(same_values)) + + def test_get_indexer(self): + def test_indexer(target, expected): + indexer = self.index.get_indexer(target) + self.assert_(np.array_equal(indexer, expected)) + + test_indexer(RangeIndex(-5, 5), + np.array([-1] * 10)) + + test_indexer(RangeIndex(5, 15), + np.array([-1, -1, -1, -1, -1, 0, 1, 2, 3, 4])) + + test_indexer(Index(list('abcd') + [11]), + np.array([-1, -1, -1, -1, 1])) + + test_indexer(Index([0.5, 0.25, 1, 18.0, 17]), + np.array([-1, -1, -1, 8, 7])) + + def test_get_indexer_fails_non_monotonic(self): + ind = RangeIndex(10, 5, -1) + + with tm.assertRaisesRegexp(ValueError, 'monotonic for backward fill'): + ind.get_indexer(Index([0]), method='bfill') + with tm.assertRaisesRegexp(ValueError, 'monotonic for backward fill'): + ind.get_indexer(Index([0]), method='backfill') + + with tm.assertRaisesRegexp(ValueError, 'monotonic for forward fill'): + ind.get_indexer(Index([0]), method='ffill') + with tm.assertRaisesRegexp(ValueError, 'monotonic for forward fill'): + ind.get_indexer(Index([0]), method='pad') + + def test_get_indexer_pad_int64_with_range(self): + # TODO: Move this over to Int64Index tests instead + target = RangeIndex(0, 10) + idx = Index(range(0, 20, 2)) + indexer = idx.get_indexer(target, method='pad') + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) + self.assert_(np.array_equal(indexer, expected)) + + def test_get_indexer_pad(self): + idx = RangeIndex(-3, 0) + target 
= Index([-2, -1, 0, 1, 2]) + indexer = idx.get_indexer(target, method='pad') + expected = np.array([1, 2, 2, 2, 2]) + self.assert_(np.array_equal(indexer, expected)) + + target2 = Index([-4, -2, 1]) + indexer = idx.get_indexer(target2, method='pad') + expected = np.array([-1, 1, 2]) + self.assert_(np.array_equal(indexer, expected)) + + def test_get_indexer_backfill_int64_with_range(self): + # TODO: Move this over to Int64Index tests instead + target = RangeIndex(0, 10) + idx = Index(range(0, 20, 2)) + indexer = idx.get_indexer(target, method='backfill') + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) + self.assert_(np.array_equal(indexer, expected)) + + def test_get_indexer_backfill(self): + idx = RangeIndex(3, 5) + target = Index([-4, -2, 3, 4, 5, 7]) + indexer = idx.get_indexer(target, method='backfill') + expected = np.array([0, 0, 1, 2, -1, -1]) + self.assert_(np.array_equal(indexer, expected)) + + # # TODO: Decide on ffill, bfill, pad for NON-integer with RangeIndex... + + def test_join_outer(self): + # TODO: Convert this to RangeIndex formatted + # 1. Write tests for take + # 2. Make sure this works with return indexers (which are just args to + # take). 
+ other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assert_(res.equals(noidx_res)) + + eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], + dtype=np.int64) + eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], + dtype=np.int64) + + tm.assert_isinstance(res, Int64Index) + self.assert_(res.equals(eres)) + self.assert_(np.array_equal(lidx, elidx)) + self.assert_(np.array_equal(ridx, eridx)) + + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='outer', + return_indexers=True) + noidx_res = self.index.join(other_mono, how='outer') + self.assert_(res.equals(noidx_res)) + + eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], + dtype=np.int64) + tm.assert_isinstance(res, Int64Index) + self.assert_(res.equals(eres)) + self.assert_(np.array_equal(lidx, elidx)) + self.assert_(np.array_equal(ridx, eridx)) + + def test_join_inner(self): + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([2, 12]) + elidx = np.array([1, 6]) + eridx = np.array([4, 1]) + + tm.assert_isinstance(res, Int64Index) + self.assert_(res.equals(eres)) + self.assert_(np.array_equal(lidx, elidx)) + self.assert_(np.array_equal(ridx, eridx)) + + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='inner', + return_indexers=True) + + res2 = self.index.intersection(other_mono) + self.assert_(res.equals(res2)) + + eridx = np.array([1, 4]) 
+ tm.assert_isinstance(res, Int64Index) + self.assert_(res.equals(eres)) + self.assert_(np.array_equal(lidx, elidx)) + self.assert_(np.array_equal(ridx, eridx)) + + def test_join_left(self): + # TODO: Convert this to RangeIndex formatted + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + eres = self.index + eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], + dtype=np.int64) + + tm.assert_isinstance(res, Int64Index) + self.assert_(res.equals(eres)) + self.assert_(lidx is None) + self.assert_(np.array_equal(ridx, eridx)) + + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='left', + return_indexers=True) + eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], + dtype=np.int64) + tm.assert_isinstance(res, Int64Index) + self.assert_(res.equals(eres)) + self.assert_(lidx is None) + self.assert_(np.array_equal(ridx, eridx)) + + # non-unique + """ + idx = Index([1,1,2,5]) + idx2 = Index([1,2,5,7,9]) + res, lidx, ridx = idx2.join(idx, how='left', return_indexers=True) + eres = idx2 + eridx = np.array([0, 2, 3, -1, -1]) + elidx = np.array([0, 1, 2, 3, 4]) + self.assert_(res.equals(eres)) + self.assert_(np.array_equal(lidx, elidx)) + self.assert_(np.array_equal(ridx, eridx)) + """ + + def test_join_right(self): + # TODO: Convert this to RangeIndex formatted + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + elidx = np.array([-1, 6, -1, -1, 1, -1], + dtype=np.int64) + + tm.assert_isinstance(other, Int64Index) + self.assert_(res.equals(eres)) + self.assert_(np.array_equal(lidx, elidx)) + self.assert_(ridx is None) + + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='right', + return_indexers=True) + eres = other_mono + elidx = np.array([-1, 1, 
-1, -1, 6, -1], + dtype=np.int64) + tm.assert_isinstance(other, Int64Index) + self.assert_(res.equals(eres)) + self.assert_(np.array_equal(lidx, elidx)) + self.assert_(ridx is None) + + # non-unique + """ + idx = Index([1,1,2,5]) + idx2 = Index([1,2,5,7,9]) + res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True) + eres = idx2 + elidx = np.array([0, 2, 3, -1, -1]) + eridx = np.array([0, 1, 2, 3, 4]) + self.assert_(res.equals(eres)) + self.assert_(np.array_equal(lidx, elidx)) + self.assert_(np.array_equal(ridx, eridx)) + + idx = Index([1,1,2,5]) + idx2 = Index([1,2,5,9,7]) + res = idx.join(idx2, how='right', return_indexers=False) + eres = idx2 + self.assert(res.equals(eres)) + """ + + def test_join_non_int_index(self): + # UPDATED + idx = RangeIndex(0, 15) + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = idx.join(other, how='outer') + outer2 = other.join(idx, how='outer') + expected = Index(range(0, 15), dtype=object) + self.assert_(outer.equals(outer2)) + self.assert_(outer.equals(expected)) + + inner = idx.join(other, how='inner') + inner2 = other.join(idx, how='inner') + expected = other.copy() # avoid is_ stuff + self.assert_(inner.equals(inner2)) + self.assert_(inner.equals(expected)) + + idx2 = RangeIndex(0, 4) + inner3 = idx2.join(other, how='inner') + inner4 = other.join(idx2, how='inner') + expected = Index([3], dtype=object) + self.assert_(inner3.equals(inner4)) + self.assert_(inner3.equals(expected)) + + + left = idx.join(other, how='left') + self.assert_(left.equals(idx)) + + left2 = other.join(idx, how='left') + self.assert_(left2.equals(other)) + + right = idx.join(other, how='right') + self.assert_(right.equals(other)) + + right2 = other.join(idx, how='right') + self.assert_(right2.equals(idx)) + + def test_join_self(self): + # UPDATED + idx = RangeIndex(-10, -4) + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = idx.join(idx, how=kind) + self.assert_(self.index is joined) + + def 
test_intersection(self): + # TODO: Convert this to RangeIndex formatted + other = Index([1, 2, 3, 4, 5]) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_(np.array_equal(result, expected)) + + result = other.intersection(self.index) + expected = np.sort(np.asarray(np.intersect1d(self.index.values, + other.values))) + self.assert_(np.array_equal(result, expected)) + + def test_union_noncomparable(self): + # TODO: Convert this to RangeIndex formatted + from datetime import datetime, timedelta + # corner case, non-Int64Index + now = datetime.now() + other = Index([now + timedelta(i) for i in range(4)], dtype=object) + result = self.index.union(other) + expected = np.concatenate((self.index, other)) + self.assert_(np.array_equal(result, expected)) + + result = other.union(self.index) + expected = np.concatenate((other, self.index)) + self.assert_(np.array_equal(result, expected)) + + # def test_view_Index(self): + # self.index.view(Index) + + def test_prevent_casting(self): + # TODO: Convert this to RangeIndex formatted + result = self.index.astype('O') + self.assert_(result.dtype == np.object_) + + def test_take_preserve_name(self): + # TODO: Convert this to RangeIndex formatted + index = RangeIndex(1, 4, name='foo') + taken = index.take([3, 0, 1]) + self.assertEqual(index.name, taken.name) + + def test_int_name_format(self): + from pandas import Series, DataFrame + index = RangeIndex(3, 0, -1, name=0) + s = Series(lrange(3), index) + df = DataFrame(lrange(3), index=index) + repr(s) + repr(df) + + def test_repr_roundtrip(self): + tm.assert_index_equal(eval(repr(self.index)), self.index) + + def test_unicode_string_with_unicode(self): + idx = RangeIndex(0, 1000) + + if compat.PY3: + str(idx) + else: + compat.text_type(idx) -# -# Index inference -# + def test_bytestring_with_unicode(self): + idx = RangeIndex(0, 1000) + if compat.PY3: + bytes(idx) + else: + str(idx) + def 
test_slice_keep_name(self): + idx = RangeIndex(1, 3, name='asdf') + self.assertEqual(idx.name, idx[1:].name) -@knownfail -def test_sorted_index_yields_range(): - ind = Index(range(10)) - assert isinstance(ind, RangeIndex) - assert ind.equals(RangeIndex(0, 10)) - ind = Index(range(15, -1, -1)), - assert isinstance(ind, RangeIndex) - assert ind.equals(RangeIndex(15, -1)) - ind = Index([1, 3, 5, 7]) - assert not isinstance(ind, RangeIndex) - ind = Index(range(5) + [6]) - assert not isinstance(ind, RangeIndex) - ind = Index([1, 3, 2, 4, 5]) - assert not isinstance(ind, RangeIndex) - ind = Index(np.arange(0, 10).astype(float)) - assert not isinstance(ind, RangeIndex) +if __name__ == '__main__': + import nose + nose.runmodule(argv=[__file__, '-vvs'], + exit=False) From 5d3bb324ac9facfa5fed0d63efda166e31d413b3 Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Sun, 22 Dec 2013 21:21:54 -0500 Subject: [PATCH 05/13] Add specialized arrmap and groupby for RangeIndex to algos --- pandas/algos.pyx | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 5f68c1ee26e87..7686fe899be1f 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -2349,5 +2349,47 @@ cdef inline float64_t _median_linear(float64_t* a, int n): return result +@cython.wraparound(False) +@cython.boundscheck(False) +def groupby_range(range_index, ndarray labels): + ''' + Assigns indices incrementing by step to each index of labels and groups by + uniques. 
+    '''
+    cdef Py_ssize_t i, length, idx, step
+    cdef dict result = {}
+    cdef list members
+    cdef object key
+    length = len(range_index)
+    if length != len(labels):
+        raise ValueError("len(index) != len(labels)")
+    # Count from ``left`` (the first value in iteration order). RangeIndex
+    # stores ``start`` as the lowest value, so for a descending range
+    # (step == -1) counting down from ``start`` would leave the index.
+    idx = range_index.left
+    step = range_index.step
+    for i in range(length):
+        key = labels[i]
+        if key in result:
+            result[key].append(idx)
+        else:
+            result[key] = [idx]
+        idx += step
+    return result
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def arrmap_range(range_index, object func):
+    '''
+    Call func on each value of range_index, in iteration order, and return
+    the collected results passed through maybe_convert_objects.
+    '''
+    cdef Py_ssize_t i, idx, step, length
+    length = len(range_index)
+    cdef ndarray[object] result = np.empty(length, dtype=np.object_)
+
+    from pandas.lib import maybe_convert_objects
+
+    # ``left`` is the first value produced; ``start`` is only the lower
+    # bound and differs from it when the range is descending.
+    idx = range_index.left
+    step = range_index.step
+    for i in range(length):
+        result[i] = func(idx)
+        idx += step
+    return maybe_convert_objects(result)
+
+
 include "join.pyx"
 include "generated.pyx"
From 5e8dff4e8648f9ebdd3fd31be90b6d58111a449c Mon Sep 17 00:00:00 2001
From: Jeff Tratner
Date: Sun, 22 Dec 2013 21:22:39 -0500
Subject: [PATCH 06/13] More work on the RangeIndex implementation

---
 pandas/core/range.py | 321 +++++++++++++++++++++++++++++++++----------
 1 file changed, 252 insertions(+), 69 deletions(-)

diff --git a/pandas/core/range.py b/pandas/core/range.py
index bc7cb2cab27cd..cc4d1d3c40495 100644
--- a/pandas/core/range.py
+++ b/pandas/core/range.py
@@ -1,11 +1,24 @@
 import pandas as pd
 import numpy as np
-import pandas.lib as lib
 import pandas.algos as _algos
-from pandas.core.index import Int64Index
+from pandas.core.index import Int64Index, Index
+import pandas.core.index as pdindex
 
+EMPTY_RANGE = lambda: pd.Index([], dtype='int64')
 
-class RangeIndex(object):
+def _delegate_to_int64(func_name, doc=None):
+    def wrapper(self, *args, **kwargs):
+        return getattr(self.as_int64index, func_name)(*args, **kwargs)
+    wrapper.__name__ = func_name
+    if hasattr(Int64Index, func_name):
+        doc = doc or getattr(Int64Index, func_name).__doc__
+    wrapper.__doc__ = doc or ''
+
return wrapper + +def _not_implemented(self, *args, **kwargs): + raise NotImplementedError('method not implemented') + +class RangeIndex(Index): """Represents a range with left-open interval. Parameters @@ -14,7 +27,7 @@ class RangeIndex(object): start and end of range (i.e., ``s[left:right]``). If left > right, assumes reversed (i.e., ``s[left:right:-1]``) """ - _groupby = _algos.groupby_int64 + _groupby = _algos.groupby_range # My thinking on this structure: # - from a 'set theoretic' standpoint, order doesn't matter, so ascending # vs. descending is generally an implementation detail @@ -37,17 +50,20 @@ class RangeIndex(object): # could *replace* itself on its parent. (tradeoff between instantiation time and # ability to gc values when they aren't needed anymore) # TODO: Block setting of start and end - def __new__(cls, left, right): + def __new__(cls, left, right=None, step=1, name=None): + if step not in (1, -1): + raise ValueError("Invalid step %s" % step) + if right is None: + left, right = 0, left + # RangeIndex can never be empty (makes checking things simpler) if left == right: - return pd.Index([], dtype='int64') - else: - return object.__new__(cls) - - def __init__(self, left, right): + return EMPTY_RANGE() + self = np.array([], dtype='int64').view(RangeIndex) # shouldn't happen if left == right: raise ValueError("Can't have empty range") + # want to coerce where possible, but not change value l, r = left, right left, right = int(left), int(right) @@ -57,71 +73,156 @@ def __init__(self, left, right): self.left = left self.right = right self.ascending = left < right + self.name = name if self.ascending: - self.start, self.end = left, right + self.start, self.stop = left, right self.step = 1 else: - self.start, self.end = right, left + # because non-inclusive + # e.g., range(10, 5, -1) == range(6, 11)[::-1] + self.start, self.stop = right + 1, left + 1 self.step = -1 + return self + + @property + def _constructor(self): + # functions depend on being able 
to pass a list to this function, + # so makes more sense to use Index [could potentially use Int64Index + # instead] + return Index + + # Stuff that also needs to / could be overriden: + # _has_valid_type funcs (_convert_scalar_indexer, + # _convert_slice_indexer, etc) + + def map(self, mapper): + return _algos.arrmap_range(self, mapper) + + def groupby(self, to_groupby): + return self._groupby(self, to_groupby) + + def __array_finalize__(self, obj): + if not isinstance(obj, type(self)): + return + for attr in ('ascending', 'left', 'right', 'start', + 'stop', 'step'): + if hasattr(obj, attr): + setattr(self, attr, getattr(obj, attr)) + self.name = getattr(obj, 'name', None) + + join = _delegate_to_int64('join') + to_series = _delegate_to_int64('to_series') + astype = _delegate_to_int64('astype') + to_datetime = _delegate_to_int64('to_datetime') + _format_native_types = _delegate_to_int64('_format_native_types') + argsort = _delegate_to_int64('argsort') + asof = _not_implemented + asof_locs = _not_implemented + nlevels = 1 + is_integer = lambda self: True + is_floating = lambda self: False + is_numeric = lambda self: True + is_mixed = lambda self: False + holds_integer = lambda self: True + is_all_dates = lambda self: False + is_unique = True + get_duplicates = lambda self: [] + inferred_type = 'integer' + + def order(self, return_indexers=False, ascending=True): + result = self + left, right = self.left, self.right + if ascending != self.ascending: + result = result[::-1] + if return_indexers: + if ascending != self.ascending: + indexer = np.arange(len(self) - 1, -1, -1) + else: + indexer = np.arange(len(self)) + return result, indexer + else: + return result @property def values(self): if self.ascending: - vals = np.arange(self.start, self.stop, 1, dtype='int64') + vals = np.arange(self.left, self.right, 1, dtype='int64') else: - vals = np.arange(self.stop, self.start, -1, dtype='int64') + vals = np.arange(self.left, self.right, -1, dtype='int64') return vals + 
@property + def as_int64index(self): + # TODO: Maybe fastpath this! + return Int64Index(self.values, name=self.name) + def union(self, other): """Union this with another RangeIndex. Always returns ascending RangeIndex.""" if not isinstance(other, RangeIndex): - raise NotImplementedError("Other not range index") + return self.as_int64index.union(other) if not self._overlaps(other): return self.values | other.values start = min(self.start, other.start) - end = max(self.end, other.end) - return RangeIndex(start, end) + stop = max(self.stop, other.stop) + return RangeIndex(start, stop) def intersection(self, other): if not isinstance(other, RangeIndex): - raise NotImplementedError("Other not range index") + return self.as_int64index.intersection(other) # not overlapping or start touches end or vice versa - if not self._overlaps(other) or (self.start == other.end) or (self.end == other.start): - return pd.Index([], dtype='int64') + if not self._overlaps(other) or (self.start == other.stop) or (self.stop == other.start): + return EMPTY_RANGE() else: - return RangeIndex(max(self.start, other.start), min(self.end, other.end)) + return RangeIndex(max(self.start, other.start), min(self.stop, other.stop)) - def view(self, other): - return self + def _shallow_copy(self): + # recursion issue: index view() calls _shallow_copy(), probably need to + # decide if _shallow_copy() is necessary. 
+ return RangeIndex(self.left, self.right, self.step) + + def view(self, *args, **kwargs): + if not args and not kwargs: + return self._shallow_copy() + else: + return self.as_int64index.view(*args, **kwargs) def difference(self, other): if not isinstance(other, RangeIndex): - raise NotImplementedError("Other not range index") + return self.as_int64index.difference(other) - if not self._overlaps(other) or self.start == other.end or self.end == other.start: + if not self._overlaps(other) or self.start == other.stop or self.stop == other.start: return self.view() # completely contained - elif self.start >= other.start and self.end <= other.end: - return pd.Index([], dtype='int64') + elif self.start >= other.start and self.stop <= other.stop: + return EMPTY_RANGE() elif self.start < other.start: return RangeIndex(self.start, other.start) # starts within other [because must overlap] elif self.start > other.start: - assert other.end > self.end, (self, other) - return RangeIndex(other.end, self.end) + assert other.stop > self.stop, (self, other) + return RangeIndex(other.stop, self.stop) assert False, "Shouldn't get to here" @property def empty(self): return False + @property + def is_monotonic(self): + # monotonically increasing + return self.ascending + def all(self): - return True + if self.start <= 0: + return False + else: + return True def any(self): - return True + # if includes any number other than zero than any is True + return len(self) > 1 or self.start != 0 def __array__(self): return self.values @@ -129,9 +230,6 @@ def __array__(self): def __or__(self, other): return self.union(other) - - __add__ = __or__ - def __and__(self, other): return self.intersection(other) @@ -139,26 +237,28 @@ def __sub__(self, other): return self.difference(other) def equals(self, other): - return self.left == other.left and self.right == other.right + if not isinstance(other, RangeIndex): + return self.as_int64index.equals(other) + return self.ascending == other.ascending and 
self.start == other.start and self.stop == other.stop + + def identical(self, other): + other = pdindex._ensure_index(other) + return self.equals(other) and self.name == other.name def _overlaps(self, other): # cheers to Ned Batchelder on this # only overlaps if each ranges' end is beyond or *at* the other ranges' start. # touching does not count as overlapping - return other.end > self.start and self.end > other.start + return other.stop > self.start and self.stop > other.start - # # starts before or on other's start and ends after or on other's start - # return ((self.start <= other.start and self.end >= other.start) or - # # starts within other - # (self.start > other.start and self.start <= other.end)) def nonzero(self): if self.start > 0: - return np.arange(len(self)) + return (np.arange(len(self)),) else: # need to skip when self is zero res = range(len(self)) res.pop(0 - self.start * self.step) - return np.array(res) + return (np.array(res),) def __contains__(self, val): # can only hold integers @@ -172,61 +272,144 @@ def __contains__(self, val): if v != val or val != val: return False - return self.start <= val < self.end + return self.start <= val < self.stop def __iter__(self): return iter(xrange(self.left, self.right, self.step)) def __len__(self): - return self.end - self.start + return self.stop - self.start + + def __str__(self): + return str(self.as_int64index) def __repr__(self): # TODO: Either change to Int64Repr OR to RangeIndex(left, right) - return "RangeIndex(%r)" % (dict(start=self.start, end=self.end, ascending=self.ascending)) + return "RangeIndex(%s, %s, %s)" % (self.left, self.right, self.step) def get_indexer(self, arr, method=None): + """Returns indexer (i.e., matching indices between the index and + arr).""" + # bfill : will fill everything < start as 0; > stop filled as -1 + # ffill : will fill everything < start as -1; > stop filled as + # len(self) - 1 + if method not in (None, 'ffill', 'pad', 'bfill', 'backfill'): + raise 
ValueError("Unknown method: %r" % method) + if method and not self.is_monotonic: + kind = 'forward' if method in ('ffill', 'pad') else 'backward' + raise ValueError("Must be monotonic for %s fill." % kind) + arr = np.asarray(arr, dtype='int64') indexer = arr - self.start if not self.ascending: indexer = (len(self) - 1) - indexer - indexer[(indexer < 0) | (indexer >= len(self) )] = -1 + + if method in ('ffill', 'pad'): + # next valid observation always 0 + min_fill, max_fill = 0, -1 + elif method in ('bfill', 'backfill'): + # last valid observation is len(self) - 1 + min_fill, max_fill = -1, len(self) - 1 + else: + min_fill = max_fill = -1 + + indexer[indexer < 0] = min_fill + indexer[indexer >= len(self)] = max_fill return indexer def get_loc(self, val): - if val in self: - return val - (self.start if self.ascending else self.end) - else: + if val not in self: return -1 + if self.ascending: + return val - self.start + else: + return self.stop - val def __getitem__(self, val): - if isinstance(val, slice): - if slice.step not in (1, -1): - return self.values[val] - if slice.start is None or slice.start > self.start: - start = slice.start - if slice.start >= 0 and slice.end >= 0: - start = slice.start if slice.start is None or slice.start > self.start else self.start - end = slice.end if slice.end is None or slice.end < self.end else self.end - - if self.step != slice.step: - start, end = end, start - - return RangeIndex(start, end) - else: - if 0 <= val < len(self): - return self.left + val * self.step - elif -len(self) <= val < 0: - return self.right + val * self.step - else: + # only want to handle the simple stuff here, otherwise let Int64Index + # handle it + if isinstance(val, slice) and val.step in (1, -1): + # Step 1 - convert the slice to be forward index (i.e. 
step == -1 + # --> step == 1) + v_start, v_stop = _get_forward_indices(len(self), val) + left, right = self.left, self.right + step = 1 if self.ascending else -1 + if v_start is None: + # empty range + return EMPTY_RANGE() + + # Step 3 - increment left by start of slice + left += v_start * step + + # Step 4 - set right to min(right, stop) + + # Step 5 - flip bounds if they were reversed + return RangeIndex(start, stop, step) + elif np.isscalar(val): + if -len(self) <= val < 0: + val = len(self) + val + if val > len(self): raise IndexError("%d out of range" % val) + step = 1 if self.ascending else -1 + return self.left + val * step + else: + return self.as_int64index[val] def __bool__(self): raise ValueError("The truth value of an array is ambiguous...") # blah blah blah __nonzero__ = __bool__ + # don't need to override slice_indexer def slice_locs(self, start=None, end=None): - pass + start = self.get_loc(start) if start is not None else 0 + end = self.get_loc(end) + 1 if end is not None else len(self) + return start, end def get_indexer_non_unique(self, arr): return self.get_indexer(self, arr), np.array([], dtype='int64') + +def _flip_bounds(start, stop, step): + """Returns bounds and step for reversed range (where end is non-inclusive): + >>> range(3, 6) + [3, 4, 5] + >>> _flip_bounds(3, 6, 1) + (5, 2, -1) + >>> range(*_flip_bounds(3, 6, 1)) + [5, 4, 3] + """ + return stop - step, start - step, step * -1 + + +def _get_forward_indices(length, slc): + """Converts given slice to positive, forward step indices. + Returns (None, None) if not possible to convert. 
+ + >>> _get_forward_indices(10, slice(5, 1, -2)) + (2, 6) + >>> _get_forward_indices(10, slice(-100, -90, 5)) + (None, None) + >>> _get_forward_indices(5, slice(3, 4, 1)) + (3, 4) + """ + if slc.step == 0 or length == 0: + return None, None + start, stop = slc.start, slc.stop + if slc.step < 0: + # when you flip direction, need to increment edges + # e.g., [6:2:-1] --> [3:7][::-1] + start = start + 1 if start is not None else length - 1 + stop = stop + 1 if stop is not None else 0 + start, stop = stop, start + else: + start = start if start is not None else 0 + stop = stop if stop is not None else length - 1 + + if start >= stop or start > length or stop == 0 or stop < -length: + return (None, None) + if start < 0: + start = length + start if start > -length else 0 + if stop < 0: + stop = length + stop + + return start, min(length, stop) From 5ba899d22618e5dbf557bf2cfcf0a53347f8eb6c Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Sun, 22 Dec 2013 21:52:41 -0500 Subject: [PATCH 07/13] tweaks to test code to get everything working right + block some of the ndarray interface --- pandas/core/range.py | 24 +++++++++++++++++------ pandas/tests/test_range.py | 39 ++++++++++++++++++++++++-------------- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/pandas/core/range.py b/pandas/core/range.py index cc4d1d3c40495..119006ce911f1 100644 --- a/pandas/core/range.py +++ b/pandas/core/range.py @@ -63,12 +63,14 @@ def __new__(cls, left, right=None, step=1, name=None): if left == right: raise ValueError("Can't have empty range") + # TODO: builtin range function only accepts integers, could make more + # sense to just do a single isinstance check there. Not sure... 
# want to coerce where possible, but not change value l, r = left, right left, right = int(left), int(right) if left != l or right != r: - raise ValueError("Need to pass integral values") + raise TypeError("Need to pass integral values") self.left = left self.right = right @@ -78,7 +80,8 @@ def __new__(cls, left, right=None, step=1, name=None): self.start, self.stop = left, right self.step = 1 else: - # because non-inclusive + # because non-inclusive. want start and stop to be the actual + # bounds of the range if the range were ascending. # e.g., range(10, 5, -1) == range(6, 11)[::-1] self.start, self.stop = right + 1, left + 1 self.step = -1 @@ -215,10 +218,8 @@ def is_monotonic(self): return self.ascending def all(self): - if self.start <= 0: - return False - else: - return True + # False only if it *spans* zero + return not (self.start <= 0 and self.stop > 0) def any(self): # if includes any number other than zero than any is True @@ -227,6 +228,17 @@ def any(self): def __array__(self): return self.values + # TODO: Probably remove these functions when Index is no longer a subclass of + # ndarray [need to override them for now to make them work with np.asarray + # and buddies]. 
+ @property + def __array_interface__(self): + raise AttributeError("No attribute __array_interface__") + + @property + def __array_struct__(self): + raise AttributeError("No attribute __array_struct__ [disabled]") + def __or__(self, other): return self.union(other) diff --git a/pandas/tests/test_range.py b/pandas/tests/test_range.py index 4d87b957072d0..830028a7e48d3 100644 --- a/pandas/tests/test_range.py +++ b/pandas/tests/test_range.py @@ -5,6 +5,7 @@ from pandas.core.index import Index, Int64Index from pandas.core.range import RangeIndex import pandas.compat as compat +import nose lrange = lambda *args: list(range(*args)) @@ -56,56 +57,66 @@ def test_basic(self): # make sure conditions work correctly # descending r = RangeIndex(10, 5) - self.assertEqual(r.start, 5) - self.assertEqual(r.stop, 10) - # self.assertEqual(r.left, 10) - # self.assertEqual(r.right, 5) + # start and stopped are what the range would be if you sorted it, + # i.e. range(10, 5, -1) is [10, 9, 8, 7, 6]. And range(6, 11) is [6, 7, + # 8, 9, 10] + self.assertEqual(r.start, 6) + self.assertEqual(r.stop, 11) + self.assertEqual(r.left, 10) + self.assertEqual(r.right, 5) self.assertEqual(r.step, -1) # ascending r2 = RangeIndex(5, 10) self.assertEqual(r2.start, 5) self.assertEqual(r2.stop, 10) - # self.assertEqual(r2.left, 5) - # self.assertEqual(r2.right, 10) + self.assertEqual(r2.left, 5) + self.assertEqual(r2.right, 10) self.assertEqual(r2.step, 1) # negative values r3 = RangeIndex(-10, -9) self.assertEqual(r3.start, -10) self.assertEqual(r3.stop, -9) + self.assertEqual(r3.left, -10) + self.assertEqual(r3.right, -9) self.assert_(r3.ascending) self.assertEqual(r3.step, 1) r4 = RangeIndex(-8, -15) - self.assertEqual(r4.start, -15) - self.assertEqual(r4.stop, -8) + self.assertEqual(r4.start, -14) + self.assertEqual(r4.stop, -7) + self.assertEqual(r4.right, -15) + self.assertEqual(r4.left, -8) self.assert_(not r4.ascending) self.assertEqual(r4.step, -1) def test_bad_input(self): - with 
tm.assertRaisesRegexp(TypeError, 'Must be integer'): + with tm.assertRaisesRegexp(TypeError, 'Need to pass integral values'): RangeIndex(0, 1.25) - with tm.assertRaisesRegexp(TypeError, 'invalid literal'): + with tm.assertRaisesRegexp(ValueError, 'invalid literal'): RangeIndex(0, 'a') - with tm.assertRaisesRegexp(TypeError, 'Must be integer'): + with tm.assertRaisesRegexp(TypeError, 'Need to pass integral values'): RangeIndex('0', '5') def test_contains(self): - r = RangeIndex(10, 5) + r = RangeIndex(9, 4) r2 = RangeIndex(5, 10) for i in range(5, 10): - self.assert_(i in r) - self.assert_(i in r2) + self.assert_(i in r, i) + self.assert_(i in r2, i) def test_empty(self): assert np.array_equal(RangeIndex(5, 5), Index([], dtype='int64')) def test_asarray(self): + # TODO: Remove this SkipTest once Index is not a subclass of ndarray + raise nose.SkipTest("This test case cannot work until Index is no " + "longer a subclass of ndarray") # __array__ self.assert_(np.array_equal(np.asarray(RangeIndex(1, 0)), np.array([1]))) From a2c6ea35fdac8d262f777c9c92c53b9980270b59 Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Wed, 25 Dec 2013 01:21:36 -0500 Subject: [PATCH 08/13] More fixes to backfill (you know, using something that can actually be filled...shocker) --- pandas/tests/test_range.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/tests/test_range.py b/pandas/tests/test_range.py index 830028a7e48d3..f17cea1e861fb 100644 --- a/pandas/tests/test_range.py +++ b/pandas/tests/test_range.py @@ -283,6 +283,7 @@ def test_get_indexer(self): # TODO: Consider supporting steps idx2 = Index([2, 4, 6]) idx3 = Index([1, 6, 7, 1, 2]) + idx4 = Index([-4, 3, 6]) r1 = idx1.get_indexer(idx2) assert_almost_equal(r1, [1, 3, -1]) @@ -290,14 +291,17 @@ def test_get_indexer(self): r1 = idx1.get_indexer(idx3) assert_almost_equal(r1, np.array([0, -1, -1, 0, 1])) - r1 = idx1.get_indexer(idx3, method='pad') - assert_almost_equal(r1, np.array([0, 3, 3, -1, 
-1])) + assert_almost_equal(idx1.get_indexer(idx4), np.array([-1, 2, -1])) - rffill1 = idx1.get_indexer(idx3, method='ffill') + r1 = idx1.get_indexer(idx4, method='pad') + assert_almost_equal(r1, np.array([-1, 2, 3])) + + # synonym + rffill1 = idx1.get_indexer(idx4, method='ffill') assert_almost_equal(r1, rffill1) - r1 = idx1.get_indexer(idx3, method='backfill') - assert_almost_equal(r1, np.array([0, -1, -1, 0, 1])) + r1 = idx1.get_indexer(idx4, method='backfill') + assert_almost_equal(r1, np.array([0, 2, -1])) rbfill1 = idx1.get_indexer(idx3, method='bfill') assert_almost_equal(r1, rbfill1) From b4a80d15c77c15462dd0a03b61c998dc77a10165 Mon Sep 17 00:00:00 2001 From: ARF Date: Thu, 23 Apr 2015 20:14:04 +0200 Subject: [PATCH 09/13] Reset to master @ 76571d0356 --- .travis.yml | 2 + doc/source/whatsnew/v0.16.1.txt | 15 +- pandas/algos.pyx | 42 -- pandas/core/categorical.py | 2 +- pandas/core/range.py | 427 ---------------- pandas/tests/test_categorical.py | 13 + pandas/tests/test_range.py | 837 ------------------------------- 7 files changed, 24 insertions(+), 1314 deletions(-) delete mode 100644 pandas/core/range.py delete mode 100644 pandas/tests/test_range.py diff --git a/.travis.yml b/.travis.yml index bc87853b26d6e..0d143d7f7133b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,6 +22,7 @@ matrix: - LOCALE_OVERRIDE="it_IT.UTF-8" - BUILD_TYPE=conda - JOB_NAME: "26_nslow_nnet" + - INSTALL_TEST=true - python: 2.7 env: - NOSE_ARGS="slow and not network and not disabled" @@ -183,6 +184,7 @@ script: # nothing here, or failed tests won't fail travis after_script: + - ci/install_test.sh - if [ -f /tmp/doc.log ]; then cat /tmp/doc.log; fi - source activate pandas && ci/print_versions.py - ci/print_skipped.py /tmp/nosetests.xml diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 08d8ef9116367..3d5c95aee2e92 100755 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -9,7 +9,7 @@ We recommend that all users upgrade 
to this version. Highlights include: -- Support for a ``CategoricalIndex``, a category based index, see :ref:`here ` +- Support for a ``CategoricalIndex``, a category based index, see :ref:`here ` .. contents:: What's new in v0.16.1 :local: @@ -24,15 +24,15 @@ Enhancements - Added ``StringMethods.capitalize()`` and ``swapcase`` which behave as the same as standard ``str`` (:issue:`9766`) - Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`) - The `.str` accessor is now available for both `Series` and `Index`. + The ``.str`` accessor is now available for both ``Series`` and ``Index``. .. ipython:: python idx = Index([' jack', 'jill ', ' jesse ', 'frank']) idx.str.strip() - One special case for the `.str` accessor on `Index` is that if a string method returns `bool`, the `.str` accessor - will return a `np.array` instead of a boolean `Index` (:issue:`8875`). This enables the following expression + One special case for the `.str` accessor on ``Index`` is that if a string method returns ``bool``, the ``.str`` accessor + will return a ``np.array`` instead of a boolean ``Index`` (:issue:`8875`). This enables the following expression to work naturally: @@ -46,7 +46,7 @@ Enhancements - ``DataFrame.mask()`` and ``Series.mask()`` now support same keywords as ``where`` (:issue:`8801`) -- ``drop`` function can now accept ``errors`` keyword to suppress ValueError raised when any of label does not exist in the target data. (:issue:`6736`) +- ``drop`` function can now accept ``errors`` keyword to suppress ``ValueError`` raised when any of label does not exist in the target data. (:issue:`6736`) .. ipython:: python @@ -61,6 +61,7 @@ Enhancements - Allow Panel.shift with ``axis='items'`` (:issue:`9890`) - Trying to write an excel file now raises ``NotImplementedError`` if the ``DataFrame`` has a ``MultiIndex`` instead of writing a broken Excel file. (:issue:`9794`) +- Allow ``Categorical.add_categories`` to accept ``Series`` or ``np.array``. 
(:issue:`9927`) - Add/delete ``str/dt/cat`` accessors dynamically from ``__dir__``. (:issue:`9910`) @@ -86,7 +87,7 @@ setting the index of a ``DataFrame/Series`` with a ``category`` dtype would conv df.dtypes df.B.cat.categories -setting the index, will create create a CategoricalIndex +setting the index, will create create a ``CategoricalIndex`` .. ipython :: python @@ -178,7 +179,7 @@ Bug Fixes - Fixed bug (:issue:`9542`) where labels did not appear properly in legend of ``DataFrame.plot()``. Passing ``label=`` args also now works, and series indices are no longer mutated. - Bug in json serialization when frame has length zero.(:issue:`9805`) -- Bug in `read_csv` where missing trailing delimiters would cause segfault. (:issue:`5664`) +- Bug in ``read_csv`` where missing trailing delimiters would cause segfault. (:issue:`5664`) - Bug in retaining index name on appending (:issue:`9862`) - Bug in ``scatter_matrix`` draws unexpected axis ticklabels (:issue:`5662`) - Fixed bug in ``StataWriter`` resulting in changes to input ``DataFrame`` upon save (:issue:`9795`). diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 7686fe899be1f..5f68c1ee26e87 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -2349,47 +2349,5 @@ cdef inline float64_t _median_linear(float64_t* a, int n): return result -@cython.wraparound(False) -@cython.boundscheck(False) -def groupby_range(range_index, ndarray labels): - ''' - Assigns indices incrementing by step to each index of labels and groups by - uniques. 
- ''' - cdef Py_ssize_t i, length, idx, step - cdef dict result = {} - cdef list members - cdef object key - length = len(range_index) - if length != len(labels): - raise ValueError("len(index) != len(labels)") - idx = range_index.start - step = range_index.step - for i in range(length): - key = labels[i] - if key in result: - result[key].append(idx) - else: - result[key] = [idx] - idx += step - return result - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_range(range_index, object func): - cdef Py_ssize_t i, idx, step, length - length = len(range_index) - cdef ndarray[object] result = np.empty(length, dtype=np.object_) - - from pandas.lib import maybe_convert_objects - - idx = range_index.start - step = range_index.step - for i in range(length): - result[i] = func(idx) - idx += step - return maybe_convert_objects(result) - - include "join.pyx" include "generated.pyx" diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 9537523380350..caf706fcbcbbd 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -708,7 +708,7 @@ def add_categories(self, new_categories, inplace=False): if len(already_included) != 0: msg = "new categories must not include old categories: %s" % str(already_included) raise ValueError(msg) - new_categories = list(self._categories) + (new_categories) + new_categories = list(self._categories) + list(new_categories) new_categories = self._validate_categories(new_categories) cat = self if inplace else self.copy() cat._categories = new_categories diff --git a/pandas/core/range.py b/pandas/core/range.py deleted file mode 100644 index 119006ce911f1..0000000000000 --- a/pandas/core/range.py +++ /dev/null @@ -1,427 +0,0 @@ -import pandas as pd -import numpy as np -import pandas.algos as _algos -from pandas.core.index import Int64Index, Index -import pandas.core.index as pdindex - -EMPTY_RANGE = lambda: pd.Index([], dtype='int64') - -def _delegate_to_int64(func_name, doc=None): - def 
wrapper(self, *args, **kwargs): - return getattr(self.as_int64index, func_name)(*args, **kwargs) - wrapper.__name__ = func_name - if hasattr(Int64Index, func_name): - doc = doc or getattr(Int64Index, func_name).__doc__ - wrapper.__doc__ = doc or '' - return wrapper - -def _not_implemented(self, *args, **kwargs): - raise NotImplementedError('method not implemented') - -class RangeIndex(Index): - """Represents a range with left-open interval. - - Parameters - ---------- - left, right : int - start and end of range (i.e., ``s[left:right]``). If left > right, - assumes reversed (i.e., ``s[left:right:-1]``) - """ - _groupby = _algos.groupby_range - # My thinking on this structure: - # - from a 'set theoretic' standpoint, order doesn't matter, so ascending - # vs. descending is generally an implementation detail - # - left, right are necessary for comparison, but making start and end - # separate makes it much easier to work with - # - prohibiting empty RangeIndex helps simplify edge cases considerably. - # - From pandas' perspective, RangeIndex should behave *exactly* the same - # as an Int64Index. (except for ops where it can yield more RangeIndexes) - # - supporting steps might be possible, but less simple (and less clear - # that it would be useful to pandas proper, given that you'd have to know - # ahead of time that you could convert to a stepped range). Plus, only - # helpful when you specifically have a consistent step - # - Certain operations with RangeIndex could benefit from allowing nested - # ranges, i.e. the union of RangeIndex(10, 5) and RangeIndex(3, 7) could be - # something like: [RangeIndex(10,5), RangeIndex(3, 7)] and then could just - # iterate over that. But that's for after all of this starts working. 
- # - It would be nice if groupby() accepted an Index - # - It might be valuable to cache the values property of a RangeIndex, but - # I'm not totally convinced that's the best strategy (yet!), unless RangeIndex - # could *replace* itself on its parent. (tradeoff between instantiation time and - # ability to gc values when they aren't needed anymore) - # TODO: Block setting of start and end - def __new__(cls, left, right=None, step=1, name=None): - if step not in (1, -1): - raise ValueError("Invalid step %s" % step) - if right is None: - left, right = 0, left - # RangeIndex can never be empty (makes checking things simpler) - if left == right: - return EMPTY_RANGE() - self = np.array([], dtype='int64').view(RangeIndex) - # shouldn't happen - if left == right: - raise ValueError("Can't have empty range") - - # TODO: builtin range function only accepts integers, could make more - # sense to just do a single isinstance check there. Not sure... - # want to coerce where possible, but not change value - l, r = left, right - left, right = int(left), int(right) - - if left != l or right != r: - raise TypeError("Need to pass integral values") - - self.left = left - self.right = right - self.ascending = left < right - self.name = name - if self.ascending: - self.start, self.stop = left, right - self.step = 1 - else: - # because non-inclusive. want start and stop to be the actual - # bounds of the range if the range were ascending. 
- # e.g., range(10, 5, -1) == range(6, 11)[::-1] - self.start, self.stop = right + 1, left + 1 - self.step = -1 - return self - - @property - def _constructor(self): - # functions depend on being able to pass a list to this function, - # so makes more sense to use Index [could potentially use Int64Index - # instead] - return Index - - # Stuff that also needs to / could be overriden: - # _has_valid_type funcs (_convert_scalar_indexer, - # _convert_slice_indexer, etc) - - def map(self, mapper): - return _algos.arrmap_range(self, mapper) - - def groupby(self, to_groupby): - return self._groupby(self, to_groupby) - - def __array_finalize__(self, obj): - if not isinstance(obj, type(self)): - return - for attr in ('ascending', 'left', 'right', 'start', - 'stop', 'step'): - if hasattr(obj, attr): - setattr(self, attr, getattr(obj, attr)) - self.name = getattr(obj, 'name', None) - - join = _delegate_to_int64('join') - to_series = _delegate_to_int64('to_series') - astype = _delegate_to_int64('astype') - to_datetime = _delegate_to_int64('to_datetime') - _format_native_types = _delegate_to_int64('_format_native_types') - argsort = _delegate_to_int64('argsort') - asof = _not_implemented - asof_locs = _not_implemented - nlevels = 1 - is_integer = lambda self: True - is_floating = lambda self: False - is_numeric = lambda self: True - is_mixed = lambda self: False - holds_integer = lambda self: True - is_all_dates = lambda self: False - is_unique = True - get_duplicates = lambda self: [] - inferred_type = 'integer' - - def order(self, return_indexers=False, ascending=True): - result = self - left, right = self.left, self.right - if ascending != self.ascending: - result = result[::-1] - if return_indexers: - if ascending != self.ascending: - indexer = np.arange(len(self) - 1, -1, -1) - else: - indexer = np.arange(len(self)) - return result, indexer - else: - return result - - @property - def values(self): - if self.ascending: - vals = np.arange(self.left, self.right, 1, 
dtype='int64') - else: - vals = np.arange(self.left, self.right, -1, dtype='int64') - return vals - - @property - def as_int64index(self): - # TODO: Maybe fastpath this! - return Int64Index(self.values, name=self.name) - - def union(self, other): - """Union this with another RangeIndex. Always returns ascending RangeIndex.""" - if not isinstance(other, RangeIndex): - return self.as_int64index.union(other) - - if not self._overlaps(other): - return self.values | other.values - - start = min(self.start, other.start) - stop = max(self.stop, other.stop) - return RangeIndex(start, stop) - - def intersection(self, other): - if not isinstance(other, RangeIndex): - return self.as_int64index.intersection(other) - # not overlapping or start touches end or vice versa - if not self._overlaps(other) or (self.start == other.stop) or (self.stop == other.start): - return EMPTY_RANGE() - else: - return RangeIndex(max(self.start, other.start), min(self.stop, other.stop)) - - def _shallow_copy(self): - # recursion issue: index view() calls _shallow_copy(), probably need to - # decide if _shallow_copy() is necessary. 
- return RangeIndex(self.left, self.right, self.step) - - def view(self, *args, **kwargs): - if not args and not kwargs: - return self._shallow_copy() - else: - return self.as_int64index.view(*args, **kwargs) - - def difference(self, other): - if not isinstance(other, RangeIndex): - return self.as_int64index.difference(other) - - if not self._overlaps(other) or self.start == other.stop or self.stop == other.start: - return self.view() - # completely contained - elif self.start >= other.start and self.stop <= other.stop: - return EMPTY_RANGE() - elif self.start < other.start: - return RangeIndex(self.start, other.start) - # starts within other [because must overlap] - elif self.start > other.start: - assert other.stop > self.stop, (self, other) - return RangeIndex(other.stop, self.stop) - assert False, "Shouldn't get to here" - - @property - def empty(self): - return False - - @property - def is_monotonic(self): - # monotonically increasing - return self.ascending - - def all(self): - # False only if it *spans* zero - return not (self.start <= 0 and self.stop > 0) - - def any(self): - # if includes any number other than zero than any is True - return len(self) > 1 or self.start != 0 - - def __array__(self): - return self.values - - # TODO: Probably remove these functions when Index is no longer a subclass of - # ndarray [need to override them for now to make them work with np.asarray - # and buddies]. 
- @property - def __array_interface__(self): - raise AttributeError("No attribute __array_interface__") - - @property - def __array_struct__(self): - raise AttributeError("No attribute __array_struct__ [disabled]") - - def __or__(self, other): - return self.union(other) - - def __and__(self, other): - return self.intersection(other) - - def __sub__(self, other): - return self.difference(other) - - def equals(self, other): - if not isinstance(other, RangeIndex): - return self.as_int64index.equals(other) - return self.ascending == other.ascending and self.start == other.start and self.stop == other.stop - - def identical(self, other): - other = pdindex._ensure_index(other) - return self.equals(other) and self.name == other.name - - def _overlaps(self, other): - # cheers to Ned Batchelder on this - # only overlaps if each ranges' end is beyond or *at* the other ranges' start. - # touching does not count as overlapping - return other.stop > self.start and self.stop > other.start - - def nonzero(self): - if self.start > 0: - return (np.arange(len(self)),) - else: - # need to skip when self is zero - res = range(len(self)) - res.pop(0 - self.start * self.step) - return (np.array(res),) - - def __contains__(self, val): - # can only hold integers - try: - v = val - val = int(val) - except (TypeError, ValueError): - return False - - # pd.isnull(val)? 
- if v != val or val != val: - return False - - return self.start <= val < self.stop - - def __iter__(self): - return iter(xrange(self.left, self.right, self.step)) - - def __len__(self): - return self.stop - self.start - - def __str__(self): - return str(self.as_int64index) - - def __repr__(self): - # TODO: Either change to Int64Repr OR to RangeIndex(left, right) - return "RangeIndex(%s, %s, %s)" % (self.left, self.right, self.step) - - def get_indexer(self, arr, method=None): - """Returns indexer (i.e., matching indices between the index and - arr).""" - # bfill : will fill everything < start as 0; > stop filled as -1 - # ffill : will fill everything < start as -1; > stop filled as - # len(self) - 1 - if method not in (None, 'ffill', 'pad', 'bfill', 'backfill'): - raise ValueError("Unknown method: %r" % method) - if method and not self.is_monotonic: - kind = 'forward' if method in ('ffill', 'pad') else 'backward' - raise ValueError("Must be monotonic for %s fill." % kind) - - arr = np.asarray(arr, dtype='int64') - indexer = arr - self.start - if not self.ascending: - indexer = (len(self) - 1) - indexer - - if method in ('ffill', 'pad'): - # next valid observation always 0 - min_fill, max_fill = 0, -1 - elif method in ('bfill', 'backfill'): - # last valid observation is len(self) - 1 - min_fill, max_fill = -1, len(self) - 1 - else: - min_fill = max_fill = -1 - - indexer[indexer < 0] = min_fill - indexer[indexer >= len(self)] = max_fill - return indexer - - def get_loc(self, val): - if val not in self: - return -1 - if self.ascending: - return val - self.start - else: - return self.stop - val - - def __getitem__(self, val): - # only want to handle the simple stuff here, otherwise let Int64Index - # handle it - if isinstance(val, slice) and val.step in (1, -1): - # Step 1 - convert the slice to be forward index (i.e. 
step == -1 - # --> step == 1) - v_start, v_stop = _get_forward_indices(len(self), val) - left, right = self.left, self.right - step = 1 if self.ascending else -1 - if v_start is None: - # empty range - return EMPTY_RANGE() - - # Step 3 - increment left by start of slice - left += v_start * step - - # Step 4 - set right to min(right, stop) - - # Step 5 - flip bounds if they were reversed - return RangeIndex(start, stop, step) - elif np.isscalar(val): - if -len(self) <= val < 0: - val = len(self) + val - if val > len(self): - raise IndexError("%d out of range" % val) - step = 1 if self.ascending else -1 - return self.left + val * step - else: - return self.as_int64index[val] - - def __bool__(self): - raise ValueError("The truth value of an array is ambiguous...") # blah blah blah - - __nonzero__ = __bool__ - - # don't need to override slice_indexer - def slice_locs(self, start=None, end=None): - start = self.get_loc(start) if start is not None else 0 - end = self.get_loc(end) + 1 if end is not None else len(self) - return start, end - - def get_indexer_non_unique(self, arr): - return self.get_indexer(self, arr), np.array([], dtype='int64') - -def _flip_bounds(start, stop, step): - """Returns bounds and step for reversed range (where end is non-inclusive): - >>> range(3, 6) - [3, 4, 5] - >>> _flip_bounds(3, 6, 1) - (5, 2, -1) - >>> range(*_flip_bounds(3, 6, 1)) - [5, 4, 3] - """ - return stop - step, start - step, step * -1 - - -def _get_forward_indices(length, slc): - """Converts given slice to positive, forward step indices. - Returns (None, None) if not possible to convert. 
- - >>> _get_forward_indices(10, slice(5, 1, -2)) - (2, 6) - >>> _get_forward_indices(10, slice(-100, -90, 5)) - (None, None) - >>> _get_forward_indices(5, slice(3, 4, 1)) - (3, 4) - """ - if slc.step == 0 or length == 0: - return None, None - start, stop = slc.start, slc.stop - if slc.step < 0: - # when you flip direction, need to increment edges - # e.g., [6:2:-1] --> [3:7][::-1] - start = start + 1 if start is not None else length - 1 - stop = stop + 1 if stop is not None else 0 - start, stop = stop, start - else: - start = start if start is not None else 0 - stop = stop if stop is not None else length - 1 - - if start >= stop or start > length or stop == 0 or stop < -length: - return (None, None) - if start < 0: - start = length + start if start > -length else 0 - if stop < 0: - stop = length + stop - - return start, min(length, stop) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 97fa442595893..6a6564347d35f 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -757,6 +757,19 @@ def f(): cat.add_categories(["d"]) self.assertRaises(ValueError, f) + # GH 9927 + cat = Categorical(list("abc"), ordered=True) + expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) + # test with Series, np.array, index, list + res = cat.add_categories(Series(["d", "e"])) + self.assert_categorical_equal(res, expected) + res = cat.add_categories(np.array(["d", "e"])) + self.assert_categorical_equal(res, expected) + res = cat.add_categories(Index(["d", "e"])) + self.assert_categorical_equal(res, expected) + res = cat.add_categories(["d", "e"]) + self.assert_categorical_equal(res, expected) + def test_remove_categories(self): cat = Categorical(["a","b","c","a"], ordered=True) old = cat.copy() diff --git a/pandas/tests/test_range.py b/pandas/tests/test_range.py deleted file mode 100644 index f17cea1e861fb..0000000000000 --- a/pandas/tests/test_range.py +++ /dev/null @@ -1,837 +0,0 @@ -import 
unittest - -import numpy as np -import pandas.util.testing as tm -from pandas.core.index import Index, Int64Index -from pandas.core.range import RangeIndex -import pandas.compat as compat -import nose - -lrange = lambda *args: list(range(*args)) - -def assert_almost_equal(a, b): - try: - tm.assert_almost_equal(a, b) - except: - print(a, b) - raise - - -def knownfail(f): - def wrapper(): - try: - f() - except Exception as e: - print("%s: KNOWN FAILURE: %r" % (f.__name__, e)) - else: - raise AssertionError("Known failure passed! %s" % f.__name__) - return wrapper - - -class self(object): - - """Fake for tests!""" - - @staticmethod - def assertEquals(a, b): - assert a == b, "%r != %r" % (a, b) - - assertEqual = assertEquals - - @staticmethod - def assertRaises(exc, f, *args, **kwargs): - try: - f(*args, **kwargs) - except exc: - return True - else: - raise AssertionError( - "Expected exception of type %s to be raised!" % exc) - - -class TestRangeIndex(unittest.TestCase): - - def test_basic(self): - self.assert_(not RangeIndex(1, 0).ascending) - self.assert_(RangeIndex(0, 100).ascending) - # make sure conditions work correctly - # descending - r = RangeIndex(10, 5) - # start and stopped are what the range would be if you sorted it, - # i.e. range(10, 5, -1) is [10, 9, 8, 7, 6]. 
And range(6, 11) is [6, 7, - # 8, 9, 10] - self.assertEqual(r.start, 6) - self.assertEqual(r.stop, 11) - self.assertEqual(r.left, 10) - self.assertEqual(r.right, 5) - self.assertEqual(r.step, -1) - - # ascending - r2 = RangeIndex(5, 10) - self.assertEqual(r2.start, 5) - self.assertEqual(r2.stop, 10) - self.assertEqual(r2.left, 5) - self.assertEqual(r2.right, 10) - self.assertEqual(r2.step, 1) - - # negative values - r3 = RangeIndex(-10, -9) - self.assertEqual(r3.start, -10) - self.assertEqual(r3.stop, -9) - self.assertEqual(r3.left, -10) - self.assertEqual(r3.right, -9) - self.assert_(r3.ascending) - self.assertEqual(r3.step, 1) - - r4 = RangeIndex(-8, -15) - self.assertEqual(r4.start, -14) - self.assertEqual(r4.stop, -7) - self.assertEqual(r4.right, -15) - self.assertEqual(r4.left, -8) - self.assert_(not r4.ascending) - self.assertEqual(r4.step, -1) - - def test_bad_input(self): - with tm.assertRaisesRegexp(TypeError, 'Need to pass integral values'): - RangeIndex(0, 1.25) - - with tm.assertRaisesRegexp(ValueError, 'invalid literal'): - RangeIndex(0, 'a') - - with tm.assertRaisesRegexp(TypeError, 'Need to pass integral values'): - RangeIndex('0', '5') - - - def test_contains(self): - - r = RangeIndex(9, 4) - r2 = RangeIndex(5, 10) - for i in range(5, 10): - self.assert_(i in r, i) - self.assert_(i in r2, i) - - def test_empty(self): - assert np.array_equal(RangeIndex(5, 5), Index([], dtype='int64')) - - def test_asarray(self): - # TODO: Remove this SkipTest once Index is not a subclass of ndarray - raise nose.SkipTest("This test case cannot work until Index is no " - "longer a subclass of ndarray") - # __array__ - self.assert_(np.array_equal(np.asarray(RangeIndex(1, 0)), - np.array([1]))) - self.assert_(np.array_equal(np.asarray(RangeIndex(0, 100)), - np.arange(0, 100))) - self.assert_(np.array_equal(RangeIndex(1, 0).values, np.array([1]))) - self.assert_(np.array_equal(RangeIndex(0, 100).values, - np.arange(0, 100))) - - def test_set_ops(self): - r1 = 
RangeIndex(1, 10) - r2 = RangeIndex(5, 10) - self.assert_(r1._overlaps(r2)) - self.assert_(r2._overlaps(r1)) - # union and intersection - underlying methods) - self.assert_(r1.intersection(r2).equals(RangeIndex(5, 10))) - self.assert_(r2.intersection(r1).equals(r1.intersection(r2))) - self.assert_((r1 & r2).equals(RangeIndex(5, 10))) - self.assert_((r2 & r1).equals((r1 & r2))) - self.assert_(r1.union(r2).equals(r2.union(r1))) - self.assert_(r1.union(r2).equals(r1)) - # union and intersection - with infix operators) - self.assert_((r1 + r2).equals((r2 + r1))) - self.assert_((r1 + r2).equals(r1)) - self.assert_((r1 | r2).equals((r2 | r1))) - self.assert_((r1 | r2).equals(r1)) - - # difference - underlying method) - self.assert_(r1.difference(r2).equals(RangeIndex(1, 5))) - self.assert_(r2.difference(r1).equals(Index([], dtype='int64'))) - self.assert_(r1.difference(r1).equals(Index([], dtype='int64'))) - self.assert_(r2.difference(r2).equals(Index([], dtype='int64'))) - # difference - with infix operator) - self.assert_((r1 - r2).equals(RangeIndex(1, 5))) - self.assert_((r2 - r1).equals(Index([], dtype='int64'))) - self.assert_((r1 - r1).equals(Index([], dtype='int64'))) - self.assert_((r2 - r2).equals(Index([], dtype='int64'))) - - def test_getitem_and_iter(self): - # basic container ops - pairs = [(RangeIndex(-10, -5), lrange(-10, -5)), - (RangeIndex(8, 5), lrange(8, 5, -1)), - (RangeIndex(0, 10), lrange(0, 10)), - (RangeIndex(-5, 5), lrange(-5, 5)), - (RangeIndex(3, -15), lrange(3, -15, -1))] - - for ind, rng in pairs: - try: - self.assertEqual(len(ind), len(rng)) - for i in range(len(rng)): - self.assertEqual(ind[i], rng[i]) - self.assertEqual(ind[-i], rng[-i]) - except: - print(i, ind, ind[i]) - print(i, rng, rng[i]) - raise - # basic __iter__ test - assert_almost_equal(list(ind), rng) - assert np.array_equal(ind.values, np.array(list(rng))) - - cases = 10 - for ind in zip(*pairs)[0]: - length = len(ind) - # edges - self.assertRaises(IndexError, lambda: 
ind[length]) - self.assertRaises(IndexError, lambda: ind[-length - 1]) - for _ in range(cases): - i = np.random.randint(1, 100) - self.assertRaises(IndexError, lambda: ind[length + i]) - self.assertRaises(IndexError, lambda: ind[-length - 1 - i]) - - def test_slicing(self): - pairs = [(RangeIndex(-10, -5), lrange(-10, -5)), # can remove later - (RangeIndex(8, 5), np.arange(8, 5, -1)), - (RangeIndex(0, 10), lrange(0, 10)), # can remove later - (RangeIndex(-3, 3), np.arange(-3, 3)), - (RangeIndex(3, -2), np.arange(3, -2, -1))] - # TODO: This is incredibly slow - pick something smaller to work with - for ind, rng in pairs: - assert_almost_equal(ind[:], rng[:]) - for i, j in [(i, j) for i in range(len(rng)) - for j in range(len(rng)) if i >= j]: - assert_almost_equal(ind[i:], rng[i:]) - assert_almost_equal(ind[:i], rng[:i]) - assert_almost_equal(ind[-i:], rng[-i:]) - assert_almost_equal(ind[:-i], rng[:-i]) - - assert_almost_equal(ind[i:j], rng[i:j]) - assert_almost_equal(ind[i:-j], rng[i:-j]) - assert_almost_equal(ind[-i:-j], rng[-i:-j]) - assert_almost_equal(ind[-i:j], rng[-i:j]) - - assert_almost_equal(ind[j:i], rng[j:i]) - assert_almost_equal(ind[j:-i], rng[j:-i]) - assert_almost_equal(ind[-j:-i], rng[-j:-i]) - assert_almost_equal(ind[-j:i], rng[-j:i]) - assert_almost_equal(ind[0:0], Index([], dtype='int64')) - assert_almost_equal(ind[8:8], Index([], dtype='int64')) - - def test_slicing_with_step_of_1(self): - # [::-1] yields self but reversed - rng1 = RangeIndex(-5, 5) - rev1 = rng1[::-1] - self.assertEqual(list(rev1), list(range(4, -6, -1))) - self.assert_(rev1.equals(RangeIndex(4, -6))) - self.assert_(rev1.equals(Index(np.arange(4, -6, -1)))) - - rng2 = RangeIndex(20, 10) - rev2 = rng2[::-1] - self.assertEqual(list(rev2), list(range(11, 21, 1))) - self.assert_(rev2.equals(RangeIndex(11, 21))) - self.assert_(rev2.equals(Index(np.arange(11, 21, 1)))) - - def test_slicing_with_other_steps(self): - pass - - def test_immutable(self): - # setitem - # setslice - pass - 
-# -# PandasObject properties -# - - def test_copy_and_view(self): - # shallow / deep copy should be same - pass - - def test_is__continuity(self): - # is should work on views/copies - # is should not work with two separately constructed indices - # is should be False when reversed or sliced - pass - - def test_equals(self): - # should work on views/copies - # should be equal when separately constructed - # should not be equal when reversed/reduced/etc - pass - - def test_error_on_bool(self): - self.assertRaises(ValueError, bool, RangeIndex(1, 5)) - self.assertRaises(ValueError, bool, RangeIndex(-10, -9)) - self.assertRaises(ValueError, bool, RangeIndex(1, 2)) - - def test_all_and_any(self): - zero_only = [RangeIndex(0, 1), RangeIndex(0, -1)] - assert not any(x.any() for x in zero_only) - assert not any(x.all() for x in zero_only) - assert RangeIndex(5, 10).any() - assert RangeIndex(5, 10).all() - assert not RangeIndex(-5, 5).all() - assert RangeIndex(-5, 5).any() - assert RangeIndex(-3, -1).any() - assert not RangeIndex(-3, 1).all() - assert RangeIndex(-3, 0).all() - - # - # indexing ops - # - def test_get_indexer(self): - idx1 = RangeIndex(1, 5) - # TODO: Consider supporting steps - idx2 = Index([2, 4, 6]) - idx3 = Index([1, 6, 7, 1, 2]) - idx4 = Index([-4, 3, 6]) - - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, [1, 3, -1]) - - r1 = idx1.get_indexer(idx3) - assert_almost_equal(r1, np.array([0, -1, -1, 0, 1])) - - assert_almost_equal(idx1.get_indexer(idx4), np.array([-1, 2, -1])) - - r1 = idx1.get_indexer(idx4, method='pad') - assert_almost_equal(r1, np.array([-1, 2, 3])) - - # synonym - rffill1 = idx1.get_indexer(idx4, method='ffill') - assert_almost_equal(r1, rffill1) - - r1 = idx1.get_indexer(idx4, method='backfill') - assert_almost_equal(r1, np.array([0, 2, -1])) - - rbfill1 = idx1.get_indexer(idx3, method='bfill') - assert_almost_equal(r1, rbfill1) - - # r1 = idx3.get_indexer(idx1, method='pad') - # assert_almost_equal(r1, [0, 0, 0, 0, 0]) - - # 
rffill1 = idx3.get_indexer(idx1, method='ffill') - - # r1 = idx3.get_indexer(idx1, method='backfill') - # assert_almost_equal(r1, [0, -1, -1, -1, -1]) - - # rbfill1 = idx3.get_indexer(idx1, method='bfill') - # assert_almost_equal(r1, rbfill1) - - def test_range_index_from_range(self): - def assert_fails(inpt): - res = RangeIndex.possibly_convert_array(inpt) - assert res is None, "Expected %r to return None" % inpt - - def assert_converts(inpt, expected): - res = RangeIndex.possibly_convert_array(inpt) - assert expected.equals(res), "With input %r, %r != %r" % (inpt, res, - expected) - assert_converts(range(5), RangeIndex(0, 5)) - assert_fails([1, 3, 7, 5]) - assert_fails([4, 10, 11, 13]) - assert_converts(np.arange(50, 40, -1), RangeIndex(50, 40)) - assert_converts([0], RangeIndex(0, 1)) - assert_fails([]) - - # dupe values - assert_fails([10, 9, 8, 7, 10]) - assert_fails([1, 2, 3, 4, 5, 7]) - - # should not try to convert dtype (caller responsibility) - arr = np.arange(5, 15) - assert_converts(arr, RangeIndex(5, 15)) - assert_fails(arr.astype(float)) - - # works with resort - assert_fails([-10, -5, -6, -7, -2, -3, -4, -8, -9]) - assert_fails([9, 8, 5, 7, 6]) - - # possibilities that *won't* work now but could in the future - # (i.e., nested ranges, steps) - assert_fails([15, 13, 11, 9, 7, 5]) - assert_fails([1, 2, 3, 8, 9, 10]) - assert_fails([2, 4, 6, 8, 10, 12]) - - def test_nonzero(self): - r1 = RangeIndex(0, 5) - a1 = np.arange(0, 5) - assert_almost_equal(r1.nonzero(), a1.nonzero()) - r2 = RangeIndex(5, 0) - a2 = np.arange(5, 0, -1) - assert_almost_equal(r2.nonzero(), a2.nonzero()) - assert_almost_equal(RangeIndex(-10, -5).nonzero(), - np.arange(-10, -5).nonzero()) - - def test_get_loc(self): - pass - - def test_groupby(self): - pass - - def test_slice_locs(self): - idx = RangeIndex(0, 11) - n = len(idx) - - self.assertEquals(idx.slice_locs(start=2), (2, n)) - self.assertEquals(idx.slice_locs(start=3), (3, n)) - self.assertEquals(idx.slice_locs(3, 8), (3, 8)) 
- self.assertEquals(idx.slice_locs(5, 10), (3, n)) - self.assertEquals(idx.slice_locs(end=8), (0, 8)) - self.assertEquals(idx.slice_locs(end=9), (0, 9)) - # monotonic *increasing* indexes allow slice_locs that aren't in the Index - self.assertEquals(idx.slice_locs(-5, 50), (0, 11)) - self.assertRaises(KeyError, lambda : idx[::-1].slice_locs(-5, 50)) - - idx2 = RangeIndex(5, 1) - self.assertRaises(KeyError, idx2.slice_locs, 8, 2) - self.assertRaises(KeyError, idx2.slice_locs, 7, 3) - - # - # Index inference - # - - @knownfail - def test_sorted_index_yields_range(self): - ind = Index(range(10)) - assert isinstance(ind, RangeIndex) - assert ind.equals(RangeIndex(0, 10)) - ind = Index(range(15, -1, -1)), - assert isinstance(ind, RangeIndex) - assert ind.equals(RangeIndex(15, -1)) - ind = Index([1, 3, 5, 7]) - assert not isinstance(ind, RangeIndex) - ind = Index(range(5) + [6]) - assert not isinstance(ind, RangeIndex) - ind = Index([1, 3, 2, 4, 5]) - assert not isinstance(ind, RangeIndex) - ind = Index(np.arange(0, 10).astype(float)) - assert not isinstance(ind, RangeIndex) - - -class TestRangeIndexInt64Compat(tm.TestCase): - _multiprocess_can_split_ = True - - def setUp(self): - self.index = RangeIndex(10, 20) - - def test_too_many_names(self): - with tm.assertRaisesRegexp(ValueError, "^Length"): - self.index.names = ["roger", "harold"] - - def test_constructor(self): - # TODO: Fill this in - raise AssertionError("Decide what to do here!") - # scalar raise Exception - self.assertRaises(TypeError, RangeIndex, 5) - - def test_basic_properties(self): - self.assertTrue(self.index.is_unique) - self.assertTrue(self.index.is_monotonic) - self.assertEqual(self.index.dtype, np.int64) - - def test_basic_functions(self): - self.assertTrue(self.index.is_numeric()) - self.assertTrue(self.index.is_integer()) - self.assertTrue(self.index.holds_integer()) - self.assertFalse(self.index.is_mixed()) - self.assertFalse(self.index.is_floating()) - - self.assertEqual(self.index.nlevels, 1) 
- self.assertEqual(self.index.inferred_type, 'integer') - self.assertEqual(self.get_duplicates(), []) - - def test_hash_error(self): - with tm.assertRaisesRegexp(TypeError, - "unhashable type: %r" % - type(self.index).__name__): - hash(self.index) - - def test_copy(self): - i = RangeIndex(0, 1, name='Foo') - i_copy = i.copy() - self.assert_(i_copy.name == 'Foo') - - def test_view(self): - i = RangeIndex(0, 1, name='Foo') - i_view = i.view() - self.assert_(i_view.name == 'Foo') - - def test_dtype(self): - self.assert_(self.index.dtype == np.int64) - - def test_is_monotonic(self): - # monotonic is monotonically *increasing* - self.assertTrue(RangeIndex(0, 5).is_monotonic) - self.assertFalse(RangeIndex(5, 0).is_monotonic) - self.assertFalse(RangeIndex(-5, 5)[::-1].is_monotonic) - # TODO: If you have empty Index, need to match regular Index - # self.assertTrue(RangeIndex(0, 0).is_monotonic) - - def test_equals(self): - same_values = Index(self.index, dtype=object) - self.assert_(self.index.equals(same_values)) - self.assert_(same_values.equals(self.index)) - - def test_identical(self): - i = self.index.copy() - same_values = RangeIndex(i.left, i.right, i.step, name=i.name) - self.assert_(i.identical(same_values)) - int64_values = Int64Index(list(i), name=i.name) - self.assertFalse(i.identical(int64_values)) - self.assertFalse(int64_values.identical(i)) - - i = self.index.copy() - i = i.rename('foo') - same_values = RangeIndex(i.left, i.right, i.step) - # no name passed through constructor - self.assert_(same_values.identical(self.index)) - self.assertFalse(i.identical(same_values)) - - def test_get_indexer(self): - def test_indexer(target, expected): - indexer = self.index.get_indexer(target) - self.assert_(np.array_equal(indexer, expected)) - - test_indexer(RangeIndex(-5, 5), - np.array([-1] * 10)) - - test_indexer(RangeIndex(5, 15), - np.array([-1, -1, -1, -1, -1, 0, 1, 2, 3, 4])) - - test_indexer(Index(list('abcd') + [11]), - np.array([-1, -1, -1, -1, 1])) - - 
test_indexer(Index([0.5, 0.25, 1, 18.0, 17]), - np.array([-1, -1, -1, 8, 7])) - - def test_get_indexer_fails_non_monotonic(self): - ind = RangeIndex(10, 5, -1) - - with tm.assertRaisesRegexp(ValueError, 'monotonic for backward fill'): - ind.get_indexer(Index([0]), method='bfill') - with tm.assertRaisesRegexp(ValueError, 'monotonic for backward fill'): - ind.get_indexer(Index([0]), method='backfill') - - with tm.assertRaisesRegexp(ValueError, 'monotonic for forward fill'): - ind.get_indexer(Index([0]), method='ffill') - with tm.assertRaisesRegexp(ValueError, 'monotonic for forward fill'): - ind.get_indexer(Index([0]), method='pad') - - def test_get_indexer_pad_int64_with_range(self): - # TODO: Move this over to Int64Index tests instead - target = RangeIndex(0, 10) - idx = Index(range(0, 20, 2)) - indexer = idx.get_indexer(target, method='pad') - expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) - self.assert_(np.array_equal(indexer, expected)) - - def test_get_indexer_pad(self): - idx = RangeIndex(-3, 0) - target = Index([-2, -1, 0, 1, 2]) - indexer = idx.get_indexer(target, method='pad') - expected = np.array([1, 2, 2, 2, 2]) - self.assert_(np.array_equal(indexer, expected)) - - target2 = Index([-4, -2, 1]) - indexer = idx.get_indexer(target2, method='pad') - expected = np.array([-1, 1, 2]) - self.assert_(np.array_equal(indexer, expected)) - - def test_get_indexer_backfill_int64_with_range(self): - # TODO: Move this over to Int64Index tests instead - target = RangeIndex(0, 10) - idx = Index(range(0, 20, 2)) - indexer = idx.get_indexer(target, method='backfill') - expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) - self.assert_(np.array_equal(indexer, expected)) - - def test_get_indexer_backfill(self): - idx = RangeIndex(3, 5) - target = Index([-4, -2, 3, 4, 5, 7]) - indexer = idx.get_indexer(target, method='backfill') - expected = np.array([0, 0, 1, 2, -1, -1]) - self.assert_(np.array_equal(indexer, expected)) - - # # TODO: Decide on ffill, bfill, pad for 
NON-integer with RangeIndex... - - def test_join_outer(self): - # TODO: Convert this to RangeIndex formatted - # 1. Write tests for take - # 2. Make sure this works with return indexers (which are just args to - # take). - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - # guarantee of sortedness - res, lidx, ridx = self.index.join(other, how='outer', - return_indexers=True) - noidx_res = self.index.join(other, how='outer') - self.assert_(res.equals(noidx_res)) - - eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) - elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], - dtype=np.int64) - eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], - dtype=np.int64) - - tm.assert_isinstance(res, Int64Index) - self.assert_(res.equals(eres)) - self.assert_(np.array_equal(lidx, elidx)) - self.assert_(np.array_equal(ridx, eridx)) - - # monotonic - res, lidx, ridx = self.index.join(other_mono, how='outer', - return_indexers=True) - noidx_res = self.index.join(other_mono, how='outer') - self.assert_(res.equals(noidx_res)) - - eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], - dtype=np.int64) - tm.assert_isinstance(res, Int64Index) - self.assert_(res.equals(eres)) - self.assert_(np.array_equal(lidx, elidx)) - self.assert_(np.array_equal(ridx, eridx)) - - def test_join_inner(self): - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - res, lidx, ridx = self.index.join(other, how='inner', - return_indexers=True) - - # no guarantee of sortedness, so sort for comparison purposes - ind = res.argsort() - res = res.take(ind) - lidx = lidx.take(ind) - ridx = ridx.take(ind) - - eres = Int64Index([2, 12]) - elidx = np.array([1, 6]) - eridx = np.array([4, 1]) - - tm.assert_isinstance(res, Int64Index) - self.assert_(res.equals(eres)) - self.assert_(np.array_equal(lidx, elidx)) - 
self.assert_(np.array_equal(ridx, eridx)) - - # monotonic - res, lidx, ridx = self.index.join(other_mono, how='inner', - return_indexers=True) - - res2 = self.index.intersection(other_mono) - self.assert_(res.equals(res2)) - - eridx = np.array([1, 4]) - tm.assert_isinstance(res, Int64Index) - self.assert_(res.equals(eres)) - self.assert_(np.array_equal(lidx, elidx)) - self.assert_(np.array_equal(ridx, eridx)) - - def test_join_left(self): - # TODO: Convert this to RangeIndex formatted - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - res, lidx, ridx = self.index.join(other, how='left', - return_indexers=True) - eres = self.index - eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], - dtype=np.int64) - - tm.assert_isinstance(res, Int64Index) - self.assert_(res.equals(eres)) - self.assert_(lidx is None) - self.assert_(np.array_equal(ridx, eridx)) - - # monotonic - res, lidx, ridx = self.index.join(other_mono, how='left', - return_indexers=True) - eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], - dtype=np.int64) - tm.assert_isinstance(res, Int64Index) - self.assert_(res.equals(eres)) - self.assert_(lidx is None) - self.assert_(np.array_equal(ridx, eridx)) - - # non-unique - """ - idx = Index([1,1,2,5]) - idx2 = Index([1,2,5,7,9]) - res, lidx, ridx = idx2.join(idx, how='left', return_indexers=True) - eres = idx2 - eridx = np.array([0, 2, 3, -1, -1]) - elidx = np.array([0, 1, 2, 3, 4]) - self.assert_(res.equals(eres)) - self.assert_(np.array_equal(lidx, elidx)) - self.assert_(np.array_equal(ridx, eridx)) - """ - - def test_join_right(self): - # TODO: Convert this to RangeIndex formatted - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - res, lidx, ridx = self.index.join(other, how='right', - return_indexers=True) - eres = other - elidx = np.array([-1, 6, -1, -1, 1, -1], - dtype=np.int64) - - tm.assert_isinstance(other, Int64Index) - 
self.assert_(res.equals(eres)) - self.assert_(np.array_equal(lidx, elidx)) - self.assert_(ridx is None) - - # monotonic - res, lidx, ridx = self.index.join(other_mono, how='right', - return_indexers=True) - eres = other_mono - elidx = np.array([-1, 1, -1, -1, 6, -1], - dtype=np.int64) - tm.assert_isinstance(other, Int64Index) - self.assert_(res.equals(eres)) - self.assert_(np.array_equal(lidx, elidx)) - self.assert_(ridx is None) - - # non-unique - """ - idx = Index([1,1,2,5]) - idx2 = Index([1,2,5,7,9]) - res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True) - eres = idx2 - elidx = np.array([0, 2, 3, -1, -1]) - eridx = np.array([0, 1, 2, 3, 4]) - self.assert_(res.equals(eres)) - self.assert_(np.array_equal(lidx, elidx)) - self.assert_(np.array_equal(ridx, eridx)) - - idx = Index([1,1,2,5]) - idx2 = Index([1,2,5,9,7]) - res = idx.join(idx2, how='right', return_indexers=False) - eres = idx2 - self.assert(res.equals(eres)) - """ - - def test_join_non_int_index(self): - # UPDATED - idx = RangeIndex(0, 15) - other = Index([3, 6, 7, 8, 10], dtype=object) - - outer = idx.join(other, how='outer') - outer2 = other.join(idx, how='outer') - expected = Index(range(0, 15), dtype=object) - self.assert_(outer.equals(outer2)) - self.assert_(outer.equals(expected)) - - inner = idx.join(other, how='inner') - inner2 = other.join(idx, how='inner') - expected = other.copy() # avoid is_ stuff - self.assert_(inner.equals(inner2)) - self.assert_(inner.equals(expected)) - - idx2 = RangeIndex(0, 4) - inner3 = idx2.join(other, how='inner') - inner4 = other.join(idx2, how='inner') - expected = Index([3], dtype=object) - self.assert_(inner3.equals(inner4)) - self.assert_(inner3.equals(expected)) - - - left = idx.join(other, how='left') - self.assert_(left.equals(idx)) - - left2 = other.join(idx, how='left') - self.assert_(left2.equals(other)) - - right = idx.join(other, how='right') - self.assert_(right.equals(other)) - - right2 = other.join(idx, how='right') - 
self.assert_(right2.equals(idx)) - - def test_join_self(self): - # UPDATED - idx = RangeIndex(-10, -4) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = idx.join(idx, how=kind) - self.assert_(self.index is joined) - - def test_intersection(self): - # TODO: Convert this to RangeIndex formatted - other = Index([1, 2, 3, 4, 5]) - result = self.index.intersection(other) - expected = np.sort(np.intersect1d(self.index.values, other.values)) - self.assert_(np.array_equal(result, expected)) - - result = other.intersection(self.index) - expected = np.sort(np.asarray(np.intersect1d(self.index.values, - other.values))) - self.assert_(np.array_equal(result, expected)) - - def test_union_noncomparable(self): - # TODO: Convert this to RangeIndex formatted - from datetime import datetime, timedelta - # corner case, non-Int64Index - now = datetime.now() - other = Index([now + timedelta(i) for i in range(4)], dtype=object) - result = self.index.union(other) - expected = np.concatenate((self.index, other)) - self.assert_(np.array_equal(result, expected)) - - result = other.union(self.index) - expected = np.concatenate((other, self.index)) - self.assert_(np.array_equal(result, expected)) - - # def test_view_Index(self): - # self.index.view(Index) - - def test_prevent_casting(self): - # TODO: Convert this to RangeIndex formatted - result = self.index.astype('O') - self.assert_(result.dtype == np.object_) - - def test_take_preserve_name(self): - # TODO: Convert this to RangeIndex formatted - index = RangeIndex(1, 4, name='foo') - taken = index.take([3, 0, 1]) - self.assertEqual(index.name, taken.name) - - def test_int_name_format(self): - from pandas import Series, DataFrame - index = RangeIndex(3, 0, -1, name=0) - s = Series(lrange(3), index) - df = DataFrame(lrange(3), index=index) - repr(s) - repr(df) - - def test_repr_roundtrip(self): - tm.assert_index_equal(eval(repr(self.index)), self.index) - - def test_unicode_string_with_unicode(self): - idx = 
RangeIndex(0, 1000) - - if compat.PY3: - str(idx) - else: - compat.text_type(idx) - - def test_bytestring_with_unicode(self): - idx = RangeIndex(0, 1000) - if compat.PY3: - bytes(idx) - else: - str(idx) - - def test_slice_keep_name(self): - idx = RangeIndex(1, 3, name='asdf') - self.assertEqual(idx.name, idx[1:].name) - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs'], - exit=False) From 197cd54bec0395fbfee5e594d3faa9e353ad5590 Mon Sep 17 00:00:00 2001 From: ARF Date: Thu, 23 Apr 2015 20:16:48 +0200 Subject: [PATCH 10/13] Re-implementation of RangeIndex() RangeIndex(1, 10, 2) is a memory saving alternative to Index(np.arange(1, 10,2)). This implementation is compatible with the current Index() api and is a drop-in replacement for Int64Index(). It automatically converts to Int64Index() when required by operations. --- pandas/core/api.py | 2 +- pandas/core/common.py | 5 + pandas/core/index.py | 345 +++++++++++++++++++++++- pandas/tests/test_index.py | 526 ++++++++++++++++++++++++++++++++++++- 4 files changed, 870 insertions(+), 8 deletions(-) diff --git a/pandas/core/api.py b/pandas/core/api.py index fde9bc77c4bd9..103fe740cfa36 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -8,7 +8,7 @@ from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper from pandas.core.format import set_eng_float_format -from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex +from pandas.core.index import Index, CategoricalIndex, Int64Index, RangeIndex, Float64Index, MultiIndex from pandas.core.series import Series, TimeSeries from pandas.core.frame import DataFrame diff --git a/pandas/core/common.py b/pandas/core/common.py index 3d23aeff942dc..878b1af078d4d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2498,6 +2498,11 @@ def is_integer_dtype(arr_or_dtype): not issubclass(tipo, (np.datetime64, np.timedelta64))) +def is_int64_dtype(arr_or_dtype): + tipo = 
_get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.int64) + + def is_int_or_datetime_dtype(arr_or_dtype): tipo = _get_dtype_type(arr_or_dtype) return (issubclass(tipo, np.integer) or diff --git a/pandas/core/index.py b/pandas/core/index.py index 8b650fea9b440..9a6dbb1f35205 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -7,6 +7,7 @@ from pandas.compat import range, zip, lrange, lzip, u, reduce, filter, map from pandas import compat import numpy as np +from math import ceil, floor from sys import getsizeof import pandas.tslib as tslib @@ -21,7 +22,7 @@ from pandas.core.common import (isnull, array_equivalent, is_dtype_equal, is_object_dtype, _values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype, ABCSeries, ABCCategorical, _ensure_object, _ensure_int64, is_bool_indexer, - is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype) + is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype, is_int64_dtype) from pandas.core.config import get_option from pandas.io.common import PerformanceWarning @@ -107,13 +108,36 @@ class Index(IndexOpsMixin, PandasObject): def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): + # RangeIndex pass-through + # Index(start, stop, ...) --> RangeIndex(start, stop, ...) + if isinstance(data, int): + if dtype is None and copy == False: + copy = None + range_constructor = True + elif isinstance(dtype, int): + range_constructor = True + if copy == False: + copy = None + elif isinstance(copy, int): + range_constructor = True + else: + range_constructor = False + + if range_constructor: + return RangeIndex(data, dtype, copy, name) + + # no class inference! 
if fastpath: return cls._simple_new(data, name) from pandas.tseries.period import PeriodIndex if isinstance(data, (np.ndarray, Index, ABCSeries)): - if issubclass(data.dtype.type, np.datetime64): + if (isinstance(data, RangeIndex) and + (dtype is None or is_int64_dtype(dtype))): + # copy passed-in RangeIndex + return data.copy(name=name) + elif issubclass(data.dtype.type, np.datetime64): from pandas.tseries.index import DatetimeIndex result = DatetimeIndex(data, copy=copy, name=name, **kwargs) if dtype is not None and _o_dtype == dtype: @@ -3299,6 +3323,323 @@ def _wrap_joined_index(self, joined, other): Int64Index._add_logical_methods() +class RangeIndex(Int64Index): + + """ + Immutable Index implementing an monotonic range. RangeIndex is a + memory-saving special case of `Int64Index` limited to representing + monotonic ranges. + + Parameters + ---------- + start : int (default: 0) + stop : int (default: 0) + step : int (default: 1) + name : object, optional + Name to be stored in the index + """ + + _typ = 'rangeindex' + _engine_type = _index.Int64Engine + _attributes = ['name', 'start', 'stop', 'step'] + + def __new__(cls, start=None, stop=None, step=None, name=None, fastpath=False): + if fastpath: + return cls._simple_new(start, stop, step, name=name) + + # RangeIndex() constructor + if start is None and stop is None and step is None: + start, stop, step = (0, 0, 1) + + # sort the arguments depending on which are provided + if step is None: + step = 1 + if stop is None: + stop = start + start = 0 + + # check validity of inputs + if (not isinstance(start, int) or + not isinstance(stop, int) or + not isinstance(step, int)): + raise TypeError("Need to pass integral values") + elif step == 0: + raise ValueError("Step must not be zero") + + return cls._simple_new(start, stop, step, name) + + @classmethod + def _simple_new(cls, start, stop, step, name=None): + result = object.__new__(cls) + result.start = start + result.stop = stop + result.step = step + result.name 
= name + return result + + @property + def _data(self): + return np.arange(self.start, self.stop, self.step, dtype=np.int64) + + @cache_readonly(allow_setting=True) + def is_unique(self): + """ return if the index has unique values """ + return True + + @property + def has_duplicates(self): + return not self.is_unique + + def tolist(self): + return list(range(self.start, self.stop, self.step)) + + def _shallow_copy(self, values=None, **kwargs): + """ create a new Index, don't copy the data, use the same object attributes + with passed in attributes taking precedence """ + if values is None: + return RangeIndex(self.start, self.stop, self.step, self.name) + else: + name = kwargs.get('name', self.name) + return Int64Index(self.values, name=name, copy=False)._shallow_copy(values, **kwargs) + + def copy(self, names=None, name=None, dtype=None, deep=False): + """ + Make a copy of this object. Name and dtype sets those attributes on + the new object. + + Parameters + ---------- + name : string, optional + dtype : numpy dtype or pandas type + + Returns + ------- + copy : Index + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. 
+ """ + if dtype is not None and not is_int64_dtype(dtype): + return super(RangeIndex, self).copy(names, name, dtype, deep) + + if name is None: + name = self.name + return RangeIndex(self.start, self.stop, self.step, name) + + def argsort(self, *args, **kwargs): + """ + return an ndarray indexer of the underlying data + + See also + -------- + numpy.ndarray.argsort + """ + return self._data.argsort(*args, **kwargs) + + def __repr__(self): + attrs = [('start', default_pprint(self.start)), + ('stop', default_pprint(self.stop)), + ('step', default_pprint(self.step)), + ('name', default_pprint(self.name))] + + prepr = u(", ").join([u("%s=%s") % (k, v) + for k, v in attrs]) + res = u("%s(%s)") % (self.__class__.__name__, prepr) + + if not compat.PY3: + # needs to be str in Python 2 + encoding = get_option('display.encoding') + res = res.encode(encoding) + return res + + def __unicode__(self): + """ + Return a string representation for this object. + + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. + """ + if self.start != 0 or self.step != 1: + start = u('%s, ') % default_pprint(self.start) + else: + start = u('') + stop = default_pprint(self.stop) + step = u('') if self.step == 1 else u(', %s') % default_pprint(self.step) + if self.name is None: + name = u('') + else: + name = u(', name=%s') % default_pprint(self.name) + + res = u("%s(%s%s%s%s)") % (self.__class__.__name__, + start, stop, step, name) + return res + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if self.is_(other): + return True + + elif isinstance(other, RangeIndex): + return (self.start == other.start and + self.stop == other.stop and + self.step == other.step) + + try: + return array_equivalent(_values_from_object(self), + _values_from_object(other)) + except TypeError: + # e.g. 
fails in numpy 1.6 with DatetimeIndex #1681 + return False + + def __reduce__(self): + d = self._get_attributes_dict() + return _new_Index, (self.__class__, d), None + + def view(self, cls=None): + if cls is None or is_int64_dtype(cls): + return self + else: + result = self._shallow_copy() + if isinstance(result, Index): + result._id = self._id + return result + + def intersection(self, other): + """ + Form the intersection of two Index objects. Sortedness of the result is + not guaranteed + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + intersection : Index + """ + if not isinstance(other, RangeIndex): + return super(RangeIndex, self).intersection(other) + + # check whether intervals intersect + # deals with in- and decreasing ranges + int_low = max(min(self.start, self.stop+1), + min(other.start, other.stop+1)) + int_high = min(max(self.stop, self.start+1), + max(other.stop, other.start+1)) + if int_high <= int_low: + return RangeIndex() + + ### Method hint: linear Diophantine equation + # solve intersection + # perf: for identical step sizes, could use cheaper alternative + gcd, s, t = self._extended_gcd(self.step, other.step) + + # check whether element sets intersect + if (self.start - other.start) % gcd: + return RangeIndex() + + # calculate parameters for the RangeIndex describing the intersection + # disregarding the lower bounds + tmp_start = self.start + (other.start-self.start)*self.step/gcd*s + new_step = self.step * other.step / gcd + assert new_step == int(new_step) + new_index = RangeIndex(tmp_start, int_high, new_step) + + # adjust index to limiting interval + new_index.start = new_index._min_fitting_element(int_low) + return new_index + + def _min_fitting_element(self, lower_limit): + """Returns the value of the smallest element greater than the limit""" + round = ceil if self.step > 0 else floor + no_steps = round( (float(lower_limit)-self.start) / self.step ) + return self.start + self.step * no_steps + + def 
_max_fitting_element(self, upper_limit): + """Returns the value of the largest element smaller than the limit""" + round = floor if self.step > 0 else ceil + no_steps = round( (float(upper_limit)-self.start) / self.step ) + return self.start + self.step * no_steps + + def _extended_gcd(self, a, b): + """ + Extended Euclidean algorithms to solve Bezout's identity: + a*x + b*y = gcd(x, y) + Finds one particular solution for x, y: s, t + Returns: gcd, s, t + """ + s, old_s = 0, 1 + t, old_t = 1, 0 + r, old_r = b, a + while r: + quotient = old_r / r + old_r, r = r, old_r - quotient * r + old_s, s = s, old_s - quotient * s + old_t, t = t, old_t - quotient * t + return old_r, old_s, old_t + + def union(self, other): + """ + Form the union of two Index objects and sorts if possible + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + union : Index + """ + # note: could return a RangeIndex in some circumstances + return Int64Index(self.values, copy=False).union(other) + + def join(self, other, how='left', level=None, return_indexers=False): + """ + *this is an internal non-public method* + + Compute join_index and indexers to conform data + structures to the new index. 
+ + Parameters + ---------- + other : Index + how : {'left', 'right', 'inner', 'outer'} + level : int or level name, default None + return_indexers : boolean, default False + + Returns + ------- + join_index, (left_indexer, right_indexer) + """ + if how == 'outer' and self is not other: + # note: could return RangeIndex in more circumstances + return Int64Index(self.values, copy=False).join(other, how, level, return_indexers) + + return super(RangeIndex, self).join(other, how, level, return_indexers) + + def _mul(self, other): + "__mul__() implementation" + try: + int_input = other == int(other) + if int_input: + other = int(other) + except Exception: + int_input = False + + if int_input == True and other != 0: + return RangeIndex(self.start*other, self.stop*other, self.step*other) + else: + return super(RangeIndex, self).__mul__(other) + +RangeIndex._add_numeric_methods() +RangeIndex.__mul__ = RangeIndex.__rmul__ = RangeIndex._mul +RangeIndex._add_logical_methods() + + class Float64Index(NumericIndex): """ diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 3c9dbd2e48cb6..201fa988e5859 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -13,7 +13,7 @@ from numpy.testing import assert_array_equal from pandas import (period_range, date_range, Categorical, Series, - Index, Float64Index, Int64Index, MultiIndex, + Index, Float64Index, Int64Index, RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex) from pandas.core.index import InvalidIndexError, NumericIndex from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, @@ -297,10 +297,6 @@ def test_constructor(self): # arr = np.array(5.) 
# self.assertRaises(Exception, arr.view, Index) - def test_constructor_corner(self): - # corner case - self.assertRaises(TypeError, Index, 0) - def test_constructor_from_series(self): expected = DatetimeIndex([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]) @@ -2447,6 +2443,526 @@ def test_slice_keep_name(self): idx = Int64Index([1, 2], name='asdf') self.assertEqual(idx.name, idx[1:].name) +class TestRangeIndex(Numeric, tm.TestCase): + _holder = RangeIndex + ### what does the following do? + #_multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index = RangeIndex(0, 20, 2)) + self.setup_indices() + + def create_index(self): + return RangeIndex(5) + + def test_too_many_names(self): + def testit(): + self.index.names = ["roger", "harold"] + assertRaisesRegexp(ValueError, "^Length", testit) + + def test_constructor(self): + index = RangeIndex(5) + expected = np.arange(5, dtype=np.int64) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex(1, 5) + expected = np.arange(1, 5, dtype=np.int64) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex(1, 5, 2) + expected = np.arange(1, 5, 2, dtype=np.int64) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex() + expected = np.empty(0, dtype=np.int64) + self.assert_numpy_array_equal(index, expected) + + def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = RangeIndex(1, 5) + self.assertEqual(index.values.dtype, np.int64) + self.assertTrue(index.equals(arr)) + + # non-int raise Exception + self.assertRaises(TypeError, RangeIndex, '1', '10', '1') + self.assertRaises(TypeError, RangeIndex, 1.0, 10.0, 1.0) + + # iterable raise Exception + self.assertRaises(TypeError, RangeIndex, iter([-5, 0, 1, 2])) + + def test_copy(self): + i = RangeIndex(5, name='Foo') + i_copy = i.copy() + self.assertEqual(i_copy.start, 0) + self.assertEqual(i_copy.stop, 5) + self.assertEqual(i_copy.step, 1) + 
self.assertEqual(i_copy.name, 'Foo') + + def test_view(self): + super(TestRangeIndex, self).test_view() + + i = RangeIndex(name='Foo') + i_view = i.view() + self.assertEqual(i_view.name, 'Foo') + + i_view = i.view('i8') + tm.assert_index_equal(i, i_view) + + i_view = i.view(RangeIndex) + tm.assert_index_equal(i, i_view) + + def test_index_constructor(self): + arr = Index(5) + tm.assert_isinstance(arr, RangeIndex) + + def test_dtype(self): + self.assertEqual(self.index.dtype, np.int64) + + def test_is_monotonic(self): + self.assertTrue(self.index.is_monotonic) + self.assertTrue(self.index.is_monotonic_increasing) + self.assertFalse(self.index.is_monotonic_decreasing) + + index = RangeIndex(4, 0, -1) + self.assertFalse(index.is_monotonic) + self.assertTrue(index.is_monotonic_decreasing) + + index = RangeIndex(1, 2) + self.assertTrue(index.is_monotonic) + self.assertTrue(index.is_monotonic_increasing) + self.assertTrue(index.is_monotonic_decreasing) + + def test_equals(self): + same_values = Index(self.index, dtype=object) + self.assertTrue(self.index.equals(same_values)) + self.assertTrue(same_values.equals(self.index)) + + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + + def test_identical(self): + i = Index(self.index.copy()) + self.assertTrue(i.identical(self.index)) + + same_values_different_type = Index(i, dtype=object) + self.assertFalse(i.identical(same_values_different_type)) + + i = self.index.copy(dtype=object) + i = i.rename('foo') + same_values = Index(i, dtype=object) + self.assertTrue(same_values.identical(self.index.copy(dtype=object))) + + self.assertFalse(i.identical(self.index)) + self.assertTrue(Index(same_values, name='foo', dtype=object + ).identical(i)) + + self.assertFalse( + self.index.copy(dtype=object) + .identical(self.index.copy(dtype='int64'))) + + def test_get_indexer(self): + target = RangeIndex(10) + indexer = 
self.index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_pad(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='pad') + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_backfill(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='backfill') + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) + self.assert_numpy_array_equal(indexer, expected) + + def test_join_outer(self): + ### join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]) + elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1], + dtype=np.int64) + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], + dtype=np.int64) + + tm.assert_isinstance(res, Int64Index) + self.assertFalse(isinstance(res, RangeIndex)) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + ### join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + tm.assert_isinstance(res, Int64Index) + self.assertFalse(isinstance(res, RangeIndex)) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_inner(self): + ### Join with non-RangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, 
how='inner', + return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([16, 18]) + elidx = np.array([8, 9]) + eridx = np.array([9, 7]) + + tm.assert_isinstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + ### Join two RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + tm.assert_isinstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + + def test_join_left(self): + ### Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + eres = self.index + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], + dtype=np.int64) + + tm.assert_isinstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + ### Join withRangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + + tm.assert_isinstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + ### Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], + dtype=np.int64) + + tm.assert_isinstance(other, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + ### Join withRangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, 
ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + + tm.assert_isinstance(other, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + def test_join_non_int_index(self): + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = self.index.join(other, how='outer') + outer2 = other.join(self.index, how='outer') + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, + 16, 18], dtype=object) + self.assertTrue(outer.equals(outer2)) + self.assertTrue(outer.equals(expected)) + + inner = self.index.join(other, how='inner') + inner2 = other.join(self.index, how='inner') + expected = Index([6, 8, 10], dtype=object) + self.assertTrue(inner.equals(inner2)) + self.assertTrue(inner.equals(expected)) + + left = self.index.join(other, how='left') + self.assertTrue(left.equals(self.index)) + + left2 = other.join(self.index, how='left') + self.assertTrue(left2.equals(other)) + + right = self.index.join(other, how='right') + self.assertTrue(right.equals(other)) + + right2 = other.join(self.index, how='right') + self.assertTrue(right2.equals(self.index)) + + def test_join_non_unique(self): + other = Index([4, 4, 3, 3]) + + res, lidx, ridx = self.index.join(other, return_indexers=True) + + eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64) + eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.int64) + + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_self(self): + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = self.index.join(self.index, how=kind) + self.assertIs(self.index, joined) + + def test_intersection(self): + ### intersect with Int64Index + other = Index(np.arange(1, 6)) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, 
other.values)) + self.assert_numpy_array_equal(result, expected) + + result = other.intersection(self.index) + expected = np.sort(np.asarray(np.intersect1d(self.index.values, + other.values))) + self.assert_numpy_array_equal(result, expected) + + ### intersect with increasing RangeIndex + other = Index(1, 6) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + ### intersect with decreasing RangeIndex + other = Index(5, 0, -1) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + self.assertEqual(len(res), 0) + + def test_union_noncomparable(self): + from datetime import datetime, timedelta + # corner case, non-Int64Index + now = datetime.now() + other = Index([now + timedelta(i) for i in range(4)], dtype=object) + result = self.index.union(other) + expected = np.concatenate((self.index, other)) + self.assert_numpy_array_equal(result, expected) + + result = other.union(self.index) + expected = np.concatenate((other, self.index)) + self.assert_numpy_array_equal(result, expected) + + def test_cant_or_shouldnt_cast(self): + # can't + self.assertRaises(TypeError, RangeIndex, 'foo', 'bar', 'baz') + + # shouldn't + self.assertRaises(TypeError, RangeIndex, '0', '1', '2') + + def test_view_Index(self): + self.index.view(Index) + + def test_prevent_casting(self): + result = self.index.astype('O') + self.assertEqual(result.dtype, np.object_) + + def test_take_preserve_name(self): + index = RangeIndex(1, 5, name='foo') + taken = index.take([3, 0, 1]) + self.assertEqual(index.name, taken.name) + + def test_int_name_format(self): + from pandas import Series, 
DataFrame + index = Index(3, name=0) + s = Series(lrange(3), index) + df = DataFrame(lrange(3), index=index) + repr(s) + repr(df) + + def test_print_unicode_columns(self): + df = pd.DataFrame( + {u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + def test_repr_roundtrip(self): + tm.assert_index_equal(eval(repr(self.index)), self.index) + + def test_unicode_string_with_unicode(self): + idx = Index(1000) + + if compat.PY3: + str(idx) + else: + compat.text_type(idx) + + def test_bytestring_with_unicode(self): + idx = Index(1000) + if compat.PY3: + bytes(idx) + else: + str(idx) + + def test_slice_keep_name(self): + idx = RangeIndex(1, 2, name='asdf') + self.assertEqual(idx.name, idx[1:].name) + + def test_numeric_compat(self): + idx = RangeIndex(5) + didx = Index(np.arange(5,dtype='int64')**2) + + # note: special cases of the following could return RangeIndex + # see _mul() example + + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + result = idx * idx + tm.assert_index_equal(result, didx) + + result = idx / 1 + tm.assert_index_equal(result, idx) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5,dtype='int64') + tm.assert_index_equal(result, Index(np.arange(5,dtype='int64')*5)) + + result = idx * np.arange(5,dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5,dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5,dtype='float64')+0.1) + tm.assert_index_equal(result, + Float64Index(np.arange(5,dtype='float64')*(np.arange(5,dtype='float64')+0.1))) + + # invalid + self.assertRaises(TypeError, lambda : idx * date_range('20130101',periods=5)) + self.assertRaises(ValueError, lambda : idx * self._holder(3)) + self.assertRaises(ValueError, lambda : idx * np.array([1,2])) + + def test_explicit_conversions(self): + + # GH 8608 + 
# add/sub are overriden explicity for Float/Int Index + idx = RangeIndex(5) + + # float conversions + arr = np.arange(5,dtype='int64')*3.2 + expected = Float64Index(arr) + fidx = idx * 3.2 + tm.assert_index_equal(fidx,expected) + fidx = 3.2 * idx + tm.assert_index_equal(fidx,expected) + + # interops with numpy arrays + expected = Float64Index(arr) + a = np.zeros(5,dtype='float64') + result = fidx - a + tm.assert_index_equal(result,expected) + + expected = Float64Index(-arr) + a = np.zeros(5,dtype='float64') + result = a - fidx + tm.assert_index_equal(result,expected) + + def test_duplicates(self): + # RangeIndex has no duplicates by definition + pass + + def test_ufunc_compat(self): + idx = RangeIndex(5) + result = np.sin(idx) + expected = Float64Index(np.sin(np.arange(5,dtype='int64'))) + tm.assert_index_equal(result, expected) + + def test_extended_gcd(self): + result = self.index._extended_gcd(6, 10) + self.assertEqual(result[0], result[1]*6 + result[2]*10) + self.assertEqual(2, result[0]) + + result = self.index._extended_gcd(10, 6) + self.assertEqual(2, result[1]*10 + result[2]*6) + self.assertEqual(2, result[0]) + + def test_min_fitting_element(self): + result = RangeIndex(0, 20, 2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(1, 6)._min_fitting_element(1) + self.assertEqual(1, result) + + result = RangeIndex(18, -2, -2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(5, 0, -1)._min_fitting_element(1) + self.assertEqual(1, result) + + def test_max_fitting_element(self): + result = RangeIndex(0, 20, 2)._max_fitting_element(17) + self.assertEqual(16, result) + + result = RangeIndex(1, 6)._max_fitting_element(4) + self.assertEqual(4, result) + + result = RangeIndex(18, -2, -2)._max_fitting_element(17) + self.assertEqual(16, result) + + result = RangeIndex(5, 0, -1)._max_fitting_element(4) + self.assertEqual(4, result) + + def test_pickle_compat_construction(self): + # RangeIndex() is a valid 
constructor + pass + class DatetimeLike(Base): def test_view(self): From 4dab44ce5cc730b69cfa786119406258b3d33920 Mon Sep 17 00:00:00 2001 From: ARF Date: Thu, 23 Apr 2015 23:32:54 +0200 Subject: [PATCH 11/13] fix python3: use floor division --- pandas/core/index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 9a6dbb1f35205..a6545e7c3cc40 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -3544,8 +3544,8 @@ def intersection(self, other): # calculate parameters for the RangeIndex describing the intersection # disregarding the lower bounds - tmp_start = self.start + (other.start-self.start)*self.step/gcd*s - new_step = self.step * other.step / gcd + tmp_start = self.start + (other.start-self.start)*self.step//gcd*s + new_step = self.step * other.step // gcd assert new_step == int(new_step) new_index = RangeIndex(tmp_start, int_high, new_step) @@ -3576,7 +3576,7 @@ def _extended_gcd(self, a, b): t, old_t = 1, 0 r, old_r = b, a while r: - quotient = old_r / r + quotient = old_r // r old_r, r = r, old_r - quotient * r old_s, s = s, old_s - quotient * s old_t, t = t, old_t - quotient * t From 3d2e48d3c5fe6fb16e43b7d56cc618052e16f051 Mon Sep 17 00:00:00 2001 From: ARF Date: Fri, 24 Apr 2015 12:30:14 +0200 Subject: [PATCH 12/13] protect properties, fastpath-ing, specialised slicing, size, __len__() --- pandas/core/index.py | 120 ++++++++++++++++++++++++++++++++----- pandas/tests/test_index.py | 43 ++++++++++++- 2 files changed, 148 insertions(+), 15 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index a6545e7c3cc40..f5278ccc9cf6e 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -3349,7 +3349,7 @@ def __new__(cls, start=None, stop=None, step=None, name=None, fastpath=False): # RangeIndex() constructor if start is None and stop is None and step is None: - start, stop, step = (0, 0, 1) + return cls._simple_new(0, 0, 1, name=name) # sort the arguments 
depending on which are provided if step is None: @@ -3359,11 +3359,10 @@ def __new__(cls, start=None, stop=None, step=None, name=None, fastpath=False): start = 0 # check validity of inputs - if (not isinstance(start, int) or - not isinstance(stop, int) or - not isinstance(step, int)): - raise TypeError("Need to pass integral values") - elif step == 0: + start = cls._ensure_int(start) + stop = cls._ensure_int(stop) + step = cls._ensure_int(step) + if step == 0: raise ValueError("Step must not be zero") return cls._simple_new(start, stop, step, name) @@ -3371,16 +3370,54 @@ def __new__(cls, start=None, stop=None, step=None, name=None, fastpath=False): @classmethod def _simple_new(cls, start, stop, step, name=None): result = object.__new__(cls) - result.start = start - result.stop = stop - result.step = step + result._start = start + result._stop = stop + result._step = step result.name = name return result + @classmethod + def _ensure_int(cls, value): + try: + int_value = int(value) + if int_value != value: + raise Exception + except Exception: + raise TypeError("Need to pass integral values") + return int_value + @property def _data(self): return np.arange(self.start, self.stop, self.step, dtype=np.int64) + @property + def dtype(self): + return np.dtype(np.int64) + + @property + def start(self): + return self._start + + @start.setter + def start(self, value): + self._start = self._ensure_int(value) + + @property + def stop(self): + return self._stop + + @stop.setter + def stop(self, value): + self._stop = self._ensure_int(value) + + @property + def step(self): + return self._step + + @step.setter + def step(self, value): + self._step = self._ensure_int(value) + @cache_readonly(allow_setting=True) def is_unique(self): """ return if the index has unique values """ @@ -3397,7 +3434,8 @@ def _shallow_copy(self, values=None, **kwargs): """ create a new Index, don't copy the data, use the same object attributes with passed in attributes taking precedence """ if values is 
None: - return RangeIndex(self.start, self.stop, self.step, self.name) + return RangeIndex(self.start, self.stop, self.step, + name=self.name, fastpath=True) else: name = kwargs.get('name', self.name) return Int64Index(self.values, name=name, copy=False)._shallow_copy(values, **kwargs) @@ -3426,7 +3464,7 @@ def copy(self, names=None, name=None, dtype=None, deep=False): if name is None: name = self.name - return RangeIndex(self.start, self.stop, self.step, name) + return RangeIndex(self.start, self.stop, self.step, name, fastpath=True) def argsort(self, *args, **kwargs): """ @@ -3546,8 +3584,7 @@ def intersection(self, other): # disregarding the lower bounds tmp_start = self.start + (other.start-self.start)*self.step//gcd*s new_step = self.step * other.step // gcd - assert new_step == int(new_step) - new_index = RangeIndex(tmp_start, int_high, new_step) + new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True) # adjust index to limiting interval new_index.start = new_index._min_fitting_element(int_low) @@ -3631,10 +3668,65 @@ def _mul(self, other): int_input = False if int_input == True and other != 0: - return RangeIndex(self.start*other, self.stop*other, self.step*other) + return RangeIndex(self.start*other, self.stop*other, self.step*other, + fastpath=True) else: return super(RangeIndex, self).__mul__(other) + def __len__(self): + """ + return the length of the RangeIndex + """ + return (self.stop-self.start) // self.step + + @property + def size(self): + return len(self) + + def __getitem__(self, key): + """ + Conserve RangeIndex type for scalar and slice keys. 
+ """ + super_getitem = super(RangeIndex, self).__getitem__ + + if np.isscalar(key): + n = int(key) + if n != key: + return super_getitem(key) + if n < 0: + n = len(self) + key + if n < 0 or n > len(self)-1: + raise IndexError('index %d is out of bounds for axis 0 with size %d' % (key, len(self))) + return self.start + n * self.step + + if isinstance(key, slice): + # complete missing slice information + n_start = 0 if key.start is None else key.start + n_stop = len(self)+1 if key.stop is None else key.stop + n_step = 1 if key.step is None else key.step + + # delegate non-integer slices + if (n_start != int(n_start) and + n_stop != int(n_stop) and + n_step != int(n_step)): + return super_getitem(key) + + # deal with index wrap-around + n_start = len(self)+n_start if n_start < 0 else n_start + n_stop = len(self)+n_stop if n_stop < 0 else n_stop + + + # convert indexes to values + start = self.start + self.step * n_start + stop = self.start + self.step * n_stop + 1 + step = self.step * n_step + + stop = min(stop, self.stop) + return RangeIndex(start, stop, step, self.name, fastpath=True) + + # fall back to Int64Index + return super_getitem(key) + RangeIndex._add_numeric_methods() RangeIndex.__mul__ = RangeIndex.__rmul__ = RangeIndex._mul RangeIndex._add_logical_methods() diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 201fa988e5859..bb0e6abf93d59 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -2485,7 +2485,7 @@ def test_constructor_corner(self): # non-int raise Exception self.assertRaises(TypeError, RangeIndex, '1', '10', '1') - self.assertRaises(TypeError, RangeIndex, 1.0, 10.0, 1.0) + self.assertRaises(TypeError, RangeIndex, 1.1, 10.2, 1.3) # iterable raise Exception self.assertRaises(TypeError, RangeIndex, iter([-5, 0, 1, 2])) @@ -2963,6 +2963,47 @@ def test_pickle_compat_construction(self): # RangeIndex() is a valid constructor pass + def test_slice_specialised(self): + # scalar indexing + res = self.index[1] 
+ expected = 2 + self.assertEqual(res, expected) + + res = self.index[-1] + expected = 18 + self.assertEqual(res, expected) + + ### slicing + # slice value completion + index = self.index[:] + expected = self.index + self.assert_numpy_array_equal(index, expected) + + # positive slice values + index = self.index[7:10:2] + expected = np.array([14, 18]) + self.assert_numpy_array_equal(index, expected) + + # negative slice values + index = self.index[-1:-5:-2] + expected = np.array([18, 14]) + self.assert_numpy_array_equal(index, expected) + + # stop overshoot + index = self.index[2:100:4] + expected = np.array([4, 12]) + self.assert_numpy_array_equal(index, expected) + + def test_len_specialised(self): + # TODO: How to test that len is specialised rather than calling + # the parent classes __len__() (which is slow)? + pass + + def test_size_specialised(self): + # TODO: How to test that size is specialised rather than calling + # the parent classes size property (which is slow)? + pass + class DatetimeLike(Base): def test_view(self): From 73871dcdd6bd8345cca13e9c8ecac86a8e369423 Mon Sep 17 00:00:00 2001 From: ARF Date: Fri, 24 Apr 2015 12:57:18 +0200 Subject: [PATCH 13/13] fix rebasing issues --- .travis.yml | 2 -- doc/source/whatsnew/v0.16.1.txt | 15 +++++++-------- pandas/core/categorical.py | 2 +- pandas/tests/test_categorical.py | 13 ------------- 4 files changed, 8 insertions(+), 24 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0d143d7f7133b..bc87853b26d6e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,6 @@ matrix: - LOCALE_OVERRIDE="it_IT.UTF-8" - BUILD_TYPE=conda - JOB_NAME: "26_nslow_nnet" - - INSTALL_TEST=true - python: 2.7 env: - NOSE_ARGS="slow and not network and not disabled" @@ -184,7 +183,6 @@ script: # nothing here, or failed tests won't fail travis after_script: - - ci/install_test.sh - if [ -f /tmp/doc.log ]; then cat /tmp/doc.log; fi - source activate pandas && ci/print_versions.py - ci/print_skipped.py /tmp/nosetests.xml diff 
--git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 3d5c95aee2e92..08d8ef9116367 100755 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -9,7 +9,7 @@ We recommend that all users upgrade to this version. Highlights include: -- Support for a ``CategoricalIndex``, a category based index, see :ref:`here ` +- Support for a ``CategoricalIndex``, a category based index, see :ref:`here ` .. contents:: What's new in v0.16.1 :local: @@ -24,15 +24,15 @@ Enhancements - Added ``StringMethods.capitalize()`` and ``swapcase`` which behave as the same as standard ``str`` (:issue:`9766`) - Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`) - The ``.str`` accessor is now available for both ``Series`` and ``Index``. + The `.str` accessor is now available for both `Series` and `Index`. .. ipython:: python idx = Index([' jack', 'jill ', ' jesse ', 'frank']) idx.str.strip() - One special case for the `.str` accessor on ``Index`` is that if a string method returns ``bool``, the ``.str`` accessor - will return a ``np.array`` instead of a boolean ``Index`` (:issue:`8875`). This enables the following expression + One special case for the `.str` accessor on `Index` is that if a string method returns `bool`, the `.str` accessor + will return a `np.array` instead of a boolean `Index` (:issue:`8875`). This enables the following expression to work naturally: @@ -46,7 +46,7 @@ Enhancements - ``DataFrame.mask()`` and ``Series.mask()`` now support same keywords as ``where`` (:issue:`8801`) -- ``drop`` function can now accept ``errors`` keyword to suppress ``ValueError`` raised when any of label does not exist in the target data. (:issue:`6736`) +- ``drop`` function can now accept ``errors`` keyword to suppress ValueError raised when any of label does not exist in the target data. (:issue:`6736`) .. 
ipython:: python @@ -61,7 +61,6 @@ Enhancements - Allow Panel.shift with ``axis='items'`` (:issue:`9890`) - Trying to write an excel file now raises ``NotImplementedError`` if the ``DataFrame`` has a ``MultiIndex`` instead of writing a broken Excel file. (:issue:`9794`) -- Allow ``Categorical.add_categories`` to accept ``Series`` or ``np.array``. (:issue:`9927`) - Add/delete ``str/dt/cat`` accessors dynamically from ``__dir__``. (:issue:`9910`) @@ -87,7 +86,7 @@ setting the index of a ``DataFrame/Series`` with a ``category`` dtype would conv df.dtypes df.B.cat.categories -setting the index, will create create a ``CategoricalIndex`` +setting the index, will create create a CategoricalIndex .. ipython :: python @@ -179,7 +178,7 @@ Bug Fixes - Fixed bug (:issue:`9542`) where labels did not appear properly in legend of ``DataFrame.plot()``. Passing ``label=`` args also now works, and series indices are no longer mutated. - Bug in json serialization when frame has length zero.(:issue:`9805`) -- Bug in ``read_csv`` where missing trailing delimiters would cause segfault. (:issue:`5664`) +- Bug in `read_csv` where missing trailing delimiters would cause segfault. (:issue:`5664`) - Bug in retaining index name on appending (:issue:`9862`) - Bug in ``scatter_matrix`` draws unexpected axis ticklabels (:issue:`5662`) - Fixed bug in ``StataWriter`` resulting in changes to input ``DataFrame`` upon save (:issue:`9795`). 
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index caf706fcbcbbd..9537523380350 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -708,7 +708,7 @@ def add_categories(self, new_categories, inplace=False): if len(already_included) != 0: msg = "new categories must not include old categories: %s" % str(already_included) raise ValueError(msg) - new_categories = list(self._categories) + list(new_categories) + new_categories = list(self._categories) + (new_categories) new_categories = self._validate_categories(new_categories) cat = self if inplace else self.copy() cat._categories = new_categories diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 6a6564347d35f..97fa442595893 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -757,19 +757,6 @@ def f(): cat.add_categories(["d"]) self.assertRaises(ValueError, f) - # GH 9927 - cat = Categorical(list("abc"), ordered=True) - expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) - # test with Series, np.array, index, list - res = cat.add_categories(Series(["d", "e"])) - self.assert_categorical_equal(res, expected) - res = cat.add_categories(np.array(["d", "e"])) - self.assert_categorical_equal(res, expected) - res = cat.add_categories(Index(["d", "e"])) - self.assert_categorical_equal(res, expected) - res = cat.add_categories(["d", "e"]) - self.assert_categorical_equal(res, expected) - def test_remove_categories(self): cat = Categorical(["a","b","c","a"], ordered=True) old = cat.copy()