From 2a64a4f74cb9d5014e0d424cd23dbee26efefaeb Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 14:03:27 -0500 Subject: [PATCH] CLN: split off frozen (immutable) data structures into pandas/indexes/frozen.py --- pandas/compat/pickle_compat.py | 34 +++++++- pandas/core/base.py | 105 ----------------------- pandas/indexes/base.py | 13 +-- pandas/indexes/frozen.py | 126 ++++++++++++++++++++++++++++ pandas/indexes/multi.py | 6 +- pandas/tests/indexes/test_frozen.py | 68 +++++++++++++++ pandas/tests/test_base.py | 68 +-------------- 7 files changed, 231 insertions(+), 189 deletions(-) create mode 100644 pandas/indexes/frozen.py create mode 100644 pandas/tests/indexes/test_frozen.py diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 1cdf8afd563c6..240baa848adbc 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -52,12 +52,40 @@ def load_reduce(self): stack[-1] = value + +# if classes are moved, provide compat here +_class_locations_map = { + + # 15477 + ('pandas.core.base', 'FrozenNDArray'): ('pandas.indexes.frozen', 'FrozenNDArray'), + ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList') + } + + +# our Unpickler sub-class to override methods and some dispatcher +# functions for compat + if compat.PY3: class Unpickler(pkl._Unpickler): - pass + + def find_class(self, module, name): + # override superclass + key = (module, name) + module, name = _class_locations_map.get(key, key) + return super(Unpickler, self).find_class(module, name) + else: + class Unpickler(pkl.Unpickler): - pass + + def find_class(self, module, name): + # override superclass + key = (module, name) + module, name = _class_locations_map.get(key, key) + __import__(module) + mod = sys.modules[module] + klass = getattr(mod, name) + return klass Unpickler.dispatch = copy.copy(Unpickler.dispatch) Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce @@ -76,8 +104,6 @@ def load_newobj(self): self.stack[-1] = obj Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj -# py3 compat - def load_newobj_ex(self): kwargs = self.stack.pop() diff --git a/pandas/core/base.py b/pandas/core/base.py index 92ec6bb3d73e6..55149198b0dbf 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -17,7 +17,6 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError -from pandas.formats.printing import pprint_thing _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', @@ -694,110 +693,6 @@ def _gotitem(self, key, ndim, subset=None): return self -class FrozenList(PandasObject, list): - - """ - Container that doesn't allow setting item *but* - because it's technically non-hashable, will be used - for lookups, appropriately, etc. - """ - # Sidenote: This has to be of type list, otherwise it messes up PyTables - # typechecks - - def __add__(self, other): - if isinstance(other, tuple): - other = list(other) - return self.__class__(super(FrozenList, self).__add__(other)) - - __iadd__ = __add__ - - # Python 2 compat - def __getslice__(self, i, j): - return self.__class__(super(FrozenList, self).__getslice__(i, j)) - - def __getitem__(self, n): - # Python 3 compat - if isinstance(n, slice): - return self.__class__(super(FrozenList, self).__getitem__(n)) - return super(FrozenList, self).__getitem__(n) - - def __radd__(self, other): - if isinstance(other, tuple): - other = list(other) - return self.__class__(other + list(self)) - - def __eq__(self, other): - if isinstance(other, (tuple, FrozenList)): - other = list(other) - return super(FrozenList, self).__eq__(other) - - __req__ = __eq__ - - def __mul__(self, other): - return self.__class__(super(FrozenList, self).__mul__(other)) - - __imul__ = __mul__ - - def __reduce__(self): - return self.__class__, (list(self),) - - def __hash__(self): - return hash(tuple(self)) - - def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" - raise TypeError("'%s' does not support mutable operations." % - self.__class__.__name__) - - def __unicode__(self): - return pprint_thing(self, quote_strings=True, - escape_chars=('\t', '\r', '\n')) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, - str(self)) - - __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled - pop = append = extend = remove = sort = insert = _disabled - - -class FrozenNDArray(PandasObject, np.ndarray): - - # no __array_finalize__ for now because no metadata - def __new__(cls, data, dtype=None, copy=False): - if copy is None: - copy = not isinstance(data, FrozenNDArray) - res = np.array(data, dtype=dtype, copy=copy).view(cls) - return res - - def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" - raise TypeError("'%s' does not support mutable operations." % - self.__class__) - - __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled - put = itemset = fill = _disabled - - def _shallow_copy(self): - return self.view() - - def values(self): - """returns *copy* of underlying array""" - arr = self.view(np.ndarray).copy() - return arr - - def __unicode__(self): - """ - Return a string representation for this object. - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. - """ - prepr = pprint_thing(self, escape_chars=('\t', '\r', '\n'), - quote_strings=True) - return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) - - class IndexOpsMixin(object): """ common ops mixin to support a unified inteface / docs for Series / Index diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index f1f37622b2a74..4837fc0d7438c 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -35,16 +35,15 @@ needs_i8_conversion, is_iterator, is_list_like, is_scalar) -from pandas.types.cast import _coerce_indexer_dtype from pandas.core.common import (is_bool_indexer, _values_from_object, _asarray_tuplesafe) -from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, - IndexOpsMixin) +from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.base as base from pandas.util.decorators import (Appender, Substitution, cache_readonly, deprecate, deprecate_kwarg) +from pandas.indexes.frozen import FrozenList import pandas.core.common as com import pandas.types.concat as _concat import pandas.core.missing as missing @@ -3844,14 +3843,6 @@ def _get_na_value(dtype): np.timedelta64: tslib.NaT}.get(dtype, np.nan) -def _ensure_frozen(array_like, categories, copy=False): - array_like = _coerce_indexer_dtype(array_like, categories) - array_like = array_like.view(FrozenNDArray) - if copy: - array_like = array_like.copy() - return array_like - - def _ensure_has_len(seq): """If seq is an iterator, put its values into a list.""" try: diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py new file mode 100644 index 0000000000000..e043ba64bbad7 --- /dev/null +++ b/pandas/indexes/frozen.py @@ -0,0 +1,126 @@ +""" +frozen (immutable) data structures to support MultiIndexing + +These are used for: + +- .names (FrozenList) +- .levels & .labels (FrozenNDArray) + +""" + +import numpy as np +from pandas.core.base import PandasObject +from pandas.types.cast import _coerce_indexer_dtype +from pandas.formats.printing import pprint_thing + + +class FrozenList(PandasObject, list): + + """ + Container that doesn't allow setting item *but* + because it's technically non-hashable, will be used + for lookups, appropriately, etc. + """ + # Sidenote: This has to be of type list, otherwise it messes up PyTables + # typechecks + + def __add__(self, other): + if isinstance(other, tuple): + other = list(other) + return self.__class__(super(FrozenList, self).__add__(other)) + + __iadd__ = __add__ + + # Python 2 compat + def __getslice__(self, i, j): + return self.__class__(super(FrozenList, self).__getslice__(i, j)) + + def __getitem__(self, n): + # Python 3 compat + if isinstance(n, slice): + return self.__class__(super(FrozenList, self).__getitem__(n)) + return super(FrozenList, self).__getitem__(n) + + def __radd__(self, other): + if isinstance(other, tuple): + other = list(other) + return self.__class__(other + list(self)) + + def __eq__(self, other): + if isinstance(other, (tuple, FrozenList)): + other = list(other) + return super(FrozenList, self).__eq__(other) + + __req__ = __eq__ + + def __mul__(self, other): + return self.__class__(super(FrozenList, self).__mul__(other)) + + __imul__ = __mul__ + + def __reduce__(self): + return self.__class__, (list(self),) + + def __hash__(self): + return hash(tuple(self)) + + def _disabled(self, *args, **kwargs): + """This method will not function because object is immutable.""" + raise TypeError("'%s' does not support mutable operations." % + self.__class__.__name__) + + def __unicode__(self): + return pprint_thing(self, quote_strings=True, + escape_chars=('\t', '\r', '\n')) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, + str(self)) + + __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled + pop = append = extend = remove = sort = insert = _disabled + + +class FrozenNDArray(PandasObject, np.ndarray): + + # no __array_finalize__ for now because no metadata + def __new__(cls, data, dtype=None, copy=False): + if copy is None: + copy = not isinstance(data, FrozenNDArray) + res = np.array(data, dtype=dtype, copy=copy).view(cls) + return res + + def _disabled(self, *args, **kwargs): + """This method will not function because object is immutable.""" + raise TypeError("'%s' does not support mutable operations." % + self.__class__) + + __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled + put = itemset = fill = _disabled + + def _shallow_copy(self): + return self.view() + + def values(self): + """returns *copy* of underlying array""" + arr = self.view(np.ndarray).copy() + return arr + + def __unicode__(self): + """ + Return a string representation for this object. + + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. + """ + prepr = pprint_thing(self, escape_chars=('\t', '\r', '\n'), + quote_strings=True) + return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) + + +def _ensure_frozen(array_like, categories, copy=False): + array_like = _coerce_indexer_dtype(array_like, categories) + array_like = array_like.view(FrozenNDArray) + if copy: + array_like = array_like.copy() + return array_like diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 18e1da7303d6d..ec30d2c44efd7 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -28,7 +28,6 @@ UnsortedIndexError) -from pandas.core.base import FrozenList import pandas.core.base as base from pandas.util.decorators import (Appender, cache_readonly, deprecate, deprecate_kwarg) @@ -39,9 +38,10 @@ from pandas.core.config import get_option -from pandas.indexes.base import (Index, _ensure_index, _ensure_frozen, +from pandas.indexes.base import (Index, _ensure_index, _get_na_value, InvalidIndexError, _index_shared_docs) +from pandas.indexes.frozen import FrozenNDArray, FrozenList, _ensure_frozen import pandas.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -1276,7 +1276,7 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, for new_label in taken: label_values = new_label.values() label_values[mask] = na_value - masked.append(base.FrozenNDArray(label_values)) + masked.append(FrozenNDArray(label_values)) taken = masked else: taken = [lab.take(indices) for lab in self.labels] diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py new file mode 100644 index 0000000000000..a82409fbf9513 --- /dev/null +++ b/pandas/tests/indexes/test_frozen.py @@ -0,0 +1,68 @@ +import numpy as np +from pandas.util import testing as tm +from pandas.tests.test_base import CheckImmutable, CheckStringMixin +from pandas.indexes.frozen import FrozenList, FrozenNDArray +from pandas.compat import u + + +class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase): + mutable_methods = ('extend', 'pop', 'remove', 'insert') + unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) + + def setUp(self): + self.lst = [1, 2, 3, 4, 5] + self.container = FrozenList(self.lst) + self.klass = FrozenList + + def test_add(self): + result = self.container + (1, 2, 3) + expected = FrozenList(self.lst + [1, 2, 3]) + self.check_result(result, expected) + + result = (1, 2, 3) + self.container + expected = FrozenList([1, 2, 3] + self.lst) + self.check_result(result, expected) + + def test_inplace(self): + q = r = self.container + q += [5] + self.check_result(q, self.lst + [5]) + # other shouldn't be mutated + self.check_result(r, self.lst) + + +class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): + mutable_methods = ('put', 'itemset', 'fill') + unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) + + def setUp(self): + self.lst = [3, 5, 7, -2] + self.container = FrozenNDArray(self.lst) + self.klass = FrozenNDArray + + def test_shallow_copying(self): + original = self.container.copy() + self.assertIsInstance(self.container.view(), FrozenNDArray) + self.assertFalse(isinstance( + self.container.view(np.ndarray), FrozenNDArray)) + self.assertIsNot(self.container.view(), self.container) + self.assert_numpy_array_equal(self.container, original) + # shallow copy should be the same too + self.assertIsInstance(self.container._shallow_copy(), FrozenNDArray) + + # setting should not be allowed + def testit(container): + container[0] = 16 + + self.check_mutable_error(testit, self.container) + + def test_values(self): + original = self.container.view(np.ndarray).copy() + n = original[0] + 15 + vals = self.container.values() + self.assert_numpy_array_equal(original, vals) + self.assertIsNot(original, vals) + vals[0] = n + self.assertIsInstance(self.container, FrozenNDArray) + self.assert_numpy_array_equal(self.container.values(), original) + self.assertEqual(vals[0], n) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 473f1d81c9532..8264ad33950f9 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -14,10 +14,9 @@ import pandas.util.testing as tm from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta) -from pandas.compat import u, StringIO +from pandas.compat import StringIO from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.base import (FrozenList, FrozenNDArray, PandasDelegate, - NoNewAttributesMixin) +from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.tseries.base import DatetimeIndexOpsMixin @@ -83,69 +82,6 @@ def check_result(self, result, expected, klass=None): self.assertEqual(result, expected) -class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase): - mutable_methods = ('extend', 'pop', 'remove', 'insert') - unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) - - def setUp(self): - self.lst = [1, 2, 3, 4, 5] - self.container = FrozenList(self.lst) - self.klass = FrozenList - - def test_add(self): - result = self.container + (1, 2, 3) - expected = FrozenList(self.lst + [1, 2, 3]) - self.check_result(result, expected) - - result = (1, 2, 3) + self.container - expected = FrozenList([1, 2, 3] + self.lst) - self.check_result(result, expected) - - def test_inplace(self): - q = r = self.container - q += [5] - self.check_result(q, self.lst + [5]) - # other shouldn't be mutated - self.check_result(r, self.lst) - - -class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): - mutable_methods = ('put', 'itemset', 'fill') - unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) - - def setUp(self): - self.lst = [3, 5, 7, -2] - self.container = FrozenNDArray(self.lst) - self.klass = FrozenNDArray - - def test_shallow_copying(self): - original = self.container.copy() - self.assertIsInstance(self.container.view(), FrozenNDArray) - self.assertFalse(isinstance( - self.container.view(np.ndarray), FrozenNDArray)) - self.assertIsNot(self.container.view(), self.container) - self.assert_numpy_array_equal(self.container, original) - # shallow copy should be the same too - self.assertIsInstance(self.container._shallow_copy(), FrozenNDArray) - - # setting should not be allowed - def testit(container): - container[0] = 16 - - self.check_mutable_error(testit, self.container) - - def test_values(self): - original = self.container.view(np.ndarray).copy() - n = original[0] + 15 - vals = self.container.values() - self.assert_numpy_array_equal(original, vals) - self.assertIsNot(original, vals) - vals[0] = n - self.assertIsInstance(self.container, pd.core.base.FrozenNDArray) - self.assert_numpy_array_equal(self.container.values(), original) - self.assertEqual(vals[0], n) - - class TestPandasDelegate(tm.TestCase): class Delegator(object):