From b7762dd5734981002866c53dcfc2fd5efda6b271 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 20 Jun 2013 19:06:44 -0400 Subject: [PATCH] BUG: Possibly invalidate the item_cache when numpy implicitly converts a view to a copy (GH3970) PERF: testing perf BUG: implement cache tracking with a weak reference --- doc/source/release.rst | 2 +- pandas/core/generic.py | 3 +++ pandas/core/indexing.py | 9 +++++++++ pandas/core/internals.py | 1 + pandas/tests/test_indexing.py | 13 ++++++++++++- 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 0fa7b4b2ed5f2..555169441f3cb 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -287,7 +287,7 @@ pandas 0.12 - Fixed insertion issue into DataFrame, after rename (:issue:`4032`) - Fixed testing issue where too many sockets where open thus leading to a connection reset issue (:issue:`3982`, :issue:`3985`) - + - Possibly invalidate the item_cache when numpy implicitly converts a view to a copy (:issue:`3970`) pandas 0.11.0 ============= diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 627a8ab825e5f..caf610ad2a0a6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,5 +1,6 @@ # pylint: disable=W0231,E1101 +import weakref import numpy as np from pandas.core.index import MultiIndex @@ -666,6 +667,7 @@ def _get_item_cache(self, item): values = self._data.get(item) res = self._box_item_values(item, values) cache[item] = res + res._cacher = weakref.ref(self) return res def _box_item_values(self, key, values): @@ -1065,6 +1067,7 @@ def take(self, indices, axis=0, convert=True): new_data = self._data.reindex_axis(new_items, axis=0) else: new_data = self._data.take(indices, axis=axis, verify=False) + return self._constructor(new_data) def tz_convert(self, tz, axis=0, copy=True): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 27c12fcd2e8eb..ace285c7399c7 100644 --- a/pandas/core/indexing.py +++ 
b/pandas/core/indexing.py @@ -184,6 +184,14 @@ def setter(item, v): if np.prod(values.shape): values[indexer] = value + # we might need to invalidate a cached version of myself + cacher = getattr(self.obj,'_cacher',None) + if cacher is not None: + try: + cacher()._clear_item_cache() + except: + pass + def _align_series(self, indexer, ser): # indexer to assign Series can be tuple or scalar if isinstance(indexer, tuple): @@ -709,6 +717,7 @@ def _getbool_axis(self, key, axis=0): return self.obj.take(inds, axis=axis, convert=False) except (Exception), detail: raise self._exception(detail) + def _get_slice_axis(self, slice_obj, axis=0): """ this is pretty simple as we just have to deal with labels """ obj = self.obj diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 0fbadafeca617..c0f62a7bc725c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1650,6 +1650,7 @@ def _consolidate_inplace(self): self._known_consolidated = True def get(self, item): + if self.items.is_unique: _, block = self._find_block(item) return block.get(item) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 8b6bf1ed7f651..491d347320b72 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1066,7 +1066,18 @@ def test_iloc_non_unique_indexing(self): result = df2.loc[idx] assert_frame_equal(result, expected) - + def test_series_iloc(self): + # GH 3970 + + df = DataFrame({ "aa":range(5), "bb":[2.2]*5}) + df["cc"] = 0.0 + ck = [True]*len(df) + df["bb"].loc[0] = .13 # works + df_tmp = df.iloc[ck] + df["bb"].loc[0] = .15 # doesn't work + expected = DataFrame({ "aa":range(5), "bb":[0.15,2.2,2.2,2.2,2.2], "cc": 0.0 }) + assert_frame_equal(df, expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],