diff --git a/doc/source/release.rst b/doc/source/release.rst index 77ce69c40bb9b..0117d77d4143d 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -59,7 +59,7 @@ API Changes - ``Series.sort`` will raise a ``ValueError`` (rather than a ``TypeError``) on sorting an object that is a view of another (:issue:`5856`, :issue:`5853`) - Raise/Warn ``SettingWithCopyError`` (according to the option ``chained_assignment`` in more cases, - when detecting chained assignment, related (:issue:`5938`) + when detecting chained assignment, related (:issue:`5938`, :issue:`6025`) - DataFrame.head(0) returns self instead of empty frame (:issue:`5846`) - ``autocorrelation_plot`` now accepts ``**kwargs``. (:issue:`5623`) - ``convert_objects`` now accepts a ``convert_timedeltas='coerce'`` argument to allow forced dtype conversion of diff --git a/pandas/core/frame.py b/pandas/core/frame.py index adb7b2d2b2691..2d4fd2d792732 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1582,7 +1582,7 @@ def _ixs(self, i, axis=0, copy=False): new_values, copy = self._data.fast_2d_xs(i, copy=copy) result = Series(new_values, index=self.columns, name=self.index[i], dtype=new_values.dtype) - result.is_copy=copy + result._set_is_copy(self, copy=copy) return result # icol @@ -1707,7 +1707,7 @@ def _getitem_multilevel(self, key): if isinstance(result, Series): result = Series(result, index=self.index, name=key) - result.is_copy=True + result._set_is_copy(self) return result else: return self._get_item_cache(key) @@ -1878,6 +1878,7 @@ def __setitem__(self, key, value): self._set_item(key, value) def _setitem_slice(self, key, value): + self._check_setitem_copy() self.ix._setitem_with_indexer(key, value) def _setitem_array(self, key, value): @@ -1912,6 +1913,7 @@ def _setitem_frame(self, key, value): raise TypeError( 'Cannot do boolean setting on mixed-type frame') + self._check_setitem_copy() self.where(-key, value, inplace=True) def _ensure_valid_index(self, value): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eb24301e5e603..63454d32a5fa1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2,6 +2,7 @@ import warnings import operator import weakref +import gc import numpy as np import pandas.lib as lib @@ -97,7 +98,7 @@ def __init__(self, data, axes=None, copy=False, dtype=None, for i, ax in enumerate(axes): data = data.reindex_axis(ax, axis=i) - object.__setattr__(self, 'is_copy', False) + object.__setattr__(self, 'is_copy', None) object.__setattr__(self, '_data', data) object.__setattr__(self, '_item_cache', {}) @@ -994,6 +995,9 @@ def _get_item_cache(self, item): res = self._box_item_values(item, values) cache[item] = res res._set_as_cached(item, self) + + # for a chain + res.is_copy = self.is_copy return res def _set_as_cached(self, item, cacher): @@ -1031,16 +1035,14 @@ def _maybe_update_cacher(self, clear=False): # a copy if ref is None: del self._cacher - self.is_copy = True - self._check_setitem_copy(stacklevel=5, t='referant') else: try: ref._maybe_cache_changed(cacher[0], self) except: pass - if ref.is_copy: - self.is_copy = True - self._check_setitem_copy(stacklevel=5, t='referant') + + # check if we are a copy + self._check_setitem_copy(stacklevel=5, t='referant') if clear: self._clear_item_cache() @@ -1055,13 +1057,35 @@ def _set_item(self, key, value): self._data.set(key, value) self._clear_item_cache() + def _set_is_copy(self, ref=None, copy=True): + if not copy: + self.is_copy = None + else: + if ref is not None: + self.is_copy = weakref.ref(ref) + else: + self.is_copy = None + def _check_setitem_copy(self, stacklevel=4, t='setting'): """ validate if we are doing a settitem on a chained copy. If you call this function, be sure to set the stacklevel such that the user will see the error *at the level of setting*""" if self.is_copy: + value = config.get_option('mode.chained_assignment') + if value is None: + return + + # see if the copy is not actually refererd; if so, then disolve + # the copy weakref + try: + gc.collect(2) + if not gc.get_referents(self.is_copy()): + self.is_copy = None + return + except: + pass if t == 'referant': t = ("A value is trying to be set on a copy of a slice from a " @@ -1143,7 +1167,7 @@ def take(self, indices, axis=0, convert=True, is_copy=True): # maybe set copy if we didn't actually change the index if is_copy and not result._get_axis(axis).equals(self._get_axis(axis)): - result.is_copy=is_copy + result._set_is_copy(self) return result @@ -1276,12 +1300,12 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): new_values, copy = self._data.fast_2d_xs(loc, copy=copy) result = Series(new_values, index=self.columns, name=self.index[loc]) - result.is_copy=True else: result = self[loc] result.index = new_index + result._set_is_copy(self) return result _xs = xs diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 474b5ae3ac6d6..9a67f5fe3854e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -211,7 +211,7 @@ def _setitem_with_indexer(self, indexer, value): labels = _safe_append_to_index(index, key) self.obj._data = self.obj.reindex_axis(labels, i)._data self.obj._maybe_update_cacher(clear=True) - self.obj.is_copy=False + self.obj.is_copy=None if isinstance(labels, MultiIndex): self.obj.sortlevel(inplace=True) @@ -418,6 +418,7 @@ def can_do_equal_len(): if isinstance(value, ABCPanel): value = self._align_panel(indexer, value) + # actually do the set self.obj._data = self.obj._data.setitem(indexer, value) self.obj._maybe_update_cacher(clear=True) diff --git a/pandas/io/json.py b/pandas/io/json.py index 698f7777a1100..4ed325df9a747 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -60,7 +60,7 @@ def __init__(self, obj, orient, date_format, double_precision, self.date_unit = date_unit self.default_handler = default_handler - self.is_copy = False + self.is_copy = None self._format_axes() def _format_axes(self): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 7daf95ac15a95..3270d80dc9dad 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1380,10 +1380,10 @@ def test_set_value_keeps_names(self): columns=['one', 'two', 'three', 'four'], index=idx) df = df.sortlevel() - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) self.assertEqual(df.index.names, ('Name', 'Number')) df = df.set_value(('grethe', '4'), 'one', 99.34) - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) self.assertEqual(df.index.names, ('Name', 'Number')) def test_names(self): diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index b763b885fe7b8..645ec532f8510 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1980,7 +1980,7 @@ def test_detect_chained_assignment(self): # work with the chain expected = DataFrame([[-5,1],[-6,3]],columns=list('AB')) df = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),dtype='int64') - self.assert_(not df.is_copy) + self.assert_(df.is_copy is None) df['A'][0] = -5 df['A'][1] = -6 @@ -1988,11 +1988,11 @@ def test_detect_chained_assignment(self): expected = DataFrame([[-5,2],[np.nan,3.]],columns=list('AB')) df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) - self.assert_(not df.is_copy) + self.assert_(df.is_copy is None) df['A'][0] = -5 df['A'][1] = np.nan assert_frame_equal(df, expected) - self.assert_(not df['A'].is_copy) + self.assert_(df['A'].is_copy is None) # using a copy (the chain), fails df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) @@ -2004,7 +2004,7 @@ def f(): df = DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'], 'c' : Series(range(7),dtype='int64') }) - self.assert_(not df.is_copy) + self.assert_(df.is_copy is None) expected = DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'], 'c' : [42,42,2,3,4,42,6]}) @@ -2033,7 +2033,7 @@ def f(): # make sure that is_copy is picked up reconstruction # GH5475 df = DataFrame({"A": [1,2]}) - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) with tm.ensure_clean('__tmp__pickle') as path: df.to_pickle(path) df2 = pd.read_pickle(path) @@ -2058,25 +2058,34 @@ def random_text(nobs=100): # always a copy x = df.iloc[[0,1,2]] - self.assert_(x.is_copy is True) + self.assert_(x.is_copy is not None) x = df.iloc[[0,1,2,4]] - self.assert_(x.is_copy is True) + self.assert_(x.is_copy is not None) # explicity copy indexer = df.letters.apply(lambda x : len(x) > 10) df = df.ix[indexer].copy() - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) df['letters'] = df['letters'].apply(str.lower) # implicity take df = random_text(100000) indexer = df.letters.apply(lambda x : len(x) > 10) df = df.ix[indexer] - self.assert_(df.is_copy is True) + self.assert_(df.is_copy is not None) + df['letters'] = df['letters'].apply(str.lower) + + # implicity take 2 + df = random_text(100000) + indexer = df.letters.apply(lambda x : len(x) > 10) + df = df.ix[indexer] + self.assert_(df.is_copy is not None) df.loc[:,'letters'] = df['letters'].apply(str.lower) - # this will raise - #df['letters'] = df['letters'].apply(str.lower) + # should be ok even though its a copy! + self.assert_(df.is_copy is None) + df['letters'] = df['letters'].apply(str.lower) + self.assert_(df.is_copy is None) df = random_text(100000) indexer = df.letters.apply(lambda x : len(x) > 10) @@ -2084,7 +2093,7 @@ def random_text(nobs=100): # an identical take, so no copy df = DataFrame({'a' : [1]}).dropna() - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) df['a'] += 1 # inplace ops @@ -2123,7 +2132,15 @@ def f(): df[['c']][mask] = df[['b']][mask] self.assertRaises(com.SettingWithCopyError, f) - pd.set_option('chained_assignment','warn') + # false positives GH6025 + df = DataFrame ({'column1':['a', 'a', 'a'], 'column2': [4,8,9] }) + str(df) + df['column1'] = df['column1'] + 'b' + str(df) + df = df [df['column2']!=8] + str(df) + df['column1'] = df['column1'] + 'c' + str(df) def test_float64index_slicing_bug(self): # GH 5557, related to slicing a float index diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 0ede6bd2bd46d..1571cf3bf6a7d 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -174,7 +174,7 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']): exp = self.data.groupby(rows)[col].mean() tm.assert_series_equal(cmarg, exp) - res.sortlevel(inplace=True) + res = res.sortlevel() rmarg = res.xs(('All', ''))[:-1] exp = self.data.groupby(cols)[col].mean() tm.assert_series_equal(rmarg, exp)