From 5cab3102483d16ad1544104f611340f6b9c64ed6 Mon Sep 17 00:00:00 2001 From: Dale Jung Date: Sun, 30 Mar 2014 06:36:38 -0400 Subject: [PATCH 1/2] PRF: .shift() speedup: Only pass in c_contiguous arrays to np.roll/take --- pandas/core/generic.py | 4 ++-- pandas/core/internals.py | 13 +++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc7883f789703..67f8694925dad 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3238,9 +3238,9 @@ def shift(self, periods=1, freq=None, axis=0, **kwds): if periods == 0: return self - axis = self._get_axis_number(axis) + block_axis = self._get_block_manager_axis(axis) if freq is None and not len(kwds): - new_data = self._data.shift(periods=periods, axis=axis) + new_data = self._data.shift(periods=periods, axis=block_axis) else: return self.tshift(periods, freq, **kwds) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d32664559f7fc..ed8cfb59bc995 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -966,7 +966,12 @@ def shift(self, periods, axis=0): # convert integer to float if necessary. need to do a lot more than # that, handle boolean etc also new_values, fill_value = com._maybe_upcast(self.values) - new_values = np.roll(new_values.T,periods,axis=axis) + # make sure array sent to np.roll is c_contiguous + f_ordered = new_values.flags.f_contiguous + if f_ordered: + new_values = new_values.T + axis = new_values.ndim - axis - 1 + new_values = np.roll(new_values, periods, axis=axis) axis_indexer = [ slice(None) ] * self.ndim if periods > 0: axis_indexer[axis] = slice(None,periods) @@ -974,7 +979,11 @@ def shift(self, periods, axis=0): axis_indexer[axis] = slice(periods,None) new_values[tuple(axis_indexer)] = fill_value - return [make_block(new_values.T, self.items, self.ref_items, + # restore original order + if f_ordered: + new_values = new_values.T + + return [make_block(new_values, self.items, self.ref_items, ndim=self.ndim, fastpath=True)] def eval(self, func, other, raise_on_error=True, try_cast=False): From 3a4e770778febe9c37c3ccad921619f996290d8b Mon Sep 17 00:00:00 2001 From: Dale Jung Date: Sun, 30 Mar 2014 22:23:55 -0400 Subject: [PATCH 2/2] PRF: Fixed shift speed tests and added consolidated DataFrame shift perf test --- vb_suite/frame_methods.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py index 7f9063003191f..1c0e9086c63ee 100644 --- a/vb_suite/frame_methods.py +++ b/vb_suite/frame_methods.py @@ -386,7 +386,7 @@ def test_equal(name): def test_unequal(name): df, df2 = pairs[name] return df.equals(df2) - + float_df = DataFrame(np.random.randn(1000, 1000)) object_df = DataFrame([['foo']*1000]*1000) nonunique_cols = object_df.copy() @@ -434,11 +434,21 @@ def test_unequal(name): # frame shift speedup issue-5609 setup = common_setup + """ -df = pd.DataFrame(np.random.rand(10000,500)) +df = DataFrame(np.random.rand(10000,500)) +# note: df._data.blocks are f_contigous """ frame_shift_axis0 = Benchmark('df.shift(1,axis=0)', setup, - name = 'frame_shift_axis_0', start_date=datetime(2014,1,1)) frame_shift_axis1 = Benchmark('df.shift(1,axis=1)', setup, - name = 'frame_shift_axis_1', - start_date=datetime(2014,1,1)) \ No newline at end of file + start_date=datetime(2014,1,1)) + +# +setup = common_setup + """ +df = DataFrame(np.random.rand(10000,500)) +df = df.consolidate() +# note: df._data.blocks are c_contigous +""" +frame_shift_c_order_axis0 = Benchmark('df.shift(1,axis=0)', setup, + start_date=datetime(2014,1,1)) +frame_shift_c_order_axis1 = Benchmark('df.shift(1,axis=1)', setup, + start_date=datetime(2014,1,1))