From 74e454443a2730b3a7a0f90d51ef748cb3085ec5 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Tue, 3 Sep 2013 23:59:33 +0100 Subject: [PATCH 1/2] ENH ohlc resample for DataFrame --- doc/source/release.rst | 1 + pandas/core/groupby.py | 14 +++++++++++++- pandas/tseries/tests/test_resample.py | 25 +++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index bbadba61c0135..70aeb454e9ef2 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -56,6 +56,7 @@ pandas 0.13 - Significant table writing performance improvements in ``HDFStore`` - JSON date serialisation now performed in low-level C code. - Add ``drop_level`` argument to xs (:issue:`4180`) + - Can now resample a DataFrame with ohlc (:issue:`2320`) - ``Index.copy()`` and ``MultiIndex.copy()`` now accept keyword arguments to change attributes (i.e., ``names``, ``levels``, ``labels``) (:issue:`4039`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 723ff2fd5ab56..5e86d081606d5 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -430,6 +430,13 @@ def ohlc(self): For multiple groupings, the result index will be a MultiIndex """ + if isinstance(self.obj, com.ABCDataFrame): + from pandas.tools.merge import concat + return concat((col_groupby._cython_agg_general('ohlc') + for _, col_groupby in self._iterate_column_groupbys()), + keys=self.obj.columns, + axis=1) + return self._cython_agg_general('ohlc') def nth(self, n): @@ -1619,7 +1626,6 @@ def _iterate_slices(self): for val in slice_axis: if val in self.exclusions: continue - yield val, slicer(val) def _cython_agg_general(self, how, numeric_only=True): @@ -2233,6 +2239,12 @@ def _wrap_agged_blocks(self, blocks): return result.convert_objects() + def _iterate_column_groupbys(self): + for i, colname in enumerate(self.obj.columns): + yield colname, SeriesGroupBy(self.obj.iloc[:, i], selection=colname, + grouper=self.grouper, + 
exclusions=self.exclusions) + from pandas.tools.plotting import boxplot_frame_groupby DataFrameGroupBy.boxplot = boxplot_frame_groupby diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 3fdeacad5ffcd..1c6c4eae8d279 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -259,6 +259,31 @@ def test_resample_ohlc(self): self.assertEquals(xs['low'], s[:5].min()) self.assertEquals(xs['close'], s[4]) + def test_resample_ohlc_dataframe(self): + df = (pd.DataFrame({'PRICE': {Timestamp('2011-01-06 10:59:05', tz=None): 24990, + Timestamp('2011-01-06 12:43:33', tz=None): 25499, + Timestamp('2011-01-06 12:54:09', tz=None): 25499}, + 'VOLUME': {Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, + Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, + Timestamp('2011-01-06 12:54:09', tz=None): 100000000}}) + ).reindex_axis(['VOLUME', 'PRICE'], axis=1) + res = df.resample('H', how='ohlc') + exp = pd.concat([df['VOLUME'].resample('H', how='ohlc'), + df['PRICE'].resample('H', how='ohlc')], + axis=1, + keys=['VOLUME', 'PRICE']) + assert_frame_equal(exp, res) + + df.columns = [['a', 'b'], ['c', 'd']] + res = df.resample('H', how='ohlc') + exp.columns = pd.MultiIndex.from_tuples([('a', 'c', 'open'), ('a', 'c', 'high'), + ('a', 'c', 'low'), ('a', 'c', 'close'), ('b', 'd', 'open'), + ('b', 'd', 'high'), ('b', 'd', 'low'), ('b', 'd', 'close')]) + assert_frame_equal(exp, res) + + # dupe columns fail atm + # df.columns = ['PRICE', 'PRICE'] + def test_resample_reresample(self): dti = DatetimeIndex( start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), From e85ef7d11d61a76506fb4885bee399092acc82fc Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Wed, 4 Sep 2013 01:13:30 +0100 Subject: [PATCH 2/2] CLN refactor with _apply_to_column_groupbys --- pandas/core/groupby.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby.py 
b/pandas/core/groupby.py index 5e86d081606d5..0a39b83d35e66 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -430,13 +430,6 @@ def ohlc(self): For multiple groupings, the result index will be a MultiIndex """ - if isinstance(self.obj, com.ABCDataFrame): - from pandas.tools.merge import concat - return concat((col_groupby._cython_agg_general('ohlc') - for _, col_groupby in self._iterate_column_groupbys()), - keys=self.obj.columns, - axis=1) - return self._cython_agg_general('ohlc') def nth(self, n): @@ -2245,6 +2238,20 @@ def _iterate_column_groupbys(self): grouper=self.grouper, exclusions=self.exclusions) + def _apply_to_column_groupbys(self, func): + from pandas.tools.merge import concat + return concat((func(col_groupby) + for _, col_groupby in self._iterate_column_groupbys()), + keys=self.obj.columns, + axis=1) + + def ohlc(self): + """ + Compute open, high, low and close values of a group, excluding missing values + + For multiple groupings, the result index will be a MultiIndex + """ + return self._apply_to_column_groupbys(lambda x: x._cython_agg_general('ohlc')) from pandas.tools.plotting import boxplot_frame_groupby DataFrameGroupBy.boxplot = boxplot_frame_groupby