diff --git a/doc/source/release.rst b/doc/source/release.rst
index bbadba61c0135..70aeb454e9ef2 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -56,6 +56,7 @@ pandas 0.13
   - Significant table writing performance improvements in ``HDFStore``
   - JSON date serialisation now performed in low-level C code.
   - Add ``drop_level`` argument to xs (:issue:`4180`)
+  - Can now resample a DataFrame with ohlc (:issue:`2320`)
   - ``Index.copy()`` and ``MultiIndex.copy()`` now accept keyword arguments to
     change attributes (i.e., ``names``, ``levels``, ``labels``)
     (:issue:`4039`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 723ff2fd5ab56..0a39b83d35e66 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1619,7 +1619,6 @@ def _iterate_slices(self):
         for val in slice_axis:
             if val in self.exclusions:
                 continue
-
             yield val, slicer(val)
 
     def _cython_agg_general(self, how, numeric_only=True):
@@ -2233,6 +2232,26 @@ def _wrap_agged_blocks(self, blocks):
 
         return result.convert_objects()
 
+    def _iterate_column_groupbys(self):
+        for i, colname in enumerate(self.obj.columns):
+            yield colname, SeriesGroupBy(self.obj.iloc[:, i], selection=colname,
+                                         grouper=self.grouper,
+                                         exclusions=self.exclusions)
+
+    def _apply_to_column_groupbys(self, func):
+        from pandas.tools.merge import concat
+        return concat((func(col_groupby)
+                       for _, col_groupby in self._iterate_column_groupbys()),
+                      keys=self.obj.columns,
+                      axis=1)
+
+    def ohlc(self):
+        """
+        Compute open, high, low and close values of each column, per group
+
+        For multiple groupings, the result index will be a MultiIndex
+        """
+        return self._apply_to_column_groupbys(lambda x: x._cython_agg_general('ohlc'))
 
 from pandas.tools.plotting import boxplot_frame_groupby
 DataFrameGroupBy.boxplot = boxplot_frame_groupby
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
index 3fdeacad5ffcd..1c6c4eae8d279 100644
--- a/pandas/tseries/tests/test_resample.py
+++ b/pandas/tseries/tests/test_resample.py
@@ -259,6 +259,31 @@ def test_resample_ohlc(self):
         self.assertEquals(xs['low'], s[:5].min())
         self.assertEquals(xs['close'], s[4])
 
+    def test_resample_ohlc_dataframe(self):
+        df = (pd.DataFrame({'PRICE': {Timestamp('2011-01-06 10:59:05', tz=None): 24990,
+                                     Timestamp('2011-01-06 12:43:33', tz=None): 25499,
+                                     Timestamp('2011-01-06 12:54:09', tz=None): 25499},
+                            'VOLUME': {Timestamp('2011-01-06 10:59:05', tz=None): 1500000000,
+                                       Timestamp('2011-01-06 12:43:33', tz=None): 5000000000,
+                                       Timestamp('2011-01-06 12:54:09', tz=None): 100000000}})
+            ).reindex_axis(['VOLUME', 'PRICE'], axis=1)
+        res = df.resample('H', how='ohlc')
+        exp = pd.concat([df['VOLUME'].resample('H', how='ohlc'),
+                         df['PRICE'].resample('H', how='ohlc')],
+                        axis=1,
+                        keys=['VOLUME', 'PRICE'])
+        assert_frame_equal(exp, res)
+
+        df.columns = [['a', 'b'], ['c', 'd']]
+        res = df.resample('H', how='ohlc')
+        exp.columns = pd.MultiIndex.from_tuples([('a', 'c', 'open'), ('a', 'c', 'high'),
+            ('a', 'c', 'low'), ('a', 'c', 'close'), ('b', 'd', 'open'),
+            ('b', 'd', 'high'), ('b', 'd', 'low'), ('b', 'd', 'close')])
+        assert_frame_equal(exp, res)
+
+        # dupe columns fail atm
+        # df.columns = ['PRICE', 'PRICE']
+
     def test_resample_reresample(self):
         dti = DatetimeIndex(
             start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),