Skip to content

ENH ohlc resample for DataFrame #4740

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 10, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ pandas 0.13
- Significant table writing performance improvements in ``HDFStore``
- JSON date serialisation now performed in low-level C code.
- Add ``drop_level`` argument to xs (:issue:`4180`)
- Can now resample a DataFrame with ``how='ohlc'`` (:issue:`2320`)
- ``Index.copy()`` and ``MultiIndex.copy()`` now accept keyword arguments to
change attributes (i.e., ``names``, ``levels``, ``labels``)
(:issue:`4039`)
Expand Down
21 changes: 20 additions & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1619,7 +1619,6 @@ def _iterate_slices(self):
for val in slice_axis:
if val in self.exclusions:
continue

yield val, slicer(val)

def _cython_agg_general(self, how, numeric_only=True):
Expand Down Expand Up @@ -2233,6 +2232,26 @@ def _wrap_agged_blocks(self, blocks):

return result.convert_objects()

def _iterate_column_groupbys(self):
    """
    Yield ``(column label, SeriesGroupBy)`` pairs, one per column of
    ``self.obj``

    Every per-column groupby is built with this object's grouper and
    exclusions.
    """
    frame = self.obj
    for position, label in enumerate(frame.columns):
        # pick the column by position (iloc), not by its label
        column = frame.iloc[:, position]
        yield label, SeriesGroupBy(column,
                                   selection=label,
                                   grouper=self.grouper,
                                   exclusions=self.exclusions)

def _apply_to_column_groupbys(self, func):
    """
    Apply ``func`` to each per-column groupby and concatenate the
    results along axis 1, keyed by the columns of ``self.obj``
    """
    from pandas.tools.merge import concat

    # lazily evaluated: func runs on each column groupby as concat
    # consumes the generator
    per_column = (func(column_groupby)
                  for _label, column_groupby
                  in self._iterate_column_groupbys())
    return concat(per_column, keys=self.obj.columns, axis=1)

def ohlc(self):
    """
    Compute open, high, low and close values of each group, column by
    column

    Each column of the grouped object is aggregated separately with the
    'ohlc' cython aggregation; the per-column results are then combined
    by ``_apply_to_column_groupbys``, which concatenates them along the
    columns keyed by the original column labels.

    For multiple groupings, the result index will be a MultiIndex
    """
    return self._apply_to_column_groupbys(
        lambda x: x._cython_agg_general('ohlc'))

from pandas.tools.plotting import boxplot_frame_groupby
DataFrameGroupBy.boxplot = boxplot_frame_groupby
Expand Down
25 changes: 25 additions & 0 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,31 @@ def test_resample_ohlc(self):
self.assertEquals(xs['low'], s[:5].min())
self.assertEquals(xs['close'], s[4])

def test_resample_ohlc_dataframe(self):
    # Resampling a whole DataFrame with how='ohlc' should give the same
    # frame as resampling each column on its own and concatenating the
    # pieces under the column labels.
    stamps = [Timestamp('2011-01-06 10:59:05', tz=None),
              Timestamp('2011-01-06 12:43:33', tz=None),
              Timestamp('2011-01-06 12:54:09', tz=None)]
    prices = dict(zip(stamps, [24990, 25499, 25499]))
    volumes = dict(zip(stamps, [1500000000, 5000000000, 100000000]))
    column_order = ['VOLUME', 'PRICE']

    df = pd.DataFrame({'PRICE': prices, 'VOLUME': volumes})
    df = df.reindex_axis(column_order, axis=1)

    # flat column labels
    result = df.resample('H', how='ohlc')
    expected = pd.concat([df[name].resample('H', how='ohlc')
                          for name in column_order],
                         axis=1,
                         keys=column_order)
    assert_frame_equal(expected, result)

    # two-level column labels: the ohlc field names become a third level
    df.columns = [['a', 'b'], ['c', 'd']]
    result = df.resample('H', how='ohlc')
    fields = ['open', 'high', 'low', 'close']
    expected.columns = pd.MultiIndex.from_tuples(
        [(outer, inner, field)
         for outer, inner in [('a', 'c'), ('b', 'd')]
         for field in fields])
    assert_frame_equal(expected, result)

    # duplicate column labels fail at the moment
    # df.columns = ['PRICE', 'PRICE']

def test_resample_reresample(self):
dti = DatetimeIndex(
start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),
Expand Down