diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 3d96b93de4cc9..f114e8f7c5769 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -128,7 +128,7 @@ Property Attributes ``Series`` ``DataFrame`` ``Panel ``_constructor_expanddim`` ``DataFrame`` ``Panel`` ``NotImplementedError`` =========================== ======================= =================== ======================= -Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties. +The example below shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties: .. code-block:: python @@ -152,6 +152,8 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame def _constructor_sliced(self): return SubclassedSeries +Overriding constructor properties allows subclass families to be preserved across slice and reshape operations: + .. code-block:: python >>> s = SubclassedSeries([1, 2, 3]) @@ -162,7 +164,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame >>> type(to_framed) <class '__main__.SubclassedDataFrame'> - >>> df = SubclassedDataFrame({'A', [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}) + >>> df = SubclassedDataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}) >>> df A B C 0 1 4 7 @@ -190,6 +192,21 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame >>> type(sliced2) <class '__main__.SubclassedSeries'> + >>> stacked = df.stack() + >>> stacked + 0 A 1 + B 4 + C 7 + 1 A 2 + B 5 + C 8 + 2 A 3 + B 6 + C 9 + dtype: int64 + >>> type(stacked) + <class '__main__.SubclassedSeries'> + Define Original Properties ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 725dc7fc52ed0..f101f9dff3e2e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -228,6 +228,7 @@ Other enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - 
``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs <categorical.union>` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) +- ``stack``, ``unstack``, and ``pivot`` operations now preserve subclass family (:issue:`15563`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 7bcd9f2d30b79..1c612965d3367 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -371,8 +371,11 @@ def pivot(self, index=None, columns=None, values=None): index = self.index else: index = self[index] - indexed = Series(self[values].values, - index=MultiIndex.from_arrays([index, self[columns]])) + + indexed = self._constructor_sliced( + self[values].values, + index=MultiIndex.from_arrays([index, self[columns]])) + return indexed.unstack(columns) @@ -448,13 +451,24 @@ def unstack(obj, level, fill_value=None): if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): - return _unstack_frame(obj, level, fill_value=fill_value) + unstacked = _unstack_frame(obj, level, fill_value=fill_value) + else: + unstacked = obj.T.stack(dropna=False) + + if len(unstacked.shape) == 1: + return obj._constructor_sliced(unstacked) else: - return obj.T.stack(dropna=False) + return obj._constructor(unstacked) + else: unstacker = _Unstacker(obj.values, obj.index, level=level, fill_value=fill_value) - return unstacker.get_result() + unstacked = unstacker.get_result() + + if len(unstacked.shape) == 1: + return obj._constructor(unstacked) + else: + return obj._constructor_expanddim(unstacked) def _unstack_frame(obj, level, fill_value=None): @@ -553,7 +567,7 @@ def factorize(index): mask = notnull(new_values) new_values = new_values[mask] new_index = new_index[mask] - return Series(new_values, index=new_index) + 
return frame._constructor_sliced(new_values, index=new_index) def stack_multiple(frame, level, dropna=True): @@ -692,7 +706,7 @@ def _convert_level_number(level_num, columns): new_index = MultiIndex(levels=new_levels, labels=new_labels, names=new_names, verify_integrity=False) - result = DataFrame(new_data, index=new_index, columns=new_columns) + result = frame._constructor(new_data, index=new_index, columns=new_columns) # more efficient way to go about this? can do the whole masking biz but # will only save a small amount of time... @@ -852,7 +866,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, mdata[col] = np.asanyarray(frame.columns ._get_level_values(i)).repeat(N) - return DataFrame(mdata, columns=mcolumns) + return frame._constructor(mdata, columns=mcolumns) def lreshape(data, groups, dropna=True, label=None): @@ -921,7 +935,7 @@ def lreshape(data, groups, dropna=True, label=None): if not mask.all(): mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata)) - return DataFrame(mdata, columns=id_cols + pivot_cols) + return data._constructor(mdata, columns=id_cols + pivot_cols) def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 9052a16bf973c..d15c11a3b740f 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -4,7 +4,7 @@ import numpy as np -from pandas import DataFrame, Series, MultiIndex, Panel +from pandas import DataFrame, Series, MultiIndex, Panel, Index import pandas as pd import pandas.util.testing as tm @@ -125,6 +125,209 @@ def test_indexing_sliced(self): tm.assert_series_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedSeries) + def test_subclass_stack(self): +<<<<<<< HEAD + # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 
'Y', 'Z']) + + res = df.stack() + exp = tm.SubclassedSeries( + [1, 2, 3, 4, 5, 6, 7, 8, 9], + index=[list('aaabbbccc'), list('XYZXYZXYZ')]) + + tm.assert_series_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedSeries) + + def test_subclass_stack_multi(self): +<<<<<<< HEAD + # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( +<<<<<<< HEAD + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), +======= + zip(list('AABB'), list('cdcd')), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWXX'), list('yzyz')), +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 12], + [11, 13], + [20, 22], + [21, 23], + [30, 32], + [31, 33], + [40, 42], + [41, 43]], +<<<<<<< HEAD + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))), + names=['aaa', 'ccc', 'yyy']), + columns=Index(['W', 'X'], name='www')) +======= + index=MultiIndex.from_tuples( + zip(list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz')), + names=['aaa', 'ccc', 'yyy']), + columns=MultiIndex.from_tuples( + zip(list('WX')), names=['www'])) +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + + res = df.stack() + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + res = df.stack('yyy') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + exp = tm.SubclassedDataFrame([ + [10, 11], + [12, 13], + [20, 21], + [22, 23], + [30, 31], + [32, 33], + [40, 41], + [42, 43]], +<<<<<<< HEAD + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), 
list('WXWXWXWX'))), + names=['aaa', 'ccc', 'www']), + columns=Index(['y', 'z'], name='yyy')) +======= + index=MultiIndex.from_tuples( + zip(list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX')), + names=['aaa', 'ccc', 'www']), + columns=MultiIndex.from_tuples( + zip(list('yz')), names=['yyy'])) +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + + res = df.stack('www') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + def test_subclass_unstack(self): +<<<<<<< HEAD + # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 'Y', 'Z']) + + res = df.unstack() + exp = tm.SubclassedSeries( + [1, 4, 7, 2, 5, 8, 3, 6, 9], + index=[list('XXXYYYZZZ'), list('abcabcabc')]) + + tm.assert_series_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedSeries) + + def test_subclass_unstack_multi(self): +<<<<<<< HEAD + # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( +<<<<<<< HEAD + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), +======= + zip(list('AABB'), list('cdcd')), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWXX'), list('yzyz')), +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 20, 11, 21, 12, 22, 13, 23], + [30, 40, 31, 41, 32, 42, 33, 43]], +<<<<<<< HEAD + index=Index(['A', 'B'], name='aaa'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))), + names=['www', 'yyy', 'ccc'])) 
+======= + index=MultiIndex.from_tuples( + zip(list('AB')), names=['aaa']), + columns=MultiIndex.from_tuples( + zip(list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd')), + names=['www', 'yyy', 'ccc'])) +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + + res = df.unstack() + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + res = df.unstack('ccc') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + exp = tm.SubclassedDataFrame([ + [10, 30, 11, 31, 12, 32, 13, 33], + [20, 40, 21, 41, 22, 42, 23, 43]], +<<<<<<< HEAD + index=Index(['c', 'd'], name='ccc'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))), +======= + index=MultiIndex.from_tuples( + zip(list('cd')), names=['ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB')), +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + names=['www', 'yyy', 'aaa'])) + + res = df.unstack('aaa') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + def test_subclass_pivot(self): +<<<<<<< HEAD + # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex + df = tm.SubclassedDataFrame({ + 'index': ['A', 'B', 'C', 'C', 'B', 'A'], + 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'], + 'values': [1., 2., 3., 3., 2., 1.]}) + + pivoted = df.pivot( + index='index', columns='columns', values='values') + + expected = tm.SubclassedDataFrame({ + 'One': {'A': 1., 'B': 2., 'C': 3.}, + 'Two': {'A': 1., 'B': 2., 'C': 3.}}) + + expected.index.name, expected.columns.name = 'index', 'columns' + + tm.assert_frame_equal(pivoted, expected) + tm.assertIsInstance(pivoted, tm.SubclassedDataFrame) + def test_to_panel_expanddim(self): # GH 9762 diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py 
index 3b1b8aca426e1..5ba38f45ba093 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -32,6 +32,18 @@ def test_to_frame(self): tm.assert_frame_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedDataFrame) + def test_subclass_unstack(self): + # GH 15564 + s = tm.SubclassedSeries( + [1, 2, 3, 4], index=[list('aabb'), list('xyxy')]) + + res = s.unstack() + exp = tm.SubclassedDataFrame( + {'x': [1, 3], 'y': [2, 4]}, index=['a', 'b']) + + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + class TestSparseSeriesSubclassing(tm.TestCase): @@ -76,6 +88,7 @@ def test_subclass_sparse_addition(self): tm.assert_sp_series_equal(s1 + s2, exp) def test_subclass_sparse_to_frame(self): + # GH 15564 s = tm.SubclassedSparseSeries([1, 2], index=list('abcd'), name='xxx') res = s.to_frame()