From d1554c2b96d66f69cc4a21a831077b3875995754 Mon Sep 17 00:00:00 2001 From: benjamin Date: Sat, 26 Jan 2019 17:23:58 +0000 Subject: [PATCH 01/20] Update to frame and pivot to accept observed kwarg to pass to groupby calls for categorical data. --- pandas/core/frame.py | 9 +++++++-- pandas/core/reshape/pivot.py | 7 ++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4f79bda25517..8a3e2b54714e9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5669,6 +5669,11 @@ def pivot(self, index=None, columns=None, values=None): margins_name : string, default 'All' Name of the row / column that will contain the totals when margins is True. + observed : boolean, default False + This option is passed to calls to groupby. + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. Returns ------- @@ -5760,12 +5765,12 @@ def pivot(self, index=None, columns=None, values=None): @Appender(_shared_docs['pivot_table']) def pivot_table(self, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, - dropna=True, margins_name='All'): + dropna=True, margins_name='All', observed=False): from pandas.core.reshape.pivot import pivot_table return pivot_table(self, values=values, index=index, columns=columns, aggfunc=aggfunc, fill_value=fill_value, margins=margins, dropna=dropna, - margins_name=margins_name) + margins_name=margins_name, observed=observed) def stack(self, level=-1, dropna=True): """ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index c7c447d18b6b1..1adaa8bc5cc1c 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -24,7 +24,7 @@ @Appender(_shared_docs['pivot_table'], indents=1) def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, 
- margins_name='All'): + margins_name='All', observed=False): index = _convert_by(index) columns = _convert_by(columns) @@ -35,7 +35,8 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = pivot_table(data, values=values, index=index, columns=columns, fill_value=fill_value, aggfunc=func, - margins=margins, margins_name=margins_name) + margins=margins, margins_name=margins_name, + observed=observed) pieces.append(table) keys.append(getattr(func, '__name__', func)) @@ -78,7 +79,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', pass values = list(values) - grouped = data.groupby(keys, observed=False) + grouped = data.groupby(keys, observed=observed) agged = grouped.agg(aggfunc) if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): agged = agged.dropna(how='all') From 0662fa3d91d304d585a69da7144f3b0cf50917f1 Mon Sep 17 00:00:00 2001 From: benjamin Date: Sat, 26 Jan 2019 17:56:01 +0000 Subject: [PATCH 02/20] Addition of whatsnew entry. --- doc/source/whatsnew/v0.24.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 16319a3b83ca4..51e62c6e074b5 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -426,6 +426,7 @@ Other Enhancements - :func:`pandas.DataFrame.to_sql` has gained the ``method`` argument to control SQL insertion clause. See the :ref:`insertion method ` section in the documentation. (:issue:`8953`) - :meth:`DataFrame.corrwith` now supports Spearman's rank correlation, Kendall's tau as well as callable correlation methods. (:issue:`21925`) - :meth:`DataFrame.to_json`, :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_pickle`, and other export methods now support tilde(~) in path argument. (:issue:`23473`) +- :meth:`DataFrame.pivot_table` now accepts ``observed`` parameter which is passed to underlying calls to :meth:`DataFrame.groupby` to speed up grouping categorical data. 
(:issue:`24923`) .. _whatsnew_0240.api_breaking: From 612131390379ca25900edd76d32f1bf58bf69aac Mon Sep 17 00:00:00 2001 From: benjamin Date: Sat, 26 Jan 2019 22:44:24 +0000 Subject: [PATCH 03/20] Added change to pass observed fixture to pivot_table test, added change version to docstring and updated correct whatsnew. --- doc/source/whatsnew/v0.24.0.rst | 1 - doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/frame.py | 2 ++ pandas/tests/reshape/test_pivot.py | 8 ++++---- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 51e62c6e074b5..16319a3b83ca4 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -426,7 +426,6 @@ Other Enhancements - :func:`pandas.DataFrame.to_sql` has gained the ``method`` argument to control SQL insertion clause. See the :ref:`insertion method ` section in the documentation. (:issue:`8953`) - :meth:`DataFrame.corrwith` now supports Spearman's rank correlation, Kendall's tau as well as callable correlation methods. (:issue:`21925`) - :meth:`DataFrame.to_json`, :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_pickle`, and other export methods now support tilde(~) in path argument. (:issue:`23473`) -- :meth:`DataFrame.pivot_table` now accepts ``observed`` parameter which is passed to underlying calls to :meth:`DataFrame.groupby` to speed up grouping categorical data. (:issue:`24923`) .. _whatsnew_0240.api_breaking: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 5129449e4fdf3..073f5fca512a1 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -19,7 +19,7 @@ including other versions of pandas. Other Enhancements ^^^^^^^^^^^^^^^^^^ -- +:meth:`DataFrame.pivot_table` now accepts ``observed`` parameter which is passed to underlying calls to :meth:`DataFrame.groupby` to speed up grouping categorical data. 
(:issue:`24923`) - - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8a3e2b54714e9..30621e39c17b8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5675,6 +5675,8 @@ def pivot(self, index=None, columns=None, values=None): If True: only show observed values for categorical groupers. If False: show all values for categorical groupers. + .. versionchanged :: 0.25.0 + Returns ------- table : DataFrame diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index e4fbb204af533..91ebdd4b3390c 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -38,18 +38,18 @@ def setup_method(self, method): 'E': np.random.randn(11), 'F': np.random.randn(11)}) - def test_pivot_table(self): + def test_pivot_table(self, observed): index = ['A', 'B'] columns = 'C' table = pivot_table(self.data, values='D', - index=index, columns=columns) + index=index, columns=columns, observed=observed) table2 = self.data.pivot_table( - values='D', index=index, columns=columns) + values='D', index=index, columns=columns, observed=observed) tm.assert_frame_equal(table, table2) # this works - pivot_table(self.data, values='D', index=index) + pivot_table(self.data, values='D', index=index, observed=observed) if len(index) > 1: assert table.index.names == tuple(index) From 9f93ab93bb8e435f7d38534ea0f555f668bf647d Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 30 Jan 2019 20:49:54 +0000 Subject: [PATCH 04/20] Added "an" to whatsnew and added example from original issue to the test - this checks passing observed parameter remains equivalent to not passing. 
--- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/tests/reshape/test_pivot.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 073f5fca512a1..334b520e6479e 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -19,7 +19,7 @@ including other versions of pandas. Other Enhancements ^^^^^^^^^^^^^^^^^^ -:meth:`DataFrame.pivot_table` now accepts ``observed`` parameter which is passed to underlying calls to :meth:`DataFrame.groupby` to speed up grouping categorical data. (:issue:`24923`) +- :meth:`DataFrame.pivot_table` now accepts an ``observed`` parameter which is passed to underlying calls to :meth:`DataFrame.groupby` to speed up grouping categorical data. (:issue:`24923`) - - diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 91ebdd4b3390c..f05466b8b8859 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -65,6 +65,23 @@ def test_pivot_table(self, observed): index + [columns])['D'].agg(np.mean).unstack() tm.assert_frame_equal(table, expected) + df = pd.DataFrame({'col1': list('abcde'), + 'col2': list('fghij'), + 'col3': [1, 2, 3, 4, 5]}) + + df.col1 = df.col1.astype('category') + df.col2 = df.col1.astype('category') + + df_pivot = df.pivot_table(index='col1', values='col3', + columns='col2', aggfunc=np.sum, + fill_value=0) + + df_pivot_observed = df.pivot_table(index='col1', values='col3', + columns='col2', aggfunc=np.sum, + fill_value=0, observed=observed) + + tm.assert_frame_equal(df_pivot, df_pivot_observed) + def test_pivot_table_nocols(self): df = DataFrame({'rows': ['a', 'b', 'c'], 'cols': ['x', 'y', 'z'], From ebe597226c9204ece1cc2e3ba3636e869d8e118c Mon Sep 17 00:00:00 2001 From: benjamin Date: Thu, 31 Jan 2019 16:48:42 +0000 Subject: [PATCH 05/20] Removed unnecessary sentence. 
--- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 30621e39c17b8..31e47745a9e57 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5670,7 +5670,6 @@ def pivot(self, index=None, columns=None, values=None): Name of the row / column that will contain the totals when margins is True. observed : boolean, default False - This option is passed to calls to groupby. This only applies if any of the groupers are Categoricals. If True: only show observed values for categorical groupers. If False: show all values for categorical groupers. From 416e9c85cc8ff676fed92f1760cc9de0158a40d9 Mon Sep 17 00:00:00 2001 From: benjamin Date: Thu, 31 Jan 2019 17:17:18 +0000 Subject: [PATCH 06/20] Test separated into own test. Added issue comment. Updated df var names so more explicit in tm assertion call. --- pandas/tests/reshape/test_pivot.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f05466b8b8859..a6a99b3a0cd7c 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -65,6 +65,8 @@ def test_pivot_table(self, observed): index + [columns])['D'].agg(np.mean).unstack() tm.assert_frame_equal(table, expected) + def test_pivot_table_categorical_observed(self, observed): + # issue #24923 df = pd.DataFrame({'col1': list('abcde'), 'col2': list('fghij'), 'col3': [1, 2, 3, 4, 5]}) @@ -72,15 +74,15 @@ def test_pivot_table(self, observed): df.col1 = df.col1.astype('category') df.col2 = df.col1.astype('category') - df_pivot = df.pivot_table(index='col1', values='col3', + expected = df.pivot_table(index='col1', values='col3', columns='col2', aggfunc=np.sum, fill_value=0) - df_pivot_observed = df.pivot_table(index='col1', values='col3', - columns='col2', aggfunc=np.sum, - fill_value=0, observed=observed) + result = df.pivot_table(index='col1', values='col3', + 
columns='col2', aggfunc=np.sum, + fill_value=0, observed=observed) - tm.assert_frame_equal(df_pivot, df_pivot_observed) + tm.assert_frame_equal(result, expected) def test_pivot_table_nocols(self): df = DataFrame({'rows': ['a', 'b', 'c'], From 5c62063534a063f2d64e02ea9e817d052222ccaf Mon Sep 17 00:00:00 2001 From: benjamin Date: Thu, 31 Jan 2019 19:27:46 +0000 Subject: [PATCH 07/20] Addition of asv for pivot_table of categorical data with observed keyword. --- asv_bench/benchmarks/reshape.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index f6ee107ab618e..33f5bd43b2908 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -128,6 +128,10 @@ def setup(self): 'value2': np.random.randn(N), 'value3': np.random.randn(N)}) + self.df2 = pd.DataFrame({'col1': list('abcde'), 'col2': list('fghij'), 'col3': [1, 2, 3, 4, 5]}) + self.df2.col1 = self.df2.col1.astype('category') + self.df2.col2 = self.df2.col2.astype('category') + def time_pivot_table(self): self.df.pivot_table(index='key1', columns=['key2', 'key3']) @@ -139,6 +143,10 @@ def time_pivot_table_margins(self): self.df.pivot_table(index='key1', columns=['key2', 'key3'], margins=True) + def time_pivot_table_categorical(self): + self.df2.pivot_table(index='col1', values='col3', columns='col2', + aggfunc=np.sum, fill_value=0, observed=True) + class Crosstab(object): From 8663be262086ffc4257480ccedbe750a6624e73c Mon Sep 17 00:00:00 2001 From: benjamin Date: Thu, 31 Jan 2019 19:29:30 +0000 Subject: [PATCH 08/20] Resolve PEP8 issue. 
--- asv_bench/benchmarks/reshape.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 33f5bd43b2908..18cc016155863 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -128,7 +128,8 @@ def setup(self): 'value2': np.random.randn(N), 'value3': np.random.randn(N)}) - self.df2 = pd.DataFrame({'col1': list('abcde'), 'col2': list('fghij'), 'col3': [1, 2, 3, 4, 5]}) + self.df2 = pd.DataFrame({'col1': list('abcde'), 'col2': list('fghij'), + 'col3': [1, 2, 3, 4, 5]}) self.df2.col1 = self.df2.col1.astype('category') self.df2.col2 = self.df2.col2.astype('category') From 22637a35889ade2783e6ab82ede3865fdc02d123 Mon Sep 17 00:00:00 2001 From: benjamin Date: Sat, 2 Feb 2019 23:58:19 +0000 Subject: [PATCH 09/20] Minor adjustment to asv entry. --- asv_bench/benchmarks/reshape.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 18cc016155863..9a8da1f38c589 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -127,9 +127,8 @@ def setup(self): 'value1': np.random.randn(N), 'value2': np.random.randn(N), 'value3': np.random.randn(N)}) - - self.df2 = pd.DataFrame({'col1': list('abcde'), 'col2': list('fghij'), - 'col3': [1, 2, 3, 4, 5]}) + self.df2 = DataFrame({'col1': list('abcde'), 'col2': list('fghij'), + 'col3': [1, 2, 3, 4, 5]}) self.df2.col1 = self.df2.col1.astype('category') self.df2.col2 = self.df2.col2.astype('category') From c8e085d9d0ef89f3312291f7dd5454f396dd416f Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 13 Feb 2019 21:10:49 +0000 Subject: [PATCH 10/20] Triggering CI tests. From 2516386a034bb973c141882e51c6a869730c4d2f Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 13 Feb 2019 21:35:41 +0000 Subject: [PATCH 11/20] Triggering CI tests - attempt 2. 
From 12b8fac30429573755001c9e4cb5113751ee1b80 Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 20 Mar 2019 18:25:47 +0000 Subject: [PATCH 12/20] Triggering CI. From 09af30b1d3c90d9c2aa7c3f36c1885695cd4df5c Mon Sep 17 00:00:00 2001 From: benjamin Date: Fri, 22 Mar 2019 20:58:20 +0000 Subject: [PATCH 13/20] Addition of test to ensure observed pivots on categorical data are indeed faster than those which are set to False. --- pandas/tests/reshape/test_pivot.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index a6a99b3a0cd7c..7b2d7406fcbf8 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2,6 +2,7 @@ from collections import OrderedDict from datetime import date, datetime, timedelta +import time import numpy as np import pytest @@ -65,7 +66,7 @@ def test_pivot_table(self, observed): index + [columns])['D'].agg(np.mean).unstack() tm.assert_frame_equal(table, expected) - def test_pivot_table_categorical_observed(self, observed): + def test_pivot_table_categorical_observed_equal(self, observed): # issue #24923 df = pd.DataFrame({'col1': list('abcde'), 'col2': list('fghij'), @@ -84,6 +85,29 @@ def test_pivot_table_categorical_observed(self, observed): tm.assert_frame_equal(result, expected) + def test_pivot_table_categorical_observed_speed(self): + # issue #24923 + df = pd.DataFrame({'col1': list('abcde'), + 'col2': list('fghij'), + 'col3': [1, 2, 3, 4, 5]}) + + df.col1 = df.col1.astype('category') + df.col2 = df.col1.astype('category') + + start_time_observed_false = time.time() + df.pivot_table(index='col1', values='col3', + columns='col2', aggfunc=np.sum, + fill_value=0, observed=False) + total_time_observed_false = time.time() - start_time_observed_false + + start_time_observed_true = time.time() + df.pivot_table(index='col1', values='col3', + columns='col2', aggfunc=np.sum, + fill_value=0, observed=True) + 
total_time_observed_true = time.time() - start_time_observed_true + + assert total_time_observed_true < total_time_observed_false + def test_pivot_table_nocols(self): df = DataFrame({'rows': ['a', 'b', 'c'], 'cols': ['x', 'y', 'z'], From 6df9e6df7caaaf553ea62bb4415a3a48506bc0c3 Mon Sep 17 00:00:00 2001 From: benjamin Date: Sat, 23 Mar 2019 20:33:55 +0000 Subject: [PATCH 14/20] Removal of test that is otherwise handled by asv. --- pandas/tests/reshape/test_pivot.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 7b2d7406fcbf8..f313fe9ad342e 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -85,29 +85,6 @@ def test_pivot_table_categorical_observed_equal(self, observed): tm.assert_frame_equal(result, expected) - def test_pivot_table_categorical_observed_speed(self): - # issue #24923 - df = pd.DataFrame({'col1': list('abcde'), - 'col2': list('fghij'), - 'col3': [1, 2, 3, 4, 5]}) - - df.col1 = df.col1.astype('category') - df.col2 = df.col1.astype('category') - - start_time_observed_false = time.time() - df.pivot_table(index='col1', values='col3', - columns='col2', aggfunc=np.sum, - fill_value=0, observed=False) - total_time_observed_false = time.time() - start_time_observed_false - - start_time_observed_true = time.time() - df.pivot_table(index='col1', values='col3', - columns='col2', aggfunc=np.sum, - fill_value=0, observed=True) - total_time_observed_true = time.time() - start_time_observed_true - - assert total_time_observed_true < total_time_observed_false - def test_pivot_table_nocols(self): df = DataFrame({'rows': ['a', 'b', 'c'], 'cols': ['x', 'y', 'z'], From a23b5d084dd2d18407269a26b753bc4fd2c5a2b4 Mon Sep 17 00:00:00 2001 From: benjamin Date: Sat, 23 Mar 2019 22:00:48 +0000 Subject: [PATCH 15/20] Extra asv benchmark to see difference between pivots on categorical data when observed is passed as True and when it 
defaults to False. --- asv_bench/benchmarks/reshape.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 9a8da1f38c589..0ee9cf13a3dbf 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -145,8 +145,11 @@ def time_pivot_table_margins(self): def time_pivot_table_categorical(self): self.df2.pivot_table(index='col1', values='col3', columns='col2', - aggfunc=np.sum, fill_value=0, observed=True) + aggfunc=np.sum, fill_value=0) + def time_pivot_table_categorical_observed(self): + self.df2.pivot_table(index='col1', values='col3', columns='col2', + aggfunc=np.sum, fill_value=0, observed=True) class Crosstab(object): From 8d50e8557db66909529a99022a54ed8384c5f557 Mon Sep 17 00:00:00 2001 From: benjamin Date: Sat, 23 Mar 2019 22:01:50 +0000 Subject: [PATCH 16/20] Removal of import time. --- pandas/tests/reshape/test_pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f313fe9ad342e..16b5c98617008 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2,7 +2,6 @@ from collections import OrderedDict from datetime import date, datetime, timedelta -import time import numpy as np import pytest From 3d39dff6cbb9eafd85ca0c5aa0b702079bbbc776 Mon Sep 17 00:00:00 2001 From: benjamin Date: Sat, 23 Mar 2019 22:59:19 +0000 Subject: [PATCH 17/20] Fix pep8 issue. 
--- asv_bench/benchmarks/reshape.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 0ee9cf13a3dbf..e444ef41f0532 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -151,6 +151,7 @@ def time_pivot_table_categorical_observed(self): self.df2.pivot_table(index='col1', values='col3', columns='col2', aggfunc=np.sum, fill_value=0, observed=True) + class Crosstab(object): def setup(self): From cf7e8f5b39be2d098a2298ccc29361b1fc7369fb Mon Sep 17 00:00:00 2001 From: Benjamin Rowell Date: Sat, 20 Apr 2019 22:26:29 +0100 Subject: [PATCH 18/20] Trailing whitespace fix. --- pandas/core/reshape/pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 48bdb6a21b662..be0d74b460850 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -33,7 +33,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = pivot_table(data, values=values, index=index, columns=columns, fill_value=fill_value, aggfunc=func, - margins=margins, dropna=dropna, + margins=margins, dropna=dropna, margins_name=margins_name, observed=observed) pieces.append(table) From a3bcf1a85653b153ba9d0004b42319808223a507 Mon Sep 17 00:00:00 2001 From: Benjamin Rowell Date: Sun, 21 Apr 2019 22:09:11 +0100 Subject: [PATCH 19/20] Setting categorical datatype after calc on expected. 
--- pandas/tests/reshape/test_pivot.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 726a1e5da48d0..64374cd9646eb 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -70,13 +70,16 @@ def test_pivot_table_categorical_observed_equal(self, observed): 'col2': list('fghij'), 'col3': [1, 2, 3, 4, 5]}) - df.col1 = df.col1.astype('category') - df.col2 = df.col1.astype('category') - expected = df.pivot_table(index='col1', values='col3', columns='col2', aggfunc=np.sum, fill_value=0) + expected.index = expected.index.astype('category') + expected.columns = expected.columns.astype('category') + + df.col1 = df.col1.astype('category') + df.col2 = df.col2.astype('category') + result = df.pivot_table(index='col1', values='col3', columns='col2', aggfunc=np.sum, fill_value=0, observed=observed) From 5921646a01fe4b2a3554985943640d993293296a Mon Sep 17 00:00:00 2001 From: Benjamin Rowell Date: Mon, 22 Apr 2019 08:17:44 +0100 Subject: [PATCH 20/20] Empty commit to trigger CI.