Skip to content

Commit 2e3f8e0

Browse files
committed
fix indexes dropna=false
1 parent 0f38f43 commit 2e3f8e0

File tree

2 files changed

+63
-10
lines changed

2 files changed

+63
-10
lines changed

pandas/tools/pivot.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pandas.tools.util import cartesian_product
1010
from pandas.compat import range, lrange, zip
1111
from pandas import compat
12+
from pandas import isnull
1213
import pandas.core.common as com
1314
import numpy as np
1415

@@ -81,9 +82,21 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
8182
DataFrame.pivot : pivot without aggregation that can handle
8283
non-numeric data
8384
"""
85+
pd_null = "_null_pd"
86+
8487
index = _convert_by(index)
8588
columns = _convert_by(columns)
8689

90+
keys = index + columns
91+
92+
if not dropna:
93+
key_data = np.array(data[keys], dtype='object')
94+
_data_null_idx = isnull(key_data)
95+
_data_null_val = key_data[_data_null_idx]
96+
key_data[_data_null_idx] = pd_null
97+
for idx, k in enumerate(keys):
98+
data[k] = key_data[:, idx]
99+
87100
if isinstance(aggfunc, list):
88101
pieces = []
89102
keys = []
@@ -96,8 +109,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
96109
keys.append(func.__name__)
97110
return concat(pieces, keys=keys, axis=1)
98111

99-
keys = index + columns
100-
101112
values_passed = values is not None
102113
if values_passed:
103114
if is_list_like(values):
@@ -180,6 +191,27 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
180191
if len(index) == 0 and len(columns) > 0:
181192
table = table.T
182193

194+
if not dropna:
195+
if _data_null_val.size > 0:
196+
def _convert_null_vals(indexes):
197+
if isinstance(indexes, MultiIndex):
198+
_new_level = []
199+
for _tmp_index in indexes.levels:
200+
tmp = np.array(_tmp_index)
201+
tmp[tmp == pd_null] = _data_null_val[0]
202+
_new_level.append(Index(tmp, name=_tmp_index.name))
203+
indexes = MultiIndex(levels=_new_level,
204+
labels=indexes.labels,
205+
names=indexes.names)
206+
else:
207+
tmp = np.array(indexes)
208+
tmp[tmp == pd_null] = _data_null_val[0]
209+
indexes = Index(tmp, name=indexes.name)
210+
return indexes
211+
212+
table.columns = _convert_null_vals(table.columns)
213+
table.index = _convert_null_vals(table.index)
214+
183215
return table
184216

185217

pandas/tools/tests/test_pivot.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,26 @@ def test_pivot_table_dropna(self):
8787
tm.assert_index_equal(pv_col.columns, m)
8888
tm.assert_index_equal(pv_ind.index, m)
8989

90+
df = DataFrame([[1, 'a', 'A'], [1, 'b', 'B'], [1, 'c', None]],
91+
columns=['x', 'y', 'z'])
92+
actual = df.pivot_table(values='x', index='y', columns='z',
93+
aggfunc='sum', fill_value=0, margins=True,
94+
dropna=True)
95+
expected = pd.DataFrame([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0],
96+
[1.0, 1.0, 2.0]])
97+
expected.index = Index(['a', 'b', 'All'], name='y')
98+
expected.columns = Index(['A', 'B', 'All'], name='z')
99+
tm.assert_frame_equal(actual, expected)
100+
101+
actual = df.pivot_table(values='x', index='y', columns='z',
102+
aggfunc='sum', fill_value=0, margins=True,
103+
dropna=False)
104+
expected = pd.DataFrame([[1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 0.0, 1.0],
105+
[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 1.0, 3.0]])
106+
expected.index = Index(['a', 'b', 'c', 'All'], name='y')
107+
expected.columns = Index(['A', 'B', None, 'All'], name='z')
108+
tm.assert_frame_equal(actual, expected)
109+
90110
def test_pass_array(self):
91111
result = self.data.pivot_table(
92112
'D', index=self.data.A, columns=self.data.C)
@@ -1080,7 +1100,8 @@ def test_margin_dropna(self):
10801100
df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan],
10811101
'b': [3, np.nan, 4, 4, 4, 4]})
10821102
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
1083-
expected = pd.DataFrame([[1, 0, 0, 1], [0, 1, 0, 1], [0, 3, 1, 4], [1, 4, 1, 6]])
1103+
expected = pd.DataFrame([[1, 0, 0, 1], [0, 1, 0, 1], [0, 3, 1, 4],
1104+
[1, 4, 1, 6]])
10841105
expected.index = Index([1.0, 2.0, np.nan, 'All'], name='a')
10851106
expected.columns = Index([3.0, 4.0, np.nan, 'All'], name='b')
10861107
tm.assert_frame_equal(actual, expected)
@@ -1095,8 +1116,9 @@ def test_margin_dropna(self):
10951116
actual = pd.crosstab(a, [b, c], rownames=['a'],
10961117
colnames=['b', 'c'], margins=True, dropna=False)
10971118

1098-
m = MultiIndex(levels = [Index(['All', np.nan, 'one', 'two']),
1099-
Index(['', 'dull', 'shiny'])], labels = [[1, 1, 2, 2, 3, 3, 0],
1119+
m = MultiIndex(levels=[Index(['All', np.nan, 'one', 'two']),
1120+
Index(['', 'dull', 'shiny'])],
1121+
labels=[[1, 1, 2, 2, 3, 3, 0],
11001122
[1, 2, 1, 2, 1, 2, 0]], names=['b', 'c'])
11011123
expected = DataFrame([[0, 0, 1, 0, 1, 0, 2], [0, 1, 2, 0, 1, 1, 5],
11021124
[0, 1, 3, 0, 2, 1, 7]], columns=m)
@@ -1105,13 +1127,12 @@ def test_margin_dropna(self):
11051127

11061128
actual = pd.crosstab([a, b], c, rownames=['a', 'b'],
11071129
colnames=['c'], margins=True, dropna=False)
1108-
1109-
print actual.index
1110-
m = MultiIndex(levels=[['All', 'bar', 'foo'], ['', np.nan, 'one', 'two']],
1130+
m = MultiIndex(levels=[['All', 'bar', 'foo'],
1131+
['', np.nan, 'one', 'two']],
11111132
labels=[[1, 1, 1, 2, 2, 2, 0], [1, 2, 3, 1, 2, 3, 0]],
11121133
names=['a', 'b'])
1113-
expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1], [2, 0, 2], [1, 1, 2],
1114-
[5, 2, 7]], index=m)
1134+
expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1],
1135+
[2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m)
11151136
expected.columns = Index(['dull', 'shiny', 'All'], name='c')
11161137
tm.assert_frame_equal(actual, expected)
11171138

0 commit comments

Comments
 (0)