Skip to content

Commit e456ab3

Browse files
author
Christoph Möhl
committed
BUG GH15150 crosstable normalize with multiindex
1 parent 53449fd commit e456ab3

File tree

2 files changed

+36
-28
lines changed

2 files changed

+36
-28
lines changed

pandas/core/reshape/pivot.py

Lines changed: 20 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -537,35 +537,46 @@ def _normalize(table, normalize, margins, margins_name='All'):
537537
raise ValueError("Not a valid normalize argument")
538538

539539
if margins is False:
540-
541540
# Actual Normalizations
542541
normalizers = {
543542
'all': lambda x: x / x.sum(axis=1).sum(axis=0),
544543
'columns': lambda x: x / x.sum(),
545544
'index': lambda x: x.div(x.sum(axis=1), axis=0)
546545
}
547-
546+
548547
elif margins is True:
549-
#skip margin rows and/or cols for normalization
548+
# skip margin rows and/or cols for normalization
550549
normalizers = {
551-
'all': lambda x: x / x.iloc[:-1,:-1].sum(axis=1).sum(axis=0),
552-
'columns': lambda x: x.div(x.iloc[:-1,:].sum()).iloc[:-1,:],
553-
'index': lambda x: (x.div(x.iloc[:,:-1].sum(axis=1), axis=0)).iloc[:,:-1]
550+
'all': lambda x: x / x.iloc[:-1, :-1].sum(axis=1).sum(axis=0),
551+
'columns': lambda x: x.div(x.iloc[:-1, :].sum()).iloc[:-1, :],
552+
'index': lambda x: (x.div(x.iloc[:, :-1].sum(axis=1),
553+
axis=0)).iloc[:, :-1]
554554
}
555555

556556
else:
557-
raise ValueError("Not a valid margins argument")
557+
raise ValueError("Not a valid margins argument")
558558

559559
normalizers[True] = normalizers['all']
560560

561561
try:
562562
f = normalizers[normalize]
563563
except KeyError:
564564
raise ValueError("Not a valid normalize argument")
565-
565+
566566
table = f(table)
567567
table = table.fillna(0)
568-
568+
569+
if margins is True:
570+
# reset index to ensure default index dtype
571+
if normalize == 'index':
572+
colnames = table.columns.names
573+
table.columns = Index(table.columns.tolist())
574+
table.columns.names = colnames
575+
if normalize == 'columns':
576+
rownames = table.index.names
577+
table.index = Index(table.index.tolist())
578+
table.index.names = rownames
579+
569580
return table
570581

571582

pandas/tests/reshape/test_pivot.py

Lines changed: 16 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1266,21 +1266,18 @@ def test_crosstab_normalize(self):
12661266
[0.25, 0.75],
12671267
[0.4, 0.6]],
12681268
index=pd.Index([1, 2, 'All'],
1269-
name='a',
1270-
dtype='object'),
1269+
name='a'),
12711270
columns=pd.Index([3, 4], name='b'))
12721271
col_normal_margins = pd.DataFrame([[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
1273-
index=pd.Index([1, 2], name='a',
1274-
dtype='object'),
1272+
index=pd.Index([1, 2], name='a'),
12751273
columns=pd.Index([3, 4, 'All'],
12761274
name='b'))
12771275

12781276
all_normal_margins = pd.DataFrame([[0.2, 0, 0.2],
12791277
[0.2, 0.6, 0.8],
12801278
[0.4, 0.6, 1]],
12811279
index=pd.Index([1, 2, 'All'],
1282-
name='a',
1283-
dtype='object'),
1280+
name='a'),
12841281
columns=pd.Index([3, 4, 'All'],
12851282
name='b'))
12861283
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index',
@@ -1327,10 +1324,10 @@ def test_crosstab_normalize(self):
13271324

13281325
def test_crosstab_norm_margins_with_multiindex(self):
13291326
# GH 15150
1330-
a = np.array(['foo', 'bar', 'foo', 'bar','bar', 'foo'])
1331-
b = np.array(['one', 'one', 'two', 'one','two', 'two'])
1332-
c = np.array(['dull', 'shiny', 'dull', 'dull','dull', 'shiny'])
1333-
d = np.array(['a', 'a', 'b', 'a','b', 'b'])
1327+
a = np.array(['foo', 'bar', 'foo', 'bar', 'bar', 'foo'])
1328+
b = np.array(['one', 'one', 'two', 'one', 'two', 'two'])
1329+
c = np.array(['dull', 'shiny', 'dull', 'dull', 'dull', 'shiny'])
1330+
d = np.array(['a', 'a', 'b', 'a', 'b', 'b'])
13341331
expected_col_colnorm = MultiIndex(levels=[['All', 'dull', 'shiny'],
13351332
['', 'a', 'b']],
13361333
labels=[[1, 1, 2, 2, 0],
@@ -1365,11 +1362,11 @@ def test_crosstab_norm_margins_with_multiindex(self):
13651362
[.33333333, .33333333,
13661363
.16666667, .16666667]])
13671364
expected_indexnorm = pd.DataFrame(expected_data_indexnorm,
1368-
index=expected_index_indexnorm,
1369-
columns=expected_col_indexnorm)
1365+
index=expected_index_indexnorm,
1366+
columns=expected_col_indexnorm)
13701367
expected_data_allnorm = np.array([[0.16666667, 0., .16666667,
13711368
0., .33333333],
1372-
[0. ,.16666667, 0.,
1369+
[0., .16666667, 0.,
13731370
0., .16666667],
13741371
[.16666667, 0., 0.,
13751372
0., .16666667],
@@ -1378,15 +1375,15 @@ def test_crosstab_norm_margins_with_multiindex(self):
13781375
[0.33333333, .33333333, .16666667,
13791376
.16666667, 1.]])
13801377
expected_allnorm = pd.DataFrame(expected_data_allnorm,
1381-
index=expected_index_indexnorm,
1382-
columns=expected_col_colnorm)
1378+
index=expected_index_indexnorm,
1379+
columns=expected_col_colnorm)
13831380

1384-
result_colnorm = pd.crosstab([a, b], [c,d], normalize='columns',
1381+
result_colnorm = pd.crosstab([a, b], [c, d], normalize='columns',
13851382
margins=True)
1386-
result_indexnorm = pd.crosstab([a, b], [c,d], normalize='index',
1387-
margins=True)
1388-
result_allnorm = pd.crosstab([a, b], [c,d], normalize='all',
1383+
result_indexnorm = pd.crosstab([a, b], [c, d], normalize='index',
13891384
margins=True)
1385+
result_allnorm = pd.crosstab([a, b], [c, d], normalize='all',
1386+
margins=True)
13901387

13911388
tm.assert_frame_equal(result_colnorm, expected_colnorm)
13921389
tm.assert_frame_equal(result_indexnorm, expected_indexnorm)

0 commit comments

Comments
 (0)