Skip to content

Commit 72be37b

Browse files
agraboso authored and jreback committed
BUG: allow describe() for DataFrames with only boolean columns
Closes #13891.

Author: agraboso <[email protected]>

Closes #13898 from agraboso/fix-13891 and squashes the following commits:

26201aa [agraboso] BUG: allow describe() for DataFrames with only boolean columns
1 parent cff1f55 commit 72be37b

File tree

4 files changed: 58 additions and 4 deletions.

doc/source/whatsnew/v0.19.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -885,6 +885,7 @@ Bug Fixes
885885
- Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`)
886886

887887
- Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quotations were specified for non-numeric values only (:issue:`12922`, :issue:`13259`)
888+
- Bug in ``DataFrame.describe()`` raising ``ValueError`` with only boolean columns (:issue:`13891`)
888889
- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
889890
- Bug in ``.str.replace`` does not raise ``TypeError`` for invalid replacement (:issue:`13438`)
890891
- Bug in ``MultiIndex.from_arrays`` which didn't check for input array lengths matching (:issue:`13599`)

pandas/core/generic.py

+3 -4
Original file line number | Diff line number | Diff line change
@@ -5138,10 +5138,9 @@ def describe_1d(data):
51385138
if self.ndim == 1:
51395139
return describe_1d(self)
51405140
elif (include is None) and (exclude is None):
5141-
if len(self._get_numeric_data()._info_axis) > 0:
5142-
# when some numerics are found, keep only numerics
5143-
data = self.select_dtypes(include=[np.number])
5144-
else:
5141+
# when some numerics are found, keep only numerics
5142+
data = self.select_dtypes(include=[np.number])
5143+
if len(data.columns) == 0:
51455144
data = self
51465145
elif include == 'all':
51475146
if exclude is not None:

pandas/tests/frame/test_analytics.py

+33
Original file line number | Diff line number | Diff line change
@@ -249,6 +249,39 @@ def test_bool_describe_in_mixed_frame(self):
249249
index=['count', 'unique', 'top', 'freq'])
250250
tm.assert_frame_equal(result, expected)
251251

252+
def test_describe_bool_frame(self):
253+
# GH 13891
254+
df = pd.DataFrame({
255+
'bool_data_1': [False, False, True, True],
256+
'bool_data_2': [False, True, True, True]
257+
})
258+
result = df.describe()
259+
expected = DataFrame({'bool_data_1': [4, 2, True, 2],
260+
'bool_data_2': [4, 2, True, 3]},
261+
index=['count', 'unique', 'top', 'freq'])
262+
tm.assert_frame_equal(result, expected)
263+
264+
df = pd.DataFrame({
265+
'bool_data': [False, False, True, True, False],
266+
'int_data': [0, 1, 2, 3, 4]
267+
})
268+
result = df.describe()
269+
expected = DataFrame({'int_data': [5, 2, df.int_data.std(), 0, 1,
270+
2, 3, 4]},
271+
index=['count', 'mean', 'std', 'min', '25%',
272+
'50%', '75%', 'max'])
273+
tm.assert_frame_equal(result, expected)
274+
275+
df = pd.DataFrame({
276+
'bool_data': [False, False, True, True],
277+
'str_data': ['a', 'b', 'c', 'a']
278+
})
279+
result = df.describe()
280+
expected = DataFrame({'bool_data': [4, 2, True, 2],
281+
'str_data': [4, 3, 'a', 2]},
282+
index=['count', 'unique', 'top', 'freq'])
283+
tm.assert_frame_equal(result, expected)
284+
252285
def test_describe_categorical_columns(self):
253286
# GH 11558
254287
columns = pd.CategoricalIndex(['int1', 'int2', 'obj'],

pandas/tests/series/test_analytics.py

+21
Original file line number | Diff line number | Diff line change
@@ -260,6 +260,27 @@ def test_kurt(self):
260260
self.assertEqual(0, s.kurt())
261261
self.assertTrue((df.kurt() == 0).all())
262262

263+
def test_describe(self):
264+
s = Series([0, 1, 2, 3, 4], name='int_data')
265+
result = s.describe()
266+
expected = Series([5, 2, s.std(), 0, 1, 2, 3, 4],
267+
name='int_data',
268+
index=['count', 'mean', 'std', 'min', '25%',
269+
'50%', '75%', 'max'])
270+
self.assert_series_equal(result, expected)
271+
272+
s = Series([True, True, False, False, False], name='bool_data')
273+
result = s.describe()
274+
expected = Series([5, 2, False, 3], name='bool_data',
275+
index=['count', 'unique', 'top', 'freq'])
276+
self.assert_series_equal(result, expected)
277+
278+
s = Series(['a', 'a', 'b', 'c', 'd'], name='str_data')
279+
result = s.describe()
280+
expected = Series([5, 4, 'a', 2], name='str_data',
281+
index=['count', 'unique', 'top', 'freq'])
282+
self.assert_series_equal(result, expected)
283+
263284
def test_argsort(self):
264285
self._check_accum_op('argsort', check_dtype=False)
265286
argsorted = self.ts.argsort()

0 commit comments

Comments
 (0)