Skip to content

Commit 9dbb7d7

Browse files
authored
DEPR: Enforce default of numeric_only=False in DataFrame methods (#49622)
* DEPR: Enforce default of numeric_only=False
* Remove unused functions
* Add versionchanged
* Add Series.rank to whatsnew
* newline in docs
1 parent dbb2adc commit 9dbb7d7

File tree

10 files changed

+74
-283
lines changed

10 files changed

+74
-283
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -555,6 +555,8 @@ Removal of prior version deprecations/changes
555555
- Removed ``na_sentinel`` argument from :func:`factorize`, :meth:`.Index.factorize`, and :meth:`.ExtensionArray.factorize` (:issue:`47157`)
556556
- Changed behavior of :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` so that ``group_keys`` is respected even if a transformer is detected (:issue:`34998`)
557557
- Enforced deprecation of ``numeric_only=None`` (the default) in DataFrame reductions that would silently drop columns that raised; ``numeric_only`` now defaults to ``False`` (:issue:`41480`)
558+
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
559+
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
558560
-
559561

560562
.. ---------------------------------------------------------------------------

pandas/core/common.py

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
cast,
2626
overload,
2727
)
28-
import warnings
2928

3029
import numpy as np
3130

@@ -37,7 +36,6 @@
3736
RandomState,
3837
T,
3938
)
40-
from pandas.util._exceptions import find_stack_level
4139

4240
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
4341
from pandas.core.dtypes.common import (
@@ -631,65 +629,3 @@ def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
631629
list of column names with the None values replaced.
632630
"""
633631
return [f"level_{i}" if name is None else name for i, name in enumerate(names)]
634-
635-
636-
def resolve_numeric_only(numeric_only: bool | None | lib.NoDefault) -> bool:
637-
"""Determine the Boolean value of numeric_only.
638-
639-
See GH#46560 for details on the deprecation.
640-
641-
Parameters
642-
----------
643-
numeric_only : bool, None, or lib.no_default
644-
Value passed to the method.
645-
646-
Returns
647-
-------
648-
Resolved value of numeric_only.
649-
"""
650-
if numeric_only is lib.no_default:
651-
# Methods that behave like numeric_only=True and only got the numeric_only
652-
# arg in 1.5.0 default to lib.no_default
653-
result = True
654-
elif numeric_only is None:
655-
# Methods that had the numeric_only arg prior to 1.5.0 and try all columns
656-
# first default to None
657-
result = False
658-
else:
659-
result = numeric_only
660-
return result
661-
662-
663-
def deprecate_numeric_only_default(
664-
cls: type, name: str, deprecate_none: bool = False
665-
) -> None:
666-
"""Emit FutureWarning message for deprecation of numeric_only.
667-
668-
See GH#46560 for details on the deprecation.
669-
670-
Parameters
671-
----------
672-
cls : type
673-
pandas type that is generating the warning.
674-
name : str
675-
Name of the method that is generating the warning.
676-
deprecate_none : bool, default False
677-
Whether to also warn about the deprecation of specifying ``numeric_only=None``.
678-
"""
679-
if name in ["all", "any"]:
680-
arg_name = "bool_only"
681-
else:
682-
arg_name = "numeric_only"
683-
684-
msg = (
685-
f"The default value of {arg_name} in {cls.__name__}.{name} is "
686-
"deprecated. In a future version, it will default to False. "
687-
)
688-
if deprecate_none:
689-
msg += f"In addition, specifying '{arg_name}=None' is deprecated. "
690-
msg += (
691-
f"Select only valid columns or specify the value of {arg_name} to silence "
692-
"this warning."
693-
)
694-
695-
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())

pandas/core/frame.py

Lines changed: 28 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@
132132
is_integer_dtype,
133133
is_iterator,
134134
is_list_like,
135-
is_numeric_dtype,
136135
is_object_dtype,
137136
is_scalar,
138137
is_sequence,
@@ -9938,7 +9937,7 @@ def corr(
99389937
self,
99399938
method: CorrelationMethod = "pearson",
99409939
min_periods: int = 1,
9941-
numeric_only: bool | lib.NoDefault = lib.no_default,
9940+
numeric_only: bool = False,
99429941
) -> DataFrame:
99439942
"""
99449943
Compute pairwise correlation of columns, excluding NA/null values.
@@ -9959,14 +9958,13 @@ def corr(
99599958
Minimum number of observations required per pair of columns
99609959
to have a valid result. Currently only available for Pearson
99619960
and Spearman correlation.
9962-
numeric_only : bool, default True
9961+
numeric_only : bool, default False
99639962
Include only `float`, `int` or `boolean` data.
99649963
99659964
.. versionadded:: 1.5.0
99669965
9967-
.. deprecated:: 1.5.0
9968-
The default value of ``numeric_only`` will be ``False`` in a future
9969-
version of pandas.
9966+
.. versionchanged:: 2.0.0
9967+
The default value of ``numeric_only`` is now ``False``.
99709968
99719969
Returns
99729970
-------
@@ -10006,11 +10004,7 @@ def corr(
1000610004
dogs 1.0 NaN
1000710005
cats NaN 1.0
1000810006
""" # noqa:E501
10009-
numeric_only_bool = com.resolve_numeric_only(numeric_only)
10010-
data = self._get_numeric_data() if numeric_only_bool else self
10011-
if numeric_only is lib.no_default and len(data.columns) < len(self.columns):
10012-
com.deprecate_numeric_only_default(type(self), "corr")
10013-
10007+
data = self._get_numeric_data() if numeric_only else self
1001410008
cols = data.columns
1001510009
idx = cols.copy()
1001610010
mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
@@ -10057,7 +10051,7 @@ def cov(
1005710051
self,
1005810052
min_periods: int | None = None,
1005910053
ddof: int | None = 1,
10060-
numeric_only: bool | lib.NoDefault = lib.no_default,
10054+
numeric_only: bool = False,
1006110055
) -> DataFrame:
1006210056
"""
1006310057
Compute pairwise covariance of columns, excluding NA/null values.
@@ -10089,14 +10083,13 @@ def cov(
1008910083
1009010084
.. versionadded:: 1.1.0
1009110085
10092-
numeric_only : bool, default True
10086+
numeric_only : bool, default False
1009310087
Include only `float`, `int` or `boolean` data.
1009410088
1009510089
.. versionadded:: 1.5.0
1009610090
10097-
.. deprecated:: 1.5.0
10098-
The default value of ``numeric_only`` will be ``False`` in a future
10099-
version of pandas.
10091+
.. versionchanged:: 2.0.0
10092+
The default value of ``numeric_only`` is now ``False``.
1010010093
1010110094
Returns
1010210095
-------
@@ -10167,11 +10160,7 @@ def cov(
1016710160
b NaN 1.248003 0.191417
1016810161
c -0.150812 0.191417 0.895202
1016910162
"""
10170-
numeric_only_bool = com.resolve_numeric_only(numeric_only)
10171-
data = self._get_numeric_data() if numeric_only_bool else self
10172-
if numeric_only is lib.no_default and len(data.columns) < len(self.columns):
10173-
com.deprecate_numeric_only_default(type(self), "cov")
10174-
10163+
data = self._get_numeric_data() if numeric_only else self
1017510164
cols = data.columns
1017610165
idx = cols.copy()
1017710166
mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
@@ -10195,7 +10184,7 @@ def corrwith(
1019510184
axis: Axis = 0,
1019610185
drop: bool = False,
1019710186
method: CorrelationMethod = "pearson",
10198-
numeric_only: bool | lib.NoDefault = lib.no_default,
10187+
numeric_only: bool = False,
1019910188
) -> Series:
1020010189
"""
1020110190
Compute pairwise correlation.
@@ -10223,14 +10212,13 @@ def corrwith(
1022310212
* callable: callable with input two 1d ndarrays
1022410213
and returning a float.
1022510214
10226-
numeric_only : bool, default True
10215+
numeric_only : bool, default False
1022710216
Include only `float`, `int` or `boolean` data.
1022810217
1022910218
.. versionadded:: 1.5.0
1023010219
10231-
.. deprecated:: 1.5.0
10232-
The default value of ``numeric_only`` will be ``False`` in a future
10233-
version of pandas.
10220+
.. versionchanged:: 2.0.0
10221+
The default value of ``numeric_only`` is now ``False``.
1023410222
1023510223
Returns
1023610224
-------
@@ -10263,15 +10251,12 @@ def corrwith(
1026310251
dtype: float64
1026410252
""" # noqa:E501
1026510253
axis = self._get_axis_number(axis)
10266-
numeric_only_bool = com.resolve_numeric_only(numeric_only)
10267-
this = self._get_numeric_data() if numeric_only_bool else self
10268-
if numeric_only is lib.no_default and len(this.columns) < len(self.columns):
10269-
com.deprecate_numeric_only_default(type(self), "corrwith")
10254+
this = self._get_numeric_data() if numeric_only else self
1027010255

1027110256
if isinstance(other, Series):
1027210257
return this.apply(lambda x: other.corr(x, method=method), axis=axis)
1027310258

10274-
if numeric_only_bool:
10259+
if numeric_only:
1027510260
other = other._get_numeric_data()
1027610261
left, right = this.align(other, join="inner", copy=False)
1027710262

@@ -10285,14 +10270,14 @@ def corrwith(
1028510270
right = right + left * 0
1028610271

1028710272
# demeaned data
10288-
ldem = left - left.mean(numeric_only=numeric_only_bool)
10289-
rdem = right - right.mean(numeric_only=numeric_only_bool)
10273+
ldem = left - left.mean(numeric_only=numeric_only)
10274+
rdem = right - right.mean(numeric_only=numeric_only)
1029010275

1029110276
num = (ldem * rdem).sum()
1029210277
dom = (
1029310278
(left.count() - 1)
10294-
* left.std(numeric_only=numeric_only_bool)
10295-
* right.std(numeric_only=numeric_only_bool)
10279+
* left.std(numeric_only=numeric_only)
10280+
* right.std(numeric_only=numeric_only)
1029610281
)
1029710282

1029810283
correl = num / dom
@@ -10484,12 +10469,6 @@ def _get_data() -> DataFrame:
1048410469
# float64, see test_apply_funcs_over_empty
1048510470
out = out.astype(np.float64)
1048610471

10487-
if numeric_only is None and out.shape[0] != df.shape[1]:
10488-
# columns have been dropped GH#41480
10489-
com.deprecate_numeric_only_default(
10490-
type(self), name, deprecate_none=True
10491-
)
10492-
1049310472
return out
1049410473

1049510474
assert not numeric_only and axis == 1
@@ -10739,7 +10718,7 @@ def quantile(
1073910718
self,
1074010719
q: float = ...,
1074110720
axis: Axis = ...,
10742-
numeric_only: bool | lib.NoDefault = ...,
10721+
numeric_only: bool = ...,
1074310722
interpolation: QuantileInterpolation = ...,
1074410723
) -> Series:
1074510724
...
@@ -10749,7 +10728,7 @@ def quantile(
1074910728
self,
1075010729
q: AnyArrayLike | Sequence[float],
1075110730
axis: Axis = ...,
10752-
numeric_only: bool | lib.NoDefault = ...,
10731+
numeric_only: bool = ...,
1075310732
interpolation: QuantileInterpolation = ...,
1075410733
) -> Series | DataFrame:
1075510734
...
@@ -10759,7 +10738,7 @@ def quantile(
1075910738
self,
1076010739
q: float | AnyArrayLike | Sequence[float] = ...,
1076110740
axis: Axis = ...,
10762-
numeric_only: bool | lib.NoDefault = ...,
10741+
numeric_only: bool = ...,
1076310742
interpolation: QuantileInterpolation = ...,
1076410743
) -> Series | DataFrame:
1076510744
...
@@ -10768,7 +10747,7 @@ def quantile(
1076810747
self,
1076910748
q: float | AnyArrayLike | Sequence[float] = 0.5,
1077010749
axis: Axis = 0,
10771-
numeric_only: bool | lib.NoDefault = no_default,
10750+
numeric_only: bool = False,
1077210751
interpolation: QuantileInterpolation = "linear",
1077310752
method: Literal["single", "table"] = "single",
1077410753
) -> Series | DataFrame:
@@ -10781,13 +10760,11 @@ def quantile(
1078110760
Value between 0 <= q <= 1, the quantile(s) to compute.
1078210761
axis : {0 or 'index', 1 or 'columns'}, default 0
1078310762
Equals 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
10784-
numeric_only : bool, default True
10785-
If False, the quantile of datetime and timedelta data will be
10786-
computed as well.
10763+
numeric_only : bool, default False
10764+
Include only `float`, `int` or `boolean` data.
1078710765
10788-
.. deprecated:: 1.5.0
10789-
The default value of ``numeric_only`` will be ``False`` in a future
10790-
version of pandas.
10766+
.. versionchanged:: 2.0.0
10767+
The default value of ``numeric_only`` is now ``False``.
1079110768
1079210769
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
1079310770
This optional parameter specifies the interpolation method to use,
@@ -10859,10 +10836,6 @@ def quantile(
1085910836
"""
1086010837
validate_percentile(q)
1086110838
axis = self._get_axis_number(axis)
10862-
any_not_numeric = any(not is_numeric_dtype(x) for x in self.dtypes)
10863-
if numeric_only is no_default and any_not_numeric:
10864-
com.deprecate_numeric_only_default(type(self), "quantile")
10865-
numeric_only = com.resolve_numeric_only(numeric_only)
1086610839

1086710840
if not is_list_like(q):
1086810841
# BlockManager.quantile expects listlike, so we wrap and unwrap here

pandas/core/generic.py

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8854,7 +8854,7 @@ def rank(
88548854
self: NDFrameT,
88558855
axis: Axis = 0,
88568856
method: str = "average",
8857-
numeric_only: bool_t | None | lib.NoDefault = lib.no_default,
8857+
numeric_only: bool_t = False,
88588858
na_option: str = "keep",
88598859
ascending: bool_t = True,
88608860
pct: bool_t = False,
@@ -8879,8 +8879,12 @@ def rank(
88798879
* first: ranks assigned in order they appear in the array
88808880
* dense: like 'min', but rank always increases by 1 between groups.
88818881
8882-
numeric_only : bool, optional
8882+
numeric_only : bool, default False
88838883
For DataFrame objects, rank only numeric columns if set to True.
8884+
8885+
.. versionchanged:: 2.0.0
8886+
The default value of ``numeric_only`` is now ``False``.
8887+
88848888
na_option : {'keep', 'top', 'bottom'}, default 'keep'
88858889
How to rank NaN values:
88868890
@@ -8954,20 +8958,6 @@ def rank(
89548958
3 spider 8.0 4.0 4.0 4.0 1.000
89558959
4 snake NaN NaN NaN 5.0 NaN
89568960
"""
8957-
warned = False
8958-
if numeric_only is None:
8959-
# GH#45036
8960-
warnings.warn(
8961-
f"'numeric_only=None' in {type(self).__name__}.rank is deprecated "
8962-
"and will raise in a future version. Pass either 'True' or "
8963-
"'False'. 'False' will be the default.",
8964-
FutureWarning,
8965-
stacklevel=find_stack_level(),
8966-
)
8967-
warned = True
8968-
elif numeric_only is lib.no_default:
8969-
numeric_only = None
8970-
89718961
axis_int = self._get_axis_number(axis)
89728962

89738963
if na_option not in {"keep", "top", "bottom"}:
@@ -9003,24 +8993,6 @@ def ranker(data):
90038993
ranks_obj = self._constructor(ranks, **data._construct_axes_dict())
90048994
return ranks_obj.__finalize__(self, method="rank")
90058995

9006-
# if numeric_only is None, and we can't get anything, we try with
9007-
# numeric_only=True
9008-
if numeric_only is None:
9009-
try:
9010-
return ranker(self)
9011-
except TypeError:
9012-
numeric_only = True
9013-
if not warned:
9014-
# Only warn here if we didn't already issue a warning above
9015-
# GH#45036
9016-
warnings.warn(
9017-
f"Dropping of nuisance columns in {type(self).__name__}.rank "
9018-
"is deprecated; in a future version this will raise TypeError. "
9019-
"Select only valid columns before calling rank.",
9020-
FutureWarning,
9021-
stacklevel=find_stack_level(),
9022-
)
9023-
90248996
if numeric_only:
90258997
if self.ndim == 1 and not is_numeric_dtype(self.dtype):
90268998
# GH#47500

0 commit comments

Comments
 (0)