-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Convert DataFrame.rename to keyword only; simplify axis validation #29140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
f09c6df
66acc90
177a440
b66e9e2
2415e46
296a9df
6ba8518
4128780
2776c7d
8e20fa4
cca8eed
1f8e1cf
4dc8bbe
1b261f8
b5e54bd
ab1ee2e
af0b7c5
d5d812c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,10 +15,12 @@ | |
import sys | ||
from textwrap import dedent | ||
from typing import ( | ||
Callable, | ||
FrozenSet, | ||
Hashable, | ||
Iterable, | ||
List, | ||
Mapping, | ||
Optional, | ||
Sequence, | ||
Set, | ||
|
@@ -93,7 +95,7 @@ | |
) | ||
from pandas.core.dtypes.missing import isna, notna | ||
|
||
from pandas._typing import Axes, Dtype, FilePathOrBuffer | ||
from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Level | ||
from pandas.core import algorithms, common as com, nanops, ops | ||
from pandas.core.accessor import CachedAccessor | ||
from pandas.core.arrays import Categorical, ExtensionArray | ||
|
@@ -2074,7 +2076,7 @@ def to_stata( | |
data_label=data_label, | ||
write_index=write_index, | ||
variable_labels=variable_labels, | ||
**kwargs | ||
**kwargs, | ||
) | ||
writer.write_file() | ||
|
||
|
@@ -2100,7 +2102,7 @@ def to_parquet( | |
compression="snappy", | ||
index=None, | ||
partition_cols=None, | ||
**kwargs | ||
**kwargs, | ||
): | ||
""" | ||
Write a DataFrame to the binary parquet format. | ||
|
@@ -2180,7 +2182,7 @@ def to_parquet( | |
compression=compression, | ||
index=index, | ||
partition_cols=partition_cols, | ||
**kwargs | ||
**kwargs, | ||
) | ||
|
||
@Substitution( | ||
|
@@ -4031,7 +4033,25 @@ def drop( | |
"mapper", | ||
[("copy", True), ("inplace", False), ("level", None), ("errors", "ignore")], | ||
) | ||
def rename(self, *args, **kwargs): | ||
def rename( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Change here was to be explicit about what is accepted There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 to being explicit. The only reason we had *args, **kwargs in the first place was to support the ambiguous case. |
||
self, | ||
mapper: Optional[ | ||
Union[Mapping[Hashable, Hashable], Callable[[Hashable], Hashable]] | ||
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
] = None, | ||
*, | ||
index: Optional[ | ||
Union[Mapping[Hashable, Hashable], Callable[[Hashable], Hashable]] | ||
] = None, | ||
columns: Optional[ | ||
Union[Mapping[Hashable, Hashable], Callable[[Hashable], Hashable]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it worth making a name for this in _typing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was thinking the same thing. Any suggestions? |
||
] = None, | ||
axis: Optional[Axis] = None, | ||
copy: bool = True, | ||
inplace: bool = False, | ||
level: Optional[Level] = None, | ||
errors: str = "ignore", | ||
) -> "DataFrame": | ||
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
""" | ||
Alter axes labels. | ||
|
||
|
@@ -4140,12 +4160,16 @@ def rename(self, *args, **kwargs): | |
2 2 5 | ||
4 3 6 | ||
""" | ||
axes = validate_axis_style_args(self, args, kwargs, "mapper", "rename") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Side note - there is only one other use now of this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there anything blocking a similar PR for
Is this referring to DataFrame.reindex, or validate_axis_style_args? Ideally we can completely remove There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't look before because I was trying to keep this focused, but it might not be too much extra effort. I'll take a look and if it gets out of hand let you know |
||
kwargs.update(axes) | ||
# Pop these, since the values are in `kwargs` under different names | ||
kwargs.pop("axis", None) | ||
kwargs.pop("mapper", None) | ||
return super().rename(**kwargs) | ||
return super().rename( | ||
mapper=mapper, | ||
index=index, | ||
columns=columns, | ||
axis=axis, | ||
copy=copy, | ||
inplace=inplace, | ||
level=level, | ||
errors=errors, | ||
) | ||
|
||
@Substitution(**_shared_doc_kwargs) | ||
@Appender(NDFrame.fillna.__doc__) | ||
|
@@ -4157,7 +4181,7 @@ def fillna( | |
inplace=False, | ||
limit=None, | ||
downcast=None, | ||
**kwargs | ||
**kwargs, | ||
): | ||
return super().fillna( | ||
value=value, | ||
|
@@ -4166,7 +4190,7 @@ def fillna( | |
inplace=inplace, | ||
limit=limit, | ||
downcast=downcast, | ||
**kwargs | ||
**kwargs, | ||
) | ||
|
||
@Appender(_shared_docs["replace"] % _shared_doc_kwargs) | ||
|
@@ -6613,7 +6637,7 @@ def _gotitem( | |
see_also=_agg_summary_and_see_also_doc, | ||
examples=_agg_examples_doc, | ||
versionadded="\n.. versionadded:: 0.20.0\n", | ||
**_shared_doc_kwargs | ||
**_shared_doc_kwargs, | ||
) | ||
@Appender(_shared_docs["aggregate"]) | ||
def aggregate(self, func, axis=0, *args, **kwargs): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
FrozenSet, | ||
Hashable, | ||
List, | ||
Mapping, | ||
Optional, | ||
Sequence, | ||
Set, | ||
|
@@ -65,7 +66,7 @@ | |
from pandas.core.dtypes.missing import isna, notna | ||
|
||
import pandas as pd | ||
from pandas._typing import Dtype, FilePathOrBuffer, Scalar | ||
from pandas._typing import Axis, Dtype, FilePathOrBuffer, Level, Scalar | ||
from pandas.core import missing, nanops | ||
import pandas.core.algorithms as algos | ||
from pandas.core.base import PandasObject, SelectionMixin | ||
|
@@ -1013,7 +1014,24 @@ def swaplevel(self, i=-2, j=-1, axis=0): | |
# ---------------------------------------------------------------------- | ||
# Rename | ||
|
||
def rename(self, *args, **kwargs): | ||
def rename( | ||
self, | ||
mapper: Optional[ | ||
Union[Mapping[Hashable, Hashable], Callable[[Hashable], Hashable]] | ||
] = None, | ||
*, | ||
index: Optional[ | ||
Union[Mapping[Hashable, Hashable], Callable[[Hashable], Hashable]] | ||
] = None, | ||
columns: Optional[ | ||
Union[Mapping[Hashable, Hashable], Callable[[Hashable], Hashable]] | ||
] = None, | ||
axis: Optional[Axis] = None, | ||
copy: bool = True, | ||
inplace: bool = False, | ||
level: Optional[Level] = None, | ||
errors: str = "ignore", | ||
): | ||
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
Alter axes input function or functions. Function / dict values must be | ||
unique (1-to-1). Labels not contained in a dict / Series will be left | ||
|
@@ -1126,44 +1144,46 @@ def rename(self, *args, **kwargs): | |
|
||
See the :ref:`user guide <basics.rename>` for more. | ||
""" | ||
axes, kwargs = self._construct_axes_from_arguments(args, kwargs) | ||
copy = kwargs.pop("copy", True) | ||
inplace = kwargs.pop("inplace", False) | ||
level = kwargs.pop("level", None) | ||
axis = kwargs.pop("axis", None) | ||
errors = kwargs.pop("errors", "ignore") | ||
if axis is not None: | ||
# Validate the axis | ||
self._get_axis_number(axis) | ||
|
||
if kwargs: | ||
raise TypeError( | ||
"rename() got an unexpected keyword " | ||
'argument "{0}"'.format(list(kwargs.keys())[0]) | ||
) | ||
|
||
if com.count_not_none(*axes.values()) == 0: | ||
if mapper is None and index is None and columns is None: | ||
raise TypeError("must pass an index to rename") | ||
|
||
self._consolidate_inplace() | ||
if index is not None or columns is not None: | ||
if axis is not None: | ||
raise TypeError( | ||
"Cannot specify both 'axis' and any of 'index' or 'columns'" | ||
) | ||
elif mapper is not None: | ||
raise TypeError( | ||
"Cannot specify both 'mapper' and any of 'index' or 'columns'" | ||
) | ||
else: | ||
# use the mapper argument | ||
if axis in {1, "columns"}: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We accept 3 arguments, Note that this is arguably a little too strict, as this is no longer valid: df.rename({"key": "val"}, axis=1, index={"key2": "val2"}) But I don't think there is a lot to gain in supporting that given the variety of other options available here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I don't recall if that case was originally discussed, but I think it's OK to break it.
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
columns = mapper | ||
else: | ||
index = mapper | ||
|
||
result = self if inplace else self.copy(deep=copy) | ||
|
||
# start in the axis order to eliminate too many copies | ||
for axis in range(self._AXIS_LEN): | ||
v = axes.get(self._AXIS_NAMES[axis]) | ||
if v is None: | ||
for axis_no, replacements in enumerate((index, columns)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ultimately I am trying to get rid of all of the internal If we buy into just having |
||
if replacements is None: | ||
continue | ||
f = com.get_rename_function(v) | ||
baxis = self._get_block_manager_axis(axis) | ||
|
||
axis = self._get_axis(axis_no) | ||
baxis = self._get_block_manager_axis(axis_no) | ||
f = com.get_rename_function(replacements) | ||
|
||
if level is not None: | ||
level = self.axes[axis]._get_level_number(level) | ||
level = axis._get_level_number(level) | ||
|
||
# GH 13473 | ||
if not callable(v): | ||
indexer = self.axes[axis].get_indexer_for(v) | ||
if not callable(replacements): | ||
indexer = axis.get_indexer_for(replacements) | ||
if errors == "raise" and len(indexer[indexer == -1]): | ||
missing_labels = [ | ||
label for index, label in enumerate(v) if indexer[index] == -1 | ||
label | ||
for index, label in enumerate(replacements) | ||
if indexer[index] == -1 | ||
] | ||
raise KeyError("{} not found in axis".format(missing_labels)) | ||
|
||
|
@@ -7016,7 +7036,7 @@ def interpolate( | |
limit_direction="forward", | ||
limit_area=None, | ||
downcast=None, | ||
**kwargs | ||
**kwargs, | ||
): | ||
""" | ||
Interpolate values according to different methods. | ||
|
@@ -7089,7 +7109,7 @@ def interpolate( | |
limit_area=limit_area, | ||
inplace=inplace, | ||
downcast=downcast, | ||
**kwargs | ||
**kwargs, | ||
) | ||
|
||
if inplace: | ||
|
@@ -7789,7 +7809,7 @@ def groupby( | |
group_keys=True, | ||
squeeze=False, | ||
observed=False, | ||
**kwargs | ||
**kwargs, | ||
): | ||
""" | ||
Group DataFrame or Series using a mapper or by a Series of columns. | ||
|
@@ -7915,7 +7935,7 @@ def groupby( | |
group_keys=group_keys, | ||
squeeze=squeeze, | ||
observed=observed, | ||
**kwargs | ||
**kwargs, | ||
) | ||
|
||
def asfreq(self, freq, method=None, how=None, normalize=False, fill_value=None): | ||
|
@@ -11556,7 +11576,7 @@ def stat_func( | |
level=None, | ||
numeric_only=None, | ||
min_count=0, | ||
**kwargs | ||
**kwargs, | ||
): | ||
if name == "sum": | ||
nv.validate_sum(tuple(), kwargs) | ||
|
Uh oh!
There was an error while loading. Please reload this page.