-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
API/ENH: add method='nearest' to Index.get_indexer/reindex and method to get_loc #9258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2682,21 +2682,31 @@ def _astype_nansafe(arr, dtype, copy=True): | |
return arr.view(dtype) | ||
|
||
|
||
def _clean_fill_method(method): | ||
def _clean_fill_method(method, allow_nearest=False): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems kind of hacky; why don't we just always allow nearest (I know it's not quite supported by fillna)? Or will you just fix this then? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, when nearest is valid for fillna we should remove this option. I was considering adding nearest for fillna in this PR, but I'd rather save it for another one just to minimize the scope here. |
||
if method is None: | ||
return None | ||
method = method.lower() | ||
if method == 'ffill': | ||
method = 'pad' | ||
if method == 'bfill': | ||
method = 'backfill' | ||
if method not in ['pad', 'backfill']: | ||
msg = ('Invalid fill method. Expecting pad (ffill) or backfill ' | ||
'(bfill). Got %s' % method) | ||
|
||
valid_methods = ['pad', 'backfill'] | ||
expecting = 'pad (ffill) or backfill (bfill)' | ||
if allow_nearest: | ||
valid_methods.append('nearest') | ||
expecting = 'pad (ffill), backfill (bfill) or nearest' | ||
if method not in valid_methods: | ||
msg = ('Invalid fill method. Expecting %s. Got %s' | ||
% (expecting, method)) | ||
raise ValueError(msg) | ||
return method | ||
|
||
|
||
def _clean_reindex_fill_method(method): | ||
return _clean_fill_method(method, allow_nearest=True) | ||
|
||
|
||
def _all_none(*args): | ||
for arg in args: | ||
if arg is not None: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1672,10 +1672,12 @@ def sort_index(self, axis=0, ascending=True): | |
keywords) | ||
New labels / index to conform to. Preferably an Index object to | ||
avoid duplicating data | ||
method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None | ||
Method to use for filling holes in reindexed DataFrame | ||
pad / ffill: propagate last valid observation forward to next valid | ||
backfill / bfill: use NEXT valid observation to fill gap | ||
method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional | ||
Method to use for filling holes in reindexed DataFrame: | ||
* default: don't fill gaps | ||
* pad / ffill: propagate last valid observation forward to next valid | ||
* backfill / bfill: use next valid observation to fill gap | ||
* nearest: use nearest valid observations to fill gap | ||
copy : boolean, default True | ||
Return a new object, even if the passed indexes are the same | ||
level : int or name | ||
|
@@ -1703,7 +1705,7 @@ def reindex(self, *args, **kwargs): | |
|
||
# construct the args | ||
axes, kwargs = self._construct_axes_from_arguments(args, kwargs) | ||
method = com._clean_fill_method(kwargs.get('method')) | ||
method = com._clean_reindex_fill_method(kwargs.get('method')) | ||
level = kwargs.get('level') | ||
copy = kwargs.get('copy', True) | ||
limit = kwargs.get('limit') | ||
|
@@ -1744,9 +1746,8 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy): | |
|
||
axis = self._get_axis_number(a) | ||
obj = obj._reindex_with_indexers( | ||
{axis: [new_index, indexer]}, method=method, | ||
fill_value=fill_value, limit=limit, copy=copy, | ||
allow_dups=False) | ||
{axis: [new_index, indexer]}, | ||
fill_value=fill_value, copy=copy, allow_dups=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Doesn't this need method passed through? Shocked the tests don't fail. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You'll note that `_reindex_with_indexers` no longer accepts `method` or `limit` (see its updated signature below). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep. |
||
|
||
return obj | ||
|
||
|
@@ -1770,10 +1771,12 @@ def _reindex_multi(self, axes, copy, fill_value): | |
New labels / index to conform to. Preferably an Index object to | ||
avoid duplicating data | ||
axis : %(axes_single_arg)s | ||
method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None | ||
Method to use for filling holes in reindexed object. | ||
pad / ffill: propagate last valid observation forward to next valid | ||
backfill / bfill: use NEXT valid observation to fill gap | ||
method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional | ||
Method to use for filling holes in reindexed DataFrame: | ||
* default: don't fill gaps | ||
* pad / ffill: propagate last valid observation forward to next valid | ||
* backfill / bfill: use next valid observation to fill gap | ||
* nearest: use nearest valid observations to fill gap | ||
copy : boolean, default True | ||
Return a new object, even if the passed indexes are the same | ||
level : int or name | ||
|
@@ -1802,15 +1805,14 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, | |
|
||
axis_name = self._get_axis_name(axis) | ||
axis_values = self._get_axis(axis_name) | ||
method = com._clean_fill_method(method) | ||
method = com._clean_reindex_fill_method(method) | ||
new_index, indexer = axis_values.reindex(labels, method, level, | ||
limit=limit) | ||
return self._reindex_with_indexers( | ||
{axis: [new_index, indexer]}, method=method, fill_value=fill_value, | ||
limit=limit, copy=copy) | ||
{axis: [new_index, indexer]}, fill_value=fill_value, copy=copy) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. oh, right, |
||
|
||
def _reindex_with_indexers(self, reindexers, method=None, | ||
fill_value=np.nan, limit=None, copy=False, | ||
def _reindex_with_indexers(self, reindexers, | ||
fill_value=np.nan, copy=False, | ||
allow_dups=False): | ||
""" allow_dups indicates an internal call here """ | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this was a bug then?