@@ -2801,8 +2801,9 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
2801
2801
else :
2802
2802
return result
2803
2803
2804
+ @deprecate_kwarg ('take_last' , 'keep' , mapping = {True : 'last' , False : 'first' })
2804
2805
@deprecate_kwarg (old_arg_name = 'cols' , new_arg_name = 'subset' )
2805
- def drop_duplicates (self , subset = None , take_last = False , inplace = False ):
2806
+ def drop_duplicates (self , subset = None , keep = 'first' , inplace = False ):
2806
2807
"""
2807
2808
Return DataFrame with duplicate rows removed, optionally only
2808
2809
considering certain columns
@@ -2812,8 +2813,11 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False):
2812
2813
subset : column label or sequence of labels, optional
2813
2814
Only consider certain columns for identifying duplicates, by
2814
2815
default use all of the columns
2815
- take_last : boolean, default False
2816
- Take the last observed row in a row. Defaults to the first row
2816
+ keep : {'first', 'last', False}, default 'first'
2817
+ - ``first`` : Drop duplicates except for the first occurrence.
2818
+ - ``last`` : Drop duplicates except for the last occurrence.
2819
+ - False : Drop all duplicates, keeping no occurrence.
2820
+ take_last : deprecated, use ``keep`` instead
2817
2821
inplace : boolean, default False
2818
2822
Whether to drop duplicates in place or to return a copy
2819
2823
cols : kwargs only argument of subset [deprecated]
@@ -2822,7 +2826,7 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False):
2822
2826
-------
2823
2827
deduplicated : DataFrame
2824
2828
"""
2825
- duplicated = self .duplicated (subset , take_last = take_last )
2829
+ duplicated = self .duplicated (subset , keep = keep )
2826
2830
2827
2831
if inplace :
2828
2832
inds , = (- duplicated ).nonzero ()
@@ -2831,8 +2835,9 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False):
2831
2835
else :
2832
2836
return self [- duplicated ]
2833
2837
2838
+ @deprecate_kwarg ('take_last' , 'keep' , mapping = {True : 'last' , False : 'first' })
2834
2839
@deprecate_kwarg (old_arg_name = 'cols' , new_arg_name = 'subset' )
2835
- def duplicated (self , subset = None , take_last = False ):
2840
+ def duplicated (self , subset = None , keep = 'first' ):
2836
2841
"""
2837
2842
Return boolean Series denoting duplicate rows, optionally only
2838
2843
considering certain columns
@@ -2842,9 +2847,13 @@ def duplicated(self, subset=None, take_last=False):
2842
2847
subset : column label or sequence of labels, optional
2843
2848
Only consider certain columns for identifying duplicates, by
2844
2849
default use all of the columns
2845
- take_last : boolean, default False
2846
- For a set of distinct duplicate rows, flag all but the last row as
2847
- duplicated. Default is for all but the first row to be flagged
2850
+ keep : {'first', 'last', False}, default 'first'
2851
+ - ``first`` : Mark duplicates as ``True`` except for the
2852
+ first occurrence.
2853
+ - ``last`` : Mark duplicates as ``True`` except for the
2854
+ last occurrence.
2855
+ - False : Mark every occurrence of a duplicate as ``True``.
2856
+ take_last : deprecated, use ``keep`` instead
2848
2857
cols : kwargs only argument of subset [deprecated]
2849
2858
2850
2859
Returns
@@ -2870,7 +2879,7 @@ def f(vals):
2870
2879
labels , shape = map (list , zip ( * map (f , vals )))
2871
2880
2872
2881
ids = get_group_index (labels , shape , sort = False , xnull = False )
2873
- return Series (duplicated_int64 (ids , take_last ), index = self .index )
2882
+ return Series (duplicated_int64 (ids , keep ), index = self .index )
2874
2883
2875
2884
#----------------------------------------------------------------------
2876
2885
# Sorting
0 commit comments