@@ -4364,7 +4364,7 @@ def duplicated(self, subset=None, keep='first', return_inverse=False):
4364
4364
compatible with ``return_inverse``.
4365
4365
return_inverse : boolean, default False
4366
4366
Determines whether the mapping from unique elements to the original
4367
- index should be returned. If true , the output is a tuple.
4367
+ index should be returned. If True, the output is a tuple.
4368
4368
4369
4369
.. versionadded:: 0.24.0
4370
4370
@@ -4373,12 +4373,14 @@ def duplicated(self, subset=None, keep='first', return_inverse=False):
4373
4373
duplicated : Series or tuple of Series if return_inverse is True
4374
4374
"""
4375
4375
from pandas .core .sorting import get_group_index
4376
- from pandas ._libs .hashtable import duplicated_int64 , _SIZE_HINT_LIMIT
4376
+ from pandas ._libs .hashtable import _SIZE_HINT_LIMIT
4377
+ from pandas .core .algorithms import duplicated
4377
4378
4378
4379
if return_inverse and not keep :
4379
4380
raise ValueError ("The parameters return_inverse=True and "
4380
4381
"keep=False cannot be used together (impossible "
4381
- "to calculate an inverse when discarding values)" )
4382
+ "to calculate an inverse when discarding all "
4383
+ "instances of a duplicate)." )
4382
4384
4383
4385
def f (vals ):
4384
4386
labels , shape = algorithms .factorize (
@@ -4404,32 +4406,13 @@ def f(vals):
4404
4406
labels , shape = map (list , zip (* map (f , vals )))
4405
4407
4406
4408
ids = get_group_index (labels , shape , sort = False , xnull = False )
4407
- isdup = Series (duplicated_int64 (ids , keep ), index = self .index )
4408
4409
if not return_inverse :
4409
- return isdup
4410
- elif not isdup .any ():
4411
- # no need to calculate inverse if no duplicates
4412
- inv = Series (self .index , index = self .index )
4413
- return isdup , inv
4414
-
4415
- if keep == 'first' :
4416
- # o2u: original indices to indices of ARRAY of unique values
4417
- # u2o: reduplication from array of unique values to original array
4418
- _ , o2u , u2o = np .unique (ids , return_inverse = True ,
4419
- return_index = True )
4420
- inv = Series (self .index [o2u ][u2o ], index = self .index )
4421
- elif keep == 'last' :
4422
- # np.unique takes first occurrence as unique value,
4423
- # so we flip ids that first becomes last
4424
- ids = ids [::- 1 ]
4425
- _ , o2u , u2o = np .unique (ids , return_inverse = True ,
4426
- return_index = True )
4427
- # the values in the ids-array correspond(ed) to self.index -
4428
- # by flipping ids around, we need to do the same for self.index,
4429
- # ___because o2u and u2o are relative to that order___.
4430
- # Finally, to fit with 'index=self.index' in the constructor,
4431
- # we need to flip the values around one last time
4432
- inv = Series (self .index [::- 1 ][o2u ][u2o ][::- 1 ], index = self .index )
4410
+ return Series (duplicated (ids , keep = keep ), index = self .index )
4411
+
4412
+ isdup_array , inv_array = duplicated (ids , keep = keep ,
4413
+ return_inverse = return_inverse )
4414
+ isdup = Series (isdup_array , index = self .index )
4415
+ inv = Series (self .index [inv_array ], index = self .index )
4433
4416
return isdup , inv
4434
4417
4435
4418
# ----------------------------------------------------------------------
0 commit comments