Skip to content

Commit b5e08b5

Browse files
HyukjinKwon authored and rxin committed
Series.unique (#249)
1 parent 5ce0014 commit b5e08b5

File tree

2 files changed

+31
-6
lines changed

2 files changed

+31
-6
lines changed

databricks/koalas/frame.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,10 +1167,6 @@ def count(self):
11671167
"""
11681168
return self._reduce_for_stat_function(_Frame._count_expr)
11691169

1170-
def unique(self):
1171-
sdf = self._sdf
1172-
return DataFrame(spark.DataFrame(sdf._jdf.distinct(), sdf.sql_ctx), self._metadata.copy())
1173-
11741170
def drop(self, labels=None, axis=1, columns: Union[str, List[str]] = None):
11751171
"""
11761172
Drop specified labels from columns.

databricks/koalas/series.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -491,9 +491,38 @@ def dropna(self, axis=0, inplace=False, **kwargs):
491491
def head(self, n=5):
492492
return _col(self.to_dataframe().head(n))
493493

494+
# TODO: Categorical type isn't supported (due to PySpark's limitation) and
495+
# some doctests related to timestamps were not added.
494496
def unique(self):
495-
# Pandas wants a series/array-like object
496-
return _col(self.to_dataframe().unique())
497+
"""
498+
Return unique values of Series object.
499+
500+
Uniques are NOT guaranteed to be returned in order of appearance, and the
501+
result is NOT sorted (see the first example below).
502+
503+
.. note:: This method returns a newly created Series whereas Pandas returns
504+
the unique values as a NumPy array.
505+
506+
Returns
507+
-------
508+
Returns the unique values as a Series.
509+
510+
See Examples section.
511+
512+
Examples
513+
--------
514+
>>> ks.Series([2, 1, 3, 3], name='A').unique()
515+
0 1
516+
1 3
517+
2 2
518+
Name: A, dtype: int64
519+
520+
>>> ks.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
521+
0 2016-01-01
522+
Name: 0, dtype: datetime64[ns]
523+
"""
524+
sdf = self.to_dataframe()._sdf
525+
return _col(DataFrame(sdf.select(self._scol).distinct()))
497526

498527
# TODO: Update Documentation for Bins Parameter when its supported
499528
def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):

0 commit comments

Comments
 (0)