Skip to content

Commit a924df6

Browse files
itholic authored and HyukjinKwon committed
Implement Series.mode (#899)
Resolves #888

```python
>>> s = ks.Series([0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3,
...                np.nan, np.nan, np.nan])
>>> s
0     0.0
1     0.0
2     1.0
3     1.0
4     1.0
5     2.0
6     2.0
7     2.0
8     3.0
9     3.0
10    3.0
11    NaN
12    NaN
13    NaN
Name: 0, dtype: float64

>>> s.mode()
0    1.0
1    2.0
2    3.0
Name: 0, dtype: float64

>>> s.mode(False)
0    NaN
1    1.0
2    2.0
3    3.0
Name: 0, dtype: float64
```
1 parent c4921b0 commit a924df6

File tree

3 files changed

+84
-1
lines changed

3 files changed

+84
-1
lines changed

databricks/koalas/missing/series.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ class _MissingPandasLikeSeries(object):
7979
last_valid_index = unsupported_function('last_valid_index')
8080
mad = unsupported_function('mad')
8181
mask = unsupported_function('mask')
82-
mode = unsupported_function('mode')
8382
pct_change = unsupported_function('pct_change')
8483
pop = unsupported_function('pop')
8584
prod = unsupported_function('prod')

databricks/koalas/series.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3224,6 +3224,89 @@ def copy(self) -> 'Series':
32243224
"""
32253225
return _col(DataFrame(self._internal.copy()))
32263226

3227+
def mode(self, dropna=True) -> 'Series':
3228+
"""
3229+
Return the mode(s) of the dataset.
3230+
3231+
Always returns a Series, even if only one value is returned.
3232+
3233+
Parameters
3234+
----------
3235+
dropna : bool, default True
3236+
Don't consider counts of NaN/NaT.
3237+
3238+
Returns
3239+
-------
3240+
Series
3241+
Modes of the Series. No sort is applied to the result, so the order of the values is not guaranteed.
3242+
3243+
Examples
3244+
--------
3245+
>>> s = ks.Series([0, 0, 1, 1, 1, np.nan, np.nan, np.nan])
3246+
>>> s
3247+
0 0.0
3248+
1 0.0
3249+
2 1.0
3250+
3 1.0
3251+
4 1.0
3252+
5 NaN
3253+
6 NaN
3254+
7 NaN
3255+
Name: 0, dtype: float64
3256+
3257+
>>> s.mode()
3258+
0 1.0
3259+
Name: 0, dtype: float64
3260+
3261+
If several values are tied for the highest count, all of them are returned.
3262+
3263+
>>> s = ks.Series([0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3,
3264+
... np.nan, np.nan, np.nan])
3265+
>>> s
3266+
0 0.0
3267+
1 0.0
3268+
2 1.0
3269+
3 1.0
3270+
4 1.0
3271+
5 2.0
3272+
6 2.0
3273+
7 2.0
3274+
8 3.0
3275+
9 3.0
3276+
10 3.0
3277+
11 NaN
3278+
12 NaN
3279+
13 NaN
3280+
Name: 0, dtype: float64
3281+
3282+
>>> s.mode()
3283+
0 1.0
3284+
1 3.0
3285+
2 2.0
3286+
Name: 0, dtype: float64
3287+
3288+
With ``dropna=False``, NaN is counted as well and can appear in the result.
3289+
3290+
>>> s.mode(False)
3291+
0 NaN
3292+
1 1.0
3293+
2 3.0
3294+
3 2.0
3295+
Name: 0, dtype: float64
3296+
"""
3297+
ser_count = self.value_counts(dropna=dropna, sort=False)  # per-value counts; sort=False, so unordered
3298+
sdf_count = ser_count._internal.sdf  # underlying Spark DataFrame of the counts
3299+
most_value = ser_count.max()  # the highest count
3300+
sdf_most_value = sdf_count.where("count == {}".format(most_value))  # keep every value tied at the highest count
3301+
sdf = sdf_most_value.select(  # NOTE(review): presumably the counted values sit in the first index column - confirm
3302+
F.col(SPARK_INDEX_NAME_FORMAT(0)).alias('0'))
3303+
internal = _InternalFrame(sdf=sdf)
3304+
3305+
result = _col(DataFrame(internal))
3306+
result.name = self.name  # the result carries the same name as this Series
3307+
3308+
return result
3309+
32273310
def keys(self):
32283311
"""
32293312
Return alias for index.

docs/source/reference/series.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ Computations / Descriptive Stats
115115
Series.max
116116
Series.mean
117117
Series.min
118+
Series.mode
118119
Series.nlargest
119120
Series.nsmallest
120121
Series.nunique

0 commit comments

Comments
 (0)