Skip to content

Commit 3e263df

Browse files
itholic authored and ueshin committed
Implement Series.aggregate and agg (#816)
Implement `Series.aggregate` (and its alias `agg`) for Koalas, mirroring pandas' Series.aggregate (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.aggregate.html).

Example:

```python
>>> s = ks.Series([1, 2, 3, 4])
>>> s
0    1
1    2
2    3
3    4
Name: 0, dtype: int64
>>> s.agg('min')
1
>>> s.agg(['min', 'max'])
max    4
min    1
Name: 0, dtype: int64
```

(The example output above is taken from pandas.)
1 parent f6f27b0 commit 3e263df

File tree

5 files changed

+58
-3
lines changed

5 files changed

+58
-3
lines changed

databricks/koalas/frame.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -871,7 +871,6 @@ def applymap(self, func):
871871
column_index=[c._internal.column_index[0] for c in applied])
872872
return DataFrame(internal)
873873

874-
# TODO: Series support is not implemented yet.
875874
# TODO: not all arguments are implemented comparing to Pandas' for now.
876875
def aggregate(self, func: Union[List[str], Dict[str, List[str]]]):
877876
"""Aggregate using one or more operations over the specified axis.

databricks/koalas/missing/series.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,6 @@ class _MissingPandasLikeSeries(object):
4848
flags = unsupported_property('flags', deprecated=True)
4949

5050
# Functions
51-
agg = unsupported_function('agg')
52-
aggregate = unsupported_function('aggregate')
5351
align = unsupported_function('align')
5452
argsort = unsupported_function('argsort')
5553
asfreq = unsupported_function('asfreq')

databricks/koalas/series.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2294,6 +2294,51 @@ def apply(self, func, args=(), **kwds):
22942294
wrapped = ks.pandas_wraps(return_col=return_sig)(apply_each)
22952295
return wrapped(self, *args, **kwds).rename(self.name)
22962296

2297+
# TODO: not all arguments are implemented comparing to Pandas' for now.
2298+
def aggregate(self, func: Union[str, List[str]]):
    """Aggregate using one or more operations over the specified axis.

    Parameters
    ----------
    func : str or a list of str
        Name(s) of the function(s) to apply to this Series.

    Returns
    -------
    scalar, Series
        The return can be:

        - scalar : when Series.agg is called with single function
        - Series : when Series.agg is called with several functions

    Raises
    ------
    ValueError
        If `func` is neither a string nor a list.

    Notes
    -----
    `agg` is an alias for `aggregate`. Use the alias.

    When `func` names a non-callable attribute (for example a property), the
    attribute's value is returned as-is instead of being called.

    See Also
    --------
    databricks.koalas.Series.apply
    databricks.koalas.Series.transform

    Examples
    --------
    >>> s = ks.Series([1, 2, 3, 4])
    >>> s.agg('min')
    1

    >>> s.agg(['min', 'max'])
    max    4
    min    1
    Name: 0, dtype: int64
    """
    if isinstance(func, list):
        # Reuse the frame-level aggregation on a one-column frame, then pull
        # this series' column back out of the result.
        return self.to_frame().agg(func)[self.name]
    elif isinstance(func, str):
        attr = getattr(self, func)
        # Only call the attribute when it is callable; calling a plain value
        # (e.g. a property result) would raise TypeError.
        return attr() if callable(attr) else attr
    else:
        raise ValueError("func must be a string or list of strings")
2339+
2340+
agg = aggregate
2341+
22972342
def transpose(self, *args, **kwargs):
22982343
"""
22992344
Return the transpose, which is by definition self.

databricks/koalas/tests/test_series.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,3 +647,14 @@ def test_astype(self):
647647
kser = koalas.Series(pser)
648648
with self.assertRaisesRegex(ValueError, 'Type int63 not understood'):
649649
kser.astype('int63')
650+
651+
def test_aggregate(self):
    """Check that Series.aggregate rejects unsupported `func` arguments."""
    kser = koalas.Series(pd.Series([10, 20, 15, 30, 45], name='x'))

    # A dict is neither a string nor a list, so it must be rejected.
    with self.assertRaisesRegex(ValueError, 'func must be a string or list of strings'):
        kser.aggregate({'x': ['min', 'max']})

    # A list may hold only function names; the builtin `max` is not a string.
    expected = ('If the given function is a list, it '
                'should only contains function names as strings.')
    with self.assertRaisesRegex(ValueError, expected):
        kser.aggregate(['min', max])

docs/source/reference/series.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ Function application, GroupBy & Window
8585
:toctree: api/
8686

8787
Series.apply
88+
Series.agg
89+
Series.aggregate
8890
Series.map
8991
Series.groupby
9092
Series.pipe

0 commit comments

Comments
 (0)