Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7521,6 +7521,67 @@ def keys(self):
"""
return self.columns

def pct_change(self, periods=1):
    """
    Compute the fractional change of every value relative to an earlier row.

    For each data column the result is ``(current - prior) / prior``, where
    ``prior`` is the value found ``periods`` rows before the current row.
    Rows are ordered by the index columns when locating the prior row.

    .. note:: the current implementation of this API uses Spark's Window without
        a partition specification. This moves all data into a single partition
        on a single machine and could cause serious performance degradation.
        Avoid using this method on very large datasets.

    Parameters
    ----------
    periods : int, default 1
        Number of rows to shift back when picking the prior element.

    Returns
    -------
    DataFrame

    Examples
    --------
    Percentage change in French franc, Deutsche Mark, and Italian lira
    from 1980-01-01 to 1980-03-01.

    >>> df = ks.DataFrame({
    ...     'FR': [4.0405, 4.0963, 4.3149],
    ...     'GR': [1.7246, 1.7482, 1.8519],
    ...     'IT': [804.74, 810.01, 860.13]},
    ...     index=['1980-01-01', '1980-02-01', '1980-03-01'])
    >>> df
                    FR      GR      IT
    1980-01-01  4.0405  1.7246  804.74
    1980-02-01  4.0963  1.7482  810.01
    1980-03-01  4.3149  1.8519  860.13

    >>> df.pct_change()
                      FR        GR        IT
    1980-01-01       NaN       NaN       NaN
    1980-02-01  0.013810  0.013684  0.006549
    1980-03-01  0.053365  0.059318  0.061876

    The number of rows to look back can be changed through ``periods``.

    >>> df.pct_change(2)
                      FR        GR       IT
    1980-01-01       NaN       NaN      NaN
    1980-02-01       NaN       NaN      NaN
    1980-03-01  0.067912  0.073814  0.06883
    """
    internal = self._internal

    # Un-partitioned window ordered by the index; the frame is pinned to the
    # single row located `periods` positions before the current one.
    ordered_by_index = Window.orderBy(internal.index_columns)
    shifted_window = ordered_by_index.rowsBetween(-periods, -periods)

    def relative_change(name):
        # (current - prior) / prior for one data column.
        current = F.col(name)
        prior = F.lag(current, periods).over(shifted_window)
        return (current - prior) / prior

    # Overwrite each data column in place so every other column of the
    # underlying Spark frame is carried over untouched.
    changed_sdf = self._sdf
    for name in internal.data_columns:
        changed_sdf = changed_sdf.withColumn(name, relative_change(name))

    return DataFrame(_InternalFrame(sdf=changed_sdf, index_map=internal.index_map))

# TODO: fix parameter 'axis' and 'numeric_only' to work same as pandas'
def quantile(self, q=0.5, axis=0, numeric_only=True, accuracy=10000):
"""
Expand Down
1 change: 0 additions & 1 deletion databricks/koalas/missing/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ class _MissingPandasLikeDataFrame(object):
mad = unsupported_function('mad')
mask = unsupported_function('mask')
mode = unsupported_function('mode')
pct_change = unsupported_function('pct_change')
prod = unsupported_function('prod')
product = unsupported_function('product')
query = unsupported_function('query')
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/frame.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ Computations / Descriptive Stats
DataFrame.mean
DataFrame.min
DataFrame.median
DataFrame.pct_change
DataFrame.quantile
DataFrame.nunique
DataFrame.skew
Expand Down