Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 0 additions & 62 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6448,68 +6448,6 @@ def swaplevel(self, i=-2, j=-1, axis=0) -> "DataFrame":

return DataFrame(internal)

def swapaxes(
    self, i: Union[str, int] = 0, j: Union[str, int] = 1, copy: bool = True
) -> "DataFrame":
    """
    Interchange axes and swap values axes appropriately.

    .. note:: This method is based on an expensive operation due to the nature
        of big data. Internally it needs to generate each row for each value, and
        then group twice - it is a huge operation. To prevent misusage, this method
        has the 'compute.max_rows' default limit of input length, and raises a ValueError.

            >>> from databricks.koalas.config import option_context
            >>> with option_context('compute.max_rows', 1000):  # doctest: +NORMALIZE_WHITESPACE
            ...     ks.DataFrame({'a': range(1001)}).swapaxes()
            Traceback (most recent call last):
              ...
            ValueError: Current DataFrame has more then the given limit 1000 rows.
            Please set 'compute.max_rows' by using 'databricks.koalas.config.set_option'
            to retrieve to retrieve more than 1000 rows. Note that, before changing the
            'compute.max_rows', this operation is considerably expensive.

    Parameters
    ----------
    i : {0 or 'index', 1 or 'columns'}, default 0
        The first axis to swap.
    j : {0 or 'index', 1 or 'columns'}, default 1
        The second axis to swap.
    copy : bool, default True
        Only ``True`` is accepted; any other value fails the assertion below.

    Returns
    -------
    DataFrame
        A copy of this DataFrame when ``i`` and ``j`` name the same axis,
        otherwise the transposed DataFrame.

    Examples
    --------
    >>> kdf = ks.DataFrame(
    ...     [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['x', 'y', 'z'], columns=['a', 'b', 'c']
    ... )
    >>> kdf
       a  b  c
    x  1  2  3
    y  4  5  6
    z  7  8  9
    >>> kdf.swapaxes()
       x  y  z
    a  1  4  7
    b  2  5  8
    c  3  6  9
    >>> kdf.swapaxes(i=1, j=0)
       x  y  z
    a  1  4  7
    b  2  5  8
    c  3  6  9
    >>> kdf.swapaxes(i=1, j=1)
       a  b  c
    x  1  2  3
    y  4  5  6
    z  7  8  9
    """
    # Kept as an assert on purpose: callers observe AssertionError for copy=False.
    assert copy is True
    i = validate_axis(i)
    j = validate_axis(j)

    if i == j:
        # Swapping an axis with itself changes nothing, but copy=True still
        # promises a new object, so do not hand back an alias of self.
        return self.copy()
    return self.transpose()

def _swaplevel_columns(self, i, j) -> InternalFrame:
assert isinstance(self.columns, pd.MultiIndex)
for index in (i, j):
Expand Down
79 changes: 79 additions & 0 deletions databricks/koalas/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1178,6 +1178,85 @@ def sum(self, axis=None, numeric_only=True) -> Union[Scalar, "Series"]:
F.sum, name="sum", numeric_only=numeric_only, axis=axis
)

def swapaxes(
    self, i: Union[str, int], j: Union[str, int], copy: bool = True
) -> Union["DataFrame", "Series"]:
    """
    Interchange axes and swap values axes appropriately.

    .. note:: This method, if applied to a DataFrame, is based on an expensive operation due to
        the nature of big data. Internally it needs to generate each row for each value, and
        then group twice - it is a huge operation. To prevent misusage, this method
        has the 'compute.max_rows' default limit of input length, and raises a ValueError.

            >>> from databricks.koalas.config import option_context
            >>> with option_context('compute.max_rows', 1000):  # doctest: +NORMALIZE_WHITESPACE
            ...     ks.DataFrame({'a': range(1001)}).swapaxes(i=0, j=1)
            Traceback (most recent call last):
              ...
            ValueError: Current DataFrame has more then the given limit 1000 rows.
            Please set 'compute.max_rows' by using 'databricks.koalas.config.set_option'
            to retrieve to retrieve more than 1000 rows. Note that, before changing the
            'compute.max_rows', this operation is considerably expensive.

    Parameters
    ----------
    i : {0 or 'index', 1 or 'columns'}
        The first axis to swap.
    j : {0 or 'index', 1 or 'columns'}
        The second axis to swap.
    copy : bool, default True
        Only ``True`` is accepted; any other value fails the assertion below.

    Returns
    -------
    DataFrame or Series
        A copy of the object when ``i`` and ``j`` name the same axis,
        otherwise the transposed object.

    Raises
    ------
    ValueError
        If called on a Series with any axis other than 0.

    Examples
    --------
    On a DataFrame:

    >>> kdf = ks.DataFrame(
    ...     [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['x', 'y', 'z'], columns=['a', 'b', 'c']
    ... )
    >>> kdf
       a  b  c
    x  1  2  3
    y  4  5  6
    z  7  8  9
    >>> kdf.swapaxes(i=1, j=0)
       x  y  z
    a  1  4  7
    b  2  5  8
    c  3  6  9
    >>> kdf.swapaxes(i=1, j=1)
       a  b  c
    x  1  2  3
    y  4  5  6
    z  7  8  9

    On a Series:

    >>> kser = ks.Series([1, 2, 3], index=["x", "y", "z"])
    >>> kser
    x    1
    y    2
    z    3
    dtype: int64
    >>>
    >>> kser.swapaxes(0, 0)
    x    1
    y    2
    z    3
    dtype: int64
    """
    # Kept as an assert on purpose: callers observe AssertionError for copy=False.
    assert copy is True
    i = validate_axis(i)
    j = validate_axis(j)

    # A Series only has the index axis, so both arguments must resolve to 0.
    if isinstance(self, ks.Series) and not (i == j == 0):
        raise ValueError("Axis must be 0 for Series")

    if i == j:
        # Same axis on both sides: nothing to swap, return a fresh copy.
        return self.copy()
    return cast(Union["DataFrame", "Series"], self).transpose()

def skew(self, axis=None, numeric_only=True) -> Union[Scalar, "Series"]:
"""
Return unbiased skew normalized by N-1.
Expand Down
1 change: 0 additions & 1 deletion databricks/koalas/missing/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ class MissingPandasLikeSeries(object):
sem = _unsupported_function("sem")
set_axis = _unsupported_function("set_axis")
slice_shift = _unsupported_function("slice_shift")
swapaxes = _unsupported_function("swapaxes")
to_hdf = _unsupported_function("to_hdf")
to_period = _unsupported_function("to_period")
to_sql = _unsupported_function("to_sql")
Expand Down
1 change: 1 addition & 0 deletions databricks/koalas/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1453,6 +1453,7 @@ def test_swapaxes(self):

self.assertRaises(AssertionError, lambda: kdf.swapaxes(0, 1, copy=False))
self.assertRaises(ValueError, lambda: kdf.swapaxes(0, -1))
self.assertRaises(TypeError, lambda: kdf.swapaxes())

def test_nlargest(self):
pdf = pd.DataFrame(
Expand Down
13 changes: 13 additions & 0 deletions databricks/koalas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1853,6 +1853,19 @@ def test_swaplevel(self):
self.assertRaises(KeyError, lambda: kser.swaplevel("not_number", "color"))
self.assertRaises(AssertionError, lambda: kser.swaplevel(copy=False))

def test_swapaxes(self):
    """Check that Series.swapaxes matches pandas and rejects invalid arguments."""
    pandas_ser = pd.Series([1, 2, 3], index=["x", "y", "z"], name="ser")
    koalas_ser = ks.from_pandas(pandas_ser)

    # Swapping the index axis with itself, by number and by name.
    for axis in (0, "index"):
        self.assert_eq(koalas_ser.swapaxes(axis, axis), pandas_ser.swapaxes(axis, axis))
    # Same check on a derived (non-anchored) series.
    self.assert_eq((koalas_ser + 1).swapaxes(0, 0), (pandas_ser + 1).swapaxes(0, 0))

    # copy=False is rejected by an assertion.
    with self.assertRaises(AssertionError):
        koalas_ser.swapaxes(0, 1, copy=False)
    # A Series has no columns axis to swap with.
    for bad_axes in ((0, 1), ("index", "columns")):
        with self.assertRaises(ValueError):
            koalas_ser.swapaxes(*bad_axes)
    # Both axes are required positional arguments.
    with self.assertRaises(TypeError):
        koalas_ser.swapaxes()

def test_div_zero_and_nan(self):
pser = pd.Series([100, None, -300, None, 500, -700, np.inf, -np.inf], name="Koalas")
kser = ks.from_pandas(pser)
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ Reindexing / Selection / Label manipulation
Series.reset_index
Series.sample
Series.swaplevel
Series.swapaxes
Series.take
Series.tail
Series.where
Expand Down