Skip to content
53 changes: 26 additions & 27 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5749,6 +5749,7 @@ def droplevel(self, level, axis=0) -> "DataFrame":
1 2 3 4
5 6 7 8
9 10 11 12

>>> df.droplevel('a') # doctest: +NORMALIZE_WHITESPACE
level_1 c d
level_2 e f
Expand All @@ -5767,40 +5768,38 @@ def droplevel(self, level, axis=0) -> "DataFrame":
axis = validate_axis(axis)
kdf = self.copy()
if axis == 0:
names = self.index.names
nlevels = self.index.nlevels
if not isinstance(level, (tuple, list)):
if not isinstance(level, (str, int)):
raise KeyError("Level {} not found".format(level))
level = [level]

spark_frame = self._internal.spark_frame
index_map = self._internal.index_map.copy()
index_names = self.index.names
nlevels = self.index.nlevels
for n in level:
if isinstance(n, int) and (n > nlevels - 1):
raise IndexError(
"Too many levels: Index has only {} levels, not {}".format(nlevels, n + 1)
)
if isinstance(n, (str, tuple)) and (n not in names):
raise KeyError("Level {} not found".format(n))
if isinstance(n, str):
if n not in index_names:
raise KeyError("Level {} not found".format(n))
n = index_names.index(n)
elif isinstance(n, int):
if n >= nlevels:
raise IndexError(
"Too many levels: Index has only {} levels, not {}".format(
nlevels, n + 1
)
)
index_spark_column = self._internal.index_spark_column_names[n]
spark_frame = spark_frame.drop(index_spark_column)
index_map.pop(index_spark_column)

if len(level) >= nlevels:
if len(level) == nlevels:
raise ValueError(
"Cannot remove {} levels from an index with {} "
"levels: at least one level must be "
"left.".format(len(level), nlevels)
"Cannot remove {0} levels from an index with {0} levels: "
"at least one level must be left.".format(nlevels)
)
drop_spark_index_columns = list()
index_spark_column_names = kdf._internal.index_spark_column_names
for n in level:
if isinstance(n, int):
index_order = n
elif isinstance(n, (str, tuple)):
index_order = kdf.index.names.index(n)
drop_spark_index_columns.append(index_spark_column_names[index_order])
sdf = kdf._internal.spark_frame
sdf = sdf.drop(*drop_spark_index_columns)
index_map = kdf._internal.index_map.copy()
for drop_spark_index_column in drop_spark_index_columns:
index_map.pop(drop_spark_index_column)
internal_frame = kdf._internal.copy(spark_frame=sdf, index_map=index_map)
kdf = DataFrame(internal_frame)
internal = self._internal.copy(spark_frame=spark_frame, index_map=index_map)
kdf = DataFrame(internal)
elif axis == 1:
names = self.columns.names
nlevels = self.columns.nlevels
Expand Down
1 change: 0 additions & 1 deletion databricks/koalas/missing/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ class MissingPandasLikeSeries(object):
between_time = _unsupported_function("between_time")
combine = _unsupported_function("combine")
cov = _unsupported_function("cov")
droplevel = _unsupported_function("droplevel")
ewm = _unsupported_function("ewm")
factorize = _unsupported_function("factorize")
first = _unsupported_function("first")
Expand Down
51 changes: 51 additions & 0 deletions databricks/koalas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4865,6 +4865,57 @@ def items(self) -> Iterable:
"""This is an alias of ``iteritems``."""
return self.iteritems()

def droplevel(self, level):
    """
    Return Series with requested index level(s) removed.

    Parameters
    ----------
    level : int, str, or list-like
        If a string is given, must be the name of a level
        If list-like, elements must be names or positional indexes
        of levels.

    Returns
    -------
    Series
        Series with requested index level(s) removed.

    Examples
    --------
    >>> kser = ks.Series(
    ...     [1, 2, 3],
    ...     index=pd.MultiIndex.from_tuples(
    ...         [("x", "a"), ("x", "b"), ("y", "c")], names=["level_1", "level_2"]
    ...     ),
    ... )
    >>> kser
    level_1  level_2
    x        a          1
             b          2
    y        c          3
    Name: 0, dtype: int64

    Removing specific index level by level

    >>> kser.droplevel(0)
    level_2
    a    1
    b    2
    c    3
    Name: 0, dtype: int64

    Removing specific index level by name

    >>> kser.droplevel("level_2")
    level_1
    x    1
    x    2
    y    3
    Name: 0, dtype: int64
    """
    # Reuse the DataFrame implementation: lift to a one-column frame, drop the
    # requested index level(s) on axis 0, then project back down to a Series.
    dropped_frame = self.to_frame().droplevel(level=level, axis=0)
    return first_series(dropped_frame)

def tail(self, n=5):
"""
Return the last `n` rows.
Expand Down
33 changes: 33 additions & 0 deletions databricks/koalas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1807,6 +1807,39 @@ def test_iteritems(self):
for p_items, k_items in zip(pser.iteritems(), kser.iteritems()):
self.assert_eq(repr(p_items), repr(k_items))

def test_droplevel(self):
    # `Series.droplevel` only exists in pandas 0.24.0 and later.
    if LooseVersion(pd.__version__) >= LooseVersion("0.24.0"):
        midx = pd.MultiIndex.from_tuples(
            [("x", "a", "q"), ("x", "b", "w"), ("y", "c", "e")],
            names=["level_1", "level_2", "level_3"],
        )
        pser = pd.Series([1, 2, 3], index=midx)
        kser = ks.from_pandas(pser)

        # Positional and sequence forms should all match pandas.
        for level in [0, [0], (0,), [0, 2], (1, 2)]:
            self.assert_eq(pser.droplevel(level), kser.droplevel(level))

        # A set is neither a scalar level nor a supported list-like.
        with self.assertRaisesRegex(KeyError, "Level {0, 1, 2} not found"):
            kser.droplevel({0, 1, 2})
        # Unknown level names raise KeyError.
        with self.assertRaisesRegex(KeyError, "Level level_100 not found"):
            kser.droplevel(["level_1", "level_100"])
        # Out-of-range positional levels raise IndexError.
        with self.assertRaisesRegex(
            IndexError, "Too many levels: Index has only 3 levels, not 11"
        ):
            kser.droplevel(10)
        # Dropping every level is invalid: at least one must remain.
        with self.assertRaisesRegex(
            ValueError,
            "Cannot remove 3 levels from an index with 3 levels: "
            "at least one level must be left.",
        ):
            kser.droplevel([0, 1, 2])

def test_tail(self):
if LooseVersion(pyspark.__version__) >= LooseVersion("3.0"):
pser = pd.Series(range(1000), name="Koalas")
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ Reindexing / Selection / Label manipulation
:toctree: api/

Series.drop
Series.droplevel
Series.drop_duplicates
Series.equals
Series.add_prefix
Expand Down