Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions databricks/koalas/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,33 @@ def unique(self, level=None):
sdf = self._kdf._sdf.select(self._scol.alias(self._internal.index_columns[0])).distinct()
return DataFrame(_InternalFrame(sdf=sdf, index_map=self._kdf._internal.index_map)).index

# TODO: add `errors` parameter (as in pandas' Index.drop)
def drop(self, labels):
    """
    Return a new Index that omits the given labels.

    Parameters
    ----------
    labels : array-like
        Label or labels to remove. Anything that is not a ``list`` or
        ``tuple`` is treated as a single label.

    Returns
    -------
    dropped : Index

    Examples
    --------
    >>> index = ks.Index([1, 2, 3])
    >>> index
    Int64Index([1, 2, 3], dtype='int64')

    >>> index.drop([1])
    Int64Index([2, 3], dtype='int64')
    """
    # Normalise a lone label into a one-element list so ``isin`` always
    # receives a collection.
    if isinstance(labels, (tuple, list)):
        to_drop = labels
    else:
        to_drop = [labels]

    internal = self._internal
    # Keep only the rows whose (first) index column is NOT among the labels.
    keep_mask = ~internal.index_scols[0].isin(to_drop)
    remaining_sdf = internal.sdf[keep_mask]

    new_internal = _InternalFrame(sdf=remaining_sdf, index_map=self._kdf._internal.index_map)
    return Index(DataFrame(new_internal))

def _validate_index_level(self, level):
"""
Validate index level.
Expand Down Expand Up @@ -1231,6 +1258,49 @@ def symmetric_difference(self, other, result_name=None, sort=None):

return result

# TODO: add `errors` parameter (as in pandas' MultiIndex.drop)
def drop(self, labels, level=None):
    """
    Return a new MultiIndex without the rows matching the given labels.

    Parameters
    ----------
    labels : array-like
        Labels to remove. They are matched against the values of a single
        level: the first level when ``level`` is None, otherwise the level
        selected by ``level``.
    level : int or level name, default None
        An ``int`` selects the level by position; any other value is used
        to look the column up by name in the underlying Spark DataFrame.

    Returns
    -------
    dropped : MultiIndex

    Examples
    --------
    >>> index = ks.MultiIndex.from_tuples([('a', 'x'), ('b', 'y'), ('c', 'z')])
    >>> index # doctest: +SKIP
    MultiIndex([('a', 'x'),
                ('b', 'y'),
                ('c', 'z')],
               )

    >>> index.drop(['a']) # doctest: +SKIP
    MultiIndex([('b', 'y'),
                ('c', 'z')],
               )

    >>> index.drop(['x', 'y'], level=1) # doctest: +SKIP
    MultiIndex([('c', 'z')],
               )
    """
    internal = self._internal
    frame = internal.sdf
    level_scols = internal.index_scols

    # Pick the Spark column the labels are compared against.
    if level is None:
        target = level_scols[0]
    elif isinstance(level, int):
        target = level_scols[level]
    else:
        target = frame[level]

    # Keep every row whose selected level value is not one of the labels.
    remaining_sdf = frame[~target.isin(labels)]
    new_internal = _InternalFrame(sdf=remaining_sdf, index_map=self._kdf._internal.index_map)
    return MultiIndex(DataFrame(new_internal))

def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
if LooseVersion(pyspark.__version__) < LooseVersion("2.4") and \
default_session().conf.get("spark.sql.execution.arrow.enabled") == "true" and \
Expand Down
2 changes: 0 additions & 2 deletions databricks/koalas/missing/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ class _MissingPandasLikeIndex(object):
asof_locs = unsupported_function('asof_locs')
delete = unsupported_function('delete')
difference = unsupported_function('difference')
drop = unsupported_function('drop')
droplevel = unsupported_function('droplevel')
duplicated = unsupported_function('duplicated')
equals = unsupported_function('equals')
Expand Down Expand Up @@ -130,7 +129,6 @@ class _MissingPandasLikeMultiIndex(object):
asof_locs = unsupported_function('asof_locs')
delete = unsupported_function('delete')
difference = unsupported_function('difference')
drop = unsupported_function('drop')
droplevel = unsupported_function('droplevel')
duplicated = unsupported_function('duplicated')
equal_levels = unsupported_function('equal_levels')
Expand Down
17 changes: 17 additions & 0 deletions databricks/koalas/tests/test_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,23 @@ def test_index_fillna(self):
with self.assertRaisesRegex(TypeError, "Unsupported type <class 'list'>"):
kidx.fillna([1, 2])

def test_index_drop(self):
    """Index.drop must agree with pandas for a scalar label and a list of labels."""
    data = {'a': ['a', 'b', 'c']}
    pidx = pd.DataFrame(data, index=[1, 2, 3]).index
    kidx = ks.DataFrame(data, index=[1, 2, 3]).index

    # Same order as before: scalar label first, then a list of labels.
    for labels in (1, [1, 2]):
        self.assert_eq(pidx.drop(labels), kidx.drop(labels))

def test_multiindex_drop(self):
    """MultiIndex.drop must agree with pandas, with and without ``level``."""
    tuples = [('a', 'x'), ('b', 'y'), ('c', 'z')]
    names = ['level1', 'level2']
    pidx = pd.MultiIndex.from_tuples(tuples, names=names)
    kidx = ks.MultiIndex.from_tuples(tuples, names=names)

    # (labels, extra keyword arguments) pairs, in the original order.
    cases = (
        ('a', {}),
        (['a', 'b'], {}),
        (['x', 'y'], {'level': 'level2'}),
    )
    for labels, kwargs in cases:
        self.assert_eq(pidx.drop(labels, **kwargs), kidx.drop(labels, **kwargs))

def test_sort_values(self):
pidx = pd.Index([-10, -100, 200, 100])
kidx = ks.Index([-10, -100, 200, 100])
Expand Down
2 changes: 2 additions & 0 deletions docs/source/reference/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Modifying and computations
Index.is_interval
Index.is_numeric
Index.is_object
Index.drop
Index.drop_duplicates
Index.min
Index.max
Expand Down Expand Up @@ -150,6 +151,7 @@ MultiIndex Modifying and computations
.. autosummary::
:toctree: api/

MultiIndex.drop
MultiIndex.copy
MultiIndex.rename
MultiIndex.min
Expand Down