Skip to content

Commit 0979130

Browse files
authored
Merge branch 'master' into Index.drop
2 parents cfb52f7 + eb763ea commit 0979130

File tree

5 files changed

+84
-69
lines changed

5 files changed

+84
-69
lines changed

databricks/koalas/base.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -558,6 +558,9 @@ def isnull(self):
558558
>>> ser.rename("a").to_frame().set_index("a").index.isna()
559559
Index([False, False, True], dtype='object', name='a')
560560
"""
561+
from databricks.koalas.indexes import MultiIndex
562+
if isinstance(self, MultiIndex):
563+
raise NotImplementedError("isna is not defined for MultiIndex")
561564
if isinstance(self.spark_type, (FloatType, DoubleType)):
562565
return self._with_new_scol(self._scol.isNull() | F.isnan(self._scol)).rename(self.name)
563566
else:
@@ -599,6 +602,9 @@ def notnull(self):
599602
>>> ser.rename("a").to_frame().set_index("a").index.notna()
600603
Index([True, True, False], dtype='object', name='a')
601604
"""
605+
from databricks.koalas.indexes import MultiIndex
606+
if isinstance(self, MultiIndex):
607+
raise NotImplementedError("notna is not defined for MultiIndex")
602608
return (~self.isnull()).rename(self.name)
603609

604610
notna = notnull

databricks/koalas/indexing.py

Lines changed: 27 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,31 @@ def _unfold(key, kseries):
8585
return rows_sel, cols_sel
8686

8787

88-
class AtIndexer(object):
88+
class _IndexerLike(object):
89+
90+
def __init__(self, kdf_or_kser):
91+
from databricks.koalas.frame import DataFrame
92+
from databricks.koalas.series import Series
93+
assert isinstance(kdf_or_kser, (DataFrame, Series)), \
94+
'unexpected argument type: {}'.format(type(kdf_or_kser))
95+
self._kdf_or_kser = kdf_or_kser
96+
97+
@property
98+
def _is_df(self):
99+
from databricks.koalas.frame import DataFrame
100+
return isinstance(self._kdf_or_kser, DataFrame)
101+
102+
@property
103+
def _is_series(self):
104+
from databricks.koalas.series import Series
105+
return isinstance(self._kdf_or_kser, Series)
106+
107+
@property
108+
def _internal(self):
109+
return self._kdf_or_kser._internal
110+
111+
112+
class AtIndexer(_IndexerLike):
89113
"""
90114
Access a single value for a row/column label pair.
91115
If the index is not unique, all matching pairs are returned as an array.
@@ -122,26 +146,6 @@ class AtIndexer(object):
122146
>>> kdf.at[5, 'B']
123147
array([ 4, 20])
124148
"""
125-
def __init__(self, kdf_or_kser):
126-
from databricks.koalas.frame import DataFrame
127-
from databricks.koalas.series import Series
128-
assert isinstance(kdf_or_kser, (DataFrame, Series)), \
129-
'unexpected argument type: {}'.format(type(kdf_or_kser))
130-
self._kdf_or_kser = kdf_or_kser
131-
132-
@property
133-
def _is_df(self):
134-
from databricks.koalas.frame import DataFrame
135-
return isinstance(self._kdf_or_kser, DataFrame)
136-
137-
@property
138-
def _is_series(self):
139-
from databricks.koalas.series import Series
140-
return isinstance(self._kdf_or_kser, Series)
141-
142-
@property
143-
def _internal(self):
144-
return self._kdf_or_kser._internal
145149

146150
def __getitem__(self, key):
147151
if self._is_df:
@@ -181,7 +185,7 @@ def __getitem__(self, key):
181185
or len(values) > 1) else values[0]
182186

183187

184-
class LocIndexer(object):
188+
class LocIndexer(_IndexerLike):
185189
"""
186190
Access a group of rows and columns by label(s) or a boolean Series.
187191
@@ -357,27 +361,6 @@ class LocIndexer(object):
357361
9 7 8
358362
"""
359363

360-
def __init__(self, kdf_or_kser):
361-
from databricks.koalas.frame import DataFrame
362-
from databricks.koalas.series import Series
363-
assert isinstance(kdf_or_kser, (DataFrame, Series)), \
364-
'unexpected argument type: {}'.format(type(kdf_or_kser))
365-
self._kdf_or_kser = kdf_or_kser
366-
367-
@property
368-
def _is_df(self):
369-
from databricks.koalas.frame import DataFrame
370-
return isinstance(self._kdf_or_kser, DataFrame)
371-
372-
@property
373-
def _is_series(self):
374-
from databricks.koalas.series import Series
375-
return isinstance(self._kdf_or_kser, Series)
376-
377-
@property
378-
def _internal(self):
379-
return self._kdf_or_kser._internal
380-
381364
def __getitem__(self, key):
382365
from databricks.koalas.frame import DataFrame
383366
from databricks.koalas.series import Series
@@ -563,7 +546,7 @@ def __setitem__(self, key, value):
563546
self._kdf_or_kser[col_sel] = value
564547

565548

566-
class ILocIndexer(object):
549+
class ILocIndexer(_IndexerLike):
567550
"""
568551
Purely integer-location based indexing for selection by position.
569552
@@ -677,27 +660,6 @@ class ILocIndexer(object):
677660
2 1000 3000
678661
"""
679662

680-
def __init__(self, kdf_or_kser):
681-
from databricks.koalas.frame import DataFrame
682-
from databricks.koalas.series import Series
683-
assert isinstance(kdf_or_kser, (DataFrame, Series)), \
684-
'unexpected argument type: {}'.format(type(kdf_or_kser))
685-
self._kdf_or_kser = kdf_or_kser
686-
687-
@property
688-
def _is_df(self):
689-
from databricks.koalas.frame import DataFrame
690-
return isinstance(self._kdf_or_kser, DataFrame)
691-
692-
@property
693-
def _is_series(self):
694-
from databricks.koalas.series import Series
695-
return isinstance(self._kdf_or_kser, Series)
696-
697-
@property
698-
def _internal(self):
699-
return self._kdf_or_kser._internal
700-
701663
def __getitem__(self, key):
702664
from databricks.koalas.frame import DataFrame
703665
from databricks.koalas.indexes import Index

databricks/koalas/numpy_compat.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from typing import Callable, Any
1818

1919
import numpy as np
20-
from pyspark.sql import functions as F
20+
from pyspark.sql import functions as F, Column
2121
from pyspark.sql.types import DoubleType, LongType, BooleanType
2222

2323

@@ -180,19 +180,23 @@ def maybe_dispatch_ufunc_to_spark_func(
180180
ser_or_index, ufunc: Callable, method: str, *inputs, **kwargs: Any
181181
):
182182
from databricks.koalas import Series
183+
from databricks.koalas.base import _column_op
183184

184185
op_name = ufunc.__name__
185186

186187
if (method == "__call__"
187188
and (op_name in unary_np_spark_mappings or op_name in binary_np_spark_mappings)
188189
and kwargs.get("out") is None):
189-
inputs = [ # type: ignore
190-
inp._scol if isinstance(inp, Series) else F.lit(inp) for inp in inputs] # type: ignore
191190

192191
np_spark_map_func = (
193192
unary_np_spark_mappings.get(op_name)
194193
or binary_np_spark_mappings.get(op_name))
195194

196-
return ser_or_index._with_new_scol(np_spark_map_func(*inputs)) # type: ignore
195+
def convert_arguments(*args):
196+
args = [ # type: ignore
197+
F.lit(inp) if not isinstance(inp, Column) else inp for inp in args] # type: ignore
198+
return np_spark_map_func(*args)
199+
200+
return _column_op(convert_arguments)(*inputs) # type: ignore
197201
else:
198202
return NotImplemented

databricks/koalas/tests/test_indexes.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -351,3 +351,26 @@ def test_multiindex_drop(self):
351351
self.assert_eq(pidx.drop(['a', 'b']), kidx.drop(['a', 'b']))
352352
self.assert_eq(pidx.drop(['x', 'y'], level='level2'),
353353
kidx.drop(['x', 'y'], level='level2'))
354+
355+
def test_multiindex_isna(self):
356+
kidx = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)])
357+
358+
with self.assertRaisesRegex(
359+
NotImplementedError,
360+
"isna is not defined for MultiIndex"):
361+
kidx.isna()
362+
363+
with self.assertRaisesRegex(
364+
NotImplementedError,
365+
"isna is not defined for MultiIndex"):
366+
kidx.isnull()
367+
368+
with self.assertRaisesRegex(
369+
NotImplementedError,
370+
"notna is not defined for MultiIndex"):
371+
kidx.notna()
372+
373+
with self.assertRaisesRegex(
374+
NotImplementedError,
375+
"notna is not defined for MultiIndex"):
376+
kidx.notnull()

databricks/koalas/tests/test_numpy_compat.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
import pandas as pd
1818

1919
from databricks import koalas as ks
20+
from databricks.koalas import set_option, reset_option
2021
from databricks.koalas.numpy_compat import unary_np_spark_mappings, binary_np_spark_mappings
2122
from databricks.koalas.testing.utils import ReusedSQLTestCase, SQLTestUtils
2223

@@ -57,7 +58,10 @@ def test_np_spark_compat(self):
5758
# Use a randomly generated DataFrame
5859
pdf = pd.DataFrame(
5960
np.random.randint(-100, 100, size=(np.random.randint(100), 2)), columns=['a', 'b'])
61+
pdf2 = pd.DataFrame(
62+
np.random.randint(-100, 100, size=(len(pdf), len(pdf.columns))), columns=['a', 'b'])
6063
kdf = ks.from_pandas(pdf)
64+
kdf2 = ks.from_pandas(pdf2)
6165

6266
blacklist = [
6367
# Koalas does not currently support
@@ -103,3 +107,19 @@ def test_np_spark_compat(self):
103107
np_func(pdf.a, 1), np_func(kdf.a, 1), almost=True)
104108
except Exception as e:
105109
raise AssertionError("Test in '%s' function was failed." % np_name) from e
110+
111+
# Test only the first 5 for now; enabling the 'compute.ops_on_diff_frames' option makes the test take too long.
112+
try:
113+
set_option('compute.ops_on_diff_frames', True)
114+
for np_name, spark_func in list(binary_np_spark_mappings.items())[:5]:
115+
np_func = getattr(np, np_name)
116+
if np_name not in blacklist:
117+
try:
118+
# binary ufunc
119+
self.assert_eq(
120+
np_func(pdf.a, pdf2.b).sort_index(),
121+
np_func(kdf.a, kdf2.b).sort_index(), almost=True)
122+
except Exception as e:
123+
raise AssertionError("Test in '%s' function was failed." % np_name) from e
124+
finally:
125+
reset_option('compute.ops_on_diff_frames')

0 commit comments

Comments
 (0)