Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 28 additions & 10 deletions databricks/koalas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,14 @@ def __sub__(self, other):
return _column_op(spark.Column.__sub__)(self, other)

__mul__ = _column_op(spark.Column.__mul__)
__div__ = _numpy_column_op(spark.Column.__div__)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: In Python 3.x, __div__ and __rdiv__ are no longer supported, so this uses __truediv__ and __rtruediv__ instead.

__truediv__ = _numpy_column_op(spark.Column.__truediv__)

def __truediv__(self, other):
    """Implement ``self / other`` with explicit handling of a zero divisor.

    Where ``right == 0`` the result is ``inf / left``; everywhere else it
    is the ordinary ``left / right``.
    """

    def truediv(left, right):
        # NOTE(review): presumably `left` is this object's Spark column and
        # `right` is `other` -- confirm against `_numpy_column_op`.
        # `right == 0` is wrapped in F.lit before being used as the condition.
        divisor_is_zero = F.lit(right == 0)
        # Dividing infinity by the dividend yields the zero-divisor result.
        zero_divisor_result = F.lit(np.inf).__div__(left)
        regular_quotient = left.__truediv__(right)
        return F.when(divisor_is_zero, zero_divisor_result).otherwise(regular_quotient)

    return _numpy_column_op(truediv)(self, other)

def __mod__(self, other):
def mod(left, right):
Expand All @@ -202,18 +208,30 @@ def __radd__(self, other):

__rsub__ = _column_op(spark.Column.__rsub__)
__rmul__ = _column_op(spark.Column.__rmul__)
__rdiv__ = _numpy_column_op(spark.Column.__rdiv__)
__rtruediv__ = _numpy_column_op(spark.Column.__rtruediv__)

def __rtruediv__(self, other):
    """Implement the reflected division ``other / self``.

    Where ``left == 0`` the result is ``inf / right``; everywhere else it
    is ``right / left``.
    """

    def rtruediv(left, right):
        # NOTE(review): presumably `left` is this object's Spark column (the
        # divisor here) and `right` is `other` -- confirm against
        # `_numpy_column_op`.
        divisor_is_zero = left == 0
        # Dividing infinity by the dividend yields the zero-divisor result.
        zero_divisor_result = F.lit(np.inf).__div__(right)
        # The dividend is lifted to a literal column before dividing.
        regular_quotient = F.lit(right).__truediv__(left)
        return F.when(divisor_is_zero, zero_divisor_result).otherwise(regular_quotient)

    return _numpy_column_op(rtruediv)(self, other)

def __floordiv__(self, other):
return self._with_new_scol(
F.floor(_numpy_column_op(spark.Column.__div__)(self, other)._scol)
)
def floordiv(left, right):
return F.when(F.lit(right == 0), F.lit(np.inf).__div__(left)).otherwise(
F.floor(left.__div__(right))
)

return _numpy_column_op(floordiv)(self, other)

def __rfloordiv__(self, other):
return self._with_new_scol(
F.floor(_numpy_column_op(spark.Column.__rdiv__)(self, other)._scol)
)
def rfloordiv(left, right):
return F.when(F.lit(left == 0), F.lit(np.inf).__div__(right)).otherwise(
F.floor(F.lit(right).__div__(left))
)

return _numpy_column_op(rfloordiv)(self, other)

def __rmod__(self, other):
def rmod(left, right):
Expand Down
22 changes: 11 additions & 11 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@
>>> df.rdiv(10)
angles degrees
circle NaN 0.027778
circle inf 0.027778
triangle 3.333333 0.055556
rectangle 2.500000 0.027778
Expand All @@ -180,7 +180,7 @@
>>> df.rtruediv(10)
angles degrees
circle NaN 0.027778
circle inf 0.027778
triangle 3.333333 0.055556
rectangle 2.500000 0.027778
Expand Down Expand Up @@ -228,21 +228,21 @@
>>> df // 10
angles degrees
circle 0 36
triangle 0 18
rectangle 0 36
circle 0.0 36.0
triangle 0.0 18.0
rectangle 0.0 36.0
>>> df.floordiv(10)
angles degrees
circle 0 36
triangle 0 18
rectangle 0 36
circle 0.0 36.0
triangle 0.0 18.0
rectangle 0.0 36.0
Copy link
Contributor Author

@itholic itholic Apr 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For floordiv and rfloordiv, the result will always be a float: the result type of each column has to be determined before Spark executes the job, so we cannot predict it from the data.

Copy link
Contributor Author

@itholic itholic Apr 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In short, since the result can be Infinity, which is a float, the result will always be a float.

Can someone give an opinion on this?

>>> df.rfloordiv(10)
angles degrees
circle NaN 0
triangle 3.0 0
rectangle 2.0 0
circle inf 0.0
triangle 3.0 0.0
rectangle 2.0 0.0
Mod by constant with reverse version.
Expand Down
17 changes: 17 additions & 0 deletions databricks/koalas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1446,3 +1446,20 @@ def test_squeeze(self):
kser = ks.Series([90, 91, 85], index=midx)
pser = kser.to_pandas()
self.assert_eq(kser.squeeze(), pser.squeeze())

def test_div_zero(self):
    """Check division by zero on a Series containing missing values.

    True division is compared against pandas via ``repr``. Floor division
    is compared against pandas only for pandas >= 1.0.0; for older pandas
    it is compared against a hand-written expected Series.
    """
    pser = pd.Series([100, None, -300, None, 500, -700], name="Koalas")
    kser = ks.from_pandas(pser)

    # The three spellings of true division, applied to both sides in turn.
    true_divisions = (
        lambda ser: ser.div(0),
        lambda ser: ser.truediv(0),
        lambda ser: ser / 0,
    )
    for divide_by_zero in true_divisions:
        self.assert_eq(repr(divide_by_zero(pser)), repr(divide_by_zero(kser)))

    # Floor division is only compared with pandas directly for pandas >= 1.0.0.
    pandas_is_modern = LooseVersion(pd.__version__) >= LooseVersion("1.0.0")
    if pandas_is_modern:
        self.assert_eq(repr(pser.floordiv(0)), repr(kser.floordiv(0)))
        self.assert_eq(repr(pser // 0), repr(kser // 0))
    else:
        # Older pandas: compare against an explicit expected result instead.
        result = ks.Series([np.inf, np.nan, -np.inf, np.nan, np.inf, -np.inf], name="Koalas")
        self.assert_eq(repr(kser.floordiv(0)), repr(result))
        self.assert_eq(repr(kser // 0), repr(result))