Skip to content

Commit 421b80d

Browse files
authored
Fix Series.clip not to create a new DataFrame. (#1525)
Currently `Series.clip` creates a new DataFrame, so its result cannot be assigned back to the anchor DataFrame unless the option "compute.ops_on_diff_frames" is enabled:

```py
>>> kdf = ks.DataFrame({"a": [1, 2, 6, 4, 4, 6, 4, 3, 7]})
>>> kdf['clip'] = kdf.a.clip(3, 6)
Traceback (most recent call last):
...
ValueError: Cannot combine the series or dataframe because it comes from a different dataframe. In order to allow this operation, enable 'compute.ops_on_diff_frames' option.
```
1 parent 11d8dc6 commit 421b80d

File tree

3 files changed

+25
-30
lines changed

3 files changed

+25
-30
lines changed

databricks/koalas/frame.py

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,9 @@
4646
from pyspark.sql.readwriter import OptionUtils
4747
from pyspark.sql.types import (
4848
BooleanType,
49-
ByteType,
50-
DecimalType,
5149
DoubleType,
5250
FloatType,
53-
IntegerType,
54-
LongType,
5551
NumericType,
56-
ShortType,
5752
StructType,
5853
StructField,
5954
)
@@ -5728,28 +5723,7 @@ def clip(self, lower: Union[float, int] = None, upper: Union[float, int] = None)
57285723
if lower is None and upper is None:
57295724
return self
57305725

5731-
numeric_types = (
5732-
DecimalType,
5733-
DoubleType,
5734-
FloatType,
5735-
ByteType,
5736-
IntegerType,
5737-
LongType,
5738-
ShortType,
5739-
)
5740-
5741-
def op(kser):
5742-
if isinstance(kser.spark_type, numeric_types):
5743-
scol = kser.spark_column
5744-
if lower is not None:
5745-
scol = F.when(scol < lower, lower).otherwise(scol)
5746-
if upper is not None:
5747-
scol = F.when(scol > upper, upper).otherwise(scol)
5748-
return scol.alias(kser._internal.data_spark_column_names[0])
5749-
else:
5750-
return kser
5751-
5752-
return self._apply_series_op(op)
5726+
return self._apply_series_op(lambda kser: kser.clip(lower=lower, upper=upper))
57535727

57545728
def head(self, n: int = 5) -> "DataFrame":
57555729
"""

databricks/koalas/series.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,11 @@
3737
BooleanType,
3838
DoubleType,
3939
FloatType,
40+
IntegerType,
41+
LongType,
42+
NumericType,
4043
StringType,
4144
StructType,
42-
LongType,
43-
IntegerType,
4445
)
4546
from pyspark.sql.window import Window
4647

@@ -1726,7 +1727,23 @@ def clip(self, lower: Union[float, int] = None, upper: Union[float, int] = None)
17261727
instances of 'str' and 'int'" while `ks.Series(['a', 'b']).clip(0, 1)` will output the
17271728
original Series, simply ignoring the incompatible types.
17281729
"""
1729-
return first_series(self.to_dataframe().clip(lower, upper))
1730+
if is_list_like(lower) or is_list_like(upper):
1731+
raise ValueError(
1732+
"List-like value are not supported for 'lower' and 'upper' at the " + "moment"
1733+
)
1734+
1735+
if lower is None and upper is None:
1736+
return self
1737+
1738+
if isinstance(self.spark_type, NumericType):
1739+
scol = self.spark_column
1740+
if lower is not None:
1741+
scol = F.when(scol < lower, lower).otherwise(scol)
1742+
if upper is not None:
1743+
scol = F.when(scol > upper, upper).otherwise(scol)
1744+
return self._with_new_scol(scol.alias(self._internal.data_spark_column_names[0]))
1745+
else:
1746+
return self
17301747

17311748
def drop(
17321749
self,

databricks/koalas/tests/test_dataframe.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,6 +1387,10 @@ def test_clip(self):
13871387
# Assert lower and upper
13881388
self.assert_eq(kdf.clip(1, 3), pdf.clip(1, 3))
13891389

1390+
pdf["clip"] = pdf.A.clip(lower=1, upper=3)
1391+
kdf["clip"] = kdf.A.clip(lower=1, upper=3)
1392+
self.assert_eq(kdf, pdf)
1393+
13901394
# Assert behavior on string values
13911395
str_kdf = ks.DataFrame({"A": ["a", "b", "c"]}, index=np.random.rand(3))
13921396
self.assert_eq(str_kdf.clip(1, 3), str_kdf)

0 commit comments

Comments
 (0)