Skip to content

Commit be69c54

Browse files
ueshin authored and HyukjinKwon committed
Fix clip to use _InternalFrame.with_new_columns. (#1232)
Fixes the `DataFrame.clip` and `Series.clip` functions to use `_InternalFrame.with_new_columns` so that the result preserves the original index.
1 parent 5d8fbb1 commit be69c54

File tree

3 files changed

+21
-13
lines changed

3 files changed

+21
-13
lines changed

databricks/koalas/frame.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4441,22 +4441,29 @@ def clip(self, lower: Union[float, int] = None, upper: Union[float, int] = None)
44414441

44424442
numeric_types = (DecimalType, DoubleType, FloatType, ByteType, IntegerType, LongType,
44434443
ShortType)
4444-
numeric_columns = [(c, self._internal.scol_for(c)) for c in self.columns
4445-
if isinstance(self._internal.spark_type_for(c), numeric_types)]
4444+
numeric_columns = [(idx, self._internal.scol_for(idx))
4445+
for idx in self._internal.column_index
4446+
if isinstance(self._internal.spark_type_for(idx), numeric_types)]
44464447

44474448
if lower is not None:
4448-
numeric_columns = [(c, F.when(scol < lower, lower).otherwise(scol).alias(c))
4449-
for c, scol in numeric_columns]
4449+
numeric_columns = [(idx, (F.when(scol < lower, lower).otherwise(scol)
4450+
.alias(name_like_string(idx))))
4451+
for idx, scol in numeric_columns]
44504452
if upper is not None:
4451-
numeric_columns = [(c, F.when(scol > upper, upper).otherwise(scol).alias(c))
4452-
for c, scol in numeric_columns]
4453+
numeric_columns = [(idx, (F.when(scol > upper, upper).otherwise(scol)
4454+
.alias(name_like_string(idx))))
4455+
for idx, scol in numeric_columns]
44534456

4454-
nonnumeric_columns = [self._internal.scol_for(c) for c in self.columns
4455-
if not isinstance(self._internal.spark_type_for(c), numeric_types)]
4457+
column_index = [idx for idx, _ in numeric_columns]
4458+
column_scols = [scol for _, scol in numeric_columns]
44564459

4457-
sdf = self._sdf.select([scol for _, scol in numeric_columns] + nonnumeric_columns)
4460+
for idx in self._internal.column_index:
4461+
if not isinstance(self._internal.spark_type_for(idx), numeric_types):
4462+
column_index.append(idx)
4463+
column_scols.append(self._internal.scol_for(idx))
44584464

4459-
return ks.DataFrame(sdf)[list(self.columns)]
4465+
internal = self._internal.with_new_columns(column_scols, column_index=column_index)
4466+
return DataFrame(internal)[list(self.columns)]
44604467

44614468
def head(self, n=5):
44624469
"""

databricks/koalas/tests/test_dataframe.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,7 +1139,8 @@ def test_append(self):
11391139
self.assert_eq(kdf.append(kdf), pdf.append(pdf))
11401140

11411141
def test_clip(self):
1142-
pdf = pd.DataFrame({'A': [0, 2, 4]})
1142+
pdf = pd.DataFrame({'A': [0, 2, 4], 'B': [4, 2, 0], 'X': [-1, 10, 0]},
1143+
index=np.random.rand(3))
11431144
kdf = ks.from_pandas(pdf)
11441145

11451146
# Assert list-like values are not accepted for 'lower' and 'upper'
@@ -1159,7 +1160,7 @@ def test_clip(self):
11591160
self.assert_eq(kdf.clip(1, 3), pdf.clip(1, 3))
11601161

11611162
# Assert behavior on string values
1162-
str_kdf = ks.DataFrame({'A': ['a', 'b', 'c']})
1163+
str_kdf = ks.DataFrame({'A': ['a', 'b', 'c']}, index=np.random.rand(3))
11631164
self.assert_eq(str_kdf.clip(1, 3), str_kdf)
11641165

11651166
def test_binary_operators(self):

databricks/koalas/tests/test_series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,7 @@ def test_missing(self):
570570
getattr(kser, name)
571571

572572
def test_clip(self):
573-
pser = pd.Series([0, 2, 4])
573+
pser = pd.Series([0, 2, 4], index=np.random.rand(3))
574574
kser = ks.from_pandas(pser)
575575

576576
# Assert list-like values are not accepted for 'lower' and 'upper'

0 commit comments

Comments
 (0)