Skip to content

Commit a9cbbd3

Browse files
committed
Fix spark_df.
1 parent e992200 commit a9cbbd3

File tree

1 file changed

+38
-7
lines changed

1 file changed

+38
-7
lines changed

databricks/koalas/internal.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,15 @@ class _InternalFrame(object):
131131
| 2| 3| 7| 11| 15| 19|
132132
| 3| 4| 8| 12| 16| 20|
133133
+-----------------+---+---+---+---+---+
134+
>>> internal.spark_df.show() # doctest: +NORMALIZE_WHITESPACE
135+
+---+---+---+---+---+
136+
| A| B| C| D| E|
137+
+---+---+---+---+---+
138+
| 1| 5| 9| 13| 17|
139+
| 2| 6| 10| 14| 18|
140+
| 3| 7| 11| 15| 19|
141+
| 4| 8| 12| 16| 20|
142+
+---+---+---+---+---+
134143
>>> internal.pandas_df
135144
A B C D E
136145
0 1 5 9 13 17
@@ -196,6 +205,30 @@ class _InternalFrame(object):
196205
3 7 11 15 19
197206
4 8 12 16 20
198207
208+
The `spark_df` will drop the index columns:
209+
210+
>>> internal.spark_df.show() # doctest: +NORMALIZE_WHITESPACE
211+
+---+---+---+---+
212+
| B| C| D| E|
213+
+---+---+---+---+
214+
| 5| 9| 13| 17|
215+
| 6| 10| 14| 18|
216+
| 7| 11| 15| 19|
217+
| 8| 12| 16| 20|
218+
+---+---+---+---+
219+
220+
but if `drop=False`, the columns will still remain in `spark_df`:
221+
222+
>>> kdf.set_index("A", drop=False)._internal.spark_df.show() # doctest: +NORMALIZE_WHITESPACE
223+
+---+---+---+---+---+
224+
| A| B| C| D| E|
225+
+---+---+---+---+---+
226+
| 1| 5| 9| 13| 17|
227+
| 2| 6| 10| 14| 18|
228+
| 3| 7| 11| 15| 19|
229+
| 4| 8| 12| 16| 20|
230+
+---+---+---+---+---+
231+
199232
In case that index becomes a multi index as below:
200233
201234
>>> kdf2 = kdf.set_index("A", append=True)
@@ -586,15 +619,13 @@ def spark_internal_df(self) -> spark.DataFrame:
586619
@lazy_property
def spark_df(self) -> spark.DataFrame:
    """ Return as Spark DataFrame. """
    # Select every data column, aliased back to its user-facing name when the
    # internal column name differs (e.g. multi-level or positional labels).
    data_columns = []
    for pos, (column, idx) in enumerate(zip(self._data_columns, self.column_index)):
        col = self.scol_for(idx)
        # Derive the output column name from the column-index entry:
        # no entry -> positional; multi-level -> stringified tuple; else the label.
        if idx is None:
            label = str(pos)
        elif len(idx) > 1:
            label = str(idx)
        else:
            label = idx[0]
        if column != label:
            col = col.alias(label)
        data_columns.append(col)
    return self._sdf.select(data_columns)
599630

600631
@lazy_property

0 commit comments

Comments
 (0)