@@ -131,6 +131,15 @@ class _InternalFrame(object):
131131 | 2| 3| 7| 11| 15| 19|
132132 | 3| 4| 8| 12| 16| 20|
133133 +-----------------+---+---+---+---+---+
134+ >>> internal.spark_df.show() # doctest: +NORMALIZE_WHITESPACE
135+ +---+---+---+---+---+
136+ | A| B| C| D| E|
137+ +---+---+---+---+---+
138+ | 1| 5| 9| 13| 17|
139+ | 2| 6| 10| 14| 18|
140+ | 3| 7| 11| 15| 19|
141+ | 4| 8| 12| 16| 20|
142+ +---+---+---+---+---+
134143 >>> internal.pandas_df
135144 A B C D E
136145 0 1 5 9 13 17
@@ -196,6 +205,30 @@ class _InternalFrame(object):
196205 3 7 11 15 19
197206 4 8 12 16 20
198207
208+ `spark_df` drops the index columns:
209+
210+ >>> internal.spark_df.show() # doctest: +NORMALIZE_WHITESPACE
211+ +---+---+---+---+
212+ | B| C| D| E|
213+ +---+---+---+---+
214+ | 5| 9| 13| 17|
215+ | 6| 10| 14| 18|
216+ | 7| 11| 15| 19|
217+ | 8| 12| 16| 20|
218+ +---+---+---+---+
219+
220+ but if `drop=False`, the index columns remain in `spark_df`:
221+
222+ >>> kdf.set_index("A", drop=False)._internal.spark_df.show() # doctest: +NORMALIZE_WHITESPACE
223+ +---+---+---+---+---+
224+ | A| B| C| D| E|
225+ +---+---+---+---+---+
226+ | 1| 5| 9| 13| 17|
227+ | 2| 6| 10| 14| 18|
228+ | 3| 7| 11| 15| 19|
229+ | 4| 8| 12| 16| 20|
230+ +---+---+---+---+---+
231+
199232 When the index becomes a multi-index, as below:
200233
201234 >>> kdf2 = kdf.set_index("A", append=True)
@@ -586,15 +619,13 @@ def spark_internal_df(self) -> spark.DataFrame:
@lazy_property
def spark_df(self) -> spark.DataFrame:
    """Return as Spark DataFrame.

    Every entry of ``self._data_columns`` is selected from ``self._sdf``;
    no column is filtered out here, so a data column that is also used as
    an index column stays in the result.

    Each selected column is named after its entry in ``self.column_index``:

    * the positional number (as a string) when the entry is ``None``;
    * ``str(entry)`` when the entry has more than one level;
    * the single level itself otherwise.

    A column is aliased only when that name differs from its current
    Spark column name.
    """
    data_columns = []
    for i, (column, idx) in enumerate(zip(self._data_columns, self.column_index)):
        scol = self.scol_for(idx)
        if idx is None:
            # No column index entry: fall back to the column's position.
            name = str(i)
        elif len(idx) > 1:
            # Multi-level entry: use its string form as a flat column name.
            name = str(idx)
        else:
            name = idx[0]
        if column != name:
            scol = scol.alias(name)
        data_columns.append(scol)
    return self._sdf.select(data_columns)
599630
600631 @lazy_property
0 commit comments