@@ -1598,9 +1598,7 @@ def index(self):
         Index
         """
         from databricks.koalas.indexes import Index, MultiIndex
-        if len(self._internal.index_map) == 0:
-            return None
-        elif len(self._internal.index_map) == 1:
+        if len(self._internal.index_map) == 1:
             return Index(self)
         else:
             return MultiIndex(self)
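Because a default index is now always attached, `index_map` is never empty and the `None` branch is dead code. A minimal usage sketch, assuming the usual `databricks.koalas` import; the repr shown is what the pandas-style output should look like:

```python
import databricks.koalas as ks

kdf = ks.DataFrame({'a': [1, 2, 3]})

# A default index is always present now, so .index never returns None.
print(kdf.index)        # Int64Index([0, 1, 2], dtype='int64')
print(type(kdf.index))  # <class 'databricks.koalas.indexes.Index'>
```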
@@ -1860,9 +1858,6 @@ class max type
         lion    mammal       80.5   run
         monkey  mammal        NaN  jump
         """
-        if len(self._internal.index_map) == 0:
-            raise NotImplementedError('Can\'t reset index because there is no index.')
-
         multi_index = len(self._internal.index_map) > 1
 
         def rename(index):
@@ -1877,7 +1872,15 @@ def rename(index):
         if level is None:
             new_index_map = [(column, name if name is not None else rename(i))
                              for i, (column, name) in enumerate(self._internal.index_map)]
-            index_map = []
+            new_data_columns = [
+                self._internal.scol_for(column).alias(name) for column, name in new_index_map]
+            sdf = self._sdf.select(new_data_columns + self._internal.data_columns)
+
+            # Now the new internal Spark columns are named the same as the index names.
+            new_index_map = [(name, name) for column, name in new_index_map]
+
+            index_map = [('__index_level_0__', None)]
+            sdf = _InternalFrame.attach_default_index(sdf)
         else:
             if isinstance(level, (int, str)):
                 level = [level]
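`_InternalFrame.attach_default_index` is defined elsewhere in this change. A minimal sketch of what attaching a sequential `__index_level_0__` column can look like, assuming a `zipWithIndex`-based approach; the helper's actual implementation may differ:

```python
from pyspark.sql import DataFrame as SparkDataFrame
from pyspark.sql.types import LongType, StructField, StructType

def attach_default_index_sketch(sdf: SparkDataFrame) -> SparkDataFrame:
    """Prepend a 0-based '__index_level_0__' column to a Spark DataFrame."""
    schema = StructType(
        [StructField('__index_level_0__', LongType(), nullable=False)]
        + list(sdf.schema.fields))
    # zipWithIndex assigns each row its stable 0-based position across partitions.
    rdd = sdf.rdd.zipWithIndex().map(lambda pair: (pair[1],) + tuple(pair[0]))
    return sdf.sql_ctx.createDataFrame(rdd, schema)
```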
@@ -1915,10 +1918,13 @@ def rename(index):
                                   index_name if index_name is not None else rename(index_name)))
                 index_map.remove(info)
 
+        sdf = self._sdf
+
         if drop:
             new_index_map = []
 
         internal = self._internal.copy(
+            sdf=sdf,
             data_columns=[column for column, _ in new_index_map] + self._internal.data_columns,
             index_map=index_map,
             column_index=None)
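End to end, `reset_index` now succeeds on any frame instead of raising `NotImplementedError` when no index is set. A quick sketch of the expected behavior, assuming the standard import:

```python
import databricks.koalas as ks

kdf = ks.DataFrame({'a': [1, 2]}).set_index('a')

# 'a' moves back into the data columns and a fresh default index is attached.
print(kdf.reset_index())
#    a
# 0  1
# 1  2
```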
@@ -2382,13 +2388,13 @@ def to_koalas(self):
 
         >>> spark_df = df.to_spark()
         >>> spark_df
-        DataFrame[__index_level_0__: bigint, col1: bigint, col2: bigint]
+        DataFrame[col1: bigint, col2: bigint]
 
         >>> kdf = spark_df.to_koalas()
         >>> kdf
-           __index_level_0__  col1  col2
-        0                  0     1     3
-        1                  1     2     4
+           col1  col2
+        0     1     3
+        1     2     4
 
         Calling to_koalas on a Koalas DataFrame simply returns itself.
 
@@ -2493,8 +2499,8 @@ def to_table(self, name: str, format: Optional[str] = None, mode: str = 'error',
 
         >>> df.to_table('%s.my_table' % db, partition_cols='date')
         """
-        self._sdf.write.saveAsTable(name=name, format=format, mode=mode,
-                                    partitionBy=partition_cols, options=options)
+        self.to_spark().write.saveAsTable(name=name, format=format, mode=mode,
+                                          partitionBy=partition_cols, options=options)
 
     def to_delta(self, path: str, mode: str = 'error',
                  partition_cols: Union[str, List[str], None] = None, **options):
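Routing the writers through `to_spark()` means persisted output carries only the data columns. A hedged usage sketch; the database and table names are hypothetical:

```python
import databricks.koalas as ks

kdf = ks.DataFrame({'date': ['2019-01-01', '2019-01-02'], 'value': [1, 2]})
kdf.to_table('my_db.my_table', format='parquet', mode='overwrite',
             partition_cols='date')
# The stored schema should be (date, value); the internal
# '__index_level_0__' column is no longer written out.
```

The same pattern applies to `to_parquet` and `to_spark_io` below, which switch from `self._sdf.write` to `self.to_spark().write` in exactly the same way.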
@@ -2604,8 +2610,8 @@ def to_parquet(self, path: str, mode: str = 'error',
         ...     mode = 'overwrite',
         ...     partition_cols=['date', 'country'])
         """
-        self._sdf.write.parquet(path=path, mode=mode, partitionBy=partition_cols,
-                                compression=compression)
+        self.to_spark().write.parquet(
+            path=path, mode=mode, partitionBy=partition_cols, compression=compression)
 
     def to_spark_io(self, path: Optional[str] = None, format: Optional[str] = None,
                     mode: str = 'error', partition_cols: Union[str, List[str], None] = None,
@@ -2657,13 +2663,16 @@ def to_spark_io(self, path: Optional[str] = None, format: Optional[str] = None,
 
         >>> df.to_spark_io(path='%s/to_spark_io/foo.json' % path, format='json')
         """
-        self._sdf.write.save(path=path, format=format, mode=mode, partitionBy=partition_cols,
-                             options=options)
+        self.to_spark().write.save(
+            path=path, format=format, mode=mode, partitionBy=partition_cols, options=options)
 
     def to_spark(self):
         """
         Return the current DataFrame as a Spark DataFrame.
 
+        .. note:: Index information is lost, so if the index columns are not present
+            among the actual data columns, they are dropped.
+
         See Also
         --------
         DataFrame.to_koalas
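A short sketch of the behavior the new note describes, assuming `to_spark()` selects only the data columns:

```python
import databricks.koalas as ks

kdf = ks.DataFrame({'x': [1, 2], 'y': [3, 4]}).set_index('x')
sdf = kdf.to_spark()

# 'x' lives only in the index, so it does not survive the conversion.
print(sdf.columns)  # ['y']
```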
@@ -3653,14 +3662,21 @@ def pivot_table(self, values=None, index=None, columns=None,
             sdf = sdf.fillna(fill_value)
 
         if index is not None:
-            return DataFrame(sdf).set_index(index)
+            data_columns = [column for column in sdf.columns if column not in index]
+            index_map = [(column, column) for column in index]
+            internal = _InternalFrame(sdf=sdf, data_columns=data_columns, index_map=index_map)
+            return DataFrame(internal)
         else:
             if isinstance(values, list):
                 index_values = values[-1]
             else:
                 index_values = values
 
-            return DataFrame(sdf.withColumn(columns, F.lit(index_values))).set_index(columns)
+            sdf = sdf.withColumn(columns, F.lit(index_values))
+            data_columns = [column for column in sdf.columns if column not in columns]
+            index_map = [(column, column) for column in columns]
+            internal = _InternalFrame(sdf=sdf, data_columns=data_columns, index_map=index_map)
+            return DataFrame(internal)
 
     def pivot(self, index=None, columns=None, values=None):
         """
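Building the `_InternalFrame` directly pins the pivot keys as the index without a follow-up `set_index` pass. A hedged usage sketch:

```python
import databricks.koalas as ks

kdf = ks.DataFrame({'a': ['x', 'x', 'y'], 'b': ['p', 'q', 'p'],
                    'v': [1, 2, 3]})

# The columns passed as `index` come back as the result's index directly.
print(kdf.pivot_table(values='v', index=['a'], columns='b', aggfunc='sum'))
```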
@@ -4364,9 +4380,6 @@ def sort_index(self, axis: int = 0,
         a 1  2  1
         b 1  0  3
         """
-        if len(self._internal.index_map) == 0:
-            raise ValueError("Index should be set.")
-
         if axis != 0:
             raise ValueError("No other axes than 0 are supported at the moment")
         if kind is not None:
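Since a default index is now always present, this guard can never fire. A sketch of the previously failing call:

```python
import databricks.koalas as ks

kdf = ks.DataFrame({'v': [3, 1, 2]})  # no explicit index set

# Previously raised ValueError("Index should be set."); now it simply
# sorts by the always-present default index.
print(kdf.sort_index())
```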
@@ -4959,12 +4972,12 @@ def join(self, right: 'DataFrame', on: Optional[Union[str, List[str]]] = None,
         original DataFrame's index in the result.
 
         >>> join_kdf = kdf1.join(kdf2.set_index('key'), on='key')
-        >>> join_kdf.sort_values(by=join_kdf.columns)
+        >>> join_kdf.sort_index()
           key   A     B
-        0  K0  A0    B0
-        1  K1  A1    B1
-        2  K2  A2    B2
-        3  K3  A3  None
+        0  K3  A3  None
+        1  K0  A0    B0
+        2  K1  A1    B1
+        3  K2  A2    B2
         """
         if on:
             self = self.set_index(on)
@@ -5543,9 +5556,6 @@ def _cum(self, func, skipna: bool):
         elif func.__name__ == "cumprod":
             func = "cumprod"
 
-        if len(self._internal.index_columns) == 0:
-            raise ValueError("Index must be set.")
-
         applied = []
         for column in self.columns:
             applied.append(getattr(self[column], func)(skipna))
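Same pattern as `sort_index`: the index-presence guard is dead code once a default index is always attached. A quick sketch:

```python
import databricks.koalas as ks

# Cumulative ops no longer require an explicitly set index.
print(ks.DataFrame({'a': [1, 2, 3]}).cumsum())
#    a
# 0  1
# 1  3
# 2  6
```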