@@ -1598,9 +1598,7 @@ def index(self):
         Index
         """
         from databricks.koalas.indexes import Index, MultiIndex
-        if len(self._internal.index_map) == 0:
-            return None
-        elif len(self._internal.index_map) == 1:
+        if len(self._internal.index_map) == 1:
             return Index(self)
         else:
             return MultiIndex(self)
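
With a default index now always attached to the internal frame, an empty index_map can no longer occur, so the None branch is dead code. A minimal sketch of the user-visible effect (hypothetical example, assuming PySpark and a koalas build that includes this change):

import databricks.koalas as ks

kdf = ks.DataFrame({'col1': [1, 2], 'col2': [3, 4]})

# set_index was never called, but a default index now always backs the
# frame, so .index returns an Index object instead of None.
assert kdf.index is not None
print(type(kdf.index))  # <class 'databricks.koalas.indexes.Index'>
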
@@ -1860,9 +1858,6 @@ class max type
         lion    mammal   80.5   run
         monkey  mammal    NaN  jump
         """
-        if len(self._internal.index_map) == 0:
-            raise NotImplementedError('Can\'t reset index because there is no index.')
-
         multi_index = len(self._internal.index_map) > 1

         def rename(index):
@@ -1915,10 +1910,27 @@ def rename(index):
                                   index_name if index_name is not None else rename(index_name)))
                 index_map.remove(info)

+        new_data_columns = [
+            self._internal.scol_for(column).alias(name) for column, name in new_index_map]
+
+        if len(index_map) > 0:
+            index_columns = [column for column, _ in index_map]
+            sdf = self._sdf.select(
+                index_columns + new_data_columns + self._internal.data_columns)
+        else:
+            sdf = self._sdf.select(new_data_columns + self._internal.data_columns)
+
+            # Now the new internal Spark columns are named the same as the index names.
+            new_index_map = [(name, name) for column, name in new_index_map]
+
+            index_map = [('__index_level_0__', None)]
+            sdf = _InternalFrame.attach_default_index(sdf)
+
         if drop:
             new_index_map = []

         internal = self._internal.copy(
+            sdf=sdf,
             data_columns=[column for column, _ in new_index_map] + self._internal.data_columns,
             index_map=index_map,
             column_index=None)
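
When every index level is reset, the frame would otherwise be left with no index at all; instead the old index columns are demoted to data columns and attach_default_index installs a sequential '__index_level_0__' index. A rough sketch of the resulting behavior (hypothetical data):

import databricks.koalas as ks

kdf = ks.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}).set_index('a')

# All index levels are reset here, so a fresh default index is attached
# behind the scenes and 'a' comes back as an ordinary column.
print(kdf.reset_index().sort_index())
#    a  b
# 0  1  4
# 1  2  5
# 2  3  6
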
@@ -2382,13 +2394,13 @@ def to_koalas(self):

         >>> spark_df = df.to_spark()
         >>> spark_df
-        DataFrame[__index_level_0__: bigint, col1: bigint, col2: bigint]
+        DataFrame[col1: bigint, col2: bigint]

         >>> kdf = spark_df.to_koalas()
         >>> kdf
-           __index_level_0__  col1  col2
-        0                  0     1     3
-        1                  1     2     4
+           col1  col2
+        0     1     3
+        1     2     4

         Calling to_koalas on a Koalas DataFrame simply returns itself.

@@ -2493,8 +2505,8 @@ def to_table(self, name: str, format: Optional[str] = None, mode: str = 'error',

         >>> df.to_table('%s.my_table' % db, partition_cols='date')
         """
-        self._sdf.write.saveAsTable(name=name, format=format, mode=mode,
-                                    partitionBy=partition_cols, **options)
+        self.to_spark().write.saveAsTable(name=name, format=format, mode=mode,
+                                          partitionBy=partition_cols, **options)

     def to_delta(self, path: str, mode: str = 'error',
                  partition_cols: Union[str, List[str], None] = None, **options):
@@ -2604,8 +2616,8 @@ def to_parquet(self, path: str, mode: str = 'error',
         ...     mode = 'overwrite',
         ...     partition_cols=['date', 'country'])
         """
-        self._sdf.write.parquet(path=path, mode=mode, partitionBy=partition_cols,
-                                compression=compression)
+        self.to_spark().write.parquet(
+            path=path, mode=mode, partitionBy=partition_cols, compression=compression)

     def to_spark_io(self, path: Optional[str] = None, format: Optional[str] = None,
                     mode: str = 'error', partition_cols: Union[str, List[str], None] = None,
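
This and the surrounding write methods (to_table above, to_spark_io below) now route through to_spark() rather than writing the raw internal _sdf, so the hidden index column no longer leaks into storage. A hedged sketch of the effect (the path and data are illustrative):

import databricks.koalas as ks

kdf = ks.DataFrame({'date': ['2019-01-01', '2019-01-02'], 'sales': [10, 20]})

# Only the visible data columns land in the files; previously the internal
# '__index_level_0__' column was written out as well.
kdf.to_parquet('/tmp/koalas_demo_parquet', mode='overwrite', partition_cols='date')
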
@@ -2657,13 +2669,16 @@ def to_spark_io(self, path: Optional[str] = None, format: Optional[str] = None,

         >>> df.to_spark_io(path='%s/to_spark_io/foo.json' % path, format='json')
         """
-        self._sdf.write.save(path=path, format=format, mode=mode, partitionBy=partition_cols,
-                             **options)
+        self.to_spark().write.save(
+            path=path, format=format, mode=mode, partitionBy=partition_cols, **options)

     def to_spark(self):
         """
         Return the current DataFrame as a Spark DataFrame.

+        .. note:: Index information is lost, so if the index columns are not also present
+            as actual data columns, they are dropped in the conversion.
+
         See Also
         --------
         DataFrame.to_koalas
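
The new note is easy to see in action: with set_index's default drop=True, the index column exists only as index metadata and does not survive the conversion. A small sketch (hypothetical data):

import databricks.koalas as ks

kdf = ks.DataFrame({'col1': [1, 2], 'col2': [3, 4]}).set_index('col1')

sdf = kdf.to_spark()
# 'col1' lived only in the index, so the Spark DataFrame drops it.
print(sdf.columns)  # ['col2']
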
@@ -3653,14 +3668,21 @@ def pivot_table(self, values=None, index=None, columns=None,
             sdf = sdf.fillna(fill_value)

         if index is not None:
-            return DataFrame(sdf).set_index(index)
+            data_columns = [column for column in sdf.columns if column not in index]
+            index_map = [(column, column) for column in index]
+            internal = _InternalFrame(sdf=sdf, data_columns=data_columns, index_map=index_map)
+            return DataFrame(internal)
         else:
             if isinstance(values, list):
                 index_values = values[-1]
             else:
                 index_values = values

-            return DataFrame(sdf.withColumn(columns, F.lit(index_values))).set_index(columns)
+            sdf = sdf.withColumn(columns, F.lit(index_values))
+            data_columns = [column for column in sdf.columns if column not in columns]
+            index_map = [(column, column) for column in columns]
+            internal = _InternalFrame(sdf=sdf, data_columns=data_columns, index_map=index_map)
+            return DataFrame(internal)

     def pivot(self, index=None, columns=None, values=None):
         """
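
Building the _InternalFrame directly marks the pivot keys as the index in a single pass, where the old code materialized DataFrame(sdf) and then ran set_index over it. A rough usage sketch (column names and data are illustrative):

import databricks.koalas as ks

kdf = ks.DataFrame({'a': ['x', 'x', 'y'],
                    'c': ['p', 'q', 'p'],
                    'b': [1, 2, 3]})

# The result comes back with 'a' already installed as the index.
pivoted = kdf.pivot_table(values='b', index=['a'], columns='c', aggfunc='sum')
print(pivoted.sort_index())
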
@@ -4364,9 +4386,6 @@ def sort_index(self, axis: int = 0,
         a 1  2  1
         b 1  0  3
         """
-        if len(self._internal.index_map) == 0:
-            raise ValueError("Index should be set.")
-
         if axis != 0:
             raise ValueError("No other axes than 0 are supported at the moment")
         if kind is not None:
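
With at least a default index always present, the "Index should be set" guard can never fire. A minimal sketch (hypothetical data):

import databricks.koalas as ks

kdf = ks.DataFrame({'A': [2, 1, 3]})

# No explicit index is needed any more; the attached default index is
# what gets sorted.
print(kdf.sort_index(ascending=False))
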
@@ -4959,12 +4978,12 @@ def join(self, right: 'DataFrame', on: Optional[Union[str, List[str]]] = None,
         original DataFrame’s index in the result.

         >>> join_kdf = kdf1.join(kdf2.set_index('key'), on='key')
-        >>> join_kdf.sort_values(by=join_kdf.columns)
+        >>> join_kdf.sort_index()
           key   A     B
-        0  K0  A0    B0
-        1  K1  A1    B1
-        2  K2  A2    B2
-        3  K3  A3  None
+        0  K3  A3  None
+        1  K0  A0    B0
+        2  K1  A1    B1
+        3  K2  A2    B2
         """
         if on:
             self = self.set_index(on)
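
For reference, the doctest's inputs (defined earlier in the same docstring) are reproduced below. The expected output changes because the joined frame now carries a freshly attached default index whose order is not tied to the left frame's row order, so sorting by that index is the stable way to write the doctest:

import databricks.koalas as ks

kdf1 = ks.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3']})
kdf2 = ks.DataFrame({'key': ['K0', 'K1', 'K2'],
                     'B': ['B0', 'B1', 'B2']})

# sort_index() orders by the default index attached during the join,
# matching the expected output in the updated doctest.
join_kdf = kdf1.join(kdf2.set_index('key'), on='key')
print(join_kdf.sort_index())
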
@@ -5543,9 +5562,6 @@ def _cum(self, func, skipna: bool):
         elif func.__name__ == "cumprod":
             func = "cumprod"

-        if len(self._internal.index_columns) == 0:
-            raise ValueError("Index must be set.")
-
         applied = []
         for column in self.columns:
             applied.append(getattr(self[column], func)(skipna))
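
Same story for the cumulative operators: they order their underlying window by the index columns, and a default index is now guaranteed to exist, so the guard is unnecessary. A minimal sketch (hypothetical data):

import databricks.koalas as ks

kdf = ks.DataFrame({'A': [1.0, 2.0, 3.0]})

# cumsum no longer demands an explicit index; the default index provides
# the ordering for the window computation.
print(kdf.cumsum())
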