@@ -1598,9 +1598,7 @@ def index(self):
15981598 Index
15991599 """
16001600 from databricks .koalas .indexes import Index , MultiIndex
1601- if len (self ._internal .index_map ) == 0 :
1602- return None
1603- elif len (self ._internal .index_map ) == 1 :
1601+ if len (self ._internal .index_map ) == 1 :
16041602 return Index (self )
16051603 else :
16061604 return MultiIndex (self )
@@ -1860,9 +1858,6 @@ class max type
18601858 lion mammal 80.5 run
18611859 monkey mammal NaN jump
18621860 """
1863- if len (self ._internal .index_map ) == 0 :
1864- raise NotImplementedError ('Can\' t reset index because there is no index.' )
1865-
18661861 multi_index = len (self ._internal .index_map ) > 1
18671862
18681863 def rename (index ):
@@ -1877,7 +1872,10 @@ def rename(index):
18771872 if level is None :
18781873 new_index_map = [(column , name if name is not None else rename (i ))
18791874 for i , (column , name ) in enumerate (self ._internal .index_map )]
1880- index_map = []
1875+ # TODO: this will end up with multiple columns that point to the same index column.
1876+ # For instance, if we do df.reset_index(), the new column `index` points to the
1877+ # same '__index_level_0__' internally. We should create a new column instead.
1878+ index_map = [('__index_level_0__' , None )]
18811879 else :
18821880 if isinstance (level , (int , str )):
18831881 level = [level ]
@@ -2382,13 +2380,13 @@ def to_koalas(self):
23822380
23832381 >>> spark_df = df.to_spark()
23842382 >>> spark_df
2385- DataFrame[__index_level_0__: bigint, col1: bigint, col2: bigint]
2383+ DataFrame[col1: bigint, col2: bigint]
23862384
23872385 >>> kdf = spark_df.to_koalas()
23882386 >>> kdf
2389- __index_level_0__ col1 col2
2390- 0 0 1 3
2391- 1 1 2 4
2387+ col1 col2
2388+ 0 1 3
2389+ 1 2 4
23922390
23932391 Calling to_koalas on a Koalas DataFrame simply returns itself.
23942392
@@ -2493,8 +2491,8 @@ def to_table(self, name: str, format: Optional[str] = None, mode: str = 'error',
24932491
24942492 >>> df.to_table('%s.my_table' % db, partition_cols='date')
24952493 """
2496- self ._sdf .write .saveAsTable (name = name , format = format , mode = mode ,
2497- partitionBy = partition_cols , options = options )
2494+ self .to_spark () .write .saveAsTable (name = name , format = format , mode = mode ,
2495+ partitionBy = partition_cols , options = options )
24982496
24992497 def to_delta (self , path : str , mode : str = 'error' ,
25002498 partition_cols : Union [str , List [str ], None ] = None , ** options ):
@@ -2604,8 +2602,8 @@ def to_parquet(self, path: str, mode: str = 'error',
26042602 ... mode = 'overwrite',
26052603 ... partition_cols=['date', 'country'])
26062604 """
2607- self ._sdf .write .parquet (path = path , mode = mode , partitionBy = partition_cols ,
2608- compression = compression )
2605+ self .to_spark () .write .parquet (
2606+ path = path , mode = mode , partitionBy = partition_cols , compression = compression )
26092607
26102608 def to_spark_io (self , path : Optional [str ] = None , format : Optional [str ] = None ,
26112609 mode : str = 'error' , partition_cols : Union [str , List [str ], None ] = None ,
@@ -2657,13 +2655,16 @@ def to_spark_io(self, path: Optional[str] = None, format: Optional[str] = None,
26572655
26582656 >>> df.to_spark_io(path='%s/to_spark_io/foo.json' % path, format='json')
26592657 """
2660- self ._sdf .write .save (path = path , format = format , mode = mode , partitionBy = partition_cols ,
2661- options = options )
2658+ self .to_spark () .write .save (
2659+ path = path , format = format , mode = mode , partitionBy = partition_cols , options = options )
26622660
26632661 def to_spark (self ):
26642662 """
26652663 Return the current DataFrame as a Spark DataFrame.
26662664
2665+ .. note:: Index information is lost. If the index columns are not also present
2666+ as actual columns, they are dropped from the result.
2667+
26672668 See Also
26682669 --------
26692670 DataFrame.to_koalas
@@ -3653,14 +3654,21 @@ def pivot_table(self, values=None, index=None, columns=None,
36533654 sdf = sdf .fillna (fill_value )
36543655
36553656 if index is not None :
3656- return DataFrame (sdf ).set_index (index )
3657+ data_columns = [column for column in sdf .columns if column not in index ]
3658+ index_map = [(column , column ) for column in index ]
3659+ internal = _InternalFrame (sdf = sdf , data_columns = data_columns , index_map = index_map )
3660+ return DataFrame (internal )
36573661 else :
36583662 if isinstance (values , list ):
36593663 index_values = values [- 1 ]
36603664 else :
36613665 index_values = values
36623666
3663- return DataFrame (sdf .withColumn (columns , F .lit (index_values ))).set_index (columns )
3667+ sdf = sdf .withColumn (columns , F .lit (index_values ))
3668+ data_columns = [column for column in sdf .columns if column not in columns ]
3669+ index_map = [(column , column ) for column in columns ]
3670+ internal = _InternalFrame (sdf = sdf , data_columns = data_columns , index_map = index_map )
3671+ return DataFrame (internal )
36643672
36653673 def pivot (self , index = None , columns = None , values = None ):
36663674 """
@@ -4364,9 +4372,6 @@ def sort_index(self, axis: int = 0,
43644372 a 1 2 1
43654373 b 1 0 3
43664374 """
4367- if len (self ._internal .index_map ) == 0 :
4368- raise ValueError ("Index should be set." )
4369-
43704375 if axis != 0 :
43714376 raise ValueError ("No other axes than 0 are supported at the moment" )
43724377 if kind is not None :
0 commit comments