databricks · ueshin · May 11, 2019 · May 9, 2019 · May 9, 2019 · May 9, 2019
diff --git a/databricks/koalas/frame.py b/databricks/koalas/frame.py
@@ -950,6 +950,108 @@ def assign(self, **kwargs):
                            [name for name, _ in pairs if name not in self._metadata.column_fields]))
         return DataFrame(sdf, metadata)
 
+    def to_excel(self, excel_writer, sheet_name="Sheet1", na_rep="", float_format=None,
+                 columns=None, header=True, index=True, index_label=None, startrow=0,
+                 startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep="inf",
+                 verbose=True, freeze_panes=None):
+        """
+        Write object to an Excel sheet.
+
+        To write a single object to an Excel .xlsx file it is only necessary to
+        specify a target file name. To write to multiple sheets it is necessary to
+        create an `ExcelWriter` object with a target file name, and specify a sheet
+        in the file to write to.
+
+        Multiple sheets may be written to by specifying unique `sheet_name`.
+        With all data written to the file it is necessary to save the changes.
+        Note that creating an `ExcelWriter` object with a file name that already
+        exists will result in the contents of the existing file being erased.
+
+        Parameters
+        ----------
+        excel_writer : str or ExcelWriter object
+            File path or existing ExcelWriter.
+        sheet_name : str, default 'Sheet1'
+            Name of sheet which will contain DataFrame.
+        na_rep : str, default ''
+            Missing data representation.
+        float_format : str, optional
+            Format string for floating point numbers. For example
+            ``float_format="%.2f"`` will format 0.1234 to 0.12.
+        columns : sequence or list of str, optional
+            Columns to write.
+        header : bool or list of str, default True
+            Write out the column names. If a list of string is given it is
+            assumed to be aliases for the column names.
+        index : bool, default True
+            Write row names (index).
+        index_label : str or sequence, optional
+            Column label for index column(s) if desired. If not specified, and
+            `header` and `index` are True, then the index names are used. A
+            sequence should be given if the DataFrame uses MultiIndex.
+        startrow : int, default 0
+            Upper left cell row to dump data frame.
+        startcol : int, default 0
+            Upper left cell column to dump data frame.
+        engine : str, optional
+            Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this
+            via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
+            ``io.excel.xlsm.writer``.
+        merge_cells : bool, default True
+            Write MultiIndex and Hierarchical Rows as merged cells.
+        encoding : str, optional
+            Encoding of the resulting excel file. Only necessary for xlwt,
+            other writers support unicode natively.
+        inf_rep : str, default 'inf'
+            Representation for infinity (there is no native representation for
+            infinity in Excel).
+        verbose : bool, default True
+            Display more information in the error logs.
+        freeze_panes : tuple of int (length 2), optional
+            Specifies the one-based bottommost row and rightmost column that
+            is to be frozen.
+
+        Notes
+        -----
+        Once a workbook has been saved it is not possible to write further data
+        without rewriting the whole workbook.
+
+        Examples
+        --------
+        Create, write to and save a workbook:
+
+        >>> df1 = ks.DataFrame([['a', 'b'], ['c', 'd']],
+        ...                    index=['row 1', 'row 2'],
+        ...                    columns=['col 1', 'col 2'])
+        >>> df1.to_excel("output.xlsx")  # doctest: +SKIP
+
+        To specify the sheet name:
+
+        >>> df1.to_excel("output.xlsx")  # doctest: +SKIP
+        >>> df1.to_excel("output.xlsx",
+        ...              sheet_name='Sheet_name_1')  # doctest: +SKIP
+
+        If you wish to write to more than one sheet in the workbook, it is
+        necessary to specify an ExcelWriter object:
+
+        >>> with pd.ExcelWriter('output.xlsx') as writer:  # doctest: +SKIP
+        ...      df1.to_excel(writer, sheet_name='Sheet_name_1')
+        ...      df2.to_excel(writer, sheet_name='Sheet_name_2')
+
+        To set the library that is used to write the Excel file,
+        you can pass the `engine` keyword (the default engine is
+        automatically chosen depending on the file extension):
+
+        >>> df1.to_excel('output1.xlsx', engine='xlsxwriter')  # doctest: +SKIP
+        """
+
+        # Make sure locals() call is at the top of the function so we don't capture local variables.
+        args = locals()
+        kdf = self
+        # Delegate to pandas, passing the to_pandas() result and the captured arguments.
+        return validate_arguments_and_invoke_function(
+            kdf.to_pandas(), self.to_excel, pd.DataFrame.to_excel, args)
+
     @property
     def loc(self):
         return SparkDataFrameLocator(self)

diff --git a/databricks/koalas/missing/frame.py b/databricks/koalas/missing/frame.py
@@ -178,7 +178,6 @@ class _MissingPandasLikeDataFrame(object):
     to_clipboard = unsupported_function('to_clipboard')
     to_csv = unsupported_function('to_csv')
     to_dense = unsupported_function('to_dense')
-    to_excel = unsupported_function('to_excel')
     to_feather = unsupported_function('to_feather')
     to_gbq = unsupported_function('to_gbq')
     to_hdf = unsupported_function('to_hdf')

diff --git a/databricks/koalas/tests/test_dataframe_conversion.py b/databricks/koalas/tests/test_dataframe_conversion.py
@@ -70,3 +70,34 @@ def test_to_html(self):
             """)
         got = self.strip_all_whitespace(self.kdf.to_html(max_rows=2))
         self.assert_eq(got, expected)
+
+    def test_to_excel(self):
+        pdf = self.pdf
+        kdf = self.kdf
+        excel_writer = "output.xlsx"  # NOTE(review): relative path; no cleanup in this test
+        # Only return values are compared (presumably both None); file contents are not checked.
+        self.assert_eq(kdf.to_excel(excel_writer), pdf.to_excel(excel_writer))
+        # Missing values in both a numeric and a string column, written with na_rep.
+        pdf = pd.DataFrame({
+            'a': [1, None, 3],
+            'b': ["one", "two", None],
+        }, index=[0, 1, 3])
+
+        kdf = koalas.from_pandas(pdf)
+
+        self.assert_eq(kdf.to_excel(excel_writer, na_rep='null'),
+                       pdf.to_excel(excel_writer, na_rep='null'))
+        # Float-only frame for the float_format / header / index options.
+        pdf = pd.DataFrame({
+            'a': [1.0, 2.0, 3.0],
+            'b': [4.0, 5.0, 6.0],
+        }, index=[0, 1, 3])
+
+        kdf = koalas.from_pandas(pdf)
+
+        self.assert_eq(kdf.to_excel(excel_writer, float_format='%.1f'),
+                       pdf.to_excel(excel_writer, float_format='%.1f'))
+        self.assert_eq(kdf.to_excel(excel_writer, header=False),
+                       pdf.to_excel(excel_writer, header=False))
+        self.assert_eq(kdf.to_excel(excel_writer, index=False),
+                       pdf.to_excel(excel_writer, index=False))
diff --git a/docs/source/reference/frame.rst b/docs/source/reference/frame.rst
@@ -124,3 +124,4 @@ Serialization / IO / Conversion
    DataFrame.to_spark
    DataFrame.to_string
    DataFrame.to_dict
+   DataFrame.to_excel
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -2,6 +2,7 @@
 pandas>=0.23
 pyarrow>=0.10,<0.11
 numpy>=1.14
+openpyxl>=2.6.2
 
 # Documentation build
 sphinx