databricks · HyukjinKwon · May 19, 2019 · May 17, 2019
diff --git a/databricks/koalas/frame.py b/databricks/koalas/frame.py
@@ -24,7 +24,8 @@
 
 import numpy as np
 import pandas as pd
-from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype, is_list_like
+from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype, is_list_like, \
+    is_dict_like
 from pyspark import sql as spark
 from pyspark.sql import functions as F, Column
 from pyspark.sql.types import (BooleanType, ByteType, DecimalType, DoubleType, FloatType,
@@ -2570,6 +2571,78 @@ def sample(self, n: Optional[int] = None, frac: Optional[float] = None, replace:
         sdf = self._sdf.sample(withReplacement=replace, fraction=frac, seed=random_state)
         return DataFrame(sdf, self._metadata.copy())
 
+    def astype(self, dtype) -> 'DataFrame':
+        """
+        Cast a Koalas object to a specified dtype ``dtype``.
+
+        Parameters
+        ----------
+        dtype : data type, or dict of column name -> data type
+            A single numpy.dtype or Python type is applied to every column.
+            A dict-like of the form {col: dtype, ...} casts only the listed
+            columns, each to its own numpy.dtype or Python type; columns that
+            are not listed are left unchanged.
+
+        Returns
+        -------
+        casted : DataFrame
+
+        Raises
+        ------
+        KeyError
+            If ``dtype`` is dict-like and one of its keys is not a column name.
+
+        See Also
+        --------
+        to_datetime : Convert argument to datetime.
+
+        Examples
+        --------
+        >>> df = ks.DataFrame({'a': [1, 2, 3], 'b': [1, 2, 3]}, dtype='int64')
+        >>> df
+           a  b
+        0  1  1
+        1  2  2
+        2  3  3
+
+        Convert to float type:
+
+        >>> df.astype('float')
+             a    b
+        0  1.0  1.0
+        1  2.0  2.0
+        2  3.0  3.0
+
+        Convert back to int64 type:
+
+        >>> df.astype('int64')
+           a  b
+        0  1  1
+        1  2  2
+        2  3  3
+
+        Convert only column a to float type:
+
+        >>> df.astype({'a': float})
+             a  b
+        0  1.0  1
+        1  2.0  2
+        2  3.0  3
+
+        """
+        if is_dict_like(dtype):
+            # Validate the requested labels up front so a bad key fails before any cast.
+            for key in dtype.keys():
+                if key not in self.columns:
+                    raise KeyError('Only a column name can be used for the '
+                                   'key in a dtype mappings argument.')
+            casted = [ser.astype(dtype=dtype[name]) if name in dtype else ser
+                      for name, ser in self.iteritems()]
+        else:
+            casted = [ser.astype(dtype=dtype) for _, ser in self.iteritems()]
+        sdf = self._sdf.select(self._metadata.index_columns + [ser._scol for ser in casted])
+        return DataFrame(sdf, self._metadata.copy())
+
     def _pd_getitem(self, key):
         from databricks.koalas.series import Series
         if key is None:

diff --git a/databricks/koalas/missing/frame.py b/databricks/koalas/missing/frame.py
@@ -57,7 +57,6 @@ class _MissingPandasLikeDataFrame(object):
     as_matrix = unsupported_function('as_matrix')
     asfreq = unsupported_function('asfreq')
     asof = unsupported_function('asof')
-    astype = unsupported_function('astype')
     at_time = unsupported_function('at_time')
     between_time = unsupported_function('between_time')
     bfill = unsupported_function('bfill')

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
@@ -257,7 +257,7 @@ def spark_type(self):
         """ Returns the data type as defined by Spark, as a Spark DataType object."""
         return self.schema.fields[-1].dataType
 
-    def astype(self, dtype):
+    def astype(self, dtype) -> 'Series':
         """
         Cast a Koalas object to a specified dtype ``dtype``.
 

diff --git a/docs/source/reference/frame.rst b/docs/source/reference/frame.rst
@@ -34,6 +34,7 @@ Conversion
 
    DataFrame.copy
    DataFrame.isna
+   DataFrame.astype
    DataFrame.isnull
    DataFrame.notna
    DataFrame.notnull