databricks · ueshin · Mar 4, 2021 · Mar 3, 2021 · Mar 3, 2021 · Mar 3, 2021
diff --git a/databricks/koalas/__init__.py b/databricks/koalas/__init__.py
@@ -119,6 +119,7 @@ def assert_pyspark_version():
     "read_csv",
     "read_parquet",
     "to_datetime",
+    "date_range",
     "from_pandas",
     "get_dummies",
     "DataFrame",

diff --git a/databricks/koalas/indexes/datetimes.py b/databricks/koalas/indexes/datetimes.py
@@ -222,7 +222,7 @@ def dayofweek(self) -> Index:
 
         Examples
         --------
-        >>> idx = ks.from_pandas(pd.date_range('2016-12-31', '2017-01-08', freq='D'))
+        >>> idx = ks.date_range('2016-12-31', '2017-01-08', freq='D')
         >>> idx.dayofweek
         Int64Index([5, 6, 0, 1, 2, 3, 4, 5, 6], dtype='int64')
         """
@@ -277,7 +277,7 @@ def is_month_start(self) -> Index:
 
         Examples
         --------
-        >>> idx = ks.from_pandas(pd.date_range("2018-02-27", periods=3))
+        >>> idx = ks.date_range("2018-02-27", periods=3)
         >>> idx.is_month_start
         Index([False, False, True], dtype='object')
         """
@@ -300,7 +300,7 @@ def is_month_end(self) -> Index:
 
         Examples
         --------
-        >>> idx = ks.from_pandas(pd.date_range("2018-02-27", periods=3))
+        >>> idx = ks.date_range("2018-02-27", periods=3)
         >>> idx.is_month_end
         Index([False, True, False], dtype='object')
         """
@@ -323,7 +323,7 @@ def is_quarter_start(self) -> Index:
 
         Examples
         --------
-        >>> idx = ks.from_pandas(pd.date_range('2017-03-30', periods=4))
+        >>> idx = ks.date_range('2017-03-30', periods=4)
         >>> idx.is_quarter_start
         Index([False, False, True, False], dtype='object')
         """
@@ -346,7 +346,7 @@ def is_quarter_end(self) -> Index:
 
         Examples
         --------
-        >>> idx = ks.from_pandas(pd.date_range('2017-03-30', periods=4))
+        >>> idx = ks.date_range('2017-03-30', periods=4)
         >>> idx.is_quarter_end
         Index([False, True, False, False], dtype='object')
         """
@@ -368,7 +368,7 @@ def is_year_start(self) -> Index:
 
         Examples
         --------
-        >>> idx = ks.from_pandas(pd.date_range("2017-12-30", periods=3))
+        >>> idx = ks.date_range("2017-12-30", periods=3)
         >>> idx.is_year_start
         Index([False, False, True], dtype='object')
         """
@@ -390,7 +390,7 @@ def is_year_end(self) -> Index:
 
         Examples
         --------
-        >>> idx = ks.from_pandas(pd.date_range("2017-12-30", periods=3))
+        >>> idx = ks.date_range("2017-12-30", periods=3)
         >>> idx.is_year_end
         Index([False, True, False], dtype='object')
         """
@@ -413,7 +413,7 @@ def is_leap_year(self) -> Index:
 
         Examples
         --------
-        >>> idx = ks.from_pandas(pd.date_range("2012-01-01", "2015-01-01", freq="Y"))
+        >>> idx = ks.date_range("2012-01-01", "2015-01-01", freq="Y")
         >>> idx.is_leap_year
         Index([True, False, False], dtype='object')
         """

diff --git a/databricks/koalas/namespace.py b/databricks/koalas/namespace.py
@@ -68,7 +68,7 @@
 )
 from databricks.koalas.series import Series, first_series
 from databricks.koalas.spark.utils import as_nullable_spark_type, force_decimal_precision_scale
-from databricks.koalas.indexes import Index
+from databricks.koalas.indexes import Index, DatetimeIndex
 
 
 __all__ = [
@@ -83,6 +83,7 @@
     "read_excel",
     "read_html",
     "to_datetime",
+    "date_range",
     "get_dummies",
     "concat",
     "melt",
@@ -199,7 +200,7 @@ def read_csv(
     quotechar=None,
     escapechar=None,
     comment=None,
-    **options
+    **options,
 ) -> Union[DataFrame, Series]:
     """Read CSV (comma-separated) file into DataFrame or Series.
 
@@ -467,7 +468,7 @@ def read_delta(
     version: Optional[str] = None,
     timestamp: Optional[str] = None,
     index_col: Optional[Union[str, List[str]]] = None,
-    **options
+    **options,
 ) -> DataFrame:
     """
     Read a Delta Lake table on some file system and return a DataFrame.
@@ -596,7 +597,7 @@ def read_spark_io(
     format: Optional[str] = None,
     schema: Union[str, "StructType"] = None,
     index_col: Optional[Union[str, List[str]]] = None,
-    **options
+    **options,
 ) -> DataFrame:
     """Load a DataFrame from a Spark data source.
 
@@ -832,7 +833,7 @@ def read_excel(
     skipfooter=0,
     convert_float=True,
     mangle_dupe_cols=True,
-    **kwds
+    **kwds,
 ) -> Union[DataFrame, Series, OrderedDict]:
     """
     Read an Excel file into a Koalas DataFrame or Series.
@@ -1060,7 +1061,7 @@ def pd_read_excel(io_or_bin, sn, sq):
             skipfooter=skipfooter,
             convert_float=convert_float,
             mangle_dupe_cols=mangle_dupe_cols,
-            **kwds
+            **kwds,
         )
 
     if isinstance(io, str):
@@ -1595,6 +1596,161 @@ def pandas_to_datetime(pser_or_pdf) -> Series[np.datetime64]:
     )
 
 
+def date_range(
+    start=None,
+    end=None,
+    periods=None,
+    freq=None,
+    tz=None,
+    normalize=False,
+    name=None,
+    closed=None,
+    **kwargs,
+) -> DatetimeIndex:
+    """
+    Return a fixed frequency DatetimeIndex.
+
+    The range is generated with :func:`pandas.date_range` and the result is
+    converted with ``ks.from_pandas``.
+
+    Parameters
+    ----------
+    start : str or datetime-like, optional
+        Left bound for generating dates.
+    end : str or datetime-like, optional
+        Right bound for generating dates.
+    periods : int, optional
+        Number of periods to generate.
+    freq : str or DateOffset, default 'D'
+        Frequency strings can have multiples, e.g. '5H'.
+        The nanosecond frequencies ``'N'`` and ``'ns'`` are not supported.
+    tz : str or tzinfo, optional
+        Not supported; must be left as None. The resulting DatetimeIndex is
+        always timezone-naive.
+    normalize : bool, default False
+        Normalize start/end dates to midnight before generating date range.
+    name : str, default None
+        Name of the resulting DatetimeIndex.
+    closed : {None, 'left', 'right'}, optional
+        Make the interval closed with respect to the given frequency to
+        the 'left', 'right', or both sides (None, the default).
+    **kwargs
+        For compatibility. Forwarded unchanged to :func:`pandas.date_range`.
+
+    Returns
+    -------
+    rng : DatetimeIndex
+
+    Raises
+    ------
+    AssertionError
+        If ``freq`` is ``'N'`` or ``'ns'``, or if ``tz`` is not None.
+
+    See Also
+    --------
+    DatetimeIndex : An immutable container for datetimes.
+
+    Notes
+    -----
+    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
+    exactly three must be specified. If ``freq`` is omitted, the resulting
+    ``DatetimeIndex`` will have ``periods`` linearly spaced elements between
+    ``start`` and ``end`` (closed on both sides).
+
+    To learn more about the frequency strings, please see `this link
+    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
+
+    Examples
+    --------
+    **Specifying the values**
+
+    The next four examples generate the same `DatetimeIndex`, but vary
+    the combination of `start`, `end` and `periods`.
+
+    Specify `start` and `end`, with the default daily frequency.
+
+    >>> ks.date_range(start='1/1/2018', end='1/08/2018')
+    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
+                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
+                  dtype='datetime64[ns]', freq=None)
+
+    Specify `start` and `periods`, the number of periods (days).
+
+    >>> ks.date_range(start='1/1/2018', periods=8)
+    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
+                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
+                  dtype='datetime64[ns]', freq=None)
+
+    Specify `end` and `periods`, the number of periods (days).
+
+    >>> ks.date_range(end='1/1/2018', periods=8)
+    DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',
+                   '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],
+                  dtype='datetime64[ns]', freq=None)
+
+    Specify `start`, `end`, and `periods`; the frequency is generated
+    automatically (linearly spaced).
+
+    >>> ks.date_range(start='2018-04-24', end='2018-04-27', periods=3)
+    DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
+                   '2018-04-27 00:00:00'],
+                  dtype='datetime64[ns]', freq=None)
+
+    **Other Parameters**
+
+    Change the `freq` (frequency) to ``'M'`` (month end frequency).
+
+    >>> ks.date_range(start='1/1/2018', periods=5, freq='M')
+    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
+                   '2018-05-31'],
+                  dtype='datetime64[ns]', freq=None)
+
+    Multiples are allowed
+
+    >>> ks.date_range(start='1/1/2018', periods=5, freq='3M')
+    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
+                   '2019-01-31'],
+                  dtype='datetime64[ns]', freq=None)
+
+    `freq` can also be specified as an Offset object.
+
+    >>> ks.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3))
+    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
+                   '2019-01-31'],
+                  dtype='datetime64[ns]', freq=None)
+
+    `closed` controls whether to include `start` and `end` that are on the
+    boundary. The default includes boundary points on either end.
+
+    >>> ks.date_range(start='2017-01-01', end='2017-01-04', closed=None)
+    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'],
+                  dtype='datetime64[ns]', freq=None)
+
+    Use ``closed='left'`` to exclude `end` if it falls on the boundary.
+
+    >>> ks.date_range(start='2017-01-01', end='2017-01-04', closed='left')
+    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'],
+                  dtype='datetime64[ns]', freq=None)
+
+    Use ``closed='right'`` to exclude `start` if it falls on the boundary.
+
+    >>> ks.date_range(start='2017-01-01', end='2017-01-04', closed='right')
+    DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
+                  dtype='datetime64[ns]', freq=None)
+    """
+    # Raise explicitly instead of using ``assert`` so the checks still run
+    # under ``python -O``. AssertionError is kept as the exception type so
+    # callers that already catch it keep working.
+    if freq in ["N", "ns"]:
+        raise AssertionError("nanoseconds is not supported")
+    if tz is not None:
+        raise AssertionError("Localized DatetimeIndex is not supported")
+
+    # Build the index with pandas first, then convert it.
+    pidx = pd.date_range(
+        start=start,
+        end=end,
+        periods=periods,
+        freq=freq,
+        tz=tz,
+        normalize=normalize,
+        name=name,
+        closed=closed,
+        **kwargs,
+    )
+    # from_pandas is not annotated as returning DatetimeIndex, hence the cast.
+    return cast(DatetimeIndex, ks.from_pandas(pidx))
+
+
 def get_dummies(
     data,
     prefix=None,
@@ -2614,7 +2770,7 @@ def read_orc(
     path,
     columns: Optional[List[str]] = None,
     index_col: Optional[Union[str, List[str]]] = None,
-    **options
+    **options,
 ) -> "DataFrame":
     """
     Load an ORC object from the file path, returning a DataFrame.

diff --git a/databricks/koalas/tests/indexes/test_datetime.py b/databricks/koalas/tests/indexes/test_datetime.py
@@ -17,8 +17,8 @@
 from distutils.version import LooseVersion
 
 import pandas as pd
-import databricks.koalas as ks
 
+import databricks.koalas as ks
 from databricks.koalas.testing.utils import ReusedSQLTestCase, TestUtils
 
 

diff --git a/databricks/koalas/tests/test_namespace.py b/databricks/koalas/tests/test_namespace.py
@@ -67,6 +67,57 @@ def test_to_datetime(self):
             ks.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01")),
         )
 
+    def test_date_range(self):
+        # Argument sets for which Koalas must produce the same index as pandas.
+        matching_cases = [
+            dict(start="1/1/2018", end="1/08/2018"),
+            dict(start="1/1/2018", periods=8),
+            dict(end="1/1/2018", periods=8),
+            dict(start="2018-04-24", end="2018-04-27", periods=3),
+            dict(start="1/1/2018", periods=5, freq="M"),
+            dict(start="1/1/2018", periods=5, freq="3M"),
+            dict(start="1/1/2018", periods=5, freq=pd.offsets.MonthEnd(3)),
+            dict(start="2017-01-01", end="2017-01-04", closed="left"),
+            dict(start="2017-01-01", end="2017-01-04", closed="right"),
+        ]
+        for kwargs in matching_cases:
+            self.assert_eq(ks.date_range(**kwargs), pd.date_range(**kwargs))
+
+        # Time zones and nanosecond frequencies are rejected.
+        rejected_options = [dict(tz="Asia/Tokyo"), dict(freq="ns"), dict(freq="N")]
+        for option in rejected_options:
+            with self.assertRaises(AssertionError):
+                ks.date_range(start="1/1/2018", periods=5, **option)
+
     def test_concat_index_axis(self):
         pdf = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5], "C": [6, 7, 8]})
         # TODO: pdf.columns.names = ["ABC"]

diff --git a/docs/source/reference/general_functions.rst b/docs/source/reference/general_functions.rst
@@ -46,4 +46,4 @@ Top-level dealing with datetimelike
    :toctree: api/
 
    to_datetime
-
+   date_range
Original file line number	Diff line number	Diff line change
Expand Up		@@ -46,4 +46,4 @@ Top-level dealing with datetimelike
		:toctree: api/

		to_datetime

		date_range