Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions databricks/koalas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def assert_pyspark_version():
"read_csv",
"read_parquet",
"to_datetime",
"date_range",
"from_pandas",
"get_dummies",
"DataFrame",
Expand Down
16 changes: 8 additions & 8 deletions databricks/koalas/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def dayofweek(self) -> Index:

Examples
--------
>>> idx = ks.from_pandas(pd.date_range('2016-12-31', '2017-01-08', freq='D'))
>>> idx = ks.date_range('2016-12-31', '2017-01-08', freq='D')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

>>> idx.dayofweek
Int64Index([5, 6, 0, 1, 2, 3, 4, 5, 6], dtype='int64')
"""
Expand Down Expand Up @@ -277,7 +277,7 @@ def is_month_start(self) -> Index:

Examples
--------
>>> idx = ks.from_pandas(pd.date_range("2018-02-27", periods=3))
>>> idx = ks.date_range("2018-02-27", periods=3)
>>> idx.is_month_start
Index([False, False, True], dtype='object')
"""
Expand All @@ -300,7 +300,7 @@ def is_month_end(self) -> Index:

Examples
--------
>>> idx = ks.from_pandas(pd.date_range("2018-02-27", periods=3))
>>> idx = ks.date_range("2018-02-27", periods=3)
>>> idx.is_month_end
Index([False, True, False], dtype='object')
"""
Expand All @@ -323,7 +323,7 @@ def is_quarter_start(self) -> Index:

Examples
--------
>>> idx = ks.from_pandas(pd.date_range('2017-03-30', periods=4))
>>> idx = ks.date_range('2017-03-30', periods=4)
>>> idx.is_quarter_start
Index([False, False, True, False], dtype='object')
"""
Expand All @@ -346,7 +346,7 @@ def is_quarter_end(self) -> Index:

Examples
--------
>>> idx = ks.from_pandas(pd.date_range('2017-03-30', periods=4))
>>> idx = ks.date_range('2017-03-30', periods=4)
>>> idx.is_quarter_end
Index([False, True, False, False], dtype='object')
"""
Expand All @@ -368,7 +368,7 @@ def is_year_start(self) -> Index:

Examples
--------
>>> idx = ks.from_pandas(pd.date_range("2017-12-30", periods=3))
>>> idx = ks.date_range("2017-12-30", periods=3)
>>> idx.is_year_start
Index([False, False, True], dtype='object')
"""
Expand All @@ -390,7 +390,7 @@ def is_year_end(self) -> Index:

Examples
--------
>>> idx = ks.from_pandas(pd.date_range("2017-12-30", periods=3))
>>> idx = ks.date_range("2017-12-30", periods=3)
>>> idx.is_year_end
Index([False, True, False], dtype='object')
"""
Expand All @@ -413,7 +413,7 @@ def is_leap_year(self) -> Index:

Examples
--------
>>> idx = ks.from_pandas(pd.date_range("2012-01-01", "2015-01-01", freq="Y"))
>>> idx = ks.date_range("2012-01-01", "2015-01-01", freq="Y")
>>> idx.is_leap_year
Index([True, False, False], dtype='object')
"""
Expand Down
170 changes: 163 additions & 7 deletions databricks/koalas/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
)
from databricks.koalas.series import Series, first_series
from databricks.koalas.spark.utils import as_nullable_spark_type, force_decimal_precision_scale
from databricks.koalas.indexes import Index
from databricks.koalas.indexes import Index, DatetimeIndex


__all__ = [
Expand All @@ -83,6 +83,7 @@
"read_excel",
"read_html",
"to_datetime",
"date_range",
"get_dummies",
"concat",
"melt",
Expand Down Expand Up @@ -199,7 +200,7 @@ def read_csv(
quotechar=None,
escapechar=None,
comment=None,
**options
**options,
) -> Union[DataFrame, Series]:
"""Read CSV (comma-separated) file into DataFrame or Series.

Expand Down Expand Up @@ -467,7 +468,7 @@ def read_delta(
version: Optional[str] = None,
timestamp: Optional[str] = None,
index_col: Optional[Union[str, List[str]]] = None,
**options
**options,
) -> DataFrame:
"""
Read a Delta Lake table on some file system and return a DataFrame.
Expand Down Expand Up @@ -596,7 +597,7 @@ def read_spark_io(
format: Optional[str] = None,
schema: Union[str, "StructType"] = None,
index_col: Optional[Union[str, List[str]]] = None,
**options
**options,
) -> DataFrame:
"""Load a DataFrame from a Spark data source.

Expand Down Expand Up @@ -832,7 +833,7 @@ def read_excel(
skipfooter=0,
convert_float=True,
mangle_dupe_cols=True,
**kwds
**kwds,
) -> Union[DataFrame, Series, OrderedDict]:
"""
Read an Excel file into a Koalas DataFrame or Series.
Expand Down Expand Up @@ -1060,7 +1061,7 @@ def pd_read_excel(io_or_bin, sn, sq):
skipfooter=skipfooter,
convert_float=convert_float,
mangle_dupe_cols=mangle_dupe_cols,
**kwds
**kwds,
)

if isinstance(io, str):
Expand Down Expand Up @@ -1595,6 +1596,161 @@ def pandas_to_datetime(pser_or_pdf) -> Series[np.datetime64]:
)


def date_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    tz=None,
    normalize=False,
    name=None,
    closed=None,
    **kwargs,
) -> DatetimeIndex:
    """
    Return a fixed frequency DatetimeIndex.

    The dates are generated by ``pandas.date_range`` and the resulting pandas
    index is then converted with ``ks.from_pandas``.

    Parameters
    ----------
    start : str or datetime-like, optional
        Left bound for generating dates.
    end : str or datetime-like, optional
        Right bound for generating dates.
    periods : int, optional
        Number of periods to generate.
    freq : str or DateOffset, default 'D'
        Frequency strings can have multiples, e.g. '5H'. Nanosecond-based
        frequencies (e.g. 'N', 'ns', '5N', ``pd.offsets.Nano(5)``) are not
        supported.
    tz : None
        Accepted only for signature compatibility with pandas. A localized
        DatetimeIndex is not supported, so any value other than ``None`` is
        rejected.
    normalize : bool, default False
        Normalize start/end dates to midnight before generating date range.
    name : str, default None
        Name of the resulting DatetimeIndex.
    closed : {None, 'left', 'right'}, optional
        Make the interval closed with respect to the given frequency to
        the 'left', 'right', or both sides (None, the default).
    **kwargs
        For compatibility. Has no effect on the result.

    Returns
    -------
    rng : DatetimeIndex

    Raises
    ------
    AssertionError
        If ``freq`` is a nanosecond-based frequency, or if ``tz`` is not
        ``None``.

    See Also
    --------
    DatetimeIndex : An immutable container for datetimes.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``DatetimeIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end`` (closed on both sides).

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    **Specifying the values**

    The next four examples generate the same `DatetimeIndex`, but vary
    the combination of `start`, `end` and `periods`.

    Specify `start` and `end`, with the default daily frequency.

    >>> ks.date_range(start='1/1/2018', end='1/08/2018')
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq=None)

    Specify `start` and `periods`, the number of periods (days).

    >>> ks.date_range(start='1/1/2018', periods=8)
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq=None)

    Specify `end` and `periods`, the number of periods (days).

    >>> ks.date_range(end='1/1/2018', periods=8)
    DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',
                   '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],
                  dtype='datetime64[ns]', freq=None)

    Specify `start`, `end`, and `periods`; the frequency is generated
    automatically (linearly spaced).

    >>> ks.date_range(start='2018-04-24', end='2018-04-27', periods=3)
    DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
                   '2018-04-27 00:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Other Parameters**

    Changed the `freq` (frequency) to ``'M'`` (month end frequency).

    >>> ks.date_range(start='1/1/2018', periods=5, freq='M')
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
                   '2018-05-31'],
                  dtype='datetime64[ns]', freq=None)

    Multiples are allowed

    >>> ks.date_range(start='1/1/2018', periods=5, freq='3M')
    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
                   '2019-01-31'],
                  dtype='datetime64[ns]', freq=None)

    `freq` can also be specified as an Offset object.

    >>> ks.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3))
    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
                   '2019-01-31'],
                  dtype='datetime64[ns]', freq=None)

    `closed` controls whether to include `start` and `end` that are on the
    boundary. The default includes boundary points on either end.

    >>> ks.date_range(start='2017-01-01', end='2017-01-04', closed=None)
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'],
                  dtype='datetime64[ns]', freq=None)

    Use ``closed='left'`` to exclude `end` if it falls on the boundary.

    >>> ks.date_range(start='2017-01-01', end='2017-01-04', closed='left')
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'],
                  dtype='datetime64[ns]', freq=None)

    Use ``closed='right'`` to exclude `start` if it falls on the boundary.

    >>> ks.date_range(start='2017-01-01', end='2017-01-04', closed='right')
    DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
                  dtype='datetime64[ns]', freq=None)
    """
    # Validation uses explicit raises rather than `assert` statements so the
    # checks still run under `python -O`. AssertionError is kept as the
    # exception type so existing callers that catch it keep working.
    if freq in ["N", "ns"]:
        raise AssertionError("nanoseconds is not supported")
    # The literal check above misses multiples such as "5N" or Nano(5);
    # resolve the frequency the way pandas does and reject any Nano offset.
    if freq is not None and isinstance(
        pd.tseries.frequencies.to_offset(freq), pd.offsets.Nano
    ):
        raise AssertionError("nanoseconds is not supported")
    if tz is not None:
        raise AssertionError("Localized DatetimeIndex is not supported")

    # NOTE(review): newer pandas releases appear to replace `closed` with
    # `inclusive` -- confirm against the pandas versions this project supports.
    pandas_index = pd.date_range(
        start=start,
        end=end,
        periods=periods,
        freq=freq,
        tz=tz,
        normalize=normalize,
        name=name,
        closed=closed,
        **kwargs,
    )
    return cast(DatetimeIndex, ks.from_pandas(pandas_index))


def get_dummies(
data,
prefix=None,
Expand Down Expand Up @@ -2614,7 +2770,7 @@ def read_orc(
path,
columns: Optional[List[str]] = None,
index_col: Optional[Union[str, List[str]]] = None,
**options
**options,
) -> "DataFrame":
"""
Load an ORC object from the file path, returning a DataFrame.
Expand Down
2 changes: 1 addition & 1 deletion databricks/koalas/tests/indexes/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
from distutils.version import LooseVersion

import pandas as pd
import databricks.koalas as ks

import databricks.koalas as ks
from databricks.koalas.testing.utils import ReusedSQLTestCase, TestUtils


Expand Down
51 changes: 51 additions & 0 deletions databricks/koalas/tests/test_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,57 @@ def test_to_datetime(self):
ks.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01")),
)

def test_date_range(self):
    """Check ks.date_range against pd.date_range and its unsupported options."""
    # Each keyword set must produce the same index from Koalas as from pandas.
    matching_cases = [
        dict(start="1/1/2018", end="1/08/2018"),
        dict(start="1/1/2018", periods=8),
        dict(end="1/1/2018", periods=8),
        dict(start="2018-04-24", end="2018-04-27", periods=3),
        dict(start="1/1/2018", periods=5, freq="M"),
        dict(start="1/1/2018", periods=5, freq="3M"),
        dict(start="1/1/2018", periods=5, freq=pd.offsets.MonthEnd(3)),
        dict(start="2017-01-01", end="2017-01-04", closed="left"),
        dict(start="2017-01-01", end="2017-01-04", closed="right"),
    ]
    for params in matching_cases:
        self.assert_eq(ks.date_range(**params), pd.date_range(**params))

    # A time zone and nanosecond frequencies are rejected with AssertionError.
    rejected_options = [dict(tz="Asia/Tokyo"), dict(freq="ns"), dict(freq="N")]
    for option in rejected_options:
        with self.assertRaises(AssertionError):
            ks.date_range(start="1/1/2018", periods=5, **option)

def test_concat_index_axis(self):
pdf = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5], "C": [6, 7, 8]})
# TODO: pdf.columns.names = ["ABC"]
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/general_functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ Top-level dealing with datetimelike
:toctree: api/

to_datetime

date_range