Skip to content

FEAT-#1222: Implement DataFrame.asof() without Pandas fallback #1989

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged: 9 commits, on Sep 4, 2020.
2 changes: 1 addition & 1 deletion docs/supported_apis/dataframe_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ default to pandas.
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``asfreq`` | `asfreq`_ | D | |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``asof`` | `asof`_ | D | |
| ``asof`` | `asof`_ | Y | |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``assign`` | `assign`_ | Y | |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
Expand Down
23 changes: 23 additions & 0 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,29 @@ def append(self, other, ignore_index=False, verify_integrity=False, sort=False):
)
return DataFrame(query_compiler=query_compiler)

def asof(self, where, subset=None):
    """
    Return the last row(s) at or before `where` that contain no NaN values.

    Parameters
    ----------
    where : scalar or list-like
        Label, or collection of labels, to look up in the index. A
        ``pandas.Index`` is copied so the caller's object is never mutated.
    subset : label or list of labels, optional
        Columns inspected for NaN values. When omitted, every column is
        inspected.

    Returns
    -------
    Series or DataFrame
        A Series (indexed by the columns) when `where` is a scalar, otherwise
        a DataFrame indexed by `where`. Lookups with no matching row are
        filled by ``reindex`` with missing values.
    """
    scalar = not is_list_like(where)
    if isinstance(where, pandas.Index):
        # Prevent accidental mutation of original:
        where = where.copy()
    else:
        where = pandas.Index([where] if scalar else where)

    # Only rows without NaN in the inspected columns are valid candidates.
    candidates = self if subset is None else self[subset]
    no_na_index = candidates.dropna().index
    # For each requested label, pick the matching (or previous) valid label.
    new_index = pandas.Index([no_na_index.asof(label) for label in where])
    # Rows are always taken from the full frame, even if `subset` was given.
    result = self.reindex(new_index)
    result.index = where

    if scalar:
        # Need to return a Series. Squeeze the row axis only: an unqualified
        # squeeze() would also collapse a single-column frame to a scalar.
        result = result.squeeze(axis=0)
    return result

def assign(self, **kwargs):
df = self.copy()
for k, v in kwargs.items():
Expand Down
17 changes: 0 additions & 17 deletions modin/pandas/test/dataframe/test_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,6 @@ def test_asfreq():
df.asfreq(freq="30S")


def test_asof():
    """Check that ``DataFrame.asof`` emits a ``UserWarning`` for this frame."""
    timestamps = [
        "2018-02-27 09:01:00",
        "2018-02-27 09:02:00",
        "2018-02-27 09:03:00",
        "2018-02-27 09:04:00",
        "2018-02-27 09:05:00",
    ]
    frame = pd.DataFrame(
        {"a": [10, 20, 30, 40, 50], "b": [None, None, None, None, 500]},
        index=pd.DatetimeIndex(timestamps),
    )
    # Lookup times fall between existing index entries.
    lookup_times = ["2018-02-27 09:03:30", "2018-02-27 09:04:30"]
    with pytest.warns(UserWarning):
        frame.asof(pd.DatetimeIndex(lookup_times))


def test_assign():
data = test_data_values[0]
modin_df = pd.DataFrame(data)
Expand Down
34 changes: 34 additions & 0 deletions modin/pandas/test/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,40 @@
matplotlib.use("Agg")


def test_asof():
    """Compare ``asof`` results of the ``pd`` frame against the ``pandas`` frame.

    Covers list-like lookups (no subset, subset ``["a"]``, subset ``["b"]``)
    and a scalar lookup (no subset, subset ``["a"]``).
    """
    timestamps = [
        "2018-02-27 09:01:00",
        "2018-02-27 09:02:00",
        "2018-02-27 09:03:00",
        "2018-02-27 09:04:00",
        "2018-02-27 09:05:00",
    ]
    data = {"a": [10, 20, 30, 40, 50], "b": [None, None, None, None, 500]}
    index = pd.DatetimeIndex(timestamps)
    modin_df = pd.DataFrame(data, index=index)
    pandas_df = pandas.DataFrame(data, index=index)

    # List-like `where`: both libraries get an equivalent DatetimeIndex.
    dates = ["2018-02-27 09:03:30", "2018-02-27 09:04:30"]
    modin_dates = pd.DatetimeIndex(dates)
    pandas_dates = pandas.DatetimeIndex(dates)
    for kwargs in ({}, {"subset": ["a"]}, {"subset": ["b"]}):
        df_equals(
            modin_df.asof(modin_dates, **kwargs),
            pandas_df.asof(pandas_dates, **kwargs),
        )

    # Scalar `where`: a single timestamp.
    date = pd.to_datetime(dates[0])
    for kwargs in ({}, {"subset": ["a"]}):
        df_equals(
            modin_df.asof(date, **kwargs),
            pandas_df.asof(date, **kwargs),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_first_valid_index(data):
modin_df = pd.DataFrame(data)
Expand Down