Skip to content

Commit 743aae6

Browse files
authored
Add parse_dates option to Result.to_df export (#716)
1 parent 6523ad0 commit 743aae6

File tree

5 files changed

+492
-14
lines changed

5 files changed

+492
-14
lines changed

neo4j/_async/work/result.py

+30-5
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
ResultNotSingleError,
3030
)
3131
from ...meta import experimental
32+
from ...time import (
33+
Date,
34+
DateTime,
35+
)
3236
from ...work import ResultSummary
3337
from ..io import ConnectionErrorHandler
3438

@@ -527,7 +531,7 @@ async def data(self, *keys):
527531

528532
@experimental("pandas support is experimental and might be changed or "
529533
"removed in future versions")
530-
async def to_df(self, expand=False):
534+
async def to_df(self, expand=False, parse_dates=False):
531535
r"""Convert (the rest of) the result to a pandas DataFrame.
532536
533537
This method is only available if the `pandas` library is installed.
@@ -540,7 +544,7 @@ async def to_df(self, expand=False):
540544
for instance will return a DataFrame with two columns: ``n`` and ``m``
541545
and 10 rows.
542546
543-
:param expand: if :const:`True`, some structures in the result will be
547+
:param expand: If :const:`True`, some structures in the result will be
544548
recursively expanded (flattened out into multiple columns) like so
545549
(everything inside ``<...>`` is a placeholder):
546550
@@ -604,6 +608,11 @@ async def to_df(self, expand=False):
604608
:const:`dict` keys and variable names that contain ``.`` or ``\``
605609
will be escaped with a backslash (``\.`` and ``\\`` respectively).
606610
:type expand: bool
611+
:param parse_dates:
612+
If :const:`True`, columns that exclusively contain
613+
:class:`time.DateTime` objects, :class:`time.Date` objects, or
614+
:const:`None`, will be converted to :class:`pandas.Timestamp`.
615+
:type parse_dates: bool
607616
608617
:rtype: :py:class:`pandas.DataFrame`
609618
:raises ImportError: if `pandas` library is not available.
@@ -618,7 +627,7 @@ async def to_df(self, expand=False):
618627
import pandas as pd
619628

620629
if not expand:
621-
return pd.DataFrame(await self.values(), columns=self._keys)
630+
df = pd.DataFrame(await self.values(), columns=self._keys)
622631
else:
623632
df_keys = None
624633
rows = []
@@ -638,13 +647,29 @@ async def to_df(self, expand=False):
638647
df_keys = False
639648
rows.append(row)
640649
if df_keys is False:
641-
return pd.DataFrame(rows)
650+
df = pd.DataFrame(rows)
642651
else:
643652
columns = df_keys or [
644653
k.replace(".", "\\.").replace("\\", "\\\\")
645654
for k in self._keys
646655
]
647-
return pd.DataFrame(rows, columns=columns)
656+
df = pd.DataFrame(rows, columns=columns)
657+
if not parse_dates:
658+
return df
659+
dt_columns = df.columns[df.apply(
660+
lambda col: pd.api.types.infer_dtype(col) == "mixed" and col.map(
661+
lambda x: isinstance(x, (DateTime, Date, type(None)))
662+
).all()
663+
)]
664+
df[dt_columns] = df[dt_columns].apply(
665+
lambda col: col.map(
666+
lambda x:
667+
pd.Timestamp(x.iso_format())
668+
.replace(tzinfo=getattr(x, "tzinfo", None))
669+
if x else pd.NaT
670+
)
671+
)
672+
return df
648673

649674
def closed(self):
650675
"""Return True if the result has been closed.

neo4j/_sync/work/result.py

+30-5
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
ResultNotSingleError,
3030
)
3131
from ...meta import experimental
32+
from ...time import (
33+
Date,
34+
DateTime,
35+
)
3236
from ...work import ResultSummary
3337
from ..io import ConnectionErrorHandler
3438

@@ -527,7 +531,7 @@ def data(self, *keys):
527531

528532
@experimental("pandas support is experimental and might be changed or "
529533
"removed in future versions")
530-
def to_df(self, expand=False):
534+
def to_df(self, expand=False, parse_dates=False):
531535
r"""Convert (the rest of) the result to a pandas DataFrame.
532536
533537
This method is only available if the `pandas` library is installed.
@@ -540,7 +544,7 @@ def to_df(self, expand=False):
540544
for instance will return a DataFrame with two columns: ``n`` and ``m``
541545
and 10 rows.
542546
543-
:param expand: if :const:`True`, some structures in the result will be
547+
:param expand: If :const:`True`, some structures in the result will be
544548
recursively expanded (flattened out into multiple columns) like so
545549
(everything inside ``<...>`` is a placeholder):
546550
@@ -604,6 +608,11 @@ def to_df(self, expand=False):
604608
:const:`dict` keys and variable names that contain ``.`` or ``\``
605609
will be escaped with a backslash (``\.`` and ``\\`` respectively).
606610
:type expand: bool
611+
:param parse_dates:
612+
If :const:`True`, columns that exclusively contain
613+
:class:`time.DateTime` objects, :class:`time.Date` objects, or
614+
:const:`None`, will be converted to :class:`pandas.Timestamp`.
615+
:type parse_dates: bool
607616
608617
:rtype: :py:class:`pandas.DataFrame`
609618
:raises ImportError: if `pandas` library is not available.
@@ -618,7 +627,7 @@ def to_df(self, expand=False):
618627
import pandas as pd
619628

620629
if not expand:
621-
return pd.DataFrame(self.values(), columns=self._keys)
630+
df = pd.DataFrame(self.values(), columns=self._keys)
622631
else:
623632
df_keys = None
624633
rows = []
@@ -638,13 +647,29 @@ def to_df(self, expand=False):
638647
df_keys = False
639648
rows.append(row)
640649
if df_keys is False:
641-
return pd.DataFrame(rows)
650+
df = pd.DataFrame(rows)
642651
else:
643652
columns = df_keys or [
644653
k.replace(".", "\\.").replace("\\", "\\\\")
645654
for k in self._keys
646655
]
647-
return pd.DataFrame(rows, columns=columns)
656+
df = pd.DataFrame(rows, columns=columns)
657+
if not parse_dates:
658+
return df
659+
dt_columns = df.columns[df.apply(
660+
lambda col: pd.api.types.infer_dtype(col) == "mixed" and col.map(
661+
lambda x: isinstance(x, (DateTime, Date, type(None)))
662+
).all()
663+
)]
664+
df[dt_columns] = df[dt_columns].apply(
665+
lambda col: col.map(
666+
lambda x:
667+
pd.Timestamp(x.iso_format())
668+
.replace(tzinfo=getattr(x, "tzinfo", None))
669+
if x else pd.NaT
670+
)
671+
)
672+
return df
648673

649674
def closed(self):
650675
"""Return True if the result has been closed.

setup.py

+4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
install_requires = [
3535
"pytz",
3636
]
37+
extra_require = {
38+
"pandas": ["pandas>=1.0.0"],
39+
}
3740
classifiers = [
3841
"Intended Audience :: Developers",
3942
"License :: OSI Approved :: Apache Software License",
@@ -67,6 +70,7 @@
6770
"keywords": "neo4j graph database",
6871
"url": "https://github.com/neo4j/neo4j-python-driver",
6972
"install_requires": install_requires,
73+
"extra_require": extra_require,
7074
"classifiers": classifiers,
7175
"packages": packages,
7276
"entry_points": entry_points,

0 commit comments

Comments
 (0)