Skip to content

Commit 7473d03

Browse files
committed
Add expand option to Result.to_df.
This option (when `True`) will make the driver flatten nodes, relationships, lists, and dicts into multiple columns of the DataFrame.
1 parent 8c902f0 commit 7473d03

File tree

7 files changed

+567
-12
lines changed

7 files changed

+567
-12
lines changed

docs/source/api.rst

+7-3
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ Path :class:`neo4j.graph.Path`
987987
Node
988988
====
989989

990-
.. autoclass:: neo4j.graph.Node()
990+
.. autoclass:: neo4j.graph.Node
991991

992992
.. describe:: node == other
993993

@@ -1022,6 +1022,8 @@ Node
10221022

10231023
.. autoattribute:: id
10241024

1025+
.. autoattribute:: element_id
1026+
10251027
.. autoattribute:: labels
10261028

10271029
.. automethod:: get
@@ -1036,7 +1038,7 @@ Node
10361038
Relationship
10371039
============
10381040

1039-
.. autoclass:: neo4j.graph.Relationship()
1041+
.. autoclass:: neo4j.graph.Relationship
10401042

10411043
.. describe:: relationship == other
10421044

@@ -1076,6 +1078,8 @@ Relationship
10761078

10771079
.. autoattribute:: id
10781080

1081+
.. autoattribute:: element_id
1082+
10791083
.. autoattribute:: nodes
10801084

10811085
.. autoattribute:: start_node
@@ -1097,7 +1101,7 @@ Relationship
10971101
Path
10981102
====
10991103

1100-
.. autoclass:: neo4j.graph.Path()
1104+
.. autoclass:: neo4j.graph.Path
11011105

11021106
.. describe:: path == other
11031107

neo4j/_async/work/result.py

+92-4
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
from warnings import warn
2121

2222
from ..._async_compat.util import AsyncUtil
23-
from ...data import DataDehydrator
23+
from ...data import (
24+
DataDehydrator,
25+
RecordTableRowExporter,
26+
)
2427
from ...exceptions import (
2528
ResultConsumedError,
2629
ResultNotSingleError,
@@ -524,15 +527,74 @@ async def data(self, *keys):
524527

525528
@experimental("pandas support is experimental and might be changed or "
526529
"removed in future versions")
527-
async def to_df(self):
528-
"""Convert (the rest of) the result to a pandas DataFrame.
530+
async def to_df(self, *keys, expand=False):
531+
r"""Convert (the rest of) the result to a pandas DataFrame.
529532
530533
This method is only available if the `pandas` library is installed.
531534
532535
``tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m").to_df()``, for
533536
instance will return a DataFrame with two columns: ``n`` and ``m`` and
534537
10 rows.
535538
539+
:param expand: if :const:`True`, some structures in the result will be
540+
recursively expanded (flattened out into multiple columns) like so:
541+
542+
* :class:`.Node` objects under any variable ``<n>`` will be
543+
expanded into columns (the recursion stops here)
544+
545+
* ``<n>().prop.<property_name>`` (any) for each property of the
546+
node.
547+
* ``<n>().element_id`` (str) the node's element id.
548+
See :attr:`.Node.element_id`.
549+
* ``<n>().labels`` (list of str) the node's labels.
550+
See :attr:`.Node.labels`.
551+
552+
* :class:`.Relationship` objects under any variable ``<r>``
553+
will be expanded into columns (the recursion stops here)
554+
555+
* ``<r>->.prop.<property_name>`` (any) for each property of the
556+
relationship.
557+
* ``<r>->.element_id`` (str) the relationship's element id.
558+
See :attr:`.Relationship.element_id`.
559+
* ``<r>->.start.element_id`` (str) the relationship's
560+
start node's element id.
561+
See :attr:`.Relationship.start_node`.
562+
* ``<r>->.end.element_id`` (str) the relationship's
563+
end node's element id.
564+
See :attr:`.Relationship.end_node`.
565+
* ``<r>->.type`` (str) the relationship's type.
566+
See :attr:`.Relationship.type`.
567+
568+
* :const:`list` objects under any variable ``<l>`` will be expanded
569+
into
570+
571+
* ``<l>[].0`` (any) the 1st list element
572+
* ``<l>[].1`` (any) the 2nd list element
573+
* ...
574+
575+
* :const:`dict` objects under any variable ``<d>`` will be expanded
576+
into
577+
578+
* ``<d>{}.<key1>`` (any) the value stored under the 1st key of the dict
579+
* ``<d>{}.<key2>`` (any) the value stored under the 2nd key of the dict
580+
* ...
581+
582+
* :const:`list` and :const:`dict` objects are expanded recursively.
583+
Example::
584+
585+
[{"foo": "bar", "baz": [42, 0]}, "foobar"]
586+
587+
stored under a variable ``<l>`` will be expanded to::
588+
589+
{"<l>[].0{}.foo": "bar", "<l>[].0{}.baz[].0": 42, "<l>[].0{}.baz[].1": 0, "<l>[].1": "foobar"}
590+
591+
* Everything else (including :class:`.Path` objects) will not
592+
be flattened.
593+
594+
:const:`dict` keys and variable names that contain ``.`` or ``\``
595+
will be escaped with a backslash (``\.`` and ``\\`` respectively).
596+
:type expand: bool
597+
536598
:rtype: :py:class:`pandas.DataFrame`
537599
:raises ImportError: if `pandas` library is not available.
538600
:raises ResultConsumedError: if the transaction from which this result
@@ -545,7 +607,33 @@ async def to_df(self):
545607
"""
546608
import pandas as pd
547609

548-
return pd.DataFrame(await self.values(), columns=self._keys)
610+
if not expand:
611+
return pd.DataFrame(await self.values(*keys),
612+
columns=keys or self._keys)
613+
else:
614+
df_keys = None
615+
rows = []
616+
async for record in self:
617+
row = RecordTableRowExporter().transform(
618+
dict(record.items(*keys))
619+
)
620+
if df_keys == row.keys():
621+
rows.append(row.values())
622+
elif df_keys is None:
623+
df_keys = row.keys()
624+
rows.append(row.values())
625+
elif df_keys is False:
626+
rows.append(row)
627+
else:
628+
# The rows have different keys. We need to pass a list
629+
# of dicts to pandas
630+
rows = [{k: v for k, v in zip(df_keys, r)} for r in rows]
631+
df_keys = False
632+
rows.append(row)
633+
if df_keys is False:
634+
return pd.DataFrame(rows)
635+
else:
636+
return pd.DataFrame(rows, columns=df_keys or self._keys)
549637

550638
def closed(self):
551639
"""Return True if the result has been closed.

neo4j/_sync/work/result.py

+92-4
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
from warnings import warn
2121

2222
from ..._async_compat.util import Util
23-
from ...data import DataDehydrator
23+
from ...data import (
24+
DataDehydrator,
25+
RecordTableRowExporter,
26+
)
2427
from ...exceptions import (
2528
ResultConsumedError,
2629
ResultNotSingleError,
@@ -524,15 +527,74 @@ def data(self, *keys):
524527

525528
@experimental("pandas support is experimental and might be changed or "
526529
"removed in future versions")
527-
def to_df(self):
528-
"""Convert (the rest of) the result to a pandas DataFrame.
530+
def to_df(self, *keys, expand=False):
531+
r"""Convert (the rest of) the result to a pandas DataFrame.
529532
530533
This method is only available if the `pandas` library is installed.
531534
532535
``tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m").to_df()``, for
533536
instance will return a DataFrame with two columns: ``n`` and ``m`` and
534537
10 rows.
535538
539+
:param expand: if :const:`True`, some structures in the result will be
540+
recursively expanded (flattened out into multiple columns) like so:
541+
542+
* :class:`.Node` objects under any variable ``<n>`` will be
543+
expanded into columns (the recursion stops here)
544+
545+
* ``<n>().prop.<property_name>`` (any) for each property of the
546+
node.
547+
* ``<n>().element_id`` (str) the node's element id.
548+
See :attr:`.Node.element_id`.
549+
* ``<n>().labels`` (list of str) the node's labels.
550+
See :attr:`.Node.labels`.
551+
552+
* :class:`.Relationship` objects under any variable ``<r>``
553+
will be expanded into columns (the recursion stops here)
554+
555+
* ``<r>->.prop.<property_name>`` (any) for each property of the
556+
relationship.
557+
* ``<r>->.element_id`` (str) the relationship's element id.
558+
See :attr:`.Relationship.element_id`.
559+
* ``<r>->.start.element_id`` (str) the relationship's
560+
start node's element id.
561+
See :attr:`.Relationship.start_node`.
562+
* ``<r>->.end.element_id`` (str) the relationship's
563+
end node's element id.
564+
See :attr:`.Relationship.end_node`.
565+
* ``<r>->.type`` (str) the relationship's type.
566+
See :attr:`.Relationship.type`.
567+
568+
* :const:`list` objects under any variable ``<l>`` will be expanded
569+
into
570+
571+
* ``<l>[].0`` (any) the 1st list element
572+
* ``<l>[].1`` (any) the 2nd list element
573+
* ...
574+
575+
* :const:`dict` objects under any variable ``<d>`` will be expanded
576+
into
577+
578+
* ``<d>{}.<key1>`` (any) the value stored under the 1st key of the dict
579+
* ``<d>{}.<key2>`` (any) the value stored under the 2nd key of the dict
580+
* ...
581+
582+
* :const:`list` and :const:`dict` objects are expanded recursively.
583+
Example::
584+
585+
[{"foo": "bar", "baz": [42, 0]}, "foobar"]
586+
587+
stored under a variable ``<l>`` will be expanded to::
588+
589+
{"<l>[].0{}.foo": "bar", "<l>[].0{}.baz[].0": 42, "<l>[].0{}.baz[].1": 0, "<l>[].1": "foobar"}
590+
591+
* Everything else (including :class:`.Path` objects) will not
592+
be flattened.
593+
594+
:const:`dict` keys and variable names that contain ``.`` or ``\``
595+
will be escaped with a backslash (``\.`` and ``\\`` respectively).
596+
:type expand: bool
597+
536598
:rtype: :py:class:`pandas.DataFrame`
537599
:raises ImportError: if `pandas` library is not available.
538600
:raises ResultConsumedError: if the transaction from which this result
@@ -545,7 +607,33 @@ def to_df(self):
545607
"""
546608
import pandas as pd
547609

548-
return pd.DataFrame(self.values(), columns=self._keys)
610+
if not expand:
611+
return pd.DataFrame(self.values(*keys),
612+
columns=keys or self._keys)
613+
else:
614+
df_keys = None
615+
rows = []
616+
for record in self:
617+
row = RecordTableRowExporter().transform(
618+
dict(record.items(*keys))
619+
)
620+
if df_keys == row.keys():
621+
rows.append(row.values())
622+
elif df_keys is None:
623+
df_keys = row.keys()
624+
rows.append(row.values())
625+
elif df_keys is False:
626+
rows.append(row)
627+
else:
628+
# The rows have different keys. We need to pass a list
629+
# of dicts to pandas
630+
rows = [{k: v for k, v in zip(df_keys, r)} for r in rows]
631+
df_keys = False
632+
rows.append(row)
633+
if df_keys is False:
634+
return pd.DataFrame(rows)
635+
else:
636+
return pd.DataFrame(rows, columns=df_keys or self._keys)
549637

550638
def closed(self):
551639
"""Return True if the result has been closed.

neo4j/data.py

+53
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,59 @@ def transform(self, x):
297297
return x
298298

299299

300+
class RecordTableRowExporter(DataTransformer):
301+
"""Transformer class used by the :meth:`.Result.to_df` method."""
302+
303+
def transform(self, x):
304+
assert isinstance(x, Mapping)
305+
t = type(x)
306+
return t(item
307+
for k, v in x.items()
308+
for item in self._transform(
309+
v, prefix=k.replace("\\", "\\\\").replace(".", "\\.")
310+
).items())
311+
312+
def _transform(self, x, prefix):
313+
if isinstance(x, Node):
314+
res = {
315+
"%s().element_id" % prefix: x.element_id,
316+
"%s().labels" % prefix: x.labels,
317+
}
318+
res.update(("%s().prop.%s" % (prefix, k), v) for k, v in x.items())
319+
return res
320+
elif isinstance(x, Relationship):
321+
res = {
322+
"%s->.element_id" % prefix: x.element_id,
323+
"%s->.start.element_id" % prefix: x.start_node.element_id,
324+
"%s->.end.element_id" % prefix: x.end_node.element_id,
325+
"%s->.type" % prefix: x.__class__.__name__,
326+
}
327+
res.update(("%s->.prop.%s" % (prefix, k), v) for k, v in x.items())
328+
return res
329+
elif isinstance(x, Path) or isinstance(x, str):
330+
return {prefix: x}
331+
elif isinstance(x, Sequence):
332+
return dict(
333+
item
334+
for i, v in enumerate(x)
335+
for item in self._transform(
336+
v, prefix="%s[].%i" % (prefix, i)
337+
).items()
338+
)
339+
elif isinstance(x, Mapping):
340+
t = type(x)
341+
return t(
342+
item
343+
for k, v in x.items()
344+
for item in self._transform(
345+
v, prefix="%s{}.%s" % (prefix, k.replace("\\", "\\\\")
346+
.replace(".", "\\."))
347+
).items()
348+
)
349+
else:
350+
return {prefix: x}
351+
352+
300353
class DataHydrator:
301354
# TODO: extend DataTransformer
302355

neo4j/graph/__init__.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ def id(self):
207207
Depending on the version of the server this entity was retrieved from,
208208
this may be empty (None).
209209
210+
.. Warning::
211+
This value can change for the same entity across multiple
212+
queries. Don't rely on it for cross-query computations.
213+
210214
.. deprecated:: 5.0
211215
Use :attr:`.element_id` instead.
212216
@@ -218,7 +222,11 @@ def id(self):
218222
def element_id(self):
219223
"""The identity of this entity in its container :class:`.Graph`.
220224
221-
.. added:: 5.0
225+
.. Warning::
226+
This value can change for the same entity across multiple
227+
queries. Don't rely on it for cross-query computations.
228+
229+
.. versionadded:: 5.0
222230
223231
:rtype: str
224232
"""

0 commit comments

Comments
 (0)