Skip to content

Commit 4da211b

Browse files
committed
Add Result.to_df to export records as pandas DataFrame
1 parent 79f858c commit 4da211b

File tree

7 files changed

+159
-14
lines changed

7 files changed

+159
-14
lines changed

docs/source/api.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -808,14 +808,14 @@ A :class:`neo4j.Result` is attached to an active connection, through a :class:`n
808808

809809
.. automethod:: graph
810810

811-
**This is experimental.** (See :ref:`filter-warnings-ref`)
812-
813811
.. automethod:: value
814812

815813
.. automethod:: values
816814

817815
.. automethod:: data
818816

817+
.. automethod:: to_df
818+
819819
.. automethod:: closed
820820

821821
See https://neo4j.com/docs/python-manual/current/cypher-workflow/#python-driver-type-mapping for more about type mapping.

docs/source/async_api.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -502,14 +502,14 @@ A :class:`neo4j.AsyncResult` is attached to an active connection, through a :cla
502502

503503
.. automethod:: graph
504504

505-
**This is experimental.** (See :ref:`filter-warnings-ref`)
506-
507505
.. automethod:: value
508506

509507
.. automethod:: values
510508

511509
.. automethod:: data
512510

511+
.. automethod:: to_df
512+
513513
.. automethod:: closed
514514

515515
See https://neo4j.com/docs/python-manual/current/cypher-workflow/#python-driver-type-mapping for more about type mapping.

neo4j/_async/work/result.py

+25
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
ResultConsumedError,
2525
ResultNotSingleError,
2626
)
27+
from ...meta import experimental
2728
from ...work import ResultSummary
2829
from ..io import ConnectionErrorHandler
2930

@@ -392,6 +393,8 @@ async def graph(self):
392393
:raises ResultConsumedError: if the transaction from which this result
393394
was obtained has been closed or the Result has been explicitly
394395
consumed.
396+
397+
**This is experimental.** (See :ref:`filter-warnings-ref`)
395398
"""
396399
await self._buffer_all()
397400
return self._hydrant.graph
@@ -444,6 +447,28 @@ async def data(self, *keys):
444447
"""
445448
return [record.data(*keys) async for record in self]
446449

450+
@experimental("pandas support is experimental and might be changed or "
451+
"removed in future versions")
452+
async def to_df(self):
453+
"""Convert (the rest of) the result to a pandas DataFrame.
454+
455+
This method is only available if the `pandas` library is installed.
456+
457+
``tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m").to_df()``, for
458+
instance, will return a DataFrame with two columns: ``n`` and ``m`` and
459+
10 rows.
460+
461+
:rtype: :py:class:`pandas.DataFrame`
462+
:raises ImportError: if `pandas` library is not available.
463+
464+
**This is experimental.**
465+
``pandas`` support might be changed or removed in future versions
466+
without warning. (See :ref:`filter-warnings-ref`)
467+
"""
468+
import pandas as pd
469+
470+
return pd.DataFrame(await self.values(), columns=self._keys)
471+
447472
def closed(self):
448473
"""Return True if the result has been closed.
449474

neo4j/_sync/work/result.py

+25
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
ResultConsumedError,
2525
ResultNotSingleError,
2626
)
27+
from ...meta import experimental
2728
from ...work import ResultSummary
2829
from ..io import ConnectionErrorHandler
2930

@@ -392,6 +393,8 @@ def graph(self):
392393
:raises ResultConsumedError: if the transaction from which this result
393394
was obtained has been closed or the Result has been explicitly
394395
consumed.
396+
397+
**This is experimental.** (See :ref:`filter-warnings-ref`)
395398
"""
396399
self._buffer_all()
397400
return self._hydrant.graph
@@ -444,6 +447,28 @@ def data(self, *keys):
444447
"""
445448
return [record.data(*keys) for record in self]
446449

450+
@experimental("pandas support is experimental and might be changed or "
451+
"removed in future versions")
452+
def to_df(self):
453+
"""Convert (the rest of) the result to a pandas DataFrame.
454+
455+
This method is only available if the `pandas` library is installed.
456+
457+
``tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m").to_df()``, for
458+
instance, will return a DataFrame with two columns: ``n`` and ``m`` and
459+
10 rows.
460+
461+
:rtype: :py:class:`pandas.DataFrame`
462+
:raises ImportError: if `pandas` library is not available.
463+
464+
**This is experimental.**
465+
``pandas`` support might be changed or removed in future versions
466+
without warning. (See :ref:`filter-warnings-ref`)
467+
"""
468+
import pandas as pd
469+
470+
return pd.DataFrame(self.values(), columns=self._keys)
471+
447472
def closed(self):
448473
"""Return True if the result has been closed.
449474

tests/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ pytest-cov>=3.0.0
77
pytest-mock>=3.6.1
88
mock>=4.0.3
99
teamcity-messages>=1.29
10+
pandas>=1.4.1

tests/unit/async_/work/test_result.py

+52-5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from unittest import mock
2020

21+
import pandas as pd
2122
import pytest
2223

2324
from neo4j import (
@@ -30,18 +31,21 @@
3031
Version,
3132
)
3233
from neo4j._async_compat.util import AsyncUtil
33-
from neo4j.data import DataHydrator
34+
from neo4j.data import (
35+
DataHydrator,
36+
Node,
37+
)
3438
from neo4j.exceptions import ResultNotSingleError
39+
from neo4j.packstream import Structure
3540

3641
from ...._async_compat import mark_async_test
3742

3843

3944
class Records:
4045
def __init__(self, fields, records):
41-
assert all(len(fields) == len(r) for r in records)
42-
self.fields = fields
43-
# self.records = [{"record_values": r} for r in records]
44-
self.records = records
46+
self.fields = tuple(fields)
47+
self.records = tuple(records)
48+
assert all(len(self.fields) == len(r) for r in self.records)
4549

4650
def __len__(self):
4751
return self.records.__len__()
@@ -469,3 +473,46 @@ async def test_data(num_records):
469473
assert await result.data("hello", "world") == expected_data
470474
for record in records:
471475
assert record.data.called_once_with("hello", "world")
476+
477+
478+
@pytest.mark.parametrize(
479+
("keys", "values", "types", "instances"),
480+
(
481+
(["i"], zip(range(5)), ["int64"], None),
482+
(["x"], zip((n - .5) / 5 for n in range(5)), ["float64"], None),
483+
(["s"], zip(("foo", "bar", "baz", "foobar")), ["object"], None),
484+
(["l"], zip(([1, 2], [3, 4])), ["object"], None),
485+
(
486+
["n"],
487+
zip((
488+
Structure(b"N", 0, ["LABEL_A"], {"a": 1, "b": 2}),
489+
Structure(b"N", 2, ["LABEL_B"], {"a": 1, "c": 1.2}),
490+
Structure(b"N", 1, ["LABEL_A", "LABEL_B"], {"a": [1, "a"]}),
491+
)),
492+
["object"],
493+
[Node]
494+
),
495+
)
496+
)
497+
@mark_async_test
498+
async def test_to_df(keys, values, types, instances):
499+
values = list(values)
500+
connection = AsyncConnectionStub(records=Records(keys, values))
501+
result = AsyncResult(connection, DataHydrator(), 1, noop, noop)
502+
await result._run("CYPHER", {}, None, None, "r", None)
503+
df = await result.to_df()
504+
505+
assert isinstance(df, pd.DataFrame)
506+
assert df.keys().to_list() == keys
507+
assert len(df) == len(values)
508+
assert df.dtypes.to_list() == types
509+
510+
expected_df = pd.DataFrame(
511+
{k: [v[i] for v in values] for i, k in enumerate(keys)}
512+
)
513+
514+
if instances:
515+
for i, k in enumerate(keys):
516+
assert all(isinstance(v, instances[i]) for v in df[k])
517+
else:
518+
assert df.equals(expected_df)

tests/unit/sync/work/test_result.py

+52-5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from unittest import mock
2020

21+
import pandas as pd
2122
import pytest
2223

2324
from neo4j import (
@@ -30,18 +31,21 @@
3031
Version,
3132
)
3233
from neo4j._async_compat.util import Util
33-
from neo4j.data import DataHydrator
34+
from neo4j.data import (
35+
DataHydrator,
36+
Node,
37+
)
3438
from neo4j.exceptions import ResultNotSingleError
39+
from neo4j.packstream import Structure
3540

3641
from ...._async_compat import mark_sync_test
3742

3843

3944
class Records:
4045
def __init__(self, fields, records):
41-
assert all(len(fields) == len(r) for r in records)
42-
self.fields = fields
43-
# self.records = [{"record_values": r} for r in records]
44-
self.records = records
46+
self.fields = tuple(fields)
47+
self.records = tuple(records)
48+
assert all(len(self.fields) == len(r) for r in self.records)
4549

4650
def __len__(self):
4751
return self.records.__len__()
@@ -469,3 +473,46 @@ def test_data(num_records):
469473
assert result.data("hello", "world") == expected_data
470474
for record in records:
471475
assert record.data.called_once_with("hello", "world")
476+
477+
478+
@pytest.mark.parametrize(
479+
("keys", "values", "types", "instances"),
480+
(
481+
(["i"], zip(range(5)), ["int64"], None),
482+
(["x"], zip((n - .5) / 5 for n in range(5)), ["float64"], None),
483+
(["s"], zip(("foo", "bar", "baz", "foobar")), ["object"], None),
484+
(["l"], zip(([1, 2], [3, 4])), ["object"], None),
485+
(
486+
["n"],
487+
zip((
488+
Structure(b"N", 0, ["LABEL_A"], {"a": 1, "b": 2}),
489+
Structure(b"N", 2, ["LABEL_B"], {"a": 1, "c": 1.2}),
490+
Structure(b"N", 1, ["LABEL_A", "LABEL_B"], {"a": [1, "a"]}),
491+
)),
492+
["object"],
493+
[Node]
494+
),
495+
)
496+
)
497+
@mark_sync_test
498+
def test_to_df(keys, values, types, instances):
499+
values = list(values)
500+
connection = ConnectionStub(records=Records(keys, values))
501+
result = Result(connection, DataHydrator(), 1, noop, noop)
502+
result._run("CYPHER", {}, None, None, "r", None)
503+
df = result.to_df()
504+
505+
assert isinstance(df, pd.DataFrame)
506+
assert df.keys().to_list() == keys
507+
assert len(df) == len(values)
508+
assert df.dtypes.to_list() == types
509+
510+
expected_df = pd.DataFrame(
511+
{k: [v[i] for v in values] for i, k in enumerate(keys)}
512+
)
513+
514+
if instances:
515+
for i, k in enumerate(keys):
516+
assert all(isinstance(v, instances[i]) for v in df[k])
517+
else:
518+
assert df.equals(expected_df)

0 commit comments

Comments
 (0)