Skip to content

GA-168 GPU Test #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
b4a0c80
added gpu test using nx and cuda, first test commit for circleci - th…
hkernbach Aug 23, 2024
c33d0b7
fix yml formatting
hkernbach Aug 23, 2024
a710f48
fix yml formatting again, define executor gpu
hkernbach Aug 23, 2024
b542bc3
add test-gpu to matrix executor
hkernbach Aug 23, 2024
e1a2808
fix resource class, added todo for later
hkernbach Aug 23, 2024
7dbf993
flake8
hkernbach Aug 23, 2024
213597b
pot deps fix
hkernbach Aug 26, 2024
0da2a64
gpu test enable
hkernbach Aug 26, 2024
277b928
gpu test enable
hkernbach Aug 26, 2024
bbc69ce
fix syntax
hkernbach Aug 26, 2024
4a0fdb4
fix test, should work now on ci as well
hkernbach Aug 26, 2024
3eabe96
Merge branch 'feature/GA-168' of github.com:arangodb/nx-arangodb into…
hkernbach Aug 26, 2024
521fd4c
incr grid of graph
hkernbach Aug 26, 2024
4262e8b
Merge remote-tracking branch 'origin/main' into feature/GA-168
hkernbach Aug 26, 2024
ca85970
restructured test dirs, do not automatically run gpu tests.
hkernbach Aug 26, 2024
72720e2
isort
hkernbach Aug 26, 2024
316e9df
fmt, move test code
hkernbach Aug 26, 2024
f8377f2
this is not allowed to be removed
hkernbach Aug 26, 2024
a6fa56a
fmt
hkernbach Aug 26, 2024
97c0649
test
hkernbach Aug 26, 2024
181e303
3.12 instead of 3.12.2 for gpu
hkernbach Aug 26, 2024
89d57a4
new: `use_gpu` backend config
aMahanna Aug 27, 2024
628ee62
attempt: set `use_gpu`
aMahanna Aug 27, 2024
737946c
force-set `use_gpu`
aMahanna Aug 27, 2024
10b5652
fix: lint
aMahanna Aug 27, 2024
772786d
cleanup
aMahanna Aug 27, 2024
bac05e0
fix: lint
aMahanna Aug 27, 2024
16da129
fix imports
aMahanna Aug 27, 2024
529171e
attempt: increase `digit`
aMahanna Aug 27, 2024
baa6a12
new: `write_async` param
aMahanna Aug 27, 2024
35d44e8
move assertions
aMahanna Aug 27, 2024
e5cbcc3
fix lint
aMahanna Aug 27, 2024
6e147d9
attempt: increase `digit`
aMahanna Aug 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ executors:
machine:
image: ubuntu-2404:current

# Executor used by the `test-gpu` job: a CUDA 12 Linux machine image paired
# with an NVIDIA GPU resource class.
gpu-executor:
machine:
image: linux-cuda-12:default
resource_class: gpu.nvidia.small.multi

jobs:
lint:
executor: python-executor
Expand Down Expand Up @@ -79,6 +84,45 @@ jobs:
name: Run NetworkX tests
command: ./run_nx_tests.sh

# Job that runs the GPU tests on `gpu-executor`, parameterized by the Python
# version to install.
test-gpu:
parameters:
python_version:
type: string
executor: gpu-executor
steps:
- checkout

# Runs the repository's starter.sh script (presumably brings up the ArangoDB
# instance the tests connect to -- confirm against starter.sh).
- run:
name: Set up ArangoDB
command: |
chmod +x starter.sh
./starter.sh

# Installs the requested Python through pyenv (`-f` forces the install) and
# makes it the global interpreter for the following steps.
- run:
name: Setup Python
command: |
pyenv --version
pyenv install -f << parameters.python_version >>
pyenv global << parameters.python_version >>

- run:
name: Setup pip
command: python -m pip install --upgrade pip setuptools wheel

- run:
name: Install packages
command: pip install .[dev]

# The CUDA 12 builds of pylibcugraph and nx-cugraph are installed with
# NVIDIA's package index added as an extra index.
- run:
name: Install cuda related dependencies
command: |
pip install pylibcugraph-cu12 --extra-index-url https://pypi.nvidia.com
pip install nx-cugraph-cu12 --extra-index-url https://pypi.nvidia.com

# `-k "test_gpu"` selects tests by name; `--run-gpu-tests` is the opt-in flag
# registered in tests/conftest.py, without which the GPU test skips itself.
- run:
name: Run local gpu tests
command: pytest tests/test.py -k "test_gpu" --run-gpu-tests

workflows:
version: 2
build:
Expand All @@ -87,4 +131,11 @@ workflows:
- test:
matrix:
parameters:
python_version: ["3.10", "3.11", "3.12.2"]
python_version: ["3.10", "3.11", "3.12.2"]
- test-gpu:
requires:
- lint
- test
matrix:
parameters:
python_version: ["3.10", "3.11"] # "3.12" # TODO: Revisit 3.12
1 change: 1 addition & 0 deletions _nx_arangodb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def get_info():
"read_parallelism": None,
"read_batch_size": None,
"write_batch_size": None,
"use_gpu": True,
}

return d
Expand Down
2 changes: 2 additions & 0 deletions nx_arangodb/classes/digraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
read_parallelism: int = 10,
read_batch_size: int = 100000,
write_batch_size: int = 50000,
write_async: bool = True,
symmetrize_edges: bool = False,
use_experimental_views: bool = False,
*args: Any,
Expand All @@ -50,6 +51,7 @@ def __init__(
read_parallelism,
read_batch_size,
write_batch_size,
write_async,
symmetrize_edges,
use_experimental_views,
*args,
Expand Down
6 changes: 3 additions & 3 deletions nx_arangodb/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from typing import Any, Callable, ClassVar

import networkx as nx
import numpy as np
import numpy.typing as npt
from adbnx_adapter import ADBNX_Adapter
from arango import ArangoClient
from arango.cursor import Cursor
Expand Down Expand Up @@ -57,6 +55,7 @@ def __init__(
read_parallelism: int = 10,
read_batch_size: int = 100000,
write_batch_size: int = 50000,
write_async: bool = True,
symmetrize_edges: bool = False,
use_experimental_views: bool = False,
*args: Any,
Expand Down Expand Up @@ -168,7 +167,7 @@ def edge_type_func(u: str, v: str) -> str:
incoming_graph_data,
edge_definitions=edge_definitions,
batch_size=self.write_batch_size,
use_async=True,
use_async=write_async,
)

else:
Expand Down Expand Up @@ -211,6 +210,7 @@ def _set_arangodb_backend_config(self) -> None:
config.read_parallelism = self.read_parallelism
config.read_batch_size = self.read_batch_size
config.write_batch_size = self.write_batch_size
config.use_gpu = True # Only used by default if nx-cugraph is available

def _set_factory_methods(self) -> None:
"""Set the factory methods for the graph, _node, and _adj dictionaries.
Expand Down
2 changes: 2 additions & 0 deletions nx_arangodb/classes/multidigraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(
read_parallelism: int = 10,
read_batch_size: int = 100000,
write_batch_size: int = 50000,
write_async: bool = True,
symmetrize_edges: bool = False,
use_experimental_views: bool = False,
*args: Any,
Expand All @@ -49,6 +50,7 @@ def __init__(
read_parallelism,
read_batch_size,
write_batch_size,
write_async,
symmetrize_edges,
use_experimental_views,
*args,
Expand Down
2 changes: 2 additions & 0 deletions nx_arangodb/classes/multigraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
read_parallelism: int = 10,
read_batch_size: int = 100000,
write_batch_size: int = 50000,
write_async: bool = True,
symmetrize_edges: bool = False,
use_experimental_views: bool = False,
*args: Any,
Expand All @@ -50,6 +51,7 @@ def __init__(
read_parallelism,
read_batch_size,
write_batch_size,
write_async,
symmetrize_edges,
use_experimental_views,
*args,
Expand Down
12 changes: 6 additions & 6 deletions nx_arangodb/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
import numpy as np
import nx_cugraph as nxcg

GPU_ENABLED = True
logger.info("NetworkX-cuGraph is enabled.")
GPU_AVAILABLE = True
logger.info("NetworkX-cuGraph is available.")
except Exception as e:
GPU_ENABLED = False
logger.info(f"NetworkX-cuGraph is disabled: {e}.")
GPU_AVAILABLE = False
logger.info(f"NetworkX-cuGraph is unavailable: {e}.")

__all__ = [
"_to_nx_graph",
Expand Down Expand Up @@ -58,7 +58,7 @@ def _to_nxadb_graph(
raise TypeError(f"Expected nxadb.Graph or nx.Graph; got {type(G)}")


if GPU_ENABLED:
if GPU_AVAILABLE:

def _to_nxcg_graph(G: Any, as_directed: bool = False) -> nxcg.Graph:
logger.debug(f"_to_nxcg_graph for {G.__class__.__name__}")
Expand Down Expand Up @@ -161,7 +161,7 @@ def nxadb_to_nx(G: nxadb.Graph) -> nx.Graph:
return G_NX


if GPU_ENABLED:
if GPU_AVAILABLE:

def nxadb_to_nxcg(G: nxadb.Graph, as_directed: bool = False) -> nxcg.Graph:
if G.use_nxcg_cache and G.nxcg_graph is not None:
Expand Down
2 changes: 1 addition & 1 deletion nx_arangodb/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def _auto_func(func_name: str, /, *args: Any, **kwargs: Any) -> Any:

# TODO: Use `nx.config.backends.arangodb.backend_priority` instead
backend_priority = []
if nxadb.convert.GPU_ENABLED:
if nxadb.convert.GPU_AVAILABLE and nx.config.backends.arangodb.use_gpu:
backend_priority.append("cugraph")

for backend in backend_priority:
Expand Down
34 changes: 34 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import logging
import os
import sys
from io import StringIO
from typing import Any

import networkx as nx
Expand All @@ -14,13 +16,17 @@
logger.setLevel(logging.INFO)

db: StandardDatabase
run_gpu_tests: bool


def pytest_addoption(parser: Any) -> None:
    """Register the command-line options understood by this test suite.

    Four value options describe the database connection (``--url``,
    ``--dbName``, ``--username``, ``--password``), each with a default.
    ``--run-gpu-tests`` is a boolean flag that defaults to ``False``.
    """
    connection_options = (
        ("--url", "http://localhost:8529"),
        ("--dbName", "_system"),
        ("--username", "root"),
        ("--password", "test"),
    )
    for option_name, default_value in connection_options:
        parser.addoption(option_name, action="store", default=default_value)

    # GPU tests are opt-in: the flag is off unless passed explicitly.
    parser.addoption(
        "--run-gpu-tests", action="store_true", default=False, help="Run GPU tests"
    )


def pytest_configure(config: Any) -> None:
Expand Down Expand Up @@ -48,6 +54,9 @@ def pytest_configure(config: Any) -> None:
os.environ["DATABASE_PASSWORD"] = con["password"]
os.environ["DATABASE_NAME"] = con["dbName"]

global run_gpu_tests
run_gpu_tests = config.getoption("--run-gpu-tests")


@pytest.fixture(scope="function")
def load_karate_graph() -> None:
Expand Down Expand Up @@ -100,3 +109,28 @@ def create_line_graph(load_attributes: set[str]) -> nxadb.Graph:
name="LineGraph",
edge_collections_attributes=load_attributes,
)


def create_grid_graph(graph_cls: type[nxadb.Graph]) -> nxadb.Graph:
    """Return a ``graph_cls`` instance named "GridGraph", building it if needed.

    When the module-level ``db`` already reports a graph called "GridGraph",
    the graph is opened by name only. Otherwise a 500 x 500 NetworkX grid
    graph is generated and handed to ``graph_cls`` as ``incoming_graph_data``
    with ``write_async=False``.
    """
    graph_name = "GridGraph"
    already_stored = db.has_graph(graph_name)

    if not already_stored:
        return graph_cls(
            incoming_graph_data=nx.grid_graph(dim=(500, 500)),
            name=graph_name,
            write_async=False,
        )

    return graph_cls(name=graph_name)


# Taken from:
# https://stackoverflow.com/questions/16571150/how-to-capture-stdout-output-from-a-python-function-call
class Capturing(list[str]):
    """Context manager that records everything written to ``sys.stdout``.

    While the ``with`` block is active, ``sys.stdout`` is replaced by an
    in-memory buffer. On exit the buffered text is split into lines and
    appended to this list, and the previous ``sys.stdout`` is put back.
    Exceptions raised inside the block are not suppressed.
    """

    def __enter__(self) -> "Capturing":
        # Remember the stream being replaced so __exit__ can restore it.
        self._stdout = sys.stdout
        sys.stdout = self._stringio = StringIO()
        return self

    def __exit__(self, *args: object) -> None:
        try:
            self.extend(self._stringio.getvalue().splitlines())
        finally:
            # Restore stdout first and unconditionally, so a failure while
            # collecting the lines can never leave the process redirected.
            sys.stdout = self._stdout
            del self._stringio  # free up some memory
67 changes: 62 additions & 5 deletions tests/test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import time
from typing import Any, Callable, Dict, Union

import networkx as nx
import phenolrs
import pytest
from arango import DocumentDeleteError
from phenolrs.networkx.typings import (
Expand All @@ -15,7 +15,7 @@
from nx_arangodb.classes.dict.adj import AdjListOuterDict, EdgeAttrDict, EdgeKeyDict
from nx_arangodb.classes.dict.node import NodeAttrDict, NodeDict

from .conftest import create_line_graph, db
from .conftest import Capturing, create_grid_graph, create_line_graph, db, run_gpu_tests

G_NX = nx.karate_club_graph()

Expand All @@ -38,7 +38,11 @@ def assert_same_dict_values(
if type(next(iter(d2.keys()))) == int:
d2 = {f"person/{k}": v for k, v in d2.items()}

assert d1.keys() == d2.keys(), "Dictionaries have different keys"
d1_keys = set(d1.keys())
d2_keys = set(d2.keys())
difference = d1_keys ^ d2_keys
assert difference == set(), "Dictionaries have different keys"

for key in d1:
m = f"Values for key '{key}' are not equal up to digit {digit}"
assert round(d1[key], digit) == round(d2[key], digit), m
Expand All @@ -50,10 +54,12 @@ def assert_bc(d1: dict[str | int, float], d2: dict[str | int, float]) -> None:
assert_same_dict_values(d1, d2, 14)


def assert_pagerank(d1: dict[str | int, float], d2: dict[str | int, float]) -> None:
def assert_pagerank(
d1: dict[str | int, float], d2: dict[str | int, float], digit: int = 15
) -> None:
assert d1
assert d2
assert_same_dict_values(d1, d2, 15)
assert_same_dict_values(d1, d2, digit)


def assert_louvain(l1: list[set[Any]], l2: list[set[Any]]) -> None:
Expand Down Expand Up @@ -315,6 +321,57 @@ def test_shortest_path_remote_algorithm(load_karate_graph: Any) -> None:
assert r_3 != r_4


@pytest.mark.parametrize(
    "graph_cls",
    [
        (nxadb.Graph),
        (nxadb.DiGraph),
        (nxadb.MultiGraph),
        (nxadb.MultiDiGraph),
    ],
)
def test_gpu_pagerank(graph_cls: type[nxadb.Graph]) -> None:
    """Check that PageRank on the grid graph uses the GPU path and beats the CPU path.

    The test is skipped unless ``run_gpu_tests`` is set. It runs
    ``nx.pagerank`` once with ``use_gpu`` enabled and once with it disabled,
    checks the captured stdout of each run for the expected message, requires
    the GPU run to be faster, and compares both results to 10 digits.
    """
    if not run_gpu_tests:
        pytest.skip("GPU tests are disabled")

    graph = create_grid_graph(graph_cls)

    assert nxadb.convert.GPU_AVAILABLE is True
    assert nx.config.backends.arangodb.use_gpu is True

    # Note: While this works, we should use the logger or some alternative
    # approach testing this. Via stdout is not the best way to test this.
    # perf_counter() is the monotonic clock meant for measuring intervals.
    start_gpu = time.perf_counter()
    with Capturing() as output_gpu:
        res_gpu = nx.pagerank(graph)
    # Stop the clock before asserting so only the algorithm call is timed.
    gpu_time = time.perf_counter() - start_gpu

    assert any(
        "NXCG Graph construction took" in line for line in output_gpu
    ), "Expected output not found in GPU execution"

    # Disable GPU for the CPU measurement, and always restore the previous
    # setting so this global backend config does not leak into other tests.
    backend_config = nx.config.backends.arangodb
    previous_use_gpu = backend_config.use_gpu
    backend_config.use_gpu = False
    try:
        start_cpu = time.perf_counter()
        with Capturing() as output_cpu:
            res_cpu = nx.pagerank(graph)
        cpu_time = time.perf_counter() - start_cpu
    finally:
        backend_config.use_gpu = previous_use_gpu

    assert len(output_cpu) == 1
    assert "Graph 'GridGraph' load took" in output_cpu[0]

    assert gpu_time < cpu_time, "GPU execution should be faster than CPU execution"
    assert_pagerank(res_gpu, res_cpu, 10)


@pytest.mark.parametrize(
"graph_cls",
[
Expand Down