Skip to content

Commit 882646b

Browse files
ENH : added square_clustering (#34)
* added square_clustering
* added __getattr__ to ParallelGraph; resolved conflicts
* replaced G with G.nodes()
* fixed square_clustering
* added time_square_clustering
* style fix
* added docs
* updating G
* un-updated ParallelGraph class
* adding total_cores and a style fix
* style fix
* updated import in benchmark
* added heatmap for square_clustering
* comparing chunk and no chunk - 1
* 2 - trying different default chunking - similar speedups
* added get_chunks and updated heatmap (improved speedups); updated docs
1 parent ee10032 commit 882646b

File tree

6 files changed

+99
-9
lines changed

6 files changed

+99
-9
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from .common import (
2+
backends,
3+
num_nodes,
4+
edge_prob,
5+
get_cached_gnp_random_graph,
6+
Benchmark,
7+
)
8+
import networkx as nx
9+
10+
11+
class Cluster(Benchmark):
    """ASV benchmark suite for clustering algorithms, parameterized over
    backend, graph size, and edge probability."""

    params = [backends, num_nodes, edge_prob]
    param_names = ["backend", "num_nodes", "edge_prob"]

    def time_square_clustering(self, backend, num_nodes, edge_prob):
        # The cached graph keeps G(n, p) generation out of the timed region,
        # so only the square_clustering dispatch itself is measured.
        graph = get_cached_gnp_random_graph(num_nodes, edge_prob)
        _ = nx.square_clustering(graph, backend=backend)

nx_parallel/algorithms/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
from .isolate import *
88
from .tournament import *
99
from .vitality import *
10+
from .cluster import *

nx_parallel/algorithms/cluster.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
from itertools import combinations, chain
2+
from joblib import Parallel, delayed
3+
import nx_parallel as nxp
4+
5+
__all__ = [
6+
"square_clustering",
7+
]
8+
9+
10+
def square_clustering(G, nodes=None, get_chunks="chunks"):
    """Parallel version of `networkx.square_clustering`.

    The requested nodes are split into `node_chunks`, and the square
    clustering coefficient of every chunk is computed concurrently over all
    available CPU cores.

    Parameters
    ------------
    get_chunks : str, function (default = "chunks")
        A function that takes in a list of all the nodes (or nbunch) as input and
        returns an iterable `node_chunks`. The default chunking is done by slicing the
        `nodes` into `n` chunks, where `n` is the number of CPU cores.

    networkx.square_clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.square_clustering.html
    """

    def _chunk_square_clustering(chunk):
        # Square clustering for one chunk of nodes; the per-node math mirrors
        # the serial networkx implementation.
        pairs = []
        for v in chunk:
            squares_total = 0
            potential_total = 0
            for u, w in combinations(G[v], 2):
                shared = len((set(G[u]) & set(G[w])) - {v})
                squares_total += shared
                degm = shared + 1
                if w in G[u]:
                    degm += 1
                potential_total += (len(G[u]) - degm) + (len(G[w]) - degm) + shared
            # potential_total == 0 implies squares_total == 0, so the
            # unnormalized value is returned unchanged in that case.
            coeff = (
                squares_total / potential_total if potential_total > 0 else squares_total
            )
            pairs.append((v, coeff))
        return pairs

    # Unwrap a nx_parallel ParallelGraph down to the underlying networkx graph.
    if hasattr(G, "graph_object"):
        G = G.graph_object

    node_iter = list(G) if nodes is None else list(G.nbunch_iter(nodes))

    total_cores = nxp.cpu_count()

    if get_chunks == "chunks":
        node_iter_chunks = nxp.chunks(node_iter, max(len(node_iter) // total_cores, 1))
    else:
        node_iter_chunks = get_chunks(node_iter)

    results = Parallel(n_jobs=total_cores)(
        delayed(_chunk_square_clustering)(chunk) for chunk in node_iter_chunks
    )
    clustering = dict(chain.from_iterable(results))

    # Match networkx semantics: a single-node `nodes` argument yields a scalar.
    if nodes in G:
        return clustering[nodes]
    return clustering

nx_parallel/interface.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
tournament_is_strongly_connected,
1111
)
1212
from nx_parallel.algorithms.vitality import closeness_vitality
13+
from nx_parallel.algorithms.cluster import square_clustering
1314

1415
__all__ = ["Dispatcher", "ParallelGraph"]
1516

@@ -53,6 +54,9 @@ class Dispatcher:
5354
all_pairs_bellman_ford_path = all_pairs_bellman_ford_path
5455
johnson = johnson
5556

57+
# Clustering
58+
square_clustering = square_clustering
59+
5660
# =============================
5761

5862
@staticmethod
44.4 KB
Loading

timing/timing_individual_function.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,34 +7,36 @@
77
import seaborn as sns
88
from matplotlib import pyplot as plt
99

10-
import nx_parallel
10+
import nx_parallel as nxp
1111

1212
# Code to create README heatmaps for individual function currFun
1313
heatmapDF = pd.DataFrame()
1414
number_of_nodes_list = [10, 50, 100, 300, 500]
1515
pList = [1, 0.8, 0.6, 0.4, 0.2]
16-
currFun = nx.all_pairs_bellman_ford_path
16+
weighted = False
17+
currFun = nx.square_clustering
1718
for p in pList:
1819
for num in number_of_nodes_list:
1920
# create original and parallel graphs
2021
G = nx.fast_gnp_random_graph(num, p, seed=42, directed=False)
2122

2223
# for weighted graphs
23-
random.seed(42)
24-
for u, v in G.edges():
25-
G[u][v]["weight"] = random.random()
24+
if weighted:
25+
random.seed(42)
26+
for u, v in G.edges():
27+
G[u][v]["weight"] = random.random()
2628

27-
H = nx_parallel.ParallelGraph(G)
29+
H = nxp.ParallelGraph(G)
2830

2931
# time both versions and update heatmapDF
3032
t1 = time.time()
31-
c = currFun(H)
33+
c = nx.square_clustering(H)
3234
if isinstance(c, types.GeneratorType):
3335
d = dict(c)
3436
t2 = time.time()
3537
parallelTime = t2 - t1
3638
t1 = time.time()
37-
c = currFun(G)
39+
c = nx.square_clustering(G)
3840
if isinstance(c, types.GeneratorType):
3941
d = dict(c)
4042
t2 = time.time()
@@ -73,7 +75,7 @@
7375
plt.xticks(rotation=45)
7476
plt.yticks(rotation=20)
7577
plt.title(
76-
"Small Scale Demo: Times Speedups of " + currFun.__name__ + " compared to networkx"
78+
"Small Scale Demo: Times Speedups of " + currFun.__name__ + " compared to NetworkX"
7779
)
7880
plt.xlabel("Number of Vertices")
7981
plt.ylabel("Edge Probability")

0 commit comments

Comments
 (0)