Skip to content

Commit 4fe2acb

Browse files
committed
Exp: test renewed GEDModel with fixed parallel.
1 parent 55e0244 commit 4fe2acb

File tree

5 files changed

+1638
-0
lines changed

5 files changed

+1638
-0
lines changed

gklearn/experiments/ged/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""
2+
__init__
3+
4+
@Author: jajupmochi
5+
@Date: May 22 2025
6+
"""
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""
2+
__init__.py
3+
4+
@Author: jajupmochi
5+
@Date: May 22 2025
6+
"""
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
"""
2+
fit_ged_model
3+
4+
@Author: jajupmochi
5+
@Date: May 22 2025
6+
"""
7+
from typing import List
8+
9+
import networkx as nx
10+
import numpy as np
11+
12+
# ANSI-colored prefixes used to tag console messages printed by this module.
ISSUE_TAG = "\033[91m[issue]\033[0m "  # Red
INFO_TAG = "\033[94m[info]\033[0m "  # Blue
SUCCESS_TAG = "\033[92m[success]\033[0m "  # Green
15+
16+
17+
def fit_model_ged(
        graphs_X: List[nx.Graph],
        graphs_Y: List[nx.Graph] = None,
        ged_options: dict = None,
        parallel: bool = None,
        n_jobs: int = None,
        chunksize: int = None,
        copy_graphs: bool = True,
        read_resu_from_file: int = 1,
        output_dir: str = None,
        params_idx: str = None,
        reorder_graphs: bool = False,
        verbose: int = 2,
        **kwargs
):
    """
    Fit a ``GEDModel`` and compute a graph edit distance matrix.

    When ``graphs_Y`` is None, the model is fitted on ``graphs_X`` and the
    matrix is computed with ``fit_transform`` on that same set. Otherwise the
    model is fitted on ``graphs_X`` and the matrix is computed with
    ``transform`` on ``graphs_Y``.

    Args:
        graphs_X: Graphs the model is fitted on.
        graphs_Y: Optional second set of graphs to transform.
        ged_options: Required. Dict with the keys ``'method'``,
            ``'edit_cost_fun'``, ``'edit_costs'``, ``'optim_method'`` and
            ``'repeats'``.
        parallel: ``False`` is mapped to ``None`` and ``True`` to
            ``'imap_unordered'``; any other value is passed to the model as is.
        n_jobs: Forwarded to ``GEDModel``.
        chunksize: Forwarded to ``GEDModel``.
        copy_graphs: Forwarded to ``GEDModel``.
        read_resu_from_file: Currently unused (model caching is not
            implemented); kept for interface compatibility.
        output_dir: Currently unused; kept for interface compatibility.
        params_idx: Optional identifier appended to the summary message.
        reorder_graphs: If True, reorder both graph lists with
            ``reorder_graphs_by_index(..., idx_key='id')`` first.
        verbose: Forwarded to ``GEDModel``.
        **kwargs: Currently unused; kept for interface compatibility.

    Returns:
        Tuple ``(model, matrix, stats)`` where ``stats`` holds ``'n_pairs'``,
        ``'matrix_shape'``, ``'run_time'`` and ``'run_time_per_pair'``
        (NaN when there are no pairs).

    Raises:
        TypeError: If ``ged_options`` is None.
        AssertionError: If fitting or transforming fails; the original
            exception is chained as the cause.
    """
    if ged_options is None:
        # Fail early with a clear message instead of an opaque
        # "'NoneType' object is not subscriptable" further down.
        raise TypeError(
            '`ged_options` is required and must be a dict with the keys '
            "'method', 'edit_cost_fun', 'edit_costs', 'optim_method' and "
            "'repeats'."
        )

    # Reorder graphs if specified:
    if reorder_graphs:
        # NOTE(review): `reorder_graphs_by_index` is neither defined nor
        # imported in the visible part of this module -- confirm where it
        # comes from, otherwise this branch raises NameError.
        graphs_X = reorder_graphs_by_index(graphs_X, idx_key='id')
        if graphs_Y is not None:
            graphs_Y = reorder_graphs_by_index(graphs_Y, idx_key='id')

    print(f'{INFO_TAG}Computing metric matrix...')
    all_graphs = graphs_X + graphs_Y if graphs_Y else graphs_X

    # Node label names are read from the first node of the first graph that
    # has any node, so an empty leading graph no longer raises IndexError.
    graph_with_nodes = next(
        (g for g in all_graphs if nx.number_of_nodes(g) > 0), None
    )
    if graph_with_nodes is None:
        nl_names = []
    else:
        first_node = next(iter(graph_with_nodes.nodes))
        nl_names = list(graph_with_nodes.nodes[first_node].keys())

    # Edge label names are read from the first edge of the first graph that
    # has any edge.
    graph_with_edges = next(
        (g for g in all_graphs if nx.number_of_edges(g) > 0), None
    )
    if graph_with_edges is None:
        el_names = []
    else:
        first_edge = next(iter(graph_with_edges.edges))
        el_names = list(graph_with_edges.edges[first_edge].keys())

    from .parallel_version import GEDModel

    # Translate the boolean switch into the value expected by the model.
    if parallel is False:
        parallel = None
    elif parallel is True:
        parallel = 'imap_unordered'

    model = GEDModel(
        ed_method=ged_options['method'],
        edit_cost_fun=ged_options['edit_cost_fun'],
        init_edit_cost_constants=ged_options['edit_costs'],
        optim_method=ged_options['optim_method'],
        node_labels=nl_names, edge_labels=el_names,
        parallel=parallel,
        n_jobs=n_jobs,
        chunksize=chunksize,
        copy_graphs=copy_graphs,
        verbose=verbose
    )

    # Train model. Failures are re-raised as AssertionError (the type callers
    # have always received) with an explicit `raise`, because `assert`
    # statements are stripped when Python runs with -O.
    try:
        if graphs_Y is None:
            # Compute the distance matrix for the same set of graphs:
            matrix = model.fit_transform(
                graphs_X, y=graphs_Y,
                save_dm_train=True, repeats=ged_options['repeats'],
            )
        else:
            model.fit(graphs_X, repeats=ged_options['repeats'])
            matrix = model.transform(
                graphs_Y,
                save_dm_test=True, repeats=ged_options['repeats'],
            )
    except OSError as exception:
        # `args[0]` may be an integer errno, so search the full message.
        if 'GLIBC_2.23' in str(exception):
            msg = (
                'This error is very likely due to the low version of GLIBC '
                'on your system. '
                'The required version of GLIBC is 2.23. This may happen on the '
                'CentOS 7 system, where the highest version of GLIBC is 2.17. '
                'You may check your GLIBC version by bash command `rpm -q glibc`. '
                'The `graphkit-learn` library comes with GLIBC_2.23, which you can '
                'install by enabling the `--build-gedlib` option: '
                '`python3 setup.py install --build-gedlib`. This will compile the C++ '
                'module `gedlib`, which requires a C++ compiler and CMake.'
            )
            raise AssertionError(msg) from exception
        raise AssertionError(exception) from exception
    except Exception as exception:
        raise AssertionError(exception) from exception

    # Number of graph pairs: n * (n - 1) / 2 within one set, |X| * |Y|
    # across two sets.
    if graphs_Y is None:
        n_pairs = len(graphs_X) * (len(graphs_X) - 1) / 2
    else:
        n_pairs = len(graphs_X) * len(graphs_Y)

    # With fewer than two graphs (or an empty graphs_Y) there are no pairs;
    # report NaN instead of raising ZeroDivisionError.
    run_time_per_pair = model.run_time / n_pairs if n_pairs else float('nan')

    # Print out the information:
    params_msg = f' for parameters {params_idx}' if params_idx else ''
    print(
        f'{SUCCESS_TAG}Computed metric matrix of size {matrix.shape} in {model.run_time:.3f} '
        f'seconds ({run_time_per_pair:.9f} s per pair){params_msg}.'
    )

    stats = {
        'n_pairs': n_pairs,
        'matrix_shape': matrix.shape,
        'run_time': model.run_time,
        'run_time_per_pair': run_time_per_pair,
    }

    return model, matrix, stats
153+
154+
155+
def fit_model_ged_test():
    """
    Example run: generate random graphs and fit the GED model on them.

    Builds ten small random graphs with ``GraphGenerator``, computes their
    pairwise distance matrix with ``fit_model_ged`` and prints the model and
    the matrix shape.
    """
    from gklearn.experiments.ged.ged_model.graph_generator import GraphGenerator
    generator = GraphGenerator(
        num_graphs=10,
        max_num_nodes=5,
        min_num_nodes=3,
        max_num_edges=10,
        min_num_edges=5,
        with_discrete_n_features=True,
        with_discrete_e_features=True,
        with_continuous_n_features=True,
        with_continuous_e_features=True,
    )
    graphs = generator.generate_graphs()
    ged_options = {
        'method': 'ged',
        'edit_cost_fun': 'NON_SYMBOLIC',
        'edit_costs': [3, 3, 1, 3, 3, 1],
        'optim_method': 'init',
        'repeats': 1
    }
    # `ged_options` must be passed by keyword: the second positional
    # parameter of `fit_model_ged` is `graphs_Y`, not the options dict.
    model, matrix, stats = fit_model_ged(graphs, ged_options=ged_options)
    print("Model:", model)
    print("Matrix shape:", matrix.shape)


if __name__ == '__main__':
    # Run the example when this module is executed as a script.
    fit_model_ged_test()
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
"""
2+
graph_generator
3+
4+
@Author: jajupmochi
5+
@Date: May 22 2025
6+
"""
7+
8+
9+
class GraphGenerator:
    """
    A class to generate random graphs for the Graph Edit Distance (GED) model with given
    constraints.

    Attributes:
        - num_graphs: Number of graphs to generate.
        - max_num_nodes: Maximum number of nodes in each graph.
        - min_num_nodes: Minimum number of nodes in each graph.
        - max_num_edges: Maximum number of edges in each graph.
        - min_num_edges: Minimum number of edges in each graph.
        - with_discrete_n_features: Whether to include discrete node features.
        - with_discrete_e_features: Whether to include discrete edge features.
        - with_continuous_n_features: Whether to include continuous node features.
        - with_continuous_e_features: Whether to include continuous edge features.
        - continuous_n_feature_dim: Length of the continuous node feature vector.
        - continuous_e_feature_dim: Length of the continuous edge feature vector.
        - node_features: List of node feature names. Optional.
        - edge_features: List of edge feature names. Optional.
        - node_feature_values: Dictionary mapping node feature names to their possible values. Optional.
        - edge_feature_values: Dictionary mapping edge feature names to their possible values. Optional.
        - seed: Random seed for reproducibility. Default is None.

    Notes:
        - Discrete features are assigned only for the names listed in
          ``node_features`` / ``edge_features``; with no names, none are added.
        - Continuous features are stored under the attribute key ``'feature'``.
        - Sampling from user-supplied ``node_feature_values`` /
          ``edge_feature_values`` is not implemented yet: when they are given,
          no features of the corresponding kind are generated.
    """

    def __init__(
            self,
            num_graphs: int,
            max_num_nodes: int,
            min_num_nodes: int,
            max_num_edges: int,
            min_num_edges: int,
            with_discrete_n_features: bool = False,
            with_discrete_e_features: bool = False,
            with_continuous_n_features: bool = False,
            with_continuous_e_features: bool = False,
            continuous_n_feature_dim: int = 10,
            continuous_e_feature_dim: int = 10,
            node_features: list = None,
            edge_features: list = None,
            node_feature_values: dict = None,
            edge_feature_values: dict = None,
            seed: int = None
    ):
        self.num_graphs = num_graphs
        self.max_num_nodes = max_num_nodes
        self.min_num_nodes = min_num_nodes
        self.max_num_edges = max_num_edges
        self.min_num_edges = min_num_edges
        self.with_discrete_n_features = with_discrete_n_features
        self.with_discrete_e_features = with_discrete_e_features
        self.with_continuous_n_features = with_continuous_n_features
        self.with_continuous_e_features = with_continuous_e_features
        self.continuous_n_feature_dim = continuous_n_feature_dim
        self.continuous_e_feature_dim = continuous_e_feature_dim
        # Missing optional arguments are normalized to empty containers.
        self.node_features = node_features if node_features else []
        self.edge_features = edge_features if edge_features else []
        self.node_feature_values = node_feature_values if node_feature_values else {}
        self.edge_feature_values = edge_feature_values if edge_feature_values else {}
        self.seed = seed
        # Default pools of discrete values. They are created whenever discrete
        # features are requested, so `generate_graphs` can always rely on them,
        # also when feature names were supplied.
        if with_discrete_n_features:
            self.discrete_n_features = [str(i) for i in range(1, 100)]
        if with_discrete_e_features:
            import string
            self.discrete_e_features = list(string.ascii_lowercase)

    def generate_graphs(self):
        """
        Generates a list of random graphs based on the specified constraints.

        The requested number of edges is capped at the maximum a simple
        undirected graph with the drawn number of nodes can hold.

        Returns:
            List of generated graphs.
        """
        import networkx as nx
        import numpy as np

        rng = np.random.default_rng(self.seed)

        # The containers were normalized to `{}` in `__init__`, so "no custom
        # values given" must be tested by emptiness, not with `is None`.
        default_node_values = not self.node_feature_values
        default_edge_values = not self.edge_feature_values

        graphs = []

        for _ in range(self.num_graphs):
            num_nodes = int(
                rng.integers(self.min_num_nodes, self.max_num_nodes + 1)
            )
            num_edges = int(
                rng.integers(self.min_num_edges, self.max_num_edges + 1)
            )
            # A simple undirected graph has at most n * (n - 1) / 2 edges.
            # Without this cap the rejection-sampling loop below would never
            # terminate for unreachable edge counts.
            num_edges = min(num_edges, num_nodes * (num_nodes - 1) // 2)

            G = nx.Graph()
            G.add_nodes_from(range(num_nodes))

            # Draw random endpoint pairs, rejecting self-loops and duplicates.
            while G.number_of_edges() < num_edges:
                u = int(rng.integers(0, num_nodes))
                v = int(rng.integers(0, num_nodes))
                if u != v and not G.has_edge(u, v):
                    G.add_edge(u, v)

            if self.with_discrete_n_features and default_node_values:
                for node in G.nodes():
                    for feature in self.node_features:
                        G.nodes[node][feature] = str(
                            rng.choice(self.discrete_n_features)
                        )

            if self.with_discrete_e_features and default_edge_values:
                for edge in G.edges():
                    for feature in self.edge_features:
                        G.edges[edge][feature] = str(
                            rng.choice(self.discrete_e_features)
                        )

            if self.with_continuous_n_features and default_node_values:
                for node in G.nodes():
                    G.nodes[node]['feature'] = rng.random(
                        self.continuous_n_feature_dim
                    )

            if self.with_continuous_e_features and default_edge_values:
                for edge in G.edges():
                    G.edges[edge]['feature'] = rng.random(
                        self.continuous_e_feature_dim
                    )

            graphs.append(G)

        return graphs

0 commit comments

Comments
 (0)