
Commit 66ff1cc

Merge pull request #146 from dice-group/develop

Develop

2 parents 0289c05 + fdf7db2, commit 66ff1cc

24 files changed: +468, -325 lines

.github/workflows/github-actions-python-package.yml

Lines changed: 4 additions & 5 deletions
```diff
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.9.17"]
 
     steps:
     - uses: actions/checkout@v3
@@ -19,13 +19,12 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install ruff pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install -r requirements.txt
     - name: Lint with ruff
      run: |
-       ruff --format=github --select=F63,F7,F82 --target-version=py310 dicee/.
+       ruff --format=github --select=F63,F7,F82 --target-version=py39 dicee/.
     - name: Test with pytest
      run: |
-       wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip
+       wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip --no-check-certificate
       unzip KGs.zip
       pytest -p no:warnings -x
```
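For reference, the updated CI job can be reproduced locally with roughly the same commands. This is only a sketch: it assumes a Python 3.9 environment and that requirements.txt now provides ruff and pytest, as the workflow relies on it after this change.

```bash
# Rough local equivalent of the updated CI steps (assumes Python 3.9 and that
# requirements.txt covers ruff and pytest).
python -m pip install --upgrade pip
pip install -r requirements.txt

# Lint step: same rule selection and target version as the workflow.
ruff --format=github --select=F63,F7,F82 --target-version=py39 dicee/.

# Test step: fetch the benchmark KGs and run the test suite.
wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip --no-check-certificate
unzip KGs.zip
pytest -p no:warnings -x
```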

.github/workflows/sphinx.yml

Lines changed: 3 additions & 9 deletions
```diff
@@ -20,18 +20,12 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: "3.10"
-      # Runs a single command using the runners shell
-      - name: Run a one-line script
-        run: echo Hello, world!
-
+
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
 
-      - name: Build HTML
+      - name: Build HTML and import
        run: |
-         sphinx-apidoc -o docs dicee/ && make -C docs/ html
-      - name: Run ghp-import
-        run: |
-          mv docs/_build/html docs/ && ghp-import -n -p -f docs/html
+         sphinx-apidoc -o docs dicee/ && make -C docs/ html && mv docs/_build/html docs/ && ghp-import -n -p -f docs/html
```
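The combined docs step can also be checked locally before the workflow pushes to GitHub Pages. A minimal sketch, assuming sphinx, sphinx-apidoc, and ghp-import are available in the environment; drop the ghp-import line if you only want the local HTML output:

```bash
# Local sketch of the combined "Build HTML and import" step.
sphinx-apidoc -o docs dicee/ && make -C docs/ html
# Publishing step; skip this for a local-only build.
mv docs/_build/html docs/ && ghp-import -n -p -f docs/html
```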

README.md

Lines changed: 2 additions & 19 deletions
````diff
@@ -34,33 +34,16 @@ Deploy a pre-trained embedding model without writing a single line of code.
 
 ``` bash
 git clone https://github.com/dice-group/dice-embeddings.git
-conda create -n dice python=3.10 --no-default-packages && conda activate dice
+conda create -n dice python=3.9 --no-default-packages && conda activate dice
 pip3 install -r requirements.txt
 ```
 or
 ```bash
 pip install dicee
 ```
-or
-```bash
-pip3 install "torch>=2.0.0"
-pip3 install "pandas>=1.5.1"
-pip3 install "polars>=0.16.14"
-pip3 install "scikit-learn>=1.2.2"
-pip3 install "pyarrow>=11.0.0"
-pip3 install "pytorch-lightning==1.6.4"
-pip3 install "pykeen==1.10.1"
-pip3 install "zstandard>=0.21.0"
-pip3 install "pytest>=7.2.2"
-pip3 install "psutil>=5.9.4"
-pip3 install "ruff>=0.0.284"
-pip3 install "gradio>=3.23.0"
-pip3 install "rdflib>=7.0.0"
-```
-
 To test the Installation
 ```bash
-wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip
+wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip --no-check-certificate
 unzip KGs.zip
 pytest -p no:warnings -x # it takes circa 15 minutes
 pytest -p no:warnings --lf # run only the last failed test
````

analyse_experiments.py

Lines changed: 55 additions & 64 deletions
```diff
@@ -2,59 +2,18 @@
 import json
 import pandas as pd
 import sys
+import argparse
 
 
-# print('Number of arguments:', len(sys.argv), 'arguments.')
-# print('Argument List:', str(sys.argv))
-
-
-if len(sys.argv) > 1:
-    input_str_path = sys.argv[1]
-else:
-    # (1) Give a path of Experiments folder
-    input_str_path = 'Experiments/'
-
-# (2) Get all subfolders
-sub_folder_str_paths = os.listdir(input_str_path)
-
-results = dict()
-
-experiments = []
-for path in sub_folder_str_paths:
-    try:
-        with open(input_str_path + path + '/configuration.json', 'r') as f:
-            config = json.load(f)
-            config = {i: config[i] for i in
-                      ['model', 'full_storage_path', 'embedding_dim',
-                       'normalization', 'num_epochs', 'batch_size', 'lr',
-                       'callbacks',
-                       'scoring_technique',
-                       'path_dataset_folder', 'p', 'q']}
-    except FileNotFoundError:
-        print('Exception occured at reading config')
-        continue
-
-    try:
-        with open(input_str_path + path + '/report.json', 'r') as f:
-            report = json.load(f)
-            report = {i: report[i] for i in ['Runtime','NumParam']}
-    except FileNotFoundError:
-        print('Exception occured at reading report')
-        continue
-
-    try:
-        with open(input_str_path + path + '/eval_report.json', 'r') as f:
-            eval_report = json.load(f)
-            # print(eval_report)
-            # exit(1)
-            # eval_report = {i: str(eval_report[i]) for i in ['Train', 'Val', 'Test']}
-    except FileNotFoundError:
-        print('Exception occured at reading eval_report')
-        continue
-
-    config.update(eval_report)
-    config.update(report)
-    experiments.append(config)
+def get_default_arguments(description=None):
+    parser = argparse.ArgumentParser(add_help=False)
+    # Default Trainer param https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html#methods
+    # Data related arguments
+    parser.add_argument("--dir", type=str, default="KINSHIP-DistMult-RN/",
+                        help="")
+    if description is None:
+        return parser.parse_args()
+    return parser.parse_args(description)
 
 
 # need a class to hold all params
@@ -63,7 +22,7 @@ def __init__(self):
         self.model_name = []
         self.callbacks = []
         self.embedding_dim = []
-        self.num_params=[]
+        self.num_params = []
         self.num_epochs = []
         self.batch_size = []
         self.lr = []
@@ -87,6 +46,7 @@ def __init__(self):
 
         self.runtime = []
         self.normalization = []
+        self.scoring_technique = []
 
     def save_experiment(self, x):
         self.model_name.append(x['model'])
@@ -100,6 +60,7 @@ def save_experiment(self, x):
         self.num_params.append(x['NumParam'])
 
         self.normalization.append(x['normalization'])
+        self.scoring_technique.append(x['scoring_technique'])
         self.callbacks.append(x['callbacks'])
 
         self.train_mrr.append(x['Train']['MRR'])
@@ -122,32 +83,62 @@ def save_experiment(self, x):
 
     def to_df(self):
         return pd.DataFrame(
-            dict(model_name=self.model_name, #pq=self.pq, path_dataset_folder=self.path_dataset_folder,
+            dict(model_name=self.model_name, # pq=self.pq, path_dataset_folder=self.path_dataset_folder,
                  train_mrr=self.train_mrr, train_h1=self.train_h1,
                  train_h3=self.train_h3, train_h10=self.train_h10,
-                 #full_storage_path=self.full_storage_path,
+                 # full_storage_path=self.full_storage_path,
                  val_mrr=self.val_mrr, val_h1=self.val_h1,
                  val_h3=self.val_h3, val_h10=self.val_h10,
                  test_mrr=self.test_mrr, test_h1=self.test_h1,
                  test_h3=self.test_h3, test_h10=self.test_h10,
                  runtime=self.runtime,
                  params=self.num_params,
                  callbacks=self.callbacks,
-                 #normalization=self.normalization,
-                 #embeddingdim=self.embedding_dim
+                 # normalization=self.normalization,
+                 # embeddingdim=self.embedding_dim
+                 scoring_technique=self.scoring_technique
                  )
             )
 
 
-counter = Experiment()
+def analyse(args):
+    # (2) Get all subfolders
+    sub_folder_str_paths = os.listdir(args.dir)
+    experiments = []
+    for path in sub_folder_str_paths:
+        full_path=args.dir +"/"+path
+        with open(f'{full_path}/configuration.json', 'r') as f:
+            config = json.load(f)
+            config = {i: config[i] for i in
+                      ['model', 'full_storage_path', 'embedding_dim',
+                       'normalization', 'num_epochs', 'batch_size', 'lr',
+                       'callbacks',
+                       'scoring_technique',
+                       "scoring_technique",
+                       'path_dataset_folder', 'p', 'q']}
+        with open(f'{full_path}/report.json', 'r') as f:
+            report = json.load(f)
+            report = {i: report[i] for i in ['Runtime', 'NumParam']}
+        with open(f'{full_path}/eval_report.json', 'r') as f:
+            eval_report = json.load(f)
+
+        config.update(eval_report)
+        config.update(report)
+        experiments.append(config)
+
+    counter = Experiment()
 
-for i in experiments:
-    counter.save_experiment(i)
+    for i in experiments:
+        counter.save_experiment(i)
 
+    df = counter.to_df()
+    df.sort_values(by=['test_mrr'], ascending=False, inplace=True)
+    pd.set_option("display.precision", 3)
+    # print(df)
+    print(df.to_latex(index=False, float_format="%.3f"))
+    #print(df.to_markdown(index=False))
+    df.to_csv(path_or_buf=args.dir+'/summary.csv')
 
-df = counter.to_df()
-pd.set_option("display.precision", 3)
-#print(df)
-print(df.to_latex(index=False,float_format="%.3f"))
 
-print(df.to_markdown(index=False))
+if __name__ == '__main__':
+    analyse(get_default_arguments())
```
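After this refactor the script is driven by a `--dir` argument instead of a positional `sys.argv` path. A usage sketch; the folder name here is only an example of a directory whose sub-folders each contain `configuration.json`, `report.json`, and `eval_report.json`:

```bash
# Point the script at a folder of experiment sub-directories; it prints a LaTeX
# table sorted by test MRR and writes <dir>/summary.csv.
python analyse_experiments.py --dir Experiments/
```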
