-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
89 lines (67 loc) · 2.58 KB
/
main.py
File metadata and controls
89 lines (67 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import json
import os
import shutil
from pathlib import Path

import yaml

from syftbox.lib import Client
def copy_html_files(source: Path, destination: Path):
    """
    Copies all regular files from the source directory to the destination
    directory.

    Creates the destination directory (including parents) if it does not
    exist. Subdirectories inside ``source`` are skipped. Individual copy
    failures are reported and skipped (best-effort), not raised.

    Args:
        source (Path): The source directory.
        destination (Path): The destination directory.

    Raises:
        ValueError: If source or destination is not a directory.
    """
    if not source.is_dir():
        raise ValueError(f"Source {source} is not a directory.")
    if not destination.exists():
        destination.mkdir(parents=True)
    elif not destination.is_dir():
        raise ValueError(f"Destination {destination} is not a directory.")
    for item in source.iterdir():
        if item.is_file():
            target = destination / item.name
            try:
                # BUG FIX: the original used os.rename, which *moved* the
                # files — emptying ./assets so a second run published
                # nothing — and fails across filesystems. shutil.copy2
                # copies the file contents and preserves metadata.
                shutil.copy2(item, target)
            except OSError as e:
                # Best-effort: report and continue with the remaining files.
                print(f"Error moving file {item} to {target}: {e}")
def collect_dataset_metadata(
    path: str, global_datasets_index: dict, owner: str
) -> None:
    """
    Merge the datasets declared in ``<path>/public/datasets.yaml`` into
    ``global_datasets_index``, mutating it in place.

    Each dataset is keyed by its ``name``: the first sighting records the
    name, description and format; every sighting appends ``owner`` to the
    entry's ``owners`` list. Datasites with no ``datasets.yaml``, or with
    an empty one, are silently skipped.

    Args:
        path: Root directory of one datasite.
        global_datasets_index: Accumulator mapping dataset name -> metadata.
        owner: Identifier of the datasite's owner (its directory name).
    """
    datasets_path = Path(path) / "public" / "datasets.yaml"
    if not datasets_path.exists():
        return
    with open(datasets_path, "r") as yaml_file:
        datasets = yaml.safe_load(yaml_file)
    # safe_load returns None for an empty file; treat that as "no datasets"
    # instead of crashing on the subscript as the original did.
    for dataset in (datasets or {}).get("datasets", []):
        dataset_name = dataset["name"]
        if dataset_name in global_datasets_index:
            global_datasets_index[dataset_name]["owners"].append(owner)
        else:
            global_datasets_index[dataset_name] = {
                "name": dataset_name,
                "description": dataset["description"],
                "format": dataset["format"],
                "owners": [owner],
            }
def build_datasites_index(client: Client) -> dict:
    """
    Scan every datasite visible to ``client`` and aggregate the datasets
    they publish into one global index.

    Args:
        client: Loaded SyftBox client; ``client.datasites`` is the
            directory containing one subdirectory per datasite.

    Returns:
        dict: Mapping dataset name -> metadata (name, description,
        format, owners), aggregated across all datasites.
    """
    global_index: dict = {}
    for owner in os.listdir(client.datasites):
        # The datasite's directory name doubles as its owner identifier.
        # collect_dataset_metadata mutates global_index in place and
        # returns None, so nothing is assigned here (the original bound
        # that None to an unused local).
        collect_dataset_metadata(str(client.datasites / owner), global_index, owner)
    return global_index
def main():
    """Entry point: publish the dataset index and search UI to this datasite."""
    client = Client.load()

    # Aggregate the dataset metadata of every visible datasite.
    index = build_datasites_index(client)

    # Publish the static search UI next to the index it will read.
    site_dir = client.my_datasite / "public" / "data_search"
    copy_html_files(source=Path("./assets"), destination=site_dir)

    with open(site_dir / "datasets.json", "w") as json_file:
        json.dump(index, json_file, indent=4)
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()