forked from ngrayluna/generate-wandb-python-reference
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsort_markdown_files.py
More file actions
executable file
·187 lines (143 loc) · 6.87 KB
/
sort_markdown_files.py
File metadata and controls
executable file
·187 lines (143 loc) · 6.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/env python
import os
import shutil
import glob
import re
import yaml
import argparse
import ast
from pathlib import Path
from configuration import SOURCE
def build_local_paths(root_directory):
    """Create the output folder tree described by SOURCE and record each path.

    An ``sdk`` directory is created under ``root_directory`` first. Every
    SOURCE entry is then assigned a destination directory:

    * ``"SDK"`` maps to the ``sdk`` directory itself.
    * ``"DATATYPE"``, ``"CUSTOMCHARTS"`` and ``"LAUNCH_API"`` map to
      ``sdk/<folder_name>``.
    * Every other key maps to ``<root_directory>/<folder_name>``.

    The chosen directory is created on disk (existing directories are fine)
    and stored under ``["hugo_specs"]["local_path"]`` of the returned entry.

    Args:
        root_directory (str): Root directory for the processed files.

    Returns:
        dict: A deep copy of SOURCE in which every entry's ``hugo_specs``
            has a ``local_path`` value. SOURCE itself is left unmodified.
    """
    # Function-scope import so this block does not depend on the file header.
    import copy

    # A shallow SOURCE.copy() would still share the nested "hugo_specs" dicts
    # with the imported SOURCE, so writing local_path below would silently
    # modify the module-level configuration. Deep-copy to keep it pristine.
    source_copy = copy.deepcopy(SOURCE)

    # First create the sdk directory
    sdk_dir = os.path.join(root_directory, "sdk")
    os.makedirs(sdk_dir, exist_ok=True)

    for key, config in source_copy.items():
        hugo_specs = config["hugo_specs"]
        folder_name = hugo_specs["folder_name"]

        if key == "SDK":
            # Place SDK files directly in the sdk directory, not in a subdirectory
            local_path = sdk_dir
        elif key in ("DATATYPE", "CUSTOMCHARTS", "LAUNCH_API"):
            # Place module in SDK directory
            local_path = os.path.join(sdk_dir, folder_name)
        else:
            # Place other entries directly under root_directory
            local_path = os.path.join(root_directory, folder_name)

        hugo_specs["local_path"] = local_path

        print(f"Creating directory: {local_path}")
        os.makedirs(local_path, exist_ok=True)

    return source_copy
def create_object_type_lookup(source_dict):
    """Build a reverse index from frontmatter object_type to SOURCE key.

    Each entry of ``source_dict`` carries a frontmatter string of the form
    ``"object_type: <value>"`` under ``["hugo_specs"]["frontmatter"]``.
    The text after the first ``": "`` becomes a key of the result and the
    entry's own key becomes the value. For example, an entry such as

        "LAUNCH_API": {
            "hugo_specs": {
                "frontmatter": "object_type: launch_apis_namespace",
                ...
            },
        }

    contributes ``{"launch_apis_namespace": "LAUNCH_API"}``. The mapping is
    used to decide where a markdown file belongs from its object_type.

    Args:
        source_dict (dict): The SOURCE dictionary containing the configuration.

    Returns:
        dict: Mapping of object_type value to SOURCE key. If two entries
            share an object_type, the later entry wins.
    """
    lookup = {}
    for source_key, entry in source_dict.items():
        frontmatter_line = entry["hugo_specs"]["frontmatter"]
        pieces = frontmatter_line.split(": ")
        # pieces[0] is the "object_type" label, pieces[1] is its value.
        lookup[pieces[1]] = source_key
    return lookup
def sort_markdown_files(source_directory, source_copy):
    """Copy each markdown file into the directory matching its object_type.

    Every ``*.md`` file directly inside ``source_directory`` (resolved
    against the current working directory) is inspected. Its frontmatter
    ``object_type`` is mapped to a key of ``source_copy`` and the file is
    copied into that entry's ``["hugo_specs"]["local_path"]``. Files with no
    usable frontmatter, no ``object_type``, or an unrecognised
    ``object_type`` are reported and skipped.

    Args:
        source_directory (str): Directory holding the markdown files.
        source_copy (dict): SOURCE-shaped dictionary whose entries provide
            ``hugo_specs["frontmatter"]`` and ``hugo_specs["local_path"]``.

    Returns:
        set: The destination directories that received at least one file.
    """
    # Maps object_type values from frontmatter to keys of the SOURCE
    # dictionary. Looks something like:
    # {'api': 'SDK', 'data-type': 'DATATYPE', 'public_apis_namespace': 'PUBLIC_API', 'launch_apis_namespace': 'LAUNCH_API'}
    object_type_to_key = create_object_type_lookup(source_copy)

    # Destination directories that were actually used; callers use this for
    # further processing.
    directories_created = set()

    pattern = os.path.join(os.getcwd(), source_directory, '*.md')
    for filepath in glob.glob(pattern):
        frontmatter = read_markdown_metadata(filepath)

        # read_markdown_metadata returns None when the file has no
        # frontmatter block or its YAML cannot be parsed; treat anything
        # that is not a mapping as "no object_type" instead of crashing.
        object_type = (
            frontmatter.get("object_type")
            if isinstance(frontmatter, dict)
            else None
        )
        if not object_type:
            print(f"Skipping {filepath}: No object_type in frontmatter.")
            continue

        source_key = object_type_to_key.get(object_type)
        if not source_key:
            print(f"Skipping {filepath}: Unknown object_type '{object_type}'.")
            continue

        # Get the destination directory from the SOURCE dictionary
        destination_dir = source_copy[source_key]["hugo_specs"]["local_path"]
        directories_created.add(destination_dir)

        destination_path = os.path.join(destination_dir, os.path.basename(filepath))
        print(f"Copying to {destination_path}")
        shutil.copy(filepath, destination_path)

    return directories_created
def read_markdown_metadata(filepath):
    """Read the YAML frontmatter metadata from a markdown file.

    The frontmatter is the text between a ``---`` line at the very start of
    the file and the next ``---`` marker.

    Args:
        filepath (str): Path of the markdown file to read.

    Returns:
        The value produced by ``yaml.safe_load`` for the frontmatter text
        (a dict for ordinary ``key: value`` frontmatter), or ``None`` when
        the file does not start with a frontmatter block or the YAML cannot
        be parsed. Callers must handle the ``None`` case.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(filepath, 'r', encoding='utf-8') as file:
        content = file.read()

    # Without re.MULTILINE, ^ anchors only at the start of the file, so a
    # "---" rule further down the document is not mistaken for frontmatter.
    match = re.search(r"^---\n(.*?)\n---", content, re.DOTALL)
    if not match:
        return None

    try:
        return yaml.safe_load(match.group(1))
    except yaml.YAMLError as e:
        print(f"Error parsing frontmatter in {filepath}: {e}")
        return None
def sort_functions_and_classes(filepath):
    """Move markdown files into ``functions`` and ``classes`` subdirectories.

    ``functions`` and ``classes`` directories are created inside
    ``filepath`` (resolved against the current working directory). Each
    ``*.md`` file directly inside ``filepath`` is then moved according to
    its ``data_type_classification`` frontmatter value: values containing
    ``"function"`` go to ``functions``, values containing ``"class"`` go to
    ``classes``. Files with no usable frontmatter or no classification are
    reported and left where they are, as are files matching neither word.

    Args:
        filepath (str): Directory whose markdown files should be sorted.

    Returns:
        None
    """
    base_dir = os.path.join(os.getcwd(), filepath)

    # Create a new directory for functions and classes
    functions_dir = os.path.join(base_dir, "functions")
    classes_dir = os.path.join(base_dir, "classes")
    os.makedirs(functions_dir, exist_ok=True)
    os.makedirs(classes_dir, exist_ok=True)

    # Move the functions and classes into their respective directories.
    # A separate loop name keeps the ``filepath`` argument from being shadowed.
    for markdown_path in glob.glob(os.path.join(base_dir, '*.md')):
        frontmatter = read_markdown_metadata(markdown_path)

        # The metadata reader may return None (no frontmatter / bad YAML).
        datatype = (
            frontmatter.get("data_type_classification")
            if isinstance(frontmatter, dict)
            else None
        )
        if not datatype:
            print(f"Skipping {markdown_path}: No data_type_classification in frontmatter.")
            # Without this, the membership tests below would run on None
            # and raise TypeError.
            continue

        if "function" in datatype:
            shutil.move(markdown_path, functions_dir)
        elif "class" in datatype:
            shutil.move(markdown_path, classes_dir)
    return
def main(args):
    """Sort generated markdown files into the destination folder structure.

    Steps:
        1. Build the destination folders and the ``local_path`` mapping.
        2. Copy every markdown file into the folder matching its
           ``object_type`` frontmatter.
        3. If any file landed in the SDK folder, split that folder's files
           into ``functions`` and ``classes`` subdirectories.

    Args:
        args: Parsed command-line arguments providing ``source_directory``
            and ``destination_directory`` attributes.

    Returns:
        None
    """
    source_directory = args.source_directory
    root_directory = args.destination_directory

    # Step 1: Build folder structure and local_path mapping
    source_copy = build_local_paths(root_directory)

    # Step 2: Sort markdown files based on frontmatter.
    # Returns the set of directories that received files, e.g.
    # {'python/sdk/data-type', 'python/automations', 'python/sdk/actions', ...}
    directories_created = sort_markdown_files(source_directory, source_copy)

    # SDK files are placed directly in the sdk directory, so that is the
    # folder whose contents get split into functions and classes.
    sdk_path = source_copy["SDK"]["hugo_specs"]["local_path"]

    # If nothing was copied into the sdk directory there is nothing to
    # split; passing None on to the next step would raise TypeError.
    if sdk_path not in directories_created:
        print(f"No markdown files were copied to {sdk_path}; skipping function/class sorting.")
        return

    print(f"Found global_dir_root: {sdk_path}")

    # Step 3: Sort functions and classes into their own directories
    sort_functions_and_classes(sdk_path)
# Script entry point: parse the two directory options and run the pipeline.
if __name__ == "__main__":
    cli = argparse.ArgumentParser()
    # Where the generated markdown files currently live.
    cli.add_argument(
        "--source_directory",
        default="wandb_sdk_docs",
        help="Directory where markdown files exist",
    )
    # Where the sorted folder structure is written.
    cli.add_argument(
        "--destination_directory",
        default="python",
        help="Root directory for processed files",
    )
    args = cli.parse_args()
    main(args)