redot-docs-site/fix_links.py at master · Redot-Engine/redot-docs-site · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import re

def _collect_references(docs_dir):
    """Build a mapping from reST labels to converted Markdown files.

    Walks *docs_dir* looking for ``.rst`` files that have a sibling ``.md``
    file with the same base name. For each such file, the first line of the
    form ``.. _label:`` is taken as the page's primary label.

    Args:
        docs_dir: Root directory to scan.

    Returns:
        dict mapping label (str) to the path of the matching ``.md`` file.
        If two files declare the same label, the one visited last wins.
    """
    ref_pattern = re.compile(r'^\.\.\s+_([a-zA-Z0-9_-]+):')
    ref_to_md = {}

    for root, _dirs, files in os.walk(docs_dir):
        for name in files:
            if not name.endswith('.rst'):
                continue

            rst_path = os.path.join(root, name)
            md_path = rst_path[:-4] + '.md'

            # Only .rst files that were converted to .md are link targets.
            if not os.path.exists(md_path):
                continue

            try:
                with open(rst_path, 'r', encoding='utf-8') as f:
                    for line in f:
                        match = ref_pattern.match(line.strip())
                        if match:
                            # The first label in the file is treated as the
                            # primary one; later labels are ignored.
                            ref_to_md[match.group(1)] = md_path
                            break
            except (OSError, UnicodeError) as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error reading {rst_path}: {e}")

    return ref_to_md


def _rewrite_links(md_path, start_dir, ref_to_md, link_pattern):
    """Rewrite ``[text](label)`` links in one Markdown file.

    Every link whose target is a key of *ref_to_md* is replaced by a link to
    the corresponding ``.md`` file, expressed relative to *start_dir*. Links
    with any other target are left exactly as they are. The file is only
    written back when at least one link changed.

    Args:
        md_path: Path of the Markdown file to process.
        start_dir: Directory containing *md_path*; relative links are
            computed from here.
        ref_to_md: Mapping of label to target ``.md`` path.
        link_pattern: Compiled regex with two groups (link text, link target).

    Returns:
        True if the file was modified, False otherwise.

    Raises:
        OSError, UnicodeError: If the file cannot be read, decoded or written.
    """
    # newline='' disables newline translation so the file's existing line
    # endings (LF or CRLF) survive the read/modify/write round trip.
    with open(md_path, 'r', encoding='utf-8', newline='') as f:
        content = f.read()

    def replace_link(match):
        text, link = match.group(1), match.group(2)
        target_md = ref_to_md.get(link)
        if target_md is None:
            # Not a known label: keep the original link untouched.
            return match.group(0)

        rel_path = os.path.relpath(target_md, start_dir)
        # Markdown/URL paths always use forward slashes, but relpath uses
        # the OS separator (backslash on Windows).
        rel_path = rel_path.replace(os.sep, '/')
        print(f"Updated link in {md_path}: {link} -> {rel_path}")
        return f'[{text}]({rel_path})'

    # A single regex pass replaces exactly the spans that were matched as
    # links, instead of re-scanning the whole document once per link.
    new_content = link_pattern.sub(replace_link, content)

    if new_content == content:
        return False

    with open(md_path, 'w', encoding='utf-8', newline='') as f:
        f.write(new_content)
    return True


def main():
    """Convert reST-label links in the Markdown docs into relative file links.

    Step 1 scans the ``docs`` directory (relative to the current working
    directory) for ``.rst`` files that have a sibling ``.md`` file and records
    each one's primary label. Step 2 walks every ``.md`` file under ``docs``
    and rewrites links of the form ``[text](label)`` to point at the target
    Markdown file, relative to the file containing the link. The ``.md``
    extension is kept in the generated link.

    Progress and per-file errors are printed to stdout; a file that cannot be
    read or written is reported and skipped.
    """
    docs_dir = 'docs'

    # Step 1: label -> .md path
    ref_to_md = _collect_references(docs_dir)
    print(f"Built database with {len(ref_to_md)} references.")

    # Step 2: rewrite links whose target is one of the known labels.
    # Deliberately simple pattern for inline links: [text](target)
    link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')

    for root, _dirs, files in os.walk(docs_dir):
        for name in files:
            if not name.endswith('.md'):
                continue

            md_path = os.path.join(root, name)
            try:
                _rewrite_links(md_path, root, ref_to_md, link_pattern)
            except (OSError, UnicodeError) as e:
                # Best effort: report the problem file and keep going.
                print(f"Error processing {md_path}: {e}")

# Script entry point: run the link fixer only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()