22import re
33
44
# Directory that receives the generated example `.mdx` pages.
DOCS_EXAMPLES_DIR = "docs/source/examples"

# Prefix placed in front of a local image path when the image exists on disk.
# NOTE(review): presumably climbs from DOCS_EXAMPLES_DIR back to the
# repository root -- confirm against the docs build layout.
DOCS_EXAMPLES_RELATIVE_PREFIX = "../../../"

# Base URL used for images that cannot be found on disk.
DOCUMENTATION_IMAGES_BASE_URL = (
    "https://huggingface.co/datasets/huggingface/documentation-images"
    "/resolve/main/google-cloud"
)

# Lower-case suffixes treated as images. Kept as a tuple so it can be handed
# directly to `str.endswith`.
IMAGE_EXTENSIONS = (
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".webp",
    ".svg",
)
11+
12+
513def process_readme_files ():
614 print ("Processing README.md files from examples/gke and examples/cloud-run..." )
7- os .makedirs ("docs/source/examples" , exist_ok = True )
15+ os .makedirs (DOCS_EXAMPLES_DIR , exist_ok = True )
816
917 for dir in ["gke" , "cloud-run" , "vertex-ai/notebooks" ]:
1018 for root , _ , files in os .walk (f"examples/{ dir } " ):
@@ -13,6 +21,64 @@ def process_readme_files():
1321 process_file (root , file , dir )
1422
1523
def is_relative_image_path(path):
    """Tell whether ``path`` is a ``./``-relative reference to an image file.

    A trailing ``#fragment`` and/or ``?query`` is ignored for the check. The
    extension is compared case-insensitively against ``IMAGE_EXTENSIONS``.

    Args:
        path: The raw path string taken from an image reference.

    Returns:
        ``True`` when the path starts with ``./`` and ends with one of the
        known image extensions, ``False`` otherwise.
    """
    # Cut the fragment first, then the query string, keeping only the path.
    bare_path = path.partition("#")[0].partition("?")[0]

    if not bare_path.startswith("./"):
        return False
    return bare_path.lower().endswith(IMAGE_EXTENSIONS)
27+
28+
def build_local_image_path(root, image_path):
    """Return ``image_path`` joined onto ``root`` as a normalized path.

    Args:
        root: Directory the image reference is relative to.
        image_path: Path as written in the document (e.g. ``./imgs/a.png``).

    Returns:
        The joined path with redundant ``.``/``..`` segments collapsed by
        ``os.path.normpath``.
    """
    joined = os.path.join(root, image_path)
    return os.path.normpath(joined)
31+
32+
def build_docs_image_path(local_image_path):
    """Return the docs-relative path for an image that exists locally.

    The result is ``DOCS_EXAMPLES_RELATIVE_PREFIX`` immediately followed by
    ``local_image_path``; no whitespace is inserted between or after them,
    since any stray space would end up inside the emitted image reference.

    Args:
        local_image_path: Normalized path of the image on disk.

    Returns:
        The prefixed path using ``/`` as separator regardless of platform.
    """
    docs_path = f"{DOCS_EXAMPLES_RELATIVE_PREFIX}{local_image_path}"
    # Markdown/HTML references always use forward slashes.
    return docs_path.replace(os.sep, "/")
35+
36+
def build_hub_image_url(local_image_path):
    """Return the hosted URL for an image that is not available locally.

    The URL is ``DOCUMENTATION_IMAGES_BASE_URL``, a single ``/``, then
    ``local_image_path`` -- with no surrounding whitespace, which would
    otherwise produce an invalid URL.

    Args:
        local_image_path: Normalized path of the image as it would be on disk.

    Returns:
        The absolute URL using ``/`` as separator regardless of platform.
    """
    hub_url = f"{DOCUMENTATION_IMAGES_BASE_URL}/{local_image_path}"
    # Normalize platform-specific separators coming from the local path.
    return hub_url.replace(os.sep, "/")
41+
42+
def resolve_relative_image_path(root, image_path):
    """Map a relative image reference to the location the docs should use.

    Args:
        root: Directory of the file that contains the reference.
        image_path: The relative path as written in that file.

    Returns:
        The result of ``build_docs_image_path`` when the image exists on
        disk, otherwise the result of ``build_hub_image_url``.
    """
    candidate = build_local_image_path(root, image_path)

    # The existence check decides between the local and the hosted builder.
    if os.path.exists(candidate):
        builder = build_docs_image_path
    else:
        builder = build_hub_image_url
    return builder(candidate)
50+
51+
def replace_relative_image_references(content, root):
    """Rewrite relative image references in ``content`` to resolved locations.

    Both Markdown images (``![alt](path)``) and HTML ``<img ... src="path">``
    tags are handled. A reference is rewritten only when
    ``is_relative_image_path`` accepts its path; every other match is
    returned unchanged.

    Args:
        content: Text of the document being processed.
        root: Directory of the source file, used to resolve relative paths.

    Returns:
        ``content`` with each qualifying image path replaced by the value of
        ``resolve_relative_image_path(root, path)``.
    """

    def replace_markdown_image(match):
        image_path = match.group("path")
        if not is_relative_image_path(image_path):
            return match.group(0)

        resolved_path = resolve_relative_image_path(root, image_path)
        # Re-emit the complete image reference, keeping the original alt
        # text; returning an empty string would delete the image.
        return f"![{match.group('alt')}]({resolved_path})"

    def replace_html_image(match):
        image_path = match.group("path")
        if not is_relative_image_path(image_path):
            return match.group(0)

        resolved_path = resolve_relative_image_path(root, image_path)
        # Rebuild the tag from its captured pieces so only the `src` value
        # changes, even if the same text also appears in another attribute
        # earlier in the tag (e.g. `alt`). Group 1 is everything up to and
        # including the opening quote, group 3 the closing quote onwards.
        return f"{match.group(1)}{resolved_path}{match.group(3)}"

    content = re.sub(
        r"!\[(?P<alt>[^\]]*)\]\((?P<path>[^)]+)\)",
        replace_markdown_image,
        content,
    )
    content = re.sub(
        r'(<img\b[^>]*\bsrc=["\'])(?P<path>[^"\']+)(["\'][^>]*>)',
        replace_html_image,
        content,
    )
    return content
80+
81+
1682def process_file (root , file , dir ):
1783 dir_name = dir if not dir .__contains__ ("/" ) else dir .replace ("/" , "-" )
1884
@@ -21,9 +87,9 @@ def process_file(root, file, dir):
2187 base = os .path .basename (subdir )
2288
2389 if file_path == f"examples/{ dir } /README.md" :
24- target = f"docs/source/examples /{ dir_name } -index.mdx"
90+ target = f"{ DOCS_EXAMPLES_DIR } /{ dir_name } -index.mdx"
2591 else :
26- target = f"docs/source/examples /{ dir_name } -{ base } .mdx"
92+ target = f"{ DOCS_EXAMPLES_DIR } /{ dir_name } -{ base } .mdx"
2793
2894 print (f"Processing { file_path } to { target } " )
2995 with open (file_path , "r" ) as f :
@@ -33,23 +99,18 @@ def process_file(root, file, dir):
3399 # We only uncomment the metadata block to avoid uncommenting other HTML comments
34100 content = re .sub (r"<!--\s*(---.*?---)\s*-->" , r"\1" , content , flags = re .DOTALL )
35101
102+ content = replace_relative_image_references (content , root )
103+
36104 # Replace image and link paths
37105 content = re .sub (
38- r"\(\./(imgs|assets)/([^)]*\.png)\)" ,
39- r"(https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/"
40- + root
41- + r"/\1/\2)" ,
42- content ,
43- )
44- content = re .sub (
45- r"\(\.\./([^)]+)\)" ,
106+ r"(?<!!)\(\.\./([^)]+)\)" ,
46107 r"(https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/"
47108 + dir
48109 + r"/\1)" ,
49110 content ,
50111 )
51112 content = re .sub (
52- r"\(\.\/([^)]+)\)" ,
113+ r"(?<!!) \(\.\/([^)]+)\)" ,
53114 r"(https://github.com/huggingface/Google-Cloud-Containers/tree/main/"
54115 + root
55116 + r"/\1)" ,
0 commit comments