1212# See the License for the specific language governing permissions and
1313# limitations under the License.
1414"""
15- This script is responsible for cleaning the model section of the table of content by removing duplicates and sorting
16- the entries in alphabetical order.
15+ This script is responsible for ensuring that all model docs are part of the `_toctree.yml` and cleaning the model
16+ section of the table of content by removing duplicates and sorting the entries in alphabetical order.
1717
1818Usage (from the root of the repo):
1919
3131"""
3232
3333import argparse
34+ import os
3435from collections import defaultdict
3536
3637import yaml
3738
3839
39- PATH_TO_TOC = "docs/source/en/_toctree.yml"
40+ ROOT = os .path .dirname (os .path .dirname (__file__ ))
41+ TOCTREE_PATH = os .path .join (ROOT , "docs" , "source" , "en" , "_toctree.yml" )
42+ DOC_PATH = os .path .join (ROOT , "docs" , "source" , "en" , "model_doc" )
4043
4144
4245def clean_model_doc_toc (model_doc : list [dict ]) -> list [dict ]:
@@ -75,16 +78,48 @@ def clean_model_doc_toc(model_doc: list[dict]) -> list[dict]:
7578 return sorted (new_doc , key = lambda s : s ["title" ].lower ())
7679
7780
def ensure_all_models_in_toctree(model_doc: list[dict], doc_path: "str | None" = None):
    """
    Make sure that all models in the `model_doc` folder are also part of the `_toctree.yml`, and vice versa.

    Args:
        model_doc (`list[dict]`):
            The model part of the table of content: a list of modality sections, each holding its model entries
            under the `"sections"` key. Top-level entries without a `"sections"` key are ignored.
        doc_path (`str`, *optional*):
            Folder containing the model doc files. Defaults to the module-level `DOC_PATH`.

    Raises:
        ValueError: If a model doc file has no entry in the toctree, or a toctree entry has no doc file.
    """
    if doc_path is None:
        doc_path = DOC_PATH

    # Only `.md` files are model docs: stray files or sub-folders must not be reported as undocumented models.
    # `auto` is excluded here, so it is never expected among the model entries.
    all_documented_models = {
        file_name.removesuffix(".md") for file_name in os.listdir(doc_path) if file_name.endswith(".md")
    } - {"auto"}

    # Top-level entries without nested sections are skipped rather than raising a `KeyError`.
    all_models_in_toctree = {
        model_entry["local"].removeprefix("model_doc/")
        for section in model_doc
        for model_entry in section.get("sections", [])
    }

    # Sorted so that the error message is stable from one run to the next.
    documented_but_not_in_toctree = sorted(all_documented_models - all_models_in_toctree)
    in_toctree_but_not_documented = sorted(all_models_in_toctree - all_documented_models)

    errors = []
    if documented_but_not_in_toctree:
        errors.append(
            f"{documented_but_not_in_toctree} appear(s) inside the folder `model_doc`, but not in the `_toctree.yml`. "
            "Please add it/them in their corresponding section inside the `_toctree.yml`."
        )
    if in_toctree_but_not_documented:
        errors.append(
            f"{in_toctree_but_not_documented} appear(s) in the `_toctree.yml`, but not inside the folder `model_doc`. "
            "Please add a corresponding `model.md` in `model_doc`."
        )

    if errors:
        raise ValueError("\n".join(errors))
111+
112+
78113def check_model_doc (overwrite : bool = False ):
79114 """
80- Check that the content of the table of content in `_toctree.yml` is clean (no duplicates and sorted for the model
81- API doc) and potentially auto-cleans it.
115+ Check that the content of the table of content in `_toctree.yml` is up-to-date (i.e. it contains all models) and
116+ clean (no duplicates and sorted for the model API doc) and potentially auto-cleans it.
82117
83118 Args:
84119 overwrite (`bool`, *optional*, defaults to `False`):
85120 Whether to just check if the TOC is clean or to auto-clean it (when `overwrite=True`).
86121 """
87- with open (PATH_TO_TOC , encoding = "utf-8" ) as f :
122+ with open (TOCTREE_PATH , encoding = "utf-8" ) as f :
88123 content = yaml .safe_load (f .read ())
89124
90125 # Get to the API doc
@@ -100,6 +135,9 @@ def check_model_doc(overwrite: bool = False):
100135
101136 model_doc = api_doc [model_idx ]["sections" ]
102137
138+ # Make sure the toctree contains all models
139+ ensure_all_models_in_toctree (model_doc )
140+
103141 # Extract the modalities and clean them one by one.
104142 modalities_docs = [(idx , section ) for idx , section in enumerate (model_doc ) if "sections" in section ]
105143 diff = False
@@ -116,11 +154,11 @@ def check_model_doc(overwrite: bool = False):
116154 if overwrite :
117155 api_doc [model_idx ]["sections" ] = model_doc
118156 content [api_idx ]["sections" ] = api_doc
119- with open (PATH_TO_TOC , "w" , encoding = "utf-8" ) as f :
157+ with open (TOCTREE_PATH , "w" , encoding = "utf-8" ) as f :
120158 f .write (yaml .dump (content , allow_unicode = True ))
121159 else :
122160 raise ValueError (
123- "The model doc part of the table of content is not properly sorted, run `make style ` to fix this."
161+ "The model doc part of the table of content is not properly sorted, run `make fix-repo ` to fix this."
124162 )
125163
126164
0 commit comments