Skip to content

Commit e8ae373

Browse files
authored
[consistency] Ensure models are added to the _toctree.yml (#43264)
add check
1 parent c85be98 commit e8ae373

1 file changed

Lines changed: 46 additions & 8 deletions

File tree

utils/check_doc_toc.py

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
"""
15-
This script is responsible for cleaning the model section of the table of content by removing duplicates and sorting
16-
the entries in alphabetical order.
15+
This script is responsible for ensuring that all model docs are part of the `_toctree.yml` and cleaning the model
16+
section of the table of content by removing duplicates and sorting the entries in alphabetical order.
1717
1818
Usage (from the root of the repo):
1919
@@ -31,12 +31,15 @@
3131
"""
3232

3333
import argparse
34+
import os
3435
from collections import defaultdict
3536

3637
import yaml
3738

3839

39-
PATH_TO_TOC = "docs/source/en/_toctree.yml"
40+
ROOT = os.path.dirname(os.path.dirname(__file__))
41+
TOCTREE_PATH = os.path.join(ROOT, "docs", "source", "en", "_toctree.yml")
42+
DOC_PATH = os.path.join(ROOT, "docs", "source", "en", "model_doc")
4043

4144

4245
def clean_model_doc_toc(model_doc: list[dict]) -> list[dict]:
@@ -75,16 +78,48 @@ def clean_model_doc_toc(model_doc: list[dict]) -> list[dict]:
7578
return sorted(new_doc, key=lambda s: s["title"].lower())
7679

7780

81+
def ensure_all_models_in_toctree(model_doc: list[dict], doc_path=None) -> None:
    """
    Make sure that all models in the `model_doc` folder are also part of the `_toctree.yml`, and the other way
    around. Raise if it's not the case.

    Args:
        model_doc (`list[dict]`):
            The entries of the model part of the table of content. Entries holding a `"sections"` list are
            scanned; each item of that list must have a `"local"` key (e.g. `model_doc/bert`).
        doc_path (`str`, *optional*):
            Folder containing the model doc files. Defaults to the module-level `DOC_PATH`.

    Raises:
        ValueError: If some doc file is not referenced in the toctree, or some toctree entry has no doc file.
    """
    if doc_path is None:
        doc_path = DOC_PATH

    # Only `.md` files count as model docs, so stray files (`.DS_Store`, editor backups, ...) are not reported
    # as models missing from the toctree. `auto` is never expected among the toctree entries compared here.
    all_documented_models = {
        file_name.removesuffix(".md") for file_name in os.listdir(doc_path) if file_name.endswith(".md")
    } - {"auto"}

    # Entries without a "sections" key list no model pages; skip them instead of failing with a `KeyError`
    # (same filter as the one applied in `check_model_doc`).
    all_models_in_toctree = {
        model_entry["local"].removeprefix("model_doc/")
        for section in model_doc
        for model_entry in section.get("sections", [])
    }

    # Sorted so that the error message is deterministic from one run to the next.
    documented_but_not_in_toctree = sorted(all_documented_models - all_models_in_toctree)
    in_toctree_but_not_documented = sorted(all_models_in_toctree - all_documented_models)

    error_lines = []
    if documented_but_not_in_toctree:
        error_lines.append(
            f"{documented_but_not_in_toctree} appear(s) inside the folder `model_doc`, but not in the `_toctree.yml`. "
            "Please add it/them in their corresponding section inside the `_toctree.yml`."
        )
    if in_toctree_but_not_documented:
        error_lines.append(
            f"{in_toctree_but_not_documented} appear(s) in the `_toctree.yml`, but not inside the folder `model_doc`. "
            "Please add a corresponding `model.md` in `model_doc`."
        )

    # Both sets match exactly: everything alright.
    if error_lines:
        raise ValueError("\n".join(error_lines))
111+
112+
78113
def check_model_doc(overwrite: bool = False):
79114
"""
80-
Check that the content of the table of content in `_toctree.yml` is clean (no duplicates and sorted for the model
81-
API doc) and potentially auto-cleans it.
115+
Check that the content of the table of content in `_toctree.yml` is up-to-date (i.e. it contains all models) and
116+
clean (no duplicates and sorted for the model API doc) and potentially auto-cleans it.
82117
83118
Args:
84119
overwrite (`bool`, *optional*, defaults to `False`):
85120
Whether to just check if the TOC is clean or to auto-clean it (when `overwrite=True`).
86121
"""
87-
with open(PATH_TO_TOC, encoding="utf-8") as f:
122+
with open(TOCTREE_PATH, encoding="utf-8") as f:
88123
content = yaml.safe_load(f.read())
89124

90125
# Get to the API doc
@@ -100,6 +135,9 @@ def check_model_doc(overwrite: bool = False):
100135

101136
model_doc = api_doc[model_idx]["sections"]
102137

138+
# Make sure the toctree contains all models
139+
ensure_all_models_in_toctree(model_doc)
140+
103141
# Extract the modalities and clean them one by one.
104142
modalities_docs = [(idx, section) for idx, section in enumerate(model_doc) if "sections" in section]
105143
diff = False
@@ -116,11 +154,11 @@ def check_model_doc(overwrite: bool = False):
116154
if overwrite:
117155
api_doc[model_idx]["sections"] = model_doc
118156
content[api_idx]["sections"] = api_doc
119-
with open(PATH_TO_TOC, "w", encoding="utf-8") as f:
157+
with open(TOCTREE_PATH, "w", encoding="utf-8") as f:
120158
f.write(yaml.dump(content, allow_unicode=True))
121159
else:
122160
raise ValueError(
123-
"The model doc part of the table of content is not properly sorted, run `make style` to fix this."
161+
"The model doc part of the table of content is not properly sorted, run `make fix-repo` to fix this."
124162
)
125163

126164

0 commit comments

Comments
 (0)