Skip to content

Do not rebuild when it's not needed (like there's no updates). #171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 20, 2023
174 changes: 120 additions & 54 deletions build_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@
import sys
from bisect import bisect_left as bisect
from collections import OrderedDict
from datetime import datetime as dt, timezone
from pathlib import Path
from string import Template
from textwrap import indent
from time import perf_counter, sleep
from typing import Iterable
from urllib.parse import urljoin

Expand Down Expand Up @@ -246,8 +248,6 @@ def run(cmd, cwd=None) -> subprocess.CompletedProcess:
cmdstring,
indent("\n".join(result.stdout.split("\n")[-20:]), " "),
)
else:
logging.debug("Run: %r OK", cmdstring)
result.check_returncode()
return result

Expand Down Expand Up @@ -292,7 +292,13 @@ def get_ref(self, pattern):
return self.run("show-ref", "-s", "tags/" + pattern).stdout.strip()

def fetch(self):
self.run("fetch")
"""Try (and retry) to run git fetch."""
try:
return self.run("fetch")
except subprocess.CalledProcessError as err:
logging.error("'git fetch' failed (%s), retrying...", err.stderr)
sleep(5)
return self.run("fetch")

def switch(self, branch_or_tag):
"""Reset and cleans the repository to the given branch or tag."""
Expand Down Expand Up @@ -354,20 +360,6 @@ def locate_nearest_version(available_versions, target_version):
return tuple_to_version(found)


def translation_branch(repo: Repository, needed_version: str):
"""Some cpython versions may be untranslated, being either too old or
too new.

This function looks for remote branches on the given repo, and
returns the name of the nearest existing branch.

It could be enhanced to also search for tags.
"""
remote_branches = repo.run("branch", "-r").stdout
branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M)
return locate_nearest_version(branches, needed_version)


@contextmanager
def edit(file: Path):
"""Context manager to edit a file "in place", use it as:
Expand Down Expand Up @@ -612,11 +604,15 @@ def parse_args():
def setup_logging(log_directory: Path):
"""Setup logging to stderr if ran by a human, or to a file if ran from a cron."""
if sys.stderr.isatty():
logging.basicConfig(format="%(levelname)s:%(message)s", stream=sys.stderr)
logging.basicConfig(
format="%(asctime)s %(levelname)s: %(message)s", stream=sys.stderr
)
else:
log_directory.mkdir(parents=True, exist_ok=True)
handler = logging.handlers.WatchedFileHandler(log_directory / "docsbuild.log")
handler.setFormatter(logging.Formatter("%(levelname)s:%(asctime)s:%(message)s"))
handler.setFormatter(
logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
)
logging.getLogger().addHandler(handler)
logging.getLogger().setLevel(logging.DEBUG)

Expand Down Expand Up @@ -652,19 +648,19 @@ def full_build(self):

def run(self) -> bool:
"""Build and publish a Python doc, for a language, and a version."""
start_time = perf_counter()
logging.info("Running.")
try:
self.cpython_repo.switch(self.version.branch_or_tag)
if self.language.tag != "en":
self.clone_translation()
self.build_venv()
self.build()
self.copy_build_to_webroot()
if self.should_rebuild():
self.build_venv()
self.build()
self.copy_build_to_webroot()
self.save_state(build_duration=perf_counter() - start_time)
except Exception as err:
logging.exception(
"Exception while building %s version %s",
self.language.tag,
self.version.name,
)
logging.exception("Badly handled exception, human, please help.")
if sentry_sdk:
sentry_sdk.capture_exception(err)
return False
Expand All @@ -676,10 +672,13 @@ def checkout(self) -> Path:
return self.build_root / "cpython"

def clone_translation(self):
"""Clone the translation repository from github.
self.translation_repo.update()
self.translation_repo.switch(self.translation_branch)

@property
def translation_repo(self):
"""See PEP 545 for translations repository naming convention."""

See PEP 545 for repository naming convention.
"""
locale_repo = f"https://github.com/python/python-docs-{self.language.tag}.git"
locale_clone_dir = (
self.build_root
Expand All @@ -688,17 +687,25 @@ def clone_translation(self):
/ self.language.iso639_tag
/ "LC_MESSAGES"
)
repo = Repository(locale_repo, locale_clone_dir)
repo.update()
repo.switch(translation_branch(repo, self.version.name))
return Repository(locale_repo, locale_clone_dir)

@property
def translation_branch(self):
"""Some cpython versions may be untranslated, being either too old or
too new.

This function looks for remote branches on the given repo, and
returns the name of the nearest existing branch.

It could be enhanced to also search for tags.
"""
remote_branches = self.translation_repo.run("branch", "-r").stdout
branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M)
return locate_nearest_version(branches, self.version.name)

def build(self):
"""Build this version/language doc."""
logging.info(
"Build start for version: %s, language: %s",
self.version.name,
self.language.tag,
)
logging.info("Build start.")
sphinxopts = list(self.language.sphinxopts)
sphinxopts.extend(["-q"])
if self.language.tag != "en":
Expand Down Expand Up @@ -774,11 +781,7 @@ def build(self):
setup_switchers(
self.versions, self.languages, self.checkout / "Doc" / "build" / "html"
)
logging.info(
"Build done for version: %s, language: %s",
self.version.name,
self.language.tag,
)
logging.info("Build done.")

def build_venv(self):
"""Build a venv for the specific Python version.
Expand All @@ -799,11 +802,7 @@ def build_venv(self):

def copy_build_to_webroot(self):
"""Copy a given build to the appropriate webroot with appropriate rights."""
logging.info(
"Publishing start for version: %s, language: %s",
self.version.name,
self.language.tag,
)
logging.info("Publishing start.")
self.www_root.mkdir(parents=True, exist_ok=True)
if self.language.tag == "en":
target = self.www_root / self.version.name
Expand Down Expand Up @@ -873,7 +872,7 @@ def copy_build_to_webroot(self):
]
)
if self.full_build:
logging.debug("Copying dist files")
logging.debug("Copying dist files.")
run(
[
"chown",
Expand Down Expand Up @@ -916,11 +915,69 @@ def copy_build_to_webroot(self):
purge(*prefixes)
for prefix in prefixes:
purge(*[prefix + p for p in changed])
logging.info(
"Publishing done for version: %s, language: %s",
self.version.name,
self.language.tag,
)
logging.info("Publishing done")

def should_rebuild(self):
state = self.load_state()
if not state:
logging.info("Should rebuild: no previous state found.")
return True
cpython_sha = self.cpython_repo.run("rev-parse", "HEAD").stdout.strip()
if self.language.tag != "en":
translation_sha = self.translation_repo.run(
"rev-parse", "HEAD"
).stdout.strip()
if translation_sha != state["translation_sha"]:
logging.info(
"Should rebuild: new translations (from %s to %s)",
state["translation_sha"],
translation_sha,
)
return True
if cpython_sha != state["cpython_sha"]:
diff = self.cpython_repo.run(
"diff", "--name-only", state["cpython_sha"], cpython_sha
).stdout
if "Doc/" in diff:
logging.info(
"Should rebuild: Doc/ has changed (from %s to %s)",
state["cpython_sha"],
cpython_sha,
)
return True
logging.info("Nothing changed, no rebuild needed.")
return False

def load_state(self) -> dict:
state_file = self.build_root / "state.toml"
try:
return tomlkit.loads(state_file.read_text(encoding="UTF-8"))[
f"/{self.language.tag}/{self.version.name}/"
]
except KeyError:
return {}

def save_state(self, build_duration: float):
"""Save current cpython sha1 and current translation sha1.

Using this we can deduce if a rebuild is needed or not.
"""
state_file = self.build_root / "state.toml"
try:
states = tomlkit.parse(state_file.read_text(encoding="UTF-8"))
except FileNotFoundError:
states = tomlkit.document()

state = {}
state["cpython_sha"] = self.cpython_repo.run("rev-parse", "HEAD").stdout.strip()
if self.language.tag != "en":
state["translation_sha"] = self.translation_repo.run(
"rev-parse", "HEAD"
).stdout.strip()
state["last_build"] = dt.now(timezone.utc)
state["last_build_duration"] = build_duration
states[f"/{self.language.tag}/{self.version.name}/"] = state
state_file.write_text(tomlkit.dumps(states), encoding="UTF-8")


def symlink(www_root: Path, language: Language, directory: str, name: str, group: str):
Expand Down Expand Up @@ -1063,6 +1120,11 @@ def build_docs(args) -> bool:
cpython_repo.update()
while todo:
version, language = todo.pop()
logging.root.handlers[0].setFormatter(
logging.Formatter(
f"%(asctime)s %(levelname)s {language.tag}/{version.name}: %(message)s"
)
)
if sentry_sdk:
with sentry_sdk.configure_scope() as scope:
scope.set_tag("version", version.name)
Expand All @@ -1071,6 +1133,10 @@ def build_docs(args) -> bool:
version, versions, language, languages, cpython_repo, **vars(args)
)
all_built_successfully &= builder.run()
logging.root.handlers[0].setFormatter(
logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
)

build_sitemap(versions, languages, args.www_root, args.group)
build_404(args.www_root, args.group)
build_robots_txt(
Expand Down