@nox.session
def build(session):
    """Build the HTML documentation with ``make clean html``.

    Installs the packages pinned in ``requirements.txt`` and sets
    ``SPHINXOPTS`` to ``-W`` in the session environment before running make.
    """
    session.interpreter = 'python3.6'
    session.install('-r', 'requirements.txt')
    # Treat warnings as errors.
    session.env['SPHINXOPTS'] = '-W'
    session.run('make', 'clean', 'html')


def linkmonitor(session, command):
    """Run the link monitor script with the given sub-command.

    Shared implementation behind the ``checklinks`` and ``updatelinks``
    sessions. ``command`` is passed through unchanged as the single argument
    to ``scripts/linkmonitor/linkmonitor.py``.
    """
    html_output = os.path.join('build', 'html')
    # The script inspects built HTML, so a missing build directory is
    # reported through session.error before anything is installed.
    if not os.path.exists(html_output):
        session.error('HTML output not available, run nox -s build first.')

    session.interpreter = 'python3.6'
    session.install('-r', 'scripts/linkmonitor/requirements.txt')

    script = 'scripts/linkmonitor/linkmonitor.py'
    session.run('python', script, command)


@nox.session
def checklinks(session):
    """Run the link monitor in ``check`` mode."""
    linkmonitor(session, 'check')


@nox.session
def updatelinks(session):
    """Run the link monitor in ``update`` mode."""
    linkmonitor(session, 'update')
appveyor.html#access-to-the-built-wheels, + appveyor.html#adding-appveyor-support-to-your-project, appveyor.html#additional-notes, + appveyor.html#appveyor-yml, appveyor.html#automatically-uploading-wheels, appveyor.html#background, + appveyor.html#contents, appveyor.html#external-dependencies, appveyor.html#setting-up, + appveyor.html#support-script, appveyor.html#support-scripts, appveyor.html#supporting-windows-using-appveyor, + appveyor.html#testing-with-tox, contribute.html, contribute.html#audience, contribute.html#contribute-to-this-guide, + contribute.html#contributing-style-guide, contribute.html#conventions-and-mechanics, + contribute.html#purpose, contribute.html#scope, contribute.html#style-guide, contribute.html#voice-and-tone, + current.html, current.html#installation-tool-recommendations, current.html#packaging-tool-recommendations, + current.html#tool-recommendations, deployment.html, deployment.html#application-bundles, + deployment.html#application-deployment, deployment.html#configuration-management, + deployment.html#contents, deployment.html#os-packaging-installers, deployment.html#overview, + deployment.html#pynsist, deployment.html#supporting-multiple-hardware-platforms, + deployment.html#windows, distributing.html, distributing.html#author, distributing.html#choosing-a-versioning-scheme, + distributing.html#classifiers, distributing.html#configuring-your-project, distributing.html#console-scripts, + distributing.html#contents, distributing.html#create-an-account, distributing.html#data-files, + distributing.html#date-based-versioning, distributing.html#description, distributing.html#entry-points, + distributing.html#hybrid-schemes, distributing.html#initial-files, distributing.html#install-requires, + distributing.html#keywords, distributing.html#license, distributing.html#local-version-identifiers, + distributing.html#manifest-in, distributing.html#name, distributing.html#package-data, + distributing.html#packages, 
distributing.html#packaging-and-distributing-projects, + distributing.html#packaging-your-project, distributing.html#platform-wheels, distributing.html#pre-release-versioning, + distributing.html#pure-python-wheels, distributing.html#readme-rst, distributing.html#requirements-for-packaging-and-distributing, + distributing.html#scheme-choices, distributing.html#scripts, distributing.html#semantic-versioning-preferred, + distributing.html#serial-versioning, distributing.html#setup-args, distributing.html#setup-cfg, + distributing.html#setup-name, distributing.html#setup-py, distributing.html#source-distributions, + distributing.html#standards-compliance-for-interoperability, distributing.html#universal-wheels, + distributing.html#upload-your-distributions, distributing.html#uploading-your-project-to-pypi, + distributing.html#url, distributing.html#version, distributing.html#wheels, distributing.html#working-in-development-mode, + distributing.html#your-package, extensions.html, extensions.html#alternatives-for-low-level-system-access, + extensions.html#alternatives-to-handcoded-accelerator-modules, extensions.html#alternatives-to-handcoded-wrapper-modules, + extensions.html#an-overview-of-binary-extensions, extensions.html#binary-extensions, + extensions.html#building-binary-extensions, extensions.html#contents, extensions.html#disadvantages, + extensions.html#implementing-binary-extensions, extensions.html#publishing-binary-extensions, + extensions.html#setting-up-a-build-environment-on-windows, extensions.html#use-cases, + glossary.html, glossary.html#glossary, glossary.html#term-binary-distribution, glossary.html#term-built-distribution, + glossary.html#term-distribution-package, glossary.html#term-egg, glossary.html#term-extension-module, + glossary.html#term-import-package, glossary.html#term-known-good-set-kgs, glossary.html#term-module, + glossary.html#term-package-index, glossary.html#term-per-project-index, glossary.html#term-project, + 
glossary.html#term-pure-module, glossary.html#term-python-package-index-pypi, glossary.html#term-python-packaging-authority-pypa, + glossary.html#term-release, glossary.html#term-requirement, glossary.html#term-requirement-specifier, + glossary.html#term-requirements-file, glossary.html#term-setup-py, glossary.html#term-source-archive, + glossary.html#term-source-distribution-or-sdist, glossary.html#term-system-package, + glossary.html#term-version-specifier, glossary.html#term-virtual-environment, glossary.html#term-wheel, + glossary.html#term-working-set, index.html, index.html#python-packaging-user-guide, + install_requirements_linux.html, install_requirements_linux.html#arch-linux, install_requirements_linux.html#centos-rhel, + install_requirements_linux.html#debian-ubuntu, install_requirements_linux.html#fedora, + install_requirements_linux.html#installing-pip-setuptools-wheel-with-linux-package-managers, + install_requirements_linux.html#opensuse, installing.html, installing.html#contents, + installing.html#creating-and-using-virtual-environments, installing.html#creating-virtual-environments, + installing.html#install-pip-setuptools-and-wheel, installing.html#installing-from-a-local-src-tree, + installing.html#installing-from-local-archives, installing.html#installing-from-other-indexes, + installing.html#installing-from-other-sources, installing.html#installing-from-pypi, + installing.html#installing-from-vcs, installing.html#installing-packages, installing.html#installing-prereleases, + installing.html#installing-requirements, installing.html#installing-setuptools-extras, + installing.html#installing-to-the-user-site, installing.html#optionally-create-a-virtual-environment, + installing.html#requirements-files, installing.html#requirements-for-installing-packages, + installing.html#source-distributions-vs-wheels, installing.html#upgrading-packages, + installing.html#use-pip-for-installing, key_projects.html, key_projects.html#bandersnatch, + 
key_projects.html#bento, key_projects.html#buildout, key_projects.html#conda, key_projects.html#devpi, + key_projects.html#distlib, key_projects.html#distutils, key_projects.html#easy-install, + key_projects.html#ensurepip, key_projects.html#hashdist, key_projects.html#non-pypa-projects, + key_projects.html#packaging, key_projects.html#pex, key_projects.html#pip, key_projects.html#project-summaries, + key_projects.html#projects, key_projects.html#pypa-projects, key_projects.html#python-packaging-user-guide, + key_projects.html#setuptools, key_projects.html#spack, key_projects.html#standard-library-projects, + key_projects.html#twine, key_projects.html#venv, key_projects.html#virtualenv, key_projects.html#warehouse, + key_projects.html#wheel, mirrors.html, mirrors.html#caching-with-devpi, mirrors.html#caching-with-pip, + mirrors.html#complete-mirror-with-bandersnatch, mirrors.html#contents, mirrors.html#pypi-mirrors-and-caches, + multi_version_install.html, multi_version_install.html#multi-version-installs, multiple_python_versions.html, + multiple_python_versions.html#automated-testing-and-continuous-integration, multiple_python_versions.html#contents, + multiple_python_versions.html#supporting-multiple-python-versions, multiple_python_versions.html#tools-for-single-source-python-packages, + multiple_python_versions.html#what-s-in-which-python, namespace_packages.html, namespace_packages.html#creating-a-namespace-package, + namespace_packages.html#native-namespace-packages, namespace_packages.html#packaging-namespace-packages, + namespace_packages.html#pkg-resources-style-namespace-packages, namespace_packages.html#pkgutil-style-namespace-packages, + patching.html, patching.html#patching-forking, pip_easy_install.html, pip_easy_install.html#pip-vs-easy-install, + platforms.html, platforms.html#platform-integtation, plugin_discovery.html, plugin_discovery.html#plugin-creation-and-discovery, + plugin_discovery.html#using-namespace-packages, 
plugin_discovery.html#using-naming-convention, + plugin_discovery.html#using-package-metadata, quickstart.html, quickstart.html#quickstart, + requirements.html, requirements.html#contents, requirements.html#install-requires, + requirements.html#install-requires-vs-requirements-files, requirements.html#requirements-files, + science.html, science.html#building-from-source, science.html#contents, science.html#installing-scientific-packages, + science.html#linux-distribution-packages, science.html#mac-os-x-installers-and-package-managers, + science.html#numpy-and-the-science-stack, science.html#scipy-distributions, science.html#spack, + science.html#the-conda-cross-platform-package-manager, science.html#windows-installers, + search.html, search.html#fallback, search.html#search-documentation, search.html#search-progress, + search.html#search-results, search.html#searchindexloader, self_hosted_repository.html, + self_hosted_repository.html#hosting-your-own-simple-repository, self_hosted_repository.html#manual-repository, + single_source_version.html, single_source_version.html#single-sourcing-the-project-version, + single_source_version.html#single-sourcing-the-version, specifications.html, specifications.html#binary-distribution-format, + specifications.html#core-metadata, specifications.html#declaring-build-system-dependencies, + specifications.html#dependency-specifiers, specifications.html#description-content-type, + specifications.html#package-distribution-metadata, specifications.html#package-index-interfaces, + specifications.html#platform-compatibility-tags, specifications.html#provides-extra-multiple-use, + specifications.html#pypa-specifications, specifications.html#recording-installed-distributions, + specifications.html#simple-repository-api, specifications.html#source-distribution-format, + specifications.html#specifications, specifications.html#version-specifiers, support.html, + support.html#how-to-get-support, tutorial.html, wheel_egg.html, 
HERE = os.path.abspath(os.path.dirname(__file__))
INVENTORY_FILENAME = os.path.join(HERE, 'inventory.yaml')
REDIRECTS_FILENAME = os.path.join(HERE, 'redirects.yaml')
ROOT = os.path.abspath(os.path.join(HERE, '..', '..'))
HTML_DIR = os.path.join(ROOT, 'build', 'html')
IGNORED_FILES = [
    'genindex.html'
]


def _is_generated_id(anchor):
    """Return True for auto-numbered ids such as ``id1`` or ``id23``.

    A bare ``startswith('id')`` test would also discard genuine named anchors
    that merely begin with those letters (``ideas``, ``identifiers``...).
    """
    return anchor.startswith('id') and anchor[2:].isdigit()


def find_all_named_anchors(filename):
    """Return the set of ``<filename>#<id>`` links for named ids in a file.

    Auto-numbered ids, ``index-*`` ids and the ``searchbox`` id are skipped.
    """
    # NOTE(review): assumes the HTML is UTF-8 (the Sphinx default output
    # encoding); reading with the locale default can fail on non-ASCII pages.
    with io.open(filename, 'r', encoding='utf-8') as html_file:
        soup = BeautifulSoup(html_file, 'html.parser')

    links = set()
    for tag in soup.find_all(id=True):
        anchor = tag['id']
        # Ignore non-named (auto-numbered) IDs.
        if _is_generated_id(anchor):
            continue
        # Ignore index anchors.
        if anchor.startswith('index-'):
            continue
        # Ignore searchbox anchors.
        if anchor == 'searchbox':
            continue
        links.add('{}#{}'.format(filename, anchor))

    return links


def find_all_named_anchors_in_files(files):
    """Return every file name in ``files`` plus all of its named anchors."""
    links = set()

    for filename in files:
        links.add(filename)
        links.update(find_all_named_anchors(filename))

    return links


def find_links():
    """Collect links for every ``*.html`` file below the current directory.

    File names listed in ``IGNORED_FILES`` are excluded.
    """
    files = [
        name for name in glob('**/*.html', recursive=True)
        if name not in IGNORED_FILES
    ]
    return find_all_named_anchors_in_files(files)


def load_inventory():
    """Load the recorded inventory as a set.

    Returns an empty set when the inventory file is missing or empty.
    """
    if not os.path.exists(INVENTORY_FILENAME):
        return set()
    with io.open(INVENTORY_FILENAME, 'r') as inventory_file:
        # safe_load only builds plain YAML types, which is all this file
        # holds; an empty document loads as None, hence the fallback.
        return set(yaml.safe_load(inventory_file) or [])


def save_inventory(inventory):
    """Write the inventory to disk as a sorted YAML list."""
    with io.open(INVENTORY_FILENAME, 'w') as inventory_file:
        yaml.dump(sorted(inventory), inventory_file)


def load_redirects():
    """Load the list of ``{'from': ..., 'to': ...}`` page redirects.

    Returns an empty list when the redirects file contains no entries.
    """
    with io.open(REDIRECTS_FILENAME, 'r') as redirects_file:
        return yaml.safe_load(redirects_file) or []


def expand_redirects(redirects, inventory):
    """Expand page-level redirects into the deep links they cover.

    For a redirect ``old.html -> new.html`` every inventory entry that is
    either ``old.html`` itself or ``old.html#<anchor>`` is a source link; its
    counterpart is the same anchor on ``new.html``.

    Args:
        redirects: iterable of mappings with ``'from'`` and ``'to'`` keys.
        inventory: set of known links.

    Returns:
        A ``(valid_redirects, missing_redirects)`` tuple. ``valid_redirects``
        is the set of source links whose counterpart is in the inventory.
        ``missing_redirects`` is a set of ``(source_link, dest_link)`` pairs
        whose counterpart is absent.
    """
    valid_redirects = set()
    missing_redirects = set()

    for redirect in redirects:
        from_ = redirect['from']
        to = redirect['to']
        # Match the page itself or its anchors only; a bare prefix test would
        # also capture unrelated pages whose name starts with ``from_``.
        anchor_prefix = from_ + '#'

        for source_link in inventory:
            if (source_link != from_
                    and not source_link.startswith(anchor_prefix)):
                continue

            # Swap only the leading page name so that an anchor which happens
            # to contain the page name is left untouched.
            dest_link = to + source_link[len(from_):]
            if dest_link in inventory:
                valid_redirects.add(source_link)
            else:
                missing_redirects.add((source_link, dest_link))

    return valid_redirects, missing_redirects


def update_command(args):
    """Updates the current inventory of links with any new links added.

    This should be run after adding new documentation to make a record of new
    items added.
    """
    # find_links() globs relative to the current directory.
    os.chdir(HTML_DIR)

    inventory = load_inventory()
    links = find_links()

    new_links = links.difference(inventory)
    print('Found {} new links.'.format(len(new_links)))

    inventory.update(links)
    save_inventory(inventory)

    return 0


def check_command(args):
    """Checks the current set of links against the inventory.

    This should be run on every documentation change to ensure that no deep
    links have been broken and that new links are tracked in the inventory.
    """
    # find_links() globs relative to the current directory.
    os.chdir(HTML_DIR)

    inventory = load_inventory()
    redirects = load_redirects()
    links = find_links()

    valid_redirects, missing_redirects = expand_redirects(redirects, inventory)
    if missing_redirects:
        print(
            'The following redirects are missing deep link anchors in the '
            'destination:')
        # Sorted so that the report is stable between runs.
        for source, dest in sorted(missing_redirects):
            print(' * {} -> {}'.format(source, dest))

    # Links covered by a valid redirect are not considered broken.
    missing_links = inventory.difference(links)
    missing_links -= valid_redirects

    if missing_links:
        print('Missing the following deep links:')
        for link in sorted(missing_links):
            print(' * {}'.format(link))
        return 1

    new_links = links.difference(inventory)

    if new_links:
        print('The following new deep links were added:')
        for link in sorted(new_links):
            print(' * {}'.format(link))
        print('Run nox -s updatelinks to update them in git.')
        return 2

    print('All is well')
    return 0


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers()
    # The command docstrings double as the sub-command help text.
    update_parser = subparsers.add_parser(
        'update', help=update_command.__doc__)
    update_parser.set_defaults(func=update_command)
    check_parser = subparsers.add_parser(
        'check', help=check_command.__doc__)
    check_parser.set_defaults(func=check_command)

    args = parser.parse_args()

    # No sub-command given: show usage instead of failing on args.func.
    if not hasattr(args, 'func'):
        parser.print_help()
        sys.exit(1)

    sys.exit(args.func(args))
+# See also: https://github.com/rtfd/readthedocs.org/issues/2904 + +- from: old.html + to: new.html diff --git a/scripts/linkmonitor/requirements.txt b/scripts/linkmonitor/requirements.txt new file mode 100644 index 000000000..0aed6e7b2 --- /dev/null +++ b/scripts/linkmonitor/requirements.txt @@ -0,0 +1,2 @@ +beautifulsoup4==4.6.0 +PyYAML==3.12