Skip to content

Add new tool to check HTML #86059

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1578,6 +1578,13 @@ dependencies = [
"winapi 0.3.9",
]

[[package]]
name = "html-checker"
version = "0.1.0"
dependencies = [
"walkdir",
]

[[package]]
name = "html5ever"
version = "0.25.1"
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ members = [
"src/tools/unicode-table-generator",
"src/tools/expand-yaml-anchors",
"src/tools/jsondocck",
"src/tools/html-checker",
]

exclude = [
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ impl<'a> Builder<'a> {
test::RustdocTheme,
test::RustdocUi,
test::RustdocJson,
test::HtmlCheck,
// Run bootstrap close to the end as it's unlikely to fail
test::Bootstrap,
// Run run-make last, since these won't pass without make on Windows
Expand Down
4 changes: 2 additions & 2 deletions src/bootstrap/doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -501,8 +501,8 @@ impl Step for Std {

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Rustc {
stage: u32,
target: TargetSelection,
pub stage: u32,
pub target: TargetSelection,
}

impl Step for Rustc {
Expand Down
45 changes: 44 additions & 1 deletion src/bootstrap/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::fmt;
use std::fs;
use std::iter;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::process::{Command, Stdio};

use build_helper::{self, output, t};

Expand Down Expand Up @@ -161,6 +161,49 @@ You can skip linkcheck with --exclude src/tools/linkchecker"
}
}

fn check_if_tidy_is_installed() -> bool {
Command::new("tidy")
.arg("--version")
.stdout(Stdio::null())
.status()
.map_or(false, |status| status.success())
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct HtmlCheck {
target: TargetSelection,
}

impl Step for HtmlCheck {
type Output = ();
const DEFAULT: bool = true;
const ONLY_HOSTS: bool = true;

fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
let run = run.path("src/tools/html-checker");
run.lazy_default_condition(Box::new(check_if_tidy_is_installed))
}

fn make_run(run: RunConfig<'_>) {
run.builder.ensure(HtmlCheck { target: run.target });
}

fn run(self, builder: &Builder<'_>) {
if !check_if_tidy_is_installed() {
eprintln!("not running HTML-check tool because `tidy` is missing");
eprintln!(
"Note that `tidy` is not the in-tree `src/tools/tidy` but needs to be installed"
);
panic!("Cannot run html-check tests");
}
// Ensure that a few different kinds of documentation are available.
builder.default_doc(&[]);
builder.ensure(crate::doc::Rustc { target: self.target, stage: builder.top_stage });

try_run(builder, builder.tool_cmd(Tool::HtmlChecker).arg(builder.doc_out(self.target)));
}
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Cargotest {
stage: u32,
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ bootstrap_tool!(
ExpandYamlAnchors, "src/tools/expand-yaml-anchors", "expand-yaml-anchors";
LintDocs, "src/tools/lint-docs", "lint-docs";
JsonDocCk, "src/tools/jsondocck", "jsondocck";
HtmlChecker, "src/tools/html-checker", "html-checker";
);

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
Expand Down
3 changes: 2 additions & 1 deletion src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libgl1-mesa-dev \
llvm-dev \
libfreetype6-dev \
libexpat1-dev
libexpat1-dev \
tidy

COPY scripts/sccache.sh /scripts/
RUN sh /scripts/sccache.sh
Expand Down
3 changes: 2 additions & 1 deletion src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
libssl-dev \
sudo \
xz-utils
xz-utils \
tidy

# Install dependencies for chromium browser
RUN apt-get install -y \
Expand Down
12 changes: 12 additions & 0 deletions src/tools/html-checker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "html-checker"
version = "0.1.0"
authors = ["Guillaume Gomez <[email protected]>"]
edition = "2018"

[[bin]]
name = "html-checker"
path = "main.rs"

[dependencies]
walkdir = "2"
93 changes: 93 additions & 0 deletions src/tools/html-checker/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use std::env;
use std::path::Path;
use std::process::{Command, Output};

fn check_html_file(file: &Path) -> usize {
let to_mute = &[
// "disabled" on <link> or "autocomplete" on <select> emit this warning
"PROPRIETARY_ATTRIBUTE",
// It complains when multiple in the same page link to the same anchor for some reason...
"ANCHOR_NOT_UNIQUE",
// If a <span> contains only HTML elements and no text, it complains about it.
"TRIM_EMPTY_ELEMENT",
// FIXME: the three next warnings are about <pre> elements which are not supposed to
// contain HTML. The solution here would be to replace them with a <div>
"MISSING_ENDTAG_BEFORE",
"INSERTING_TAG",
"DISCARDING_UNEXPECTED",
];
let to_mute_s = to_mute.join(",");
let mut command = Command::new("tidy");
command
.arg("-errors")
.arg("-quiet")
.arg("--mute-id") // this option is useful in case we want to mute more warnings
.arg("yes")
.arg("--mute")
.arg(&to_mute_s)
.arg(file);

let Output { status, stderr, .. } = command.output().expect("failed to run tidy command");
if status.success() {
0
} else {
let stderr = String::from_utf8(stderr).expect("String::from_utf8 failed...");
if stderr.is_empty() && status.code() != Some(2) {
0
} else {
eprintln!(
"=> Errors for `{}` (error code: {}) <=",
file.display(),
status.code().unwrap_or(-1)
);
eprintln!("{}", stderr);
stderr.lines().count()
}
}
}

const DOCS_TO_CHECK: &[&str] =
&["alloc", "core", "proc_macro", "implementors", "src", "std", "test"];

// Returns the number of files read and the number of errors.
fn find_all_html_files(dir: &Path) -> (usize, usize) {
let mut files_read = 0;
let mut errors = 0;

for entry in walkdir::WalkDir::new(dir).into_iter().filter_entry(|e| {
e.depth() != 1
|| e.file_name()
.to_str()
.map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s))
.unwrap_or(false)
}) {
let entry = entry.expect("failed to read file");
if !entry.file_type().is_file() {
continue;
}
let entry = entry.path();
if entry.extension().and_then(|s| s.to_str()) == Some("html") {
errors += check_html_file(&entry);
files_read += 1;
}
}
(files_read, errors)
}

fn main() -> Result<(), String> {
let args = env::args().collect::<Vec<_>>();
if args.len() != 2 {
return Err(format!("Usage: {} <doc folder>", args[0]));
}

println!("Running HTML checker...");

let (files_read, errors) = find_all_html_files(&Path::new(&args[1]));
println!("Done! Read {} files...", files_read);
if errors > 0 {
Err(format!("HTML check failed: {} errors", errors))
} else {
println!("No error found!");
Ok(())
}
}