Skip to content

Check copyright html #133341

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
391 changes: 0 additions & 391 deletions COPYRIGHT

This file was deleted.

7,358 changes: 7,358 additions & 0 deletions COPYRIGHT-library.html

Large diffs are not rendered by default.

146,840 changes: 146,840 additions & 0 deletions COPYRIGHT.html

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion REUSE.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ path = [
"config.example.toml",
"configure",
"CONTRIBUTING.md",
"COPYRIGHT",
"COPYRIGHT.html",
"COPYRIGHT-library.html",
"INSTALL.md",
"LICENSE-APACHE",
"LICENSE-MIT",
Expand Down
10 changes: 6 additions & 4 deletions src/bootstrap/src/core/build_steps/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ impl Step for CollectLicenseMetadata {
pub struct GenerateCopyright;

impl Step for GenerateCopyright {
type Output = PathBuf;
type Output = (PathBuf, PathBuf);
const ONLY_HOSTS: bool = true;

fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
Expand All @@ -211,17 +211,19 @@ impl Step for GenerateCopyright {
fn run(self, builder: &Builder<'_>) -> Self::Output {
let license_metadata = builder.ensure(CollectLicenseMetadata);

// Temporary location, it will be moved to the proper one once it's accurate.
let dest = builder.out.join("COPYRIGHT.html");
let dest = builder.src.join("COPYRIGHT.html");
let dest_libstd = builder.src.join("COPYRIGHT-library.html");

let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
cmd.env("LICENSE_METADATA", &license_metadata);
cmd.env("DEST", &dest);
cmd.env("DEST_LIBSTD", &dest_libstd);
cmd.env("OUT_DIR", &builder.out);
cmd.env("ONLY_CHECK", "0");
cmd.env("CARGO", &builder.initial_cargo);
cmd.run(builder);

dest
(dest, dest_libstd)
}
}

Expand Down
33 changes: 33 additions & 0 deletions src/bootstrap/src/core/build_steps/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3611,3 +3611,36 @@ impl Step for TestFloatParse {
cargo_run.into_cmd().run(builder);
}
}

/// Test step that runs the `generate-copyright` tool in check-only mode.
///
/// The tool is pointed at `COPYRIGHT.html` and `COPYRIGHT-library.html` in the
/// source tree and invoked with `ONLY_CHECK=1`.
#[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)]
pub struct GenerateCopyright;

impl Step for GenerateCopyright {
    type Output = ();
    const ONLY_HOSTS: bool = true;

    /// This step is selected by the path of the tool it exercises.
    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/generate-copyright")
    }

    /// Schedules the step on the builder.
    fn make_run(run: RunConfig<'_>) {
        run.builder.ensure(GenerateCopyright);
    }

    /// Collects the license metadata, then runs the tool against the
    /// copyright files that live in the source tree.
    fn run(self, builder: &Builder<'_>) -> Self::Output {
        use crate::core::build_steps::run::CollectLicenseMetadata;

        // The tool reads the output of this step via `LICENSE_METADATA`.
        let metadata_json = builder.ensure(CollectLicenseMetadata);

        let toolchain_html = builder.src.join("COPYRIGHT.html");
        let library_html = builder.src.join("COPYRIGHT-library.html");

        let mut checker = builder.tool_cmd(Tool::GenerateCopyright);
        checker.env("LICENSE_METADATA", &metadata_json);
        checker.env("DEST", &toolchain_html);
        checker.env("DEST_LIBSTD", &library_html);
        checker.env("OUT_DIR", &builder.out);
        // "1" selects the tool's check-only mode.
        checker.env("ONLY_CHECK", "1");
        checker.env("CARGO", &builder.initial_cargo);
        checker.run(builder);
    }
}
1 change: 1 addition & 0 deletions src/bootstrap/src/core/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,7 @@ impl<'a> Builder<'a> {
test::HtmlCheck,
test::RustInstaller,
test::TestFloatParse,
test::GenerateCopyright,
// Run bootstrap close to the end as it's unlikely to fail
test::Bootstrap,
// Run run-make last, since these won't pass without make on Windows
Expand Down
4 changes: 3 additions & 1 deletion src/ci/docker/host-x86_64/mingw-check/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,6 @@ ENV SCRIPT \
es-check es2019 ../src/librustdoc/html/static/js/*.js && \
eslint -c ../src/librustdoc/html/static/.eslintrc.js ../src/librustdoc/html/static/js/*.js && \
eslint -c ../src/tools/rustdoc-js/.eslintrc.js ../src/tools/rustdoc-js/tester.js && \
eslint -c ../src/tools/rustdoc-gui/.eslintrc.js ../src/tools/rustdoc-gui/tester.js
eslint -c ../src/tools/rustdoc-gui/.eslintrc.js ../src/tools/rustdoc-gui/tester.js && \
# Check our COPYRIGHT files are still OK
python3 ../x.py test generate-copyright
5 changes: 5 additions & 0 deletions src/tools/collect-license-metadata/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ fn main() -> Result<(), Error> {
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();

if dest.exists() {
println!("{} exists, skipping REUSE data collection", dest.display());
return Ok(());
}

let mut interner = LicensesInterner::new();
let paths = crate::reuse::collect(&reuse_exe, &mut interner)?;

Expand Down
15 changes: 9 additions & 6 deletions src/tools/generate-copyright/src/cargo_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,26 @@ pub struct PackageMetadata {

/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data.
///
/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
/// This will involve running `cargo vendor` into `vendor_path` so we can
/// grab the license files.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
pub fn get_metadata_and_notices(
cargo: &Path,
dest: &Path,
vendor_path: &Path,
root_path: &Path,
manifest_paths: &[&Path],
) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
let mut output = get_metadata(cargo, root_path, manifest_paths)?;

// Now do a cargo-vendor and grab everything
let vendor_path = dest.join("vendor");
println!("Vendoring deps into {}...", vendor_path.display());
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
if vendor_path.exists() {
println!("{} exists, skipping `cargo vendor` call", vendor_path.display());
} else {
// Now do a cargo-vendor and grab everything
println!("{} missing, running `cargo vendor` to populate it", vendor_path.display());
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
}

// Now for each dependency we found, go and grab any important looking files
for (package, metadata) in output.iter_mut() {
Expand Down
193 changes: 167 additions & 26 deletions src/tools/generate-copyright/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,68 +1,138 @@
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};

use anyhow::Error;
use anyhow::{Context, Error};
use rinja::Template;

mod cargo_metadata;

#[derive(Template)]
#[template(path = "COPYRIGHT.html")]
struct CopyrightTemplate {
in_tree: Node,
dependencies: BTreeMap<cargo_metadata::Package, cargo_metadata::PackageMetadata>,
}

/// The entry point to the binary.
///
/// You should probably let `bootstrap` execute this program instead of running it directly.
///
/// Run `x.py run generate-copyright`
/// Run `x.py run generate-copyright` to make the files, or
/// `x.py test generate-copyright` to check the existing files.
fn main() -> Result<(), Error> {
let dest_file = env_path("DEST")?;
let libstd_dest_file = env_path("DEST_LIBSTD")?;
let only_check_existing = env_str("ONLY_CHECK")? == "1";

let out_dir = env_path("OUT_DIR")?;
let cargo = env_path("CARGO")?;
let license_metadata = env_path("LICENSE_METADATA")?;

let collected_tree_metadata: Metadata =
serde_json::from_slice(&std::fs::read(&license_metadata)?)?;

let root_path = std::path::absolute(".")?;
let workspace_paths = [
Path::new("./Cargo.toml"),
Path::new("./src/tools/cargo/Cargo.toml"),
Path::new("./library/Cargo.toml"),
];
let mut collected_cargo_metadata =
cargo_metadata::get_metadata_and_notices(&cargo, &out_dir, &root_path, &workspace_paths)?;

let stdlib_set =
cargo_metadata::get_metadata(&cargo, &root_path, &[Path::new("./library/std/Cargo.toml")])?;
// Scan Cargo dependencies
let mut collected_cargo_metadata =
cargo_metadata::get_metadata_and_notices(&cargo, &out_dir.join("vendor"), &root_path, &[
Path::new("./Cargo.toml"),
Path::new("./src/tools/cargo/Cargo.toml"),
Path::new("./library/Cargo.toml"),
])?;

let library_collected_cargo_metadata = cargo_metadata::get_metadata_and_notices(
&cargo,
&out_dir.join("library-vendor"),
&root_path,
&[Path::new("./library/Cargo.toml")],
)?;

for (key, value) in collected_cargo_metadata.iter_mut() {
value.is_in_libstd = Some(stdlib_set.contains_key(key));
value.is_in_libstd = Some(library_collected_cargo_metadata.contains_key(key));
}

// Load JSON output by reuse
let collected_tree_metadata: Metadata =
serde_json::from_slice(&std::fs::read(&license_metadata)?)?;

// Find libstd sub-set
let library_collected_tree_metadata = Metadata {
files: collected_tree_metadata
.files
.trim_clone(&Path::new("./library"), &Path::new("."))
.unwrap(),
};

// Output main file
let template = CopyrightTemplate {
in_tree: collected_tree_metadata.files,
dependencies: collected_cargo_metadata,
};
let output = template.render()?;
// Git stores text files with \n, but this file may contain \r\n in files
// copied from dependencies. Normalise them before we write them out, for
// consistency.
let output = output.replace("\r\n", "\n");
if only_check_existing {
std::fs::write(&out_dir.join("temp.html"), &output)?;
check_file_contents(&dest_file, &output)?;
} else {
std::fs::write(&dest_file, &output)?;
}

// Output libstd subset file
let template = LibraryCopyrightTemplate {
in_tree: library_collected_tree_metadata.files,
dependencies: library_collected_cargo_metadata,
};
let output = template.render()?;
// Normalise line endings, as above.
let output = output.replace("\r\n", "\n");
if only_check_existing {
check_file_contents(&libstd_dest_file, &output)?;
} else {
std::fs::write(&libstd_dest_file, &output)?;
}

std::fs::write(&dest_file, output)?;
Ok(())
}

/// Check two files have the same contents
fn check_file_contents(path: &Path, new_contents: &str) -> Result<(), Error> {
let orig_contents = std::fs::read_to_string(&path).with_context(|| {
format!(
"File {} failed to read. Run `x run tools/generate-copyright` to regenerate it.",
path.display()
)
})?;
if orig_contents != new_contents {
anyhow::bail!(
"File {} is out of date. Run `x run tools/generate-copyright` to regenerate it.",
path.display()
);
} else {
println!("File {} is OK", path.display());
}
Ok(())
}

/// The HTML template for the toolchain copyright file
///
/// `main` renders this into the file named by the `DEST` environment variable
/// (or compares against that file when only checking).
#[derive(Template)]
#[template(path = "COPYRIGHT.html")]
struct CopyrightTemplate {
/// The tree of in-repository files and their licenses, as loaded from the
/// JSON file named by `LICENSE_METADATA`.
in_tree: Node,
/// The Cargo dependencies and their license data, as returned by
/// `cargo_metadata::get_metadata_and_notices`.
dependencies: BTreeMap<cargo_metadata::Package, cargo_metadata::PackageMetadata>,
}

/// The HTML template for the library copyright file
///
/// `main` renders this into the file named by the `DEST_LIBSTD` environment
/// variable (or compares against that file when only checking).
#[derive(Template)]
#[template(path = "COPYRIGHT-library.html")]
struct LibraryCopyrightTemplate {
/// The in-repository license tree, trimmed by `main` to the `./library`
/// sub-tree.
in_tree: Node,
/// The Cargo dependencies found for `./library/Cargo.toml`, with their
/// license data.
dependencies: BTreeMap<cargo_metadata::Package, cargo_metadata::PackageMetadata>,
}

/// Describes a tree of metadata for our filesystem tree
#[derive(serde::Deserialize)]
///
/// Must match the JSON emitted by the `CollectLicenseMetadata` bootstrap tool.
#[derive(serde::Deserialize, Clone, Debug, PartialEq, Eq)]
struct Metadata {
files: Node,
}

/// Describes one node in our metadata tree
#[derive(serde::Deserialize, rinja::Template)]
#[derive(serde::Deserialize, rinja::Template, Clone, Debug, PartialEq, Eq)]
#[serde(rename_all = "kebab-case", tag = "type")]
#[template(path = "Node.html")]
pub(crate) enum Node {
Expand All @@ -72,8 +142,74 @@ pub(crate) enum Node {
Group { files: Vec<String>, directories: Vec<String>, license: License },
}

impl Node {
    /// Returns `true` if `candidate` is inside `match_path`, or is `match_path`
    /// itself or one of its ancestors (so it may contain matching items).
    fn is_on_match_path(candidate: &Path, match_path: &Path) -> bool {
        candidate.starts_with(match_path) || match_path.starts_with(candidate)
    }

    /// Clone this node, but only the parts whose path is within the match path.
    ///
    /// * `match_path` - the sub-tree to keep, e.g. `./library`.
    /// * `parent_path` - the path of the directory containing this node; the
    ///   names stored in the node are joined onto it.
    ///
    /// Directories that are ancestors of `match_path` are kept, so the result
    /// still leads from the root down to the matching sub-tree.
    ///
    /// Returns `None` when nothing in this node survives: a file or directory
    /// outside the match path, a group whose entries were all filtered out, or
    /// a root with no surviving children.
    fn trim_clone(&self, match_path: &Path, parent_path: &Path) -> Option<Node> {
        match self {
            Node::Root { children } => {
                // The root has no name of its own, so its children share its parent path.
                let kept: Vec<Node> = children
                    .iter()
                    .filter_map(|child| child.trim_clone(match_path, parent_path))
                    .collect();
                if kept.is_empty() { None } else { Some(Node::Root { children: kept }) }
            }
            Node::Directory { name, children, license } => {
                let dir_path = parent_path.join(name);
                if !Self::is_on_match_path(&dir_path, match_path) {
                    return None;
                }
                let kept = children
                    .iter()
                    .filter_map(|child| child.trim_clone(match_path, &dir_path))
                    .collect();
                Some(Node::Directory {
                    name: name.clone(),
                    children: kept,
                    license: license.clone(),
                })
            }
            Node::File { name, license } => {
                if !Self::is_on_match_path(&parent_path.join(name), match_path) {
                    return None;
                }
                Some(Node::File { name: name.clone(), license: license.clone() })
            }
            Node::Group { files, directories, license } => {
                let keep = |entries: &[String]| -> Vec<String> {
                    entries
                        .iter()
                        .filter(|entry| {
                            Self::is_on_match_path(&parent_path.join(entry), match_path)
                        })
                        .cloned()
                        .collect()
                };
                let files = keep(files.as_slice());
                let directories = keep(directories.as_slice());
                // A group with nothing left in it would only contribute an
                // empty license entry, so drop it like any other non-match.
                if files.is_empty() && directories.is_empty() {
                    return None;
                }
                Some(Node::Group { files, directories, license: license.clone() })
            }
        }
    }
}

/// A License has an SPDX license name and a list of copyright holders.
#[derive(serde::Deserialize)]
#[derive(serde::Deserialize, Clone, Debug, PartialEq, Eq)]
struct License {
spdx: String,
copyright: Vec<String>,
Expand All @@ -87,3 +223,8 @@ fn env_path(var: &str) -> Result<PathBuf, Error> {
anyhow::bail!("missing environment variable {var}")
}
}

/// Read the environment variable `var` as a `String`.
///
/// Any failure reported by `std::env::var` is turned into the same
/// "missing environment variable" error.
fn env_str(var: &str) -> Result<String, Error> {
    match std::env::var(var) {
        Ok(value) => Ok(value),
        Err(_) => Err(anyhow::anyhow!("missing environment variable {var}")),
    }
}
Loading
Loading