From 29f1257e3584fe0c57fbf203f5a78c7419a648f8 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Fri, 5 Mar 2021 11:41:26 -0500 Subject: [PATCH 1/2] Don't hard-code essential rustdoc files This avoids having to constantly update docs.rs when new files are added. --- src/docbuilder/rustwide_builder.rs | 79 ++++++++++++++---------------- 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 541c0f460..574830c7f 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -14,47 +14,16 @@ use docsrs_metadata::{Metadata, DEFAULT_TARGETS, HOST_TARGET}; use failure::ResultExt; use log::{debug, info, warn, LevelFilter}; use postgres::Client; -use rustwide::cmd::{Command, SandboxBuilder, SandboxImage}; +use rustwide::cmd::{Binary, Command, SandboxBuilder, SandboxImage}; use rustwide::logging::{self, LogStorage}; use rustwide::toolchain::ToolchainError; use rustwide::{Build, Crate, Toolchain, Workspace, WorkspaceBuilder}; use serde_json::Value; use std::collections::{HashMap, HashSet}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::Arc; const USER_AGENT: &str = "docs.rs builder (https://github.com/rust-lang/docs.rs)"; -const ESSENTIAL_FILES_VERSIONED: &[&str] = &[ - "brush.svg", - "favicon.svg", - "wheel.svg", - "down-arrow.svg", - "dark.css", - "light.css", - "ayu.css", - "main.js", - "normalize.css", - "rustdoc.css", - "settings.css", - "settings.js", - "storage.js", - "theme.js", - "source-script.js", - "noscript.css", - "rust-logo.png", -]; -const ESSENTIAL_FILES_UNVERSIONED: &[&str] = &[ - "FiraSans-Medium.woff", - "FiraSans-Medium.woff2", - "FiraSans-Regular.woff", - "FiraSans-Regular.woff2", - "SourceCodePro-Regular.woff", - "SourceCodePro-Semibold.woff", - "SourceSerifPro-Bold.ttf.woff", - "SourceSerifPro-Regular.ttf.woff", - "SourceSerifPro-It.ttf.woff", -]; - const DUMMY_CRATE_NAME: &str = "empty-library"; const DUMMY_CRATE_VERSION: &str = "1.0.0"; @@ -213,6 +182,20 @@ impl RustwideBuilder { let krate = Crate::crates_io(DUMMY_CRATE_NAME, DUMMY_CRATE_VERSION); krate.fetch(&self.workspace)?; + // TODO: remove this when https://github.com/rust-lang/rustwide/pull/53 lands. + struct Rustdoc<'a> { + toolchain_version: &'a str, + } + impl rustwide::cmd::Runnable for Rustdoc<'_> { + fn name(&self) -> Binary { + Binary::ManagedByRustwide(PathBuf::from("rustdoc")) + } + + fn prepare_command<'w, 'pl>(&self, cmd: Command<'w, 'pl>) -> Command<'w, 'pl> { + cmd.args(&[format!("+{}", self.toolchain_version)]) + } + } + build_dir .build(&self.toolchain, &krate, self.prepare_sandbox(&limits)) .run(|build| { @@ -229,19 +212,29 @@ impl RustwideBuilder { .prefix("essential-files") .tempdir()?; - let files = ESSENTIAL_FILES_VERSIONED + let toolchain_version = self.toolchain.as_dist().unwrap().name(); + let output = build.cmd(Rustdoc { toolchain_version }) + .args(&["-Zunstable-options", "--print=unversioned-files"]) + .run_capture() + .context("failed to learn about unversioned files - make sure you have nightly-2021-03-07 or later")?; + let essential_files_unversioned = output + .stdout_lines() .iter() - .map(|f| (f, true)) - .chain(ESSENTIAL_FILES_UNVERSIONED.iter().map(|f| (f, false))); - for (&file, versioned) in files { - let segments = file.rsplitn(2, '.').collect::>(); - let file_name = if versioned { - format!("{}-{}.{}", segments[1], rustc_version, segments[0]) - } else { - file.to_string() - }; + .map(PathBuf::from); + let resource_suffix = format!("-{}", parse_rustc_version(&self.rustc_version)?); + let essential_files_versioned: Vec<_> = source.read_dir()? + .collect::, _>>()? + .into_iter() + .filter_map(|entry| { + entry.file_name().to_str().and_then(|name| if name.contains(&resource_suffix) { + Some(entry.file_name().into()) + } else { None }) + }) + .collect(); + for file_name in essential_files_unversioned.chain(essential_files_versioned) { let source_path = source.join(&file_name); let dest_path = dest.path().join(&file_name); + debug!("copying {} to {}", source_path.display(), dest_path.display()); ::std::fs::copy(&source_path, &dest_path).with_context(|_| { format!( "couldn't copy '{}' to '{}'", From 6f4477ba8350f6154085961fc0bdcf9fe661da9a Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 21 Mar 2021 13:46:59 -0400 Subject: [PATCH 2/2] Don't hard-code essential files in `copy_doc_dir` This avoids unnecessary upload costs for S3. --- src/docbuilder/rustwide_builder.rs | 93 +++++++++++++++++------------- src/utils/copy.rs | 35 ++++++----- 2 files changed, 74 insertions(+), 54 deletions(-) diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 574830c7f..a4579f498 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -182,20 +182,6 @@ impl RustwideBuilder { let krate = Crate::crates_io(DUMMY_CRATE_NAME, DUMMY_CRATE_VERSION); krate.fetch(&self.workspace)?; - // TODO: remove this when https://github.com/rust-lang/rustwide/pull/53 lands. - struct Rustdoc<'a> { - toolchain_version: &'a str, - } - impl rustwide::cmd::Runnable for Rustdoc<'_> { - fn name(&self) -> Binary { - Binary::ManagedByRustwide(PathBuf::from("rustdoc")) - } - - fn prepare_command<'w, 'pl>(&self, cmd: Command<'w, 'pl>) -> Command<'w, 'pl> { - cmd.args(&[format!("+{}", self.toolchain_version)]) - } - } - build_dir .build(&self.toolchain, &krate, self.prepare_sandbox(&limits)) .run(|build| { @@ -212,29 +198,14 @@ impl RustwideBuilder { .prefix("essential-files") .tempdir()?; - let toolchain_version = self.toolchain.as_dist().unwrap().name(); - let output = build.cmd(Rustdoc { toolchain_version }) - .args(&["-Zunstable-options", "--print=unversioned-files"]) - .run_capture() - .context("failed to learn about unversioned files - make sure you have nightly-2021-03-07 or later")?; - let essential_files_unversioned = output - .stdout_lines() - .iter() - .map(PathBuf::from); - let resource_suffix = format!("-{}", parse_rustc_version(&self.rustc_version)?); - let essential_files_versioned: Vec<_> = source.read_dir()? - .collect::, _>>()? - .into_iter() - .filter_map(|entry| { - entry.file_name().to_str().and_then(|name| if name.contains(&resource_suffix) { - Some(entry.file_name().into()) - } else { None }) - }) - .collect(); - for file_name in essential_files_unversioned.chain(essential_files_versioned) { + for file_name in self.essential_files(build, &source)? { let source_path = source.join(&file_name); let dest_path = dest.path().join(&file_name); - debug!("copying {} to {}", source_path.display(), dest_path.display()); + debug!( + "copying {} to {}", + source_path.display(), + dest_path.display() + ); ::std::fs::copy(&source_path, &dest_path).with_context(|_| { format!( "couldn't copy '{}' to '{}'", @@ -363,7 +334,7 @@ impl RustwideBuilder { let mut algs = HashSet::new(); if has_docs { debug!("adding documentation for the default target to the database"); - self.copy_docs(&build.host_target_dir(), local_storage.path(), "", true)?; + self.copy_docs(build, local_storage.path(), "", true)?; successful_targets.push(res.target.clone()); @@ -465,7 +436,7 @@ impl RustwideBuilder { // adding target to successfully_targets. if build.host_target_dir().join(target).join("doc").is_dir() { debug!("adding documentation for target {} to the database", target,); - self.copy_docs(&build.host_target_dir(), local_storage, target, false)?; + self.copy_docs(build, local_storage, target, false)?; successful_targets.push(target.to_string()); } } @@ -638,12 +609,12 @@ impl RustwideBuilder { fn copy_docs( &self, - target_dir: &Path, + build: &Build, local_storage: &Path, target: &str, is_default_target: bool, ) -> Result<()> { - let source = target_dir.join(target).join("doc"); + let source = build.host_target_dir().join(target).join("doc"); let mut dest = local_storage.to_path_buf(); // only add target name to destination directory when we are copying a non-default target. @@ -656,7 +627,49 @@ impl RustwideBuilder { } info!("{} {}", source.display(), dest.display()); - copy_doc_dir(source, dest) + let essential_files = self.essential_files(build, &source)?; + copy_doc_dir(source, dest, &essential_files) + } + + fn essential_files(&self, build: &Build, doc_dir: &Path) -> Result> { + // TODO: remove this when https://github.com/rust-lang/rustwide/pull/53 lands. + struct Rustdoc<'a> { + toolchain_version: &'a str, + } + impl rustwide::cmd::Runnable for Rustdoc<'_> { + fn name(&self) -> Binary { + Binary::ManagedByRustwide(PathBuf::from("rustdoc")) + } + + fn prepare_command<'w, 'pl>(&self, cmd: Command<'w, 'pl>) -> Command<'w, 'pl> { + cmd.args(&[format!("+{}", self.toolchain_version)]) + } + } + + let toolchain_version = self.toolchain.as_dist().unwrap().name(); + let output = build.cmd(Rustdoc { toolchain_version }) + .args(&["-Zunstable-options", "--print=unversioned-files"]) + .run_capture() + .context("failed to learn about unversioned files - make sure you have nightly-2021-03-07 or later")?; + let mut essential_files: Vec<_> = output.stdout_lines().iter().map(PathBuf::from).collect(); + let resource_suffix = format!("-{}", parse_rustc_version(&self.rustc_version)?); + + let essential_files_versioned = doc_dir + .read_dir()? + .collect::, _>>()? + .into_iter() + .filter_map(|entry| { + entry.file_name().to_str().and_then(|name| { + if name.contains(&resource_suffix) { + Some(entry.file_name().into()) + } else { + None + } + }) + }); + + essential_files.extend(essential_files_versioned); + Ok(essential_files) } fn upload_docs( diff --git a/src/utils/copy.rs b/src/utils/copy.rs index 9e30efcd0..bc5bf015d 100644 --- a/src/utils/copy.rs +++ b/src/utils/copy.rs @@ -1,15 +1,17 @@ use crate::error::Result; use std::fs; -use std::path::Path; - -use regex::Regex; +use std::path::{Path, PathBuf}; /// Copies documentation from a crate's target directory to destination. /// /// Target directory must have doc directory. /// -/// This function is designed to avoid file duplications. -pub fn copy_doc_dir, Q: AsRef>(source: P, destination: Q) -> Result<()> { +/// This does not copy any files with the same name as `shared_files`. +pub fn copy_doc_dir, Q: AsRef>( + source: P, + destination: Q, + shared_files: &[PathBuf], +) -> Result<()> { let destination = destination.as_ref(); // Make sure destination directory exists @@ -17,20 +19,19 @@ pub fn copy_doc_dir, Q: AsRef>(source: P, destination: Q) - fs::create_dir_all(destination)?; } - // Avoid copying common files - let dup_regex = Regex::new( - r"(\.lock|\.txt|\.woff|\.svg|\.css|main-.*\.css|main-.*\.js|normalize-.*\.js|rustdoc-.*\.css|storage-.*\.js|theme-.*\.js)$") - .unwrap(); - for file in source.as_ref().read_dir()? { let file = file?; - let destination_full_path = destination.join(file.file_name()); + let filename = file.file_name(); + let destination_full_path = destination.join(&filename); let metadata = file.metadata()?; if metadata.is_dir() { - copy_doc_dir(file.path(), destination_full_path)? - } else if dup_regex.is_match(&file.file_name().into_string().unwrap()[..]) { + copy_doc_dir(file.path(), destination_full_path, shared_files)?; + continue; + } + + if shared_files.contains(&PathBuf::from(filename)) { continue; } else { fs::copy(&file.path(), &destination_full_path)?; @@ -65,7 +66,13 @@ mod test { fs::write(doc.join("inner").join("important.svg"), "").unwrap(); // lets try to copy a src directory to tempdir - copy_doc_dir(source.path().join("doc"), destination.path()).unwrap(); + let ignored_files = ["index.txt".into(), "important.svg".into()]; + copy_doc_dir( + source.path().join("doc"), + destination.path(), + &ignored_files, + ) + .unwrap(); assert!(destination.path().join("index.html").exists()); assert!(!destination.path().join("index.txt").exists()); assert!(destination.path().join("inner").join("index.html").exists());