From 0cf3ce4739568414da15ee018024e339d2c61af4 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Mon, 26 Oct 2020 19:08:48 +0100 Subject: [PATCH 1/2] build-manifest: refactor checksum generation into a struct --- src/tools/build-manifest/src/checksum.rs | 75 ++++++++++++++++++++++++ src/tools/build-manifest/src/main.rs | 57 +++--------------- 2 files changed, 82 insertions(+), 50 deletions(-) create mode 100644 src/tools/build-manifest/src/checksum.rs diff --git a/src/tools/build-manifest/src/checksum.rs b/src/tools/build-manifest/src/checksum.rs new file mode 100644 index 0000000000000..10c34b856a2ef --- /dev/null +++ b/src/tools/build-manifest/src/checksum.rs @@ -0,0 +1,75 @@ +use crate::manifest::{FileHash, Manifest}; +use rayon::prelude::*; +use sha2::{Digest, Sha256}; +use std::collections::{HashMap, HashSet}; +use std::error::Error; +use std::fs::File; +use std::io::BufReader; +use std::path::{Path, PathBuf}; +use std::sync::Mutex; +use std::time::Instant; + +pub(crate) struct Checksums { + collected: Mutex>, +} + +impl Checksums { + pub(crate) fn new() -> Self { + Checksums { collected: Mutex::new(HashMap::new()) } + } + + pub(crate) fn fill_missing_checksums(&mut self, manifest: &mut Manifest) { + let need_checksums = self.find_missing_checksums(manifest); + if !need_checksums.is_empty() { + self.collect_checksums(&need_checksums); + } + self.replace_checksums(manifest); + } + + fn find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet { + let mut need_checksums = HashSet::new(); + crate::manifest::visit_file_hashes(manifest, |file_hash| { + if let FileHash::Missing(path) = file_hash { + need_checksums.insert(path.clone()); + } + }); + need_checksums + } + + fn replace_checksums(&mut self, manifest: &mut Manifest) { + let collected = self.collected.lock().unwrap(); + crate::manifest::visit_file_hashes(manifest, |file_hash| { + if let FileHash::Missing(path) = file_hash { + match collected.get(path) { + Some(hash) => *file_hash = FileHash::Present(hash.clone()), + None => panic!("missing hash for file {}", path.display()), + } + } + }); + } + + fn collect_checksums(&mut self, files: &HashSet) { + let collection_start = Instant::now(); + println!( + "collecting hashes for {} tarballs across {} threads", + files.len(), + rayon::current_num_threads().min(files.len()), + ); + + files.par_iter().for_each(|path| match hash(path) { + Ok(hash) => { + self.collected.lock().unwrap().insert(path.clone(), hash); + } + Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err), + }); + + println!("collected {} hashes in {:.2?}", files.len(), collection_start.elapsed()); + } +} + +fn hash(path: &Path) -> Result> { + let mut file = BufReader::new(File::open(path)?); + let mut sha256 = Sha256::default(); + std::io::copy(&mut file, &mut sha256)?; + Ok(hex::encode(sha256.finalize())) +} diff --git a/src/tools/build-manifest/src/main.rs b/src/tools/build-manifest/src/main.rs index ffcf10571ca7d..1debd85beb34f 100644 --- a/src/tools/build-manifest/src/main.rs +++ b/src/tools/build-manifest/src/main.rs @@ -4,22 +4,19 @@ //! via `x.py dist hash-and-sign`; the cmdline arguments are set up //! by rustbuild (in `src/bootstrap/dist.rs`). +mod checksum; mod manifest; mod versions; -use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target}; +use crate::checksum::Checksums; +use crate::manifest::{Component, Manifest, Package, Rename, Target}; use crate::versions::{PkgType, Versions}; -use rayon::prelude::*; -use sha2::Digest; use std::collections::{BTreeMap, HashMap, HashSet}; use std::env; -use std::error::Error; use std::fs::{self, File}; -use std::io::{self, BufReader, Read, Write}; +use std::io::{self, Read, Write}; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; -use std::sync::Mutex; -use std::time::Instant; static HOSTS: &[&str] = &[ "aarch64-apple-darwin", @@ -186,6 +183,7 @@ macro_rules! t { struct Builder { versions: Versions, + checksums: Checksums, shipped_files: HashSet, input: PathBuf, @@ -240,6 +238,7 @@ fn main() { Builder { versions: Versions::new(&channel, &input).unwrap(), + checksums: Checksums::new(), shipped_files: HashSet::new(), input, @@ -321,7 +320,7 @@ impl Builder { self.add_renames_to(&mut manifest); manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest)); - self.fill_missing_hashes(&mut manifest); + self.checksums.fill_missing_checksums(&mut manifest); manifest } @@ -595,41 +594,6 @@ impl Builder { assert!(t!(child.wait()).success()); } - fn fill_missing_hashes(&self, manifest: &mut Manifest) { - // First collect all files that need hashes - let mut need_hashes = HashSet::new(); - crate::manifest::visit_file_hashes(manifest, |file_hash| { - if let FileHash::Missing(path) = file_hash { - need_hashes.insert(path.clone()); - } - }); - - let collected = Mutex::new(HashMap::new()); - let collection_start = Instant::now(); - println!( - "collecting hashes for {} tarballs across {} threads", - need_hashes.len(), - rayon::current_num_threads().min(need_hashes.len()), - ); - need_hashes.par_iter().for_each(|path| match fetch_hash(path) { - Ok(hash) => { - collected.lock().unwrap().insert(path, hash); - } - Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err), - }); - let collected = collected.into_inner().unwrap(); - println!("collected {} hashes in {:.2?}", collected.len(), collection_start.elapsed()); - - crate::manifest::visit_file_hashes(manifest, |file_hash| { - if let FileHash::Missing(path) = file_hash { - match collected.get(path) { - Some(hash) => *file_hash = FileHash::Present(hash.clone()), - None => panic!("missing hash for file {}", path.display()), - } - } - }) - } - fn write_channel_files(&mut self, channel_name: &str, manifest: &Manifest) { self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml"); self.write(&manifest.date, channel_name, "-date.txt"); @@ -660,10 +624,3 @@ impl Builder { t!(std::fs::write(path, content.as_bytes())); } } - -fn fetch_hash(path: &Path) -> Result> { - let mut file = BufReader::new(File::open(path)?); - let mut sha256 = sha2::Sha256::default(); - std::io::copy(&mut file, &mut sha256)?; - Ok(hex::encode(sha256.finalize())) -} From c2f4bbd17614bf54353ccd913463744d6f7c9242 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Mon, 26 Oct 2020 20:16:33 +0100 Subject: [PATCH 2/2] build-manifest: add BUILD_MANIFEST_CHECKSUM_CACHE The checksum cache allows to reuse the calculated checksums between build-manifest and promote-release, or between multiple invocations of build-manifest. --- src/tools/build-manifest/src/checksum.rs | 30 ++++++++++++++++++++---- src/tools/build-manifest/src/main.rs | 4 +++- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/tools/build-manifest/src/checksum.rs b/src/tools/build-manifest/src/checksum.rs index 10c34b856a2ef..c019c7a2f7aec 100644 --- a/src/tools/build-manifest/src/checksum.rs +++ b/src/tools/build-manifest/src/checksum.rs @@ -10,12 +10,29 @@ use std::sync::Mutex; use std::time::Instant; pub(crate) struct Checksums { + cache_path: Option, collected: Mutex>, } impl Checksums { - pub(crate) fn new() -> Self { - Checksums { collected: Mutex::new(HashMap::new()) } + pub(crate) fn new() -> Result> { + let cache_path = std::env::var_os("BUILD_MANIFEST_CHECKSUM_CACHE").map(PathBuf::from); + + let mut collected = HashMap::new(); + if let Some(path) = &cache_path { + if path.is_file() { + collected = serde_json::from_slice(&std::fs::read(path)?)?; + } + } + + Ok(Checksums { cache_path, collected: Mutex::new(collected) }) + } + + pub(crate) fn store_cache(&self) -> Result<(), Box> { + if let Some(path) = &self.cache_path { + std::fs::write(path, &serde_json::to_vec(&self.collected)?)?; + } + Ok(()) } pub(crate) fn fill_missing_checksums(&mut self, manifest: &mut Manifest) { @@ -27,10 +44,14 @@ impl Checksums { } fn find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet { + let collected = self.collected.lock().unwrap(); let mut need_checksums = HashSet::new(); crate::manifest::visit_file_hashes(manifest, |file_hash| { if let FileHash::Missing(path) = file_hash { - need_checksums.insert(path.clone()); + let path = std::fs::canonicalize(path).unwrap(); + if !collected.contains_key(&path) { + need_checksums.insert(path); + } } }); need_checksums @@ -40,7 +61,8 @@ impl Checksums { let collected = self.collected.lock().unwrap(); crate::manifest::visit_file_hashes(manifest, |file_hash| { if let FileHash::Missing(path) = file_hash { - match collected.get(path) { + let path = std::fs::canonicalize(path).unwrap(); + match collected.get(&path) { Some(hash) => *file_hash = FileHash::Present(hash.clone()), None => panic!("missing hash for file {}", path.display()), } diff --git a/src/tools/build-manifest/src/main.rs b/src/tools/build-manifest/src/main.rs index 1debd85beb34f..2863216855b83 100644 --- a/src/tools/build-manifest/src/main.rs +++ b/src/tools/build-manifest/src/main.rs @@ -238,7 +238,7 @@ fn main() { Builder { versions: Versions::new(&channel, &input).unwrap(), - checksums: Checksums::new(), + checksums: t!(Checksums::new()), shipped_files: HashSet::new(), input, @@ -275,6 +275,8 @@ impl Builder { if let Some(path) = std::env::var_os("BUILD_MANIFEST_SHIPPED_FILES_PATH") { self.write_shipped_files(&Path::new(&path)); } + + t!(self.checksums.store_cache()); } /// If a tool does not pass its tests, don't ship it.