diff --git a/Cargo.lock b/Cargo.lock index 059c76892a9..4a4795f851e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -173,6 +173,17 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -359,6 +370,7 @@ dependencies = [ "anstyle", "anstyle-progress", "anyhow", + "async-trait", "base64", "blake3", "cargo-credential", @@ -378,6 +390,8 @@ dependencies = [ "curl-sys", "filetime", "flate2", + "futures", + "futures-timer", "git2", "git2-curl", "gix", @@ -387,6 +401,7 @@ dependencies = [ "hex", "hmac 0.13.0", "home 0.5.12", + "http", "http-auth", "ignore", "im-rc", @@ -1358,6 +1373,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.32" @@ -1374,6 +1404,17 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", 
+ "futures-util", +] + [[package]] name = "futures-io" version = "0.3.32" @@ -1392,12 +1433,19 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ + "futures-channel", "futures-core", "futures-io", "futures-sink", @@ -5722,6 +5770,7 @@ dependencies = [ "cargo-util", "cargo-util-terminal", "clap", + "futures", "git2", "semver", "tracing", diff --git a/Cargo.toml b/Cargo.toml index 0d919add0eb..f388c9ad0fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ anstyle = "1.0.14" anstyle-hyperlink = "1.0.2" anstyle-progress = "0.1.0" anyhow = "1.0.102" +async-trait = "0.1.89" base64 = "0.22.1" blake3 = "1.8.4" build-rs = { version = "0.3.4", path = "crates/build-rs" } @@ -48,6 +49,8 @@ curl = "0.4.49" curl-sys = "0.4.87" filetime = "0.2.27" flate2 = { version = "1.1.9", default-features = false, features = ["zlib-rs"] } +futures = { version = "0.3.32", default-features = false, features = ["std", "executor"]} +futures-timer = "3.0.3" git2 = "0.20.4" git2-curl = "0.21.0" # When updating this, also see if `gix-transport` further down needs updating or some auth-related tests will fail. 
@@ -59,6 +62,7 @@ heck = "0.5.0" hex = "0.4.3" hmac = "0.13.0" home = "0.5.12" +http = "1.4.0" http-auth = { version = "0.1.10", default-features = false } ignore = "0.4.25" im-rc = "15.1.0" @@ -160,6 +164,7 @@ anstream.workspace = true anstyle.workspace = true anstyle-progress.workspace = true anyhow.workspace = true +async-trait.workspace = true base64.workspace = true blake3.workspace = true cargo-credential.workspace = true @@ -175,6 +180,8 @@ curl = { workspace = true, features = ["http2"] } curl-sys.workspace = true filetime.workspace = true flate2.workspace = true +futures.workspace = true +futures-timer.workspace = true git2.workspace = true git2-curl.workspace = true gix.workspace = true @@ -183,6 +190,7 @@ heck.workspace = true hex.workspace = true hmac.workspace = true home.workspace = true +http.workspace = true http-auth.workspace = true ignore.workspace = true im-rc.workspace = true diff --git a/crates/resolver-tests/src/lib.rs b/crates/resolver-tests/src/lib.rs index d332e3962fd..a78e2d3cd24 100644 --- a/crates/resolver-tests/src/lib.rs +++ b/crates/resolver-tests/src/lib.rs @@ -7,10 +7,10 @@ pub mod helpers; pub mod sat; +use std::cell::RefCell; use std::cmp::{max, min}; use std::collections::{BTreeMap, HashSet}; use std::fmt; -use std::task::Poll; use std::time::Instant; use cargo::core::Resolve; @@ -131,15 +131,15 @@ pub fn resolve_with_global_context_raw( ) -> CargoResult { struct MyRegistry<'a> { list: &'a [Summary], - used: HashSet, + used: RefCell>, } impl<'a> Registry for MyRegistry<'a> { - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { + ) -> CargoResult<()> { for summary in self.list.iter() { let matched = match kind { QueryKind::Exact => dep.matches(summary), @@ -148,11 +148,11 @@ pub fn resolve_with_global_context_raw( QueryKind::Normalized => true, }; if matched { - self.used.insert(summary.package_id()); + 
self.used.borrow_mut().insert(summary.package_id()); f(IndexSummary::Candidate(summary.clone())); } } - Poll::Ready(Ok(())) + Ok(()) } fn describe_source(&self, _src: SourceId) -> String { @@ -162,14 +162,10 @@ pub fn resolve_with_global_context_raw( fn is_replaced(&self, _src: SourceId) -> bool { false } - - fn block_until_ready(&mut self) -> CargoResult<()> { - Ok(()) - } } impl<'a> Drop for MyRegistry<'a> { fn drop(&mut self) { - if std::thread::panicking() && self.list.len() != self.used.len() { + if std::thread::panicking() && self.list.len() != self.used.get_mut().len() { // we found a case that causes a panic and did not use all of the input. // lets print the part of the input that was used for minimization. eprintln!( @@ -177,7 +173,7 @@ pub fn resolve_with_global_context_raw( PrettyPrintRegistry( self.list .iter() - .filter(|s| { self.used.contains(&s.package_id()) }) + .filter(|s| { self.used.get_mut().contains(&s.package_id()) }) .cloned() .collect() ) @@ -187,7 +183,7 @@ pub fn resolve_with_global_context_raw( } let mut registry = MyRegistry { list: registry, - used: HashSet::new(), + used: RefCell::new(HashSet::new()), }; let root_summary = diff --git a/crates/xtask-bump-check/Cargo.toml b/crates/xtask-bump-check/Cargo.toml index 9aa8e4847d1..1d1dd36556f 100644 --- a/crates/xtask-bump-check/Cargo.toml +++ b/crates/xtask-bump-check/Cargo.toml @@ -10,6 +10,7 @@ cargo.workspace = true cargo-util.workspace = true cargo-util-terminal.workspace = true clap.workspace = true +futures.workspace = true git2.workspace = true semver.workspace = true tracing-subscriber.workspace = true diff --git a/crates/xtask-bump-check/src/xtask.rs b/crates/xtask-bump-check/src/xtask.rs index f74d7c72668..bc107d0d3f4 100644 --- a/crates/xtask-bump-check/src/xtask.rs +++ b/crates/xtask-bump-check/src/xtask.rs @@ -15,7 +15,6 @@ use std::collections::HashMap; use std::fmt::Write; use std::fs; -use std::task; use cargo::CargoResult; use cargo::core::Package; @@ -444,15 +443,9 @@ fn 
check_crates_io<'a>( let current = member.version(); let version_req = format!(">={current}"); let query = Dependency::parse(*name, Some(&version_req), source_id)?; - let possibilities = loop { - // Exact to avoid returning all for path/git - match registry.query_vec(&query, QueryKind::Exact) { - task::Poll::Ready(res) => { - break res?; - } - task::Poll::Pending => registry.block_until_ready()?, - } - }; + // Exact to avoid returning all for path/git + let possibilities = + futures::executor::block_on(registry.query_vec(&query, QueryKind::Exact))?; if possibilities.is_empty() { tracing::trace!("dep `{name}` has no version greater than or equal to `{current}`"); } else { diff --git a/src/cargo/core/compiler/future_incompat.rs b/src/cargo/core/compiler/future_incompat.rs index c5505caff3e..0df342b4b6c 100644 --- a/src/cargo/core/compiler/future_incompat.rs +++ b/src/cargo/core/compiler/future_incompat.rs @@ -40,11 +40,11 @@ use crate::sources::source::QueryKind; use crate::util::CargoResult; use crate::util::cache_lock::CacheLockMode; use anyhow::{Context, bail, format_err}; +use futures::stream::FuturesUnordered; use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fmt::Write as _; use std::io::{Read, Write}; -use std::task::Poll; pub const REPORT_PREAMBLE: &str = "\ The following warnings were discovered during the build. These warnings are an @@ -308,7 +308,7 @@ fn get_updates(ws: &Workspace<'_>, package_ids: &BTreeSet) -> Option< .ok()?; // Create a set of updated registry sources. 
let map = SourceConfigMap::new(ws.gctx()).ok()?; - let mut package_ids: BTreeSet<_> = package_ids + let package_ids: BTreeSet<_> = package_ids .iter() .filter(|pkg_id| pkg_id.source_id().is_registry()) .collect(); @@ -316,7 +316,7 @@ fn get_updates(ws: &Workspace<'_>, package_ids: &BTreeSet) -> Option< .iter() .map(|pkg_id| pkg_id.source_id()) .collect(); - let mut sources: HashMap<_, _> = source_ids + let sources: HashMap<_, _> = source_ids .into_iter() .filter_map(|sid| { let source = map.load(sid, &HashSet::new()).ok()?; @@ -325,28 +325,18 @@ fn get_updates(ws: &Workspace<'_>, package_ids: &BTreeSet) -> Option< .collect(); // Query the sources for new versions, mapping `package_ids` into `summaries`. - let mut summaries = Vec::new(); - while !package_ids.is_empty() { - package_ids.retain(|&pkg_id| { - let Some(source) = sources.get_mut(&pkg_id.source_id()) else { - return false; - }; - let Ok(dep) = Dependency::parse(pkg_id.name(), None, pkg_id.source_id()) else { - return false; - }; - match source.query_vec(&dep, QueryKind::Exact) { - Poll::Ready(Ok(sum)) => { - summaries.push((pkg_id, sum)); - false - } - Poll::Ready(Err(_)) => false, - Poll::Pending => true, - } - }); - for (_, source) in sources.iter_mut() { - source.block_until_ready().ok()?; + let pending = FuturesUnordered::new(); + for pkg_id in package_ids { + if let Some(source) = sources.get(&pkg_id.source_id()) + && let Ok(dep) = Dependency::parse(pkg_id.name(), None, pkg_id.source_id()) + { + pending.push(async move { + let sum = source.query_vec(&dep, QueryKind::Exact).await.ok()?; + Some((pkg_id, sum)) + }); } } + let summaries = crate::util::block_on_stream(pending).flatten(); let mut updates = String::new(); for (pkg_id, summaries) in summaries { diff --git a/src/cargo/core/package.rs b/src/cargo/core/package.rs index 906eda835e5..0fd005fe831 100644 --- a/src/cargo/core/package.rs +++ b/src/cargo/core/package.rs @@ -1,5 +1,5 @@ use std::cell::OnceCell; -use std::cell::{Cell, Ref, RefCell, 
RefMut}; +use std::cell::{Cell, Ref, RefCell}; use std::cmp::Ordering; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fmt; @@ -663,10 +663,6 @@ impl<'gctx> PackageSet<'gctx> { self.sources.borrow() } - pub fn sources_mut(&self) -> RefMut<'_, SourceMap<'gctx>> { - self.sources.borrow_mut() - } - /// Merge the given set into self. pub fn add_set(&mut self, set: PackageSet<'gctx>) { assert!(!self.downloading.get()); @@ -707,9 +703,9 @@ impl<'a, 'gctx> Downloads<'a, 'gctx> { // Ask the original source for this `PackageId` for the corresponding // package. That may immediately come back and tell us that the package // is ready, or it could tell us that it needs to be downloaded. - let mut sources = self.set.sources.borrow_mut(); + let sources = self.set.sources.borrow_mut(); let source = sources - .get_mut(id.source_id()) + .get(id.source_id()) .ok_or_else(|| internal(format!("couldn't find source for `{}`", id)))?; let pkg = source .download(id) @@ -924,9 +920,9 @@ impl<'a, 'gctx> Downloads<'a, 'gctx> { // Inform the original source that the download is finished which // should allow us to actually get the package and fill it in now. - let mut sources = self.set.sources.borrow_mut(); + let sources = self.set.sources.borrow_mut(); let source = sources - .get_mut(dl.id.source_id()) + .get(dl.id.source_id()) .ok_or_else(|| internal(format!("couldn't find source for `{}`", dl.id)))?; let start = Instant::now(); let pkg = source.finish_download(dl.id, data)?; diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs index 98846e7cd63..055b3b63ff3 100644 --- a/src/cargo/core/registry.rs +++ b/src/cargo/core/registry.rs @@ -9,8 +9,8 @@ //! The former is just one kind of source, //! while the latter involves operations on the registry Web API. 
+use std::cell::RefCell; use std::collections::{HashMap, HashSet}; -use std::task::{Poll, ready}; use crate::core::{Dependency, PackageId, PackageSet, Patch, SourceId, Summary}; use crate::sources::IndexSummary; @@ -23,6 +23,7 @@ use crate::util::interning::InternedString; use crate::util::{CanonicalUrl, GlobalContext}; use anyhow::Context as _; use cargo_util_terminal::report::Level; +use futures::stream::FuturesUnordered; use itertools::Itertools; use tracing::{debug, trace}; use url::Url; @@ -36,24 +37,23 @@ use url::Url; /// /// See also the [`Source`] trait, as many of the methods here mirror and /// abstract over its functionalities. +#[allow(async_fn_in_trait)] pub trait Registry { /// Attempt to find the packages that match a dependency request. - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll>; + ) -> CargoResult<()>; /// Gathers the result from [`Registry::query`] as a list of [`IndexSummary`] items /// when they become available. - fn query_vec( - &mut self, - dep: &Dependency, - kind: QueryKind, - ) -> Poll>> { + async fn query_vec(&self, dep: &Dependency, kind: QueryKind) -> CargoResult> { let mut ret = Vec::new(); - self.query(dep, kind, &mut |s| ret.push(s)).map_ok(|()| ret) + self.query(dep, kind, &mut |s| ret.push(s)) + .await + .map(|()| ret) } /// Gets the description of a source, to provide useful messages. @@ -61,9 +61,6 @@ pub trait Registry { /// Checks if a source is replaced with some other source. fn is_replaced(&self, source: SourceId) -> bool; - - /// Block until all outstanding [`Poll::Pending`] requests are [`Poll::Ready`]. - fn block_until_ready(&mut self) -> CargoResult<()>; } /// This structure represents a registry of known packages. 
It internally @@ -83,11 +80,11 @@ pub trait Registry { /// [`Package`]: crate::core::Package pub struct PackageRegistry<'gctx> { gctx: &'gctx GlobalContext, - sources: SourceMap<'gctx>, + sources: RefCell>, /// A list of sources which are considered "path-overrides" which take /// precedent when querying for packages. - overrides: Vec, + overrides: RefCell>, /// Use for tracking sources that are already loaded into the registry. // Note that each SourceId does not take into account its `precise` field @@ -105,13 +102,13 @@ pub struct PackageRegistry<'gctx> { // This is basically a long-winded way of saying that we want to know // precisely what the keys of `sources` are, so this is a mapping of key to // what exactly the key is. - source_ids: HashMap, + source_ids: RefCell>, /// This is constructed via [`PackageRegistry::register_lock`]. /// See also [`LockedMap`]. locked: LockedMap, /// Packages allowed to be used, even if they are yanked. - yanked_whitelist: HashSet, + yanked_whitelist: RefCell>, source_config: SourceConfigMap<'gctx>, /// Patches registered during calls to [`PackageRegistry::patch`]. 
@@ -201,12 +198,12 @@ impl<'gctx> PackageRegistry<'gctx> { ) -> CargoResult> { Ok(PackageRegistry { gctx, - sources: SourceMap::new(), - source_ids: HashMap::new(), - overrides: Vec::new(), + sources: RefCell::new(SourceMap::new()), + source_ids: RefCell::new(HashMap::new()), + overrides: RefCell::new(Vec::new()), source_config, locked: HashMap::new(), - yanked_whitelist: HashSet::new(), + yanked_whitelist: RefCell::new(HashSet::new()), patches: HashMap::new(), patches_locked: false, patches_available: HashMap::new(), @@ -214,14 +211,13 @@ impl<'gctx> PackageRegistry<'gctx> { } pub fn get(self, package_ids: &[PackageId]) -> CargoResult> { - trace!("getting packages; sources={}", self.sources.len()); - PackageSet::new(package_ids, self.sources, self.gctx) + trace!("getting packages; sources={}", self.sources.borrow().len()); + PackageSet::new(package_ids, self.sources.into_inner(), self.gctx) } /// Ensures the [`Source`] of the given [`SourceId`] is loaded. - /// If not, this will block until the source is ready. - fn ensure_loaded(&mut self, namespace: SourceId, kind: Kind) -> CargoResult<()> { - match self.source_ids.get(&namespace) { + fn ensure_loaded(&self, namespace: SourceId, kind: Kind) -> CargoResult<()> { + match self.source_ids.borrow().get(&namespace) { // We've previously loaded this source, and we've already locked it, // so we're not allowed to change it even if `namespace` has a // slightly different precise version listed. @@ -254,11 +250,9 @@ impl<'gctx> PackageRegistry<'gctx> { self.load(namespace, kind)?; - // This isn't strictly necessary since it will be called later. - // However it improves error messages for sources that issue errors - // in `block_until_ready` because the callers here have context about - // which deps are being resolved. 
- self.block_until_ready()?; + // Ensure `shell` is not already in use, + // regardless of which source is used and how it happens to behave this time + self.gctx.debug_assert_shell_not_borrowed(); Ok(()) } @@ -276,27 +270,27 @@ impl<'gctx> PackageRegistry<'gctx> { } /// Adds a source to the registry. - fn add_source(&mut self, source: Box, kind: Kind) { + fn add_source(&self, source: Box, kind: Kind) { let id = source.source_id(); - self.sources.insert(source); - self.source_ids.insert(id, (id, kind)); + self.sources.borrow_mut().insert(source); + self.source_ids.borrow_mut().insert(id, (id, kind)); } /// Adds a source from a [path override]. /// /// [path override]: https://doc.rust-lang.org/nightly/cargo/reference/overriding-dependencies.html#paths-overrides pub fn add_override(&mut self, source: Box) { - self.overrides.push(source.source_id()); + self.overrides.borrow_mut().push(source.source_id()); self.add_source(source, Kind::Override); } /// Allows a group of package to be available to query even if they are yanked. - pub fn add_to_yanked_whitelist(&mut self, iter: impl Iterator) { + pub fn add_to_yanked_whitelist(&self, iter: impl Iterator) { let pkgs = iter.collect::>(); - for (_, source) in self.sources.sources_mut() { + for (_, source) in self.sources.borrow().iter() { source.add_to_yanked_whitelist(&pkgs); } - self.yanked_whitelist.extend(pkgs); + self.yanked_whitelist.borrow_mut().extend(pkgs); } /// remove all residual state from previous lock files. @@ -362,51 +356,68 @@ impl<'gctx> PackageRegistry<'gctx> { // precisely one package, so that's why we're just creating a flat list // of summaries which should be the same length as `deps` above. 
- let mut patch_deps_remaining: Vec<_> = patch_deps.iter().collect(); - let mut unlocked_summaries = Vec::new(); - while !patch_deps_remaining.is_empty() { - let mut patch_deps_pending = Vec::new(); - for patch_dep_remaining in patch_deps_remaining { - let (orig_patch, locked) = patch_dep_remaining; - - // Use the locked patch if it exists, otherwise use the original. - let dep = match locked { - Some(lock) => &lock.dependency, - None => &orig_patch.dep, - }; - debug!( - "registering a patch for `{}` with `{}`", - url, - dep.package_name() - ); + let pending = FuturesUnordered::new(); - let mut unused_fields = Vec::new(); - if dep.features().len() != 0 { - unused_fields.push("`features`"); - } - if !dep.uses_default_features() { - unused_fields.push("`default-features`") - } - if !unused_fields.is_empty() { - self.source_config.gctx().shell().print_report( - &[Level::WARNING - .secondary_title(format!( - "unused field in patch for `{}`: {}", - dep.package_name(), - unused_fields.join(", ") - )) - .element(Level::HELP.message(format!( - "configure {} in the `dependencies` entry", - unused_fields.join(", ") - )))], - false, - )?; - } + for (orig_patch, locked) in patch_deps { + // Use the locked patch if it exists, otherwise use the original. 
+ let dep = match locked { + Some(lock) => &lock.dependency, + None => &orig_patch.dep, + }; + debug!( + "registering a patch for `{}` with `{}`", + url, + dep.package_name() + ); + + let mut unused_fields = Vec::new(); + if dep.features().len() != 0 { + unused_fields.push("`features`"); + } + if !dep.uses_default_features() { + unused_fields.push("`default-features`") + } + if !unused_fields.is_empty() { + self.source_config.gctx().shell().print_report( + &[Level::WARNING + .secondary_title(format!( + "unused field in patch for `{}`: {}", + dep.package_name(), + unused_fields.join(", ") + )) + .element(Level::HELP.message(format!( + "configure {} in the `dependencies` entry", + unused_fields.join(", ") + )))], + false, + )?; + } - // Go straight to the source for resolving `dep`. Load it as we - // normally would and then ask it directly for the list of summaries - // corresponding to this `dep`. - self.ensure_loaded(dep.source_id(), Kind::Normal) + // Go straight to the source for resolving `dep`. Load it as we + // normally would and then ask it directly for the list of summaries + // corresponding to this `dep`. + self.ensure_loaded(dep.source_id(), Kind::Normal) + .with_context(|| { + format!( + "failed to load source for dependency `{}`", + dep.package_name() + ) + })?; + + let source = self + .sources + .borrow() + .get(dep.source_id()) + .expect("loaded source not present") + .clone(); + pending.push(async move { + let mut summaries = Vec::new(); + source + .query(&dep, QueryKind::Exact, &mut |s| { + summaries.push(s.into_summary()) + }) + .await + .with_context(|| format!("unable to update {}", source.source_id())) .with_context(|| { format!( "failed to load source for dependency `{}`", @@ -414,54 +425,35 @@ impl<'gctx> PackageRegistry<'gctx> { ) })?; - let source = self - .sources - .get_mut(dep.source_id()) - .expect("loaded source not present"); - - let summaries = match source.query_vec(dep, QueryKind::Exact)? 
{ - Poll::Ready(deps) => deps, - Poll::Pending => { - patch_deps_pending.push(patch_dep_remaining); - continue; - } - }; - - let summaries = summaries.into_iter().map(|s| s.into_summary()).collect(); - let (summary, should_unlock) = - match summary_for_patch(&orig_patch, url, &locked, summaries, source) { - Poll::Ready(x) => x, - Poll::Pending => { - patch_deps_pending.push(patch_dep_remaining); - continue; - } - }?; - - debug!( - "patch summary is {:?} should_unlock={:?}", - summary, should_unlock - ); - if let Some(unlock_id) = should_unlock { - unlock_patches.push(((*orig_patch).clone(), unlock_id)); - } + let (summary, should_unlock) = + summary_for_patch(&orig_patch, url, &locked, summaries, source.as_ref()) + .await?; + Ok::<_, anyhow::Error>((orig_patch, dep, summary, should_unlock)) + }); + } - if *summary.package_id().source_id().canonical_url() == canonical { - return Err(anyhow::anyhow!( - "patch for `{}` points to the same source, but patches must point to different sources\n\ - help: check `{}` patch definition for `{}` in `{}`", - dep.package_name(), - dep.package_name(), - url, - orig_patch.loc - )); - } - unlocked_summaries.push(summary); + let unlocked_summaries = crate::util::block_on_stream(pending).map(|next| { + let (orig_patch, dep, summary, should_unlock) = next?; + debug!( + "patch summary is {:?} should_unlock={:?}", + summary, should_unlock + ); + if let Some(unlock_id) = should_unlock { + unlock_patches.push(((*orig_patch).clone(), unlock_id)); + } - patch_deps_remaining = patch_deps_pending; - self.block_until_ready()?; - } + if *summary.package_id().source_id().canonical_url() == canonical { + return Err(anyhow::anyhow!( + "patch for `{}` points to the same source, but patches must point to different sources\n\ + help: check `{}` patch definition for `{}` in `{}`", + dep.package_name(), + dep.package_name(), + url, + orig_patch.loc + )); + } + Ok(summary) + }).collect::<CargoResult<Vec<_>>>()?; let mut name_and_version = HashSet::new(); for summary in unlocked_summaries.iter() { @@ -537,16
+529,16 @@ impl<'gctx> PackageRegistry<'gctx> { /// Loads the [`Source`] for a given [`SourceId`] to this registry, making /// them available to resolution. - fn load(&mut self, source_id: SourceId, kind: Kind) -> CargoResult<()> { + fn load(&self, source_id: SourceId, kind: Kind) -> CargoResult<()> { debug!("loading source {}", source_id); let source = self .source_config - .load(source_id, &self.yanked_whitelist) + .load(source_id, &self.yanked_whitelist.borrow()) .with_context(|| format!("unable to update {}", source_id))?; assert_eq!(source.source_id(), source_id); if kind == Kind::Override { - self.overrides.push(source_id); + self.overrides.borrow_mut().push(source_id); } self.add_source(source, kind); @@ -558,7 +550,11 @@ impl<'gctx> PackageRegistry<'gctx> { // `"locked"` as other values indicate a `cargo update // --precise` request if !source_id.has_locked_precise() { - self.sources.get_mut(source_id).unwrap().invalidate_cache(); + self.sources + .borrow() + .get(source_id) + .unwrap() + .invalidate_cache(); } else { debug!("skipping update due to locked registry"); } @@ -566,18 +562,22 @@ impl<'gctx> PackageRegistry<'gctx> { } /// Queries path overrides from this registry. 
- fn query_overrides(&mut self, dep: &Dependency) -> Poll>> { - for &s in self.overrides.iter() { - let src = self.sources.get_mut(s).unwrap(); + async fn query_overrides(&self, dep: &Dependency) -> CargoResult> { + let overrides = self.overrides.borrow(); + for &s in overrides.iter() { let dep = Dependency::new_override(dep.package_name(), s); - let mut results = None; - ready!(src.query(&dep, QueryKind::Exact, &mut |s| results = Some(s)))?; + self.sources + .borrow() + .get(s) + .unwrap() + .query(&dep, QueryKind::Exact, &mut |s| results = Some(s)) + .await?; if results.is_some() { - return Poll::Ready(Ok(results)); + return Ok(results); } } - Poll::Ready(Ok(None)) + Ok(None) } /// This function is used to transform a summary to another locked summary @@ -658,15 +658,15 @@ https://doc.rust-lang.org/cargo/reference/overriding-dependencies.html } impl<'gctx> Registry for PackageRegistry<'gctx> { - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { + ) -> CargoResult<()> { assert!(self.patches_locked); // Look for an override and get ready to query the real source. - let override_summary = ready!(self.query_overrides(dep))?; + let override_summary = self.query_overrides(dep).await?; // Next up on our list of candidates is to check the `[patch]` section // of the manifest. Here we look through all patches relevant to the @@ -700,7 +700,7 @@ impl<'gctx> Registry for PackageRegistry<'gctx> { None => f(IndexSummary::Candidate(patch)), } - return Poll::Ready(Ok(())); + return Ok(()); } if !patches.is_empty() { @@ -724,12 +724,31 @@ impl<'gctx> Registry for PackageRegistry<'gctx> { ) })?; - let source = self.sources.get_mut(dep.source_id()); + // Helper function to add context for query errors. 
+ async fn query_with_context( + source: &dyn Source, + dep: &Dependency, + kind: QueryKind, + f: &mut dyn FnMut(IndexSummary), + ) -> CargoResult<()> { + source + .query(dep, kind, f) + .await + .with_context(|| format!("unable to update {}", source.source_id())) + .with_context(|| { + format!( + "failed to load source for dependency `{}`", + dep.package_name() + ) + }) + } + + let source = self.sources.borrow().get(dep.source_id()).cloned(); match (override_summary, source) { (Some(_), None) => { - return Poll::Ready(Err(anyhow::anyhow!("override found but no real ones"))); + return Err(anyhow::anyhow!("override found but no real ones")); } - (None, None) => return Poll::Ready(Ok(())), + (None, None) => return Ok(()), // If we don't have an override then we just ship everything upstairs after locking the summary (None, Some(source)) => { @@ -755,14 +774,14 @@ impl<'gctx> Registry for PackageRegistry<'gctx> { let summary = summary.map_summary(|summary| lock(locked, all_patches, summary)); f(summary) }; - return source.query(dep, kind, callback); + return query_with_context(&*source, dep, kind, callback).await; } // If we have an override summary then we query the source to sanity check its results. // We don't actually use any of the summaries it gives us though. 
(Some(override_summary), Some(source)) => { if !patches.is_empty() { - return Poll::Ready(Err(anyhow::anyhow!("found patches and a path override"))); + return Err(anyhow::anyhow!("found patches and a path override")); } let mut n = 0; let mut to_warn = None; @@ -770,14 +789,9 @@ impl<'gctx> Registry for PackageRegistry<'gctx> { n += 1; to_warn = Some(summary); }; - let pend = source.query(dep, kind, callback); - if pend.is_pending() { - return Poll::Pending; - } + query_with_context(&*source, dep, kind, callback).await?; if n > 1 { - return Poll::Ready(Err(anyhow::anyhow!( - "found an override with a non-locked list" - ))); + return Err(anyhow::anyhow!("found an override with a non-locked list")); } if let Some(to_warn) = to_warn { self.warn_bad_override(override_summary.as_summary(), to_warn.as_summary())?; @@ -787,35 +801,22 @@ impl<'gctx> Registry for PackageRegistry<'gctx> { } } - Poll::Ready(Ok(())) + Ok(()) } fn describe_source(&self, id: SourceId) -> String { - match self.sources.get(id) { + match self.sources.borrow().get(id) { Some(src) => src.describe(), None => id.to_string(), } } fn is_replaced(&self, id: SourceId) -> bool { - match self.sources.get(id) { + match self.sources.borrow().get(id) { Some(src) => src.is_replaced(), None => false, } } - - #[tracing::instrument(skip_all)] - fn block_until_ready(&mut self) -> CargoResult<()> { - // Ensure `shell` is not already in use, - // regardless of which source is used and how it happens to behave this time - self.gctx.debug_assert_shell_not_borrowed(); - for (source_id, source) in self.sources.sources_mut() { - source - .block_until_ready() - .with_context(|| format!("unable to update {}", source_id))?; - } - Ok(()) - } } /// See [`PackageRegistry::lock`]. @@ -933,15 +934,15 @@ fn lock( /// is a package ID indicating that the patch entry should be unlocked. 
This /// happens when a match cannot be found with the `locked` one, but found one /// via the original patch, so we need to inform the resolver to "unlock" it. -fn summary_for_patch( +async fn summary_for_patch( original_patch: &Patch, orig_patch_url: &Url, locked: &Option, mut summaries: Vec, - source: &mut dyn Source, -) -> Poll)>> { + source: &dyn Source, +) -> CargoResult<(Summary, Option)> { if summaries.len() == 1 { - return Poll::Ready(Ok((summaries.pop().unwrap(), None))); + return Ok((summaries.pop().unwrap(), None)); } if summaries.len() > 1 { // TODO: In the future, it might be nice to add all of these @@ -954,7 +955,7 @@ fn summary_for_patch( let mut vers: Vec<_> = summaries.iter().map(|summary| summary.version()).collect(); vers.sort(); let versions: Vec<_> = vers.into_iter().map(|v| v.to_string()).collect(); - return Poll::Ready(Err(anyhow::anyhow!( + return Err(anyhow::anyhow!( "patch for `{}` in `{}` resolved to more than one candidate\n\ note: found versions: {}\n\ help: check `{}` patch definition for `{}` in `{}`\n\ @@ -966,13 +967,15 @@ fn summary_for_patch( orig_patch_url, original_patch.loc, versions.last().unwrap() - ))); + )); } assert!(summaries.is_empty()); // No summaries found, try to help the user figure out what is wrong. if let Some(locked) = locked { // Since the locked patch did not match anything, try the unlocked one. 
- let orig_matches = ready!(source.query_vec(&original_patch.dep, QueryKind::Exact)) + let orig_matches = source + .query_vec(&original_patch.dep, QueryKind::Exact) + .await .unwrap_or_else(|e| { tracing::warn!( "could not determine unlocked summaries for dep {:?}: {:?}", @@ -984,14 +987,15 @@ fn summary_for_patch( let orig_matches = orig_matches.into_iter().map(|s| s.into_summary()).collect(); - let summary = ready!(summary_for_patch( + let summary = Box::pin(summary_for_patch( original_patch, orig_patch_url, &None, orig_matches, - source - ))?; - return Poll::Ready(Ok((summary.0, Some(locked.package_id)))); + source, + )) + .await?; + return Ok((summary.0, Some(locked.package_id))); } // Try checking if there are *any* packages that match this by name. let name_only_dep = Dependency::new_override( @@ -999,8 +1003,10 @@ fn summary_for_patch( original_patch.dep.source_id(), ); - let name_summaries = - ready!(source.query_vec(&name_only_dep, QueryKind::Exact)).unwrap_or_else(|e| { + let name_summaries = source + .query_vec(&name_only_dep, QueryKind::Exact) + .await + .unwrap_or_else(|e| { tracing::warn!( "failed to do name-only summary query for {:?}: {:?}", name_only_dep, @@ -1021,7 +1027,7 @@ fn summary_for_patch( format!("versions `{}`", strs.join(", ")) } }; - Poll::Ready(Err(if found.is_empty() { + Err(if found.is_empty() { anyhow::anyhow!( "patch location `{}` does not contain packages matching `{}`\n\ help: check `{}` patch definition for `{}` in `{}`", @@ -1045,5 +1051,5 @@ fn summary_for_patch( orig_patch_url, original_patch.loc ) - })) + }) } diff --git a/src/cargo/core/resolver/dep_cache.rs b/src/cargo/core/resolver/dep_cache.rs index eb7a581d145..9acd59fd413 100644 --- a/src/cargo/core/resolver/dep_cache.rs +++ b/src/cargo/core/resolver/dep_cache.rs @@ -20,103 +20,60 @@ use crate::core::{ Dependency, FeatureValue, PackageId, PackageIdSpec, PackageIdSpecQuery, Registry, Summary, }; use crate::sources::source::QueryKind; +use crate::util::LocalPollAdapter; 
use crate::util::closest_msg; use crate::util::errors::CargoResult; use crate::util::interning::{INTERNED_DEFAULT, InternedString}; use anyhow::Context as _; +use std::cell::RefCell; use std::collections::{BTreeSet, HashMap, HashSet}; use std::fmt::Write; use std::rc::Rc; use std::task::Poll; use tracing::debug; -pub struct RegistryQueryer<'a, T: Registry> { - pub registry: &'a mut T, +pub struct RegistryQueryerAsync<'a, T: Registry> { + pub registry: &'a T, replacements: &'a [(PackageIdSpec, Dependency)], version_prefs: &'a VersionPreferences, - /// a cache of `Candidate`s that fulfil a `Dependency` (and whether `first_version`) - registry_cache: HashMap<(Dependency, Option), Poll>>>, - /// a cache of `Dependency`s that are required for a `Summary` - /// - /// HACK: `first_version` is not kept in the cache key is it is 1:1 with - /// `parent.is_none()` (the first element of the cache key) as it doesn't change through - /// execution. - summary_cache: HashMap< - (Option, Summary, ResolveOpts), - (Rc<(HashSet, Rc>)>, bool), - >, /// all the cases we ended up using a supplied replacement - used_replacements: HashMap, + used_replacements: RefCell>, } -impl<'a, T: Registry> RegistryQueryer<'a, T> { +impl<'a, T: Registry> RegistryQueryerAsync<'a, T> { pub fn new( - registry: &'a mut T, + registry: &'a T, replacements: &'a [(PackageIdSpec, Dependency)], version_prefs: &'a VersionPreferences, ) -> Self { - RegistryQueryer { + RegistryQueryerAsync { registry, replacements, version_prefs, - registry_cache: HashMap::new(), - summary_cache: HashMap::new(), - used_replacements: HashMap::new(), + used_replacements: RefCell::new(HashMap::new()), } } - pub fn reset_pending(&mut self) -> bool { - let mut all_ready = true; - self.registry_cache.retain(|_, r| { - if !r.is_ready() { - all_ready = false; - } - r.is_ready() - }); - self.summary_cache.retain(|_, (_, r)| { - if !*r { - all_ready = false; - } - *r - }); - all_ready - } - - pub fn used_replacement_for(&self, p: PackageId) 
-> Option<(PackageId, PackageId)> { - self.used_replacements.get(&p).map(|r| (p, r.package_id())) - } - - pub fn replacement_summary(&self, p: PackageId) -> Option<&Summary> { - self.used_replacements.get(&p) - } - /// Queries the `registry` to return a list of candidates for `dep`. /// /// This method is the location where overrides are taken into account. If /// any candidates are returned which match an override then the override is /// applied by performing a second query for what the override should /// return. - pub fn query( - &mut self, - dep: &Dependency, - first_version: Option, - ) -> Poll>>> { - let registry_cache_key = (dep.clone(), first_version); - if let Some(out) = self.registry_cache.get(®istry_cache_key).cloned() { - return out.map(Result::Ok); - } + async fn query( + &self, + key: &(Dependency, Option), + ) -> CargoResult>> { + let (dep, first_version) = key; + let mut summaries = Vec::new(); + self.registry + .query(dep, QueryKind::Exact, &mut |s| { + summaries.push(s.into_summary()); + }) + .await?; - let mut ret = Vec::new(); - let ready = self.registry.query(dep, QueryKind::Exact, &mut |s| { - ret.push(s.into_summary()); - })?; - if ready.is_pending() { - self.registry_cache - .insert((dep.clone(), first_version), Poll::Pending); - return Poll::Pending; - } - for summary in ret.iter() { + for summary in summaries.iter() { let mut potential_matches = self .replacements .iter() @@ -131,14 +88,11 @@ impl<'a, T: Registry> RegistryQueryer<'a, T> { dep.version_req() ); - let mut summaries = match self.registry.query_vec(dep, QueryKind::Exact)? { - Poll::Ready(s) => s.into_iter(), - Poll::Pending => { - self.registry_cache - .insert((dep.clone(), first_version), Poll::Pending); - return Poll::Pending; - } - }; + let mut summaries = self + .registry + .query_vec(dep, QueryKind::Exact) + .await? 
+ .into_iter(); let s = summaries .next() .ok_or_else(|| { @@ -158,13 +112,13 @@ impl<'a, T: Registry> RegistryQueryer<'a, T> { .iter() .map(|s| format!(" * {}", s.package_id())) .collect::>(); - return Poll::Ready(Err(anyhow::anyhow!( + return Err(anyhow::anyhow!( "the replacement specification `{}` matched \ multiple packages:\n * {}\n{}", spec, s.package_id(), bullets.join("\n") - ))); + )); } assert_eq!( @@ -173,13 +127,13 @@ impl<'a, T: Registry> RegistryQueryer<'a, T> { "dependency should be hard coded to have the same name" ); if s.version() != summary.version() { - return Poll::Ready(Err(anyhow::anyhow!( + return Err(anyhow::anyhow!( "replacement specification `{}` matched {} and tried to override it with {}\n\ avoid matching unrelated packages by being more specific", spec, summary.version(), s.version(), - ))); + )); } let replace = if s.source_id() == summary.source_id() { @@ -192,30 +146,111 @@ impl<'a, T: Registry> RegistryQueryer<'a, T> { // Make sure no duplicates if let Some((spec, _)) = potential_matches.next() { - return Poll::Ready(Err(anyhow::anyhow!( + return Err(anyhow::anyhow!( "overlapping replacement specifications found:\n\n \ * {}\n * {}\n\nboth specifications match: {}", matched_spec, spec, summary.package_id() - ))); + )); } for dep in summary.dependencies() { debug!("\t{} => {}", dep.package_name(), dep.version_req()); } if let Some(r) = replace { - self.used_replacements.insert(summary.package_id(), r); + self.used_replacements + .borrow_mut() + .insert(summary.package_id(), r); } } - self.version_prefs.sort_summaries(&mut ret, first_version); + self.version_prefs + .sort_summaries(&mut summaries, *first_version); + Ok(Rc::new(summaries)) + } +} + +/// Wrapper around RegistryQueryerAsync that provides +/// caching and a Poll based interface using `LocalPollAdapter`. 
+pub struct RegistryQueryer<'a, T: Registry> { + inner: Rc>, + poller: LocalPollAdapter< + 'a, + Rc>, + (Dependency, Option), + CargoResult>>, + >, + + /// a cache of `Dependency`s that are required for a `Summary` + /// + /// HACK: `first_version` is not kept in the cache key as it is 1:1 with + /// `parent.is_none()` (the first element of the cache key) as it doesn't change through + /// execution. + summary_cache: HashMap< + (Option, Summary, ResolveOpts), + (Rc<(HashSet, Rc>)>, bool), + >, +} + +impl<'a, T: Registry> RegistryQueryer<'a, T> { + pub fn new( + registry: &'a T, + replacements: &'a [(PackageIdSpec, Dependency)], + version_prefs: &'a VersionPreferences, + ) -> Self { + let inner = Rc::new(RegistryQueryerAsync::new( + registry, + replacements, + version_prefs, + )); + Self { + inner: inner.clone(), + poller: LocalPollAdapter::new(inner), + summary_cache: HashMap::new(), + } + } + + pub fn registry(&self) -> &T { + self.inner.registry + } + + pub fn query( + &mut self, + dep: &Dependency, + first_version: Option, + ) -> Poll>>> { + self.poller + .poll(RegistryQueryerAsync::query, (dep.clone(), first_version)) + } + + pub fn wait(&mut self) -> CargoResult { + let pending = self.poller.pending_count(); + // Have all outstanding registry requests been completed? + let mut all_ready = self.poller.wait(); + debug!(target: "cargo::core::resolver::restarting", pending); - let out = Poll::Ready(Rc::new(ret)); + // Remove cached summaries that we produced with incomplete information.
+ self.summary_cache.retain(|_, (_, r)| { + if !*r { + all_ready = false; + } + *r + }); + + Ok(all_ready) + } - self.registry_cache.insert(registry_cache_key, out.clone()); + pub fn used_replacement_for(&self, p: PackageId) -> Option<(PackageId, PackageId)> { + self.inner + .used_replacements + .borrow() + .get(&p) + .map(|r| (p, r.package_id())) + } - out.map(Result::Ok) + pub fn replacement_summary(&self, p: PackageId) -> Option { + self.inner.used_replacements.borrow().get(&p).cloned() } /// Find out what dependencies will be added by activating `candidate`, diff --git a/src/cargo/core/resolver/errors.rs b/src/cargo/core/resolver/errors.rs index f1b3bb8b6c2..cab65502f38 100644 --- a/src/cargo/core/resolver/errors.rs +++ b/src/cargo/core/resolver/errors.rs @@ -1,6 +1,5 @@ use std::fmt; use std::fmt::Write as _; -use std::task::Poll; use crate::core::{Dependency, PackageId, Registry, Summary}; use crate::sources::IndexSummary; @@ -74,7 +73,7 @@ impl From<(PackageId, ConflictReason)> for ActivateError { pub(super) fn activation_error( resolver_ctx: &ResolverContext, - registry: &mut impl Registry, + registry: &impl Registry, parent: &Summary, dep: &Dependency, conflicting_activations: &ConflictMap, @@ -430,22 +429,13 @@ pub(super) fn activation_error( // Maybe the user mistyped the ver_req? Like `dep="2"` when `dep="0.2"` // was meant. So we re-query the registry with `dep="*"` so we can // list a few versions that were actually found. 
-fn alt_versions( - registry: &mut impl Registry, - dep: &Dependency, -) -> Option>> { +fn alt_versions(registry: &impl Registry, dep: &Dependency) -> Option>> { let mut wild_dep = dep.clone(); wild_dep.set_version_req(OptVersionReq::Any); - let candidates = loop { - match registry.query_vec(&wild_dep, QueryKind::Exact) { - Poll::Ready(Ok(candidates)) => break candidates, - Poll::Ready(Err(e)) => return Some(Err(e)), - Poll::Pending => match registry.block_until_ready() { - Ok(()) => continue, - Err(e) => return Some(Err(e)), - }, - } + let candidates = match crate::util::block_on(registry.query_vec(&wild_dep, QueryKind::Exact)) { + Ok(candidates) => candidates, + Err(e) => return Some(Err(e)), }; let mut candidates: Vec<_> = candidates.into_iter().map(|s| s.into_summary()).collect(); candidates.sort_unstable_by(|a, b| b.version().cmp(a.version())); @@ -458,19 +448,14 @@ fn alt_versions( /// Maybe something is wrong with the available versions fn rejected_versions( - registry: &mut impl Registry, + registry: &impl Registry, dep: &Dependency, ) -> Option>> { - let mut version_candidates = loop { - match registry.query_vec(&dep, QueryKind::RejectedVersions) { - Poll::Ready(Ok(candidates)) => break candidates, - Poll::Ready(Err(e)) => return Some(Err(e)), - Poll::Pending => match registry.block_until_ready() { - Ok(()) => continue, - Err(e) => return Some(Err(e)), - }, - } - }; + let mut version_candidates = + match crate::util::block_on(registry.query_vec(&dep, QueryKind::RejectedVersions)) { + Ok(candidates) => candidates, + Err(e) => return Some(Err(e)), + }; version_candidates.sort_unstable_by_key(|a| a.as_summary().version().clone()); if version_candidates.is_empty() { None @@ -482,22 +467,17 @@ fn rejected_versions( /// Maybe the user mistyped the name? Like `dep-thing` when `Dep_Thing` /// was meant. So we try asking the registry for a `fuzzy` search for suggestions. 
fn alt_names( - registry: &mut impl Registry, + registry: &impl Registry, dep: &Dependency, ) -> Option>> { let mut wild_dep = dep.clone(); wild_dep.set_version_req(OptVersionReq::Any); - let name_candidates = loop { - match registry.query_vec(&wild_dep, QueryKind::AlternativeNames) { - Poll::Ready(Ok(candidates)) => break candidates, - Poll::Ready(Err(e)) => return Some(Err(e)), - Poll::Pending => match registry.block_until_ready() { - Ok(()) => continue, - Err(e) => return Some(Err(e)), - }, - } - }; + let name_candidates = + match crate::util::block_on(registry.query_vec(&wild_dep, QueryKind::AlternativeNames)) { + Ok(candidates) => candidates, + Err(e) => return Some(Err(e)), + }; let mut name_candidates: Vec<_> = name_candidates .into_iter() .map(|s| s.into_summary()) diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index 3d2be9fda4e..5539d46a9ea 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -122,7 +122,7 @@ mod version_prefs; pub fn resolve( summaries: &[(Summary, ResolveOpts)], replacements: &[(PackageIdSpec, Dependency)], - registry: &mut impl Registry, + registry: &impl Registry, version_prefs: &VersionPreferences, resolve_version: ResolveVersion, gctx: Option<&GlobalContext>, @@ -146,10 +146,8 @@ pub fn resolve( gctx, &mut past_conflicting_activations, )?; - if registry.reset_pending() { + if registry.wait()? { break resolver_ctx; - } else { - registry.registry.block_until_ready()?; } }; @@ -350,7 +348,7 @@ fn activate_deps_loop( debug!("no candidates found"); Err(errors::activation_error( &resolver_ctx, - registry.registry, + registry.registry(), &parent, &dep, &conflicting_activations, @@ -648,7 +646,7 @@ fn activate( // does. TBH it basically cause panics in the test suite if // `parent` is passed through here and `[replace]` is otherwise // on life support so it's not critical to fix bugs anyway per se. - if cx.flag_activated(replace, opts, None)? 
&& activated { + if cx.flag_activated(&replace, opts, None)? && activated { return Ok(None); } trace!( diff --git a/src/cargo/ops/cargo_add/mod.rs b/src/cargo/ops/cargo_add/mod.rs index 929d704c677..90ca258b9d7 100644 --- a/src/cargo/ops/cargo_add/mod.rs +++ b/src/cargo/ops/cargo_add/mod.rs @@ -831,14 +831,8 @@ fn get_latest_dependency( unreachable!("registry dependencies required, found a workspace dependency"); } MaybeWorkspace::Other(query) => { - let possibilities = loop { - match registry.query_vec(&query, QueryKind::Normalized) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => registry.block_until_ready()?, - } - }; + let possibilities = + crate::util::block_on(registry.query_vec(&query, QueryKind::Normalized))?; let mut possibilities: Vec<_> = possibilities .into_iter() @@ -963,15 +957,8 @@ fn select_package( unreachable!("path or git dependency expected, found workspace dependency"); } MaybeWorkspace::Other(query) => { - let possibilities = loop { - // Exact to avoid returning all for path/git - match registry.query_vec(&query, QueryKind::Normalized) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => registry.block_until_ready()?, - } - }; + let possibilities = + crate::util::block_on(registry.query_vec(&query, QueryKind::Normalized))?; let possibilities: Vec<_> = possibilities .into_iter() @@ -1196,14 +1183,8 @@ fn populate_available_features( return Ok(dependency); } - let possibilities = loop { - match registry.query_vec(&query, QueryKind::Normalized) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => registry.block_until_ready()?, - } - }; + let possibilities = crate::util::block_on(registry.query_vec(&query, QueryKind::Normalized))?; + // Ensure widest feature flag compatibility by picking the earliest version that could show up // in the lock file for a given version requirement. 
let lowest_common_denominator = possibilities diff --git a/src/cargo/ops/cargo_package/mod.rs b/src/cargo/ops/cargo_package/mod.rs index 5d60e557dc7..d427f1fabd1 100644 --- a/src/cargo/ops/cargo_package/mod.rs +++ b/src/cargo/ops/cargo_package/mod.rs @@ -5,7 +5,6 @@ use std::fs::File; use std::io::SeekFrom; use std::io::prelude::*; use std::path::{Path, PathBuf}; -use std::task::Poll; use crate::core::PackageIdSpecQuery; use crate::core::Workspace; @@ -37,6 +36,8 @@ use cargo_util_schemas::messages; use cargo_util_terminal::report::Level; use cargo_util_terminal::{Shell, Verbosity}; use flate2::{Compression, GzBuilder}; +use futures::TryStreamExt; +use futures::stream::FuturesUnordered; use tar::{Builder, EntryType, Header, HeaderMode}; use tracing::debug; use unicase::Ascii as UncasedAscii; @@ -468,7 +469,7 @@ fn prepare_archive( opts: &PackageOpts<'_>, ) -> CargoResult> { let gctx = ws.gctx(); - let mut src = PathSource::new(pkg.root(), pkg.package_id().source_id(), gctx); + let src = PathSource::new(pkg.root(), pkg.package_id().source_id(), gctx); src.load()?; if opts.check_metadata { @@ -1035,38 +1036,30 @@ pub fn check_yanked( // maybe updating files, so be sure to lock it here. let _lock = gctx.acquire_package_cache_lock(CacheLockMode::DownloadExclusive)?; - let mut sources = pkg_set.sources_mut(); - let mut pending: Vec = resolve.iter().collect(); - let mut results = Vec::new(); - for (_id, source) in sources.sources_mut() { + for (_id, source) in pkg_set.sources().iter() { source.invalidate_cache(); } - while !pending.is_empty() { - pending.retain(|pkg_id| { - if let Some(source) = sources.get_mut(pkg_id.source_id()) { - match source.is_yanked(*pkg_id) { - Poll::Ready(result) => results.push((*pkg_id, result)), - Poll::Pending => return true, - } - } - false - }); - for (_id, source) in sources.sources_mut() { - source.block_until_ready()?; - } - } - for (pkg_id, is_yanked) in results { - if is_yanked? 
{ - gctx.shell().warn(format!( - "package `{}` in Cargo.lock is yanked in registry `{}`, {}", - pkg_id, - pkg_id.source_id().display_registry_name(), - hint - ))?; - } - } - Ok(()) + let mut futures = resolve + .iter() + .map(|pkg_id| async move { + if let Some(source) = pkg_set.sources().get(pkg_id.source_id()) + && source.is_yanked(pkg_id).await? + { + gctx.shell().warn(format!( + "package `{}` in Cargo.lock is yanked in registry `{}`, {}", + pkg_id, + pkg_id.source_id().display_registry_name(), + hint + ))?; + } + CargoResult::Ok(()) + }) + .collect::>(); + crate::util::block_on(async { + while futures.try_next().await?.is_some() {} + CargoResult::Ok(()) + }) } // It can often be the case that files of a particular name on one platform diff --git a/src/cargo/ops/cargo_update.rs b/src/cargo/ops/cargo_update.rs index 704a7ea78be..0c47dcb3fdf 100644 --- a/src/cargo/ops/cargo_update.rs +++ b/src/cargo/ops/cargo_update.rs @@ -370,16 +370,7 @@ fn upgrade_dependency( let query = crate::core::dependency::Dependency::parse(name, None, dependency.source_id().clone())?; - let possibilities = { - loop { - match registry.query_vec(&query, QueryKind::Exact) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => registry.block_until_ready()?, - } - } - }; + let possibilities = crate::util::block_on(registry.query_vec(&query, QueryKind::Exact))?; let latest = if !possibilities.is_empty() { possibilities @@ -576,14 +567,7 @@ fn print_lockfile_generation( match change.kind { PackageChangeKind::Added => { let possibilities = if let Some(query) = change.alternatives_query() { - loop { - match registry.query_vec(&query, QueryKind::Exact) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => registry.block_until_ready()?, - } - } + crate::util::block_on(registry.query_vec(&query, QueryKind::Exact))? 
} else { vec![] }; @@ -639,14 +623,7 @@ fn print_lockfile_sync( | PackageChangeKind::Upgraded | PackageChangeKind::Downgraded => { let possibilities = if let Some(query) = change.alternatives_query() { - loop { - match registry.query_vec(&query, QueryKind::Exact) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => registry.block_until_ready()?, - } - } + crate::util::block_on(registry.query_vec(&query, QueryKind::Exact))? } else { vec![] }; @@ -688,14 +665,7 @@ fn print_lockfile_updates( let mut unchanged_behind = 0; for change in changes.values() { let possibilities = if let Some(query) = change.alternatives_query() { - loop { - match registry.query_vec(&query, QueryKind::Exact) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => registry.block_until_ready()?, - } - } + crate::util::block_on(registry.query_vec(&query, QueryKind::Exact))? } else { vec![] }; diff --git a/src/cargo/ops/common_for_install_and_uninstall.rs b/src/cargo/ops/common_for_install_and_uninstall.rs index 75246bf3f0a..1a16b5c1f83 100644 --- a/src/cargo/ops/common_for_install_and_uninstall.rs +++ b/src/cargo/ops/common_for_install_and_uninstall.rs @@ -4,7 +4,6 @@ use std::io::SeekFrom; use std::io::prelude::*; use std::path::{Path, PathBuf}; use std::rc::Rc; -use std::task::Poll; use anyhow::{Context as _, bail, format_err}; use cargo_util::paths; @@ -18,8 +17,7 @@ use crate::core::{Dependency, FeatureValue, Package, PackageId, SourceId}; use crate::core::{PackageSet, Target}; use crate::ops::{self, CompileFilter, CompileOptions}; use crate::sources::PathSource; -use crate::sources::source::Source; -use crate::sources::source::{QueryKind, SourceMap}; +use crate::sources::source::{QueryKind, Source, SourceMap}; use crate::util::GlobalContext; use crate::util::cache_lock::CacheLockMode; use crate::util::context::{ConfigRelativePath, Definition}; @@ -610,12 +608,7 @@ pub fn select_dep_pkg( source.invalidate_cache(); } - let deps = 
loop { - match source.query_vec(&dep, QueryKind::Exact)? { - Poll::Ready(deps) => break deps, - Poll::Pending => source.block_until_ready()?, - } - }; + let deps = crate::util::block_on(source.query_vec(&dep, QueryKind::Exact))?; match deps .iter() .map(|s| s.as_summary()) @@ -630,12 +623,8 @@ pub fn select_dep_pkg( // Match any version, not just the selected let msrv_dep = Dependency::parse(dep.package_name(), None, dep.source_id())?; - let msrv_deps = loop { - match source.query_vec(&msrv_dep, QueryKind::Exact)? { - Poll::Ready(deps) => break deps, - Poll::Pending => source.block_until_ready()?, - } - }; + let msrv_deps = + crate::util::block_on(source.query_vec(&msrv_dep, QueryKind::Exact))?; if let Some(alt) = msrv_deps .iter() .map(|s| s.as_summary()) @@ -683,13 +672,7 @@ cannot install package `{name} {ver}`, it requires rustc {msrv} or newer, while PackageId::try_new(dep.package_name(), &version[1..], source.source_id()) { source.invalidate_cache(); - loop { - match source.is_yanked(pkg_id) { - Poll::Ready(Ok(is_yanked)) => break is_yanked, - Poll::Ready(Err(_)) => break false, - Poll::Pending => source.block_until_ready()?, - } - } + crate::util::block_on(source.is_yanked(pkg_id)).unwrap_or_default() } else { false } diff --git a/src/cargo/ops/registry/info/mod.rs b/src/cargo/ops/registry/info/mod.rs index 2dfec7b7b8e..3963a918f44 100644 --- a/src/cargo/ops/registry/info/mod.rs +++ b/src/cargo/ops/registry/info/mod.rs @@ -219,15 +219,8 @@ fn query_summaries( ) -> CargoResult<(Vec, Option)> { // Query without version requirement to get all index summaries. let dep = Dependency::parse(spec.name(), None, source_ids.original)?; - let results = loop { - // Use normalized crate name lookup for user-provided package names. 
- match registry.query_vec(&dep, QueryKind::Normalized) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => registry.block_until_ready()?, - } - }; + // Use normalized crate name lookup for user-provided package names. + let results = crate::util::block_on(registry.query_vec(&dep, QueryKind::Normalized))?; let normalized_name = results.first().map(|s| s.package_id().name().to_string()); diff --git a/src/cargo/ops/registry/mod.rs b/src/cargo/ops/registry/mod.rs index 30e6ded68db..da4cd2edc3a 100644 --- a/src/cargo/ops/registry/mod.rs +++ b/src/cargo/ops/registry/mod.rs @@ -12,7 +12,6 @@ mod yank; use std::collections::HashSet; use std::str; -use std::task::Poll; use anyhow::{Context as _, bail, format_err}; use cargo_credential::{Operation, Secret}; @@ -136,22 +135,16 @@ fn registry<'gctx>( auth::cache_token_from_commandline(gctx, &source_ids.original, token); } - let mut src = RegistrySource::remote(source_ids.replacement, &HashSet::new(), gctx)?; + let src = RegistrySource::remote(source_ids.replacement, &HashSet::new(), gctx)?; let cfg = { let _lock = gctx.acquire_package_cache_lock(CacheLockMode::DownloadExclusive)?; // Only update the index if `force_update` is set. if force_update { src.invalidate_cache() } - let cfg = loop { - match src.config()? { - Poll::Pending => src - .block_until_ready() - .with_context(|| format!("failed to update {}", source_ids.replacement))?, - Poll::Ready(cfg) => break cfg, - } - }; - cfg.expect("remote registries must have config") + crate::util::block_on(src.config()) + .with_context(|| format!("failed to update {}", source_ids.replacement))? 
+ .expect("remote registries must have config") }; let api_host = cfg .api diff --git a/src/cargo/ops/registry/publish.rs b/src/cargo/ops/registry/publish.rs index 2c520861e66..f90744341e5 100644 --- a/src/cargo/ops/registry/publish.rs +++ b/src/cargo/ops/registry/publish.rs @@ -439,19 +439,12 @@ fn wait_for_any_publish_confirmation( fn poll_one_package( registry_src: SourceId, pkg_id: &PackageId, - source: &mut dyn Source, + source: &dyn Source, ) -> CargoResult { let version_req = format!("={}", pkg_id.version()); let query = Dependency::parse(pkg_id.name(), Some(&version_req), registry_src)?; - let summaries = loop { - // Exact to avoid returning all for path/git - match source.query_vec(&query, QueryKind::Exact) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => source.block_until_ready()?, - } - }; + // Exact to avoid returning all for path/git + let summaries = crate::util::block_on(source.query_vec(&query, QueryKind::Exact))?; Ok(!summaries.is_empty()) } @@ -467,14 +460,7 @@ fn verify_unpublished( Some(&pkg.version().to_exact_req().to_string()), source_ids.replacement, )?; - let duplicate_query = loop { - match source.query_vec(&query, QueryKind::Exact) { - std::task::Poll::Ready(res) => { - break res?; - } - std::task::Poll::Pending => source.block_until_ready()?, - } - }; + let duplicate_query = crate::util::block_on(source.query_vec(&query, QueryKind::Exact))?; if !duplicate_query.is_empty() { // Move the registry error earlier in the publish process. 
// Since dry-run wouldn't talk to the registry to get the error, we downgrade it to a diff --git a/src/cargo/ops/resolve.rs b/src/cargo/ops/resolve.rs index f94d8f5afe1..722cefe5713 100644 --- a/src/cargo/ops/resolve.rs +++ b/src/cargo/ops/resolve.rs @@ -548,7 +548,7 @@ pub fn add_overrides<'a>( for (path, definition) in paths { let id = SourceId::for_path(&path)?; - let mut source = RecursivePathSource::new(&path, id, ws.gctx()); + let source = RecursivePathSource::new(&path, id, ws.gctx()); source.load().with_context(|| { format!( "failed to update path override `{}` \ diff --git a/src/cargo/sources/directory.rs b/src/cargo/sources/directory.rs index a692a019161..c1f0e6cf5d6 100644 --- a/src/cargo/sources/directory.rs +++ b/src/cargo/sources/directory.rs @@ -1,7 +1,7 @@ +use std::cell::{Cell, RefCell}; use std::collections::HashMap; use std::fmt::{self, Debug, Formatter}; use std::path::{Path, PathBuf}; -use std::task::Poll; use crate::core::{Dependency, Package, PackageId, SourceId}; use crate::sources::IndexSummary; @@ -60,9 +60,9 @@ pub struct DirectorySource<'gctx> { /// The root path of this source. root: PathBuf, /// Packages that this sources has discovered. - packages: HashMap, + packages: RefCell>, gctx: &'gctx GlobalContext, - updated: bool, + updated: Cell, } /// The checksum file to ensure the integrity of a package in a directory source. 
@@ -84,57 +84,16 @@ impl<'gctx> DirectorySource<'gctx> { source_id: id, root: path.to_path_buf(), gctx, - packages: HashMap::new(), - updated: false, + packages: RefCell::new(HashMap::new()), + updated: Cell::new(false), } } -} -impl<'gctx> Debug for DirectorySource<'gctx> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "DirectorySource {{ root: {:?} }}", self.root) - } -} - -impl<'gctx> Source for DirectorySource<'gctx> { - fn query( - &mut self, - dep: &Dependency, - kind: QueryKind, - f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { - if !self.updated { - return Poll::Pending; - } - let packages = self.packages.values().map(|p| &p.0); - let matches = packages.filter(|pkg| match kind { - QueryKind::Exact | QueryKind::RejectedVersions => dep.matches(pkg.summary()), - QueryKind::AlternativeNames => true, - QueryKind::Normalized => dep.matches(pkg.summary()), - }); - for summary in matches.map(|pkg| pkg.summary().clone()) { - f(IndexSummary::Candidate(summary)); - } - Poll::Ready(Ok(())) - } - - fn supports_checksums(&self) -> bool { - true - } - - fn requires_precise(&self) -> bool { - true - } - - fn source_id(&self) -> SourceId { - self.source_id - } - - fn block_until_ready(&mut self) -> CargoResult<()> { - if self.updated { + fn update(&self) -> CargoResult<()> { + if self.updated.get() { return Ok(()); } - self.packages.clear(); + self.packages.borrow_mut().clear(); let entries = self.root.read_dir().with_context(|| { format!( "failed to read root of directory source: {}", @@ -201,15 +160,61 @@ impl<'gctx> Source for DirectorySource<'gctx> { .summary_mut() .set_checksum(package.clone()); } - self.packages.insert(pkg.package_id(), (pkg, cksum)); + self.packages + .borrow_mut() + .insert(pkg.package_id(), (pkg, cksum)); } - self.updated = true; + self.updated.set(true); Ok(()) } +} + +impl<'gctx> Debug for DirectorySource<'gctx> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "DirectorySource {{ root: {:?} }}", 
self.root) + } +} + +#[async_trait::async_trait(?Send)] +impl<'gctx> Source for DirectorySource<'gctx> { + async fn query( + &self, + dep: &Dependency, + kind: QueryKind, + f: &mut dyn FnMut(IndexSummary), + ) -> CargoResult<()> { + if !self.updated.get() { + self.update()?; + } + let packages = self.packages.borrow(); + let packages = packages.values().map(|p| &p.0); + let matches = packages.filter(|pkg| match kind { + QueryKind::Exact | QueryKind::RejectedVersions => dep.matches(pkg.summary()), + QueryKind::AlternativeNames => true, + QueryKind::Normalized => dep.matches(pkg.summary()), + }); + for summary in matches.map(|pkg| pkg.summary().clone()) { + f(IndexSummary::Candidate(summary)); + } + Ok(()) + } + + fn supports_checksums(&self) -> bool { + true + } + + fn requires_precise(&self) -> bool { + true + } + + fn source_id(&self) -> SourceId { + self.source_id + } - fn download(&mut self, id: PackageId) -> CargoResult { + fn download(&self, id: PackageId) -> CargoResult { self.packages + .borrow() .get(&id) .map(|p| &p.0) .cloned() @@ -217,7 +222,7 @@ impl<'gctx> Source for DirectorySource<'gctx> { .ok_or_else(|| anyhow::format_err!("failed to find package with id: {}", id)) } - fn finish_download(&mut self, _id: PackageId, _data: Vec) -> CargoResult { + fn finish_download(&self, _id: PackageId, _data: Vec) -> CargoResult { panic!("no downloads to do") } @@ -226,7 +231,8 @@ impl<'gctx> Source for DirectorySource<'gctx> { } fn verify(&self, id: PackageId) -> CargoResult<()> { - let Some((pkg, cksum)) = self.packages.get(&id) else { + let packages = self.packages.borrow_mut(); + let Some((pkg, cksum)) = packages.get(&id) else { anyhow::bail!("failed to find entry for `{}` in directory source", id); }; @@ -261,13 +267,13 @@ impl<'gctx> Source for DirectorySource<'gctx> { format!("directory source `{}`", self.root.display()) } - fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {} + fn add_to_yanked_whitelist(&self, _pkgs: &[PackageId]) {} - fn 
is_yanked(&mut self, _pkg: PackageId) -> Poll> { - Poll::Ready(Ok(false)) + async fn is_yanked(&self, _pkg: PackageId) -> CargoResult { + Ok(false) } - fn invalidate_cache(&mut self) { + fn invalidate_cache(&self) { // Directory source has no local cache. } diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index fce8d6ac724..e691f22cd6a 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -19,8 +19,8 @@ use crate::util::hex::short_hash; use crate::util::interning::InternedString; use anyhow::Context as _; use cargo_util::paths::exclude_from_backups_and_indexing; +use std::cell::RefCell; use std::fmt::{self, Debug, Formatter}; -use std::task::Poll; use tracing::trace; use url::Url; @@ -73,22 +73,22 @@ pub struct GitSource<'gctx> { /// The revision which a git source is locked to. /// /// Expected to always be [`Revision::Locked`] after the Git repository is fetched. - locked_rev: Revision, + locked_rev: RefCell, /// The unique identifier of this source. - source_id: SourceId, + source_id: RefCell, /// The underlying path source to discover packages inside the Git repository. /// /// This gets set to `Some` after the git repo has been checked out - /// (automatically handled via [`GitSource::block_until_ready`]). - path_source: Option>, + /// (automatically handled via [`GitSource::update`]). + path_source: RefCell>>, /// A short string that uniquely identifies the version of the checkout. /// /// This is typically a 7-character string of the OID hash, automatically /// increasing in size if it is ambiguous. /// /// This is set to `Some` after the git repo has been checked out - /// (automatically handled via [`GitSource::block_until_ready`]). - short_id: Option, + /// (automatically handled via [`GitSource::update`]). + short_id: RefCell>, /// The identifier of this source for Cargo's Git cache directory. /// See [`ident`] for more. 
ident: InternedString, @@ -138,10 +138,10 @@ impl<'gctx> GitSource<'gctx> { let source = GitSource { remote, - locked_rev, - source_id, - path_source: None, - short_id: None, + locked_rev: RefCell::new(locked_rev), + source_id: RefCell::new(source_id), + path_source: RefCell::new(None), + short_id: RefCell::new(None), ident: ident.into(), gctx, quiet: false, @@ -151,19 +151,23 @@ impl<'gctx> GitSource<'gctx> { } /// Gets the remote repository URL. - pub fn url(&self) -> &Url { - self.source_id.url() + pub fn url(&self) -> Url { + self.source_id.borrow().url().clone() } /// Returns the packages discovered by this source. It may fetch the Git /// repository as well as walk the filesystem if package information /// haven't yet updated. pub fn read_packages(&mut self) -> CargoResult> { - if self.path_source.is_none() { + if self.path_source.borrow().is_none() { self.invalidate_cache(); - self.block_until_ready()?; + self.update()?; } - self.path_source.as_mut().unwrap().read_packages() + self.path_source + .borrow_mut() + .as_mut() + .unwrap() + .read_packages() } fn mark_used(&self) -> CargoResult<()> { @@ -171,7 +175,7 @@ impl<'gctx> GitSource<'gctx> { .deferred_global_last_use()? .mark_git_checkout_used(global_cache_tracker::GitCheckout { encoded_git_name: self.ident, - short_name: self.short_id.expect("update before download"), + short_name: self.short_id.borrow().expect("update before download"), size: None, }); Ok(()) @@ -188,7 +192,7 @@ impl<'gctx> GitSource<'gctx> { let db = self.remote.db_at(&db_path).ok(); - let (db, actual_rev) = match (&self.locked_rev, db) { + let (db, actual_rev) = match (&*self.locked_rev.borrow(), db) { // If we have a locked revision, and we have a preexisting database // which has that revision, then no update needs to happen. 
(Revision::Locked(oid), Some(db)) if db.contains(*oid) => (db, *oid), @@ -236,13 +240,70 @@ impl<'gctx> GitSource<'gctx> { trace!("updating git source `{:?}`", self.remote); let locked_rev = locked_rev.clone().into(); - let manifest_reference = self.source_id.git_reference().unwrap(); + let manifest_reference = self.source_id.borrow().git_reference().unwrap(); self.remote .checkout(&db_path, db, manifest_reference, &locked_rev, self.gctx)? } }; Ok((db, actual_rev)) } + + fn update(&self) -> CargoResult<()> { + if self.path_source.borrow().is_some() { + self.mark_used()?; + return Ok(()); + } + + let git_fs = self.gctx.git_path(); + // Ignore errors creating it, in case this is a read-only filesystem: + // perhaps the later operations can succeed anyhow. + let _ = git_fs.create_dir(); + let git_path = self + .gctx + .assert_package_cache_locked(CacheLockMode::DownloadExclusive, &git_fs); + + // Before getting a checkout, make sure that `/git` is + // marked as excluded from indexing and backups. Older versions of Cargo + // didn't do this, so we do it here regardless of whether `` + // exists. + // + // This does not use `create_dir_all_excluded_from_backups_atomic` for + // the same reason: we want to exclude it even if the directory already + // exists. + exclude_from_backups_and_indexing(&git_path); + + let (db, actual_rev) = self.fetch_db(false)?; + + // Don’t use the full hash, in order to contribute less to reaching the + // path length limit on Windows. See + // . + let short_id = db.to_short_id(actual_rev)?; + + // Check out `actual_rev` from the database to a scoped location on the + // filesystem. This will use hard links and such to ideally make the + // checkout operation here pretty fast. 
+ let checkout_path = self + .gctx + .git_checkouts_path() + .join(&self.ident) + .join(short_id.as_str()); + let checkout_path = checkout_path.into_path_unlocked(); + db.copy_to(actual_rev, &checkout_path, self.gctx, self.quiet)?; + + let source_id = self + .source_id + .borrow() + .with_git_precise(Some(actual_rev.to_string())); + let path_source = RecursivePathSource::new(&checkout_path, source_id, self.gctx); + + self.path_source.replace(Some(path_source)); + self.short_id.replace(Some(short_id.as_str().into())); + self.locked_rev.replace(Revision::Locked(actual_rev)); + self.path_source.borrow().as_ref().unwrap().load()?; + + self.mark_used()?; + Ok(()) + } } /// Indicates a [Git revision] that might be locked or deferred to be resolved. @@ -313,8 +374,8 @@ fn ident_shallow(id: &SourceId, is_shallow: bool) -> String { impl<'gctx> Debug for GitSource<'gctx> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "git repo at {}", self.source_id.url())?; - match &self.locked_rev { + write!(f, "git repo at {}", self.source_id.borrow().url())?; + match &*self.locked_rev.borrow() { Revision::Deferred(git_ref) => match git_ref.pretty_ref(true) { Some(s) => write!(f, " ({})", s), None => Ok(()), @@ -324,18 +385,20 @@ impl<'gctx> Debug for GitSource<'gctx> { } } +#[async_trait::async_trait(?Send)] impl<'gctx> Source for GitSource<'gctx> { - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { - if let Some(src) = self.path_source.as_mut() { - src.query(dep, kind, f) - } else { - Poll::Pending + ) -> CargoResult<()> { + if self.path_source.borrow().is_none() { + self.update()?; } + let src = self.path_source.borrow(); + let src = src.as_ref().unwrap(); + src.query(dep, kind, f).await } fn supports_checksums(&self) -> bool { @@ -347,99 +410,44 @@ impl<'gctx> Source for GitSource<'gctx> { } fn source_id(&self) -> SourceId { - self.source_id - } - - fn block_until_ready(&mut self) 
-> CargoResult<()> { - if self.path_source.is_some() { - self.mark_used()?; - return Ok(()); - } - - let git_fs = self.gctx.git_path(); - // Ignore errors creating it, in case this is a read-only filesystem: - // perhaps the later operations can succeed anyhow. - let _ = git_fs.create_dir(); - let git_path = self - .gctx - .assert_package_cache_locked(CacheLockMode::DownloadExclusive, &git_fs); - - // Before getting a checkout, make sure that `/git` is - // marked as excluded from indexing and backups. Older versions of Cargo - // didn't do this, so we do it here regardless of whether `` - // exists. - // - // This does not use `create_dir_all_excluded_from_backups_atomic` for - // the same reason: we want to exclude it even if the directory already - // exists. - exclude_from_backups_and_indexing(&git_path); - - let (db, actual_rev) = self.fetch_db(false)?; - - // Don’t use the full hash, in order to contribute less to reaching the - // path length limit on Windows. See - // . - let short_id = db.to_short_id(actual_rev)?; - - // Check out `actual_rev` from the database to a scoped location on the - // filesystem. This will use hard links and such to ideally make the - // checkout operation here pretty fast. 
- let checkout_path = self - .gctx - .git_checkouts_path() - .join(&self.ident) - .join(short_id.as_str()); - let checkout_path = checkout_path.into_path_unlocked(); - db.copy_to(actual_rev, &checkout_path, self.gctx, self.quiet)?; - - let source_id = self - .source_id - .with_git_precise(Some(actual_rev.to_string())); - let path_source = RecursivePathSource::new(&checkout_path, source_id, self.gctx); - - self.path_source = Some(path_source); - self.short_id = Some(short_id.as_str().into()); - self.locked_rev = Revision::Locked(actual_rev); - self.path_source.as_mut().unwrap().load()?; - - self.mark_used()?; - Ok(()) + *self.source_id.borrow() } - fn download(&mut self, id: PackageId) -> CargoResult { + fn download(&self, id: PackageId) -> CargoResult { trace!( "getting packages for package ID `{}` from `{:?}`", id, self.remote ); self.mark_used()?; self.path_source + .borrow_mut() .as_mut() .expect("BUG: `update()` must be called before `get()`") .download(id) } - fn finish_download(&mut self, _id: PackageId, _data: Vec) -> CargoResult { + fn finish_download(&self, _id: PackageId, _data: Vec) -> CargoResult { panic!("no download should have started") } fn fingerprint(&self, _pkg: &Package) -> CargoResult { - match &self.locked_rev { + match &*self.locked_rev.borrow() { Revision::Locked(oid) => Ok(oid.to_string()), _ => unreachable!("locked_rev must be resolved when computing fingerprint"), } } fn describe(&self) -> String { - format!("Git repository {}", self.source_id) + format!("Git repository {}", self.source_id.borrow()) } - fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {} + fn add_to_yanked_whitelist(&self, _pkgs: &[PackageId]) {} - fn is_yanked(&mut self, _pkg: PackageId) -> Poll> { - Poll::Ready(Ok(false)) + async fn is_yanked(&self, _pkg: PackageId) -> CargoResult { + Ok(false) } - fn invalidate_cache(&mut self) {} + fn invalidate_cache(&self) {} fn set_quiet(&mut self, quiet: bool) { self.quiet = quiet; diff --git 
a/src/cargo/sources/overlay.rs b/src/cargo/sources/overlay.rs index 8031d9151ea..de31c2625eb 100644 --- a/src/cargo/sources/overlay.rs +++ b/src/cargo/sources/overlay.rs @@ -1,5 +1,3 @@ -use std::task::ready; - use tracing::debug; use crate::sources::IndexSummary; @@ -32,6 +30,7 @@ impl<'gctx> DependencyConfusionThreatOverlaySource<'gctx> { } } +#[async_trait::async_trait(?Send)] impl<'gctx> Source for DependencyConfusionThreatOverlaySource<'gctx> { fn source_id(&self) -> crate::core::SourceId { self.remote.source_id() @@ -45,12 +44,12 @@ impl<'gctx> Source for DependencyConfusionThreatOverlaySource<'gctx> { self.local.requires_precise() || self.remote.requires_precise() } - fn query( - &mut self, + async fn query( + &self, dep: &crate::core::Dependency, kind: super::source::QueryKind, - f: &mut dyn FnMut(super::IndexSummary), - ) -> std::task::Poll> { + f: &mut dyn FnMut(IndexSummary), + ) -> crate::CargoResult<()> { let local_source = self.local.source_id(); let remote_source = self.remote.source_id(); @@ -61,7 +60,9 @@ impl<'gctx> Source for DependencyConfusionThreatOverlaySource<'gctx> { local_packages.insert(index.as_summary().clone()); f(index) }; - ready!(self.local.query(&local_dep, kind, &mut local_callback))?; + self.local + .query(&local_dep, kind, &mut local_callback) + .await?; let mut remote_callback = |index: IndexSummary| { if local_packages.contains(index.as_summary()) { @@ -70,12 +71,12 @@ impl<'gctx> Source for DependencyConfusionThreatOverlaySource<'gctx> { f(index) } }; - ready!(self.remote.query(dep, kind, &mut remote_callback))?; + self.remote.query(dep, kind, &mut remote_callback).await?; - std::task::Poll::Ready(Ok(())) + Ok(()) } - fn invalidate_cache(&mut self) { + fn invalidate_cache(&self) { self.local.invalidate_cache(); self.remote.invalidate_cache(); } @@ -86,7 +87,7 @@ impl<'gctx> Source for DependencyConfusionThreatOverlaySource<'gctx> { } fn download( - &mut self, + &self, package: crate::core::PackageId, ) -> crate::CargoResult { 
let local_source = self.local.source_id(); @@ -104,7 +105,7 @@ impl<'gctx> Source for DependencyConfusionThreatOverlaySource<'gctx> { } fn finish_download( - &mut self, + &self, pkg_id: crate::core::PackageId, contents: Vec, ) -> crate::CargoResult { @@ -121,20 +122,12 @@ impl<'gctx> Source for DependencyConfusionThreatOverlaySource<'gctx> { self.remote.describe() } - fn add_to_yanked_whitelist(&mut self, pkgs: &[crate::core::PackageId]) { + fn add_to_yanked_whitelist(&self, pkgs: &[crate::core::PackageId]) { self.local.add_to_yanked_whitelist(pkgs); self.remote.add_to_yanked_whitelist(pkgs); } - fn is_yanked( - &mut self, - pkg: crate::core::PackageId, - ) -> std::task::Poll> { - self.remote.is_yanked(pkg) - } - - fn block_until_ready(&mut self) -> crate::CargoResult<()> { - self.local.block_until_ready()?; - self.remote.block_until_ready() + async fn is_yanked(&self, pkg: crate::core::PackageId) -> crate::CargoResult { + self.remote.is_yanked(pkg).await } } diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index b6777f2c4d8..a2565fd3a5a 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -1,9 +1,9 @@ +use std::cell::{Cell, RefCell}; use std::collections::{HashMap, HashSet}; use std::fmt::{self, Debug, Formatter}; use std::fs; use std::io; use std::path::{Path, PathBuf}; -use std::task::Poll; use crate::core::{Dependency, EitherManifest, Manifest, Package, PackageId, SourceId}; use crate::ops; @@ -37,7 +37,7 @@ pub struct PathSource<'gctx> { /// The root path of this source. path: PathBuf, /// Packages that this sources has discovered. 
- package: Option, + package: RefCell>, gctx: &'gctx GlobalContext, } @@ -50,7 +50,7 @@ impl<'gctx> PathSource<'gctx> { Self { source_id, path: path.to_path_buf(), - package: None, + package: RefCell::new(None), gctx, } } @@ -63,7 +63,7 @@ impl<'gctx> PathSource<'gctx> { Self { source_id, path, - package: Some(pkg), + package: RefCell::new(Some(pkg)), gctx, } } @@ -74,7 +74,7 @@ impl<'gctx> PathSource<'gctx> { self.load()?; - match &self.package { + match &*self.package.borrow() { Some(pkg) => Ok(pkg.clone()), None => Err(internal(format!( "no package found in source {:?}", @@ -100,7 +100,7 @@ impl<'gctx> PathSource<'gctx> { /// Gets the last modified file in a package. fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> { - if self.package.is_none() { + if self.package.borrow().is_none() { return Err(internal(format!( "BUG: source `{:?}` was not loaded", self.path @@ -115,9 +115,10 @@ impl<'gctx> PathSource<'gctx> { } /// Discovers packages inside this source if it hasn't yet done. 
- pub fn load(&mut self) -> CargoResult<()> { - if self.package.is_none() { - self.package = Some(self.read_package()?); + pub fn load(&self) -> CargoResult<()> { + let mut package = self.package.borrow_mut(); + if package.is_none() { + *package = Some(self.read_package()?); } Ok(()) @@ -136,15 +137,16 @@ impl<'gctx> Debug for PathSource<'gctx> { } } +#[async_trait::async_trait(?Send)] impl<'gctx> Source for PathSource<'gctx> { - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { + ) -> CargoResult<()> { self.load()?; - if let Some(s) = self.package.as_ref().map(|p| p.summary()) { + if let Some(s) = self.package.borrow().as_ref().map(|p| p.summary()) { let matched = match kind { QueryKind::Exact | QueryKind::RejectedVersions => dep.matches(s), QueryKind::AlternativeNames => true, @@ -154,7 +156,7 @@ impl<'gctx> Source for PathSource<'gctx> { f(IndexSummary::Candidate(s.clone())) } } - Poll::Ready(Ok(())) + Ok(()) } fn supports_checksums(&self) -> bool { @@ -169,16 +171,17 @@ impl<'gctx> Source for PathSource<'gctx> { self.source_id } - fn download(&mut self, id: PackageId) -> CargoResult { + fn download(&self, id: PackageId) -> CargoResult { trace!("getting packages; id={}", id); self.load()?; - let pkg = self.package.iter().find(|pkg| pkg.package_id() == id); + let pkg = self.package.borrow(); + let pkg = pkg.iter().find(|pkg| pkg.package_id() == id); pkg.cloned() .map(MaybePackage::Ready) .ok_or_else(|| internal(format!("failed to find {} in path source", id))) } - fn finish_download(&mut self, _id: PackageId, _data: Vec) -> CargoResult { + fn finish_download(&self, _id: PackageId, _data: Vec) -> CargoResult { panic!("no download should have started") } @@ -198,17 +201,13 @@ impl<'gctx> Source for PathSource<'gctx> { } } - fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {} - - fn is_yanked(&mut self, _pkg: PackageId) -> Poll> { - Poll::Ready(Ok(false)) - } + fn 
add_to_yanked_whitelist(&self, _pkgs: &[PackageId]) {} - fn block_until_ready(&mut self) -> CargoResult<()> { - self.load() + async fn is_yanked(&self, _pkg: PackageId) -> CargoResult { + Ok(false) } - fn invalidate_cache(&mut self) { + fn invalidate_cache(&self) { // Path source has no local cache. } @@ -225,13 +224,13 @@ pub struct RecursivePathSource<'gctx> { /// The root path of this source. path: PathBuf, /// Whether this source has loaded all package information it may contain. - loaded: bool, + loaded: Cell, /// Packages that this sources has discovered. /// /// Tracking all packages for a given ID to warn on-demand for unused packages - packages: HashMap>, + packages: RefCell>>, /// Avoid redundant unused package warnings - warned_duplicate: HashSet, + warned_duplicate: RefCell>, gctx: &'gctx GlobalContext, } @@ -248,7 +247,7 @@ impl<'gctx> RecursivePathSource<'gctx> { Self { source_id, path: root.to_path_buf(), - loaded: false, + loaded: Cell::new(false), packages: Default::default(), warned_duplicate: Default::default(), gctx, @@ -261,9 +260,16 @@ impl<'gctx> RecursivePathSource<'gctx> { self.load()?; Ok(self .packages + .borrow() .iter() .map(|(pkg_id, v)| { - first_package(*pkg_id, v, &mut self.warned_duplicate, self.gctx).clone() + first_package( + *pkg_id, + v, + &mut self.warned_duplicate.borrow_mut(), + self.gctx, + ) + .clone() }) .collect()) } @@ -284,7 +290,7 @@ impl<'gctx> RecursivePathSource<'gctx> { /// Gets the last modified file in a package. fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> { - if !self.loaded { + if !self.loaded.get() { return Err(internal(format!( "BUG: source `{:?}` was not loaded", self.path @@ -299,10 +305,11 @@ impl<'gctx> RecursivePathSource<'gctx> { } /// Discovers packages inside this source if it hasn't yet done. 
- pub fn load(&mut self) -> CargoResult<()> { - if !self.loaded { - self.packages = read_packages(&self.path, self.source_id, self.gctx)?; - self.loaded = true; + pub fn load(&self) -> CargoResult<()> { + if !self.loaded.get() { + self.packages + .replace(read_packages(&self.path, self.source_id, self.gctx)?); + self.loaded.set(true); } Ok(()) @@ -315,20 +322,27 @@ impl<'gctx> Debug for RecursivePathSource<'gctx> { } } +#[async_trait::async_trait(?Send)] impl<'gctx> Source for RecursivePathSource<'gctx> { - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { + ) -> CargoResult<()> { self.load()?; for s in self .packages + .borrow() .iter() .filter(|(pkg_id, _)| pkg_id.name() == dep.package_name()) .map(|(pkg_id, pkgs)| { - first_package(*pkg_id, pkgs, &mut self.warned_duplicate, self.gctx) + first_package( + *pkg_id, + pkgs, + &mut self.warned_duplicate.borrow_mut(), + self.gctx, + ) }) .map(|p| p.summary()) { @@ -341,7 +355,7 @@ impl<'gctx> Source for RecursivePathSource<'gctx> { f(IndexSummary::Candidate(s.clone())) } } - Poll::Ready(Ok(())) + Ok(()) } fn supports_checksums(&self) -> bool { @@ -356,16 +370,19 @@ impl<'gctx> Source for RecursivePathSource<'gctx> { self.source_id } - fn download(&mut self, id: PackageId) -> CargoResult { + fn download(&self, id: PackageId) -> CargoResult { trace!("getting packages; id={}", id); self.load()?; - let pkg = self.packages.get(&id); - pkg.map(|pkgs| first_package(id, pkgs, &mut self.warned_duplicate, self.gctx).clone()) - .map(MaybePackage::Ready) - .ok_or_else(|| internal(format!("failed to find {} in path source", id))) + let pkgs = self.packages.borrow(); + let pkg = pkgs.get(&id); + pkg.map(|pkgs| { + first_package(id, pkgs, &mut self.warned_duplicate.borrow_mut(), self.gctx).clone() + }) + .map(MaybePackage::Ready) + .ok_or_else(|| internal(format!("failed to find {} in path source", id))) } - fn finish_download(&mut self, _id: 
PackageId, _data: Vec) -> CargoResult { + fn finish_download(&self, _id: PackageId, _data: Vec) -> CargoResult { panic!("no download should have started") } @@ -385,17 +402,13 @@ impl<'gctx> Source for RecursivePathSource<'gctx> { } } - fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {} - - fn is_yanked(&mut self, _pkg: PackageId) -> Poll> { - Poll::Ready(Ok(false)) - } + fn add_to_yanked_whitelist(&self, _pkgs: &[PackageId]) {} - fn block_until_ready(&mut self) -> CargoResult<()> { - self.load() + async fn is_yanked(&self, _pkg: PackageId) -> CargoResult { + Ok(false) } - fn invalidate_cache(&mut self) { + fn invalidate_cache(&self) { // Path source has no local cache. } diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 7f2b4da4c23..c8b1ed2a3a3 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -1,39 +1,49 @@ //! Access to a HTTP-based crate registry. See [`HttpRegistry`] for details. 
+use crate::core::PackageId; +use crate::core::SourceId; use crate::core::global_cache_tracker; -use crate::core::{PackageId, SourceId}; +use crate::sources::registry::LoadResponse; use crate::sources::registry::MaybeLock; +use crate::sources::registry::RegistryConfig; +use crate::sources::registry::RegistryData; use crate::sources::registry::download; -use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData}; +use crate::util::Filesystem; +use crate::util::GlobalContext; +use crate::util::IntoUrl; +use crate::util::Progress; +use crate::util::ProgressStyle; +use crate::util::auth; use crate::util::cache_lock::CacheLockMode; -use crate::util::errors::{CargoResult, HttpNotSuccessful}; +use crate::util::errors::CargoResult; +use crate::util::errors::HttpNotSuccessful; use crate::util::interning::InternedString; -use crate::util::network::http::http_handle; -use crate::util::network::retry::{Retry, RetryResult}; -use crate::util::network::sleep::SleepTracker; -use crate::util::{Filesystem, GlobalContext, IntoUrl, Progress, ProgressStyle, auth}; +use crate::util::network::http_async::ResponsePartsExtensions; +use crate::util::network::retry::Retry; +use crate::util::network::retry::RetryResult; use anyhow::Context as _; use cargo_credential::Operation; use cargo_util::paths; -use curl::easy::{Easy, List}; -use curl::multi::{EasyHandle, Multi}; +use futures::lock::Mutex; +use http::HeaderName; +use http::HeaderValue; +use std::cell::Cell; use std::cell::RefCell; -use std::collections::{HashMap, HashSet}; -use std::fs::{self, File}; +use std::collections::HashSet; +use std::fs; +use std::fs::File; use std::io::ErrorKind; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::str; -use std::task::{Poll, ready}; use std::time::Duration; -use tracing::{debug, trace}; +use tracing::debug; +use tracing::trace; +use tracing::warn; use url::Url; // HTTP headers const ETAG: &'static str = "etag"; const LAST_MODIFIED: &'static str = "last-modified"; 
-const WWW_AUTHENTICATE: &'static str = "www-authenticate"; -const IF_NONE_MATCH: &'static str = "if-none-match"; -const IF_MODIFIED_SINCE: &'static str = "if-modified-since"; const UNKNOWN: &'static str = "Unknown"; @@ -57,128 +67,12 @@ pub struct HttpRegistry<'gctx> { /// The name of this source, a unique string (across all sources) used as /// the directory name where its cached content is stored. name: InternedString, - /// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`). - /// - /// To be fair, `HttpRegistry` doesn't store the registry index it - /// downloads on the file system, but other cached data like registry - /// configuration could be stored here. - index_path: Filesystem, - /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/cache/$REG-HASH`). - cache_path: Filesystem, - /// The unique identifier of this registry source. - source_id: SourceId, - gctx: &'gctx GlobalContext, - - /// Store the server URL without the protocol prefix (sparse+) - url: Url, - - /// HTTP multi-handle for asynchronous/parallel requests. - multi: Multi, - - /// Has the client requested a cache update? - /// - /// Only if they have do we double-check the freshness of each locally-stored index file. - requested_update: bool, - - /// State for currently pending index downloads. - downloads: Downloads<'gctx>, - - /// Does the config say that we can use HTTP multiplexing? - multiplexing: bool, - - /// What paths have we already fetched since the last index update? - /// - /// We do not need to double-check any of these index files since we have already done so. - fresh: HashSet, - - /// Have we started to download any index files? - fetch_started: bool, /// Cached registry configuration. - registry_config: Option, - - /// Should we include the authorization header? - auth_required: bool, - - /// Url to get a token for the registry. - login_url: Option, - - /// Headers received with an HTTP 401. 
- auth_error_headers: Vec, - - /// Disables status messages. - quiet: bool, -} + registry_config: Mutex>, -/// State for currently pending index file downloads. -struct Downloads<'gctx> { - /// When a download is started, it is added to this map. The key is a - /// "token" (see [`Download::token`]). It is removed once the download is - /// finished. - pending: HashMap, EasyHandle)>, - /// Set of paths currently being downloaded. - /// This should stay in sync with the `pending` field. - pending_paths: HashSet, - /// Downloads that have failed and are waiting to retry again later. - sleeping: SleepTracker<(Download<'gctx>, Easy)>, - /// The final result of each download. - results: HashMap>, - /// The next ID to use for creating a token (see [`Download::token`]). - next: usize, - /// Progress bar. - progress: RefCell>>, - /// Number of downloads that have successfully finished. - downloads_finished: usize, - /// Number of times the caller has requested blocking. This is used for - /// an estimate of progress. - blocking_calls: usize, -} - -/// Represents a single index file download, including its progress and retry. -struct Download<'gctx> { - /// The token for this download, used as the key of the - /// [`Downloads::pending`] map and stored in [`EasyHandle`] as well. - token: usize, - - /// The path of the package that we're downloading. - path: PathBuf, - - /// Actual downloaded data, updated throughout the lifetime of this download. - data: RefCell>, - - /// HTTP headers. - header_map: RefCell, - - /// Logic used to track retrying this download if it's a spurious failure. - retry: Retry<'gctx>, -} - -/// HTTPS headers [`HttpRegistry`] cares about. -#[derive(Default)] -struct Headers { - last_modified: Option, - etag: Option, - www_authenticate: Vec, - /// All headers, including explicit headers above. - all: Vec, -} - -/// HTTP status code [`HttpRegistry`] cares about. 
-enum StatusCode { - Success, - NotModified, - NotFound, - Unauthorized, -} - -/// Represents a complete [`Download`] from an HTTP request. -/// -/// Usually it is constructed in [`HttpRegistry::handle_completed_downloads`], -/// and then returns to the caller of [`HttpRegistry::load()`]. -struct CompletedDownload { - response_code: StatusCode, - data: Vec, - header_map: Headers, + /// Backend used for making network requests. + inner: HttpBackend<'gctx>, } impl<'gctx> HttpRegistry<'gctx> { @@ -191,216 +85,105 @@ impl<'gctx> HttpRegistry<'gctx> { gctx: &'gctx GlobalContext, name: &str, ) -> CargoResult> { - let url = source_id.url().as_str(); - // Ensure the url ends with a slash so we can concatenate paths. - if !url.ends_with('/') { - anyhow::bail!("sparse registry url must end in a slash `/`: {url}") - } - assert!(source_id.is_sparse()); - let url = url - .strip_prefix("sparse+") - .expect("sparse registry needs sparse+ prefix") - .into_url() - .expect("a url with the sparse+ stripped should still be valid"); - Ok(HttpRegistry { name: name.into(), - index_path: gctx.registry_index_path().join(name), - cache_path: gctx.registry_cache_path().join(name), - source_id, - gctx, - url, - multi: Multi::new(), - multiplexing: false, - downloads: Downloads { - next: 0, - pending: HashMap::new(), - pending_paths: HashSet::new(), - sleeping: SleepTracker::new(), - results: HashMap::new(), - progress: RefCell::new(Some(Progress::with_style( - "Fetch", - ProgressStyle::Indeterminate, - gctx, - ))), - downloads_finished: 0, - blocking_calls: 0, - }, - fresh: HashSet::new(), - requested_update: false, - fetch_started: false, - registry_config: None, - auth_required: false, - login_url: None, - auth_error_headers: vec![], - quiet: false, + registry_config: Mutex::new(None), + inner: HttpBackend::new(source_id, gctx, name)?, }) } - /// Splits HTTP `HEADER: VALUE` to a tuple. 
- fn handle_http_header(buf: &[u8]) -> Option<(&str, &str)> { - if buf.is_empty() { - return None; - } - let buf = std::str::from_utf8(buf).ok()?.trim_end(); - // Don't let server sneak extra lines anywhere. - if buf.contains('\n') { - return None; - } - let (tag, value) = buf.split_once(':')?; - let value = value.trim(); - Some((tag, value)) + fn inner(&self) -> &HttpBackend<'gctx> { + &self.inner } - /// Setup the necessary works before the first fetch gets started. - /// - /// This is a no-op if called more than one time. - fn start_fetch(&mut self) -> CargoResult<()> { - if self.fetch_started { - // We only need to run the setup code once. - return Ok(()); - } - self.fetch_started = true; - - // We've enabled the `http2` feature of `curl` in Cargo, so treat - // failures here as fatal as it would indicate a build-time problem. - self.multiplexing = self.gctx.http_config()?.multiplexing.unwrap_or(true); - - self.multi - .pipelining(false, self.multiplexing) - .context("failed to enable multiplexing/pipelining in curl")?; - - // let's not flood the server with connections - self.multi.set_max_host_connections(2)?; + /// Get the registry configuration from either cache or remote. + async fn config(&self) -> CargoResult { + let Some(config) = self.config_opt().await? else { + return Err(anyhow::anyhow!("config.json not found")); + }; + Ok(config) + } - if !self.quiet { - self.gctx - .shell() - .status("Updating", self.source_id.display_index())?; + /// Get the registry configuration from either cache or remote. + /// Returns None if the config is not available. 
+ async fn config_opt(&self) -> CargoResult> { + let mut config = self.registry_config.lock().await; + if let Some(config) = &*config + && self.inner().is_fresh(RegistryConfig::NAME) + { + Ok(Some(config.clone())) + } else { + let result = self.config_opt_inner().await?; + *config = result.clone(); + Ok(result) } - - Ok(()) } - /// Checks the results inside the [`HttpRegistry::multi`] handle, and - /// updates relevant state in [`HttpRegistry::downloads`] accordingly. - fn handle_completed_downloads(&mut self) -> CargoResult<()> { - assert_eq!( - self.downloads.pending.len(), - self.downloads.pending_paths.len() - ); - - // Collect the results from the Multi handle. - let results = { - let mut results = Vec::new(); - let pending = &mut self.downloads.pending; - self.multi.messages(|msg| { - let token = msg.token().expect("failed to read token"); - let (_, handle) = &pending[&token]; - if let Some(result) = msg.result_for(handle) { - results.push((token, result)); - }; - }); - results - }; - for (token, result) in results { - let (mut download, handle) = self.downloads.pending.remove(&token).unwrap(); - let was_present = self.downloads.pending_paths.remove(&download.path); - assert!( - was_present, - "expected pending_paths to contain {:?}", - download.path - ); - let mut handle = self.multi.remove(handle)?; - let data = download.data.take(); - let url = self.full_url(&download.path); - let result = match download.retry.r#try(|| { - result.with_context(|| format!("failed to download from `{}`", url))?; - let code = handle.response_code()?; - // Keep this list of expected status codes in sync with the codes handled in `load` - let code = match code { - 200 => StatusCode::Success, - 304 => StatusCode::NotModified, - 401 => StatusCode::Unauthorized, - 404 | 410 | 451 => StatusCode::NotFound, - _ => { - return Err(HttpNotSuccessful::new_from_handle( - &mut handle, - &url, - data, - download.header_map.take().all, - ) - .into()); - } - }; - Ok((data, code)) - }) { - 
RetryResult::Success((data, code)) => Ok(CompletedDownload { - response_code: code, - data, - header_map: download.header_map.take(), - }), - RetryResult::Err(e) => Err(e), - RetryResult::Retry(sleep) => { - debug!(target: "network", "download retry {:?} for {sleep}ms", download.path); - self.downloads.sleeping.push(sleep, (download, handle)); - continue; - } - }; - - self.downloads.results.insert(download.path, result); - self.downloads.downloads_finished += 1; + async fn config_opt_inner(&self) -> CargoResult> { + debug!("loading config"); + let index_path = self.assert_index_locked(&self.inner().index_cache_path); + let config_json_path = index_path.join(RegistryConfig::NAME); + if self.inner().is_fresh(RegistryConfig::NAME) + && let Some(config) = self.config_from_filesystem() + { + return Ok(Some(config.clone())); } - self.downloads.tick()?; - - Ok(()) - } + // Check if there's a cached config that says auth is required. + // This allows avoiding the initial unauthenticated request to probe. + if let Some(c) = self.config_from_filesystem() { + self.inner().auth_required.update(|v| v || c.auth_required); + } - /// Constructs the full URL to download a index file. - fn full_url(&self, path: &Path) -> String { - // self.url always ends with a slash. - format!("{}{}", self.url, path.display()) - } + let response = self + .inner() + .fetch_uncached(RegistryConfig::NAME, None) + .await; + let response = match response { + Err(e) + if !self.inner().auth_required.get() + && e.downcast_ref::() + .map(|e| e.code == 401) + .unwrap_or_default() => + { + self.inner().auth_required.set(true); + debug!(target: "network", "re-attempting request for config.json with authorization included."); + self.inner() + .fetch_uncached(RegistryConfig::NAME, None) + .await + } + resp => resp, + }?; - /// Check if an index file of `path` is up-to-date. - /// - /// The `path` argument is the same as in [`RegistryData::load`]. 
- fn is_fresh(&self, path: &Path) -> bool { - if !self.requested_update { - trace!( - "using local {} as user did not request update", - path.display() - ); - true - } else if self.gctx.cli_unstable().no_index_update { - trace!("using local {} in no_index_update mode", path.display()); - true - } else if !self.gctx.network_allowed() { - trace!("using local {} in offline mode", path.display()); - true - } else if self.fresh.contains(path) { - trace!("using local {} as it was already fetched", path.display()); - true - } else { - debug!("checking freshness of {}", path.display()); - false + match response { + LoadResponse::Data { + raw_data, + index_version: _, + } => { + trace!("config loaded"); + let config = Some(serde_json::from_slice(&raw_data)?); + if paths::create_dir_all(&config_json_path.parent().unwrap()).is_ok() { + if let Err(e) = fs::write(&config_json_path, &raw_data) { + tracing::debug!("failed to write config.json cache: {}", e); + } + } + Ok(config) + } + LoadResponse::NotFound => Ok(None), + LoadResponse::CacheValid => Err(crate::util::internal( + "config.json is never stored in the index cache", + )), } } - /// Get the cached registry configuration, if it exists. - fn config_cached(&mut self) -> CargoResult> { - if self.registry_config.is_some() { - return Ok(self.registry_config.as_ref()); - } + /// Get the cached registry configuration from the filesystem, if it exists. 
+ fn config_from_filesystem(&self) -> Option { let config_json_path = self - .assert_index_locked(&self.index_path) + .assert_index_locked(&self.inner().index_cache_path) .join(RegistryConfig::NAME); match fs::read(&config_json_path) { Ok(raw_data) => match serde_json::from_slice(&raw_data) { - Ok(json) => { - self.registry_config = Some(json); - } + Ok(json) => return Some(json), Err(e) => tracing::debug!("failed to decode cached config.json: {}", e), }, Err(e) => { @@ -409,58 +192,53 @@ impl<'gctx> HttpRegistry<'gctx> { } } } - Ok(self.registry_config.as_ref()) + None } - /// Get the registry configuration from either cache or remote. - fn config(&mut self) -> Poll> { - debug!("loading config"); - let index_path = self.assert_index_locked(&self.index_path); - let config_json_path = index_path.join(RegistryConfig::NAME); - if self.is_fresh(Path::new(RegistryConfig::NAME)) && self.config_cached()?.is_some() { - return Poll::Ready(Ok(self.registry_config.as_ref().unwrap())); - } - - match ready!(self.load(Path::new(""), Path::new(RegistryConfig::NAME), None)?) 
{ - LoadResponse::Data { - raw_data, - index_version: _, - } => { - trace!("config loaded"); - self.registry_config = Some(serde_json::from_slice(&raw_data)?); - if paths::create_dir_all(&config_json_path.parent().unwrap()).is_ok() { - if let Err(e) = fs::write(&config_json_path, &raw_data) { - tracing::debug!("failed to write config.json cache: {}", e); - } - } - Poll::Ready(Ok(self.registry_config.as_ref().unwrap())) - } - LoadResponse::NotFound => { - Poll::Ready(Err(anyhow::anyhow!("config.json not found in registry"))) + async fn sparse_fetch( + &self, + path: &str, + index_version: Option<&str>, + ) -> CargoResult { + if let Some(index_version) = index_version { + trace!("local cache of {path} is available at version `{index_version}`",); + if self.inner().is_fresh(&path) { + return Ok(LoadResponse::CacheValid); } - LoadResponse::CacheValid => Poll::Ready(Err(crate::util::internal( - "config.json is never stored in the index cache", - ))), + } else if self.inner().fresh.borrow().contains(path) { + // We have no cached copy of this file, and we already downloaded it. + debug!("cache did not contain previously downloaded file {path}",); + return Ok(LoadResponse::NotFound); } - } - /// Moves failed [`Download`]s that are ready to retry to the pending queue. - fn add_sleepers(&mut self) -> CargoResult<()> { - for (dl, handle) in self.downloads.sleeping.to_retry() { - let mut handle = self.multi.add(handle)?; - handle.set_token(dl.token)?; - let is_new = self.downloads.pending_paths.insert(dl.path.to_path_buf()); - assert!(is_new, "path queued for download more than once"); - let previous = self.downloads.pending.insert(dl.token, (dl, handle)); - assert!(previous.is_none(), "dl token queued more than once"); - } - Ok(()) + // If we have a cached copy of the file, include IF_NONE_MATCH or IF_MODIFIED_SINCE header. 
+ let index_version = + index_version + .and_then(|v| v.split_once(':')) + .and_then(|(key, value)| match key { + ETAG => Some(( + http::header::IF_NONE_MATCH, + HeaderValue::from_str(value.trim()).ok()?, + )), + LAST_MODIFIED => Some(( + http::header::IF_MODIFIED_SINCE, + HeaderValue::from_str(value.trim()).ok()?, + )), + _ => { + debug!("unexpected index version: {}", index_version.unwrap()); + None + } + }); + let index_version = index_version.as_ref().map(|(k, v)| (k, v)); + self.inner().fetch_uncached(&path, index_version).await } } +#[async_trait::async_trait(?Send)] impl<'gctx> RegistryData for HttpRegistry<'gctx> { fn prepare(&self) -> CargoResult<()> { - self.gctx + self.inner() + .gctx .deferred_global_last_use()? .mark_registry_index_used(global_cache_tracker::RegistryIndex { encoded_registry_name: self.name, @@ -469,302 +247,66 @@ impl<'gctx> RegistryData for HttpRegistry<'gctx> { } fn index_path(&self) -> &Filesystem { - &self.index_path + &self.inner().index_cache_path } fn cache_path(&self) -> &Filesystem { - &self.cache_path + &self.inner().crate_cache_path } fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path { - self.gctx + self.inner() + .gctx .assert_package_cache_locked(CacheLockMode::DownloadExclusive, path) } fn is_updated(&self) -> bool { - self.requested_update + self.inner().requested_update.get() } - fn load( - &mut self, + async fn load( + &self, _root: &Path, path: &Path, index_version: Option<&str>, - ) -> Poll> { - trace!("load: {}", path.display()); - if let Some(_token) = self.downloads.pending_paths.get(path) { - debug!("dependency is still pending: {}", path.display()); - return Poll::Pending; - } - - if let Some(index_version) = index_version { - trace!( - "local cache of {} is available at version `{}`", - path.display(), - index_version - ); - if self.is_fresh(path) { - return Poll::Ready(Ok(LoadResponse::CacheValid)); - } - } else if self.fresh.contains(path) { - // We have no cached copy of this file, and we 
already downloaded it. - debug!( - "cache did not contain previously downloaded file {}", - path.display() - ); - return Poll::Ready(Ok(LoadResponse::NotFound)); - } - - if !self.gctx.network_allowed() || self.gctx.cli_unstable().no_index_update { - // Return NotFound in offline mode when the file doesn't exist in the cache. - // If this results in resolution failure, the resolver will suggest - // removing the --offline flag. - return Poll::Ready(Ok(LoadResponse::NotFound)); - } - - if let Some(result) = self.downloads.results.remove(path) { - let result = - result.with_context(|| format!("download of {} failed", path.display()))?; - - let is_new = self.fresh.insert(path.to_path_buf()); - assert!( - is_new, - "downloaded the index file `{}` twice", - path.display() - ); - - // The status handled here need to be kept in sync with the codes handled - // in `handle_completed_downloads` - match result.response_code { - StatusCode::Success => { - let response_index_version = if let Some(etag) = result.header_map.etag { - format!("{}: {}", ETAG, etag) - } else if let Some(lm) = result.header_map.last_modified { - format!("{}: {}", LAST_MODIFIED, lm) - } else { - UNKNOWN.to_string() - }; - trace!("index file version: {}", response_index_version); - return Poll::Ready(Ok(LoadResponse::Data { - raw_data: result.data, - index_version: Some(response_index_version), - })); - } - StatusCode::NotModified => { - // Not Modified: the data in the cache is still the latest. - if index_version.is_none() { - return Poll::Ready(Err(anyhow::anyhow!( - "server said not modified (HTTP 304) when no local cache exists" - ))); - } - return Poll::Ready(Ok(LoadResponse::CacheValid)); - } - StatusCode::NotFound => { - // The crate was not found or deleted from the registry. 
- return Poll::Ready(Ok(LoadResponse::NotFound)); - } - StatusCode::Unauthorized - if !self.auth_required && path == Path::new(RegistryConfig::NAME) => - { - debug!(target: "network", "re-attempting request for config.json with authorization included."); - self.fresh.remove(path); - self.auth_required = true; - - // Look for a `www-authenticate` header with the `Cargo` scheme. - for header in &result.header_map.www_authenticate { - for challenge in http_auth::ChallengeParser::new(header) { - match challenge { - Ok(challenge) if challenge.scheme.eq_ignore_ascii_case("Cargo") => { - // Look for the `login_url` parameter. - for (param, value) in challenge.params { - if param.eq_ignore_ascii_case("login_url") { - self.login_url = Some(value.to_unescaped().into_url()?); - } - } - } - Ok(challenge) => { - debug!(target: "network", "ignoring non-Cargo challenge: {}", challenge.scheme) - } - Err(e) => { - debug!(target: "network", "failed to parse challenge: {}", e) - } - } - } - } - self.auth_error_headers = result.header_map.all; - } - StatusCode::Unauthorized => { - let err = Err(HttpNotSuccessful { - code: 401, - body: result.data, - url: self.full_url(path), - ip: None, - headers: result.header_map.all, - } - .into()); - if self.auth_required { - let auth_error = auth::AuthorizationError::new( - self.gctx, - self.source_id, - self.login_url.clone(), - auth::AuthorizationErrorReason::TokenRejected, - )?; - return Poll::Ready(err.context(auth_error)); - } else { - return Poll::Ready(err); - } - } - } - } - - if path != Path::new(RegistryConfig::NAME) { - self.auth_required = ready!(self.config()?).auth_required; - } else if !self.auth_required { - // Check if there's a cached config that says auth is required. - // This allows avoiding the initial unauthenticated request to probe. - if let Some(config) = self.config_cached()? { - self.auth_required = config.auth_required; - } - } - - // Looks like we're going to have to do a network request. 
- self.start_fetch()?; - - let mut handle = http_handle(self.gctx)?; - let full_url = self.full_url(path); - debug!(target: "network", "fetch {}", full_url); - handle.get(true)?; - handle.url(&full_url)?; - handle.follow_location(true)?; - - // Enable HTTP/2 if possible. - crate::try_old_curl_http2_pipewait!(self.multiplexing, handle); - - let mut headers = List::new(); - // Include a header to identify the protocol. This allows the server to - // know that Cargo is attempting to use the sparse protocol. - headers.append("cargo-protocol: version=1")?; - headers.append("accept: text/plain")?; - - // If we have a cached copy of the file, include IF_NONE_MATCH or IF_MODIFIED_SINCE header. - if let Some(index_version) = index_version { - if let Some((key, value)) = index_version.split_once(':') { - match key { - ETAG => headers.append(&format!("{}: {}", IF_NONE_MATCH, value.trim()))?, - LAST_MODIFIED => { - headers.append(&format!("{}: {}", IF_MODIFIED_SINCE, value.trim()))? - } - _ => debug!("unexpected index version: {}", index_version), - } - } - } - if self.auth_required { - let authorization = auth::auth_token( - self.gctx, - &self.source_id, - self.login_url.as_ref(), - Operation::Read, - self.auth_error_headers.clone(), - true, - )?; - headers.append(&format!("Authorization: {}", authorization))?; - trace!(target: "network", "including authorization for {}", full_url); - } - handle.http_headers(headers)?; - - // We're going to have a bunch of downloads all happening "at the same time". - // So, we need some way to track what headers/data/responses are for which request. - // We do that through this token. Each request (and associated response) gets one. 
- let token = self.downloads.next; - self.downloads.next += 1; - debug!(target: "network", "downloading {} as {}", path.display(), token); - let is_new = self.downloads.pending_paths.insert(path.to_path_buf()); - assert!(is_new, "path queued for download more than once"); - - // Each write should go to self.downloads.pending[&token].data. - // Since the write function must be 'static, we access downloads through a thread-local. - // That thread-local is set up in `block_until_ready` when it calls self.multi.perform, - // which is what ultimately calls this method. - handle.write_function(move |buf| { - trace!(target: "network", "{} - {} bytes of data", token, buf.len()); - tls::with(|downloads| { - if let Some(downloads) = downloads { - downloads.pending[&token] - .0 - .data - .borrow_mut() - .extend_from_slice(buf); - } - }); - Ok(buf.len()) - })?; - - // And ditto for the header function. - handle.header_function(move |buf| { - if let Some((tag, value)) = Self::handle_http_header(buf) { - tls::with(|downloads| { - if let Some(downloads) = downloads { - let mut header_map = downloads.pending[&token].0.header_map.borrow_mut(); - header_map.all.push(format!("{tag}: {value}")); - match tag.to_ascii_lowercase().as_str() { - LAST_MODIFIED => header_map.last_modified = Some(value.to_string()), - ETAG => header_map.etag = Some(value.to_string()), - WWW_AUTHENTICATE => header_map.www_authenticate.push(value.to_string()), - _ => {} - } - } - }); - } - - true - })?; - - let dl = Download { - token, - path: path.to_path_buf(), - data: RefCell::new(Vec::new()), - header_map: Default::default(), - retry: Retry::new(self.gctx)?, + ) -> CargoResult { + // Ensure the config is loaded. + let Some(config) = self.config_opt().await? else { + return Ok(LoadResponse::NotFound); }; - - // Finally add the request we've lined up to the pool of requests that cURL manages. 
- let mut handle = self.multi.add(handle)?; - handle.set_token(token)?; - self.downloads.pending.insert(dl.token, (dl, handle)); - - Poll::Pending + self.inner() + .auth_required + .update(|v| v || config.auth_required); + + let path = path + .to_str() + .ok_or_else(|| anyhow::anyhow!("non UTF8 path: {}", path.display()))?; + self.sparse_fetch(path, index_version).await } - fn config(&mut self) -> Poll>> { - let cfg = ready!(self.config()?).clone(); - Poll::Ready(Ok(Some(cfg))) + async fn config(&self) -> CargoResult> { + Ok(Some(self.config().await?)) } - fn invalidate_cache(&mut self) { + fn invalidate_cache(&self) { // Actually updating the index is more or less a no-op for this implementation. // All it does is ensure that a subsequent load will double-check files with the // server rather than rely on a locally cached copy of the index files. debug!("invalidated index cache"); - self.fresh.clear(); - self.requested_update = true; + self.inner().fresh.borrow_mut().clear(); + self.inner().requested_update.set(true); } fn set_quiet(&mut self, quiet: bool) { - self.quiet = quiet; - self.downloads.progress.replace(None); + self.inner().quiet.set(quiet); + self.inner().progress.replace(None); } - fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult { - let registry_config = loop { - match self.config()? 
{ - Poll::Pending => self.block_until_ready()?, - Poll::Ready(cfg) => break cfg.to_owned(), - } - }; - + fn download(&self, pkg: PackageId, checksum: &str) -> CargoResult { + let registry_config = crate::util::block_on(self.config())?; download::download( - &self.cache_path, - &self.gctx, + &self.inner().crate_cache_path, + &self.inner().gctx, self.name.clone(), pkg, checksum, @@ -772,15 +314,10 @@ impl<'gctx> RegistryData for HttpRegistry<'gctx> { ) } - fn finish_download( - &mut self, - pkg: PackageId, - checksum: &str, - data: &[u8], - ) -> CargoResult { + fn finish_download(&self, pkg: PackageId, checksum: &str, data: &[u8]) -> CargoResult { download::finish_download( - &self.cache_path, - &self.gctx, + &self.inner().crate_cache_path, + &self.inner().gctx, self.name.clone(), pkg, checksum, @@ -789,116 +326,331 @@ impl<'gctx> RegistryData for HttpRegistry<'gctx> { } fn is_crate_downloaded(&self, pkg: PackageId) -> bool { - download::is_crate_downloaded(&self.cache_path, &self.gctx, pkg) + download::is_crate_downloaded(&self.inner().crate_cache_path, &self.inner().gctx, pkg) } +} - fn block_until_ready(&mut self) -> CargoResult<()> { - trace!(target: "network::HttpRegistry::block_until_ready", - "{} transfers pending", - self.downloads.pending.len() - ); - self.downloads.blocking_calls += 1; - - loop { - let remaining_in_multi = tls::set(&self.downloads, || { - self.multi - .perform() - .context("failed to perform http requests") - })?; - trace!(target: "network", "{} transfers remaining", remaining_in_multi); - // Handles transfers performed by `self.multi` above and adds to - // `self.downloads.results`. Failed transfers get added to - // `self.downloads.sleeping` for retry. - self.handle_completed_downloads()?; - if remaining_in_multi + self.downloads.sleeping.len() as u32 == 0 { - return Ok(()); - } - // Handles failed transfers in `self.downloads.sleeping` and - // re-adds them to `self.multi`. 
- self.add_sleepers()?; - - if self.downloads.pending.is_empty() { - let delay = self.downloads.sleeping.time_to_next().unwrap(); - debug!(target: "network", "sleeping main thread for {delay:?}"); - std::thread::sleep(delay); - } else { - // We have no more replies to provide the caller with, - // so we need to wait until cURL has something new for us. - let timeout = self - .multi - .get_timeout()? - .unwrap_or_else(|| Duration::new(1, 0)); - self.multi - .wait(&mut [], timeout) - .context("failed to wait on curl `Multi`")?; - } +struct HttpBackend<'gctx> { + /// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`). + index_cache_path: Filesystem, + + /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/cache/$REG-HASH`). + crate_cache_path: Filesystem, + + /// The unique identifier of this registry source. + source_id: SourceId, + gctx: &'gctx GlobalContext, + + /// Store the server URL without the protocol prefix (sparse+) + url: Url, + + /// Has the client requested a cache update? + /// + /// Only if they have do we double-check the freshness of each locally-stored index file. + requested_update: Cell, + + /// Progress bar for transfers. + progress: RefCell>>, + + /// Number of in-flight requests. + pending: Cell, + + /// What paths have we already fetched since the last index update? + /// + /// We do not need to double-check any of these index files since we have already done so. + fresh: RefCell>, + + /// Have we started to download any index files? + fetch_started: Cell, + + /// Should we include the authorization header? + auth_required: Cell, + + /// Url to get a token for the registry. + login_url: RefCell>, + + /// Headers received with an HTTP 401. + auth_error_headers: RefCell>, + + /// Disables status messages. 
+ quiet: Cell, +} + +impl<'gctx> HttpBackend<'gctx> { + pub fn new( + source_id: SourceId, + gctx: &'gctx GlobalContext, + name: &str, + ) -> CargoResult> { + let url = source_id.url().as_str(); + // Ensure the url ends with a slash so we can concatenate paths. + if !url.ends_with('/') { + anyhow::bail!("sparse registry url must end in a slash `/`: {url}") } + assert!(source_id.is_sparse()); + let url = url + .strip_prefix("sparse+") + .expect("sparse registry needs sparse+ prefix") + .into_url() + .expect("a url with the sparse+ stripped should still be valid"); + + let index_cache_path = gctx.registry_index_path().join(name); + Ok(HttpBackend { + index_cache_path: index_cache_path.clone(), + crate_cache_path: gctx.registry_cache_path().join(name), + source_id, + gctx, + url, + progress: RefCell::new(Some(Progress::with_style( + "Fetch", + ProgressStyle::Indeterminate, + gctx, + ))), + fresh: RefCell::new(HashSet::new()), + requested_update: Cell::new(false), + fetch_started: Cell::new(false), + auth_required: Cell::new(false), + login_url: RefCell::new(None), + auth_error_headers: RefCell::new(vec![]), + quiet: Cell::new(false), + pending: Cell::new(0), + }) } -} -impl<'gctx> Downloads<'gctx> { - /// Updates the state of the progress bar for downloads. - fn tick(&self) -> CargoResult<()> { - let mut progress = self.progress.borrow_mut(); - let Some(progress) = progress.as_mut() else { + /// Constructs the full URL to download a index file. + fn full_url(&self, path: &str) -> String { + // self.url always ends with a slash. + format!("{}{}", self.url, path) + } + + /// Setup the necessary works before the first fetch gets started. + /// + /// This is a no-op if called more than one time. + fn start_fetch(&self) -> CargoResult<()> { + if self.fetch_started.get() { + // We only need to run the setup code once. 
return Ok(()); - }; + } + self.fetch_started.set(true); - // Since the sparse protocol discovers dependencies as it goes, - // it's not possible to get an accurate progress indication. - // - // As an approximation, we assume that the depth of the dependency graph - // is fixed, and base the progress on how many times the caller has asked - // for blocking. If there are actually additional dependencies, the progress - // bar will get stuck. If there are fewer dependencies, it will disappear - // early. It will never go backwards. - // - // The status text also contains the number of completed & pending requests, which - // gives an better indication of forward progress. - let approximate_tree_depth = 10; - - progress.tick( - self.blocking_calls.min(approximate_tree_depth), - approximate_tree_depth + 1, - &format!( - " {} complete; {} pending", - self.downloads_finished, - self.pending.len() + self.sleeping.len() - ), - ) - } -} + if !self.quiet.get() { + self.gctx + .shell() + .status("Updating", self.source_id.display_index())?; + } -mod tls { - use super::Downloads; - use std::cell::Cell; + Ok(()) + } - thread_local!(static PTR: Cell = const { Cell::new(0) }); + /// Are we in offline mode? + /// + /// Return NotFound in offline mode when the file doesn't exist in the cache. + /// If this results in resolution failure, the resolver will suggest + /// removing the --offline flag. + fn offline(&self) -> bool { + !self.gctx.network_allowed() || self.gctx.cli_unstable().no_index_update + } - pub(super) fn with(f: impl FnOnce(Option<&Downloads<'_>>) -> R) -> R { - let ptr = PTR.with(|p| p.get()); - if ptr == 0 { - f(None) + /// Check if an index file of `path` is up-to-date. 
+ fn is_fresh(&self, path: &str) -> bool { + if !self.requested_update.get() { + trace!("using local {path} as user did not request update",); + true + } else if self.offline() { + trace!("using local {path} in offline mode"); + true + } else if self.fresh.borrow().contains(path) { + trace!("using local {path} as it was already fetched"); + true } else { - // Safety: * `ptr` is only set by `set` below which ensures the type is correct. - let ptr = unsafe { &*(ptr as *const Downloads<'_>) }; - f(Some(ptr)) + debug!("checking freshness of {path}"); + false + } + } + + async fn fetch_uncached( + &self, + path: &str, + extra_header: Option<(&HeaderName, &HeaderValue)>, + ) -> CargoResult { + if self.offline() { + return Ok(LoadResponse::NotFound); + } + + if !self.fresh.borrow_mut().insert(path.to_string()) { + warn!("downloaded the index file `{path}` twice"); } + + let mut r = Retry::new(self.gctx)?; + self.pending.update(|v| v + 1); + let response = loop { + let response = self.fetch_uncached_no_retry(path, extra_header).await; + match r.r#try(|| response) { + RetryResult::Success(result) => break Ok(result), + RetryResult::Err(error) => break Err(error), + RetryResult::Retry(delay_ms) => { + futures_timer::Delay::new(Duration::from_millis(delay_ms)).await; + } + } + }; + self.pending.update(|v| v - 1); + response } - pub(super) fn set(dl: &Downloads<'_>, f: impl FnOnce() -> R) -> R { - struct Reset<'a, T: Copy>(&'a Cell, T); + async fn fetch_uncached_no_retry( + &self, + path: &str, + extra_header: Option<(&HeaderName, &HeaderValue)>, + ) -> CargoResult { + trace!("load: {path}"); + self.start_fetch()?; + let full_url = self.full_url(path); + let mut request = http::Request::get(&full_url); + + // Include a header to identify the protocol. This allows the server to + // know that Cargo is attempting to use the sparse protocol. 
+ request = request.header("cargo-protocol", "version=1"); + request = request.header(http::header::ACCEPT, "text/plain"); + + if let Some((k, v)) = extra_header { + request = request.header(k, v); + } - impl<'a, T: Copy> Drop for Reset<'a, T> { - fn drop(&mut self) { - self.0.set(self.1); + if self.auth_required.get() { + let authorization = auth::auth_token( + self.gctx, + &self.source_id, + self.login_url.borrow().clone().as_ref(), + Operation::Read, + self.auth_error_headers.borrow().clone(), + true, + )?; + request = request.header(http::header::AUTHORIZATION, authorization); + trace!(target: "network", "including authorization for {}", full_url); + } + + let response = self + .gctx + .http_async()? + .request(request.body(Vec::new())?) + .await + .with_context(|| format!("download of {path} failed"))?; + + self.tick()?; + + let (response, body) = response.into_parts(); + + match response.status { + http::StatusCode::OK => { + let response_index_version = + if let Some(etag) = response.headers.get(http::header::ETAG) { + format!("{}: {}", ETAG, etag.to_str().unwrap()) + } else if let Some(lm) = response.headers.get(http::header::LAST_MODIFIED) { + format!("{}: {}", LAST_MODIFIED, lm.to_str().unwrap()) + } else { + UNKNOWN.to_string() + }; + trace!("index file version: {}", response_index_version); + Ok(LoadResponse::Data { + raw_data: body, + index_version: Some(response_index_version), + }) + } + http::StatusCode::NOT_MODIFIED => { + // Not Modified: the data in the cache is still the latest. + Ok(LoadResponse::CacheValid) + } + http::StatusCode::NOT_FOUND => { + // The crate was not found or deleted from the registry. + return Ok(LoadResponse::NotFound); } + http::StatusCode::UNAUTHORIZED => { + // Store the headers for later error reporting if needed. 
+ self.auth_error_headers.replace( + response + .headers + .iter() + .map(|(name, value)| { + format!("{}: {}", name.as_str(), value.to_str().unwrap_or_default()) + }) + .collect(), + ); + + // Look for a `www-authenticate` header with the `Cargo` scheme. + for value in &response.headers.get_all(http::header::WWW_AUTHENTICATE) { + for challenge in + http_auth::ChallengeParser::new(value.to_str().unwrap_or_default()) + { + match challenge { + Ok(challenge) if challenge.scheme.eq_ignore_ascii_case("Cargo") => { + // Look for the `login_url` parameter. + for (param, value) in challenge.params { + if param.eq_ignore_ascii_case("login_url") { + self.login_url + .replace(Some(value.to_unescaped().into_url()?)); + } + } + } + Ok(challenge) => { + debug!(target: "network", "ignoring non-Cargo challenge: {}", challenge.scheme) + } + Err(e) => { + debug!(target: "network", "failed to parse challenge: {}", e) + } + } + } + } + + let mut err = Err(HttpNotSuccessful { + code: http::StatusCode::UNAUTHORIZED.as_u16() as u32, + body: body, + url: full_url, + ip: None, + headers: response + .headers + .iter() + .map(|(k, v)| format!("{}: {}", k, v.to_str().unwrap_or_default())) + .collect(), + } + .into()); + if self.auth_required.get() { + let auth_error = auth::AuthorizationError::new( + self.gctx, + self.source_id, + self.login_url.borrow().clone(), + auth::AuthorizationErrorReason::TokenRejected, + )?; + err = err.context(auth_error) + } + err + } + code => Err(HttpNotSuccessful { + code: code.as_u16() as u32, + body: body, + url: full_url, + ip: response.client_ip().map(str::to_owned), + headers: response + .headers + .iter() + .map(|(k, v)| format!("{}: {}", k, v.to_str().unwrap_or_default())) + .collect(), + } + .into()), } + } - PTR.with(|p| { - let _reset = Reset(p, p.get()); - p.set(dl as *const Downloads<'_> as usize); - f() - }) + /// Updates the state of the progress bar for downloads. 
+ fn tick(&self) -> CargoResult<()> { + let mut progress = self.progress.borrow_mut(); + let Some(progress) = progress.as_mut() else { + return Ok(()); + }; + + if progress.update_allowed() { + let complete = self.fresh.borrow().len(); + let pending = self.pending.get(); + progress.print_now(&format!("{complete} complete; {pending} pending"))?; + } + Ok(()) } } diff --git a/src/cargo/sources/registry/index/mod.rs b/src/cargo/sources/registry/index/mod.rs index e8f7e4fae1a..2c5a09f2008 100644 --- a/src/cargo/sources/registry/index/mod.rs +++ b/src/cargo/sources/registry/index/mod.rs @@ -30,14 +30,16 @@ use crate::util::{CargoResult, Filesystem, GlobalContext, OptVersionReq, interna use cargo_util::registry::make_dep_path; use cargo_util_schemas::index::{IndexPackage, RegistryDependency}; use cargo_util_schemas::manifest::RustVersion; +use futures::channel::oneshot; use semver::Version; use serde::{Deserialize, Serialize}; use std::borrow::Cow; +use std::cell::RefCell; use std::collections::BTreeMap; use std::collections::HashMap; use std::path::Path; +use std::rc::Rc; use std::str; -use std::task::{Poll, ready}; use tracing::info; mod cache; @@ -75,7 +77,9 @@ pub struct RegistryIndex<'gctx> { /// hasn't been cached already, it uses [`RegistryData::load`] to access /// to JSON files from the index, and the creates the optimized on-disk /// summary cache. - summaries_cache: HashMap, + summaries_cache: RefCell>>, + /// Requests that are currently running. + summaries_inflight: RefCell>>>>, /// [`GlobalContext`] reference for convenience. gctx: &'gctx GlobalContext, /// Manager of on-disk caches. @@ -109,7 +113,7 @@ struct Summaries { /// All known versions of a crate, keyed from their `Version` to the /// possibly parsed or unparsed version of the full summary. - versions: HashMap, + versions: Vec<(Version, RefCell)>, } /// A lazily parsed [`IndexSummary`]. 
@@ -253,7 +257,8 @@ impl<'gctx> RegistryIndex<'gctx> { RegistryIndex { source_id, path: path.clone(), - summaries_cache: HashMap::new(), + summaries_cache: RefCell::new(HashMap::new()), + summaries_inflight: RefCell::new(HashMap::new()), gctx, cache_manager: CacheManager::new(path.join(".cache"), gctx), } @@ -262,15 +267,16 @@ impl<'gctx> RegistryIndex<'gctx> { /// Returns the hash listed for a specified `PackageId`. Primarily for /// checking the integrity of a downloaded package matching the checksum in /// the index file, aka [`IndexSummary`]. - pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll> { + pub async fn hash(&self, pkg: PackageId, load: &dyn RegistryData) -> CargoResult { let req = OptVersionReq::lock_to_exact(pkg.version()); - let summary = self.summaries(pkg.name(), &req, load)?; - let summary = ready!(summary).next(); - Poll::Ready(Ok(summary + let mut summary = self.summaries(pkg.name(), &req, load).await?; + Ok(summary + .next() .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))? .as_summary() .checksum() - .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?)) + .map(|checksum| checksum.to_string()) + .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?) } /// Load a list of summaries for `name` package in this registry which @@ -284,21 +290,17 @@ impl<'gctx> RegistryIndex<'gctx> { /// /// Internally there's quite a few layer of caching to amortize this cost /// though since this method is called quite a lot on null builds in Cargo. - fn summaries<'a, 'b>( - &'a mut self, + async fn summaries<'a, 'b>( + &'a self, name: InternedString, req: &'b OptVersionReq, - load: &mut dyn RegistryData, - ) -> Poll + 'b>> + load: &dyn RegistryData, + ) -> CargoResult + 'b> where 'a: 'b, { - let cli_unstable = self.gctx.cli_unstable(); - - let source_id = self.source_id; - // First up parse what summaries we have available. 
- let summaries = ready!(self.load_summaries(name, load)?); + let summaries = self.load_summaries(name, load).await?; // Iterate over our summaries, extract all relevant ones which match our // version requirement, and then parse all corresponding rows in the @@ -306,20 +308,45 @@ impl<'gctx> RegistryIndex<'gctx> { // entry in a lock file on every build, so we want to absolutely // minimize the amount of work being done here and parse as little as // necessary. - let raw_data = &summaries.raw_data; - Poll::Ready(Ok(summaries - .versions - .iter_mut() - .filter_map(move |(k, v)| if req.matches(k) { Some(v) } else { None }) - .filter_map(move |maybe| { - match maybe.parse(raw_data, source_id, cli_unstable) { - Ok(sum) => Some(sum), - Err(e) => { - info!("failed to parse `{}` registry package: {}", name, e); - None + + struct I<'a> { + name: InternedString, + index: &'a RegistryIndex<'a>, + req: &'a OptVersionReq, + summaries: Rc, + i: usize, + } + + impl<'a> Iterator for I<'a> { + type Item = IndexSummary; + + fn next(&mut self) -> Option { + while let Some((v, summary)) = self.summaries.versions.get(self.i) { + self.i += 1; + if self.req.matches(v) { + match summary.borrow_mut().parse( + &self.summaries.raw_data, + self.index.source_id, + self.index.gctx.cli_unstable(), + ) { + Ok(summary) => return Some(summary.clone()), + Err(e) => { + info!("failed to parse `{}` registry package: {}", self.name, e); + } + } } } - }))) + None + } + } + + Ok(I { + name, + index: self, + req, + summaries, + i: 0, + }) } /// Actually parses what summaries we have available. @@ -337,52 +364,87 @@ impl<'gctx> RegistryIndex<'gctx> { /// In effect, this is intended to be a quite cheap operation. 
/// /// [`RemoteRegistry`]: super::remote::RemoteRegistry - fn load_summaries( - &mut self, + async fn load_summaries( + &self, name: InternedString, - load: &mut dyn RegistryData, - ) -> Poll> { + load: &dyn RegistryData, + ) -> CargoResult> { // If we've previously loaded what versions are present for `name`, just // return that since our in-memory cache should still be valid. - if self.summaries_cache.contains_key(&name) { - return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap())); + if let Some(summaries) = self.summaries_cache.borrow().get(&name) { + return Ok(summaries.clone()); + } + + // Check if this request has already started. If so, return a oneshot that hands out the same data. + let rx = { + let mut pending = self.summaries_inflight.borrow_mut(); + if let Some(waiters) = pending.get_mut(&name) { + let (tx, rx) = oneshot::channel(); + waiters.push(tx); + Some(rx) + } else { + // We'll be the one to do the work. When we're done, we'll let all the pending queries know. + pending.insert(name, Vec::new()); + None + } + }; + if let Some(rx) = rx { + return Ok(rx.await?); } + let summaries = self.load_summaries_uncached(name, load).await; + let pending = self.summaries_inflight.borrow_mut().remove(&name).unwrap(); + if let Ok(summaries) = &summaries { + // Insert into the cache + self.summaries_cache + .borrow_mut() + .insert(name, summaries.clone()); + + // Send the value to all waiting futures. + for entry in pending { + let _ = entry.send(summaries.clone()); + } + }; + summaries + } + + async fn load_summaries_uncached( + &self, + name: InternedString, + load: &dyn RegistryData, + ) -> CargoResult> { // Prepare the `RegistryData` which will lazily initialize internal data // structures. load.prepare()?; let root = load.assert_index_locked(&self.path); - let summaries = ready!(Summaries::parse( + let summaries = Summaries::parse( root, &name, self.source_id, load, self.gctx.cli_unstable(), &self.cache_manager, - ))? + ) + .await? 
.unwrap_or_default(); - self.summaries_cache.insert(name, summaries); - Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap())) + Ok(Rc::new(summaries)) } /// Clears the in-memory summaries cache. - pub fn clear_summaries_cache(&mut self) { - self.summaries_cache.clear(); + pub fn clear_summaries_cache(&self) { + self.summaries_cache.borrow_mut().clear(); } - /// Attempts to find the packages that match a `name` and a version `req`. - /// - /// This is primarily used by [`Source::query`](super::Source). - pub fn query_inner( - &mut self, + pub async fn query_inner( + &self, name: InternedString, req: &OptVersionReq, - load: &mut dyn RegistryData, + load: &dyn RegistryData, f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { + ) -> CargoResult<()> { if !self.gctx.network_allowed() { - // This should only return `Poll::Ready(Ok(()))` if there is at least 1 match. + // This should only return `Ok(())` if there is at least 1 match. // // If there are 0 matches it should fall through and try again with online. // This is necessary for dependencies that are not used (such as @@ -399,27 +461,29 @@ impl<'gctx> RegistryIndex<'gctx> { f(s); } }; - ready!(self.query_inner_with_online(name, req, load, callback, false)?); + self.query_inner_with_online(name, req, load, callback, false) + .await?; if called { - return Poll::Ready(Ok(())); + return Ok(()); } } - self.query_inner_with_online(name, req, load, f, true) + self.query_inner_with_online(name, req, load, f, true).await } /// Inner implementation of [`Self::query_inner`]. Returns the number of /// summaries we've got. /// /// The `online` controls whether Cargo can access the network when needed. - fn query_inner_with_online( - &mut self, + async fn query_inner_with_online( + &self, name: InternedString, req: &OptVersionReq, - load: &mut dyn RegistryData, + load: &dyn RegistryData, f: &mut dyn FnMut(IndexSummary), online: bool, - ) -> Poll> { - ready!(self.summaries(name, &req, load))? 
+ ) -> CargoResult<()> { + self.summaries(name, &req, load) + .await? // First filter summaries for `--offline`. If we're online then // everything is a candidate, otherwise if we're offline we're only // going to consider candidates which are actually present on disk. @@ -438,18 +502,17 @@ impl<'gctx> RegistryIndex<'gctx> { } }) .for_each(f); - Poll::Ready(Ok(())) + Ok(()) } /// Looks into the summaries to check if a package has been yanked. - pub fn is_yanked( - &mut self, - pkg: PackageId, - load: &mut dyn RegistryData, - ) -> Poll> { + pub async fn is_yanked(&self, pkg: PackageId, load: &dyn RegistryData) -> CargoResult { let req = OptVersionReq::lock_to_exact(pkg.version()); - let found = ready!(self.summaries(pkg.name(), &req, load))?.any(|s| s.is_yanked()); - Poll::Ready(Ok(found)) + let found = self + .summaries(pkg.name(), &req, load) + .await? + .any(|s| s.is_yanked()); + Ok(found) } } @@ -473,14 +536,14 @@ impl Summaries { /// * `load` --- the actual index implementation which may be very slow to /// call. We avoid this if we can. /// * `bindeps` --- whether the `-Zbindeps` unstable flag is enabled - pub fn parse( + pub async fn parse( root: &Path, name: &str, source_id: SourceId, - load: &mut dyn RegistryData, + load: &dyn RegistryData, cli_unstable: &CliUnstable, cache_manager: &CacheManager<'_>, - ) -> Poll>> { + ) -> CargoResult> { // This is the file we're loading from cache or the index data. // See module comment in `registry/mod.rs` for why this is structured the way it is. 
let lowered_name = &name.to_lowercase(); @@ -500,16 +563,23 @@ impl Summaries { } } - let response = ready!(load.load(root, relative.as_ref(), index_version.as_deref())?); + let response = load + .load(root, relative.as_ref(), index_version.as_deref()) + .await?; match response { LoadResponse::CacheValid => { tracing::debug!("fast path for registry cache of {:?}", relative); - return Poll::Ready(Ok(cached_summaries)); + if cached_summaries.is_none() { + return Err(anyhow::anyhow!( + "registry said cache valid when no cache exists" + )); + } + return Ok(cached_summaries); } LoadResponse::NotFound => { cache_manager.invalidate(lowered_name); - return Poll::Ready(Ok(None)); + return Ok(None); } LoadResponse::Data { raw_data, @@ -549,7 +619,7 @@ impl Summaries { }; let version = summary.package_id().version().clone(); cache.versions.push((version.clone(), line)); - ret.versions.insert(version, summary.into()); + ret.versions.push((version, RefCell::new(summary.into()))); } if let Some(index_version) = index_version { tracing::trace!("caching index_version {}", index_version); @@ -572,7 +642,7 @@ impl Summaries { assert_eq!(readback.versions, cache.versions, "versions mismatch"); } } - Poll::Ready(Ok(Some(ret))) + Ok(Some(ret)) } } } @@ -585,8 +655,10 @@ impl Summaries { let mut ret = Summaries::default(); for (version, summary) in cache.versions { let (start, end) = subslice_bounds(&contents, summary); - ret.versions - .insert(version, MaybeIndexSummary::Unparsed { start, end }); + ret.versions.push(( + version, + RefCell::new(MaybeIndexSummary::Unparsed { start, end }), + )); } ret.raw_data = contents; return Ok((ret, index_version)); diff --git a/src/cargo/sources/registry/local.rs b/src/cargo/sources/registry/local.rs index c39085f3624..146ba6b57e1 100644 --- a/src/cargo/sources/registry/local.rs +++ b/src/cargo/sources/registry/local.rs @@ -5,11 +5,11 @@ use crate::sources::registry::{LoadResponse, MaybeLock, RegistryConfig, Registry use 
crate::util::errors::CargoResult; use crate::util::{Filesystem, GlobalContext}; use cargo_util::{Sha256, paths}; +use std::cell::Cell; use std::fs::File; use std::io::SeekFrom; use std::io::{self, prelude::*}; use std::path::Path; -use std::task::Poll; /// A local registry is a registry that lives on the filesystem as a set of /// `.crate` files with an `index` directory in the [same format] as a remote @@ -64,7 +64,7 @@ pub struct LocalRegistry<'gctx> { src_path: Filesystem, gctx: &'gctx GlobalContext, /// Whether this source has updated all package information it may contain. - updated: bool, + updated: Cell, /// Disables status messages. quiet: bool, } @@ -80,12 +80,35 @@ impl<'gctx> LocalRegistry<'gctx> { index_path: Filesystem::new(root.join("index")), root: Filesystem::new(root.to_path_buf()), gctx, - updated: false, + updated: Cell::new(false), quiet: false, } } + + fn update(&self) -> CargoResult<()> { + if self.updated.get() { + return Ok(()); + } + // Nothing to update, we just use what's on disk. Verify it actually + // exists though. We don't use any locks as we're just checking whether + // these directories exist. 
+ let root = self.root.clone().into_path_unlocked(); + if !root.is_dir() { + anyhow::bail!("local registry path is not a directory: {}", root.display()); + } + let index_path = self.index_path.clone().into_path_unlocked(); + if !index_path.is_dir() { + anyhow::bail!( + "local registry index path is not a directory: {}", + index_path.display() + ); + } + self.updated.set(true); + Ok(()) + } } +#[async_trait::async_trait(?Send)] impl<'gctx> RegistryData for LocalRegistry<'gctx> { fn prepare(&self) -> CargoResult<()> { Ok(()) @@ -105,60 +128,37 @@ impl<'gctx> RegistryData for LocalRegistry<'gctx> { path.as_path_unlocked() } - fn load( - &mut self, + async fn load( + &self, root: &Path, path: &Path, _index_version: Option<&str>, - ) -> Poll> { - if self.updated { - let raw_data = match paths::read_bytes(&root.join(path)) { - Err(e) - if e.downcast_ref::() - .map_or(false, |ioe| ioe.kind() == io::ErrorKind::NotFound) => - { - return Poll::Ready(Ok(LoadResponse::NotFound)); - } - r => r, - }?; - Poll::Ready(Ok(LoadResponse::Data { - raw_data, - index_version: None, - })) - } else { - Poll::Pending + ) -> CargoResult { + if !self.updated.get() { + self.update()?; } + let raw_data = match paths::read_bytes(&root.join(path)) { + Err(e) + if e.downcast_ref::() + .map_or(false, |ioe| ioe.kind() == io::ErrorKind::NotFound) => + { + return Ok(LoadResponse::NotFound); + } + r => r, + }?; + Ok(LoadResponse::Data { + raw_data, + index_version: None, + }) } - fn config(&mut self) -> Poll>> { + async fn config(&self) -> CargoResult> { // Local registries don't have configuration for remote APIs or anything // like that - Poll::Ready(Ok(None)) - } - - fn block_until_ready(&mut self) -> CargoResult<()> { - if self.updated { - return Ok(()); - } - // Nothing to update, we just use what's on disk. Verify it actually - // exists though. We don't use any locks as we're just checking whether - // these directories exist. 
- let root = self.root.clone().into_path_unlocked(); - if !root.is_dir() { - anyhow::bail!("local registry path is not a directory: {}", root.display()); - } - let index_path = self.index_path.clone().into_path_unlocked(); - if !index_path.is_dir() { - anyhow::bail!( - "local registry index path is not a directory: {}", - index_path.display() - ); - } - self.updated = true; - Ok(()) + Ok(None) } - fn invalidate_cache(&mut self) { + fn invalidate_cache(&self) { // Local registry has no cache - just reads from disk. } @@ -167,10 +167,11 @@ impl<'gctx> RegistryData for LocalRegistry<'gctx> { } fn is_updated(&self) -> bool { - self.updated + // There is nothing to update. + true } - fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult { + fn download(&self, pkg: PackageId, checksum: &str) -> CargoResult { // Note that the usage of `into_path_unlocked` here is because the local // crate files here never change in that we're not the one writing them, // so it's not our responsibility to synchronize access to them. @@ -200,12 +201,7 @@ impl<'gctx> RegistryData for LocalRegistry<'gctx> { Ok(MaybeLock::Ready(crate_file)) } - fn finish_download( - &mut self, - _pkg: PackageId, - _checksum: &str, - _data: &[u8], - ) -> CargoResult { + fn finish_download(&self, _pkg: PackageId, _checksum: &str, _data: &[u8]) -> CargoResult { panic!("this source doesn't download") } } diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 2da4c7ec351..7ecc64e41bc 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -182,6 +182,7 @@ //! ``` //! 
+use std::cell::RefCell; use std::collections::HashSet; use std::fs; use std::fs::{File, OpenOptions}; @@ -189,12 +190,12 @@ use std::io; use std::io::Read; use std::io::Write; use std::path::{Path, PathBuf}; -use std::task::{Poll, ready}; use anyhow::Context as _; -use cargo_util::paths::{self, exclude_from_backups_and_indexing}; +use cargo_util::paths; use cargo_util_terminal::report::Level; use flate2::read::GzDecoder; +use futures::FutureExt as _; use serde::Deserialize; use serde::Serialize; use tar::Archive; @@ -209,7 +210,6 @@ use crate::sources::source::QueryKind; use crate::sources::source::Source; use crate::util::cache_lock::CacheLockMode; use crate::util::interning::InternedString; -use crate::util::network::PollExt; use crate::util::{CargoResult, Filesystem, GlobalContext, LimitErrorReader, restricted_names}; use crate::util::{VersionExt, hex}; @@ -261,13 +261,13 @@ pub struct RegistrySource<'gctx> { /// `cargo update somepkg` won't unlock yanked entries in `Cargo.lock`. /// Otherwise, the resolver would think that those entries no longer /// exist, and it would trigger updates to unrelated packages. - yanked_whitelist: HashSet, + yanked_whitelist: RefCell>, /// Yanked versions that have already been selected during queries. /// /// As of this writing, this is for not emitting the `--precise ` /// warning twice, with the assumption of (`dep.package_name()` + `--precise` /// version) being sufficient to uniquely identify the same query result. - selected_precise_yanked: HashSet<(InternedString, semver::Version)>, + selected_precise_yanked: RefCell>, } /// The [`config.json`] file stored in the index. @@ -320,6 +320,7 @@ pub struct RegistryConfig { } /// Result from loading data from a registry. +#[derive(Debug, Clone)] pub enum LoadResponse { /// The cache is valid. The cached data should be used. CacheValid, @@ -340,6 +341,7 @@ pub enum LoadResponse { /// This allows [`RegistrySource`] to abstractly handle each registry kind. 
/// /// For general concepts of registries, see the [module-level documentation](crate::sources::registry). +#[async_trait::async_trait(?Send)] pub trait RegistryData { /// Performs initialization for the registry. /// @@ -365,20 +367,20 @@ pub trait RegistryData { /// * `path` is the relative path to the package to load (like `ca/rg/cargo`). /// * `index_version` is the version of the requested crate data currently /// in cache. This is useful for checking if a local cache is outdated. - fn load( - &mut self, + async fn load( + &self, root: &Path, path: &Path, index_version: Option<&str>, - ) -> Poll>; + ) -> CargoResult; /// Loads the `config.json` file and returns it. /// /// Local registries don't have a config, and return `None`. - fn config(&mut self) -> Poll>>; + async fn config(&self) -> CargoResult>; /// Invalidates locally cached data. - fn invalidate_cache(&mut self); + fn invalidate_cache(&self); /// If quiet, the source should not display any progress or status messages. fn set_quiet(&mut self, quiet: bool); @@ -401,7 +403,7 @@ pub trait RegistryData { /// `finish_download`. For already downloaded `.crate` files, it does not /// validate the checksum, assuming the filesystem does not suffer from /// corruption or manipulation. - fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult; + fn download(&self, pkg: PackageId, checksum: &str) -> CargoResult; /// Finish a download by saving a `.crate` file to disk. /// @@ -411,8 +413,7 @@ pub trait RegistryData { /// the given data to the on-disk cache. /// /// Returns a [`File`] handle to the `.crate` file, positioned at the start. - fn finish_download(&mut self, pkg: PackageId, checksum: &str, data: &[u8]) - -> CargoResult; + fn finish_download(&self, pkg: PackageId, checksum: &str, data: &[u8]) -> CargoResult; /// Returns whether or not the `.crate` file is already downloaded. 
fn is_crate_downloaded(&self, _pkg: PackageId) -> bool { @@ -427,9 +428,6 @@ pub trait RegistryData { /// /// Returns the [`Path`] to the [`Filesystem`]. fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path; - - /// Block until all outstanding `Poll::Pending` requests are `Poll::Ready`. - fn block_until_ready(&mut self) -> CargoResult<()>; } /// The status of [`RegistryData::download`] which indicates if a `.crate` @@ -535,23 +533,38 @@ impl<'gctx> RegistrySource<'gctx> { ops: Box, yanked_whitelist: &HashSet, ) -> RegistrySource<'gctx> { + // Before starting to work on the registry, make sure that + // `/registry` is marked as excluded from indexing and + // backups. Older versions of Cargo didn't do this, so we do it here + // regardless of whether `` exists. + // + // This does not use `create_dir_all_excluded_from_backups_atomic` for + // the same reason: we want to exclude it even if the directory already + // exists. + // + // IO errors in creating and marking it are ignored, e.g. in case we're on a + // read-only filesystem. + let registry_base = gctx.registry_base_path(); + let _ = registry_base.create_dir(); + cargo_util::paths::exclude_from_backups_and_indexing(®istry_base.into_path_unlocked()); + RegistrySource { name: name.into(), src_path: gctx.registry_source_path().join(name), gctx, source_id, index: index::RegistryIndex::new(source_id, ops.index_path(), gctx), - yanked_whitelist: yanked_whitelist.clone(), + yanked_whitelist: RefCell::new(yanked_whitelist.clone()), ops, - selected_precise_yanked: HashSet::new(), + selected_precise_yanked: RefCell::new(HashSet::new()), } } /// Decode the [configuration](RegistryConfig) stored within the registry. /// /// This requires that the index has been at least checked out. 
- pub fn config(&mut self) -> Poll>> { - self.ops.config() + pub async fn config(&self) -> CargoResult> { + self.ops.config().await } /// Unpacks a downloaded package into a location where it's ready to be @@ -696,11 +709,11 @@ impl<'gctx> RegistrySource<'gctx> { /// should only be called after doing integrity check. That is to say, /// you need to call either [`RegistryData::download`] or /// [`RegistryData::finish_download`] before calling this method. - fn get_pkg(&mut self, package: PackageId, path: &File) -> CargoResult { + fn get_pkg(&self, package: PackageId, path: &File) -> CargoResult { let path = self .unpack_package(package, path) .with_context(|| format!("failed to unpack package `{}`", package))?; - let mut src = PathSource::new(&path, self.source_id, self.gctx); + let src = PathSource::new(&path, self.source_id, self.gctx); src.load()?; let mut pkg = match src.download(package)? { MaybePackage::Ready(pkg) => pkg, @@ -711,7 +724,8 @@ impl<'gctx> RegistrySource<'gctx> { // field with the checksum we know for this `PackageId`. let cksum = self .index - .hash(package, &mut *self.ops) + .hash(package, &*self.ops) + .now_or_never() .expect("a downloaded dep now pending!?") .expect("summary not found"); pkg.manifest_mut() @@ -722,13 +736,14 @@ impl<'gctx> RegistrySource<'gctx> { } } +#[async_trait::async_trait(?Send)] impl<'gctx> Source for RegistrySource<'gctx> { - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { + ) -> CargoResult<()> { let mut req = dep.version_req().clone(); // Handle `cargo update --precise` here. @@ -758,139 +773,131 @@ impl<'gctx> Source for RegistrySource<'gctx> { // updated, so we fall back to performing a lazy update. 
if kind == QueryKind::Exact && req.is_locked() && !self.ops.is_updated() { debug!("attempting query without update"); - ready!( - self.index - .query_inner(dep.package_name(), &req, &mut *self.ops, &mut |s| { - if matches!(s, IndexSummary::Candidate(_) | IndexSummary::Yanked(_)) - && dep.matches(s.as_summary()) - { - // We are looking for a package from a lock file so we do not care about yank - callback(s) - } - },) - )?; + self.index + .query_inner(dep.package_name(), &req, &*self.ops, &mut |s| { + if matches!(s, IndexSummary::Candidate(_) | IndexSummary::Yanked(_)) + && dep.matches(s.as_summary()) + { + // We are looking for a package from a lock file so we do not care about yank + callback(s) + } + }) + .await?; if called { - Poll::Ready(Ok(())) + return Ok(()); } else { debug!("falling back to an update"); self.invalidate_cache(); - Poll::Pending } - } else { - let mut precise_yanked_in_use = false; - ready!( - self.index - .query_inner(dep.package_name(), &req, &mut *self.ops, &mut |s| { - let matched = match kind { - QueryKind::Exact | QueryKind::RejectedVersions => { - if req.is_precise() && self.gctx.cli_unstable().unstable_options { - dep.matches_prerelease(s.as_summary()) - } else { - dep.matches(s.as_summary()) - } - } - QueryKind::AlternativeNames => true, - QueryKind::Normalized => true, - }; - if !matched { - return; - } - // Next filter out all yanked packages. 
Some yanked packages may - // leak through if they're in a whitelist (aka if they were - // previously in `Cargo.lock` - match s { - s @ _ if kind == QueryKind::RejectedVersions => callback(s), - s @ IndexSummary::Candidate(_) => callback(s), - s @ IndexSummary::Yanked(_) => { - if self.yanked_whitelist.contains(&s.package_id()) { - callback(s); - } else if req.is_precise() { - precise_yanked_in_use = true; - callback(s); - } - } - IndexSummary::Unsupported(summary, v) => { - tracing::debug!( - "unsupported schema version {} ({} {})", - v, - summary.name(), - summary.version() - ); - } - IndexSummary::Invalid(summary) => { - tracing::debug!( - "invalid ({} {})", - summary.name(), - summary.version() - ); - } - IndexSummary::Offline(summary) => { - tracing::debug!( - "offline ({} {})", - summary.name(), - summary.version() - ); - } + } + + let mut called = false; + let callback = &mut |s| { + called = true; + f(s); + }; + + let mut precise_yanked_in_use = false; + self.index + .query_inner(dep.package_name(), &req, &*self.ops, &mut |s| { + let matched = match kind { + QueryKind::Exact | QueryKind::RejectedVersions => { + if req.is_precise() && self.gctx.cli_unstable().unstable_options { + dep.matches_prerelease(s.as_summary()) + } else { + dep.matches(s.as_summary()) } - }) - )?; - if precise_yanked_in_use { - let name = dep.package_name(); - let version = req - .precise_version() - .expect("--precise in use"); - if self.selected_precise_yanked.insert((name, version.clone())) { - let mut shell = self.gctx.shell(); - shell.print_report( - &[Level::WARNING - .secondary_title(format!( - "selected package `{name}@{version}` was yanked by the author" - )) - .element( - Level::HELP - .message("if possible, try a compatible non-yanked version"), - )], - false, - )?; + } + QueryKind::AlternativeNames => true, + QueryKind::Normalized => true, + }; + if !matched { + return; } - } - if called { - return Poll::Ready(Ok(())); - } - let mut any_pending = false; - if kind == 
QueryKind::AlternativeNames || kind == QueryKind::Normalized { - // Attempt to handle misspellings by searching for a chain of related - // names to the original name. The resolver will later - // reject any candidates that have the wrong name, and with this it'll - // have enough information to offer "a similar crate exists" suggestions. - // For now we only try canonicalizing `-` to `_` and vice versa. - // More advanced fuzzy searching become in the future. - for name_permutation in [ - dep.package_name().replace('-', "_"), - dep.package_name().replace('_', "-"), - ] { - let name_permutation = name_permutation.into(); - if name_permutation == dep.package_name() { - continue; + // Next filter out all yanked packages. Some yanked packages may + // leak through if they're in a whitelist (aka if they were + // previously in `Cargo.lock` + match s { + s @ _ if kind == QueryKind::RejectedVersions => callback(s), + s @ IndexSummary::Candidate(_) => callback(s), + s @ IndexSummary::Yanked(_) => { + if self.yanked_whitelist.borrow().contains(&s.package_id()) { + callback(s); + } else if req.is_precise() { + precise_yanked_in_use = true; + callback(s); + } + } + IndexSummary::Unsupported(summary, v) => { + tracing::debug!( + "unsupported schema version {} ({} {})", + v, + summary.name(), + summary.version() + ); + } + IndexSummary::Invalid(summary) => { + tracing::debug!("invalid ({} {})", summary.name(), summary.version()); + } + IndexSummary::Offline(summary) => { + tracing::debug!("offline ({} {})", summary.name(), summary.version()); } - any_pending |= self - .index - .query_inner(name_permutation, &req, &mut *self.ops, &mut |s| { - if !s.is_yanked() { - f(s); - } else if kind == QueryKind::AlternativeNames { - f(s); - } - })? 
- .is_pending(); } + }) + .await?; + if precise_yanked_in_use { + let name = dep.package_name(); + let version = req + .precise_version() + .expect("--precise in use"); + if self + .selected_precise_yanked + .borrow_mut() + .insert((name, version.clone())) + { + let mut shell = self.gctx.shell(); + shell.print_report( + &[Level::WARNING + .secondary_title(format!( + "selected package `{name}@{version}` was yanked by the author" + )) + .element( + Level::HELP.message("if possible, try a compatible non-yanked version"), + )], + false, + )?; } - if any_pending { - Poll::Pending - } else { - Poll::Ready(Ok(())) + } + if called { + return Ok(()); + } + if kind == QueryKind::AlternativeNames || kind == QueryKind::Normalized { + // Attempt to handle misspellings by searching for a chain of related + // names to the original name. The resolver will later + // reject any candidates that have the wrong name, and with this it'll + // have enough information to offer "a similar crate exists" suggestions. + // For now we only try canonicalizing `-` to `_` and vice versa. + // More advanced fuzzy searching become in the future. 
+ for name_permutation in [ + dep.package_name().replace('-', "_"), + dep.package_name().replace('_', "-"), + ] { + let name_permutation = name_permutation.into(); + if name_permutation == dep.package_name() { + continue; + } + self.index + .query_inner(name_permutation, &req, &*self.ops, &mut |s| { + if !s.is_yanked() { + f(s); + } else if kind == QueryKind::AlternativeNames { + f(s); + } + }) + .await?; } } + Ok(()) } fn supports_checksums(&self) -> bool { @@ -905,7 +912,7 @@ impl<'gctx> Source for RegistrySource<'gctx> { self.source_id } - fn invalidate_cache(&mut self) { + fn invalidate_cache(&self) { self.index.clear_summaries_cache(); self.ops.invalidate_cache(); } @@ -914,14 +921,9 @@ impl<'gctx> Source for RegistrySource<'gctx> { self.ops.set_quiet(quiet); } - fn download(&mut self, package: PackageId) -> CargoResult { - let hash = loop { - match self.index.hash(package, &mut *self.ops)? { - Poll::Pending => self.block_until_ready()?, - Poll::Ready(hash) => break hash, - } - }; - match self.ops.download(package, hash)? { + fn download(&self, package: PackageId) -> CargoResult { + let hash = crate::util::block_on(self.index.hash(package, &*self.ops))?; + match self.ops.download(package, &hash)? { MaybeLock::Ready(file) => self.get_pkg(package, &file).map(MaybePackage::Ready), MaybeLock::Download { url, @@ -935,14 +937,9 @@ impl<'gctx> Source for RegistrySource<'gctx> { } } - fn finish_download(&mut self, package: PackageId, data: Vec) -> CargoResult { - let hash = loop { - match self.index.hash(package, &mut *self.ops)? 
{ - Poll::Pending => self.block_until_ready()?, - Poll::Ready(hash) => break hash, - } - }; - let file = self.ops.finish_download(package, hash, &data)?; + fn finish_download(&self, package: PackageId, data: Vec) -> CargoResult { + let hash = crate::util::block_on(self.index.hash(package, &*self.ops))?; + let file = self.ops.finish_download(package, &hash, &data)?; self.get_pkg(package, &file) } @@ -954,31 +951,12 @@ impl<'gctx> Source for RegistrySource<'gctx> { self.source_id.display_index() } - fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]) { - self.yanked_whitelist.extend(pkgs); - } - - fn is_yanked(&mut self, pkg: PackageId) -> Poll> { - self.index.is_yanked(pkg, &mut *self.ops) + fn add_to_yanked_whitelist(&self, pkgs: &[PackageId]) { + self.yanked_whitelist.borrow_mut().extend(pkgs); } - fn block_until_ready(&mut self) -> CargoResult<()> { - // Before starting to work on the registry, make sure that - // `/registry` is marked as excluded from indexing and - // backups. Older versions of Cargo didn't do this, so we do it here - // regardless of whether `` exists. - // - // This does not use `create_dir_all_excluded_from_backups_atomic` for - // the same reason: we want to exclude it even if the directory already - // exists. - // - // IO errors in creating and marking it are ignored, e.g. in case we're on a - // read-only filesystem. 
- let registry_base = self.gctx.registry_base_path(); - let _ = registry_base.create_dir(); - exclude_from_backups_and_indexing(®istry_base.into_path_unlocked()); - - self.ops.block_until_ready() + async fn is_yanked(&self, pkg: PackageId) -> CargoResult { + self.index.is_yanked(pkg, &*self.ops).await } } diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index b99fa60822f..b12119ac786 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -11,16 +11,14 @@ use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData}; use crate::util::cache_lock::CacheLockMode; use crate::util::errors::CargoResult; use crate::util::interning::InternedString; -use crate::util::{Filesystem, GlobalContext, OnceExt}; +use crate::util::{Filesystem, GlobalContext}; use anyhow::Context as _; use cargo_util::paths; -use std::cell::OnceCell; use std::cell::{Cell, Ref, RefCell}; use std::fs::File; use std::mem; use std::path::Path; use std::str; -use std::task::{Poll, ready}; use tracing::{debug, trace}; /// A remote registry is a registry that lives at a remote URL (such as @@ -71,7 +69,7 @@ pub struct RemoteRegistry<'gctx> { /// [tree object]: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects#_tree_objects tree: RefCell>>, /// A Git repository that contains the actual index we want. - repo: OnceCell, + repo: RefCell>, /// The current HEAD commit of the underlying Git repository. head: Cell>, /// This stores sha value of the current HEAD commit for convenience. @@ -80,7 +78,7 @@ pub struct RemoteRegistry<'gctx> { /// /// See [`RemoteRegistry::mark_updated`] on how to make sure a registry /// index is updated only once per session. - needs_update: bool, + needs_update: Cell, /// Disables status messages. 
quiet: bool, } @@ -103,24 +101,24 @@ impl<'gctx> RemoteRegistry<'gctx> { gctx, index_git_ref: GitReference::DefaultBranch, tree: RefCell::new(None), - repo: OnceCell::new(), + repo: RefCell::new(None), head: Cell::new(None), current_sha: Cell::new(None), - needs_update: false, + needs_update: Cell::new(false), quiet: false, } } /// Creates intermediate dirs and initialize the repository. - fn repo(&self) -> CargoResult<&git2::Repository> { - self.repo.try_borrow_with(|| { + fn repo(&self) -> CargoResult>> { + if self.repo.borrow().is_none() { trace!("acquiring registry index lock"); let path = self .gctx .assert_package_cache_locked(CacheLockMode::DownloadExclusive, &self.index_path); - match git2::Repository::open(&path) { - Ok(repo) => Ok(repo), + self.repo.replace(Some(match git2::Repository::open(&path) { + Ok(repo) => repo, Err(_) => { drop(paths::remove_dir_all(&path)); paths::create_dir_all(&path)?; @@ -142,18 +140,21 @@ impl<'gctx> RemoteRegistry<'gctx> { // things that we don't want. let mut opts = git2::RepositoryInitOptions::new(); opts.external_template(false); - Ok(git2::Repository::init_opts(&path, &opts).with_context(|| { + git2::Repository::init_opts(&path, &opts).with_context(|| { format!("failed to initialize index git repository (in {:?})", path) - })?) + })? } - } - }) + })); + } + + Ok(self.repo.borrow()) } /// Get the object ID of the HEAD commit from the underlying Git repository. 
fn head(&self) -> CargoResult { if self.head.get().is_none() { let repo = self.repo()?; + let repo = repo.as_ref().unwrap(); let oid = resolve_ref(&self.index_git_ref, repo)?; self.head.set(Some(oid)); } @@ -170,6 +171,7 @@ impl<'gctx> RemoteRegistry<'gctx> { } } let repo = self.repo()?; + let repo = repo.as_ref().unwrap(); let commit = repo.find_commit(self.head()?)?; let tree = commit.tree()?; @@ -216,8 +218,69 @@ impl<'gctx> RemoteRegistry<'gctx> { fn mark_updated(&self) { self.gctx.updated_sources().insert(self.source_id); } + + fn update(&self) -> CargoResult<()> { + if !self.needs_update.get() { + return Ok(()); + } + + self.needs_update.set(false); + + if self.is_updated() { + return Ok(()); + } + self.mark_updated(); + + if !self.gctx.network_allowed() { + return Ok(()); + } + if self.gctx.cli_unstable().no_index_update { + return Ok(()); + } + + debug!("updating the index"); + + // Ensure that we'll actually be able to acquire an HTTP handle later on + // once we start trying to download crates. This will weed out any + // problems with `.cargo/config` configuration related to HTTP. + // + // This way if there's a problem the error gets printed before we even + // hit the index, which may not actually read this configuration. + self.gctx.http()?; + + self.prepare()?; + self.head.set(None); + *self.tree.borrow_mut() = None; + self.current_sha.set(None); + let _path = self + .gctx + .assert_package_cache_locked(CacheLockMode::DownloadExclusive, &self.index_path); + if !self.quiet { + self.gctx + .shell() + .status("Updating", self.source_id.display_index())?; + } + + // Fetch the latest version of our `index_git_ref` into the index + // checkout. 
+ let url = self.source_id.url(); + let mut repo = self.repo.borrow_mut(); + let repo = repo.as_mut().unwrap(); + git::fetch( + repo, + url.as_str(), + &self.index_git_ref, + &self.index_git_ref, + self.gctx, + RemoteKind::Registry, + ) + .with_context(|| format!("failed to fetch `{}`", url))?; + + Ok(()) + } } +#[async_trait::async_trait(?Send)] impl<'gctx> RegistryData for RemoteRegistry<'gctx> { fn prepare(&self) -> CargoResult<()> { self.repo()?; @@ -256,21 +319,21 @@ impl<'gctx> RegistryData for RemoteRegistry<'gctx> { /// read it, as long as we check for that hash value. /// /// Cargo now uses a hash of the file's contents as provided by git. - fn load( - &mut self, + async fn load( + &self, _root: &Path, path: &Path, index_version: Option<&str>, - ) -> Poll> { - if self.needs_update { - return Poll::Pending; + ) -> CargoResult { + if self.needs_update.get() { + self.update()?; } // Check if the cache is valid. let git_commit_hash = self.current_version(); if index_version.is_some() && index_version == git_commit_hash.as_deref() { // This file was written by an old version of cargo, but it is // still up-to-date. - return Poll::Ready(Ok(LoadResponse::CacheValid)); + return Ok(LoadResponse::CacheValid); } // Note that the index calls this method and the filesystem is locked // in the index, so we don't need to worry about an `update_index` @@ -281,6 +344,7 @@ impl<'gctx> RegistryData for RemoteRegistry<'gctx> { index_version: Option<&str>, ) -> CargoResult { let repo = registry.repo()?; + let repo = repo.as_ref().unwrap(); let tree = registry.tree()?; let entry = tree.get_path(path); let entry = entry?; @@ -302,106 +366,53 @@ impl<'gctx> RegistryData for RemoteRegistry<'gctx> { }) } - match load_helper(&self, path, index_version) { - Ok(result) => Poll::Ready(Ok(result)), - Err(_) if !self.is_updated() => { - // If git returns an error and we haven't updated the repo, - // return pending to allow an update to try again. 
- self.needs_update = true; - Poll::Pending - } - Err(e) - if e.downcast_ref::() - .map(|e| e.code() == git2::ErrorCode::NotFound) - .unwrap_or_default() => - { - // The repo has been updated and the file does not exist. - Poll::Ready(Ok(LoadResponse::NotFound)) - } - Err(e) => Poll::Ready(Err(e)), + loop { + return match load_helper(&self, path, index_version) { + Ok(result) => Ok(result), + Err(_) if !self.is_updated() => { + // If git returns an error and we haven't updated the repo, + // return pending to allow an update to try again. + self.needs_update.set(true); + self.update()?; + continue; + } + Err(e) + if e.downcast_ref::() + .map(|e| e.code() == git2::ErrorCode::NotFound) + .unwrap_or_default() => + { + // The repo has been updated and the file does not exist. + Ok(LoadResponse::NotFound) + } + Err(e) => Err(e), + }; } } - fn config(&mut self) -> Poll>> { + async fn config(&self) -> CargoResult> { debug!("loading config"); self.prepare()?; self.gctx .assert_package_cache_locked(CacheLockMode::DownloadExclusive, &self.index_path); - match ready!(self.load(Path::new(""), Path::new(RegistryConfig::NAME), None)?) { + match self + .load(Path::new(""), Path::new(RegistryConfig::NAME), None) + .await? + { LoadResponse::Data { raw_data, .. } => { trace!("config loaded"); let cfg: RegistryConfig = serde_json::from_slice(&raw_data)?; - Poll::Ready(Ok(Some(cfg))) + Ok(Some(cfg)) } - _ => Poll::Ready(Ok(None)), - } - } - - fn block_until_ready(&mut self) -> CargoResult<()> { - if !self.needs_update { - return Ok(()); - } - - self.needs_update = false; - - if self.is_updated() { - return Ok(()); - } - self.mark_updated(); - - if !self.gctx.network_allowed() { - return Ok(()); - } - if self.gctx.cli_unstable().no_index_update { - return Ok(()); - } - - debug!("updating the index"); - - // Ensure that we'll actually be able to acquire an HTTP handle later on - // once we start trying to download crates. 
This will weed out any - // problems with `.cargo/config` configuration related to HTTP. - // - // This way if there's a problem the error gets printed before we even - // hit the index, which may not actually read this configuration. - self.gctx.http()?; - - self.prepare()?; - self.head.set(None); - *self.tree.borrow_mut() = None; - self.current_sha.set(None); - let _path = self - .gctx - .assert_package_cache_locked(CacheLockMode::DownloadExclusive, &self.index_path); - if !self.quiet { - self.gctx - .shell() - .status("Updating", self.source_id.display_index())?; + _ => Ok(None), } - - // Fetch the latest version of our `index_git_ref` into the index - // checkout. - let url = self.source_id.url(); - let repo = self.repo.get_mut().unwrap(); - git::fetch( - repo, - url.as_str(), - &self.index_git_ref, - &self.index_git_ref, - self.gctx, - RemoteKind::Registry, - ) - .with_context(|| format!("failed to fetch `{}`", url))?; - - Ok(()) } /// Read the general concept for `invalidate_cache()` on /// [`RegistryData::invalidate_cache`]. /// /// To fully invalidate, undo [`RemoteRegistry::mark_updated`]'s work. - fn invalidate_cache(&mut self) { - self.needs_update = true; + fn invalidate_cache(&self) { + self.needs_update.set(true); } fn set_quiet(&mut self, quiet: bool) { @@ -412,13 +423,8 @@ impl<'gctx> RegistryData for RemoteRegistry<'gctx> { self.is_updated() } - fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult { - let registry_config = loop { - match self.config()? 
{ - Poll::Pending => self.block_until_ready()?, - Poll::Ready(cfg) => break cfg.unwrap(), - } - }; + fn download(&self, pkg: PackageId, checksum: &str) -> CargoResult { + let registry_config = crate::util::block_on(self.config())?.unwrap(); download::download( &self.cache_path, @@ -430,12 +436,7 @@ impl<'gctx> RegistryData for RemoteRegistry<'gctx> { ) } - fn finish_download( - &mut self, - pkg: PackageId, - checksum: &str, - data: &[u8], - ) -> CargoResult { + fn finish_download(&self, pkg: PackageId, checksum: &str, data: &[u8]) -> CargoResult { download::finish_download( &self.cache_path, &self.gctx, diff --git a/src/cargo/sources/replaced.rs b/src/cargo/sources/replaced.rs index d6f6519d87b..23eff200cdf 100644 --- a/src/cargo/sources/replaced.rs +++ b/src/cargo/sources/replaced.rs @@ -4,7 +4,6 @@ use crate::sources::source::MaybePackage; use crate::sources::source::QueryKind; use crate::sources::source::Source; use crate::util::errors::CargoResult; -use std::task::Poll; /// A source that replaces one source with the other. This manages the [source /// replacement] feature. 
@@ -45,6 +44,7 @@ impl<'gctx> ReplacedSource<'gctx> { } } +#[async_trait::async_trait(?Send)] impl<'gctx> Source for ReplacedSource<'gctx> { fn source_id(&self) -> SourceId { self.to_replace @@ -62,12 +62,12 @@ impl<'gctx> Source for ReplacedSource<'gctx> { self.inner.requires_precise() } - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { + ) -> CargoResult<()> { let (replace_with, to_replace) = (self.replace_with, self.to_replace); let dep = dep.clone().map_source(to_replace, replace_with); @@ -75,6 +75,7 @@ impl<'gctx> Source for ReplacedSource<'gctx> { .query(&dep, kind, &mut |summary| { f(summary.map_summary(|s| s.map_source(replace_with, to_replace))) }) + .await .map_err(|e| { if self.is_builtin_replacement() { e @@ -87,7 +88,7 @@ impl<'gctx> Source for ReplacedSource<'gctx> { }) } - fn invalidate_cache(&mut self) { + fn invalidate_cache(&self) { self.inner.invalidate_cache() } @@ -95,7 +96,7 @@ impl<'gctx> Source for ReplacedSource<'gctx> { self.inner.set_quiet(quiet); } - fn download(&mut self, id: PackageId) -> CargoResult { + fn download(&self, id: PackageId) -> CargoResult { let id = id.with_source_id(self.replace_with); let pkg = self.inner.download(id).map_err(|e| { if self.is_builtin_replacement() { @@ -115,7 +116,7 @@ impl<'gctx> Source for ReplacedSource<'gctx> { }) } - fn finish_download(&mut self, id: PackageId, data: Vec) -> CargoResult { + fn finish_download(&self, id: PackageId, data: Vec) -> CargoResult { let id = id.with_source_id(self.replace_with); let pkg = self.inner.finish_download(id, data).map_err(|e| { if self.is_builtin_replacement() { @@ -155,7 +156,7 @@ impl<'gctx> Source for ReplacedSource<'gctx> { !self.is_builtin_replacement() } - fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]) { + fn add_to_yanked_whitelist(&self, pkgs: &[PackageId]) { let pkgs = pkgs .iter() .map(|id| id.with_source_id(self.replace_with)) @@ -163,20 +164,7 @@ 
impl<'gctx> Source for ReplacedSource<'gctx> { self.inner.add_to_yanked_whitelist(&pkgs); } - fn is_yanked(&mut self, pkg: PackageId) -> Poll> { - self.inner.is_yanked(pkg) - } - - fn block_until_ready(&mut self) -> CargoResult<()> { - self.inner.block_until_ready().map_err(|e| { - if self.is_builtin_replacement() { - e - } else { - e.context(format!( - "failed to update replaced source {}", - self.to_replace - )) - } - }) + async fn is_yanked(&self, pkg: PackageId) -> CargoResult { + self.inner.is_yanked(pkg).await } } diff --git a/src/cargo/sources/source.rs b/src/cargo/sources/source.rs index 18dfb74cbdd..f0673e99d31 100644 --- a/src/cargo/sources/source.rs +++ b/src/cargo/sources/source.rs @@ -2,7 +2,7 @@ use std::collections::hash_map::HashMap; use std::fmt; -use std::task::Poll; +use std::rc::Rc; use crate::core::SourceId; use crate::core::{Dependency, Package, PackageId}; @@ -27,6 +27,7 @@ use crate::util::CargoResult; /// all use cases. See [`crate::sources`] for implementations provided by Cargo. /// /// [dependency confusion attack]: https://medium.com/@alex.birsan/dependency-confusion-4a5d60fec610 +#[async_trait::async_trait(?Send)] pub trait Source { /// Returns the [`SourceId`] corresponding to this source. fn source_id(&self) -> SourceId; @@ -46,31 +47,25 @@ pub trait Source { /// Attempts to find the packages that match a dependency request. /// - /// Usually you should call [`Source::block_until_ready`] somewhere and - /// wait until package information become available. Otherwise any query - /// may return a [`Poll::Pending`]. - /// /// The `f` argument is expected to get called when any [`IndexSummary`] becomes available. - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll>; + ) -> CargoResult<()>; /// Gathers the result from [`Source::query`] as a list of [`IndexSummary`] items /// when they become available. 
- fn query_vec( - &mut self, - dep: &Dependency, - kind: QueryKind, - ) -> Poll>> { + async fn query_vec(&self, dep: &Dependency, kind: QueryKind) -> CargoResult> { let mut ret = Vec::new(); - self.query(dep, kind, &mut |s| ret.push(s)).map_ok(|_| ret) + self.query(dep, kind, &mut |s| ret.push(s)) + .await + .map(|()| ret) } /// Ensure that the source is fully up-to-date for the current session on the next query. - fn invalidate_cache(&mut self); + fn invalidate_cache(&self); /// If quiet, the source should not display any progress or status messages. fn set_quiet(&mut self, quiet: bool); @@ -86,7 +81,7 @@ pub trait Source { /// In the case where [`MaybePackage::Download`] is returned, then the /// package downloader will call [`Source::finish_download`] after the /// download has finished. - fn download(&mut self, package: PackageId) -> CargoResult; + fn download(&self, package: PackageId) -> CargoResult; /// Gives the source the downloaded `.crate` file. /// @@ -95,7 +90,7 @@ pub trait Source { /// the results of the download of the given URL. The source is /// responsible for saving to disk, and returning the appropriate /// [`Package`]. - fn finish_download(&mut self, pkg_id: PackageId, contents: Vec) -> CargoResult; + fn finish_download(&self, pkg_id: PackageId, contents: Vec) -> CargoResult; /// Generates a unique string which represents the fingerprint of the /// current state of the source. @@ -133,20 +128,11 @@ pub trait Source { /// Add a number of crates that should be whitelisted for showing up during /// queries, even if they are yanked. Currently only applies to registry /// sources. - fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]); + fn add_to_yanked_whitelist(&self, pkgs: &[PackageId]); /// Query if a package is yanked. Only registry sources can mark packages /// as yanked. This ignores the yanked whitelist. 
- fn is_yanked(&mut self, _pkg: PackageId) -> Poll>; - - /// Block until all outstanding [`Poll::Pending`] requests are [`Poll::Ready`]. - /// - /// After calling this function, the source should return `Poll::Ready` for - /// any queries that previously returned `Poll::Pending`. - /// - /// If no queries previously returned `Poll::Pending`, and [`Source::invalidate_cache`] - /// was not called, this function should be a no-op. - fn block_until_ready(&mut self) -> CargoResult<()>; + async fn is_yanked(&self, pkg: PackageId) -> CargoResult; } /// Defines how a dependency query will be performed for a [`Source`]. @@ -193,6 +179,7 @@ pub enum MaybePackage { } /// A blanket implementation forwards all methods to [`Source`]. +#[async_trait::async_trait(?Send)] impl<'a, T: Source + ?Sized + 'a> Source for &'a mut T { fn source_id(&self) -> SourceId { (**self).source_id() @@ -210,16 +197,16 @@ impl<'a, T: Source + ?Sized + 'a> Source for &'a mut T { (**self).requires_precise() } - fn query( - &mut self, + async fn query( + &self, dep: &Dependency, kind: QueryKind, f: &mut dyn FnMut(IndexSummary), - ) -> Poll> { - (**self).query(dep, kind, f) + ) -> CargoResult<()> { + (**self).query(dep, kind, f).await } - fn invalidate_cache(&mut self) { + fn invalidate_cache(&self) { (**self).invalidate_cache() } @@ -227,11 +214,11 @@ impl<'a, T: Source + ?Sized + 'a> Source for &'a mut T { (**self).set_quiet(quiet) } - fn download(&mut self, id: PackageId) -> CargoResult { + fn download(&self, id: PackageId) -> CargoResult { (**self).download(id) } - fn finish_download(&mut self, id: PackageId, data: Vec) -> CargoResult { + fn finish_download(&self, id: PackageId, data: Vec) -> CargoResult { (**self).finish_download(id, data) } @@ -251,23 +238,19 @@ impl<'a, T: Source + ?Sized + 'a> Source for &'a mut T { (**self).is_replaced() } - fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]) { + fn add_to_yanked_whitelist(&self, pkgs: &[PackageId]) { 
(**self).add_to_yanked_whitelist(pkgs); } - fn is_yanked(&mut self, pkg: PackageId) -> Poll> { - (**self).is_yanked(pkg) - } - - fn block_until_ready(&mut self) -> CargoResult<()> { - (**self).block_until_ready() + async fn is_yanked(&self, pkg: PackageId) -> CargoResult { + (**self).is_yanked(pkg).await } } /// A [`HashMap`] of [`SourceId`] to `Box`. #[derive(Default)] pub struct SourceMap<'src> { - map: HashMap>, + map: HashMap>, } // `impl Debug` on source requires specialization, if even desirable at all. @@ -287,19 +270,14 @@ impl<'src> SourceMap<'src> { } /// Like `HashMap::get`. - pub fn get(&self, id: SourceId) -> Option<&(dyn Source + 'src)> { - self.map.get(&id).map(|s| s.as_ref()) - } - - /// Like `HashMap::get_mut`. - pub fn get_mut(&mut self, id: SourceId) -> Option<&mut (dyn Source + 'src)> { - self.map.get_mut(&id).map(|s| s.as_mut()) + pub fn get(&self, id: SourceId) -> Option<&Rc> { + self.map.get(&id) } /// Like `HashMap::insert`, but derives the [`SourceId`] key from the [`Source`]. pub fn insert(&mut self, source: Box) { let id = source.source_id(); - self.map.insert(id, source); + self.map.insert(id, source.into()); } /// Like `HashMap::len`. @@ -307,11 +285,9 @@ impl<'src> SourceMap<'src> { self.map.len() } - /// Like `HashMap::iter_mut`. - pub fn sources_mut<'a>( - &'a mut self, - ) -> impl Iterator { - self.map.iter_mut().map(|(a, b)| (a, &mut **b)) + /// Like `HashMap::iter`. + pub fn iter<'a>(&'a self) -> impl Iterator { + self.map.iter().map(|(a, b)| (a, &**b)) } /// Merge the given map into self. 
diff --git a/src/cargo/util/context/mod.rs b/src/cargo/util/context/mod.rs index d0287e94a41..fed1d94c0ae 100644 --- a/src/cargo/util/context/mod.rs +++ b/src/cargo/util/context/mod.rs @@ -85,8 +85,8 @@ use crate::sources::CRATES_IO_REGISTRY; use crate::util::OnceExt as _; use crate::util::cache_lock::{CacheLock, CacheLockMode, CacheLocker}; use crate::util::errors::CargoResult; -use crate::util::network::http::configure_http_handle; -use crate::util::network::http::http_handle; +use crate::util::network::http::{HandleConfiguration, configure_http_handle, http_handle}; +use crate::util::network::http_async; use crate::util::restricted_names::is_glob_pattern; use crate::util::{CanonicalUrl, closest_msg, internal}; use crate::util::{Filesystem, IntoUrl, IntoUrlWithBase, Rustc}; @@ -267,6 +267,7 @@ pub struct GlobalContext { package_cache_lock: CacheLocker, /// Cached configuration parsed by Cargo http_config: OnceLock, + http_async: OnceLock, future_incompat_config: OnceLock, net_config: OnceLock, build_config: OnceLock, @@ -361,6 +362,7 @@ impl GlobalContext { registry_config: Default::default(), package_cache_lock: CacheLocker::new(), http_config: Default::default(), + http_async: Default::default(), future_incompat_config: Default::default(), net_config: Default::default(), build_config: Default::default(), @@ -1894,6 +1896,13 @@ impl GlobalContext { Ok(http) } + pub fn http_async(&self) -> CargoResult<&http_async::Client> { + self.http_async.try_borrow_with(|| { + let handle_config = HandleConfiguration::new(&self)?; + Ok(http_async::Client::new(handle_config)) + }) + } + pub fn http_config(&self) -> CargoResult<&CargoHttpConfig> { self.http_config.try_borrow_with(|| { let mut http = self.get::("http")?; diff --git a/src/cargo/util/local_poll_adapter.rs b/src/cargo/util/local_poll_adapter.rs new file mode 100644 index 00000000000..455def33b50 --- /dev/null +++ b/src/cargo/util/local_poll_adapter.rs @@ -0,0 +1,185 @@ +use futures::{FutureExt, 
future::LocalBoxFuture, stream::FuturesUnordered}; +use std::{collections::HashMap, hash::Hash, ops::Deref, task::Poll}; + +/// A local (!Send) adapter for caching and executing an async method +/// from a non-async context. +/// +/// The `self_parameter`, `key`, and successful (Ok) results must all be cheap to `clone`. +/// +/// Ensures at most one in-flight computation per key. Results are: +/// - cached on success +/// - not retained on error +pub struct LocalPollAdapter<'a, S, K, R> { + pool: FuturesUnordered>, + cache: HashMap>, + self_parameter: S, +} + +impl<'a, S, K, V, E> LocalPollAdapter<'a, S, K, Result> +where + S: Clone + Deref + 'a, + K: Clone + Hash + Eq + 'a, + V: Clone, +{ + pub fn new(self_parameter: S) -> Self { + Self { + pool: FuturesUnordered::new(), + cache: HashMap::new(), + self_parameter, + } + } + + /// Polls the result for `key`, spawning work if needed. + /// + /// If this function returns [`Poll::Pending`], call [`LocalPollAdapter::wait`] + /// to execute the work, then call this function again with the same key + /// to pick up the result. + /// + /// Futures that complete immediately are not queued. + pub fn poll(&mut self, f: F, key: K) -> Poll> + where + F: AsyncFn(&S::Target, &K) -> Result + 'a, + { + match self.cache.get(&key) { + // We have a cached success value, clone it and return. + Some(Poll::Ready(Ok(v))) => return Poll::Ready(Ok(v.clone())), + // We have a cached error value, remove it and return. + // Errors are not Clone, so they are only stored once. + Some(Poll::Ready(Err(_))) => return self.cache.remove(&key).unwrap(), + // This key is already pending. + Some(Poll::Pending) => return Poll::Pending, + // Looks like we have work to do! + None => {} + } + + // Created a pinned future that executes the function, + // returning the key and the result. 
+ let mut future = { + let key = key.clone(); + let self_parameter = self.self_parameter.clone(); + async move { + let v = f(self_parameter.deref(), &key).await; + (key, v) + } + .boxed_local() + }; + + // Attempt to run the future immediately. If it has no `await` yields, + // it will return here. + if let Some((k, v)) = (&mut future).now_or_never() { + if let Ok(success) = &v { + // Only cache successful results. + self.cache.insert(k, Poll::Ready(Ok(success.clone()))); + } + return Poll::Ready(v); + } + + // Insert Pending into the cache so we avoid queuing the same future twice. + self.cache.insert(key.clone(), Poll::Pending); + + // Add the future to the pending queue. + self.pool.push(future); + Poll::Pending + } + + /// Returns the number of pending futures. + pub fn pending_count(&self) -> usize { + self.pool.len() + } + + /// Run all pending futures. Returns true if there was no work to do. + pub fn wait(&mut self) -> bool { + let is_empty = self.pool.is_empty(); + for (k, v) in crate::util::block_on_stream(&mut self.pool) { + *self + .cache + .get_mut(&k) + .expect("all pending work is in the cache") = Poll::Ready(v); + } + is_empty + } +} + +#[cfg(test)] +mod tests { + use super::LocalPollAdapter; + use std::{rc::Rc, task::Poll, time::Duration}; + + struct Thing {} + + impl Thing { + async fn widen(&self, i: &i32) -> Result { + if *i > 10 { + // Big numbers take longer to process (need to test futures that yield). + futures_timer::Delay::new(Duration::from_millis(1)).await + } + if *i % 2 != 0 { + // Odd numbers are not supported (need to test errors). + return Err(()); + } + Ok(*i as i64) + } + } + + /// Poll wrapper around `Thing` + struct PolledThing<'a> { + poller: LocalPollAdapter<'a, Rc, i32, Result>, + } + + impl<'a> PolledThing<'a> { + fn new() -> Self { + Self { + poller: LocalPollAdapter::new(Rc::new(Thing {})), + } + } + + // Non-async version of the widen method. 
+ fn widen(&mut self, i: &i32) -> Poll> { + self.poller.poll(Thing::widen, i.clone()) + } + + fn wait(&mut self) -> bool { + self.poller.wait() + } + } + + #[test] + fn immediate_success() { + let mut p = PolledThing::new(); + assert_eq!(p.widen(&2), Poll::Ready(Ok(2))); + assert!(p.wait()); + } + + #[test] + fn immediate_error() { + let mut p = PolledThing::new(); + assert_eq!(p.widen(&1), Poll::Ready(Err(()))); + assert!(p.wait()); + } + + #[test] + fn deferred_error() { + let mut p = PolledThing::new(); + assert_eq!(p.widen(&1001), Poll::Pending); + assert!(!p.wait()); + assert_eq!(p.widen(&1001), Poll::Ready(Err(()))); + assert!(p.wait()); + // Errors are not cached + assert_eq!(p.widen(&1001), Poll::Pending); + assert!(!p.wait()); + assert_eq!(p.widen(&1001), Poll::Ready(Err(()))); + assert!(p.wait()); + } + + #[test] + fn deferred_success() { + let mut p = PolledThing::new(); + assert_eq!(p.widen(&50), Poll::Pending); + assert!(!p.wait()); + assert_eq!(p.widen(&50), Poll::Ready(Ok(50))); + assert!(p.wait()); + // Success is cached. + assert_eq!(p.widen(&50), Poll::Ready(Ok(50))); + assert!(p.wait()); + } +} diff --git a/src/cargo/util/mod.rs b/src/cargo/util/mod.rs index d6d47eed04a..f96c45ea763 100644 --- a/src/cargo/util/mod.rs +++ b/src/cargo/util/mod.rs @@ -52,6 +52,8 @@ pub mod into_url; mod into_url_with_base; mod io; pub mod job; +mod local_poll_adapter; +pub use local_poll_adapter::LocalPollAdapter; mod lockserver; pub mod log_message; pub mod logger; @@ -72,6 +74,8 @@ mod vcs; mod workspace; pub use cargo_util_terminal::style; +pub(crate) use futures::executor::block_on; +pub(crate) use futures::executor::block_on_stream; pub fn is_rustup() -> bool { #[expect(clippy::disallowed_methods, reason = "consistency with rustup")] diff --git a/src/cargo/util/network/http.rs b/src/cargo/util/network/http.rs index 343b2ddfe10..ba089b676db 100644 --- a/src/cargo/util/network/http.rs +++ b/src/cargo/util/network/http.rs @@ -1,10 +1,12 @@ //! 
Configures libcurl's http handles. +use std::path::PathBuf; use std::str; use std::time::Duration; use anyhow::bail; use curl::easy::Easy; +use curl::easy::Easy2; use curl::easy::InfoType; use curl::easy::SslOpt; use curl::easy::SslVersion; @@ -15,7 +17,6 @@ use crate::CargoResult; use crate::GlobalContext; use crate::util::context::SslVersionConfig; use crate::util::context::SslVersionConfigRange; -use crate::version; /// Creates a new HTTP handle with appropriate global configuration for cargo. pub fn http_handle(gctx: &GlobalContext) -> CargoResult { @@ -25,13 +26,6 @@ pub fn http_handle(gctx: &GlobalContext) -> CargoResult { } pub fn http_handle_and_timeout(gctx: &GlobalContext) -> CargoResult<(Easy, HttpTimeout)> { - if let Some(offline_flag) = gctx.offline_flag() { - bail!( - "attempting to make an HTTP request, but {offline_flag} was \ - specified" - ) - } - // The timeout option for libcurl by default times out the entire transfer, // but we probably don't want this. Instead we only set timeouts for the // connect phase as well as a "low speed" timeout so if we don't receive @@ -50,96 +44,197 @@ pub fn needs_custom_http_transport(gctx: &GlobalContext) -> CargoResult { || *gctx.http_config()? 
!= Default::default() || gctx.get_env_os("HTTP_TIMEOUT").is_some()) } - /// Configure a libcurl http handle with the defaults options for Cargo +pub struct HandleConfiguration { + proxy: Option, + cainfo: Option, + proxy_cainfo: Option, + ssl_options: Option, + useragent: String, + ssl_version: Option, + ssl_min_max_version: Option<(SslVersion, SslVersion)>, + timeout: HttpTimeout, + pub verbose: bool, + pub multiplexing: bool, +} + pub fn configure_http_handle(gctx: &GlobalContext, handle: &mut Easy) -> CargoResult { - let http = gctx.http_config()?; - if let Some(proxy) = super::proxy::http_proxy(http) { - handle.proxy(&proxy)?; - } - if let Some(cainfo) = &http.cainfo { - let cainfo = cainfo.resolve_path(gctx); - handle.cainfo(&cainfo)?; - } - // Use `proxy_cainfo` if explicitly set; otherwise, fall back to `cainfo` as curl does #15376. - if let Some(proxy_cainfo) = http.proxy_cainfo.as_ref().or(http.cainfo.as_ref()) { - let proxy_cainfo = proxy_cainfo.resolve_path(gctx); - handle.proxy_cainfo(&format!("{}", proxy_cainfo.display()))?; - } - if let Some(check) = http.check_revoke { - handle.ssl_options(SslOpt::new().no_revoke(!check))?; - } + let configuration = HandleConfiguration::new(gctx)?; + configuration.configure(handle)?; + Ok(configuration.timeout) +} - if let Some(user_agent) = &http.user_agent { - handle.useragent(user_agent)?; - } else { - handle.useragent(&format!("cargo/{}", version()))?; - } +impl HandleConfiguration { + pub fn new(gctx: &GlobalContext) -> CargoResult { + if let Some(offline_flag) = gctx.offline_flag() { + bail!( + "attempting to make an HTTP request, but {offline_flag} was \ + specified" + ) + } - fn to_ssl_version(s: &str) -> CargoResult { - let version = match s { - "default" => SslVersion::Default, - "tlsv1" => SslVersion::Tlsv1, - "tlsv1.0" => SslVersion::Tlsv10, - "tlsv1.1" => SslVersion::Tlsv11, - "tlsv1.2" => SslVersion::Tlsv12, - "tlsv1.3" => SslVersion::Tlsv13, - _ => bail!( - "Invalid ssl version `{s}`,\ - choose from 
'default', 'tlsv1', 'tlsv1.0', 'tlsv1.1', 'tlsv1.2', 'tlsv1.3'." - ), + let http = gctx.http_config()?; + let timeout = HttpTimeout::new(gctx)?; + let useragent = if let Some(user_agent) = http.user_agent.clone() { + user_agent + } else { + format!("cargo/{}", crate::version()) }; - Ok(version) - } + let multiplexing = http.multiplexing.unwrap_or(true); + let mut handle = HandleConfiguration { + proxy: None, + cainfo: None, + proxy_cainfo: None, + ssl_options: None, + useragent, + ssl_version: None, + ssl_min_max_version: None, + verbose: false, + timeout, + multiplexing, + }; + if let Some(proxy) = super::proxy::http_proxy(http) { + handle.proxy = Some(proxy); + } + if let Some(cainfo) = &http.cainfo { + let cainfo = cainfo.resolve_path(gctx); + handle.cainfo = Some(cainfo); + } + // Use `proxy_cainfo` if explicitly set; otherwise, fall back to `cainfo` as curl does #15376. + if let Some(proxy_cainfo) = http.proxy_cainfo.as_ref().or(http.cainfo.as_ref()) { + let proxy_cainfo = proxy_cainfo.resolve_path(gctx); + handle.proxy_cainfo = Some(format!("{}", proxy_cainfo.display())); + } + if let Some(check) = http.check_revoke { + let mut v = SslOpt::new(); + v.no_revoke(!check); + handle.ssl_options = Some(v); + } - // Empty string accept encoding expands to the encodings supported by the current libcurl. 
- handle.accept_encoding("")?; - if let Some(ssl_version) = &http.ssl_version { - match ssl_version { - SslVersionConfig::Single(s) => { - let version = to_ssl_version(s.as_str())?; - handle.ssl_version(version)?; - } - SslVersionConfig::Range(SslVersionConfigRange { min, max }) => { - let min_version = min - .as_ref() - .map_or(Ok(SslVersion::Default), |s| to_ssl_version(s))?; - let max_version = max - .as_ref() - .map_or(Ok(SslVersion::Default), |s| to_ssl_version(s))?; - handle.ssl_min_max_version(min_version, max_version)?; + fn to_ssl_version(s: &str) -> CargoResult { + let version = match s { + "default" => SslVersion::Default, + "tlsv1" => SslVersion::Tlsv1, + "tlsv1.0" => SslVersion::Tlsv10, + "tlsv1.1" => SslVersion::Tlsv11, + "tlsv1.2" => SslVersion::Tlsv12, + "tlsv1.3" => SslVersion::Tlsv13, + _ => bail!( + "Invalid ssl version `{s}`,\ + choose from 'default', 'tlsv1', 'tlsv1.0', 'tlsv1.1', 'tlsv1.2', 'tlsv1.3'." + ), + }; + Ok(version) + } + + if let Some(ssl_version) = &http.ssl_version { + match ssl_version { + SslVersionConfig::Single(s) => { + let version = to_ssl_version(s.as_str())?; + handle.ssl_version = Some(version); + } + SslVersionConfig::Range(SslVersionConfigRange { min, max }) => { + let min_version = min + .as_ref() + .map_or(Ok(SslVersion::Default), |s| to_ssl_version(s))?; + let max_version = max + .as_ref() + .map_or(Ok(SslVersion::Default), |s| to_ssl_version(s))?; + handle.ssl_min_max_version = Some((min_version, max_version)); + } } + } else if cfg!(windows) { + // This is a temporary workaround for some bugs with libcurl and + // schannel and TLS 1.3. + // + // Our libcurl on Windows is usually built with schannel. + // On Windows 11 (or Windows Server 2022), libcurl recently (late + // 2022) gained support for TLS 1.3 with schannel, and it now defaults + // to 1.3. Unfortunately there have been some bugs with this. + // https://github.com/curl/curl/issues/9431 is the most recent. 
Once + // that has been fixed, and some time has passed where we can be more + // confident that the 1.3 support won't cause issues, this can be + // removed. + // + // Windows 10 is unaffected. libcurl does not support TLS 1.3 on + // Windows 10. (Windows 10 sorta had support, but it required enabling + // an advanced option in the registry which was buggy, and libcurl + // does runtime checks to prevent it.) + handle.ssl_min_max_version = Some((SslVersion::Default, SslVersion::Tlsv12)); + } + + if let Some(true) = http.debug { + handle.verbose = true; } - } else if cfg!(windows) { - // This is a temporary workaround for some bugs with libcurl and - // schannel and TLS 1.3. - // - // Our libcurl on Windows is usually built with schannel. - // On Windows 11 (or Windows Server 2022), libcurl recently (late - // 2022) gained support for TLS 1.3 with schannel, and it now defaults - // to 1.3. Unfortunately there have been some bugs with this. - // https://github.com/curl/curl/issues/9431 is the most recent. Once - // that has been fixed, and some time has passed where we can be more - // confident that the 1.3 support won't cause issues, this can be - // removed. - // - // Windows 10 is unaffected. libcurl does not support TLS 1.3 on - // Windows 10. (Windows 10 sorta had support, but it required enabling - // an advanced option in the registry which was buggy, and libcurl - // does runtime checks to prevent it.) 
- handle.ssl_min_max_version(SslVersion::Default, SslVersion::Tlsv12)?; + + Ok(handle) } - if let Some(true) = http.debug { - handle.verbose(true)?; - tracing::debug!(target: "network", "{:#?}", curl::Version::get()); - handle.debug_function(debug)?; + pub fn configure(&self, handle: &mut Easy) -> Result<(), curl::Error> { + if let Some(v) = &self.proxy { + handle.proxy(&v)?; + } + if let Some(v) = &self.cainfo { + handle.cainfo(&v)?; + } + if let Some(v) = &self.proxy_cainfo { + handle.proxy_cainfo(&v)?; + } + if let Some(v) = &self.ssl_options { + handle.ssl_options(&v)?; + } + handle.useragent(&self.useragent)?; + // Empty string accept encoding expands to the encodings supported by the current libcurl. + handle.accept_encoding("")?; + if let Some(v) = &self.ssl_version { + handle.ssl_version(v.clone())?; + } + if let Some((min, max)) = &self.ssl_min_max_version { + handle.ssl_min_max_version(min.clone(), max.clone())?; + } + if self.verbose { + handle.verbose(true)?; + tracing::debug!(target: "network", "{:#?}", curl::Version::get()); + handle.debug_function(debug)?; + } + Ok(()) } - HttpTimeout::new(gctx) + pub fn configure2(&self, handle: &mut Easy2) -> Result<(), curl::Error> { + if let Some(v) = &self.proxy { + handle.proxy(&v)?; + } + if let Some(v) = &self.cainfo { + handle.cainfo(&v)?; + } + if let Some(v) = &self.proxy_cainfo { + handle.proxy_cainfo(&v)?; + } + if let Some(v) = &self.ssl_options { + handle.ssl_options(&v)?; + } + handle.useragent(&self.useragent)?; + // Empty string accept encoding expands to the encodings supported by the current libcurl. 
+ handle.accept_encoding("")?; + if let Some(v) = &self.ssl_version { + handle.ssl_version(v.clone())?; + } + if let Some((min, max)) = &self.ssl_min_max_version { + handle.ssl_min_max_version(min.clone(), max.clone())?; + } + if self.verbose { + handle.verbose(true)?; + tracing::debug!(target: "network", "{:#?}", curl::Version::get()); + } + self.timeout.configure2(handle)?; + + // Enable HTTP/2 if possible. + crate::try_old_curl_http2_pipewait!(self.multiplexing, handle); + Ok(()) + } + } } -pub fn debug(kind: InfoType, data: &[u8]) { +pub(crate) fn debug(kind: InfoType, data: &[u8]) { enum LogLevel { Debug, Trace, @@ -224,4 +319,16 @@ impl HttpTimeout { handle.low_speed_limit(self.low_speed_limit)?; Ok(()) } + + pub fn configure2(&self, handle: &mut Easy2) -> Result<(), curl::Error> { + // The timeout option for libcurl by default times out the entire + // transfer, but we probably don't want this. Instead we only set + // timeouts for the connect phase as well as a "low speed" timeout so + // if we don't receive many bytes in a large-ish period of time then we + // time out. + handle.connect_timeout(self.dur)?; + handle.low_speed_time(self.dur)?; + handle.low_speed_limit(self.low_speed_limit)?; + Ok(()) + } } diff --git a/src/cargo/util/network/http_async.rs b/src/cargo/util/network/http_async.rs new file mode 100644 index 00000000000..50054f71aab --- /dev/null +++ b/src/cargo/util/network/http_async.rs @@ -0,0 +1,378 @@ +//! Async wrapper around cURL for managing HTTP requests. +//! +//! Requests are executed in parallel using cURL [`Multi`] on
+ +use std::collections::HashMap; +use std::io::{Cursor, Read}; +use std::str::FromStr; +use std::sync::mpsc::{self, Receiver, Sender}; +use std::thread::JoinHandle; +use std::time::Duration; + +use curl::easy::WriteError; +use curl::easy::{Easy2, Handler, InfoType}; +use curl::multi::{Easy2Handle, Multi}; + +use crate::util::network::http::HandleConfiguration; +use futures::channel::oneshot; +use tracing::{debug, error, trace}; + +type Response = http::Response>; +type Request = http::Request>; +type HttpResult = std::result::Result; + +#[derive(Debug, Clone, thiserror::Error)] +#[non_exhaustive] +pub enum Error { + #[error("curl multi failed")] + Multi(#[from] curl::MultiError), + + #[error("curl failed")] + Easy(#[from] curl::Error), + + #[error("failed to convert header value of `{name}` to string: {bytes:?}")] + BadHeader { name: String, bytes: Vec }, +} + +struct Message { + easy: Easy2, + sender: oneshot::Sender>, +} + +/// HTTP Client. Creating a new client spawns a cURL `Multi` and +/// thread that is used for all HTTP requests by this client. +pub struct Client { + channel: Option>, + thread_handle: Option>, + handle_config: HandleConfiguration, +} + +impl Client { + /// Spawns a new worker thread where HTTP requests execute. + pub fn new(handle_config: HandleConfiguration) -> Client { + let (tx, rx) = mpsc::channel(); + let handle = std::thread::spawn(move || WorkerServer::run(rx, handle_config.multiplexing)); + Client { + channel: Some(tx), + thread_handle: Some(handle), + handle_config, + } + } + + /// Perform an HTTP request using this client.
+ pub async fn request(&self, request: Request) -> HttpResult { + let url = request.uri().to_string(); + debug!(target: "network::fetch", url); + let mut collector = Collector::new(); + let (parts, body) = request.into_parts(); + let body_len = body.len(); + collector.request_body = Cursor::new(body); + collector.debug = self.handle_config.verbose; + let mut handle = curl::easy::Easy2::new(collector); + self.handle_config.configure2(&mut handle)?; + + handle.url(&url)?; + handle.follow_location(true)?; + + match parts.method { + http::Method::HEAD => handle.nobody(true)?, + http::Method::GET => handle.get(true)?, + http::Method::POST => { + handle.post_field_size(body_len as u64)?; + handle.post(true)?; + } + http::Method::PUT => { + handle.in_filesize(body_len as u64)?; + handle.put(true)?; + } + method => { + handle.upload(true)?; + handle.in_filesize(body_len as u64)?; + handle.custom_request(method.as_str())?; + } + } + + let mut headers = curl::easy::List::new(); + for (name, value) in parts.headers { + if let Some(name) = name { + let value: &str = value.to_str().map_err(|_| Error::BadHeader { + name: name.to_string(), + bytes: value.as_bytes().to_owned(), + })?; + headers.append(&format!("{}: {}", name, value))?; + } + } + handle.http_headers(headers)?; + + let (sender, receiver) = oneshot::channel(); + let req = Message { + easy: handle, + sender, + }; + + self.channel.as_ref().unwrap().send(req).unwrap(); + receiver.await.unwrap() + } +} + +impl Drop for Client { + fn drop(&mut self) { + // Close the channel + drop(self.channel.take().unwrap()); + // Join the thread + let _ = self.thread_handle.take().unwrap().join(); + } +} + +impl std::fmt::Debug for Client { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("http_async::Client").finish() + } +} + +/// Manages the cURL `Multi`. Processes incoming work sent over the +/// channel, and returns responses. 
+struct WorkerServer { + incoming_work: Receiver, + multi: Multi, + handles: HashMap< + usize, + ( + Easy2Handle, + oneshot::Sender>, + ), + >, + token: usize, +} + +impl WorkerServer { + fn run(incoming_work: Receiver, multiplex: bool) { + let mut multi = Multi::new(); + // let's not flood the server with connections + if let Err(e) = multi.set_max_host_connections(2) { + error!("failed to set max host connections in curl: {e}"); + } + if let Err(e) = multi.pipelining(false, multiplex) { + error!("failed to enable multiplexing/pipelining in curl: {e}"); + } + + let mut worker = Self { + incoming_work, + multi, + handles: HashMap::new(), + token: 0, + }; + worker.worker_loop(); + } + + fn fail_and_drain(&mut self, e: &Error) { + for (_token, (_handle, sender)) in self.handles.drain() { + let _ = sender.send(Err(e.clone())); + } + } + + fn worker_loop(&mut self) { + const INITIAL_DELAY: Duration = Duration::from_millis(1); + let mut wait_backoff = INITIAL_DELAY; + loop { + // Start any pending work. + while let Ok(msg) = self.incoming_work.try_recv() { + self.enqueue_request(msg); + wait_backoff = INITIAL_DELAY; + } + + match self.multi.perform() { + Err(e) if e.is_call_perform() => { + // cURL states if you receive `is_call_perform`, this means that you should call `perform` again. 
+ } + Err(e) => { + self.fail_and_drain(&Error::Multi(e)); + } + Ok(running) => { + self.multi.messages(|msg| { + let t = msg.token().expect("all handles have tokens"); + trace!(token = t, "finish"); + let Some((handle, sender)) = self.handles.remove(&t) else { + error!("missing entry {t} in handle table"); + return; + }; + let result = msg.result_for2(&handle).expect("handle must have a result"); + let mut easy = self.multi.remove2(handle).expect("handle must be in multi"); + let mut response = std::mem::replace( + &mut easy.get_mut().response, + Response::new(Vec::new()), + ); + if let Ok(status) = easy.response_code() + && status != 0 + && let Ok(status) = http::StatusCode::from_u16(status as u16) + { + *response.status_mut() = status; + } + // Would be nice to set HTTP version via `response.version_mut()`, but `curl` doesn't have it exposed. + let extensions = Extensions { + client_ip: easy.primary_ip().ok().flatten().map(str::to_string), + }; + response.extensions_mut().insert(extensions); + let _ = sender.send(result.map(|()| response).map_err(Into::into)); + }); + + if running > 0 { + let max_timeout = Duration::from_millis(1000); + let mut timeout = self + .multi + .get_timeout() + .ok() + .flatten() + .unwrap_or(max_timeout) + .min(max_timeout); + if timeout.is_zero() { + // curl said not to wait. + continue; + } + // Ideally we would use `Multi::poll` + a `MultiWaker` instead of `Multi::wait` + // to wake the thread when new work is queued. But it requires curl 7.68+, + // which is not available everywhere we support. + // + // Instead, we use an exponential backoff approach so that as long as requests + // are being queued, we poll quickly to allow the requests to be added sooner. + // Without this, we end up sitting in `Multi::wait` too long while new work is + // added to the channel. + // + // `get_timeout` says we should wait *at most* the timeout amount, so reducing + // the wait time is fine. 
+ if wait_backoff < timeout { + wait_backoff *= 2; + timeout = wait_backoff + } + trace!( + pending = self.handles.len(), + timeout = timeout.as_millis(), + "curl wait" + ); + if let Err(e) = self.multi.wait(&mut [], timeout) { + self.fail_and_drain(&Error::Multi(e)); + } + } else { + // Block, waiting for more work + trace!("all work completed"); + match self.incoming_work.recv() { + Ok(msg) => { + trace!("resuming work"); + self.enqueue_request(msg); + wait_backoff = INITIAL_DELAY; + } + Err(_) => { + // The sending channel is closed. Shut down the worker. + break; + } + } + } + } + } + } + } + + /// Adds the request to the `Multi`, or send an error back through the channel. + fn enqueue_request(&mut self, message: Message) { + match self.multi.add2(message.easy) { + Ok(mut handle) => { + self.token = self.token.wrapping_add(1); + handle.set_token(self.token).ok(); + self.handles.insert(self.token, (handle, message.sender)); + } + Err(e) => { + let _ = message.sender.send(Err(e.into())); + } + } + } +} + +/// Interface that cURL (`Easy2`) uses to make progress. 
+struct Collector { + response: Response, + request_body: Cursor>, + debug: bool, +} + +impl Collector { + fn new() -> Self { + Collector { + response: Response::new(Vec::new()), + request_body: Cursor::new(Vec::new()), + debug: false, + } + } +} + +impl Handler for Collector { + fn write(&mut self, data: &[u8]) -> Result { + self.response.body_mut().extend_from_slice(data); + Ok(data.len()) + } + + fn header(&mut self, data: &[u8]) -> bool { + if let Some((name, value)) = handle_http_header(data) + && let Ok(name) = http::HeaderName::from_str(name) + && let Ok(value) = http::HeaderValue::from_str(value) + { + self.response.headers_mut().append(name, value); + } + true + } + + fn read(&mut self, data: &mut [u8]) -> Result { + Ok(self.request_body.read(data).unwrap()) + } + + fn debug(&mut self, kind: InfoType, data: &[u8]) { + if self.debug { + super::http::debug(kind, data); + } + } + + fn progress(&mut self, _dltotal: f64, _dlnow: f64, _ultotal: f64, _ulnow: f64) -> bool { + true + } +} + +/// Additional fields on an [`http::Response`]. +#[derive(Clone)] +struct Extensions { + client_ip: Option, +} + +pub trait ResponsePartsExtensions { + fn client_ip(&self) -> Option<&str>; +} + +impl ResponsePartsExtensions for http::response::Parts { + fn client_ip(&self) -> Option<&str> { + self.extensions + .get::() + .and_then(|extensions| extensions.client_ip.as_deref()) + } +} + +impl ResponsePartsExtensions for Response { + fn client_ip(&self) -> Option<&str> { + self.extensions() + .get::() + .and_then(|extensions| extensions.client_ip.as_deref()) + } +} + +/// Splits HTTP `HEADER: VALUE` to a tuple. +fn handle_http_header(buf: &[u8]) -> Option<(&str, &str)> { + if buf.is_empty() { + return None; + } + let buf = std::str::from_utf8(buf).ok()?.trim_end(); + // Don't let server sneak extra lines anywhere. 
+ if buf.contains('\n') { + return None; + } + let (tag, value) = buf.split_once(':')?; + let value = value.trim(); + Some((tag, value)) +} diff --git a/src/cargo/util/network/mod.rs b/src/cargo/util/network/mod.rs index a38fad19611..c2785e30bad 100644 --- a/src/cargo/util/network/mod.rs +++ b/src/cargo/util/network/mod.rs @@ -8,6 +8,7 @@ use std::net::SocketAddrV6; use std::task::Poll; pub mod http; +pub mod http_async; pub mod proxy; pub mod retry; pub mod sleep; @@ -43,10 +44,10 @@ macro_rules! try_old_curl { ::tracing::warn!(target: "network", "ignoring libcurl {} error: {}", $msg, e); } } else { - use ::anyhow::Context; - result.with_context(|| { - ::anyhow::format_err!("failed to enable {}, is curl not built right?", $msg) - })?; + if let Err(e) = &result { + ::tracing::error!(target: "network", "failed to enable {}, is curl not built right? error: {}", $msg, e); + } + result?; } }; } diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index a6b3d68977b..1d1b72b8c66 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -842,7 +842,14 @@ fn update_with_shared_deps() { .with_status(101) .with_stderr_data(str![[r#" [UPDATING] git repository `[ROOTURL]/bar` -[ERROR] unable to update [ROOTURL]/bar#0.1.2 +[ERROR] failed to get `bar` as a dependency of package `dep1 v0.5.0 ([ROOT]/foo/dep1)` + ... 
which satisfies path dependency `dep1` (locked to 0.5.0) of package `foo v0.5.0 ([ROOT]/foo)` + +Caused by: + failed to load source for dependency `bar` + +Caused by: + unable to update [ROOTURL]/bar#0.1.2 Caused by: revspec '0.1.2' not found; class=Reference (4); code=NotFound (-3) diff --git a/tests/testsuite/local_registry.rs b/tests/testsuite/local_registry.rs index 260d12865fb..90c8c80c136 100644 --- a/tests/testsuite/local_registry.rs +++ b/tests/testsuite/local_registry.rs @@ -426,7 +426,7 @@ Caused by: unable to update registry `crates-io` Caused by: - failed to update replaced source registry `crates-io` + failed to query replaced source registry `crates-io` Caused by: local registry path is not a directory: [..]path[..]to[..]nowhere diff --git a/tests/testsuite/registry.rs b/tests/testsuite/registry.rs index f99b3a3e098..956ee3862a3 100644 --- a/tests/testsuite/registry.rs +++ b/tests/testsuite/registry.rs @@ -3562,27 +3562,38 @@ fn sparse_blocking_count() { .file("src/main.rs", "fn main() {}") .build(); - Package::new("bar", "0.0.1").publish(); - - // Ensure we have the expected number of `block_until_ready` calls. - // The 1st (0 transfers pending), is the deliberate extra call in `ensure_loaded` for a source. - // The 2nd (1 transfers pending), is the registry `config.json`. - // the 3rd (1 transfers pending), is the package metadata for `bar`. + Package::new("dep1", "0.0.1").publish(); + Package::new("dep2", "0.0.1").publish(); + Package::new("bar", "0.0.1") + .dep("dep1", "0.0.1") + .dep("dep2", "0.0.1") + .publish(); + // Ensure we have the expected number of resolver restarts and network requests. p.cargo("check") - .env("CARGO_LOG", "network::HttpRegistry::block_until_ready=trace") + .env("CARGO_LOG", "cargo::core::resolver::restarting=debug,network::fetch=debug") .with_stderr_data(str![[r#" - [..] TRACE network::HttpRegistry::block_until_ready: 0 transfers pending [UPDATING] `dummy-registry` index - [..] 
TRACE network::HttpRegistry::block_until_ready: 1 transfers pending - [..] TRACE network::HttpRegistry::block_until_ready: 1 transfers pending + [..] DEBUG network::fetch: url="[..]/index/config.json" + [..] DEBUG network::fetch: url="[..]/index/3/b/bar" [WARNING] spurious network error (3 tries remaining): failed to get successful HTTP response from `[..]/index/3/b/bar` ([..]), got 500 body: internal server error -[LOCKING] 1 package to latest compatible version + [..] DEBUG network::fetch: url="[..]/index/3/b/bar" + [..] DEBUG cargo::core::resolver::restarting: pending=1 + [..] DEBUG network::fetch: url="[..]/index/de/p1/dep1" + [..] DEBUG network::fetch: url="[..]/index/de/p2/dep2" + [..] DEBUG cargo::core::resolver::restarting: pending=2 + [..] DEBUG cargo::core::resolver::restarting: pending=0 +[LOCKING] 3 packages to latest compatible versions + [..] DEBUG cargo::core::resolver::restarting: pending=0 [DOWNLOADING] crates ... -[DOWNLOADED] bar v0.0.1 (registry `dummy-registry`) -[CHECKING] bar v0.0.1 +[DOWNLOADED] [..] v0.0.1 (registry `dummy-registry`) +[DOWNLOADED] [..] v0.0.1 (registry `dummy-registry`) +[DOWNLOADED] [..] v0.0.1 (registry `dummy-registry`) +[CHECKING] [..] v0.0.1 +[CHECKING] [..] v0.0.1 +[CHECKING] [..] 
v0.0.1 [CHECKING] foo v0.0.1 ([ROOT]/foo) [FINISHED] `dev` profile [unoptimized + debuginfo] target(s) in [ELAPSED]s @@ -4282,10 +4293,13 @@ Please slow down [ERROR] failed to get `bar` as a dependency of package `foo v0.1.0 ([ROOT]/foo)` Caused by: - failed to query replaced source registry `crates-io` + failed to load source for dependency `bar` Caused by: - download of 3/b/bar failed + unable to update registry `crates-io` + +Caused by: + failed to query replaced source registry `crates-io` Caused by: failed to get successful HTTP response from `http://127.0.0.1:[..]/index/3/b/bar` (127.0.0.1), got 503 diff --git a/tests/testsuite/registry_auth.rs b/tests/testsuite/registry_auth.rs index c097984905b..ed02b381c79 100644 --- a/tests/testsuite/registry_auth.rs +++ b/tests/testsuite/registry_auth.rs @@ -222,6 +222,12 @@ fn bad_environment_token_with_asymmetric_subject() { [UPDATING] `alternative` index [ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 ([ROOT]/foo)` +Caused by: + failed to load source for dependency `bar` + +Caused by: + unable to update registry `alternative` + Caused by: token rejected for `alternative`, please run `cargo login --registry alternative` or use environment variable CARGO_REGISTRIES_ALTERNATIVE_TOKEN @@ -257,6 +263,12 @@ fn bad_environment_token_with_asymmetric_incorrect_subject() { [UPDATING] `alternative` index [ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 ([ROOT]/foo)` +Caused by: + failed to load source for dependency `bar` + +Caused by: + unable to update registry `alternative` + Caused by: token rejected for `alternative`, please run `cargo login --registry alternative` or use environment variable CARGO_REGISTRIES_ALTERNATIVE_TOKEN @@ -295,6 +307,12 @@ fn bad_environment_token_with_incorrect_asymmetric() { [UPDATING] `alternative` index [ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 ([ROOT]/foo)` +Caused by: + failed to load source for dependency `bar` + +Caused by: + 
unable to update registry `alternative` + Caused by: token rejected for `alternative`, please run `cargo login --registry alternative` or use environment variable CARGO_REGISTRIES_ALTERNATIVE_TOKEN @@ -325,6 +343,12 @@ fn missing_token() { [UPDATING] `alternative` index [ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 ([ROOT]/foo)` +Caused by: + failed to load source for dependency `bar` + +Caused by: + unable to update registry `alternative` + Caused by: no token found for `alternative`, please run `cargo login --registry alternative` or use environment variable CARGO_REGISTRIES_ALTERNATIVE_TOKEN @@ -377,6 +401,12 @@ fn incorrect_token() { [UPDATING] `alternative` index [ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 ([ROOT]/foo)` +Caused by: + failed to load source for dependency `bar` + +Caused by: + unable to update registry `alternative` + Caused by: token rejected for `alternative`, please run `cargo login --registry alternative` or use environment variable CARGO_REGISTRIES_ALTERNATIVE_TOKEN @@ -549,7 +579,7 @@ fn token_not_logged() { str![[r#" ... [PUBLISHED] foo v0.1.0 at registry `crates-io` - +... "#]], ); let authorizations: Vec<_> = log