Skip to content

Commit 3031523

Browse files
mmastrac and claude committed
feat: strip basedirs from Rust hash key for cross-machine cache hits
SCCACHE_BASEDIRS now normalizes cwd, CARGO_MANIFEST_DIR, CARGO_WORKSPACE_DIR, and dep-info env var values in the Rust compiler's hash key computation. This enables cache hits when the same crate is compiled from different absolute paths on different machines (e.g., CI runners with different checkout dirs). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9c26759 commit 3031523

2 files changed

Lines changed: 246 additions & 10 deletions

File tree

src/compiler/rust.rs

Lines changed: 225 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,52 @@ use std::time;
6262

6363
use crate::errors::*;
6464

65+
/// Exact names of CARGO_* environment variables whose values are absolute
/// paths. Their values get basedir prefixes stripped so the hash key is
/// portable across machines.
/// See: https://doc.rust-lang.org/cargo/reference/environment-variables.html
const CARGO_PATH_ENV_VARS: &[&str] = &[
    "CARGO_MANIFEST_DIR",
    "CARGO_MANIFEST_PATH",
    "CARGO_TARGET_TMPDIR",
    "CARGO_WORKSPACE_DIR",
];

/// Name prefixes of CARGO_* environment variables whose values are absolute
/// paths. Any variable whose name starts with one of these is basedir-stripped.
const CARGO_PATH_ENV_PREFIXES: &[&str] = &["CARGO_BIN_EXE_"];

/// Reports whether `var` names a CARGO_* variable known to hold an absolute
/// path: either an exact entry of `CARGO_PATH_ENV_VARS` or a name starting
/// with one of `CARGO_PATH_ENV_PREFIXES`.
fn is_cargo_path_var(var: &str) -> bool {
    if CARGO_PATH_ENV_VARS.iter().any(|&known| known == var) {
        return true;
    }
    CARGO_PATH_ENV_PREFIXES
        .iter()
        .copied()
        .any(|prefix| var.starts_with(prefix))
}
84+
85+
/// Strip a basedir prefix from a byte slice, returning the relative portion.
///
/// Basedirs are pre-normalized with a trailing `/` (see config.rs), so a
/// prefix match always ends on a path-component boundary and the result is a
/// clean relative path. A value that names a basedir itself (i.e. equals the
/// basedir without its trailing `/`, as a cwd usually does) is stripped to an
/// empty slice, so that compiling directly in the basedir is portable too.
///
/// On Windows, the value is normalized (lowercase + forward slashes) before
/// comparison since basedirs are stored normalized; the returned remainder is
/// taken from that normalized copy. Elsewhere the remainder borrows from
/// `value`.
///
/// The first matching basedir wins. If `basedirs` is empty or nothing
/// matches, `value` is returned unchanged.
fn strip_basedir_prefix<'a>(value: &'a [u8], basedirs: &[Vec<u8>]) -> Cow<'a, [u8]> {
    if basedirs.is_empty() {
        return Cow::Borrowed(value);
    }

    #[cfg(target_os = "windows")]
    let normalized = crate::util::normalize_win_path(value);
    #[cfg(not(target_os = "windows"))]
    let normalized = value;

    for basedir in basedirs {
        // Number of leading bytes of the (normalized) value covered by this basedir.
        let prefix_len = if normalized.starts_with(basedir) {
            basedir.len()
        } else if basedir
            .strip_suffix(b"/")
            .is_some_and(|dir| dir == &normalized[..])
        {
            // The value is the basedir itself, just without the trailing
            // separator that config-time normalization appends.
            normalized.len()
        } else {
            continue;
        };

        #[cfg(target_os = "windows")]
        return Cow::Owned(normalized[prefix_len..].to_vec());
        #[cfg(not(target_os = "windows"))]
        return Cow::Borrowed(&value[prefix_len..]);
    }
    Cow::Borrowed(value)
}
110+
65111
#[cfg(feature = "dist-client")]
66112
const RLIB_PREFIX: &str = "lib";
67113
#[cfg(feature = "dist-client")]
@@ -1343,10 +1389,11 @@ where
13431389
_may_dist: bool,
13441390
pool: &tokio::runtime::Handle,
13451391
_rewrite_includes_only: bool,
1346-
_storage: Arc<dyn Storage>,
1392+
storage: Arc<dyn Storage>,
13471393
_cache_control: CacheControl,
13481394
) -> Result<HashResult<T>> {
13491395
trace!("[{}]: generate_hash_key", self.parsed_args.crate_name);
1396+
let basedirs = storage.basedirs();
13501397
// TODO: this doesn't produce correct arguments if they should be concatenated - should use iter_os_strings
13511398
let os_string_arguments: Vec<(OsString, Option<OsString>)> = self
13521399
.parsed_args
@@ -1493,7 +1540,12 @@ where
14931540
a
14941541
})
14951542
};
1496-
args.hash(&mut HashToDigest { digest: &mut m });
1543+
// Strip basedir prefixes from arguments before hashing. Arguments like
1544+
// --remap-path-prefix=/abs/path=..., -Clinker=/abs/path, etc. contain
1545+
// absolute paths that differ across machines. See mozilla/sccache#2652.
1546+
let args_bytes = args.as_encoded_bytes();
1547+
crate::util::strip_basedirs(args_bytes, basedirs)
1548+
.hash(&mut HashToDigest { digest: &mut m });
14971549
// 4. The digest of all source files (this includes src file from cmdline).
14981550
// 5. The digest of all files listed on the commandline (self.externs).
14991551
// 6. The digest of all static libraries listed on the commandline (self.staticlibs).
@@ -1513,7 +1565,10 @@ where
15131565
for (var, val) in env_deps.iter() {
15141566
var.hash(&mut HashToDigest { digest: &mut m });
15151567
m.update(b"=");
1516-
val.hash(&mut HashToDigest { digest: &mut m });
1568+
// Strip basedir prefixes from dep-info env var values (e.g. OUT_DIR)
1569+
// to enable cross-machine cache hits.
1570+
let val_bytes = val.as_encoded_bytes();
1571+
strip_basedir_prefix(val_bytes, basedirs).hash(&mut HashToDigest { digest: &mut m });
15171572
}
15181573
let mut env_vars: Vec<_> = env_vars
15191574
.iter()
@@ -1544,10 +1599,21 @@ where
15441599

15451600
var.hash(&mut HashToDigest { digest: &mut m });
15461601
m.update(b"=");
1547-
val.hash(&mut HashToDigest { digest: &mut m });
1602+
// Strip basedir prefixes from path-containing CARGO_* vars
1603+
// to enable cross-machine cache hits.
1604+
let var_str = var.to_string_lossy();
1605+
if is_cargo_path_var(&var_str) {
1606+
let val_bytes = val.as_encoded_bytes();
1607+
strip_basedir_prefix(val_bytes, basedirs)
1608+
.hash(&mut HashToDigest { digest: &mut m });
1609+
} else {
1610+
val.hash(&mut HashToDigest { digest: &mut m });
1611+
}
15481612
}
15491613
// 9. The cwd of the compile. This will wind up in the rlib.
1550-
cwd.hash(&mut HashToDigest { digest: &mut m });
1614+
// Strip basedir prefix for cross-machine cache portability.
1615+
let cwd_bytes = cwd.as_os_str().as_encoded_bytes();
1616+
strip_basedir_prefix(cwd_bytes, basedirs).hash(&mut HashToDigest { digest: &mut m });
15511617
// 10. The version of the compiler.
15521618
self.version.hash(&mut HashToDigest { digest: &mut m });
15531619
// 11. SCCACHE_RUST_CRATE_TYPE_ALLOW_HASH, if set and we have unsupported
@@ -3541,8 +3607,12 @@ proc_macro false
35413607
m.update(CACHE_VERSION);
35423608
// sysroot shlibs digests.
35433609
m.update(FAKE_DIGEST.as_bytes());
3544-
// Arguments, with cfgs sorted at the end.
3545-
OsStr::new("ab--cfgabc--cfgxyz").hash(&mut HashToDigest { digest: &mut m });
3610+
// Arguments, with cfgs sorted at the end (hashed as bytes via strip_basedirs).
3611+
// With empty basedirs, strip_basedirs returns Cow::Borrowed of the original bytes.
3612+
let args_str = OsStr::new("ab--cfgabc--cfgxyz");
3613+
args_str
3614+
.as_encoded_bytes()
3615+
.hash(&mut HashToDigest { digest: &mut m });
35463616
// bar.rs (source file, from dep-info)
35473617
m.update(empty_digest.as_bytes());
35483618
// foo.rs (source file, from dep-info)
@@ -3552,14 +3622,21 @@ proc_macro false
35523622
// libbaz.a (static library, from staticlibs), containing a single
35533623
// file, baz.o, consisting of 1024 bytes of zeroes.
35543624
m.update(libbaz_a_digest.as_bytes());
3555-
// Env vars
3625+
// Env vars (dep-info env vars hashed as bytes via strip_basedir_prefix)
35563626
OsStr::new("CARGO_BLAH").hash(&mut HashToDigest { digest: &mut m });
35573627
m.update(b"=");
3558-
OsStr::new("abc").hash(&mut HashToDigest { digest: &mut m });
3628+
OsStr::new("abc")
3629+
.as_encoded_bytes()
3630+
.hash(&mut HashToDigest { digest: &mut m });
35593631
OsStr::new("CARGO_PKG_NAME").hash(&mut HashToDigest { digest: &mut m });
35603632
m.update(b"=");
35613633
OsStr::new("foo").hash(&mut HashToDigest { digest: &mut m });
3562-
f.tempdir.path().hash(&mut HashToDigest { digest: &mut m });
3634+
// cwd (hashed as bytes via strip_basedir_prefix)
3635+
f.tempdir
3636+
.path()
3637+
.as_os_str()
3638+
.as_encoded_bytes()
3639+
.hash(&mut HashToDigest { digest: &mut m });
35633640
TEST_RUSTC_VERSION.hash(&mut HashToDigest { digest: &mut m });
35643641
let digest = m.finish();
35653642
assert_eq!(res.key, digest);
@@ -3893,6 +3970,144 @@ proc_macro false
38933970
);
38943971
}
38953972

3973+
fn hash_key_with_basedirs<F>(
3974+
f: &TestFixture,
3975+
args: &[&'static str],
3976+
env_vars: &[(OsString, OsString)],
3977+
pre_func: F,
3978+
basedirs: Vec<Vec<u8>>,
3979+
) -> String
3980+
where
3981+
F: Fn(&Path) -> Result<()>,
3982+
{
3983+
let oargs = args.iter().map(OsString::from).collect::<Vec<OsString>>();
3984+
let parsed_args = match parse_arguments(&oargs, f.tempdir.path()) {
3985+
CompilerArguments::Ok(parsed_args) => parsed_args,
3986+
o => panic!("Got unexpected parse result: {:?}", o),
3987+
};
3988+
{
3989+
let src = &"foo.rs";
3990+
f.touch(src).expect("Failed to create foo.rs");
3991+
}
3992+
for e in parsed_args.externs.iter() {
3993+
f.touch(e.to_str().unwrap())
3994+
.expect("Failed to create extern");
3995+
}
3996+
pre_func(f.tempdir.path()).expect("Failed to execute pre_func");
3997+
let mut hasher = Box::new(RustHasher {
3998+
executable: "rustc".into(),
3999+
host: "x86-64-unknown-unknown-unknown".to_owned(),
4000+
version: TEST_RUSTC_VERSION.to_string(),
4001+
sysroot: f.tempdir.path().join("sysroot"),
4002+
compiler_shlibs_digests: vec![],
4003+
#[cfg(feature = "dist-client")]
4004+
rlib_dep_reader: None,
4005+
parsed_args,
4006+
});
4007+
4008+
let creator = new_creator();
4009+
let runtime = single_threaded_runtime();
4010+
let pool = runtime.handle().clone();
4011+
4012+
mock_dep_info(&creator, &["foo.rs"]);
4013+
mock_file_names(&creator, &["foo.rlib"]);
4014+
hasher
4015+
.generate_hash_key(
4016+
&creator,
4017+
f.tempdir.path().to_owned(),
4018+
env_vars.to_owned(),
4019+
false,
4020+
&pool,
4021+
false,
4022+
Arc::new(MockStorage::with_basedirs(None, false, basedirs)),
4023+
CacheControl::Default,
4024+
)
4025+
.wait()
4026+
.unwrap()
4027+
.key
4028+
}
4029+
4030+
/// Configuring a basedir that covers the compile directory must change the
/// hash key relative to a run with no basedirs.
#[test]
fn test_basedirs_strips_cwd_and_cargo_manifest_dir() {
    let f = TestFixture::new();
    let cwd = f.tempdir.path().to_string_lossy().into_owned();

    let args: &[&'static str] = &[
        "--emit",
        "link",
        "foo.rs",
        "--out-dir",
        "out",
        "--crate-name",
        "foo",
        "--crate-type",
        "lib",
    ];

    // CARGO_MANIFEST_DIR points below the cwd, so it carries the basedir prefix.
    let env_vars = vec![
        (
            OsString::from("CARGO_MANIFEST_DIR"),
            OsString::from(format!("{}/some/pkg", cwd)),
        ),
        (OsString::from("CARGO_PKG_NAME"), OsString::from("foo")),
    ];

    // Baseline: no basedirs configured.
    let key_without = hash_key_with_basedirs(&f, args, &env_vars, nothing, vec![]);

    // Basedirs are normalized at config time (forward slashes, lowercase on
    // Windows, trailing slash), so replicate that here for the cwd.
    let cwd_bytes = cwd.into_bytes();
    #[cfg(target_os = "windows")]
    let cwd_bytes = crate::util::normalize_win_path(&cwd_bytes);
    let mut basedir = cwd_bytes;
    basedir.push(b'/');
    let key_with = hash_key_with_basedirs(&f, args, &env_vars, nothing, vec![basedir]);

    assert_ne!(key_without, key_with, "basedirs should change the hash key");

    // Only the effect of stripping is checked here. Cross-machine equality
    // (identical keys for identical content under different checkout
    // directories) is not exercised, because that would need two fixtures
    // with the same content at different paths.
}
4078+
4079+
/// Hashing the same compilation twice with the same basedirs must yield the
/// same key.
#[test]
fn test_basedirs_deterministic() {
    let f = TestFixture::new();
    let cwd = f.tempdir.path().to_string_lossy().into_owned();

    let args: &[&'static str] = &[
        "--emit",
        "link",
        "foo.rs",
        "--out-dir",
        "out",
        "--crate-name",
        "foo",
        "--crate-type",
        "lib",
    ];
    let env_vars = vec![(OsString::from("CARGO_PKG_NAME"), OsString::from("foo"))];

    // Mirror config-time normalization: normalized on Windows, trailing slash.
    let cwd_bytes = cwd.into_bytes();
    #[cfg(target_os = "windows")]
    let cwd_bytes = crate::util::normalize_win_path(&cwd_bytes);
    let mut basedir = cwd_bytes;
    basedir.push(b'/');

    let first_key = hash_key_with_basedirs(&f, args, &env_vars, nothing, vec![basedir.clone()]);
    let second_key = hash_key_with_basedirs(&f, args, &env_vars, nothing, vec![basedir]);

    assert_eq!(
        first_key, second_key,
        "Same basedir should produce deterministic hash"
    );
}
4110+
38964111
#[test]
38974112
fn test_parse_unstable_profile_flag() {
38984113
let h = parses!(

src/test/mock_storage.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub struct MockStorage {
2828
tx: mpsc::UnboundedSender<Result<Cache>>,
2929
delay: Option<Duration>,
3030
preprocessor_cache_mode: bool,
31+
basedirs: Vec<Vec<u8>>,
3132
}
3233

3334
impl MockStorage {
@@ -39,6 +40,23 @@ impl MockStorage {
3940
rx: Arc::new(Mutex::new(rx)),
4041
delay,
4142
preprocessor_cache_mode,
43+
basedirs: vec![],
44+
}
45+
}
46+
47+
/// Create a new `MockStorage` with basedirs configured.
48+
pub(crate) fn with_basedirs(
49+
delay: Option<Duration>,
50+
preprocessor_cache_mode: bool,
51+
basedirs: Vec<Vec<u8>>,
52+
) -> MockStorage {
53+
let (tx, rx) = mpsc::unbounded();
54+
Self {
55+
tx,
56+
rx: Arc::new(Mutex::new(rx)),
57+
delay,
58+
preprocessor_cache_mode,
59+
basedirs,
4260
}
4361
}
4462

@@ -75,6 +93,9 @@ impl Storage for MockStorage {
7593
async fn max_size(&self) -> Result<Option<u64>> {
7694
Ok(None)
7795
}
96+
fn basedirs(&self) -> &[Vec<u8>] {
97+
&self.basedirs
98+
}
7899
fn preprocessor_cache_mode_config(&self) -> PreprocessorCacheModeConfig {
79100
PreprocessorCacheModeConfig {
80101
use_preprocessor_cache_mode: self.preprocessor_cache_mode,

0 commit comments

Comments
 (0)