Skip to content

Commit 4d82e88

Browse files
authored
Follow links when cache-key is a glob (#13438)
## Summary There's some inconsistent behaviour in handling symlinks when `cache-key` is a glob or a file path. This PR attempts to address that. - When cache-key is a path, [`Path::metadata()`](https://doc.rust-lang.org/std/path/struct.Path.html#method.metadata) is used to check if it's a file or not. According to the docs: > This function will traverse symbolic links to query information about the destination file. So, if the target file is a symlink, it will be resolved and the metadata will be queried for the underlying file. - When cache-key is a glob, `globwalk` is used, specifically allowing for symlinks: ```rust .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK) ``` - However, without enabling link following, `DirEntry::metadata()` will return an equivalent of `Path::symlink_metadata()` (and not `Path::metadata()`), which will have a file type that looks like ```rust FileType { is_file: false, is_dir: false, is_symlink: true, .. } ``` - Then, there's a check for `metadata.is_file()` which fails and complains that the target entry "is a directory when file was expected". - TLDR: glob cache-keys don't work with symlinks. ## Solutions Option 1 (current PR): follow symlinks. Option 2 (also doable): don't follow symlinks, but resolve the resulting target entry manually in case its file type is a symlink. However, this would be a little weird and unobvious in that we resolve files but not directories for some reason. Also, symlinking directories is pretty useful if you want to symlink directories of local dependencies that are not under the project's path. ## Test Plan This has been tested manually: ```rust fn main() { for follow_links in [false, true] { let walker = globwalk::GlobWalkerBuilder::from_patterns(".", &["a/*"]) .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK) .follow_links(follow_links) .build() .unwrap(); let entry = walker.into_iter().next().unwrap().unwrap(); dbg!(&entry); dbg!(entry.file_type()); dbg!(entry.path_is_symlink()); dbg!(entry.path()); let meta = entry.metadata().unwrap(); dbg!(meta.is_file()); } let path = std::path::PathBuf::from("./a/b"); dbg!(path.metadata().unwrap().file_type()); dbg!(path.symlink_metadata().unwrap().file_type()); } ``` Current behaviour (glob cache-key, don't follow links): ``` [src/main.rs:9:9] &entry = DirEntry("./a/b") [src/main.rs:10:9] entry.file_type() = FileType { is_file: false, is_dir: false, is_symlink: true, .. } [src/main.rs:11:9] entry.path_is_symlink() = true [src/main.rs:12:9] entry.path() = "./a/b" [src/main.rs:14:9] meta.is_file() = false ``` Glob cache-key, follow links: ``` [src/main.rs:9:9] &entry = DirEntry("./a/b") [src/main.rs:10:9] entry.file_type() = FileType { is_file: true, is_dir: false, is_symlink: false, .. } [src/main.rs:11:9] entry.path_is_symlink() = true [src/main.rs:12:9] entry.path() = "./a/b" [src/main.rs:14:9] meta.is_file() = true ``` Using `path.metadata()` for a non-glob cache key: ``` [src/main.rs:18:5] path.metadata().unwrap().file_type() = FileType { is_file: true, is_dir: false, is_symlink: false, .. } [src/main.rs:19:5] path.symlink_metadata().unwrap().file_type() = FileType { is_file: false, is_dir: false, is_symlink: true, .. } ```
1 parent 34fbc06 commit 4d82e88

File tree

3 files changed

+97
-9
lines changed

3 files changed

+97
-9
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/uv-cache-info/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,7 @@ thiserror = { workspace = true }
2424
toml = { workspace = true }
2525
tracing = { workspace = true }
2626
walkdir = { workspace = true }
27+
28+
[dev-dependencies]
29+
anyhow = { workspace = true }
30+
tempfile = { workspace = true }

crates/uv-cache-info/src/cache_info.rs

Lines changed: 91 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -230,18 +230,32 @@ impl CacheInfo {
230230
continue;
231231
}
232232
};
233-
let metadata = match entry.metadata() {
234-
Ok(metadata) => metadata,
235-
Err(err) => {
236-
warn!("Failed to read metadata for glob entry: {err}");
237-
continue;
233+
let metadata = if entry.path_is_symlink() {
234+
// resolve symlinks for leaf entries without following symlinks while globbing
235+
match fs_err::metadata(entry.path()) {
236+
Ok(metadata) => metadata,
237+
Err(err) => {
238+
warn!("Failed to resolve symlink for glob entry: {err}");
239+
continue;
240+
}
241+
}
242+
} else {
243+
match entry.metadata() {
244+
Ok(metadata) => metadata,
245+
Err(err) => {
246+
warn!("Failed to read metadata for glob entry: {err}");
247+
continue;
248+
}
238249
}
239250
};
240251
if !metadata.is_file() {
241-
warn!(
242-
"Expected file for cache key, but found directory: `{}`",
243-
entry.path().display()
244-
);
252+
if !entry.path_is_symlink() {
253+
// don't warn if it was a symlink - it may legitimately resolve to a directory
254+
warn!(
255+
"Expected file for cache key, but found directory: `{}`",
256+
entry.path().display()
257+
);
258+
}
245259
continue;
246260
}
247261
timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
@@ -346,3 +360,71 @@ enum DirectoryTimestamp {
346360
Timestamp(Timestamp),
347361
Inode(u64),
348362
}
363+
364+
#[cfg(all(test, unix))]
365+
mod tests_unix {
366+
use anyhow::Result;
367+
368+
use super::{CacheInfo, Timestamp};
369+
370+
#[test]
371+
fn test_cache_info_symlink_resolve() -> Result<()> {
372+
let dir = tempfile::tempdir()?;
373+
let dir = dir.path().join("dir");
374+
fs_err::create_dir_all(&dir)?;
375+
376+
let write_manifest = |cache_key: &str| {
377+
fs_err::write(
378+
dir.join("pyproject.toml"),
379+
format!(
380+
r#"
381+
[tool.uv]
382+
cache-keys = [
383+
"{cache_key}"
384+
]
385+
"#
386+
),
387+
)
388+
};
389+
390+
let touch = |path: &str| -> Result<_> {
391+
let path = dir.join(path);
392+
fs_err::create_dir_all(path.parent().unwrap())?;
393+
fs_err::write(&path, "")?;
394+
Ok(Timestamp::from_metadata(&path.metadata()?))
395+
};
396+
397+
let cache_timestamp = || -> Result<_> { Ok(CacheInfo::from_directory(&dir)?.timestamp) };
398+
399+
write_manifest("x/**")?;
400+
assert_eq!(cache_timestamp()?, None);
401+
let y = touch("x/y")?;
402+
assert_eq!(cache_timestamp()?, Some(y));
403+
let z = touch("x/z")?;
404+
assert_eq!(cache_timestamp()?, Some(z));
405+
406+
// leaf entry symlink should be resolved
407+
let a = touch("../a")?;
408+
fs_err::os::unix::fs::symlink(dir.join("../a"), dir.join("x/a"))?;
409+
assert_eq!(cache_timestamp()?, Some(a));
410+
411+
// symlink directories should not be followed while globbing
412+
let c = touch("../b/c")?;
413+
fs_err::os::unix::fs::symlink(dir.join("../b"), dir.join("x/b"))?;
414+
assert_eq!(cache_timestamp()?, Some(a));
415+
416+
// no globs, should work as expected
417+
write_manifest("x/y")?;
418+
assert_eq!(cache_timestamp()?, Some(y));
419+
write_manifest("x/a")?;
420+
assert_eq!(cache_timestamp()?, Some(a));
421+
write_manifest("x/b/c")?;
422+
assert_eq!(cache_timestamp()?, Some(c));
423+
424+
// symlink pointing to a directory
425+
write_manifest("x/*b*")?;
426+
assert_eq!(cache_timestamp()?, None);
427+
428+
Ok(())
429+
}
430+
}

0 commit comments

Comments
 (0)