Skip to content

Commit de1a587

Browse files
archive: Unconditionally honor PAX size (#441)
This synchronizes our behavior with most other tar parsers (including astral-tokio-tar and Go archive/tar) ensuring that we don't parse things differently. The problem with parsing size in particular differently is it's easy to craft a tar archive that appears completely differently between two parsers. This is the case with e.g. crates.io where astral-tokio-tar is used for validation server side, but cargo uses the `tar` crate to upload. With this, the two projects agree. Signed-off-by: Colin Walters <walters@verbum.org> Co-authored-by: Colin Walters <walters@verbum.org>
1 parent 6071cbe commit de1a587

3 files changed

Lines changed: 159 additions & 4 deletions

File tree

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,11 @@ contents are never required to be entirely resident in memory all at once.
2323
filetime = "0.2.8"
2424

2525
[dev-dependencies]
26+
astral-tokio-tar = "0.5"
2627
rand = { version = "0.8", features = ["small_rng"] }
2728
tempfile = "3"
29+
tokio = { version = "1", features = ["macros", "rt"] }
30+
tokio-stream = "0.1"
2831

2932
[target."cfg(unix)".dependencies]
3033
xattr = { version = "1.1.3", optional = true }

src/archive.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -337,10 +337,11 @@ impl<'a> EntriesFields<'a> {
337337

338338
let file_pos = self.next;
339339
let mut size = header.entry_size()?;
340-
if size == 0 {
341-
if let Some(pax_size) = pax_size {
342-
size = pax_size;
343-
}
340+
// If this exists, it must override the header size. Disagreement among
341+
// parsers allows construction of malicious archives that appear different
342+
// when parsed.
343+
if let Some(pax_size) = pax_size {
344+
size = pax_size;
344345
}
345346
let ret = EntryFields {
346347
size,

tests/all.rs

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1871,3 +1871,154 @@ fn append_data_error_does_not_corrupt_subsequent_entries() {
18711871
assert_eq!(entries.len(), 1);
18721872
assert_eq!(entries[0].path().unwrap().to_str().unwrap(), "clean.txt");
18731873
}
1874+
1875+
/// Build the PAX size smuggling archive described in the original report.
1876+
///
1877+
/// A PAX extended header declares `size=2048` for a regular file whose
1878+
/// actual header size field is 8. A symlink entry is hidden inside the
1879+
/// inflated region. A correct parser honours the PAX size and skips over
1880+
/// the symlink; a buggy one reads only the header size and exposes it.
1881+
fn build_pax_smuggle_archive() -> Vec<u8> {
1882+
const B: usize = 512;
1883+
const INFLATED: usize = 2048;
1884+
let end_of_archive = || std::iter::repeat(0u8).take(B * 2);
1885+
1886+
let mut ar: Vec<u8> = Vec::new();
1887+
1888+
// PAX extended header declaring size=2048 for the next entry.
1889+
let pax_rec = format!("13 size={INFLATED}\n");
1890+
let mut pax_hdr = Header::new_ustar();
1891+
pax_hdr.set_path("./PaxHeaders/regular").unwrap();
1892+
pax_hdr.set_size(pax_rec.as_bytes().len() as u64);
1893+
pax_hdr.set_entry_type(EntryType::XHeader);
1894+
pax_hdr.set_cksum();
1895+
ar.extend_from_slice(pax_hdr.as_bytes());
1896+
ar.extend_from_slice(pax_rec.as_bytes());
1897+
ar.resize(ar.len().next_multiple_of(B), 0);
1898+
1899+
// Regular file whose header says size=8, but PAX says 2048.
1900+
let content = b"regular\n";
1901+
let mut file_hdr = Header::new_ustar();
1902+
file_hdr.set_path("regular.txt").unwrap();
1903+
file_hdr.set_size(content.len() as u64);
1904+
file_hdr.set_entry_type(EntryType::Regular);
1905+
file_hdr.set_cksum();
1906+
ar.extend_from_slice(file_hdr.as_bytes());
1907+
let mark = ar.len();
1908+
ar.extend_from_slice(content);
1909+
ar.resize(ar.len().next_multiple_of(B), 0);
1910+
1911+
// Smuggled symlink hidden in the inflated region.
1912+
let mut sym_hdr = Header::new_ustar();
1913+
sym_hdr.set_path("smuggled").unwrap();
1914+
sym_hdr.set_size(0);
1915+
sym_hdr.set_entry_type(EntryType::Symlink);
1916+
sym_hdr.set_link_name("/etc/shadow").unwrap();
1917+
sym_hdr.set_cksum();
1918+
ar.extend_from_slice(sym_hdr.as_bytes());
1919+
ar.extend(end_of_archive());
1920+
1921+
// Pad to fill the inflated window.
1922+
let used = ar.len() - mark;
1923+
let pad = INFLATED.saturating_sub(used);
1924+
ar.extend(std::iter::repeat(0u8).take(pad.next_multiple_of(B)));
1925+
1926+
// End-of-archive.
1927+
ar.extend(end_of_archive());
1928+
ar
1929+
}
1930+
1931+
/// Regression test for PAX size smuggling.
1932+
///
1933+
/// A crafted archive uses a PAX extended header to declare a file size (2048)
1934+
/// larger than the header's octal size field (8). Before the fix, `tar-rs`
1935+
/// only applied the PAX size override when the header size was 0, so it would
1936+
/// read the small header size, advance too little, and expose a symlink entry
1937+
/// hidden in the "padding" area. After the fix, the PAX size unconditionally
1938+
/// overrides the header size, causing the parser to skip over the smuggled
1939+
/// symlink — matching the behavior of compliant parsers.
1940+
#[test]
1941+
fn pax_size_smuggled_symlink() {
1942+
let data = build_pax_smuggle_archive();
1943+
1944+
let mut archive = Archive::new(random_cursor_reader(&data[..]));
1945+
let entries: Vec<_> = archive
1946+
.entries()
1947+
.unwrap()
1948+
.map(|e| {
1949+
let e = e.unwrap();
1950+
let path = e.path().unwrap().to_path_buf();
1951+
let kind = e.header().entry_type();
1952+
let link = e.link_name().unwrap().map(|l| l.to_path_buf());
1953+
(path, kind, link)
1954+
})
1955+
.collect();
1956+
1957+
// With the fix applied, only "regular.txt" should be visible.
1958+
// The smuggled symlink must NOT appear.
1959+
let expected: Vec<(PathBuf, EntryType, Option<PathBuf>)> =
1960+
vec![(PathBuf::from("regular.txt"), EntryType::Regular, None)];
1961+
assert_eq!(
1962+
entries, expected,
1963+
"smuggled symlink visible or unexpected entries\n\
1964+
got: {entries:?}"
1965+
);
1966+
}
1967+
1968+
/// Cross-validate that `tar` and `astral-tokio-tar` parse the PAX size
1969+
/// smuggling archive identically, guarding against parsing differentials.
1970+
#[tokio::test]
1971+
async fn pax_size_smuggle_matches_astral_tokio_tar() {
1972+
use tokio_stream::StreamExt;
1973+
1974+
let data = build_pax_smuggle_archive();
1975+
1976+
// Parse with sync tar.
1977+
let sync_entries: Vec<_> = {
1978+
let mut ar = Archive::new(&data[..]);
1979+
ar.entries()
1980+
.unwrap()
1981+
.map(|e| {
1982+
let e = e.unwrap();
1983+
let path = e.path().unwrap().to_path_buf();
1984+
let kind = e.header().entry_type();
1985+
let link = e.link_name().unwrap().map(|l| l.to_path_buf());
1986+
(path, kind, link)
1987+
})
1988+
.collect()
1989+
};
1990+
1991+
// Parse with async astral-tokio-tar.
1992+
let async_entries: Vec<_> = {
1993+
let mut ar = tokio_tar::Archive::new(&data[..]);
1994+
let mut entries = ar.entries().unwrap();
1995+
let mut result = Vec::new();
1996+
while let Some(e) = entries.next().await {
1997+
let e = e.unwrap();
1998+
let entry_type = e.header().entry_type();
1999+
result.push((
2000+
e.path().unwrap().to_path_buf(),
2001+
// Map through the raw byte so the two crates' EntryTypes compare.
2002+
EntryType::new(entry_type.as_byte()),
2003+
e.link_name().unwrap().map(|l| l.to_path_buf()),
2004+
));
2005+
}
2006+
result
2007+
};
2008+
2009+
// Assert exact expected content for both parsers independently,
2010+
// so we verify correctness — not just mutual agreement.
2011+
let expected: Vec<(PathBuf, EntryType, Option<PathBuf>)> =
2012+
vec![(PathBuf::from("regular.txt"), EntryType::Regular, None)];
2013+
2014+
assert_eq!(
2015+
sync_entries, expected,
2016+
"tar-rs produced unexpected entries (smuggled symlink visible?)\n\
2017+
got: {sync_entries:?}"
2018+
);
2019+
assert_eq!(
2020+
async_entries, expected,
2021+
"astral-tokio-tar produced unexpected entries (smuggled symlink visible?)\n\
2022+
got: {async_entries:?}"
2023+
);
2024+
}

0 commit comments

Comments
 (0)