@@ -1871,3 +1871,154 @@ fn append_data_error_does_not_corrupt_subsequent_entries() {
18711871 assert_eq ! ( entries. len( ) , 1 ) ;
18721872 assert_eq ! ( entries[ 0 ] . path( ) . unwrap( ) . to_str( ) . unwrap( ) , "clean.txt" ) ;
18731873}
1874+
1875+ /// Build the PAX size smuggling archive described in the original report.
1876+ ///
1877+ /// A PAX extended header declares `size=2048` for a regular file whose
1878+ /// actual header size field is 8. A symlink entry is hidden inside the
1879+ /// inflated region. A correct parser honours the PAX size and skips over
1880+ /// the symlink; a buggy one reads only the header size and exposes it.
1881+ fn build_pax_smuggle_archive ( ) -> Vec < u8 > {
1882+ const B : usize = 512 ;
1883+ const INFLATED : usize = 2048 ;
1884+ let end_of_archive = || std:: iter:: repeat ( 0u8 ) . take ( B * 2 ) ;
1885+
1886+ let mut ar: Vec < u8 > = Vec :: new ( ) ;
1887+
1888+ // PAX extended header declaring size=2048 for the next entry.
1889+ let pax_rec = format ! ( "13 size={INFLATED}\n " ) ;
1890+ let mut pax_hdr = Header :: new_ustar ( ) ;
1891+ pax_hdr. set_path ( "./PaxHeaders/regular" ) . unwrap ( ) ;
1892+ pax_hdr. set_size ( pax_rec. as_bytes ( ) . len ( ) as u64 ) ;
1893+ pax_hdr. set_entry_type ( EntryType :: XHeader ) ;
1894+ pax_hdr. set_cksum ( ) ;
1895+ ar. extend_from_slice ( pax_hdr. as_bytes ( ) ) ;
1896+ ar. extend_from_slice ( pax_rec. as_bytes ( ) ) ;
1897+ ar. resize ( ar. len ( ) . next_multiple_of ( B ) , 0 ) ;
1898+
1899+ // Regular file whose header says size=8, but PAX says 2048.
1900+ let content = b"regular\n " ;
1901+ let mut file_hdr = Header :: new_ustar ( ) ;
1902+ file_hdr. set_path ( "regular.txt" ) . unwrap ( ) ;
1903+ file_hdr. set_size ( content. len ( ) as u64 ) ;
1904+ file_hdr. set_entry_type ( EntryType :: Regular ) ;
1905+ file_hdr. set_cksum ( ) ;
1906+ ar. extend_from_slice ( file_hdr. as_bytes ( ) ) ;
1907+ let mark = ar. len ( ) ;
1908+ ar. extend_from_slice ( content) ;
1909+ ar. resize ( ar. len ( ) . next_multiple_of ( B ) , 0 ) ;
1910+
1911+ // Smuggled symlink hidden in the inflated region.
1912+ let mut sym_hdr = Header :: new_ustar ( ) ;
1913+ sym_hdr. set_path ( "smuggled" ) . unwrap ( ) ;
1914+ sym_hdr. set_size ( 0 ) ;
1915+ sym_hdr. set_entry_type ( EntryType :: Symlink ) ;
1916+ sym_hdr. set_link_name ( "/etc/shadow" ) . unwrap ( ) ;
1917+ sym_hdr. set_cksum ( ) ;
1918+ ar. extend_from_slice ( sym_hdr. as_bytes ( ) ) ;
1919+ ar. extend ( end_of_archive ( ) ) ;
1920+
1921+ // Pad to fill the inflated window.
1922+ let used = ar. len ( ) - mark;
1923+ let pad = INFLATED . saturating_sub ( used) ;
1924+ ar. extend ( std:: iter:: repeat ( 0u8 ) . take ( pad. next_multiple_of ( B ) ) ) ;
1925+
1926+ // End-of-archive.
1927+ ar. extend ( end_of_archive ( ) ) ;
1928+ ar
1929+ }
1930+
1931+ /// Regression test for PAX size smuggling.
1932+ ///
1933+ /// A crafted archive uses a PAX extended header to declare a file size (2048)
1934+ /// larger than the header's octal size field (8). Before the fix, `tar-rs`
1935+ /// only applied the PAX size override when the header size was 0, so it would
1936+ /// read the small header size, advance too little, and expose a symlink entry
1937+ /// hidden in the "padding" area. After the fix, the PAX size unconditionally
1938+ /// overrides the header size, causing the parser to skip over the smuggled
1939+ /// symlink — matching the behavior of compliant parsers.
1940+ #[ test]
1941+ fn pax_size_smuggled_symlink ( ) {
1942+ let data = build_pax_smuggle_archive ( ) ;
1943+
1944+ let mut archive = Archive :: new ( random_cursor_reader ( & data[ ..] ) ) ;
1945+ let entries: Vec < _ > = archive
1946+ . entries ( )
1947+ . unwrap ( )
1948+ . map ( |e| {
1949+ let e = e. unwrap ( ) ;
1950+ let path = e. path ( ) . unwrap ( ) . to_path_buf ( ) ;
1951+ let kind = e. header ( ) . entry_type ( ) ;
1952+ let link = e. link_name ( ) . unwrap ( ) . map ( |l| l. to_path_buf ( ) ) ;
1953+ ( path, kind, link)
1954+ } )
1955+ . collect ( ) ;
1956+
1957+ // With the fix applied, only "regular.txt" should be visible.
1958+ // The smuggled symlink must NOT appear.
1959+ let expected: Vec < ( PathBuf , EntryType , Option < PathBuf > ) > =
1960+ vec ! [ ( PathBuf :: from( "regular.txt" ) , EntryType :: Regular , None ) ] ;
1961+ assert_eq ! (
1962+ entries, expected,
1963+ "smuggled symlink visible or unexpected entries\n \
1964+ got: {entries:?}"
1965+ ) ;
1966+ }
1967+
1968+ /// Cross-validate that `tar` and `astral-tokio-tar` parse the PAX size
1969+ /// smuggling archive identically, guarding against parsing differentials.
1970+ #[ tokio:: test]
1971+ async fn pax_size_smuggle_matches_astral_tokio_tar ( ) {
1972+ use tokio_stream:: StreamExt ;
1973+
1974+ let data = build_pax_smuggle_archive ( ) ;
1975+
1976+ // Parse with sync tar.
1977+ let sync_entries: Vec < _ > = {
1978+ let mut ar = Archive :: new ( & data[ ..] ) ;
1979+ ar. entries ( )
1980+ . unwrap ( )
1981+ . map ( |e| {
1982+ let e = e. unwrap ( ) ;
1983+ let path = e. path ( ) . unwrap ( ) . to_path_buf ( ) ;
1984+ let kind = e. header ( ) . entry_type ( ) ;
1985+ let link = e. link_name ( ) . unwrap ( ) . map ( |l| l. to_path_buf ( ) ) ;
1986+ ( path, kind, link)
1987+ } )
1988+ . collect ( )
1989+ } ;
1990+
1991+ // Parse with async astral-tokio-tar.
1992+ let async_entries: Vec < _ > = {
1993+ let mut ar = tokio_tar:: Archive :: new ( & data[ ..] ) ;
1994+ let mut entries = ar. entries ( ) . unwrap ( ) ;
1995+ let mut result = Vec :: new ( ) ;
1996+ while let Some ( e) = entries. next ( ) . await {
1997+ let e = e. unwrap ( ) ;
1998+ let entry_type = e. header ( ) . entry_type ( ) ;
1999+ result. push ( (
2000+ e. path ( ) . unwrap ( ) . to_path_buf ( ) ,
2001+ // Map through the raw byte so the two crates' EntryTypes compare.
2002+ EntryType :: new ( entry_type. as_byte ( ) ) ,
2003+ e. link_name ( ) . unwrap ( ) . map ( |l| l. to_path_buf ( ) ) ,
2004+ ) ) ;
2005+ }
2006+ result
2007+ } ;
2008+
2009+ // Assert exact expected content for both parsers independently,
2010+ // so we verify correctness — not just mutual agreement.
2011+ let expected: Vec < ( PathBuf , EntryType , Option < PathBuf > ) > =
2012+ vec ! [ ( PathBuf :: from( "regular.txt" ) , EntryType :: Regular , None ) ] ;
2013+
2014+ assert_eq ! (
2015+ sync_entries, expected,
2016+ "tar-rs produced unexpected entries (smuggled symlink visible?)\n \
2017+ got: {sync_entries:?}"
2018+ ) ;
2019+ assert_eq ! (
2020+ async_entries, expected,
2021+ "astral-tokio-tar produced unexpected entries (smuggled symlink visible?)\n \
2022+ got: {async_entries:?}"
2023+ ) ;
2024+ }
0 commit comments