Skip to content

Commit e866f33

Browse files
bpo-41316: Make tarfile follow specs for FNAME (GH-21511)
When the user specifies an absolute path, tarfile writes the full path to the FNAME field of the GZIP header instead of just the basename. Some archive viewers may process such a file incorrectly. It also creates a security issue, because anyone who receives the archive can learn the directory structure of the system, the username, or other personal information. RFC 1952 says about FNAME: "This is the original name of the file being compressed, with any directory components removed". So tarfile must remove directory names from FNAME and write only the basename of the file. Automerge-Triggered-By: @jaraco (cherry picked from commit 22748a8) Co-authored-by: Artem Bulgakov <[email protected]>
1 parent 6443a8c commit e866f33

File tree

4 files changed

+17
-1
lines changed

4 files changed

+17
-1
lines changed

Lib/tarfile.py

+2
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,8 @@ def _init_write_gz(self):
420420
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
421421
if self.name.endswith(".gz"):
422422
self.name = self.name[:-3]
423+
# Honor "directory components removed" from RFC1952
424+
self.name = os.path.basename(self.name)
423425
# RFC1952 says we must use ISO-8859-1 for the FNAME field.
424426
self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
425427

Lib/test/test_tarfile.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -1384,12 +1384,15 @@ def write(self, data):
13841384
pax_headers={'non': 'empty'})
13851385
self.assertFalse(f.closed)
13861386

1387+
13871388
class GzipWriteTest(GzipTest, WriteTest):
13881389
pass
13891390

1391+
13901392
class Bz2WriteTest(Bz2Test, WriteTest):
13911393
pass
13921394

1395+
13931396
class LzmaWriteTest(LzmaTest, WriteTest):
13941397
pass
13951398

@@ -1432,8 +1435,17 @@ def test_file_mode(self):
14321435
finally:
14331436
os.umask(original_umask)
14341437

1438+
14351439
class GzipStreamWriteTest(GzipTest, StreamWriteTest):
1436-
pass
1440+
def test_source_directory_not_leaked(self):
1441+
"""
1442+
Ensure the source directory is not included in the tar header
1443+
per bpo-41316.
1444+
"""
1445+
tarfile.open(tmpname, self.mode).close()
1446+
payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
1447+
assert os.path.dirname(tmpname) not in payload
1448+
14371449

14381450
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
14391451
decompressor = bz2.BZ2Decompressor if bz2 else None

Misc/ACKS

+1
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ Colm Buckley
237237
Erik de Bueger
238238
Jan-Hein Bührman
239239
Lars Buitinck
240+
Artem Bulgakov
240241
Dick Bulterman
241242
Bill Bumgarner
242243
Jimmy Burgett
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix the :mod:`tarfile` module to write only the basename of the TAR file to the GZIP compression header.

0 commit comments

Comments
 (0)