From 5639f66f6707bf3f41a0bbd09318ab1c4b6e1b29 Mon Sep 17 00:00:00 2001 From: Artem Bulgakov Date: Thu, 16 Jul 2020 21:39:30 +0300 Subject: [PATCH 1/5] bpo-41316: Make tarfile follow specs for FNAME tarfile writes the full path to the FNAME field of the GZIP header instead of just the basename if the user specified an absolute path. Some archive viewers may process such a file incorrectly. It also creates a security issue, because anyone who receives the archive can learn the directory structure of the system it was created on, including the username or other personal information. RFC1952 says about FNAME: "This is the original name of the file being compressed, with any directory components removed." So tarfile must remove directory names from FNAME and write only the basename of the file. --- Lib/tarfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 6769066cabd6fc..119b61f8eadac6 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -420,6 +420,8 @@ def _init_write_gz(self): self.__write(b"\037\213\010\010" + timestamp + b"\002\377") if self.name.endswith(".gz"): self.name = self.name[:-3] + # Remove directory components + self.name = os.path.basename(self.name) # RFC1952 says we must use ISO-8859-1 for the FNAME field. 
self.__write(self.name.encode("iso-8859-1", "replace") + NUL) From 6f751feed40a85c51123d45200b7acf05948643a Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 28 Jul 2020 12:08:59 +0000 Subject: [PATCH 2/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst diff --git a/Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst b/Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst new file mode 100644 index 00000000000000..139a170866ed49 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst @@ -0,0 +1 @@ +Fix the :mod:`tarfile` module to write only the basename of the TAR file to the GZIP compression header. \ No newline at end of file From 4a8bd2933fafc72567a4870b2564f74e6d600962 Mon Sep 17 00:00:00 2001 From: Artem Bulgakov Date: Tue, 28 Jul 2020 15:11:11 +0300 Subject: [PATCH 3/5] Update ACKS --- Misc/ACKS | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/ACKS b/Misc/ACKS index f5e9459276c86c..a9b9f5b7dbc488 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -242,6 +242,7 @@ Colm Buckley Erik de Bueger Jan-Hein Bührman Lars Buitinck +Artem Bulgakov Dick Bulterman Bill Bumgarner Jimmy Burgett From 394888035a21a21dc6584527a06d903f0e83cdc6 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 7 Sep 2020 12:22:15 -0400 Subject: [PATCH 4/5] Add test capturing missed expectation when the path appears in the resultant tarfile. 
--- Lib/test/test_tarfile.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 3ddeb97f5268fe..1e5186a90b37f9 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1416,12 +1416,15 @@ def write(self, data): pax_headers={'non': 'empty'}) self.assertFalse(f.closed) + class GzipWriteTest(GzipTest, WriteTest): pass + class Bz2WriteTest(Bz2Test, WriteTest): pass + class LzmaWriteTest(LzmaTest, WriteTest): pass @@ -1464,8 +1467,17 @@ def test_file_mode(self): finally: os.umask(original_umask) + class GzipStreamWriteTest(GzipTest, StreamWriteTest): - pass + def test_source_directory_not_leaked(self): + """ + Ensure the source directory is not included in the tar header + per bpo-41316. + """ + tarfile.open(tmpname, self.mode).close() + payload = pathlib.Path(tmpname).read_text(encoding='latin-1') + assert os.path.dirname(tmpname) not in payload + class Bz2StreamWriteTest(Bz2Test, StreamWriteTest): decompressor = bz2.BZ2Decompressor if bz2 else None From 4baaf5ee34e5a8e1fd45177c618c359b12f6cce2 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 7 Sep 2020 12:23:23 -0400 Subject: [PATCH 5/5] Reference RFC and precise language for easier searching. --- Lib/tarfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 119b61f8eadac6..1fae29430fefff 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -420,7 +420,7 @@ def _init_write_gz(self): self.__write(b"\037\213\010\010" + timestamp + b"\002\377") if self.name.endswith(".gz"): self.name = self.name[:-3] - # Remove directory components + # Honor "directory components removed" from RFC1952 self.name = os.path.basename(self.name) # RFC1952 says we must use ISO-8859-1 for the FNAME field. self.__write(self.name.encode("iso-8859-1", "replace") + NUL)