Skip to content

Commit a8c8337

Browse files
committed
feat(storage): support misc tar compression; progress
1 parent bb7434a commit a8c8337

File tree

2 files changed

+18
-14
lines changed

2 files changed

+18
-14
lines changed

api/utils/storage/BaseStorage.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import re
33
import subprocess
44
from abc import ABC, abstractmethod
5+
import xtarfile as tarfile
56

67

78
class BaseArchive(ABC):
@@ -22,10 +23,10 @@ def splitext(self):
2223
return base, ext, subext
2324

2425

25-
class TarZstdArchive(BaseArchive):
26+
class TarArchive(BaseArchive):
2627
@staticmethod
2728
def test(path):
28-
return re.search(r"\.tar\.zstd?$", path)
29+
return re.search(r"\.tar", path)
2930

3031
def extract(self, dir, dry_run=False):
3132
self.updateStatus("extract", 0)
@@ -36,25 +37,26 @@ def extract(self, dir, dry_run=False):
3637

3738
if not dry_run:
3839
os.mkdir(dir)
39-
subprocess.run(
40-
[
41-
"tar",
42-
"--use-compress-program=unzstd",
43-
"-C",
44-
dir,
45-
"-xvf",
46-
self.path,
47-
],
48-
check=True,
49-
)
40+
41+
def track_progress(tar):
42+
i = 0
43+
members = tar.getmembers()
44+
for member in members:
45+
i += 1
46+
self.updateStatus("extract", i / len(members))
47+
yield member
48+
49+
with tarfile.open(self.path, "r") as tar:
50+
tar.extractall(path=dir, members=track_progress(tar))
51+
tar.close()
5052
subprocess.run(["ls", "-l"])
5153
os.remove(self.path)
5254

5355
self.updateStatus("extract", 1)
5456
return dir # , base, ext, subext
5557

5658

57-
archiveClasses = [TarZstdArchive]
59+
archiveClasses = [TarArchive]
5860

5961

6062
def Archive(path, **kwargs):

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,5 @@ datasets==2.8.0
5555
omegaconf==2.3.0
5656
pytorch_lightning==1.9.2
5757
tensorboard==2.12.0
58+
59+
xtarfile[zstd]==0.1.0

0 commit comments

Comments
 (0)