Skip to content

Commit b6536be

Browse files
authored
Remove trailing "/" from path names in archives (#1445)
* Remove trailing "/" from path names in ZIP
* Fix path names in all archives
* Update tests
1 parent 5cf9cd9 commit b6536be

File tree

8 files changed

+36
-18
lines changed

8 files changed

+36
-18
lines changed

docs/source/changelog.rst

+7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
Changelog
22
=========
33

4+
2023.12.1
5+
---------
6+
7+
Fixes
8+
9+
- Remove trailing "/" from directory names in zipFS (#1445)
10+
411
2023.12.0
512
---------
613

fsspec/archive.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def info(self, path, **kwargs):
3838
self._get_dirs()
3939
path = self._strip_protocol(path)
4040
if path in {"", "/"} and self.dir_cache:
41-
return {"name": "/", "type": "directory", "size": 0}
41+
return {"name": "", "type": "directory", "size": 0}
4242
if path in self.dir_cache:
4343
return self.dir_cache[path]
4444
elif path + "/" in self.dir_cache:
@@ -64,7 +64,7 @@ def ls(self, path, detail=True, **kwargs):
6464
# root directory entry
6565
ppath = p.rstrip("/").split("/", 1)[0]
6666
if ppath not in paths:
67-
out = {"name": ppath + "/", "size": 0, "type": "directory"}
67+
out = {"name": ppath, "size": 0, "type": "directory"}
6868
paths[ppath] = out
6969
out = sorted(paths.values(), key=lambda _: _["name"])
7070
if detail:

fsspec/implementations/libarchive.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,7 @@ def _get_dirs(self):
164164
continue
165165
self.dir_cache.update(
166166
{
167-
dirname
168-
+ "/": {"name": dirname + "/", "size": 0, "type": "directory"}
167+
dirname: {"name": dirname, "size": 0, "type": "directory"}
169168
for dirname in self._all_dirnames(set(entry.name))
170169
}
171170
)
@@ -178,7 +177,7 @@ def _get_dirs(self):
178177
# not in all formats), so get the directories names from the files names
179178
self.dir_cache.update(
180179
{
181-
dirname + "/": {"name": dirname + "/", "size": 0, "type": "directory"}
180+
dirname: {"name": dirname, "size": 0, "type": "directory"}
182181
for dirname in self._all_dirnames(list_names)
183182
}
184183
)

fsspec/implementations/tar.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,12 @@ def _get_dirs(self):
106106

107107
# This enables ls to get directories as children as well as files
108108
self.dir_cache = {
109-
dirname + "/": {"name": dirname + "/", "size": 0, "type": "directory"}
109+
dirname: {"name": dirname, "size": 0, "type": "directory"}
110110
for dirname in self._all_dirnames(self.tar.getnames())
111111
}
112112
for member in self.tar.getmembers():
113113
info = member.get_info()
114+
info["name"] = info["name"].rstrip("/")
114115
info["type"] = typemap.get(info["type"], "file")
115116
self.dir_cache[info["name"]] = info
116117

fsspec/implementations/tests/test_archive.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -276,10 +276,10 @@ def test_ls(self, scenario: ArchiveTestScenario):
276276
with scenario.provider(archive_data) as archive:
277277
fs = fsspec.filesystem(scenario.protocol, fo=archive)
278278

279-
assert fs.ls("", detail=False) == ["a", "b", "deeply/"]
279+
assert fs.ls("", detail=False) == ["a", "b", "deeply"]
280280
assert fs.ls("/") == fs.ls("")
281281

282-
assert fs.ls("deeply", detail=False) == ["deeply/nested/"]
282+
assert fs.ls("deeply", detail=False) == ["deeply/nested"]
283283
assert fs.ls("deeply/") == fs.ls("deeply")
284284

285285
assert fs.ls("deeply/nested", detail=False) == ["deeply/nested/path"]
@@ -293,8 +293,8 @@ def test_find(self, scenario: ArchiveTestScenario):
293293
assert fs.find("", withdirs=True) == [
294294
"a",
295295
"b",
296-
"deeply/",
297-
"deeply/nested/",
296+
"deeply",
297+
"deeply/nested",
298298
"deeply/nested/path",
299299
]
300300

@@ -347,7 +347,7 @@ def project(mapping, keys):
347347
# Iterate over all directories.
348348
for d in fs._all_dirnames(archive_data.keys()):
349349
lhs = project(fs.info(d), ["name", "size", "type"])
350-
expected = {"name": f"{d}/", "size": 0, "type": "directory"}
350+
expected = {"name": f"{d}", "size": 0, "type": "directory"}
351351
assert lhs == expected
352352

353353
# Iterate over all files.

fsspec/implementations/tests/test_tar.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def test_info():
2626
lhs = fs.info(d)
2727
del lhs["chksum"]
2828
expected = {
29-
"name": f"{d}/",
29+
"name": f"{d}",
3030
"size": 0,
3131
"type": "directory",
3232
"devmajor": 0,
@@ -234,10 +234,10 @@ def test_ls_with_folders(compression: str, tmp_path: Path):
234234
fs = TarFileSystem(fd)
235235
assert fs.find("/", withdirs=True) == [
236236
"a.pdf",
237-
"b/",
237+
"b",
238238
"b/c.pdf",
239-
"d/",
240-
"d/e/",
239+
"d",
240+
"d/e",
241241
"d/e/f.pdf",
242242
"d/g.pdf",
243243
]

fsspec/implementations/tests/test_zip.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ def test_not_cached():
4646
def test_root_info():
4747
with tempzip(archive_data) as z:
4848
fs = fsspec.filesystem("zip", fo=z)
49-
assert fs.info("/") == {"name": "/", "type": "directory", "size": 0}
50-
assert fs.info("") == {"name": "/", "type": "directory", "size": 0}
49+
assert fs.info("/") == {"name": "", "type": "directory", "size": 0}
50+
assert fs.info("") == {"name": "", "type": "directory", "size": 0}
5151

5252

5353
def test_write_seek(m):
@@ -83,3 +83,14 @@ def test_mapper(m):
8383
# fails because this is write mode and we cannot also read
8484
mapper["a"]
8585
assert "a" in mapper # but we can list
86+
87+
88+
def test_zip_glob_star(m):
89+
with fsspec.open(
90+
"zip://adir/afile::memory://out.zip", mode="wb", zip={"mode": "w"}
91+
) as f:
92+
f.write(b"data")
93+
94+
fs, _ = fsspec.core.url_to_fs("zip::memory://out.zip")
95+
outfiles = fs.glob("*")
96+
assert len(outfiles) == 1

fsspec/implementations/zip.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def _get_dirs(self):
8383
# not read from the file.
8484
files = self.zip.infolist()
8585
self.dir_cache = {
86-
dirname + "/": {"name": dirname + "/", "size": 0, "type": "directory"}
86+
dirname: {"name": dirname, "size": 0, "type": "directory"}
8787
for dirname in self._all_dirnames(self.zip.namelist())
8888
}
8989
for z in files:

0 commit comments

Comments
 (0)