Skip to content

Commit 16af667

Browse files
authored
improve error handling for GDrive downloads (#5704)
* improve error handling for GDrive downloads
* perform HTML check regardless of MD5 check
1 parent ac56f52 commit 16af667

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

torchvision/datasets/utils.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import bz2
2+
import contextlib
23
import gzip
34
import hashlib
45
import itertools
@@ -262,6 +263,26 @@ def download_file_from_google_drive(file_id: str, root: str, filename: Optional[
262263

263264
_save_response_content(content, fpath)
264265

266+
# In case we deal with an unhandled GDrive API response, the file should be smaller than 10kB and contain only text
267+
if os.stat(fpath).st_size < 10 * 1024:
268+
with contextlib.suppress(UnicodeDecodeError), open(fpath) as fh:
269+
text = fh.read()
270+
# Regular expression to detect HTML. Copied from https://stackoverflow.com/a/70585604
271+
if re.search(r"</?\s*[a-z-][^>]*\s*>|(&(?:[\w\d]+|#\d+|#x[a-f\d]+);)", text):
272+
warnings.warn(
273+
f"We detected some HTML elements in the downloaded file. "
274+
f"This most likely means that the download triggered an unhandled API response by GDrive. "
275+
f"Please report this to torchvision at https://github.com/pytorch/vision/issues including "
276+
f"the response:\n\n{text}"
277+
)
278+
279+
if md5 and not check_md5(fpath, md5):
280+
raise RuntimeError(
281+
f"The MD5 checksum of the download file {fpath} does not match the one on record."
282+
f"Please delete the file and try again. "
283+
f"If the issue persists, please report this to torchvision at https://github.com/pytorch/vision/issues."
284+
)
285+
265286

266287
def _extract_tar(from_path: str, to_path: str, compression: Optional[str]) -> None:
267288
with tarfile.open(from_path, f"r:{compression[1:]}" if compression else "r") as tar:

0 commit comments

Comments
 (0)