update urls for kinetics dataset #5578

Merged
merged 9 commits on Mar 22, 2022
16 changes: 8 additions & 8 deletions torchvision/datasets/kinetics.py
@@ -1,6 +1,7 @@
import csv
import os
import time
import urllib
import warnings
from functools import partial
from multiprocessing import Pool
@@ -53,7 +54,7 @@ class Kinetics(VisionDataset):
Note: split is appended automatically using the split argument.
frames_per_clip (int): number of frames in a clip
num_classes (int): select between Kinetics-400 (default), Kinetics-600, and Kinetics-700
split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"``
split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"`` ``"test"``
frame_rate (float): If omitted, interpolate different frame rate for each clip.
step_between_clips (int): number of frames between each clip
transform (callable, optional): A function/transform that takes in a TxHxWxC video
@@ -81,7 +82,7 @@ class Kinetics(VisionDataset):
}
_ANNOTATION_URLS = {
"400": "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv",
"600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt",
"600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.csv",
"700": "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv",
}
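
For context (this sketch is not part of the diff): with the fix above, all three annotation templates point to .csv files, and the {split} placeholder is filled in with the requested split before the file is downloaded. A minimal illustration of that expansion, using throwaway variable names rather than the class's internals:

_ANNOTATION_URLS = {
    "400": "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv",
    "600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.csv",
    "700": "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv",
}

num_classes, split = "600", "val"  # illustrative values
annotation_url = _ANNOTATION_URLS[num_classes].format(split=split)
# -> https://s3.amazonaws.com/kinetics/600/annotations/val.csv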

@@ -122,7 +123,7 @@ def __init__(
raise ValueError("Cannot download the videos using legacy_structure.")
else:
self.split_folder = path.join(root, split)
self.split = verify_str_arg(split, arg="split", valid_values=["train", "val"])
self.split = verify_str_arg(split, arg="split", valid_values=["train", "val", "test"])

if download:
self.download_and_process_videos()
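
With "test" accepted by verify_str_arg, the dataset can now be built for the test split; the split name is also what gets appended to root to form split_folder. A hedged usage sketch (paths and clip settings below are placeholders, not values from this PR):

from torchvision.datasets import Kinetics

dataset = Kinetics(
    root="datasets/kinetics400",  # hypothetical local path
    frames_per_clip=16,           # illustrative value
    num_classes="400",
    split="test",                 # previously rejected, now a valid choice
    download=False,               # set True to fetch annotations and videos
)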
@@ -177,17 +178,16 @@ def _download_videos(self) -> None:
split_url_filepath = path.join(file_list_path, path.basename(split_url))
if not check_integrity(split_url_filepath):
download_url(split_url, file_list_path)
list_video_urls = open(split_url_filepath)
with open(split_url_filepath) as file:
list_video_urls = [urllib.parse.quote(line, safe="/,:") for line in file.read().splitlines()]

if self.num_download_workers == 1:
for line in list_video_urls.readlines():
line = str(line).replace("\n", "")
for line in list_video_urls:
download_and_extract_archive(line, tar_path, self.split_folder)
else:
part = partial(_dl_wrap, tar_path, self.split_folder)
lines = [str(line).replace("\n", "") for line in list_video_urls.readlines()]
poolproc = Pool(self.num_download_workers)
poolproc.map(part, lines)
poolproc.map(part, list_video_urls)

def _make_ds_structure(self) -> None:
"""move videos from
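A note on the quoting added in _download_videos above: each line of the downloaded file list is now percent-encoded with urllib.parse.quote(line, safe="/,:"), so characters that are not URL-safe get escaped while "/", "," and ":" are left alone and the scheme and path separators survive; file.read().splitlines() also drops the trailing newlines that the old readlines()/replace("\n", "") code stripped by hand. A minimal sketch of the behaviour, using a made-up URL rather than a real file-list entry:

import urllib.parse

line = "https://s3.amazonaws.com/kinetics/400/train/part 000.tar.gz"  # made-up example
quoted = urllib.parse.quote(line, safe="/,:")
# -> "https://s3.amazonaws.com/kinetics/400/train/part%20000.tar.gz"

With the default safe="/", the ":" after the scheme would also be encoded (https%3A//...), which is why the custom safe set is passed.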