Skip to content

Commit b311461

Browse files
jdsgomes authored and NicolasHug committed
[fbsync] update urls for kinetics dataset (#5578)
Summary:
* update urls for kinetics dataset
* update urls for kinetics dataset
* remove errors
* update the changes and add test option to split
* added test to valid values for split arg
* change .txt to .csv for annotation url of k600

(Note: this ignores all push blocking failures!)

Reviewed By: datumbox
Differential Revision: D35216772
fbshipit-source-id: 558aad2137bdb6808cbbe863f2d01e7b490fa329
Co-authored-by: Nicolas Hug <[email protected]>
1 parent c71b846 commit b311461

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

torchvision/datasets/kinetics.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import csv
22
import os
33
import time
4+
import urllib
45
import warnings
56
from functools import partial
67
from multiprocessing import Pool
@@ -53,7 +54,7 @@ class Kinetics(VisionDataset):
5354
Note: split is appended automatically using the split argument.
5455
frames_per_clip (int): number of frames in a clip
5556
num_classes (int): select between Kinetics-400 (default), Kinetics-600, and Kinetics-700
56-
split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"``
57+
split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"`` ``"test"``
5758
frame_rate (float): If omitted, interpolate different frame rate for each clip.
5859
step_between_clips (int): number of frames between each clip
5960
transform (callable, optional): A function/transform that takes in a TxHxWxC video
@@ -81,7 +82,7 @@ class Kinetics(VisionDataset):
8182
}
8283
_ANNOTATION_URLS = {
8384
"400": "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv",
84-
"600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt",
85+
"600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.csv",
8586
"700": "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv",
8687
}
8788

@@ -122,7 +123,7 @@ def __init__(
122123
raise ValueError("Cannot download the videos using legacy_structure.")
123124
else:
124125
self.split_folder = path.join(root, split)
125-
self.split = verify_str_arg(split, arg="split", valid_values=["train", "val"])
126+
self.split = verify_str_arg(split, arg="split", valid_values=["train", "val", "test"])
126127

127128
if download:
128129
self.download_and_process_videos()
@@ -177,17 +178,16 @@ def _download_videos(self) -> None:
177178
split_url_filepath = path.join(file_list_path, path.basename(split_url))
178179
if not check_integrity(split_url_filepath):
179180
download_url(split_url, file_list_path)
180-
list_video_urls = open(split_url_filepath)
181+
with open(split_url_filepath) as file:
182+
list_video_urls = [urllib.parse.quote(line, safe="/,:") for line in file.read().splitlines()]
181183

182184
if self.num_download_workers == 1:
183-
for line in list_video_urls.readlines():
184-
line = str(line).replace("\n", "")
185+
for line in list_video_urls:
185186
download_and_extract_archive(line, tar_path, self.split_folder)
186187
else:
187188
part = partial(_dl_wrap, tar_path, self.split_folder)
188-
lines = [str(line).replace("\n", "") for line in list_video_urls.readlines()]
189189
poolproc = Pool(self.num_download_workers)
190-
poolproc.map(part, lines)
190+
poolproc.map(part, list_video_urls)
191191

192192
def _make_ds_structure(self) -> None:
193193
"""move videos from

0 commit comments

Comments
 (0)