File tree Expand file tree Collapse file tree 2 files changed +22
-8
lines changed
references/video_classification Expand file tree Collapse file tree 2 files changed +22
-8
lines changed Original file line number Diff line number Diff line change @@ -81,6 +81,7 @@ Video resnet models:
81
81
```
82
82
# number of frames per clip
83
83
--clip-len 16 \
84
+ --frame-rate 15 \
84
85
# allow for temporal jittering
85
86
--clips-per-video 5 \
86
87
--batch-size 24 \
@@ -97,6 +98,21 @@ Video resnet models:
97
98
--val-crop-size 112 112
98
99
```
99
100
101
+ ### S3D
102
+
103
+ The S3D model was trained similarly to the above, but with the following changes to the default configuration:
104
+ ```
105
+ --batch-size=12 --lr 0.2 --clip-len 64 --clips-per-video 5 --sync-bn \
106
+ --train-resize-size 256 256 --train-crop-size 224 224 --val-resize-size 256 256 --val-crop-size 224 224
107
+ ```
108
+
109
+ We used 64 GPUs to train the architecture.
110
+
111
+ To estimate the validation statistics of the model, we run the reference script with the following configuration:
112
+ ```
113
+ --batch-size=16 --test-only --clip-len 128 --clips-per-video 1
114
+ ```
115
+
100
116
### Additional video modelling resources
101
117
102
118
- [Video Model Zoo](https://github.com/facebookresearch/VMZ)
Original file line number Diff line number Diff line change @@ -104,7 +104,7 @@ class S3D(nn.Module):
104
104
def __init__ (
105
105
self ,
106
106
num_classes : int = 400 ,
107
- dropout : float = 0.0 ,
107
+ dropout : float = 0.2 ,
108
108
norm_layer : Optional [Callable [..., torch .nn .Module ]] = None ,
109
109
) -> None :
110
110
super ().__init__ ()
@@ -153,28 +153,26 @@ def forward(self, x):
153
153
154
154
class S3D_Weights (WeightsEnum ):
155
155
KINETICS400_V1 = Weights (
156
- url = "https://download.pytorch.org/models/s3d-1bd8ae63 .pth" ,
156
+ url = "https://download.pytorch.org/models/s3d-d76dad2f .pth" ,
157
157
transforms = partial (
158
158
VideoClassification ,
159
159
crop_size = (224 , 224 ),
160
160
resize_size = (256 , 256 ),
161
- mean = (0.5 , 0.5 , 0.5 ),
162
- std = (0.5 , 0.5 , 0.5 ),
163
161
),
164
162
meta = {
165
163
"min_size" : (224 , 224 ),
166
164
"min_temporal_size" : 14 ,
167
165
"categories" : _KINETICS400_CATEGORIES ,
168
- "recipe" : "https://github.com/pytorch/vision/pull/6412#issuecomment-1219687434 " ,
166
+ "recipe" : "https://github.com/pytorch/vision/tree/main/references/video_classification#s3d " ,
169
167
"_docs" : (
170
- "The weights are ported from a community repository . The accuracies are estimated on clip-level "
168
+ "The weights aim to approximate the accuracy of the paper . The accuracies are estimated on clip-level "
171
169
"with parameters `frame_rate=15`, `clips_per_video=1`, and `clip_len=128`."
172
170
),
173
171
"num_params" : 8320048 ,
174
172
"_metrics" : {
175
173
"Kinetics-400" : {
176
- "acc@1" : 67.315 ,
177
- "acc@5" : 87.593 ,
174
+ "acc@1" : 68.368 ,
175
+ "acc@5" : 88.050 ,
178
176
}
179
177
},
180
178
},
You can’t perform that action at this time.
0 commit comments