
Commit 81aa1b0

Haonan Sun authored and facebook-github-bot committed

Revert D33426965: [fbsync] Adding pretrained ViT weights (#5085)
Differential Revision: D33426965
Original commit changeset: 753ce1d1318d
Original Phabricator Diff: D33426965
fbshipit-source-id: db9a9f51c5365b2dd9c002aa681da0be33b3cb7d
1 parent 7a1a7e6 commit 81aa1b0

File tree

2 files changed: +14, -116 lines


references/classification/README.md

Lines changed: 0 additions & 54 deletions

@@ -143,60 +143,6 @@ torchrun --nproc_per_node=8 train.py\
 ```
 Here `$MODEL` is one of `regnet_x_32gf`, `regnet_y_16gf` and `regnet_y_32gf`.

-### Vision Transformer
-
-#### vit_b_16
-```
-torchrun --nproc_per_node=8 train.py\
-    --model vit_b_16 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
-    --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
-    --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
-    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
-```
-
-Note that the above command corresponds to training on a single node with 8 GPUs.
-For generatring the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
-and `--batch_size 64`.
-
-#### vit_b_32
-```
-torchrun --nproc_per_node=8 train.py\
-    --model vit_b_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
-    --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
-    --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment imagenet\
-    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
-```
-
-Note that the above command corresponds to training on a single node with 8 GPUs.
-For generatring the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
-and `--batch_size 256`.
-
-#### vit_l_16
-```
-torchrun --nproc_per_node=8 train.py\
-    --model vit_l_16 --epochs 600 --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr\
-    --lr-warmup-method linear --lr-warmup-epochs 5 --label-smoothing 0.1 --mixup-alpha 0.2\
-    --auto-augment ta_wide --random-erase 0.1 --weight-decay 0.00002 --norm-weight-decay 0.0\
-    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema --val-resize-size 232
-```
-
-Note that the above command corresponds to training on a single node with 8 GPUs.
-For generatring the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
-and `--batch_size 64`.
-
-#### vit_l_32
-```
-torchrun --nproc_per_node=8 train.py\
-    --model vit_l_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
-    --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
-    --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
-    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
-```
-
-Note that the above command corresponds to training on a single node with 8 GPUs.
-For generatring the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
-and `--batch_size 64`.
-
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [torch.cuda.amp](https://pytorch.org/docs/stable/amp.html?highlight=amp#module-torch.cuda.amp).
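
The per-GPU `--batch_size` values quoted in the removed notes keep the effective global batch size equal to that of the single-node commands; for example, for `vit_b_16`, 8 nodes x 8 GPUs x 64 images per GPU is 4096 images per step, the same as 8 GPUs x 512 on one node. A minimal sanity check of that arithmetic (illustrative only, not part of the commit):

```
# Illustrative only: the removed README notes scale the per-GPU batch size down as the
# GPU count grows, which keeps the global batch size of each recipe unchanged.
recipes = {
    "vit_b_16": {"single": (8, 512), "multi": (8 * 8, 64)},   # (gpus, per-gpu batch)
    "vit_b_32": {"single": (8, 512), "multi": (2 * 8, 256)},
    "vit_l_16": {"single": (8, 128), "multi": (2 * 8, 64)},
    "vit_l_32": {"single": (8, 512), "multi": (8 * 8, 64)},
}
for name, cfg in recipes.items():
    single = cfg["single"][0] * cfg["single"][1]
    multi = cfg["multi"][0] * cfg["multi"][1]
    assert single == multi, name
    print(f"{name}: global batch size {single}")
```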

torchvision/prototype/models/vision_transformer.py

Lines changed: 14 additions & 62 deletions

@@ -10,14 +10,12 @@
 import torch
 import torch.nn as nn
 from torch import Tensor
-from torchvision.prototype.transforms import ImageNetEval
-from torchvision.transforms.functional import InterpolationMode

 from ...utils import _log_api_usage_once
-from ._api import WeightsEnum, Weights
-from ._meta import _IMAGENET_CATEGORIES
+from ._api import WeightsEnum
 from ._utils import handle_legacy_interface

+
 __all__ = [
     "VisionTransformer",
     "ViT_B_16_Weights",
@@ -235,70 +233,24 @@ def forward(self, x: torch.Tensor):
         return x


-_COMMON_META = {
-    "categories": _IMAGENET_CATEGORIES,
-    "interpolation": InterpolationMode.BILINEAR,
-}
-
-
 class ViT_B_16_Weights(WeightsEnum):
-    ImageNet1K_V1 = Weights(
-        url="https://download.pytorch.org/models/vit_b_16-c867db91.pth",
-        transforms=partial(ImageNetEval, crop_size=224),
-        meta={
-            **_COMMON_META,
-            "size": (224, 224),
-            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_16",
-            "acc@1": 81.072,
-            "acc@5": 95.318,
-        },
-    )
-    default = ImageNet1K_V1
+    # If a default model is added here the corresponding changes need to be done in vit_b_16
+    pass


 class ViT_B_32_Weights(WeightsEnum):
-    ImageNet1K_V1 = Weights(
-        url="https://download.pytorch.org/models/vit_b_32-d86f8d99.pth",
-        transforms=partial(ImageNetEval, crop_size=224),
-        meta={
-            **_COMMON_META,
-            "size": (224, 224),
-            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_32",
-            "acc@1": 75.912,
-            "acc@5": 92.466,
-        },
-    )
-    default = ImageNet1K_V1
+    # If a default model is added here the corresponding changes need to be done in vit_b_32
+    pass


 class ViT_L_16_Weights(WeightsEnum):
-    ImageNet1K_V1 = Weights(
-        url="https://download.pytorch.org/models/vit_l_16-852ce7e3.pth",
-        transforms=partial(ImageNetEval, crop_size=224, resize_size=242),
-        meta={
-            **_COMMON_META,
-            "size": (224, 224),
-            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16",
-            "acc@1": 79.662,
-            "acc@5": 94.638,
-        },
-    )
-    default = ImageNet1K_V1
+    # If a default model is added here the corresponding changes need to be done in vit_l_16
+    pass


 class ViT_L_32_Weights(WeightsEnum):
-    ImageNet1K_V1 = Weights(
-        url="https://download.pytorch.org/models/vit_l_32-c7638314.pth",
-        transforms=partial(ImageNetEval, crop_size=224),
-        meta={
-            **_COMMON_META,
-            "size": (224, 224),
-            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_32",
-            "acc@1": 76.972,
-            "acc@5": 93.07,
-        },
-    )
-    default = ImageNet1K_V1
+    # If a default model is added here the corresponding changes need to be done in vit_l_32
+    pass


 def _vision_transformer(
@@ -329,7 +281,7 @@ def _vision_transformer(
     return model


-@handle_legacy_interface(weights=("pretrained", ViT_B_16_Weights.ImageNet1K_V1))
+@handle_legacy_interface(weights=("pretrained", None))
 def vit_b_16(*, weights: Optional[ViT_B_16_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
     """
     Constructs a vit_b_16 architecture from
@@ -354,7 +306,7 @@ def vit_b_16(*, weights: Optional[ViT_B_16_Weights] = None, progress: bool = Tru
     )


-@handle_legacy_interface(weights=("pretrained", ViT_B_32_Weights.ImageNet1K_V1))
+@handle_legacy_interface(weights=("pretrained", None))
 def vit_b_32(*, weights: Optional[ViT_B_32_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
     """
     Constructs a vit_b_32 architecture from
@@ -379,7 +331,7 @@ def vit_b_32(*, weights: Optional[ViT_B_32_Weights] = None, progress: bool = Tru
     )


-@handle_legacy_interface(weights=("pretrained", ViT_L_16_Weights.ImageNet1K_V1))
+@handle_legacy_interface(weights=("pretrained", None))
 def vit_l_16(*, weights: Optional[ViT_L_16_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
     """
     Constructs a vit_l_16 architecture from
@@ -404,7 +356,7 @@ def vit_l_16(*, weights: Optional[ViT_L_16_Weights] = None, progress: bool = Tru
     )


-@handle_legacy_interface(weights=("pretrained", ViT_L_32_Weights.ImageNet1K_V1))
+@handle_legacy_interface(weights=("pretrained", None))
 def vit_l_32(*, weights: Optional[ViT_L_32_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
     """
     Constructs a vit_l_32 architecture from
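
After this revert the four ViT weight enums are empty, so the legacy `pretrained` flag no longer resolves to a checkpoint. A minimal usage sketch of the resulting prototype API (assuming the `torchvision.prototype.models` import path at this commit; the snippet is not part of the diff):

```
# Sketch of the post-revert behaviour; assumes the prototype import path at this commit.
from torchvision.prototype.models import ViT_B_16_Weights, vit_b_16

model = vit_b_16(weights=None)      # random initialization; nothing is downloaded
print(list(ViT_B_16_Weights))       # [] -- the enum has no members after the revert
# The legacy call vit_b_16(pretrained=True) likewise has no checkpoint to map to,
# because the handle_legacy_interface default for "pretrained" is now None.
```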

0 commit comments
