|
#!/bin/bash
#
# Extract the ImageNet (ILSVRC2012) dataset into one directory per class.
#
# Required inputs, both in the current directory:
#   ILSVRC2012_img_train.tar (about 138 GB)
#   ILSVRC2012_img_val.tar   (about 6.3 GB)
#
# Both archives are deleted once they have been extracted successfully.
#
# Adapted from:
#   https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md
#   https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4
#
# Resulting layout:
#
#   imagenet/train/
#   ├── n01440764
#   │   ├── n01440764_10026.JPEG
#   │   ├── n01440764_10027.JPEG
#   │   ├── ......
#   ├── ......
#   imagenet/val/
#   ├── n01440764
#   │   ├── ILSVRC2012_val_00000293.JPEG
#   │   ├── ILSVRC2012_val_00002138.JPEG
#   │   ├── ......
#   ├── ......
#
# Check the total number of files after extraction (run inside imagenet/):
#
#   $ find train/ -name "*.JPEG" | wc -l
#   1281167
#   $ find val/ -name "*.JPEG" | wc -l
#   50000
#

# Abort on the first failing command, unset variable or failed pipeline
# stage, so a failed step can never cause later steps to run against the
# wrong directory.
set -euo pipefail

readonly TRAIN_TAR="ILSVRC2012_img_train.tar"
readonly VAL_TAR="ILSVRC2012_img_val.tar"
readonly OUT_DIR="imagenet"
readonly VALPREP_URL="https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh"

# Path of the downloaded valprep.sh copy; empty until it has been created.
valprep_script=""

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Remove the temporary valprep.sh copy, if one was created.
# Globals: valprep_script (read)
#######################################
cleanup() {
  if [[ -n "${valprep_script}" ]]; then
    rm -f -- "${valprep_script}"
  fi
}
trap cleanup EXIT

#######################################
# Extract the training archive into imagenet/train/<class>/.
# Globals: TRAIN_TAR, OUT_DIR (read)
#######################################
extract_train() {
  local train_dir="${OUT_DIR}/train"
  local class_tar

  mkdir -p -- "${train_dir}"

  # Extract straight into the target directory instead of cd-ing into it, so
  # the working directory never changes. The archive is removed only after
  # tar succeeded (set -e stops the script before rm otherwise).
  tar -xvf "${TRAIN_TAR}" -C "${train_dir}"
  rm -f -- "${TRAIN_TAR}"

  # At this stage imagenet/train contains one .tar file per category.
  # For each of them:
  #   1. create a directory with the same name as the .tar file
  #   2. extract the contents of the .tar file into that directory
  #   3. remove the .tar file (only reached when extraction succeeded)
  # NUL-delimited names plus 'IFS= read -r' keep unusual file names intact.
  while IFS= read -r -d '' class_tar; do
    mkdir -p -- "${class_tar%.tar}"
    tar -xvf "${class_tar}" -C "${class_tar%.tar}"
    rm -f -- "${class_tar}"
  done < <(find "${train_dir}" -name '*.tar' -print0)
}

#######################################
# Extract the validation archive into imagenet/val and sort the images into
# per-class directories using soumith's valprep.sh.
# Globals: VAL_TAR, OUT_DIR, VALPREP_URL (read); valprep_script (written)
#######################################
extract_val() {
  local val_dir="${OUT_DIR}/val"

  mkdir -p -- "${val_dir}"
  tar -xvf "${VAL_TAR}" -C "${val_dir}"
  rm -f -- "${VAL_TAR}"

  # Download valprep.sh to a file first rather than piping wget into bash:
  # a failed or empty download is detected here instead of being silently
  # treated as a successful no-op.
  # NOTE(security): this still executes code fetched from the network;
  # review the script at VALPREP_URL before running in a sensitive setup.
  valprep_script=$(mktemp) || die "mktemp failed"
  wget -q -O "${valprep_script}" "${VALPREP_URL}" \
    || die "failed to download ${VALPREP_URL}"
  [[ -s "${valprep_script}" ]] || die "downloaded valprep.sh is empty"

  # valprep.sh creates the class directories and moves the images relative
  # to the current directory; run it in a subshell so the cd stays local.
  (cd -- "${val_dir}" && bash "${valprep_script}")
}

#######################################
# Verify the prerequisites up front, then extract both dataset splits.
#######################################
main() {
  [[ -f "${TRAIN_TAR}" ]] || die "${TRAIN_TAR} not found in ${PWD}"
  [[ -f "${VAL_TAR}" ]] || die "${VAL_TAR} not found in ${PWD}"
  command -v wget >/dev/null || die "wget is required but was not found"

  extract_train
  extract_val
}

main "$@"
0 commit comments