Skip to content

Add Flowers102 dataset #5177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Jan 12, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
53fd509
Add Flowers102 datasets
zhiqwang Jan 6, 2022
79d0596
Fix initialization of images and labels
zhiqwang Jan 8, 2022
3b7ce90
Fix _check_exists in Flowers102
zhiqwang Jan 8, 2022
38df988
Add Flowers102 to datasets and docs
zhiqwang Jan 8, 2022
ba8889b
Add Flowers102TestCase to unittest
zhiqwang Jan 8, 2022
bf3e8d5
Fixing Python type statically
zhiqwang Jan 8, 2022
8cf1bfd
Shuffle the fake labels
zhiqwang Jan 9, 2022
f3949ab
Merge branch 'main' into datasets/flowers-102
zhiqwang Jan 10, 2022
4792f9e
Update test/test_datasets.py
zhiqwang Jan 10, 2022
fb3ae0d
Apply the suggestions by pmeier
zhiqwang Jan 10, 2022
d4b00a3
Use check_integrity to check file existence
zhiqwang Jan 12, 2022
b55568f
Save the labels to base_folder
zhiqwang Jan 12, 2022
52b6bb8
Merged with upstream
zhiqwang Jan 12, 2022
7fb9876
Minor fixes
zhiqwang Jan 12, 2022
87cc4f1
Using a loop makes this more concise without reducing readability
zhiqwang Jan 12, 2022
d84399e
Using a loop makes this more concise without reducing readability
zhiqwang Jan 12, 2022
6adabad
Remove self.labels and self.label_to_index attributes
zhiqwang Jan 12, 2022
8618415
minor simplification
pmeier Jan 12, 2022
2bb1ee6
Check the existence of image folder
zhiqwang Jan 12, 2022
9fef169
Revert the check
zhiqwang Jan 12, 2022
d8a343a
Check the existence of image folder
zhiqwang Jan 12, 2022
d3d0698
valid -> val
NicolasHug Jan 12, 2022
7fa9c67
keep some stuff private
NicolasHug Jan 12, 2022
ce957c6
minor doc arrangements
NicolasHug Jan 12, 2022
a5b701e
remove default FEATURE_TYPES
NicolasHug Jan 12, 2022
4b21a2f
Simplify the datasets existence
zhiqwang Jan 12, 2022
53ad2c9
check if the image folder exists
zhiqwang Jan 12, 2022
0791dfc
isdir -> is_dir
NicolasHug Jan 12, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ You can also create your own datasets using the provided :ref:`base classes <bas
FER2013
Flickr8k
Flickr30k
Flowers102
FlyingChairs
FlyingThings3D
Food101
Expand Down
40 changes: 40 additions & 0 deletions test/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2463,5 +2463,45 @@ def _meta_to_split_and_classification_ann(self, meta, idx):
return (image_id, class_id, species, breed_id)


class Flowers102TestCase(datasets_utils.ImageDatasetTestCase):
    """Tests for ``datasets.Flowers102`` driven by a tiny synthetic dataset on disk."""

    DATASET_CLASS = datasets.Flowers102
    FEATURE_TYPES = (PIL.Image.Image, int)

    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "valid", "test"))
    REQUIRED_PACKAGES = ("scipy",)

    def inject_fake_data(self, tmpdir: str, config):
        """Write fake images plus ``imagelabels.mat`` and ``setid.mat`` below ``tmpdir``.

        Returns the number of images assigned to ``config["split"]``.
        """
        class_count = 3
        split_sizes = {"train": 3, "valid": 3, "test": 4}
        total = sum(split_sizes.values())
        root = pathlib.Path(tmpdir) / "flowers-102"

        def one_based_name(idx):
            # File names are numbered from 1, zero-padded to five digits.
            return f"image_{idx + 1:05d}.jpg"

        datasets_utils.create_image_folder(
            root,
            "jpg",
            file_name_fn=one_based_name,
            num_examples=total,
        )

        labels_dir = root / "labels"
        labels_dir.mkdir()

        # One random class id in [1, class_count] per image, stored as a single row.
        fake_labels = np.random.randint(1, class_count + 1, size=(1, total), dtype=np.uint8)
        savemat = datasets_utils.lazy_importer.scipy.io.savemat
        savemat(str(labels_dir / "imagelabels.mat"), {"labels": fake_labels})

        # Shuffle the 1-based image ids, then carve them into train / valid / test rows.
        shuffled_ids = np.arange(1, total + 1, dtype=np.uint16)
        np.random.shuffle(shuffled_ids)
        train_end = split_sizes["train"]
        test_start = total - split_sizes["test"]
        split_ids = {
            "trnid": shuffled_ids[:train_end].reshape(1, -1),
            "valid": shuffled_ids[train_end:test_start].reshape(1, -1),
            "tstid": shuffled_ids[test_start:].reshape(1, -1),
        }
        savemat(str(labels_dir / "setid.mat"), split_ids)

        return split_sizes[config["split"]]


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
2 changes: 2 additions & 0 deletions torchvision/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .fakedata import FakeData
from .fer2013 import FER2013
from .flickr import Flickr8k, Flickr30k
from .flowers102 import Flowers102
from .folder import ImageFolder, DatasetFolder
from .food101 import Food101
from .gtsrb import GTSRB
Expand Down Expand Up @@ -60,6 +61,7 @@
"SBU",
"Flickr8k",
"Flickr30k",
"Flowers102",
"VOCSegmentation",
"VOCDetection",
"Cityscapes",
Expand Down
116 changes: 116 additions & 0 deletions torchvision/datasets/flowers102.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from pathlib import Path
from typing import Any, Tuple, Callable, Optional

import numpy as np
import PIL.Image

from .utils import verify_str_arg, download_and_extract_archive, download_url
from .vision import VisionDataset


class Flowers102(VisionDataset):
    """`Oxford 102 Flower <https://www.robots.ox.ac.uk/~vgg/data/flowers/102/>`_ Dataset.

    .. warning::

        This class needs `scipy <https://docs.scipy.org/doc/>`_ to load target files from `.mat` format.

    Oxford 102 Flower is an image classification dataset consisting of 102 flower categories. The
    flowers were chosen to be flowers commonly occurring in the United Kingdom. Each class consists of
    between 40 and 258 images.

    The images have large scale, pose and light variations. In addition, there are categories that
    have large variations within the category and several very similar categories.

    Args:
        root (string): Root directory of the dataset.
        split (string, optional): The dataset split, supports ``"train"`` (default), ``"valid"``
            (``"val"`` is accepted as an alias), or ``"test"``.
        download (bool, optional): If true (default), downloads the dataset from the internet and
            puts it into ``root/flowers-102``. If the dataset is already present, it is not
            downloaded again.
        transform (callable, optional): A function/transform that takes in a PIL image and returns a
            transformed version. E.g, ``transforms.RandomCrop``.
        target_transform (callable, optional): A function/transform that takes in the target and transforms it.

    Raises:
        RuntimeError: If the image folder or one of the label files is missing after the optional
            download step.
    """

    # Public split name -> variable name inside ``setid.mat``.
    _SPLIT_TO_SETID_KEY = {"train": "trnid", "val": "valid", "valid": "valid", "test": "tstid"}
    # Label files that must be present in the ``labels`` folder.
    _META_FILES = ("imagelabels.mat", "setid.mat")

    def __init__(
        self,
        root: str,
        split: str = "train",
        download: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        self._split = verify_str_arg(split, "split", tuple(self._SPLIT_TO_SETID_KEY))
        self._base_folder = Path(self.root) / "flowers-102"
        self._meta_folder = self._base_folder / "labels"
        self._images_folder = self._base_folder / "jpg"

        if download:
            self._download()

        if not self._check_exists():
            raise RuntimeError("Dataset not found. You can use download=True to download it")

        # scipy is an optional dependency, so it is imported only when a dataset is built.
        from scipy.io import loadmat

        # Read the label ids: a single row with one class id per image, in image order.
        label_mat = loadmat(str(self._meta_folder / "imagelabels.mat"))
        labels = label_mat["labels"][0]

        self.classes = np.unique(labels).tolist()
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

        # Read the image ids belonging to the requested split.
        set_ids = loadmat(str(self._meta_folder / "setid.mat"))
        image_ids = set_ids[self._SPLIT_TO_SETID_KEY[self._split]][0].tolist()

        # Plain Python ints keep dict lookups and file-name formatting free of NumPy scalars.
        label_of_image = labels.tolist()

        # Image ids are 1-based while the label row is 0-based.
        self._labels = [self.class_to_idx[label_of_image[image_id - 1]] for image_id in image_ids]
        self._image_files = [self._images_folder / f"image_{image_id:05d}.jpg" for image_id in image_ids]

    def __len__(self) -> int:
        """Return the number of images in the selected split."""
        return len(self._image_files)

    def __getitem__(self, idx) -> Tuple[Any, Any]:
        """Return the ``(image, label)`` pair at ``idx`` after applying the optional transforms."""
        image_file, label = self._image_files[idx], self._labels[idx]
        image = PIL.Image.open(image_file).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

    def extra_repr(self) -> str:
        """Return the split description appended to the dataset's repr."""
        return f"split={self._split}"

    def _check_exists(self) -> bool:
        """Return True when the image folder and both label files are present.

        The label files are checked individually because an interrupted download can leave
        an empty ``labels`` folder behind.
        """
        if not self._images_folder.is_dir():
            return False
        return all((self._meta_folder / name).is_file() for name in self._META_FILES)

    def _download(self) -> None:
        """Download the image archive and the two label files unless they already exist."""
        if self._check_exists():
            return

        download_and_extract_archive(
            "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz",
            download_root=str(self._base_folder),
            md5="52808999861908f626f3c1f4e79d11fa",
        )

        download_url(
            "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat",
            str(self._meta_folder),
            md5="a5357ecc9cb78c4bef273ce3793fc85c",
        )

        download_url(
            "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat",
            str(self._meta_folder),
            md5="e0620be6f572b9609742df49c70aed4d",
        )