11 | 11 | from torchvision.datasets import utils
12 | 12 | from common_utils import get_tmp_dir
13 | 13 | from fakedata_generation import mnist_root, cifar_root, imagenet_root, \
14 |    | -    cityscapes_root, svhn_root, voc_root, ucf101_root, places365_root, widerface_root, stl10_root
   | 14 | +    cityscapes_root, svhn_root, ucf101_root, places365_root, widerface_root, stl10_root
15 | 15 | import xml.etree.ElementTree as ET
16 | 16 | from urllib.request import Request, urlopen
17 | 17 | import itertools
20 | 20 | import pickle
21 | 21 | from torchvision import datasets
22 | 22 | import torch
   | 23 | +import shutil
23 | 24 |
24 | 25 |
25 | 26 | try:
@@ -259,38 +260,6 @@ def test_svhn(self, mock_check):
259 | 260 |             dataset = torchvision.datasets.SVHN(root, split="extra")
260 | 261 |             self.generic_classification_dataset_test(dataset, num_images=2)
261 | 262 |
262 |     | -    @mock.patch('torchvision.datasets.voc.download_extract')
263 |     | -    def test_voc_parse_xml(self, mock_download_extract):
264 |     | -        with voc_root() as root:
265 |     | -            dataset = torchvision.datasets.VOCDetection(root)
266 |     | -
267 |     | -            single_object_xml = """<annotation>
268 |     | -            <object>
269 |     | -                <name>cat</name>
270 |     | -            </object>
271 |     | -            </annotation>"""
272 |     | -            multiple_object_xml = """<annotation>
273 |     | -            <object>
274 |     | -                <name>cat</name>
275 |     | -            </object>
276 |     | -            <object>
277 |     | -                <name>dog</name>
278 |     | -            </object>
279 |     | -            </annotation>"""
280 |     | -
281 |     | -            single_object_parsed = dataset.parse_voc_xml(ET.fromstring(single_object_xml))
282 |     | -            multiple_object_parsed = dataset.parse_voc_xml(ET.fromstring(multiple_object_xml))
283 |     | -
284 |     | -            self.assertEqual(single_object_parsed, {'annotation': {'object': [{'name': 'cat'}]}})
285 |     | -            self.assertEqual(multiple_object_parsed,
286 |     | -                             {'annotation': {
287 |     | -                                 'object': [{
288 |     | -                                     'name': 'cat'
289 |     | -                                 }, {
290 |     | -                                     'name': 'dog'
291 |     | -                                 }]
292 |     | -                             }})
293 |     | -
294 | 263 |     @unittest.skipIf(not HAS_PYAV, "PyAV unavailable")
295 | 264 |     def test_ucf101(self):
296 | 265 |         cached_meta_data = None
@@ -756,5 +725,119 @@ def test_attr_names(self):
756 | 725 |             self.assertEqual(tuple(dataset.attr_names), info["attr_names"])
757 | 726 |
758 | 727 |
    | 728 | +class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase):
    | 729 | +    DATASET_CLASS = datasets.VOCSegmentation
    | 730 | +    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image)
    | 731 | +
    | 732 | +    CONFIGS = (
    | 733 | +        *datasets_utils.combinations_grid(
    | 734 | +            year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval")
    | 735 | +        ),
    | 736 | +        dict(year="2007", image_set="test"),
    | 737 | +        dict(year="2007-test", image_set="test"),
    | 738 | +    )
    | 739 | +
    | 740 | +    def inject_fake_data(self, tmpdir, config):
    | 741 | +        year, is_test_set = (
    | 742 | +            ("2007", True)
    | 743 | +            if config["year"] == "2007-test" or config["image_set"] == "test"
    | 744 | +            else (config["year"], False)
    | 745 | +        )
    | 746 | +        image_set = config["image_set"]
    | 747 | +
    | 748 | +        base_dir = pathlib.Path(tmpdir)
    | 749 | +        if year == "2011":
    | 750 | +            base_dir /= "TrainVal"
    | 751 | +        base_dir = base_dir / "VOCdevkit" / f"VOC{year}"
    | 752 | +        os.makedirs(base_dir)
    | 753 | +
    | 754 | +        num_images, num_images_per_image_set = self._create_image_set_files(base_dir, "ImageSets", is_test_set)
    | 755 | +        datasets_utils.create_image_folder(base_dir, "JPEGImages", lambda idx: f"{idx:06d}.jpg", num_images)
    | 756 | +
    | 757 | +        datasets_utils.create_image_folder(base_dir, "SegmentationClass", lambda idx: f"{idx:06d}.png", num_images)
    | 758 | +        annotation = self._create_annotation_files(base_dir, "Annotations", num_images)
    | 759 | +
    | 760 | +        return dict(num_examples=num_images_per_image_set[image_set], annotation=annotation)
    | 761 | +
    | 762 | +    def _create_image_set_files(self, root, name, is_test_set):
    | 763 | +        root = pathlib.Path(root) / name
    | 764 | +        src = pathlib.Path(root) / "Main"
    | 765 | +        os.makedirs(src, exist_ok=True)
    | 766 | +
    | 767 | +        idcs = dict(train=(0, 1, 2), val=(3, 4), test=(5,))
    | 768 | +        idcs["trainval"] = (*idcs["train"], *idcs["val"])
    | 769 | +
    | 770 | +        for image_set in ("test",) if is_test_set else ("train", "val", "trainval"):
    | 771 | +            self._create_image_set_file(src, image_set, idcs[image_set])
    | 772 | +
    | 773 | +        shutil.copytree(src, root / "Segmentation")
    | 774 | +
    | 775 | +        num_images = max(itertools.chain(*idcs.values())) + 1
    | 776 | +        num_images_per_image_set = dict([(image_set, len(idcs_)) for image_set, idcs_ in idcs.items()])
    | 777 | +        return num_images, num_images_per_image_set
    | 778 | +
    | 779 | +    def _create_image_set_file(self, root, image_set, idcs):
    | 780 | +        with open(pathlib.Path(root) / f"{image_set}.txt", "w") as fh:
    | 781 | +            fh.writelines([f"{idx:06d}\n" for idx in idcs])
    | 782 | +
    | 783 | +    def _create_annotation_files(self, root, name, num_images):
    | 784 | +        root = pathlib.Path(root) / name
    | 785 | +        os.makedirs(root)
    | 786 | +
    | 787 | +        for idx in range(num_images):
    | 788 | +            annotation = self._create_annotation_file(root, f"{idx:06d}.xml")
    | 789 | +
    | 790 | +        return annotation
    | 791 | +
    | 792 | +    def _create_annotation_file(self, root, name):
    | 793 | +        def add_child(parent, name, text=None):
    | 794 | +            child = ET.SubElement(parent, name)
    | 795 | +            child.text = text
    | 796 | +            return child
    | 797 | +
    | 798 | +        def add_name(obj, name="dog"):
    | 799 | +            add_child(obj, "name", name)
    | 800 | +            return name
    | 801 | +
    | 802 | +        def add_bndbox(obj, bndbox=None):
    | 803 | +            if bndbox is None:
    | 804 | +                bndbox = {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"}
    | 805 | +
    | 806 | +            obj = add_child(obj, "bndbox")
    | 807 | +            for name, text in bndbox.items():
    | 808 | +                add_child(obj, name, text)
    | 809 | +
    | 810 | +            return bndbox
    | 811 | +
    | 812 | +        annotation = ET.Element("annotation")
    | 813 | +        obj = add_child(annotation, "object")
    | 814 | +        data = dict(name=add_name(obj), bndbox=add_bndbox(obj))
    | 815 | +
    | 816 | +        with open(pathlib.Path(root) / name, "wb") as fh:
    | 817 | +            fh.write(ET.tostring(annotation))
    | 818 | +
    | 819 | +        return data
    | 820 | +
    | 821 | +
    | 822 | +class VOCDetectionTestCase(VOCSegmentationTestCase):
    | 823 | +    DATASET_CLASS = datasets.VOCDetection
    | 824 | +    FEATURE_TYPES = (PIL.Image.Image, dict)
    | 825 | +
    | 826 | +    def test_annotations(self):
    | 827 | +        with self.create_dataset() as (dataset, info):
    | 828 | +            _, target = dataset[0]
    | 829 | +
    | 830 | +            self.assertIn("annotation", target)
    | 831 | +            annotation = target["annotation"]
    | 832 | +
    | 833 | +            self.assertIn("object", annotation)
    | 834 | +            objects = annotation["object"]
    | 835 | +
    | 836 | +            self.assertEqual(len(objects), 1)
    | 837 | +            object = objects[0]
    | 838 | +
    | 839 | +            self.assertEqual(object, info["annotation"])
    | 840 | +
    | 841 | +
759 | 842 | if __name__ == "__main__":
760 | 843 |     unittest.main()
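
Note on the CONFIGS grid above: `datasets_utils.combinations_grid` expands its keyword arguments into one config dict per element of their cartesian product, so the six years (2007-2012) crossed with three image sets yield eighteen configs before the two explicit test-set entries are appended. A minimal sketch of a helper with that behavior (assumed to match the torchvision test utility):

import itertools

def combinations_grid(**kwargs):
    # One dict per combination of the given options, e.g.
    # combinations_grid(year=("2007",), image_set=("train", "val"))
    # -> [{'year': '2007', 'image_set': 'train'}, {'year': '2007', 'image_set': 'val'}]
    return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())]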
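
The new `VOCDetectionTestCase.test_annotations` pins down the same parsing behavior the removed `test_voc_parse_xml` covered: `VOCDetection` converts each annotation XML file into nested dicts, with the repeated `object` tag always collected into a list. For the single-object file written by `_create_annotation_file`, the full target returned by `dataset[0]` should therefore look like this (a sketch assembled from the assertions above, not taken verbatim from the test suite):

# Expected detection target for one generated annotation file.
expected_target = {
    "annotation": {
        "object": [
            {
                "name": "dog",
                "bndbox": {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"},
            }
        ]
    }
}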