From 7556dd8ca7e4be5761e75a5a2994898766d030a2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:21:28 +0100 Subject: [PATCH 01/32] Update timm requirement from <=1.0.7,>=1.0.7 to >=1.0.7,<=1.0.9 (#2274) * Update timm requirement from <=1.0.7,>=1.0.7 to >=1.0.7,<=1.0.9 Updates the requirements on [timm](https://github.com/huggingface/pytorch-image-models) to permit the latest version. - [Release notes](https://github.com/huggingface/pytorch-image-models/releases) - [Commits](https://github.com/huggingface/pytorch-image-models/compare/v1.0.7...v1.0.9) --- updated-dependencies: - dependency-name: timm dependency-type: direct:production ... Signed-off-by: dependabot[bot] * Update pyproject.toml --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Samet Akcay --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9709c1a112..e23a4a92b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ core = [ "matplotlib>=3.4.3", "opencv-python>=4.5.3.56", "pandas>=1.1.0", - "timm<=1.0.7,>=1.0.7", + "timm", "lightning>=2.2", "torch>=2", "torchmetrics>=1.3.2", From 3768bb35d7d1ffae4e8ee801585caa5434ea99cc Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Wed, 25 Sep 2024 10:37:45 +0100 Subject: [PATCH 02/32] =?UTF-8?q?=F0=9F=90=9EUpdate=20`setuptools`=20requi?= =?UTF-8?q?rement=20for=20PEP=20660=20support=20(#2320)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update setup tools Signed-off-by: Samet Akcay --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e23a4a92b4..2893ad20c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
SETUP CONFIGURATION. # [build-system] -requires = ["setuptools>=42", "wheel"] +requires = ["setuptools>=64.0.0", "wheel"] build-backend = "setuptools.build_meta" [project] From 983ec58e4a861a8f221222913b45e8ae28e5d44f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Rolih?= <61357777+blaz-r@users.noreply.github.com> Date: Thu, 26 Sep 2024 08:53:38 +0200 Subject: [PATCH 03/32] Fix transforms for draem, dsr and rkde (#2324) Signed-off-by: Blaz Rolih --- src/anomalib/models/image/draem/lightning_model.py | 11 +++++++++++ src/anomalib/models/image/dsr/lightning_model.py | 11 +++++++++++ src/anomalib/models/image/rkde/lightning_model.py | 11 +++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/anomalib/models/image/draem/lightning_model.py b/src/anomalib/models/image/draem/lightning_model.py index f33bff6538..6eb0e197fc 100644 --- a/src/anomalib/models/image/draem/lightning_model.py +++ b/src/anomalib/models/image/draem/lightning_model.py @@ -12,6 +12,7 @@ import torch from lightning.pytorch.utilities.types import STEP_OUTPUT from torch import nn +from torchvision.transforms.v2 import Compose, Resize, Transform from anomalib import LearningType from anomalib.data.utils import Augmenter @@ -150,3 +151,13 @@ def learning_type(self) -> LearningType: LearningType: Learning type of the model. """ return LearningType.ONE_CLASS + + @staticmethod + def configure_transforms(image_size: tuple[int, int] | None = None) -> Transform: + """Default transform for DRAEM. 
Normalization is not needed as the images are scaled to [0, 1] in Dataset.""" + image_size = image_size or (256, 256) + return Compose( + [ + Resize(image_size, antialias=True), + ], + ) diff --git a/src/anomalib/models/image/dsr/lightning_model.py b/src/anomalib/models/image/dsr/lightning_model.py index b9a1136fd3..8381fce73d 100644 --- a/src/anomalib/models/image/dsr/lightning_model.py +++ b/src/anomalib/models/image/dsr/lightning_model.py @@ -13,6 +13,7 @@ import torch from lightning.pytorch.utilities.types import STEP_OUTPUT, OptimizerLRScheduler from torch import Tensor +from torchvision.transforms.v2 import Compose, Resize, Transform from anomalib import LearningType from anomalib.data.utils import DownloadInfo, download_and_extract @@ -191,3 +192,13 @@ def learning_type(self) -> LearningType: LearningType: Learning type of the model. """ return LearningType.ONE_CLASS + + @staticmethod + def configure_transforms(image_size: tuple[int, int] | None = None) -> Transform: + """Default transform for DSR. Normalization is not needed as the images are scaled to [0, 1] in Dataset.""" + image_size = image_size or (256, 256) + return Compose( + [ + Resize(image_size, antialias=True), + ], + ) diff --git a/src/anomalib/models/image/rkde/lightning_model.py b/src/anomalib/models/image/rkde/lightning_model.py index 02ad6c2564..f8b6af6d7a 100644 --- a/src/anomalib/models/image/rkde/lightning_model.py +++ b/src/anomalib/models/image/rkde/lightning_model.py @@ -11,6 +11,7 @@ import torch from lightning.pytorch.utilities.types import STEP_OUTPUT +from torchvision.transforms.v2 import Compose, Resize, Transform from anomalib import LearningType from anomalib.models.components import AnomalyModule, MemoryBankMixin @@ -143,3 +144,13 @@ def learning_type(self) -> LearningType: LearningType: Learning type of the model. 
""" return LearningType.ONE_CLASS + + @staticmethod + def configure_transforms(image_size: tuple[int, int] | None = None) -> Transform: + """Default transform for RKDE.""" + image_size = image_size or (240, 360) + return Compose( + [ + Resize(image_size, antialias=True), + ], + ) From f473df85e54e72824cba028a04fc210c57c4c30b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Rolih?= <61357777+blaz-r@users.noreply.github.com> Date: Thu, 26 Sep 2024 09:54:12 +0200 Subject: [PATCH 04/32] Add check before loading metrics data from checkpoint (#2323) Add check before loading from checkpoint Signed-off-by: Blaz Rolih Co-authored-by: Samet Akcay --- .../models/components/base/anomaly_module.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/anomalib/models/components/base/anomaly_module.py b/src/anomalib/models/components/base/anomaly_module.py index 7751818e63..963ce485a3 100644 --- a/src/anomalib/models/components/base/anomaly_module.py +++ b/src/anomalib/models/components/base/anomaly_module.py @@ -168,20 +168,19 @@ def load_state_dict(self, state_dict: OrderedDict[str, Any], strict: bool = True if "pixel_threshold_class" in state_dict: self.pixel_threshold = self._get_instance(state_dict, "pixel_threshold_class") - if "anomaly_maps_normalization_class" in state_dict: - self.anomaly_maps_normalization_metrics = self._get_instance(state_dict, "anomaly_maps_normalization_class") - if "box_scores_normalization_class" in state_dict: - self.box_scores_normalization_metrics = self._get_instance(state_dict, "box_scores_normalization_class") + # check only for pred score normalization metrics, because if this one is present, all others are too if "pred_scores_normalization_class" in state_dict: + self.box_scores_normalization_metrics = self._get_instance(state_dict, "box_scores_normalization_class") + self.anomaly_maps_normalization_metrics = self._get_instance(state_dict, "anomaly_maps_normalization_class") 
self.pred_scores_normalization_metrics = self._get_instance(state_dict, "pred_scores_normalization_class") - self.normalization_metrics = MetricCollection( - { - "anomaly_maps": self.anomaly_maps_normalization_metrics, - "box_scores": self.box_scores_normalization_metrics, - "pred_scores": self.pred_scores_normalization_metrics, - }, - ) + self.normalization_metrics = MetricCollection( + { + "anomaly_maps": self.anomaly_maps_normalization_metrics, + "box_scores": self.box_scores_normalization_metrics, + "pred_scores": self.pred_scores_normalization_metrics, + }, + ) # Used to load metrics if there is any related data in state_dict self._load_metrics(state_dict) From 191e21f424faad0c7cae0a1d9e588f380f991c5a Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Wed, 2 Oct 2024 13:46:23 +0200 Subject: [PATCH 05/32] Add PIMO (#2329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * PIMO (#1726) * update Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * test binclf curves numpy and numba and fixes Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * correct som docstrings Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * torch interface and tests Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * torch interface and tests Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * constants regrouped in dataclass as class vars Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * result class was unneccesary for per_image_binclf_curve Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * factorize function _get_threshs_minmax_linspace Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * small docs fixes Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add pimo numpy version and test Signed-off-by: jpcbertoldo 
<24547377+jpcbertoldo@users.noreply.github.com> * move validation Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add `shared_fpr_metric` option Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add pimo torch functional version and test Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add torchmetrics interface and test Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * renames and put things in init Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * validate inputs in result objects Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * result objects to from dict and tests Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add save and load methods to result objects and test Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * refactor validations and minor changes Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * test result objects' properties Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * minor refactors Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add missing docstrings Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * minore vocabulary fix for consistency Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add per image scores statistics and test it Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * refactor constants notation Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add stats tests and test it Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * change the meaning of AUPIMO.num_thresh Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * interface to format pairwise test results Signed-off-by: jpcbertoldo 
<24547377+jpcbertoldo@users.noreply.github.com> * improve doc Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add optional `paths` to result objects and some minor fixes and refactors Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * remove frozen from dataclasses and some done todos Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * review headers Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * doc modifs Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * refactor `score_less_than_thresh` in `_binclf_one_curve_python` Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * correct license comments Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * fix doc Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * numba as extra requirement Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * refactor copyrights from jpcbertoldo Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * remove from __future__ import annotations Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * refactor validations names Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * dedupe file path validation Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * fix tests Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * Add todo Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * refactor enums Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * only logger.warning Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * refactor test imports Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * refactor docs Signed-off-by: jpcbertoldo 
<24547377+jpcbertoldo@users.noreply.github.com> * refactor some docs Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * correct pre commit errors Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * remove author tag Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add thrid party program Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * Update src/anomalib/metrics/per_image/pimo.py * move HAS_NUMBA Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * remove PIMOSharedFPRMetric Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * make torchmetrics compute avg by dft Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * pre-commit hooks corrections Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * correct numpy.trapezoid Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> --------- Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> Co-authored-by: Samet Akcay * 🗑️ Remove numba (#2313) * remove numba Signed-off-by: Ashwin Vaidya * fix pre-commit checks Signed-off-by: Ashwin Vaidya * add third-party-programs.txt Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * 🗑️ Remove unused methods (#2315) * remove numba Signed-off-by: Ashwin Vaidya * fix pre-commit checks Signed-off-by: Ashwin Vaidya * remove all unused methods Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * PIMO: Port Numpy → Torch (#2316) * remove numba Signed-off-by: Ashwin Vaidya * fix pre-commit checks Signed-off-by: Ashwin Vaidya * remove all unused methods Signed-off-by: Ashwin Vaidya * replace numpy with torch Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * 🔨Refactor methods across files (#2321) * remove numba Signed-off-by: Ashwin Vaidya * fix pre-commit checks Signed-off-by: 
Ashwin Vaidya * remove all unused methods Signed-off-by: Ashwin Vaidya * replace numpy with torch Signed-off-by: Ashwin Vaidya * refactor code Signed-off-by: Ashwin Vaidya * refactor move functional inside update remove path from the metric * Add changes from comments Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * Remove model to model comparison (#2325) * rename to pimo Signed-off-by: Ashwin Vaidya * minor refactor Signed-off-by: Ashwin Vaidya * remove model to model comparison Signed-off-by: Ashwin Vaidya * fix test Signed-off-by: Ashwin Vaidya * PR comments Signed-off-by: Ashwin Vaidya * Minor refactor Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * PR comments Signed-off-by: Ashwin Vaidya * Remove unused enums Signed-off-by: Ashwin Vaidya * update doc strings Signed-off-by: Ashwin Vaidya * update param names Signed-off-by: Ashwin Vaidya * add aupimo basic usage tutorial notebook (#2330) * add aupimo basic usage tutorial notebook Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * update scipy import Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add cite us Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * minor Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * modify texts and add illustration Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * udpate working dir Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> --------- Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> --------- Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> Signed-off-by: Ashwin Vaidya Co-authored-by: Joao P C Bertoldo <24547377+jpcbertoldo@users.noreply.github.com> Co-authored-by: Samet Akcay --- notebooks/700_metrics/701a_aupimo.ipynb | 549 ++++++++++++++ notebooks/700_metrics/roc_pro_pimo.svg | 690 ++++++++++++++++++ 
src/anomalib/data/utils/path.py | 14 +- src/anomalib/metrics/__init__.py | 3 + src/anomalib/metrics/pimo/__init__.py | 23 + src/anomalib/metrics/pimo/_validate.py | 427 +++++++++++ .../pimo/binary_classification_curve.py | 334 +++++++++ src/anomalib/metrics/pimo/dataclasses.py | 226 ++++++ src/anomalib/metrics/pimo/functional.py | 355 +++++++++ src/anomalib/metrics/pimo/pimo.py | 296 ++++++++ src/anomalib/metrics/pimo/utils.py | 19 + tests/unit/data/utils/test_path.py | 6 + tests/unit/metrics/pimo/__init__.py | 8 + .../pimo/test_binary_classification_curve.py | 423 +++++++++++ tests/unit/metrics/pimo/test_pimo.py | 368 ++++++++++ third-party-programs.txt | 4 + 16 files changed, 3744 insertions(+), 1 deletion(-) create mode 100644 notebooks/700_metrics/701a_aupimo.ipynb create mode 100644 notebooks/700_metrics/roc_pro_pimo.svg create mode 100644 src/anomalib/metrics/pimo/__init__.py create mode 100644 src/anomalib/metrics/pimo/_validate.py create mode 100644 src/anomalib/metrics/pimo/binary_classification_curve.py create mode 100644 src/anomalib/metrics/pimo/dataclasses.py create mode 100644 src/anomalib/metrics/pimo/functional.py create mode 100644 src/anomalib/metrics/pimo/pimo.py create mode 100644 src/anomalib/metrics/pimo/utils.py create mode 100644 tests/unit/metrics/pimo/__init__.py create mode 100644 tests/unit/metrics/pimo/test_binary_classification_curve.py create mode 100644 tests/unit/metrics/pimo/test_pimo.py diff --git a/notebooks/700_metrics/701a_aupimo.ipynb b/notebooks/700_metrics/701a_aupimo.ipynb new file mode 100644 index 0000000000..e6333df6df --- /dev/null +++ b/notebooks/700_metrics/701a_aupimo.ipynb @@ -0,0 +1,549 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AUPIMO\n", + "\n", + "Basic usage of the metric AUPIMO (pronounced \"a-u-pee-mo\")." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# What is AUPIMO?\n", + "\n", + "The `Area Under the Per-Image Overlap [curve]` (AUPIMO) is a metric of recall (higher is better) designed for visual anomaly detection.\n", + "\n", + "Inspired by the [ROC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) and [PRO](https://link.springer.com/article/10.1007/s11263-020-01400-4) curves, \n", + "\n", + "> AUPIMO is the area under a curve of True Positive Rate (TPR or _recall_) as a function of False Positive Rate (FPR) restricted to a fixed range. \n", + "\n", + "But:\n", + "- the TPR (Y-axis) is *per-image* (1 image = 1 curve/score);\n", + "- the FPR (X-axis) considers the (average of) **normal** images only; \n", + "- the FPR (X-axis) is in log scale and its range is [1e-5, 1e-4]\\* (harder detection task!).\n", + "\n", + "\\* The score (the area under the curve) is normalized to be in [0, 1].\n", + "\n", + "AUPIMO can be interpreted as\n", + "\n", + "> average segmentation recall in an image given that the model (nearly) does not yield false positives in normal images.\n", + "\n", + "References in the last cell.\n", + "\n", + "![AUROC vs. AUPRO vs. AUPIMO](./roc_pro_pimo.svg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install `anomalib` using `pip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO(jpcbertoldo): replace by `pip install anomalib` when AUPIMO is released # noqa: TD003\n", + "%pip install ../.." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change the directory to have access to the datasets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "# NOTE: Provide the path to the dataset root directory.\n", + "# If the dataset is not downloaded, it will be downloaded\n", + "# to this directory.\n", + "dataset_root = Path.cwd().parent.parent / \"datasets\" / \"MVTec\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.ticker import MaxNLocator, PercentFormatter\n", + "from scipy import stats\n", + "\n", + "from anomalib import TaskType\n", + "from anomalib.data import MVTec\n", + "from anomalib.engine import Engine\n", + "from anomalib.metrics import AUPIMO\n", + "from anomalib.models import Padim" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Module\n", + "\n", + "We will use the Leather dataset from MVTec AD. \n", + "\n", + "> See the notebooks below for more details on datamodules. 
\n", + "> [github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules](https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "task = TaskType.SEGMENTATION\n", + "datamodule = MVTec(\n", + " root=dataset_root,\n", + " category=\"leather\",\n", + " image_size=256,\n", + " train_batch_size=32,\n", + " eval_batch_size=32,\n", + " num_workers=8,\n", + " task=task,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model\n", + "\n", + "We will use `PaDiM` (performance is not the best, but it is fast to train).\n", + "\n", + "> See the notebooks below for more details on models. \n", + "> [github.com/openvinotoolkit/anomalib/tree/main/notebooks/200_models](https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/200_models)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Instantiate the model." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "model = Padim(\n", + " # only use one layer to speed it up\n", + " layers=[\"layer1\"],\n", + " n_features=32,\n", + " backbone=\"resnet18\",\n", + " pre_trained=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Average AUPIMO (Basic)\n", + "\n", + "The easiest way to use AUPIMO is via the collection of pixel metrics in the engine.\n", + "\n", + "By default, the average AUPIMO is calculated." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "engine = Engine(\n", + " pixel_metrics=\"AUPIMO\", # others can be added\n", + " accelerator=\"auto\", # \\<\"cpu\", \"gpu\", \"tpu\", \"ipu\", \"hpu\", \"auto\">,\n", + " devices=1,\n", + " logger=False,\n", + ")\n", + "engine.fit(datamodule=datamodule, model=model)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "F1Score class exists for backwards compatibility. It will be removed in v1.1. Please use BinaryF1Score from torchmetrics instead\n", + "Metric `AUPIMO` will save all targets and predictions in buffer. For large datasets this may lead to large memory footprint.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "58335955473a43dab43e586caf66aa11", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Testing: | | 0/? 
[00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric ┃ DataLoader 0 ┃\n", + "┑━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "β”‚ image_AUROC β”‚ 0.9735053777694702 β”‚\n", + "β”‚ image_F1Score β”‚ 0.9518716335296631 β”‚\n", + "β”‚ pixel_AUPIMO β”‚ 0.6273086756193275 β”‚\n", + "β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┑━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "β”‚\u001b[36m \u001b[0m\u001b[36m image_AUROC \u001b[0m\u001b[36m \u001b[0mβ”‚\u001b[35m \u001b[0m\u001b[35m 0.9735053777694702 \u001b[0m\u001b[35m \u001b[0mβ”‚\n", + "β”‚\u001b[36m \u001b[0m\u001b[36m image_F1Score \u001b[0m\u001b[36m \u001b[0mβ”‚\u001b[35m \u001b[0m\u001b[35m 0.9518716335296631 \u001b[0m\u001b[35m \u001b[0mβ”‚\n", + "β”‚\u001b[36m \u001b[0m\u001b[36m pixel_AUPIMO \u001b[0m\u001b[36m \u001b[0mβ”‚\u001b[35m \u001b[0m\u001b[35m 0.6273086756193275 \u001b[0m\u001b[35m \u001b[0mβ”‚\n", + "β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "[{'pixel_AUPIMO': 0.6273086756193275,\n", + " 'image_AUROC': 0.9735053777694702,\n", + " 'image_F1Score': 0.9518716335296631}]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# will output the AUPIMO score on the test set\n", + "engine.test(datamodule=datamodule, model=model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, 
+ "source": [ + "# Individual AUPIMO Scores (Detailed)\n", + "\n", + "AUPIMO assigns one recall score per anomalous image in the dataset.\n", + "\n", + "It is possible to access each of the individual AUPIMO scores and look at the distribution." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Collect the predictions and the ground truth." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ckpt_path is not provided. Model weights will not be loaded.\n", + "F1Score class exists for backwards compatibility. It will be removed in v1.1. Please use BinaryF1Score from torchmetrics instead\n", + "Metric `AUPIMO` will save all targets and predictions in buffer. For large datasets this may lead to large memory footprint.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "678cb90805ee4b7bb1dd0c30944edab9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Predicting: | | 0/? 
[00:00" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fig, ax = plt.subplots()\n", + "ax.hist(aupimo_result.aupimos.numpy(), bins=np.linspace(0, 1, 11), edgecolor=\"black\")\n", + "ax.set_ylabel(\"Count (number of images)\")\n", + "ax.yaxis.set_major_locator(MaxNLocator(5, integer=True))\n", + "ax.set_xlim(0, 1)\n", + "ax.set_xlabel(\"AUPIMO [%]\")\n", + "ax.xaxis.set_major_formatter(PercentFormatter(1))\n", + "ax.grid()\n", + "ax.set_title(\"AUPIMO distribution\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cite Us\n", + "\n", + "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "\n", + "Our work was accepted to the British Machine Vision Conference 2024 (BMVC 2024).\n", + "\n", + "```bibtex\n", + "@misc{bertoldo2024aupimo,\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", + " author={Joao P. C. 
Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " year={2024},\n", + " eprint={2401.01984},\n", + " archivePrefix={arXiv},\n", + " primaryClass={cs.CV},\n", + " url={https://arxiv.org/abs/2401.01984}, \n", + "}\n", + "```\n", + "\n", + "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", + "\n", + "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "anomalib-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/700_metrics/roc_pro_pimo.svg b/notebooks/700_metrics/roc_pro_pimo.svg new file mode 100644 index 0000000000..b580e89d17 --- /dev/null +++ b/notebooks/700_metrics/roc_pro_pimo.svg @@ -0,0 +1,690 @@ + + + +image/svg+xmlEach curve summarizesthe test set with di + + +ff + + +erent aggregations. + + +ROC + + +PRO + + +One per image! 
+ + +AUROC + + +AUPRO + + +AUPIMO + + +PIMO + + +i + + +i + + +Recall + + diff --git a/src/anomalib/data/utils/path.py b/src/anomalib/data/utils/path.py index 9c3f56273b..7bc61b27fe 100644 --- a/src/anomalib/data/utils/path.py +++ b/src/anomalib/data/utils/path.py @@ -142,13 +142,20 @@ def contains_non_printable_characters(path: str | Path) -> bool: return not printable_pattern.match(str(path)) -def validate_path(path: str | Path, base_dir: str | Path | None = None, should_exist: bool = True) -> Path: +def validate_path( + path: str | Path, + base_dir: str | Path | None = None, + should_exist: bool = True, + extensions: tuple[str, ...] | None = None, +) -> Path: """Validate the path. Args: path (str | Path): Path to validate. base_dir (str | Path): Base directory to restrict file access. should_exist (bool): If True, do not raise an exception if the path does not exist. + extensions (tuple[str, ...] | None): Accepted extensions for the path. An exception is raised if the + path does not have one of the accepted extensions. If None, no check is performed. Defaults to None. Returns: Path: Validated path. @@ -213,6 +220,11 @@ def validate_path(path: str | Path, base_dir: str | Path | None = None, should_e msg = f"Read or execute permissions denied for the path: {path}" raise PermissionError(msg) + # Check if the path has one of the accepted extensions + if extensions is not None and path.suffix not in extensions: + msg = f"Path extension is not accepted. Accepted extensions: {extensions}. 
Path: {path}" + raise ValueError(msg) + return path diff --git a/src/anomalib/metrics/__init__.py b/src/anomalib/metrics/__init__.py index 4c3eafa811..81bab3c93f 100644 --- a/src/anomalib/metrics/__init__.py +++ b/src/anomalib/metrics/__init__.py @@ -19,6 +19,7 @@ from .f1_max import F1Max from .f1_score import F1Score from .min_max import MinMax +from .pimo import AUPIMO, PIMO from .precision_recall_curve import BinaryPrecisionRecallCurve from .pro import PRO from .threshold import F1AdaptiveThreshold, ManualThreshold @@ -35,6 +36,8 @@ "ManualThreshold", "MinMax", "PRO", + "PIMO", + "AUPIMO", ] logger = logging.getLogger(__name__) diff --git a/src/anomalib/metrics/pimo/__init__.py b/src/anomalib/metrics/pimo/__init__.py new file mode 100644 index 0000000000..174f546e4d --- /dev/null +++ b/src/anomalib/metrics/pimo/__init__.py @@ -0,0 +1,22 @@ +"""Per-Image Metrics.""" + +# Original Code +# https://github.com/jpcbertoldo/aupimo +# +# Modified +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from .binary_classification_curve import ThresholdMethod +from .pimo import AUPIMO, PIMO, AUPIMOResult, PIMOResult + +__all__ = [ + # constants + "ThresholdMethod", + # result classes + "PIMOResult", + "AUPIMOResult", + # torchmetrics interfaces + "PIMO", + "AUPIMO", +] diff --git a/src/anomalib/metrics/pimo/_validate.py b/src/anomalib/metrics/pimo/_validate.py new file mode 100644 index 0000000000..f0ba7af4bf --- /dev/null +++ b/src/anomalib/metrics/pimo/_validate.py @@ -0,0 +1,427 @@ +"""Utils for validating arguments and results. + +TODO(jpcbertoldo): Move validations to a common place and reuse them across the codebase.
+https://github.com/openvinotoolkit/anomalib/issues/2093 +""" + +# Original Code +# https://github.com/jpcbertoldo/aupimo +# +# Modified +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging + +import torch +from torch import Tensor + +from .utils import images_classes_from_masks + +logger = logging.getLogger(__name__) + + +def is_num_thresholds_gte2(num_thresholds: int) -> None: + """Validate the number of thresholds is a positive integer >= 2.""" + if not isinstance(num_thresholds, int): + msg = f"Expected the number of thresholds to be an integer, but got {type(num_thresholds)}" + raise TypeError(msg) + + if num_thresholds < 2: + msg = f"Expected the number of thresholds to be larger than 1, but got {num_thresholds}" + raise ValueError(msg) + + +def is_same_shape(*args) -> None: + """Works both for tensors and ndarrays.""" + assert len(args) > 0 + shapes = sorted({tuple(arg.shape) for arg in args}) + if len(shapes) > 1: + msg = f"Expected arguments to have the same shape, but got {shapes}" + raise ValueError(msg) + + +def is_rate(rate: float | int, zero_ok: bool, one_ok: bool) -> None: + """Validates a rate parameter. + + Args: + rate (float | int): The rate to be validated. + zero_ok (bool): Flag indicating if rate can be 0. + one_ok (bool): Flag indicating if rate can be 1. + """ + if not isinstance(rate, float | int): + msg = f"Expected rate to be a float or int, but got {type(rate)}." + raise TypeError(msg) + + if rate < 0.0 or rate > 1.0: + msg = f"Expected rate to be in [0, 1], but got {rate}." + raise ValueError(msg) + + if not zero_ok and rate == 0.0: + msg = "Rate cannot be 0." + raise ValueError(msg) + + if not one_ok and rate == 1.0: + msg = "Rate cannot be 1." + raise ValueError(msg) + + +def is_rate_range(bounds: tuple[float, float]) -> None: + """Validates the range of rates within the bounds. + + Args: + bounds (tuple[float, float]): The lower and upper bounds of the rates. 
+ """ + if not isinstance(bounds, tuple): + msg = f"Expected the bounds to be a tuple, but got {type(bounds)}" + raise TypeError(msg) + + if len(bounds) != 2: + msg = f"Expected the bounds to be a tuple of length 2, but got {len(bounds)}" + raise ValueError(msg) + + lower, upper = bounds + is_rate(lower, zero_ok=False, one_ok=False) + is_rate(upper, zero_ok=False, one_ok=True) + + if lower >= upper: + msg = f"Expected the upper bound to be larger than the lower bound, but got {upper=} <= {lower=}" + raise ValueError(msg) + + +def is_valid_threshold(thresholds: Tensor) -> None: + """Validate that the thresholds are valid and monotonically increasing.""" + if not isinstance(thresholds, Tensor): + msg = f"Expected thresholds to be an Tensor, but got {type(thresholds)}" + raise TypeError(msg) + + if thresholds.ndim != 1: + msg = f"Expected thresholds to be 1D, but got {thresholds.ndim}" + raise ValueError(msg) + + if not thresholds.dtype.is_floating_point: + msg = f"Expected thresholds to be of float type, but got Tensor with dtype {thresholds.dtype}" + raise TypeError(msg) + + # make sure they are strictly increasing + if not torch.all(torch.diff(thresholds) > 0): + msg = "Expected thresholds to be strictly increasing, but it is not." + raise ValueError(msg) + + +def validate_threshold_bounds(threshold_bounds: tuple[float, float]) -> None: + if not isinstance(threshold_bounds, tuple): + msg = f"Expected threshold bounds to be a tuple, but got {type(threshold_bounds)}." + raise TypeError(msg) + + if len(threshold_bounds) != 2: + msg = f"Expected threshold bounds to be a tuple of length 2, but got {len(threshold_bounds)}." + raise ValueError(msg) + + lower, upper = threshold_bounds + + if not isinstance(lower, float): + msg = f"Expected lower threshold bound to be a float, but got {type(lower)}." + raise TypeError(msg) + + if not isinstance(upper, float): + msg = f"Expected upper threshold bound to be a float, but got {type(upper)}." 
+ raise TypeError(msg) + + if upper <= lower: + msg = f"Expected the upper bound to be greater than the lower bound, but got {upper} <= {lower}." + raise ValueError(msg) + + +def is_anomaly_maps(anomaly_maps: Tensor) -> None: + if anomaly_maps.ndim != 3: + msg = f"Expected anomaly maps have 3 dimensions (N, H, W), but got {anomaly_maps.ndim} dimensions" + raise ValueError(msg) + + if not anomaly_maps.dtype.is_floating_point: + msg = ( + "Expected anomaly maps to be an floating Tensor with anomaly scores," + f" but got Tensor with dtype {anomaly_maps.dtype}" + ) + raise TypeError(msg) + + +def is_masks(masks: Tensor) -> None: + if masks.ndim != 3: + msg = f"Expected masks have 3 dimensions (N, H, W), but got {masks.ndim} dimensions" + raise ValueError(msg) + + if masks.dtype == torch.bool: + pass + elif masks.dtype.is_floating_point: + msg = ( + "Expected masks to be an integer or boolean Tensor with ground truth labels, " + f"but got Tensor with dtype {masks.dtype}" + ) + raise TypeError(msg) + else: + # assumes the type to be (signed or unsigned) integer + # this will change with the dataclass refactor + masks_unique_vals = torch.unique(masks) + if torch.any((masks_unique_vals != 0) & (masks_unique_vals != 1)): + msg = ( + "Expected masks to be a *binary* Tensor with ground truth labels, " + f"but got Tensor with unique values {sorted(masks_unique_vals)}" + ) + raise ValueError(msg) + + +def is_binclf_curves(binclf_curves: Tensor, valid_thresholds: Tensor | None) -> None: + if binclf_curves.ndim != 4: + msg = f"Expected binclf curves to be 4D, but got {binclf_curves.ndim}D" + raise ValueError(msg) + + if binclf_curves.shape[-2:] != (2, 2): + msg = f"Expected binclf curves to have shape (..., 2, 2), but got {binclf_curves.shape}" + raise ValueError(msg) + + if binclf_curves.dtype != torch.int64: + msg = f"Expected binclf curves to have dtype int64, but got {binclf_curves.dtype}." 
+ raise TypeError(msg) + + if (binclf_curves < 0).any(): + msg = "Expected binclf curves to have non-negative values, but got negative values." + raise ValueError(msg) + + neg = binclf_curves[:, :, 0, :].sum(axis=-1) # (num_images, num_thresholds) + + if (neg != neg[:, :1]).any(): + msg = "Expected binclf curves to have the same number of negatives per image for every thresh." + raise ValueError(msg) + + pos = binclf_curves[:, :, 1, :].sum(axis=-1) # (num_images, num_thresholds) + + if (pos != pos[:, :1]).any(): + msg = "Expected binclf curves to have the same number of positives per image for every thresh." + raise ValueError(msg) + + if valid_thresholds is None: + return + + if binclf_curves.shape[1] != valid_thresholds.shape[0]: + msg = ( + "Expected the binclf curves to have as many confusion matrices as the thresholds sequence, " + f"but got {binclf_curves.shape[1]} and {valid_thresholds.shape[0]}" + ) + raise RuntimeError(msg) + + +def is_images_classes(images_classes: Tensor) -> None: + if images_classes.ndim != 1: + msg = f"Expected image classes to be 1D, but got {images_classes.ndim}D." + raise ValueError(msg) + + if images_classes.dtype == torch.bool: + pass + elif images_classes.dtype.is_floating_point: + msg = ( + "Expected image classes to be an integer or boolean Tensor with ground truth labels, " + f"but got Tensor with dtype {images_classes.dtype}" + ) + raise TypeError(msg) + else: + # assumes the type to be (signed or unsigned) integer + # this will change with the dataclass refactor + unique_vals = torch.unique(images_classes) + if torch.any((unique_vals != 0) & (unique_vals != 1)): + msg = ( + "Expected image classes to be a *binary* Tensor with ground truth labels, " + f"but got Tensor with unique values {sorted(unique_vals)}" + ) + raise ValueError(msg) + + +def is_rates(rates: Tensor, nan_allowed: bool) -> None: + if rates.ndim != 1: + msg = f"Expected rates to be 1D, but got {rates.ndim}D." 
+ raise ValueError(msg) + + if not rates.dtype.is_floating_point: + msg = f"Expected rates to have dtype of float type, but got {rates.dtype}." + raise ValueError(msg) + + isnan_mask = torch.isnan(rates) + if nan_allowed: + # if they are all nan, then there is nothing to validate + if isnan_mask.all(): + return + valid_values = rates[~isnan_mask] + elif isnan_mask.any(): + msg = "Expected rates to not contain NaN values, but got NaN values." + raise ValueError(msg) + else: + valid_values = rates + + if (valid_values < 0).any(): + msg = "Expected rates to have values in the interval [0, 1], but got values < 0." + raise ValueError(msg) + + if (valid_values > 1).any(): + msg = "Expected rates to have values in the interval [0, 1], but got values > 1." + raise ValueError(msg) + + +def is_rate_curve(rate_curve: Tensor, nan_allowed: bool, decreasing: bool) -> None: + is_rates(rate_curve, nan_allowed=nan_allowed) + + diffs = torch.diff(rate_curve) + diffs_valid = diffs[~torch.isnan(diffs)] if nan_allowed else diffs + + if decreasing and (diffs_valid > 0).any(): + msg = "Expected rate curve to be monotonically decreasing, but got non-monotonically decreasing values." + raise ValueError(msg) + + if not decreasing and (diffs_valid < 0).any(): + msg = "Expected rate curve to be monotonically increasing, but got non-monotonically increasing values." + raise ValueError(msg) + + +def is_per_image_rate_curves(rate_curves: Tensor, nan_allowed: bool, decreasing: bool | None) -> None: + if rate_curves.ndim != 2: + msg = f"Expected per-image rate curves to be 2D, but got {rate_curves.ndim}D." + raise ValueError(msg) + + if not rate_curves.dtype.is_floating_point: + msg = f"Expected per-image rate curves to have dtype of float type, but got {rate_curves.dtype}." 
+ raise ValueError(msg) + + isnan_mask = torch.isnan(rate_curves) + if nan_allowed: + # if they are all nan, then there is nothing to validate + if isnan_mask.all(): + return + valid_values = rate_curves[~isnan_mask] + elif isnan_mask.any(): + msg = "Expected per-image rate curves to not contain NaN values, but got NaN values." + raise ValueError(msg) + else: + valid_values = rate_curves + + if (valid_values < 0).any(): + msg = "Expected per-image rate curves to have values in the interval [0, 1], but got values < 0." + raise ValueError(msg) + + if (valid_values > 1).any(): + msg = "Expected per-image rate curves to have values in the interval [0, 1], but got values > 1." + raise ValueError(msg) + + if decreasing is None: + return + + diffs = torch.diff(rate_curves, axis=1) + diffs_valid = diffs[~torch.isnan(diffs)] if nan_allowed else diffs + + if decreasing and (diffs_valid > 0).any(): + msg = ( + "Expected per-image rate curves to be monotonically decreasing, " + "but got non-monotonically decreasing values." + ) + raise ValueError(msg) + + if not decreasing and (diffs_valid < 0).any(): + msg = ( + "Expected per-image rate curves to be monotonically increasing, " + "but got non-monotonically increasing values." 
+ ) + raise ValueError(msg) + + +def is_scores_batch(scores_batch: torch.Tensor) -> None: + """scores_batch (torch.Tensor): floating (N, D).""" + if not isinstance(scores_batch, torch.Tensor): + msg = f"Expected `scores_batch` to be an torch.Tensor, but got {type(scores_batch)}" + raise TypeError(msg) + + if not scores_batch.dtype.is_floating_point: + msg = ( + "Expected `scores_batch` to be an floating torch.Tensor with anomaly scores_batch," + f" but got torch.Tensor with dtype {scores_batch.dtype}" + ) + raise TypeError(msg) + + if scores_batch.ndim != 2: + msg = f"Expected `scores_batch` to be 2D, but got {scores_batch.ndim}" + raise ValueError(msg) + + +def is_gts_batch(gts_batch: torch.Tensor) -> None: + """gts_batch (torch.Tensor): boolean (N, D).""" + if not isinstance(gts_batch, torch.Tensor): + msg = f"Expected `gts_batch` to be an torch.Tensor, but got {type(gts_batch)}" + raise TypeError(msg) + + if gts_batch.dtype != torch.bool: + msg = ( + "Expected `gts_batch` to be an boolean torch.Tensor with anomaly scores_batch," + f" but got torch.Tensor with dtype {gts_batch.dtype}" + ) + raise TypeError(msg) + + if gts_batch.ndim != 2: + msg = f"Expected `gts_batch` to be 2D, but got {gts_batch.ndim}" + raise ValueError(msg) + + +def has_at_least_one_anomalous_image(masks: torch.Tensor) -> None: + is_masks(masks) + image_classes = images_classes_from_masks(masks) + if (image_classes == 1).sum() == 0: + msg = "Expected at least one ANOMALOUS image, but found none." + raise ValueError(msg) + + +def has_at_least_one_normal_image(masks: torch.Tensor) -> None: + is_masks(masks) + image_classes = images_classes_from_masks(masks) + if (image_classes == 0).sum() == 0: + msg = "Expected at least one NORMAL image, but found none." 
+ raise ValueError(msg) + + +def joint_validate_thresholds_shared_fpr(thresholds: torch.Tensor, shared_fpr: torch.Tensor) -> None: + if thresholds.shape[0] != shared_fpr.shape[0]: + msg = ( + "Expected `thresholds` and `shared_fpr` to have the same number of elements, " + f"but got {thresholds.shape[0]} != {shared_fpr.shape[0]}" + ) + raise ValueError(msg) + + +def is_per_image_tprs(per_image_tprs: torch.Tensor, image_classes: torch.Tensor) -> None: + is_images_classes(image_classes) + # general validations + is_per_image_rate_curves( + per_image_tprs, + nan_allowed=True, # normal images have NaN TPRs + decreasing=None, # not checked here + ) + + # specific to anomalous images + is_per_image_rate_curves( + per_image_tprs[image_classes == 1], + nan_allowed=False, + decreasing=True, + ) + + # specific to normal images + normal_images_tprs = per_image_tprs[image_classes == 0] + if not normal_images_tprs.isnan().all(): + msg = "Expected all normal images to have NaN TPRs, but some have non-NaN values." + raise ValueError(msg) + + +def is_per_image_scores(per_image_scores: torch.Tensor) -> None: + if per_image_scores.ndim != 1: + msg = f"Expected per-image scores to be 1D, but got {per_image_scores.ndim}D." + raise ValueError(msg) + + +def is_image_class(image_class: int) -> None: + if image_class not in {0, 1}: + msg = f"Expected image class to be either 0 for 'normal' or 1 for 'anomalous', but got {image_class}." + raise ValueError(msg) diff --git a/src/anomalib/metrics/pimo/binary_classification_curve.py b/src/anomalib/metrics/pimo/binary_classification_curve.py new file mode 100644 index 0000000000..1a80944041 --- /dev/null +++ b/src/anomalib/metrics/pimo/binary_classification_curve.py @@ -0,0 +1,334 @@ +"""Binary classification curve (numpy-only implementation). + +A binary classification (binclf) matrix (TP, FP, FN, TN) is evaluated at multiple thresholds. 
+ +The thresholds are shared by all instances/images, but their binclf are computed independently for each instance/image. +""" + +# Original Code +# https://github.com/jpcbertoldo/aupimo +# +# Modified +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import itertools +import logging +from enum import Enum +from functools import partial + +import numpy as np +import torch + +from . import _validate + +logger = logging.getLogger(__name__) + + +class ThresholdMethod(Enum): + """Sequence of thresholds to use.""" + + GIVEN: str = "given" + MINMAX_LINSPACE: str = "minmax-linspace" + MEAN_FPR_OPTIMIZED: str = "mean-fpr-optimized" + + +def _binary_classification_curve(scores: np.ndarray, gts: np.ndarray, thresholds: np.ndarray) -> np.ndarray: + """One binary classification matrix at each threshold. + + In the case where the thresholds are given (i.e. not considering all possible thresholds based on the scores), + this weird-looking function is faster than the two options in `torchmetrics` on the CPU: + - `_binary_precision_recall_curve_update_vectorized` + - `_binary_precision_recall_curve_update_loop` + (both in module `torchmetrics.functional.classification.precision_recall_curve` in `torchmetrics==1.1.0`). + Note: VALIDATION IS NOT DONE HERE. Make sure to validate the arguments before calling this function. + + Args: + scores (np.ndarray): Anomaly scores (D,). + gts (np.ndarray): Binary (bool) ground truth of shape (D,). + thresholds (np.ndarray): Sequence of thresholds in ascending order (K,). + + Returns: + np.ndarray: Binary classification matrix curve (K, 2, 2) + Details: `anomalib.metrics.per_image.binclf_curve_numpy.binclf_multiple_curves`. 
+ """ + num_th = len(thresholds) + + # POSITIVES + scores_positives = scores[gts] + # the sorting is very important for the algorithm to work and the speedup + scores_positives = np.sort(scores_positives) + # variable updated in the loop; start counting with lowest thresh ==> everything is predicted as positive + num_pos = current_count_tp = scores_positives.size + tps = np.empty((num_th,), dtype=np.int64) + + # NEGATIVES + # same thing but for the negative samples + scores_negatives = scores[~gts] + scores_negatives = np.sort(scores_negatives) + num_neg = current_count_fp = scores_negatives.size + fps = np.empty((num_th,), dtype=np.int64) + + def score_less_than_thresh(score: float, thresh: float) -> bool: + return score < thresh + + # it will progressively drop the scores that are below the current thresh + for thresh_idx, thresh in enumerate(thresholds): + # UPDATE POSITIVES + # < becasue it is the same as ~(>=) + num_drop = sum(1 for _ in itertools.takewhile(partial(score_less_than_thresh, thresh=thresh), scores_positives)) + scores_positives = scores_positives[num_drop:] + current_count_tp -= num_drop + tps[thresh_idx] = current_count_tp + + # UPDATE NEGATIVES + # same with the negatives + num_drop = sum(1 for _ in itertools.takewhile(partial(score_less_than_thresh, thresh=thresh), scores_negatives)) + scores_negatives = scores_negatives[num_drop:] + current_count_fp -= num_drop + fps[thresh_idx] = current_count_fp + + # deduce the rest of the matrix counts + fns = num_pos * np.ones((num_th,), dtype=np.int64) - tps + tns = num_neg * np.ones((num_th,), dtype=np.int64) - fps + + # sequence of dimensions is (thresholds, true class, predicted class) (see docstring) + return np.stack( + [ + np.stack([tns, fps], axis=-1), + np.stack([fns, tps], axis=-1), + ], + axis=-1, + ).transpose(0, 2, 1) + + +def binary_classification_curve( + scores_batch: torch.Tensor, + gts_batch: torch.Tensor, + thresholds: torch.Tensor, +) -> torch.Tensor: + """Returns a binary 
classification matrix at each threshold for each image in the batch. + + This is a wrapper around `_binary_classification_curve`. + Validation of the arguments is done here (not in the actual implementation functions). + + Note: predicted as positive condition is `score >= thresh`. + + Args: + scores_batch (torch.Tensor): Anomaly scores (N, D,). + gts_batch (torch.Tensor): Binary (bool) ground truth of shape (N, D,). + thresholds (torch.Tensor): Sequence of thresholds in ascending order (K,). + + Returns: + torch.Tensor: Binary classification matrix curves (N, K, 2, 2) + + The last two dimensions are the confusion matrix (ground truth, predictions) + So for each thresh it gives: + - `tp`: `[... , 1, 1]` + - `fp`: `[... , 0, 1]` + - `fn`: `[... , 1, 0]` + - `tn`: `[... , 0, 0]` + + `t` is for `true` and `f` is for `false`, `p` is for `positive` and `n` is for `negative`, so: + - `tp` stands for `true positive` + - `fp` stands for `false positive` + - `fn` stands for `false negative` + - `tn` stands for `true negative` + + The numbers in each confusion matrix are the counts (not the ratios). + + Counts are relative to each instance (i.e. from 0 to D, e.g. the total is the number of pixels in the image). + + Thresholds are shared across all instances, so all confusion matrices, for instance, + at position [:, 0, :, :] are relative to the 1st threshold in `thresholds`. + + Thresholds are sorted in ascending order. + """ + _validate.is_scores_batch(scores_batch) + _validate.is_gts_batch(gts_batch) + _validate.is_same_shape(scores_batch, gts_batch) + _validate.is_valid_threshold(thresholds) + # TODO(ashwinvaidya17): this is kept as numpy for now because it is much faster. 
+ # TEMP-0 + result = np.vectorize(_binary_classification_curve, signature="(n),(n),(k)->(k,2,2)")( + scores_batch.detach().cpu().numpy(), + gts_batch.detach().cpu().numpy(), + thresholds.detach().cpu().numpy(), + ) + return torch.from_numpy(result).to(scores_batch.device) + + +def _get_linspaced_thresholds(anomaly_maps: torch.Tensor, num_thresholds: int) -> torch.Tensor: + """Get thresholds linearly spaced between the min and max of the anomaly maps.""" + _validate.is_num_thresholds_gte2(num_thresholds) + # this operation can be a bit expensive + thresh_low, thresh_high = thresh_bounds = (anomaly_maps.min().item(), anomaly_maps.max().item()) + try: + _validate.validate_threshold_bounds(thresh_bounds) + except ValueError as ex: + msg = f"Invalid threshold bounds computed from the given anomaly maps. Cause: {ex}" + raise ValueError(msg) from ex + return torch.linspace(thresh_low, thresh_high, num_thresholds, dtype=anomaly_maps.dtype) + + +def threshold_and_binary_classification_curve( + anomaly_maps: torch.Tensor, + masks: torch.Tensor, + threshold_choice: ThresholdMethod | str = ThresholdMethod.MINMAX_LINSPACE, + thresholds: torch.Tensor | None = None, + num_thresholds: int | None = None, +) -> tuple[torch.Tensor, torch.Tensor]: + """Return thresholds and binary classification matrix at each threshold for each image in the batch. + + Args: + anomaly_maps (torch.Tensor): Anomaly score maps of shape (N, H, W) + masks (torch.Tensor): Binary ground truth masks of shape (N, H, W) + threshold_choice (str, optional): Sequence of thresholds to use. Defaults to THRESH_SEQUENCE_MINMAX_LINSPACE. + thresholds (torch.Tensor, optional): Sequence of thresholds to use. + Only applicable when threshold_choice is THRESH_SEQUENCE_GIVEN. + num_thresholds (int, optional): Number of thresholds between the min and max of the anomaly maps. + Only applicable when threshold_choice is THRESH_SEQUENCE_MINMAX_LINSPACE. 
+ + Returns: + tuple[torch.Tensor, torch.Tensor]: + [0] Thresholds of shape (K,) and dtype is the same as `anomaly_maps.dtype`. + + [1] Binary classification matrices of shape (N, K, 2, 2) + + N: number of images/instances + K: number of thresholds + + The last two dimensions are the confusion matrix (ground truth, predictions) + So for each thresh it gives: + - `tp`: `[... , 1, 1]` + - `fp`: `[... , 0, 1]` + - `fn`: `[... , 1, 0]` + - `tn`: `[... , 0, 0]` + + `t` is for `true` and `f` is for `false`, `p` is for `positive` and `n` is for `negative`, so: + - `tp` stands for `true positive` + - `fp` stands for `false positive` + - `fn` stands for `false negative` + - `tn` stands for `true negative` + + The numbers in each confusion matrix are the counts of pixels in the image (not the ratios). + + Thresholds are shared across all images, so all confusion matrices, for instance, + at position [:, 0, :, :] are relative to the 1st threshold in `thresholds`. + + Thresholds are sorted in ascending order. 
+ """ + threshold_choice = ThresholdMethod(threshold_choice) + _validate.is_anomaly_maps(anomaly_maps) + _validate.is_masks(masks) + _validate.is_same_shape(anomaly_maps, masks) + + if threshold_choice == ThresholdMethod.GIVEN: + assert thresholds is not None + _validate.is_valid_threshold(thresholds) + if num_thresholds is not None: + logger.warning( + "Argument `num_thresholds` was given, " + f"but it is ignored because `thresholds_choice` is '{threshold_choice.value}'.", + ) + thresholds = thresholds.to(anomaly_maps.dtype) + + elif threshold_choice == ThresholdMethod.MINMAX_LINSPACE: + assert num_thresholds is not None + if thresholds is not None: + logger.warning( + "Argument `thresholds_given` was given, " + f"but it is ignored because `thresholds_choice` is '{threshold_choice.value}'.", + ) + # `num_thresholds` is validated in the function below + thresholds = _get_linspaced_thresholds(anomaly_maps, num_thresholds) + + elif threshold_choice == ThresholdMethod.MEAN_FPR_OPTIMIZED: + raise NotImplementedError(f"TODO implement {threshold_choice.value}") # noqa: EM102 + + else: + msg = ( + f"Expected `threshs_choice` to be from {list(ThresholdMethod.__members__)}," + f" but got '{threshold_choice.value}'" + ) + raise NotImplementedError(msg) + + # keep the batch dimension and flatten the rest + scores_batch = anomaly_maps.reshape(anomaly_maps.shape[0], -1) + gts_batch = masks.reshape(masks.shape[0], -1).to(bool) # make sure it is boolean + + binclf_curves = binary_classification_curve(scores_batch, gts_batch, thresholds) + + num_images = anomaly_maps.shape[0] + + try: + _validate.is_binclf_curves(binclf_curves, valid_thresholds=thresholds) + + # these two validations cannot be done in `_validate.binclf_curves` because it does not have access to the + # original shapes of `anomaly_maps` + if binclf_curves.shape[0] != num_images: + msg = ( + "Expected `binclf_curves` to have the same number of images as `anomaly_maps`, " + f"but got {binclf_curves.shape[0]} and 
{anomaly_maps.shape[0]}" + ) + raise RuntimeError(msg) + + except (TypeError, ValueError) as ex: + msg = f"Invalid `binclf_curves` was computed. Cause: {ex}" + raise RuntimeError(msg) from ex + + return thresholds, binclf_curves + + +def per_image_tpr(binclf_curves: torch.Tensor) -> torch.Tensor: + """True positive rates (TPR) for image for each thresh. + + TPR = TP / P = TP / (TP + FN) + + TP: true positives + FN: false negatives + P: positives (TP + FN) + + Args: + binclf_curves (torch.Tensor): Binary classification matrix curves (N, K, 2, 2). See `binary_classification_curve`. + + Returns: + torch.Tensor: shape (N, K), dtype float64 + N: number of images + K: number of thresholds + + Thresholds are sorted in ascending order, so TPR is in descending order. + """ + # shape: (num images, num thresholds) + tps = binclf_curves[..., 1, 1] + pos = binclf_curves[..., 1, :].sum(axis=2) # 2 was the 3 originally + + # tprs will be nan if pos == 0 (normal image), which is expected + return tps.to(torch.float64) / pos.to(torch.float64) + + +def per_image_fpr(binclf_curves: torch.Tensor) -> torch.Tensor: + """False positive rates (FPR) for image for each thresh. + + FPR = FP / N = FP / (FP + TN) + + FP: false positives + TN: true negatives + N: negatives (FP + TN) + + Args: + binclf_curves (torch.Tensor): Binary classification matrix curves (N, K, 2, 2). See `binary_classification_curve`. + + Returns: + torch.Tensor: shape (N, K), dtype float64 + N: number of images + K: number of thresholds + + Thresholds are sorted in ascending order, so FPR is in descending order.
+ """ + # shape: (num images, num thresholds) + fps = binclf_curves[..., 0, 1] + neg = binclf_curves[..., 0, :].sum(axis=2) # 2 was the 3 originally + + # it can be `nan` if an anomalous image is fully covered by the mask + return fps.to(torch.float64) / neg.to(torch.float64) diff --git a/src/anomalib/metrics/pimo/dataclasses.py b/src/anomalib/metrics/pimo/dataclasses.py new file mode 100644 index 0000000000..0c5aeb025d --- /dev/null +++ b/src/anomalib/metrics/pimo/dataclasses.py @@ -0,0 +1,226 @@ +"""Dataclasses for PIMO metrics.""" + +# Based on the code: https://github.com/jpcbertoldo/aupimo +# +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import dataclass, field + +import torch + +from . import _validate, functional + + +@dataclass +class PIMOResult: + """Per-Image Overlap (PIMO, pronounced pee-mo) curve. + + This interface gathers the PIMO curve data and metadata and provides several utility methods. + + Notation: + - N: number of images + - K: number of thresholds + - FPR: False Positive Rate + - TPR: True Positive Rate + + Attributes: + thresholds (torch.Tensor): sequence of K (monotonically increasing) thresholds used to compute the PIMO curve + shared_fpr (torch.Tensor): K values of the shared FPR metric at the corresponding thresholds + per_image_tprs (torch.Tensor): for each of the N images, the K values of in-image TPR at the corresponding + thresholds + """ + + # data + thresholds: torch.Tensor = field(repr=False) # shape => (K,) + shared_fpr: torch.Tensor = field(repr=False) # shape => (K,) + per_image_tprs: torch.Tensor = field(repr=False) # shape => (N, K) + + @property + def num_threshsholds(self) -> int: + """Number of thresholds.""" + return self.thresholds.shape[0] + + @property + def num_images(self) -> int: + """Number of images.""" + return self.per_image_tprs.shape[0] + + @property + def image_classes(self) -> torch.Tensor: + """Image classes (0: normal, 1: anomalous). 
+ + Deduced from the per-image TPRs. + If any TPR value is not NaN, the image is considered anomalous. + """ + return (~torch.isnan(self.per_image_tprs)).any(dim=1).to(torch.int32) + + def __post_init__(self) -> None: + """Validate the inputs for the result object are consistent.""" + try: + _validate.is_valid_threshold(self.thresholds) + _validate.is_rate_curve(self.shared_fpr, nan_allowed=False, decreasing=True) # is_shared_fpr + _validate.is_per_image_tprs(self.per_image_tprs, self.image_classes) + + except (TypeError, ValueError) as ex: + msg = f"Invalid inputs for {self.__class__.__name__} object. Cause: {ex}." + raise TypeError(msg) from ex + + if self.thresholds.shape != self.shared_fpr.shape: + msg = ( + f"Invalid {self.__class__.__name__} object. Attributes have inconsistent shapes: " + f"{self.thresholds.shape=} != {self.shared_fpr.shape=}." + ) + raise TypeError(msg) + + if self.thresholds.shape[0] != self.per_image_tprs.shape[1]: + msg = ( + f"Invalid {self.__class__.__name__} object. Attributes have inconsistent shapes: " + f"{self.thresholds.shape[0]=} != {self.per_image_tprs.shape[1]=}." + ) + raise TypeError(msg) + + def thresh_at(self, fpr_level: float) -> tuple[int, float, float]: + """Return the threshold at the given shared FPR. + + See `anomalib.metrics.pimo.functional.thresh_at_shared_fpr_level` for details. + + Args: + fpr_level (float): shared FPR level + + Returns: + tuple[int, float, float]: + [0] index of the threshold + [1] threshold + [2] the actual shared FPR value at the returned threshold + """ + return functional.thresh_at_shared_fpr_level( + self.thresholds, + self.shared_fpr, + fpr_level, + ) + + +@dataclass +class AUPIMOResult: + """Area Under the Per-Image Overlap (AUPIMO, pronounced a-u-pee-mo) curve. + + This interface gathers the AUPIMO data and metadata and provides several utility methods.
+ + Attributes: + fpr_lower_bound (float): [metadata] LOWER bound of the FPR integration range + fpr_upper_bound (float): [metadata] UPPER bound of the FPR integration range + num_thresholds (int): [metadata] number of thresholds used to effectively compute AUPIMO; + should not be confused with the number of thresholds used to compute the PIMO curve + thresh_lower_bound (float): LOWER threshold bound --> corresponds to the UPPER FPR bound + thresh_upper_bound (float): UPPER threshold bound --> corresponds to the LOWER FPR bound + aupimos (torch.Tensor): values of AUPIMO scores (1 per image) + """ + + # metadata + fpr_lower_bound: float + fpr_upper_bound: float + num_thresholds: int + + # data + thresh_lower_bound: float = field(repr=False) + thresh_upper_bound: float = field(repr=False) + aupimos: torch.Tensor = field(repr=False) # shape => (N,) + + @property + def num_images(self) -> int: + """Number of images.""" + return self.aupimos.shape[0] + + @property + def num_normal_images(self) -> int: + """Number of normal images.""" + return int((self.image_classes == 0).sum()) + + @property + def num_anomalous_images(self) -> int: + """Number of anomalous images.""" + return int((self.image_classes == 1).sum()) + + @property + def image_classes(self) -> torch.Tensor: + """Image classes (0: normal, 1: anomalous).""" + # a `nan` aupimo marks a normal image (class 0), so the NaN mask is negated: scored images are class 1 + return (~self.aupimos.isnan()).to(torch.int32) + + @property + def fpr_bounds(self) -> tuple[float, float]: + """Lower and upper bounds of the FPR integration range.""" + return self.fpr_lower_bound, self.fpr_upper_bound + + @property + def thresh_bounds(self) -> tuple[float, float]: + """Lower and upper bounds of the threshold integration range. + + Recall: they correspond to the FPR bounds in reverse order.
+ I.e.: + fpr_lower_bound --> thresh_upper_bound + fpr_upper_bound --> thresh_lower_bound + """ + return self.thresh_lower_bound, self.thresh_upper_bound + + def __post_init__(self) -> None: + """Validate the inputs for the result object are consistent.""" + try: + _validate.is_rate_range((self.fpr_lower_bound, self.fpr_upper_bound)) + # TODO(jpcbertoldo): warn when it's too low (use parameters from the numpy code) # noqa: TD003 + _validate.is_num_thresholds_gte2(self.num_thresholds) + _validate.is_rates(self.aupimos, nan_allowed=True) # validate is_aupimos + + _validate.validate_threshold_bounds((self.thresh_lower_bound, self.thresh_upper_bound)) + + except (TypeError, ValueError) as ex: + msg = f"Invalid inputs for {self.__class__.__name__} object. Cause: {ex}." + raise TypeError(msg) from ex + + @classmethod + def from_pimo_result( + cls: type["AUPIMOResult"], + pimo_result: PIMOResult, + fpr_bounds: tuple[float, float], + num_thresholds_auc: int, + aupimos: torch.Tensor, + ) -> "AUPIMOResult": + """Return an AUPIMO result object from a PIMO result object. + + Args: + pimo_result: PIMO result object + fpr_bounds: lower and upper bounds of the FPR integration range + num_thresholds_auc: number of thresholds used to effectively compute AUPIMO; + NOT the number of thresholds used to compute the PIMO curve! + aupimos: AUPIMO scores + paths: paths to the source images to which the AUPIMO scores correspond. + """ + if pimo_result.per_image_tprs.shape[0] != aupimos.shape[0]: + msg = ( + f"Invalid {cls.__name__} object. Attributes have inconsistent shapes: " + f"there are {pimo_result.per_image_tprs.shape[0]} PIMO curves but {aupimos.shape[0]} AUPIMO scores." + ) + raise TypeError(msg) + + if not torch.isnan(aupimos[pimo_result.image_classes == 0]).all(): + msg = "Expected all normal images to have NaN AUPIMOs, but some have non-NaN values." 
+ raise TypeError(msg) + + if torch.isnan(aupimos[pimo_result.image_classes == 1]).any(): + msg = "Expected all anomalous images to have valid AUPIMOs (not nan), but some have NaN values." + raise TypeError(msg) + + fpr_lower_bound, fpr_upper_bound = fpr_bounds + # recall: fpr upper/lower bounds are the same as the thresh lower/upper bounds + _, thresh_lower_bound, __ = pimo_result.thresh_at(fpr_upper_bound) + _, thresh_upper_bound, __ = pimo_result.thresh_at(fpr_lower_bound) + # `_` is the threshold's index, `__` is the actual fpr value + return cls( + fpr_lower_bound=fpr_lower_bound, + fpr_upper_bound=fpr_upper_bound, + num_thresholds=num_thresholds_auc, + thresh_lower_bound=float(thresh_lower_bound), + thresh_upper_bound=float(thresh_upper_bound), + aupimos=aupimos, + ) diff --git a/src/anomalib/metrics/pimo/functional.py b/src/anomalib/metrics/pimo/functional.py new file mode 100644 index 0000000000..7eac07b1bd --- /dev/null +++ b/src/anomalib/metrics/pimo/functional.py @@ -0,0 +1,355 @@ +"""Per-Image Overlap curve (PIMO, pronounced pee-mo) and its area under the curve (AUPIMO). + +Details: `anomalib.metrics.per_image.pimo`. +""" + +# Original Code +# https://github.com/jpcbertoldo/aupimo +# +# Modified +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging + +import numpy as np +import torch + +from . import _validate +from .binary_classification_curve import ( + ThresholdMethod, + _get_linspaced_thresholds, + per_image_fpr, + per_image_tpr, + threshold_and_binary_classification_curve, +) +from .utils import images_classes_from_masks + +logger = logging.getLogger(__name__) + + +def pimo_curves( + anomaly_maps: torch.Tensor, + masks: torch.Tensor, + num_thresholds: int, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Compute the Per-IMage Overlap (PIMO, pronounced pee-mo) curves. + + PIMO is a curve of True Positive Rate (TPR) values on each image across multiple anomaly score thresholds. 
+ The anomaly score thresholds are indexed by a (cross-image shared) value of False Positive Rate (FPR) measure on + the normal images. + + Details: `anomalib.metrics.per_image.pimo`. + + Args' notation: + N: number of images + H: image height + W: image width + K: number of thresholds + + Args: + anomaly_maps: floating point anomaly score maps of shape (N, H, W) + masks: binary (bool or int) ground truth masks of shape (N, H, W) + num_thresholds: number of thresholds to compute (K) + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + [0] thresholds of shape (K,) in ascending order + [1] shared FPR values of shape (K,) in descending order (indices correspond to the thresholds) + [2] per-image TPR curves of shape (N, K), axis 1 in descending order (indices correspond to the thresholds) + [3] image classes of shape (N,) with values 0 (normal) or 1 (anomalous) + """ + # validate the strings are valid + _validate.is_num_thresholds_gte2(num_thresholds) + _validate.is_anomaly_maps(anomaly_maps) + _validate.is_masks(masks) + _validate.is_same_shape(anomaly_maps, masks) + _validate.has_at_least_one_anomalous_image(masks) + _validate.has_at_least_one_normal_image(masks) + + image_classes = images_classes_from_masks(masks) + + # the thresholds are computed here so that they can be restrained to the normal images + # therefore getting a better resolution in terms of FPR quantization + # otherwise the function `binclf_curve_numpy.per_image_binclf_curve` would have the range of thresholds + # computed from all the images (normal + anomalous) + thresholds = _get_linspaced_thresholds( + anomaly_maps[image_classes == 0], + num_thresholds, + ) + + # N: number of images, K: number of thresholds + # shapes are (K,) and (N, K, 2, 2) + thresholds, binclf_curves = threshold_and_binary_classification_curve( + anomaly_maps=anomaly_maps, + masks=masks, + threshold_choice=ThresholdMethod.GIVEN.value, + thresholds=thresholds, + num_thresholds=None, + ) + + 
shared_fpr: torch.Tensor + # mean-per-image-fpr on normal images + # shape -> (N, K) + per_image_fprs_normals = per_image_fpr(binclf_curves[image_classes == 0]) + try: + _validate.is_per_image_rate_curves(per_image_fprs_normals, nan_allowed=False, decreasing=True) + except ValueError as ex: + msg = f"Cannot compute PIMO because the per-image FPR curves from normal images are invalid. Cause: {ex}" + raise RuntimeError(msg) from ex + + # shape -> (K,) + # this is the only shared FPR metric implemented so far, + # see note about shared FPR in Details: `anomalib.metrics.per_image.pimo`. + shared_fpr = per_image_fprs_normals.mean(axis=0) + + # shape -> (N, K) + per_image_tprs = per_image_tpr(binclf_curves) + + return thresholds, shared_fpr, per_image_tprs, image_classes + + +# =========================================== AUPIMO =========================================== + + +def aupimo_scores( + anomaly_maps: torch.Tensor, + masks: torch.Tensor, + num_thresholds: int = 300_000, + fpr_bounds: tuple[float, float] = (1e-5, 1e-4), + force: bool = False, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, int]: + """Compute the PIMO curves and their Area Under the Curve (i.e. AUPIMO) scores. + + Scores are computed from the integration of the PIMO curves within the given FPR bounds, then normalized to [0, 1]. + It can be thought of as the average TPR of the PIMO curves within the given FPR bounds. + + Details: `anomalib.metrics.per_image.pimo`. 
+ + Args' notation: + N: number of images + H: image height + W: image width + K: number of thresholds + + Args: + anomaly_maps: floating point anomaly score maps of shape (N, H, W) + masks: binary (bool or int) ground truth masks of shape (N, H, W) + num_thresholds: number of thresholds to compute (K) + fpr_bounds: lower and upper bounds of the FPR integration range + force: whether to force the computation despite bad conditions + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + [0] thresholds of shape (K,) in ascending order + [1] shared FPR values of shape (K,) in descending order (indices correspond to the thresholds) + [2] per-image TPR curves of shape (N, K), axis 1 in descending order (indices correspond to the thresholds) + [3] image classes of shape (N,) with values 0 (normal) or 1 (anomalous) + [4] AUPIMO scores of shape (N,) in [0, 1] + [5] number of points used in the AUC integration + """ + _validate.is_rate_range(fpr_bounds) + + # other validations are done in the `pimo` function + thresholds, shared_fpr, per_image_tprs, image_classes = pimo_curves( + anomaly_maps=anomaly_maps, + masks=masks, + num_thresholds=num_thresholds, + ) + try: + _validate.is_valid_threshold(thresholds) + _validate.is_rate_curve(shared_fpr, nan_allowed=False, decreasing=True) + _validate.is_images_classes(image_classes) + _validate.is_per_image_rate_curves(per_image_tprs[image_classes == 1], nan_allowed=False, decreasing=True) + + except ValueError as ex: + msg = f"Cannot compute AUPIMO because the PIMO curves are invalid. 
Cause: {ex}" + raise RuntimeError(msg) from ex + + fpr_lower_bound, fpr_upper_bound = fpr_bounds + + # get the threshold indices where the fpr bounds are achieved + fpr_lower_bound_thresh_idx, _, fpr_lower_bound_defacto = thresh_at_shared_fpr_level( + thresholds, + shared_fpr, + fpr_lower_bound, + ) + fpr_upper_bound_thresh_idx, _, fpr_upper_bound_defacto = thresh_at_shared_fpr_level( + thresholds, + shared_fpr, + fpr_upper_bound, + ) + + if not torch.isclose( + fpr_lower_bound_defacto, + torch.tensor(fpr_lower_bound, dtype=fpr_lower_bound_defacto.dtype, device=fpr_lower_bound_defacto.device), + rtol=(rtol := 1e-2), + ): + logger.warning( + "The lower bound of the shared FPR integration range is not exactly achieved. " + f"Expected {fpr_lower_bound} but got {fpr_lower_bound_defacto}, which is not within {rtol=}.", + ) + + if not torch.isclose( + fpr_upper_bound_defacto, + torch.tensor(fpr_upper_bound, dtype=fpr_upper_bound_defacto.dtype, device=fpr_upper_bound_defacto.device), + rtol=rtol, + ): + logger.warning( + "The upper bound of the shared FPR integration range is not exactly achieved. " + f"Expected {fpr_upper_bound} but got {fpr_upper_bound_defacto}, which is not within {rtol=}.", + ) + + # reminder: fpr lower/upper bound is threshold upper/lower bound (reversed) + thresh_lower_bound_idx = fpr_upper_bound_thresh_idx + thresh_upper_bound_idx = fpr_lower_bound_thresh_idx + + # deal with edge cases + if thresh_lower_bound_idx >= thresh_upper_bound_idx: + msg = ( + "The thresholds corresponding to the given `fpr_bounds` are not valid because " + "they matched the same threshold or the are in the wrong order. " + f"FPR upper/lower = threshold lower/upper = {thresh_lower_bound_idx} and {thresh_upper_bound_idx}." 
+ ) + raise RuntimeError(msg) + + # limit the curves to the integration range [lbound, ubound] + shared_fpr_bounded: torch.Tensor = shared_fpr[thresh_lower_bound_idx : (thresh_upper_bound_idx + 1)] + per_image_tprs_bounded: torch.Tensor = per_image_tprs[:, thresh_lower_bound_idx : (thresh_upper_bound_idx + 1)] + + # `shared_fpr` and `tprs` are in descending order; `flip()` reverts to ascending order + shared_fpr_bounded = torch.flip(shared_fpr_bounded, dims=[0]) + per_image_tprs_bounded = torch.flip(per_image_tprs_bounded, dims=[1]) + + # the log's base does not matter because it's a constant factor canceled by normalization factor + shared_fpr_bounded_log = torch.log(shared_fpr_bounded) + + # deal with edge cases + invalid_shared_fpr = ~torch.isfinite(shared_fpr_bounded_log) + + if invalid_shared_fpr.all(): + msg = ( + "Cannot compute AUPIMO because the shared fpr integration range is invalid). " + "Try increasing the number of thresholds." + ) + raise RuntimeError(msg) + + if invalid_shared_fpr.any(): + logger.warning( + "Some values in the shared fpr integration range are nan. " + "The AUPIMO will be computed without these values.", + ) + + # get rid of nan values by removing them from the integration range + shared_fpr_bounded_log = shared_fpr_bounded_log[~invalid_shared_fpr] + per_image_tprs_bounded = per_image_tprs_bounded[:, ~invalid_shared_fpr] + + num_points_integral = int(shared_fpr_bounded_log.shape[0]) + + if num_points_integral <= 30: + msg = ( + "Cannot compute AUPIMO because the shared fpr integration range doesn't have enough points. " + f"Found {num_points_integral} points in the integration range. " + "Try increasing `num_thresholds`." + ) + if not force: + raise RuntimeError(msg) + msg += " Computation was forced!" + logger.warning(msg) + + if num_points_integral < 300: + logger.warning( + "The AUPIMO may be inaccurate because the shared fpr integration range doesn't have enough points. 
" + f"Found {num_points_integral} points in the integration range. " + "Try increasing `num_thresholds`.", + ) + + aucs: torch.Tensor = torch.trapezoid(per_image_tprs_bounded, x=shared_fpr_bounded_log, axis=1) + + # normalize, then clip(0, 1) makes sure that the values are in [0, 1] in case of numerical errors + normalization_factor = aupimo_normalizing_factor(fpr_bounds) + aucs = (aucs / normalization_factor).clip(0, 1) + + return thresholds, shared_fpr, per_image_tprs, image_classes, aucs, num_points_integral + + +# =========================================== AUX =========================================== + + +def thresh_at_shared_fpr_level( + thresholds: torch.Tensor, + shared_fpr: torch.Tensor, + fpr_level: float, +) -> tuple[int, float, torch.Tensor]: + """Return the threshold and its index at the given shared FPR level. + + Three cases are possible: + - fpr_level == 0: the lowest threshold that achieves 0 FPR is returned + - fpr_level == 1: the highest threshold that achieves 1 FPR is returned + - 0 < fpr_level < 1: the threshold that achieves the closest (higher or lower) FPR to `fpr_level` is returned + + Args: + thresholds: thresholds at which the shared FPR was computed. + shared_fpr: shared FPR values. + fpr_level: shared FPR value at which to get the threshold. + + Returns: + tuple[int, float, float]: + [0] index of the threshold + [1] threshold + [2] the actual shared FPR value at the returned threshold + """ + _validate.is_valid_threshold(thresholds) + _validate.is_rate_curve(shared_fpr, nan_allowed=False, decreasing=True) + _validate.joint_validate_thresholds_shared_fpr(thresholds, shared_fpr) + _validate.is_rate(fpr_level, zero_ok=True, one_ok=True) + + shared_fpr_min, shared_fpr_max = shared_fpr.min(), shared_fpr.max() + + if fpr_level < shared_fpr_min: + msg = ( + "Invalid `fpr_level` because it's out of the range of `shared_fpr` = " + f"[{shared_fpr_min}, {shared_fpr_max}], and got {fpr_level}." 
+ ) + raise ValueError(msg) + + if fpr_level > shared_fpr_max: + msg = ( + "Invalid `fpr_level` because it's out of the range of `shared_fpr` = " + f"[{shared_fpr_min}, {shared_fpr_max}], and got {fpr_level}." + ) + raise ValueError(msg) + + # fpr_level == 0 or 1 are special cases + # because there may be multiple solutions, and the chosen one should be their MINIMUM/MAXIMUM respectively + if fpr_level == 0.0: + index = torch.min(torch.where(shared_fpr == fpr_level)[0]) + + elif fpr_level == 1.0: + index = torch.max(torch.where(shared_fpr == fpr_level)[0]) + + else: + index = torch.argmin(torch.abs(shared_fpr - fpr_level)) + + index = int(index) + fpr_level_defacto = shared_fpr[index] + thresh = thresholds[index] + return index, thresh, fpr_level_defacto + + +def aupimo_normalizing_factor(fpr_bounds: tuple[float, float]) -> float: + """Constant that normalizes the AUPIMO integral to 0-1 range. + + It is the maximum possible value from the integral in AUPIMO's definition. + It corresponds to assuming a constant function T_i: thresh --> 1. + + Args: + fpr_bounds: lower and upper bounds of the FPR integration range. + + Returns: + float: the normalization factor (>0). + """ + _validate.is_rate_range(fpr_bounds) + fpr_lower_bound, fpr_upper_bound = fpr_bounds + # the log's base must be the same as the one used in the integration! + return float(np.log(fpr_upper_bound / fpr_lower_bound)) diff --git a/src/anomalib/metrics/pimo/pimo.py b/src/anomalib/metrics/pimo/pimo.py new file mode 100644 index 0000000000..9703b60b59 --- /dev/null +++ b/src/anomalib/metrics/pimo/pimo.py @@ -0,0 +1,296 @@ +"""Per-Image Overlap curve (PIMO, pronounced pee-mo) and its area under the curve (AUPIMO). + +# PIMO + +PIMO is a curve of True Positive Rate (TPR) values on each image across multiple anomaly score thresholds. +The anomaly score thresholds are indexed by a (shared) value of False Positive Rate (FPR) measure on the normal images.
+ +Each *anomalous* image has its own curve such that the X-axis is shared by all of them. + +At a given threshold: + X-axis: Shared FPR (may vary) + 1. Log of the Average of per-image FPR on normal images. + SEE NOTE BELOW. + Y-axis: per-image TP Rate (TPR), or "Overlap" between the ground truth and the predicted masks. + +*** Note about other shared FPR alternatives *** +The shared FPR metric can be made harder by using the cross-image max (or high-percentile) FPRs instead of the mean. +Rationale: this will further punish models that have exceptional FPs in normal images. +So far there is only one shared FPR metric implemented but others will be added in the future. + +# AUPIMO + +`AUPIMO` is the area under each `PIMO` curve with bounded integration range in terms of shared FPR. + +# Disclaimer + +This module implements torch interfaces to access the numpy code in `pimo_numpy.py`. +Tensors are converted to numpy arrays and then passed and validated in the numpy code. +The results are converted back to tensors and eventually wrapped in a dataclass object. + +Validations will preferably happen in ndarray so the numpy code can be reused without torch, +so often times the Tensor arguments will be converted to ndarray and then validated. +""" + +# Original Code +# https://github.com/jpcbertoldo/aupimo +# +# Modified +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging + +import torch +from torchmetrics import Metric + +from . import _validate, functional +from .dataclasses import AUPIMOResult, PIMOResult + +logger = logging.getLogger(__name__) + + +class PIMO(Metric): + """Per-IMage Overlap (PIMO, pronounced pee-mo) curves. + + This torchmetrics interface is a wrapper around the functional interface, which is a wrapper around the numpy code. + The tensors are converted to numpy arrays and then passed and validated in the numpy code. + The results are converted back to tensors and wrapped in a dataclass object.
+ + PIMO is a curve of True Positive Rate (TPR) values on each image across multiple anomaly score thresholds. + The anomaly score thresholds are indexed by a (cross-image shared) value of False Positive Rate (FPR) measure on + the normal images. + + Details: `anomalib.metrics.per_image.pimo`. + + Notation: + N: number of images + H: image height + W: image width + K: number of thresholds + + Attributes: + anomaly_maps: floating point anomaly score maps of shape (N, H, W) + masks: binary (bool or int) ground truth masks of shape (N, H, W) + + Args: + num_thresholds: number of thresholds to compute (K) + binclf_algorithm: algorithm to compute the binary classifier curve (see `binclf_curve_numpy.Algorithm`) + + Returns: + PIMOResult: PIMO curves dataclass object. See `PIMOResult` for details. + """ + + is_differentiable: bool = False + higher_is_better: bool | None = None + full_state_update: bool = False + + num_thresholds: int + binclf_algorithm: str + + anomaly_maps: list[torch.Tensor] + masks: list[torch.Tensor] + + @property + def _is_empty(self) -> bool: + """Return True if the metric has not been updated yet.""" + return len(self.anomaly_maps) == 0 + + @property + def num_images(self) -> int: + """Number of images.""" + return sum(am.shape[0] for am in self.anomaly_maps) + + @property + def image_classes(self) -> torch.Tensor: + """Image classes (0: normal, 1: anomalous).""" + return functional.images_classes_from_masks(self.masks) + + def __init__(self, num_thresholds: int) -> None: + """Per-Image Overlap (PIMO) curve. + + Args: + num_thresholds: number of thresholds used to compute the PIMO curve (K) + """ + super().__init__() + + logger.warning( + f"Metric `{self.__class__.__name__}` will save all targets and predictions in buffer." 
+ " For large datasets this may lead to large memory footprint.", + ) + + # the options below are, redundantly, validated here to avoid reaching + # an error later in the execution + + _validate.is_num_thresholds_gte2(num_thresholds) + self.num_thresholds = num_thresholds + + self.add_state("anomaly_maps", default=[], dist_reduce_fx="cat") + self.add_state("masks", default=[], dist_reduce_fx="cat") + + def update(self, anomaly_maps: torch.Tensor, masks: torch.Tensor) -> None: + """Update lists of anomaly maps and masks. + + Args: + anomaly_maps (torch.Tensor): predictions of the model (ndim == 2, float) + masks (torch.Tensor): ground truth masks (ndim == 2, binary) + """ + _validate.is_anomaly_maps(anomaly_maps) + _validate.is_masks(masks) + _validate.is_same_shape(anomaly_maps, masks) + self.anomaly_maps.append(anomaly_maps) + self.masks.append(masks) + + def compute(self) -> PIMOResult: + """Compute the PIMO curves. + + Call the functional interface `pimo_curves()`, which is a wrapper around the numpy code. + + Returns: + PIMOResult: PIMO curves dataclass object. See `PIMOResult` for details. + """ + if self._is_empty: + msg = "No anomaly maps and masks have been added yet. Please call `update()` first." + raise RuntimeError(msg) + anomaly_maps = torch.concat(self.anomaly_maps, dim=0) + masks = torch.concat(self.masks, dim=0) + + thresholds, shared_fpr, per_image_tprs, _ = functional.pimo_curves( + anomaly_maps, + masks, + self.num_thresholds, + ) + return PIMOResult( + thresholds=thresholds, + shared_fpr=shared_fpr, + per_image_tprs=per_image_tprs, + ) + + +class AUPIMO(PIMO): + """Area Under the Per-Image Overlap (PIMO) curve. + + This torchmetrics interface is a wrapper around the functional interface, which is a wrapper around the numpy code. + The tensors are converted to numpy arrays and then passed and validated in the numpy code. + The results are converted back to tensors and wrapped in an dataclass object. 
+ + Scores are computed from the integration of the PIMO curves within the given FPR bounds, then normalized to [0, 1]. + It can be thought of as the average TPR of the PIMO curves within the given FPR bounds. + + Details: `anomalib.metrics.per_image.pimo`. + + Notation: + N: number of images + H: image height + W: image width + K: number of thresholds + + Attributes: + anomaly_maps: floating point anomaly score maps of shape (N, H, W) + masks: binary (bool or int) ground truth masks of shape (N, H, W) + + Args: + num_thresholds: number of thresholds to compute (K) + fpr_bounds: lower and upper bounds of the FPR integration range + force: whether to force the computation despite bad conditions + + Returns: + tuple[PIMOResult, AUPIMOResult]: PIMO and AUPIMO results dataclass objects. See `PIMOResult` and `AUPIMOResult`. + """ + + fpr_bounds: tuple[float, float] + return_average: bool + force: bool + + @staticmethod + def normalizing_factor(fpr_bounds: tuple[float, float]) -> float: + """Constant that normalizes the AUPIMO integral to 0-1 range. + + It is the maximum possible value from the integral in AUPIMO's definition. + It corresponds to assuming a constant function T_i: thresh --> 1. + + Args: + fpr_bounds: lower and upper bounds of the FPR integration range. + + Returns: + float: the normalization factor (>0). + """ + return functional.aupimo_normalizing_factor(fpr_bounds) + + def __repr__(self) -> str: + """Show the metric name and its integration bounds.""" + lower, upper = self.fpr_bounds + return f"{self.__class__.__name__}([{lower:.2g}, {upper:.2g}])" + + def __init__( + self, + num_thresholds: int = 300_000, + fpr_bounds: tuple[float, float] = (1e-5, 1e-4), + return_average: bool = True, + force: bool = False, + ) -> None: + """Area Under the Per-Image Overlap (PIMO) curve. 
+ + Args: + num_thresholds: [passed to parent `PIMO`] number of thresholds used to compute the PIMO curve + fpr_bounds: lower and upper bounds of the FPR integration range + return_average: if True, return the average AUPIMO score; if False, return all the individual AUPIMO scores + force: if True, force the computation of the AUPIMO scores even in bad conditions (e.g. few points) + """ + super().__init__(num_thresholds=num_thresholds) + + # other validations are done in PIMO.__init__() + + _validate.is_rate_range(fpr_bounds) + self.fpr_bounds = fpr_bounds + self.return_average = return_average + self.force = force + + def compute(self, force: bool | None = None) -> tuple[PIMOResult, AUPIMOResult]: # type: ignore[override] + """Compute the PIMO curves and their Area Under the curve (AUPIMO) scores. + + Call the functional interface `aupimo_scores()`, which is a wrapper around the numpy code. + + Args: + force: if given (not None), override the `force` attribute. + + Returns: + tuple[PIMOResult, AUPIMOResult]: PIMO curves and AUPIMO scores dataclass objects. + See `PIMOResult` and `AUPIMOResult` for details. + """ + if self._is_empty: + msg = "No anomaly maps and masks have been added yet. Please call `update()` first." + raise RuntimeError(msg) + anomaly_maps = torch.concat(self.anomaly_maps, dim=0) + masks = torch.concat(self.masks, dim=0) + force = force if force is not None else self.force + + # other validations are done in the numpy code + + thresholds, shared_fpr, per_image_tprs, _, aupimos, num_thresholds_auc = functional.aupimo_scores( + anomaly_maps, + masks, + self.num_thresholds, + fpr_bounds=self.fpr_bounds, + force=force, + ) + + pimo_result = PIMOResult( + thresholds=thresholds, + shared_fpr=shared_fpr, + per_image_tprs=per_image_tprs, + ) + aupimo_result = AUPIMOResult.from_pimo_result( + pimo_result, + fpr_bounds=self.fpr_bounds, + # not `num_thresholds`! 
+ # `num_thresholds` is the number of thresholds used to compute the PIMO curve + # this is the number of thresholds used to compute the AUPIMO integral + num_thresholds_auc=num_thresholds_auc, + aupimos=aupimos, + ) + if self.return_average: + # normal images have NaN AUPIMO scores + is_nan = torch.isnan(aupimo_result.aupimos) + return aupimo_result.aupimos[~is_nan].mean() + return pimo_result, aupimo_result diff --git a/src/anomalib/metrics/pimo/utils.py b/src/anomalib/metrics/pimo/utils.py new file mode 100644 index 0000000000..f0cac45657 --- /dev/null +++ b/src/anomalib/metrics/pimo/utils.py @@ -0,0 +1,19 @@ +"""Torch-oriented interfaces for `utils.py`.""" + +# Original Code +# https://github.com/jpcbertoldo/aupimo +# +# Modified +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging + +import torch + +logger = logging.getLogger(__name__) + + +def images_classes_from_masks(masks: torch.Tensor) -> torch.Tensor: + """Deduce the image classes from the masks.""" + return (masks == 1).any(axis=(1, 2)).to(torch.int32) diff --git a/tests/unit/data/utils/test_path.py b/tests/unit/data/utils/test_path.py index c3f134b021..09f88496ad 100644 --- a/tests/unit/data/utils/test_path.py +++ b/tests/unit/data/utils/test_path.py @@ -76,3 +76,9 @@ def test_no_read_execute_permission() -> None: Path(tmp_dir).chmod(0o222) # Remove read and execute permission with pytest.raises(PermissionError, match=r"Read or execute permissions denied for the path:*"): validate_path(tmp_dir, base_dir=Path(tmp_dir)) + + @staticmethod + def test_file_wrongsuffix() -> None: + """Test ``validate_path`` raises ValueError for a file with wrong suffix.""" + with pytest.raises(ValueError, match="Path extension is not accepted."): + validate_path("file.png", should_exist=False, extensions=(".json", ".txt")) diff --git a/tests/unit/metrics/pimo/__init__.py b/tests/unit/metrics/pimo/__init__.py new file mode 100644 index 0000000000..555d67a102 --- /dev/null +++ 
b/tests/unit/metrics/pimo/__init__.py @@ -0,0 +1,8 @@ +"""Per-Image Metrics Tests.""" + +# Original Code +# https://github.com/jpcbertoldo/aupimo +# +# Modified +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/unit/metrics/pimo/test_binary_classification_curve.py b/tests/unit/metrics/pimo/test_binary_classification_curve.py new file mode 100644 index 0000000000..5459d08a14 --- /dev/null +++ b/tests/unit/metrics/pimo/test_binary_classification_curve.py @@ -0,0 +1,423 @@ +"""Tests for per-image binary classification curves using numpy version.""" + +# Original Code +# https://github.com/jpcbertoldo/aupimo +# +# Modified +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# ruff: noqa: SLF001, PT011 + +import pytest +import torch + +from anomalib.metrics.pimo.binary_classification_curve import ( + _binary_classification_curve, + binary_classification_curve, + per_image_fpr, + per_image_tpr, + threshold_and_binary_classification_curve, +) + + +def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: + """Generate test cases.""" + pred = torch.arange(1, 5, dtype=torch.float32) + thresholds = torch.arange(1, 5, dtype=torch.float32) + + gt_norm = torch.zeros(4).to(bool) + gt_anom = torch.concatenate([torch.zeros(2), torch.ones(2)]).to(bool) + + # in the case where thresholds are all unique values in the predictions + expected_norm = torch.stack( + [ + torch.tensor([[0, 4], [0, 0]]), + torch.tensor([[1, 3], [0, 0]]), + torch.tensor([[2, 2], [0, 0]]), + torch.tensor([[3, 1], [0, 0]]), + ], + axis=0, + ).to(int) + expected_anom = torch.stack( + [ + torch.tensor([[0, 2], [0, 2]]), + torch.tensor([[1, 1], [0, 2]]), + torch.tensor([[2, 0], [0, 2]]), + torch.tensor([[2, 0], [1, 1]]), + ], + axis=0, + ).to(int) + + expected_tprs_norm = torch.tensor([torch.nan, torch.nan, torch.nan, torch.nan]) + expected_tprs_anom = torch.tensor([1.0, 1.0, 1.0, 0.5]) + expected_tprs = 
torch.stack([expected_tprs_anom, expected_tprs_norm], axis=0).to(torch.float64) + + expected_fprs_norm = torch.tensor([1.0, 0.75, 0.5, 0.25]) + expected_fprs_anom = torch.tensor([1.0, 0.5, 0.0, 0.0]) + expected_fprs = torch.stack([expected_fprs_anom, expected_fprs_norm], axis=0).to(torch.float64) + + # in the case where all thresholds are higher than the highest prediction + expected_norm_thresholds_too_high = torch.stack( + [ + torch.tensor([[4, 0], [0, 0]]), + torch.tensor([[4, 0], [0, 0]]), + torch.tensor([[4, 0], [0, 0]]), + torch.tensor([[4, 0], [0, 0]]), + ], + axis=0, + ).to(int) + expected_anom_thresholds_too_high = torch.stack( + [ + torch.tensor([[2, 0], [2, 0]]), + torch.tensor([[2, 0], [2, 0]]), + torch.tensor([[2, 0], [2, 0]]), + torch.tensor([[2, 0], [2, 0]]), + ], + axis=0, + ).to(int) + + # in the case where all thresholds are lower than the lowest prediction + expected_norm_thresholds_too_low = torch.stack( + [ + torch.tensor([[0, 4], [0, 0]]), + torch.tensor([[0, 4], [0, 0]]), + torch.tensor([[0, 4], [0, 0]]), + torch.tensor([[0, 4], [0, 0]]), + ], + axis=0, + ).to(int) + expected_anom_thresholds_too_low = torch.stack( + [ + torch.tensor([[0, 2], [0, 2]]), + torch.tensor([[0, 2], [0, 2]]), + torch.tensor([[0, 2], [0, 2]]), + torch.tensor([[0, 2], [0, 2]]), + ], + axis=0, + ).to(int) + + if metafunc.function is test__binclf_one_curve: + metafunc.parametrize( + argnames=("pred", "gt", "thresholds", "expected"), + argvalues=[ + (pred, gt_anom, thresholds[:3], expected_anom[:3]), + (pred, gt_anom, thresholds, expected_anom), + (pred, gt_norm, thresholds, expected_norm), + (pred, gt_norm, 10 * thresholds, expected_norm_thresholds_too_high), + (pred, gt_anom, 10 * thresholds, expected_anom_thresholds_too_high), + (pred, gt_norm, 0.001 * thresholds, expected_norm_thresholds_too_low), + (pred, gt_anom, 0.001 * thresholds, expected_anom_thresholds_too_low), + ], + ) + + preds = torch.stack([pred, pred], axis=0) + gts = torch.stack([gt_anom, gt_norm], 
axis=0) + binclf_curves = torch.stack([expected_anom, expected_norm], axis=0) + binclf_curves_thresholds_too_high = torch.stack( + [expected_anom_thresholds_too_high, expected_norm_thresholds_too_high], + axis=0, + ) + binclf_curves_thresholds_too_low = torch.stack( + [expected_anom_thresholds_too_low, expected_norm_thresholds_too_low], + axis=0, + ) + + if metafunc.function is test__binclf_multiple_curves: + metafunc.parametrize( + argnames=("preds", "gts", "thresholds", "expecteds"), + argvalues=[ + (preds, gts, thresholds[:3], binclf_curves[:, :3]), + (preds, gts, thresholds, binclf_curves), + ], + ) + + if metafunc.function is test_binclf_multiple_curves: + metafunc.parametrize( + argnames=( + "preds", + "gts", + "thresholds", + "expected_binclf_curves", + ), + argvalues=[ + (preds[:1], gts[:1], thresholds, binclf_curves[:1]), + (preds, gts, thresholds, binclf_curves), + (10 * preds, gts, 10 * thresholds, binclf_curves), + ], + ) + + if metafunc.function is test_binclf_multiple_curves_validations: + metafunc.parametrize( + argnames=("args", "kwargs", "exception"), + argvalues=[ + # `scores` and `gts` must be 2D + ([preds.reshape(2, 2, 2), gts, thresholds], {}, ValueError), + ([preds, gts.flatten(), thresholds], {}, ValueError), + # `thresholds` must be 1D + ([preds, gts, thresholds.reshape(2, 2)], {}, ValueError), + # `scores` and `gts` must have the same shape + ([preds, gts[:1], thresholds], {}, ValueError), + ([preds[:, :2], gts, thresholds], {}, ValueError), + # `scores` be of type float + ([preds.to(int), gts, thresholds], {}, TypeError), + # `gts` be of type bool + ([preds, gts.to(int), thresholds], {}, TypeError), + # `thresholds` be of type float + ([preds, gts, thresholds.to(int)], {}, TypeError), + # `thresholds` must be sorted in ascending order + ([preds, gts, torch.flip(thresholds, dims=[0])], {}, ValueError), + ([preds, gts, torch.concatenate([thresholds[-2:], thresholds[:2]])], {}, ValueError), + # `thresholds` must be unique + ([preds, gts, 
torch.sort(torch.concatenate([thresholds, thresholds]))[0]], {}, ValueError), + ], + ) + + # the following tests are for `per_image_binclf_curve()`, which expects + # inputs in image spatial format, i.e. (height, width) + preds = preds.reshape(2, 2, 2) + gts = gts.reshape(2, 2, 2) + + per_image_binclf_curves_argvalues = [ + # `thresholds_choice` = "given" + ( + preds, + gts, + "given", + thresholds, + None, + thresholds, + binclf_curves, + ), + ( + preds, + gts, + "given", + 10 * thresholds, + 2, + 10 * thresholds, + binclf_curves_thresholds_too_high, + ), + ( + preds, + gts, + "given", + 0.01 * thresholds, + None, + 0.01 * thresholds, + binclf_curves_thresholds_too_low, + ), + # `thresholds_choice` = 'minmax-linspace'" + ( + preds, + gts, + "minmax-linspace", + None, + len(thresholds), + thresholds, + binclf_curves, + ), + ( + 2 * preds, + gts.to(int), # this is ok + "minmax-linspace", + None, + len(thresholds), + 2 * thresholds, + binclf_curves, + ), + ] + + if metafunc.function is test_per_image_binclf_curve: + metafunc.parametrize( + argnames=( + "anomaly_maps", + "masks", + "threshold_choice", + "thresholds", + "num_thresholds", + "expected_thresholds", + "expected_binclf_curves", + ), + argvalues=per_image_binclf_curves_argvalues, + ) + + if metafunc.function is test_per_image_binclf_curve_validations: + metafunc.parametrize( + argnames=("args", "exception"), + argvalues=[ + # `scores` and `gts` must be 3D + ([preds.reshape(2, 2, 2, 1), gts], ValueError), + ([preds, gts.flatten()], ValueError), + # `scores` and `gts` must have the same shape + ([preds, gts[:1]], ValueError), + ([preds[:, :1], gts], ValueError), + # `scores` be of type float + ([preds.to(int), gts], TypeError), + # `gts` be of type bool or int + ([preds, gts.to(float)], TypeError), + # `thresholds` be of type float + ([preds, gts, thresholds.to(int)], TypeError), + ], + ) + metafunc.parametrize( + argnames=("kwargs",), + argvalues=[ + ( + { + "threshold_choice": "minmax-linspace", + 
"thresholds": None, + "num_thresholds": len(thresholds), + }, + ), + ], + ) + + # same as above but testing other validations + if metafunc.function is test_per_image_binclf_curve_validations_alt: + metafunc.parametrize( + argnames=("args", "kwargs", "exception"), + argvalues=[ + # invalid `thresholds_choice` + ( + [preds, gts], + {"threshold_choice": "glfrb", "thresholds": thresholds, "num_thresholds": None}, + ValueError, + ), + ], + ) + + if metafunc.function is test_rate_metrics: + metafunc.parametrize( + argnames=("binclf_curves", "expected_fprs", "expected_tprs"), + argvalues=[ + (binclf_curves, expected_fprs, expected_tprs), + (10 * binclf_curves, expected_fprs, expected_tprs), + ], + ) + + +# ================================================================================================== +# LOW-LEVEL FUNCTIONS (PYTHON) + + +def test__binclf_one_curve( + pred: torch.Tensor, + gt: torch.Tensor, + thresholds: torch.Tensor, + expected: torch.Tensor, +) -> None: + """Test if `_binclf_one_curve()` returns the expected values.""" + computed = _binary_classification_curve(pred, gt, thresholds) + assert computed.shape == (thresholds.numel(), 2, 2) + assert (computed == expected.numpy()).all() + + +def test__binclf_multiple_curves( + preds: torch.Tensor, + gts: torch.Tensor, + thresholds: torch.Tensor, + expecteds: torch.Tensor, +) -> None: + """Test if `_binclf_multiple_curves()` returns the expected values.""" + computed = binary_classification_curve(preds, gts, thresholds) + assert computed.shape == (preds.shape[0], thresholds.numel(), 2, 2) + assert (computed == expecteds).all() + + +# ================================================================================================== +# API FUNCTIONS (NUMPY) + + +def test_binclf_multiple_curves( + preds: torch.Tensor, + gts: torch.Tensor, + thresholds: torch.Tensor, + expected_binclf_curves: torch.Tensor, +) -> None: + """Test if `binclf_multiple_curves()` returns the expected values.""" + computed = 
binary_classification_curve( + preds, + gts, + thresholds, + ) + assert computed.shape == expected_binclf_curves.shape + assert (computed == expected_binclf_curves).all() + + # it's ok to have the threhsholds beyond the range of the preds + binary_classification_curve(preds, gts, 2 * thresholds) + + # or inside the bounds without reaching them + binary_classification_curve(preds, gts, 0.5 * thresholds) + + # it's also ok to have more thresholds than unique values in the preds + # add the values in between the thresholds + thresholds_unncessary = 0.5 * (thresholds[:-1] + thresholds[1:]) + thresholds_unncessary = torch.concatenate([thresholds_unncessary, thresholds]) + thresholds_unncessary = torch.sort(thresholds_unncessary)[0] + binary_classification_curve(preds, gts, thresholds_unncessary) + + # or less + binary_classification_curve(preds, gts, thresholds[1:3]) + + +def test_binclf_multiple_curves_validations(args: list, kwargs: dict, exception: Exception) -> None: + """Test if `_binclf_multiple_curves_python()` raises the expected errors.""" + with pytest.raises(exception): + binary_classification_curve(*args, **kwargs) + + +def test_per_image_binclf_curve( + anomaly_maps: torch.Tensor, + masks: torch.Tensor, + threshold_choice: str, + thresholds: torch.Tensor | None, + num_thresholds: int | None, + expected_thresholds: torch.Tensor, + expected_binclf_curves: torch.Tensor, +) -> None: + """Test if `per_image_binclf_curve()` returns the expected values.""" + computed_thresholds, computed_binclf_curves = threshold_and_binary_classification_curve( + anomaly_maps, + masks, + threshold_choice=threshold_choice, + thresholds=thresholds, + num_thresholds=num_thresholds, + ) + + # thresholds + assert computed_thresholds.shape == expected_thresholds.shape + assert computed_thresholds.dtype == computed_thresholds.dtype + assert (computed_thresholds == expected_thresholds).all() + + # binclf_curves + assert computed_binclf_curves.shape == expected_binclf_curves.shape + 
assert computed_binclf_curves.dtype == expected_binclf_curves.dtype + assert (computed_binclf_curves == expected_binclf_curves).all() + + +def test_per_image_binclf_curve_validations(args: list, kwargs: dict, exception: Exception) -> None: + """Test if `per_image_binclf_curve()` raises the expected errors.""" + with pytest.raises(exception): + threshold_and_binary_classification_curve(*args, **kwargs) + + +def test_per_image_binclf_curve_validations_alt(args: list, kwargs: dict, exception: Exception) -> None: + """Test if `per_image_binclf_curve()` raises the expected errors.""" + test_per_image_binclf_curve_validations(args, kwargs, exception) + + +def test_rate_metrics( + binclf_curves: torch.Tensor, + expected_fprs: torch.Tensor, + expected_tprs: torch.Tensor, +) -> None: + """Test if rate metrics are computed correctly.""" + tprs = per_image_tpr(binclf_curves) + fprs = per_image_fpr(binclf_curves) + + assert tprs.shape == expected_tprs.shape + assert fprs.shape == expected_fprs.shape + + assert torch.allclose(tprs, expected_tprs, equal_nan=True) + assert torch.allclose(fprs, expected_fprs, equal_nan=True) diff --git a/tests/unit/metrics/pimo/test_pimo.py b/tests/unit/metrics/pimo/test_pimo.py new file mode 100644 index 0000000000..81bafe4c8e --- /dev/null +++ b/tests/unit/metrics/pimo/test_pimo.py @@ -0,0 +1,368 @@ +"""Test `anomalib.metrics.per_image.functional`.""" + +# Original Code +# https://github.com/jpcbertoldo/aupimo +# +# Modified +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging + +import pytest +import torch +from torch import Tensor + +from anomalib.metrics.pimo import AUPIMOResult, PIMOResult, functional, pimo + + +def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: + """Generate tests for all functions in this module. + + All functions are parametrized with the same setting: 1 normal and 2 anomalous images. + The anomaly maps are the same for all functions, but the masks are different. 
+ """ + expected_thresholds = torch.arange(1, 7 + 1, dtype=torch.float32) + shape = (1000, 1000) # (H, W), 1 million pixels + + # --- normal --- + # histogram of scores: + # value: 7 6 5 4 3 2 1 + # count: 1 9 90 900 9k 90k 900k + # cumsum: 1 10 100 1k 10k 100k 1M + pred_norm = torch.ones(1_000_000, dtype=torch.float32) + pred_norm[:100_000] += 1 + pred_norm[:10_000] += 1 + pred_norm[:1_000] += 1 + pred_norm[:100] += 1 + pred_norm[:10] += 1 + pred_norm[:1] += 1 + pred_norm = pred_norm.reshape(shape) + mask_norm = torch.zeros_like(pred_norm, dtype=torch.int32) + + expected_fpr_norm = torch.tensor([1.0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6], dtype=torch.float64) + expected_tpr_norm = torch.full((7,), torch.nan, dtype=torch.float64) + + # --- anomalous --- + pred_anom1 = pred_norm.clone() + mask_anom1 = torch.ones_like(pred_anom1, dtype=torch.int32) + expected_tpr_anom1 = expected_fpr_norm.clone() + + # only the first 100_000 pixels are anomalous + # which corresponds to the first 100_000 highest scores (2 to 7) + pred_anom2 = pred_norm.clone() + mask_anom2 = torch.concatenate([torch.ones(100_000), torch.zeros(900_000)]).reshape(shape).to(torch.int32) + expected_tpr_anom2 = (10 * expected_fpr_norm).clip(0, 1) + + anomaly_maps = torch.stack([pred_norm, pred_anom1, pred_anom2], axis=0) + masks = torch.stack([mask_norm, mask_anom1, mask_anom2], axis=0) + + expected_shared_fpr = expected_fpr_norm + expected_per_image_tprs = torch.stack([expected_tpr_norm, expected_tpr_anom1, expected_tpr_anom2], axis=0) + expected_image_classes = torch.tensor([0, 1, 1], dtype=torch.int32) + + if metafunc.function is test_pimo or metafunc.function is test_aupimo_values: + argvalues_tensors = [ + ( + anomaly_maps, + masks, + expected_thresholds, + expected_shared_fpr, + expected_per_image_tprs, + expected_image_classes, + ), + ( + 10 * anomaly_maps, + masks, + 10 * expected_thresholds, + expected_shared_fpr, + expected_per_image_tprs, + expected_image_classes, + ), + ] + metafunc.parametrize( 
+ argnames=( + "anomaly_maps", + "masks", + "expected_thresholds", + "expected_shared_fpr", + "expected_per_image_tprs", + "expected_image_classes", + ), + argvalues=argvalues_tensors, + ) + + if metafunc.function is test_aupimo_values: + argvalues_tensors = [ + ( + (1e-1, 1.0), + torch.tensor( + [ + torch.nan, + # recall: trapezium area = (a + b) * h / 2 + (0.10 + 1.0) * 1 / 2, + (1.0 + 1.0) * 1 / 2, + ], + dtype=torch.float64, + ), + ), + ( + (1e-3, 1e-1), + torch.tensor( + [ + torch.nan, + # average of two trapezium areas / 2 (normalizing factor) + (((1e-3 + 1e-2) * 1 / 2) + ((1e-2 + 1e-1) * 1 / 2)) / 2, + (((1e-2 + 1e-1) * 1 / 2) + ((1e-1 + 1.0) * 1 / 2)) / 2, + ], + dtype=torch.float64, + ), + ), + ( + (1e-5, 1e-4), + torch.tensor( + [ + torch.nan, + (1e-5 + 1e-4) * 1 / 2, + (1e-4 + 1e-3) * 1 / 2, + ], + dtype=torch.float64, + ), + ), + ] + metafunc.parametrize( + argnames=( + "fpr_bounds", + "expected_aupimos", # trapezoid surfaces + ), + argvalues=argvalues_tensors, + ) + + if metafunc.function is test_aupimo_edge: + metafunc.parametrize( + argnames=( + "anomaly_maps", + "masks", + ), + argvalues=[ + ( + anomaly_maps, + masks, + ), + ( + 10 * anomaly_maps, + masks, + ), + ], + ) + metafunc.parametrize( + argnames=("fpr_bounds",), + argvalues=[ + ((1e-1, 1.0),), + ((1e-3, 1e-2),), + ((1e-5, 1e-4),), + (None,), + ], + ) + + +def _do_test_pimo_outputs( + thresholds: Tensor, + shared_fpr: Tensor, + per_image_tprs: Tensor, + image_classes: Tensor, + expected_thresholds: Tensor, + expected_shared_fpr: Tensor, + expected_per_image_tprs: Tensor, + expected_image_classes: Tensor, +) -> None: + """Test if the outputs of any of the PIMO interfaces are correct.""" + assert isinstance(shared_fpr, Tensor) + assert isinstance(per_image_tprs, Tensor) + assert isinstance(image_classes, Tensor) + assert isinstance(expected_thresholds, Tensor) + assert isinstance(expected_shared_fpr, Tensor) + assert isinstance(expected_per_image_tprs, Tensor) + assert 
isinstance(expected_image_classes, Tensor) + allclose = torch.allclose + + assert thresholds.ndim == 1 + assert shared_fpr.ndim == 1 + assert per_image_tprs.ndim == 2 + assert tuple(image_classes.shape) == (3,) + + assert allclose(thresholds, expected_thresholds) + assert allclose(shared_fpr, expected_shared_fpr) + assert allclose(per_image_tprs, expected_per_image_tprs, equal_nan=True) + assert (image_classes == expected_image_classes).all() + + +def test_pimo( + anomaly_maps: Tensor, + masks: Tensor, + expected_thresholds: Tensor, + expected_shared_fpr: Tensor, + expected_per_image_tprs: Tensor, + expected_image_classes: Tensor, +) -> None: + """Test if `pimo()` returns the expected values.""" + + def do_assertions(pimo_result: PIMOResult) -> None: + thresholds = pimo_result.thresholds + shared_fpr = pimo_result.shared_fpr + per_image_tprs = pimo_result.per_image_tprs + image_classes = pimo_result.image_classes + _do_test_pimo_outputs( + thresholds, + shared_fpr, + per_image_tprs, + image_classes, + expected_thresholds, + expected_shared_fpr, + expected_per_image_tprs, + expected_image_classes, + ) + + # metric interface + metric = pimo.PIMO( + num_thresholds=7, + ) + metric.update(anomaly_maps, masks) + pimo_result = metric.compute() + do_assertions(pimo_result) + + +def _do_test_aupimo_outputs( + thresholds: Tensor, + shared_fpr: Tensor, + per_image_tprs: Tensor, + image_classes: Tensor, + aupimos: Tensor, + expected_thresholds: Tensor, + expected_shared_fpr: Tensor, + expected_per_image_tprs: Tensor, + expected_image_classes: Tensor, + expected_aupimos: Tensor, +) -> None: + _do_test_pimo_outputs( + thresholds, + shared_fpr, + per_image_tprs, + image_classes, + expected_thresholds, + expected_shared_fpr, + expected_per_image_tprs, + expected_image_classes, + ) + assert isinstance(aupimos, Tensor) + assert isinstance(expected_aupimos, Tensor) + allclose = torch.allclose + assert tuple(aupimos.shape) == (3,) + assert allclose(aupimos, expected_aupimos, 
equal_nan=True) + + +def test_aupimo_values( + anomaly_maps: torch.Tensor, + masks: torch.Tensor, + fpr_bounds: tuple[float, float], + expected_thresholds: torch.Tensor, + expected_shared_fpr: torch.Tensor, + expected_per_image_tprs: torch.Tensor, + expected_image_classes: torch.Tensor, + expected_aupimos: torch.Tensor, +) -> None: + """Test if `aupimo()` returns the expected values.""" + + def do_assertions(pimo_result: PIMOResult, aupimo_result: AUPIMOResult) -> None: + # test metadata + assert aupimo_result.fpr_bounds == fpr_bounds + # recall: this one is not the same as the number of thresholds in the curve + # this is the number of thresholds used to compute the integral in `aupimo()` + # always less because of the integration bounds + assert aupimo_result.num_thresholds < 7 + + # test data + # from pimo result + thresholds = pimo_result.thresholds + shared_fpr = pimo_result.shared_fpr + per_image_tprs = pimo_result.per_image_tprs + image_classes = pimo_result.image_classes + # from aupimo result + aupimos = aupimo_result.aupimos + _do_test_aupimo_outputs( + thresholds, + shared_fpr, + per_image_tprs, + image_classes, + aupimos, + expected_thresholds, + expected_shared_fpr, + expected_per_image_tprs, + expected_image_classes, + expected_aupimos, + ) + thresh_lower_bound = aupimo_result.thresh_lower_bound + thresh_upper_bound = aupimo_result.thresh_upper_bound + assert anomaly_maps.min() <= thresh_lower_bound < thresh_upper_bound <= anomaly_maps.max() + + # metric interface + metric = pimo.AUPIMO( + num_thresholds=7, + fpr_bounds=fpr_bounds, + return_average=False, + force=True, + ) + metric.update(anomaly_maps, masks) + pimo_result_from_metric, aupimo_result_from_metric = metric.compute() + do_assertions(pimo_result_from_metric, aupimo_result_from_metric) + + # metric interface + metric = pimo.AUPIMO( + num_thresholds=7, + fpr_bounds=fpr_bounds, + return_average=True, # only return the average AUPIMO + force=True, + ) + metric.update(anomaly_maps, masks) + 
metric.compute() + + +def test_aupimo_edge( + anomaly_maps: torch.Tensor, + masks: torch.Tensor, + fpr_bounds: tuple[float, float], + caplog: pytest.LogCaptureFixture, +) -> None: + """Test some edge cases.""" + # None is the case of testing the default bounds + fpr_bounds = {"fpr_bounds": fpr_bounds} if fpr_bounds is not None else {} + + # not enough points on the curve + # 10 thresholds / 6 decades = 1.6 thresholds per decade < 3 + with pytest.raises(RuntimeError): # force=False --> raise error + functional.aupimo_scores( + anomaly_maps, + masks, + num_thresholds=10, + force=False, + **fpr_bounds, + ) + + with caplog.at_level(logging.WARNING): # force=True --> warn + functional.aupimo_scores( + anomaly_maps, + masks, + num_thresholds=10, + force=True, + **fpr_bounds, + ) + assert "Computation was forced!" in caplog.text + + # default number of points on the curve (300k thresholds) should be enough + torch.manual_seed(42) + functional.aupimo_scores( + anomaly_maps * torch.FloatTensor(anomaly_maps.shape).uniform_(1.0, 1.1), + masks, + force=False, + **fpr_bounds, + ) diff --git a/third-party-programs.txt b/third-party-programs.txt index 3155b2a930..5eeaca8ea9 100644 --- a/third-party-programs.txt +++ b/third-party-programs.txt @@ -42,3 +42,7 @@ terms are listed below. 7. CLIP neural network used for deep feature extraction in AI-VAD model Copyright (c) 2022 @openai, https://github.com/openai/CLIP. SPDX-License-Identifier: MIT + +8. AUPIMO metric implementation is based on the original code + Copyright (c) 2023 @jpcbertoldo, https://github.com/jpcbertoldo/aupimo + SPDX-License-Identifier: MIT From 404e896c769aa069f77deb79081e19024e8596a8 Mon Sep 17 00:00:00 2001 From: Marcus Pertlwieser <116986601+Marcus1506@users.noreply.github.com> Date: Thu, 3 Oct 2024 16:20:38 +0200 Subject: [PATCH 06/32] Makes batch size dynamic (#2339) Made batch dimension of ONNX export dynamic when specifying input shape. 
--- src/anomalib/models/components/base/export_mixin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/anomalib/models/components/base/export_mixin.py b/src/anomalib/models/components/base/export_mixin.py index e0627b462c..5e7e5e9481 100644 --- a/src/anomalib/models/components/base/export_mixin.py +++ b/src/anomalib/models/components/base/export_mixin.py @@ -142,7 +142,9 @@ def to_onnx( export_root = _create_export_root(export_root, ExportType.ONNX) input_shape = torch.zeros((1, 3, *input_size)) if input_size else torch.zeros((1, 3, 1, 1)) dynamic_axes = ( - None if input_size else {"input": {0: "batch_size", 2: "height", 3: "weight"}, "output": {0: "batch_size"}} + {"input": {0: "batch_size"}, "output": {0: "batch_size"}} + if input_size + else {"input": {0: "batch_size", 2: "height", 3: "weight"}, "output": {0: "batch_size"}} ) _write_metadata_to_json(self._get_metadata(task), export_root) onnx_path = export_root / "model.onnx" From 2ba5ede95daa2abfdb2026ec08f29f4e8a818f67 Mon Sep 17 00:00:00 2001 From: Joao P C Bertoldo <24547377+jpcbertoldo@users.noreply.github.com> Date: Sun, 6 Oct 2024 08:43:49 +0200 Subject: [PATCH 07/32] Add pimo tutorial advanced i (fixed) (#2336) * uset all padim features to make it deterministic Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add aupimo notebook advanced i Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * update readme Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * modify changelog Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * correct readme Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * correct again Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * minor corrections Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> --------- Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> 
--- CHANGELOG.md | 2 + notebooks/700_metrics/701a_aupimo.ipynb | 68 +- .../700_metrics/701b_aupimo_advanced_i.ipynb | 1433 +++++++++++++++++ notebooks/700_metrics/pimo_viz.svg | 619 +++++++ notebooks/README.md | 7 + 5 files changed, 2092 insertions(+), 37 deletions(-) create mode 100644 notebooks/700_metrics/701b_aupimo_advanced_i.ipynb create mode 100644 notebooks/700_metrics/pimo_viz.svg diff --git a/CHANGELOG.md b/CHANGELOG.md index fc80fa3e7e..e0e0cc955e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -120,6 +120,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added +- Add `AUPIMO` tutorials notebooks in https://github.com/openvinotoolkit/anomalib/pull/2330 and https://github.com/openvinotoolkit/anomalib/pull/2336 +- Add `AUPIMO` metric by [jpcbertoldo](https://github.com/jpcbertoldo) in https://github.com/openvinotoolkit/anomalib/pull/1726 and refactored by [ashwinvaidya17](https://github.com/ashwinvaidya17) in https://github.com/openvinotoolkit/anomalib/pull/2329 - Add requirements into `pyproject.toml` & Refactor anomalib install `get_requirements` by @harimkang in https://github.com/openvinotoolkit/anomalib/pull/1808 ### Changed diff --git a/notebooks/700_metrics/701a_aupimo.ipynb b/notebooks/700_metrics/701a_aupimo.ipynb index e6333df6df..c6831fd1f7 100644 --- a/notebooks/700_metrics/701a_aupimo.ipynb +++ b/notebooks/700_metrics/701a_aupimo.ipynb @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -177,7 +177,7 @@ "model = Padim(\n", " # only use one layer to speed it up\n", " layers=[\"layer1\"],\n", - " n_features=32,\n", + " n_features=64,\n", " backbone=\"resnet18\",\n", " pre_trained=True,\n", ")" @@ -225,7 +225,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "58335955473a43dab43e586caf66aa11", + "model_id": "880e325e4e4842b2b679340ca8007849", "version_major": 2, "version_minor": 0 }, @@ -242,9 
+242,9 @@ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
        "┃        Test metric        ┃       DataLoader 0        ┃\n",
        "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
-       "│        image_AUROC        │    0.9735053777694702     │\n",
-       "│       image_F1Score       │    0.9518716335296631     │\n",
-       "│       pixel_AUPIMO        │    0.6273086756193275     │\n",
+       "│        image_AUROC        │    0.9887908697128296     │\n",
+       "│       image_F1Score       │    0.9726775884628296     │\n",
+       "│       pixel_AUPIMO        │    0.7428419829089654     │\n",
        "└───────────────────────────┴───────────────────────────┘\n",
        "
\n" ], @@ -252,9 +252,9 @@ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", "┑━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", - "β”‚\u001b[36m \u001b[0m\u001b[36m image_AUROC \u001b[0m\u001b[36m \u001b[0mβ”‚\u001b[35m \u001b[0m\u001b[35m 0.9735053777694702 \u001b[0m\u001b[35m \u001b[0mβ”‚\n", - "β”‚\u001b[36m \u001b[0m\u001b[36m image_F1Score \u001b[0m\u001b[36m \u001b[0mβ”‚\u001b[35m \u001b[0m\u001b[35m 0.9518716335296631 \u001b[0m\u001b[35m \u001b[0mβ”‚\n", - "β”‚\u001b[36m \u001b[0m\u001b[36m pixel_AUPIMO \u001b[0m\u001b[36m \u001b[0mβ”‚\u001b[35m \u001b[0m\u001b[35m 0.6273086756193275 \u001b[0m\u001b[35m \u001b[0mβ”‚\n", + "β”‚\u001b[36m \u001b[0m\u001b[36m image_AUROC \u001b[0m\u001b[36m \u001b[0mβ”‚\u001b[35m \u001b[0m\u001b[35m 0.9887908697128296 \u001b[0m\u001b[35m \u001b[0mβ”‚\n", + "β”‚\u001b[36m \u001b[0m\u001b[36m image_F1Score \u001b[0m\u001b[36m \u001b[0mβ”‚\u001b[35m \u001b[0m\u001b[35m 0.9726775884628296 \u001b[0m\u001b[35m \u001b[0mβ”‚\n", + "β”‚\u001b[36m \u001b[0m\u001b[36m pixel_AUPIMO \u001b[0m\u001b[36m \u001b[0mβ”‚\u001b[35m \u001b[0m\u001b[35m 0.7428419829089654 \u001b[0m\u001b[35m \u001b[0mβ”‚\n", "β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜\n" ] }, @@ -264,9 +264,9 @@ { "data": { "text/plain": [ - "[{'pixel_AUPIMO': 0.6273086756193275,\n", - " 'image_AUROC': 0.9735053777694702,\n", - " 'image_F1Score': 0.9518716335296631}]" + "[{'pixel_AUPIMO': 0.7428419829089654,\n", + " 'image_AUROC': 0.9887908697128296,\n", + " 'image_F1Score': 0.9726775884628296}]" ] }, "execution_count": 8, @@ -314,7 +314,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "678cb90805ee4b7bb1dd0c30944edab9", + "model_id": 
"e8116b80da39406e966c2099ecb2fdb1", "version_major": 2, "version_minor": 0 }, @@ -390,27 +390,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "tensor([8.7932e-01, 8.4367e-01, 7.9861e-02, 9.2154e-02, 1.5300e-04, 5.8312e-01,\n", - " 8.5351e-01, 3.8730e-01, 1.9997e-02, 1.7658e-01, 8.0739e-01, 7.1827e-01,\n", - " 5.2631e-01, 4.3051e-01, 5.0168e-01, 3.5604e-01, 8.9605e-01, 8.8349e-02,\n", - " 6.0475e-01, 9.6092e-01, 5.8595e-01, 5.7159e-01, 9.8821e-01, 8.8012e-01,\n", - " 5.8205e-01, 9.9295e-01, 1.0000e+00, 9.9967e-01, 5.5366e-01, 8.7399e-01,\n", - " 7.0559e-01, 9.4203e-01, 7.3299e-01, 6.6430e-01, 8.0979e-01, 9.4388e-01,\n", - " 9.9854e-01, 5.8814e-01, 8.8821e-01, 6.3341e-01, 4.2244e-01, 7.3422e-01,\n", - " 4.4623e-01, 5.9982e-01, 1.1232e-01, 2.5705e-01, 3.2403e-01, 5.6662e-02,\n", - " 5.3151e-02, 3.1629e-01, 2.6974e-01, 2.8646e-01, 5.3762e-01, 4.5617e-01,\n", - " 4.4067e-01, 9.8349e-01, 1.2953e-02, 7.9532e-01, 1.7765e-01, 1.1363e-01,\n", - " 9.7337e-01, 4.9871e-01, 2.7917e-01, 4.9118e-01, 2.5533e-02, 0.0000e+00,\n", - " 9.0295e-04, 0.0000e+00, 9.3272e-01, 1.0000e+00, 1.0000e+00, 3.0749e-02,\n", - " 8.0794e-01, 9.4464e-01, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, 9.8743e-01, 8.4611e-01,\n", - " 9.7309e-01, 9.8823e-01, 1.0000e+00, 1.0000e+00, 9.6653e-01, 9.6560e-01,\n", - " 1.0000e+00, 1.0000e+00, 9.5783e-01, 1.0000e+00, 9.1427e-01, 9.9806e-01,\n", - " 1.0000e+00, 1.0000e+00, 9.9345e-01, 1.0000e+00], dtype=torch.float64)\n" + "tensor([1.0000, 0.9144, 0.4944, 0.2837, 0.2784, 0.8687, 1.0000, 0.7463, 0.2899,\n", + " 0.8998, 1.0000, 0.9147, 0.6389, 0.9422, 0.9582, 0.9396, 0.9890, 0.5130,\n", + " 0.9698, 0.9237, 0.5732, 0.4620, 0.9995, 0.9078, 0.5873, 1.0000, 1.0000,\n", + " 1.0000, 0.3785, 0.6764, 0.4217, 0.9299, 0.7756, 0.4339, 0.8334, 0.9297,\n", + " 0.9992, 0.5584, 0.9937, 0.7811, 0.4986, 0.7630, 0.5361, 
0.7157, 0.1689,\n", + " 0.3086, 0.3604, 0.2423, 0.2880, 0.6404, 0.5570, 0.3274, 0.7749, 0.6740,\n", + " 0.5516, 1.0000, 0.2399, 0.9721, 0.5346, 0.4709, 1.0000, 0.9732, 0.8470,\n", + " 0.8863, 0.0596, 0.0000, 0.5244, 0.0000, 1.0000, 1.0000, 1.0000, 0.0088,\n", + " 0.9706, 1.0000, nan, nan, nan, nan, nan, nan, nan,\n", + " nan, nan, nan, nan, nan, nan, nan, nan, nan,\n", + " nan, nan, nan, nan, nan, nan, nan, nan, nan,\n", + " nan, nan, nan, nan, nan, nan, nan, 0.9895, 0.8531,\n", + " 0.9985, 0.9470, 1.0000, 1.0000, 0.9918, 0.9792, 1.0000, 1.0000, 0.8824,\n", + " 1.0000, 0.9996, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],\n", + " dtype=torch.float64)\n" ] } ], @@ -439,9 +433,9 @@ "output_type": "stream", "text": [ "MEAN\n", - "aupimo_result.aupimos[~isnan].mean().item()=0.6273086756193275\n", + "aupimo_result.aupimos[~isnan].mean().item()=0.7428419829089654\n", "OTHER STATISTICS\n", - "DescribeResult(nobs=92, minmax=(0.0, 1.0), mean=0.6273086756193275, variance=0.12220088826183258, skewness=-0.506530110649306, kurtosis=-1.1586400848600655)\n" + "DescribeResult(nobs=92, minmax=(0.0, 1.0), mean=0.7428419829089654, variance=0.08757789538421837, skewness=-0.9285672286850366, kurtosis=-0.3299234749959594)\n" ] } ], @@ -469,7 +463,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] diff --git a/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb b/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb new file mode 100644 index 0000000000..37876e5bf6 --- /dev/null +++ b/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb @@ -0,0 +1,1433 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AUPIMO\n", + "\n", + "Advance use cases of the metric AUPIMO (pronounced \"a-u-pee-mo\").\n", + "\n", + "> For basic usage, please check the notebook [701a_aupimo.ipynb](./701a_aupimo.ipynb).\n", + "\n", + "Includes:\n", + "- selection of test representative samples for qualitative analysis\n", + "- visualization of the AUPIMO metric with heatmaps" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# What is AUPIMO?\n", + "\n", + "The `Area Under the Per-Image Overlap [curve]` (AUPIMO) is a metric of recall (higher is better) designed for visual anomaly detection.\n", + "\n", + "Inspired by the [ROC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) and [PRO](https://link.springer.com/article/10.1007/s11263-020-01400-4) curves, \n", + "\n", + "> AUPIMO is the area under a curve of True Positive Rate (TPR or _recall_) as a function of False Positive Rate (FPR) restricted to a fixed range. \n", + "\n", + "But:\n", + "- the TPR (Y-axis) is *per-image* (1 image = 1 curve/score);\n", + "- the FPR (X-axis) considers the (average of) **normal** images only; \n", + "- the FPR (X-axis) is in log scale and its range is [1e-5, 1e-4]\\* (harder detection task!).\n", + "\n", + "\\* The score (the area under the curve) is normalized to be in [0, 1].\n", + "\n", + "AUPIMO can be interpreted as\n", + "\n", + "> average segmentation recall in an image given that the model (nearly) does not yield false positives in normal images.\n", + "\n", + "References in the last cell.\n", + "\n", + "![AUROC vs. AUPRO vs. 
AUPIMO](./roc_pro_pimo.svg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install `anomalib` using `pip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO(jpcbertoldo): replace by `pip install anomalib` when AUPIMO is released # noqa: TD003\n", + "%pip install ../.." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change the directory to have access to the datasets." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "# NOTE: Provide the path to the dataset root directory.\n", + "# If the datasets is not downloaded, it will be downloaded\n", + "# to this directory.\n", + "dataset_root = Path.cwd().parent.parent / \"datasets\" / \"MVTec\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.ticker import PercentFormatter\n", + "from scipy import stats\n", + "\n", + "from anomalib import TaskType\n", + "from anomalib.data import MVTec\n", + "from anomalib.data.utils import read_image\n", + "from anomalib.engine import Engine\n", + "from anomalib.metrics import AUPIMO\n", + "from anomalib.models import Padim" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option(\"display.float_format\", \"{:.2f}\".format)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "# Basics\n", + "\n", + "This part was covered in the notebook [701a_aupimo.ipynb](701a_aupimo.ipynb), so we'll not discuss it here.\n", + "\n", + "It will train a model and evaluate it using AUPIMO.\n", + "We will use dataset Leather from MVTec AD with `PaDiM` (performance is not the best, but it is fast to train).\n", + "\n", + "> See the notebooks below for more details on:\n", + "> - datamodules: [100_datamodules]((https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules));\n", + "> - models: [200_models](https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/200_models)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# train the model\n", + "task = TaskType.SEGMENTATION\n", + "datamodule = MVTec(\n", + " root=dataset_root,\n", + " category=\"leather\",\n", + " image_size=256,\n", + " train_batch_size=32,\n", + " eval_batch_size=32,\n", + " num_workers=8,\n", + " task=task,\n", + ")\n", + "model = Padim(\n", + " # only use one layer to speed it up\n", + " layers=[\"layer1\"],\n", + " n_features=64,\n", + " backbone=\"resnet18\",\n", + " pre_trained=True,\n", + ")\n", + "engine = Engine(\n", + " pixel_metrics=\"AUPIMO\", # others can be added\n", + " accelerator=\"auto\", # \\<\"cpu\", \"gpu\", \"tpu\", \"ipu\", \"hpu\", \"auto\">,\n", + " devices=1,\n", + " logger=False,\n", + ")\n", + "engine.fit(datamodule=datamodule, model=model)\n", + "# infer\n", + "predictions = engine.predict(dataloaders=datamodule.test_dataloader(), model=model, return_predictions=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compute AUPIMO" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Metric `AUPIMO` will save all targets and predictions in buffer. 
For large datasets this may lead to large memory footprint.\n" + ] + } + ], + "source": [ + "aupimo = AUPIMO(\n", + " # with `False` all the values are returned in a dataclass\n", + " return_average=False,\n", + ")\n", + "\n", + "anomaly_maps = []\n", + "masks = []\n", + "labels = []\n", + "image_paths = []\n", + "for batch in predictions:\n", + " anomaly_maps.append(batch_anomaly_maps := batch[\"anomaly_maps\"].squeeze(dim=1))\n", + " masks.append(batch_masks := batch[\"mask\"])\n", + " labels.append(batch[\"label\"])\n", + " image_paths.append(batch[\"image_path\"])\n", + " aupimo.update(anomaly_maps=batch_anomaly_maps, masks=batch_masks)\n", + "\n", + "# list[list[str]] -> list[str]\n", + "image_paths = [item for sublist in image_paths for item in sublist]\n", + "anomaly_maps = torch.cat(anomaly_maps, dim=0)\n", + "masks = torch.cat(masks, dim=0)\n", + "labels = torch.cat(labels, dim=0)\n", + "\n", + "# `pimo_result` has the PIMO curves of each image\n", + "# `aupimo_result` has the AUPIMO values\n", + "# i.e. their Area Under the Curve (AUC)\n", + "pimo_result, aupimo_result = aupimo.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Statistics and score distribution." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MEAN\n", + "aupimo_result.aupimos[labels == 1].mean().item()=0.742841961578308\n", + "OTHER STATISTICS\n", + "DescribeResult(nobs=92, minmax=(0.0, 1.0), mean=0.742841961578308, variance=0.08757792704451817, skewness=-0.9285678601866055, kurtosis=-0.3299211772047075)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# the normal images have `nan` values because\n", + "# recall is not defined for them so we ignore them\n", + "print(f\"MEAN\\n{aupimo_result.aupimos[labels == 1].mean().item()=}\")\n", + "print(f\"OTHER STATISTICS\\n{stats.describe(aupimo_result.aupimos[labels == 1])}\")\n", + "\n", + "fig, ax = plt.subplots()\n", + "ax.hist(aupimo_result.aupimos[labels == 1].numpy(), bins=np.linspace(0, 1, 11), edgecolor=\"black\")\n", + "ax.set_ylabel(\"Count (number of images)\")\n", + "ax.set_xlim(0, 1)\n", + "ax.set_xlabel(\"AUPIMO [%]\")\n", + "ax.xaxis.set_major_formatter(PercentFormatter(1))\n", + "ax.grid()\n", + "ax.set_title(\"AUPIMO distribution\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Until here we just reproduded the notebook with the basic usage of AUPIMO." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Selecting Representative Samples for Qualitative Analysis\n", + "\n", + "Instead of cherry picking or inspecting the 92 samples from above, we'll try to choose them smartly.\n", + "\n", + "Our goal here is to select a handful of samples in a meaningful way.\n", + "\n", + "> Notice that a random selection from the distribution above would probably miss the worst cases.\n", + "\n", + "We will summarize this distribution with a boxplot, then select the samples corresponding to the statistics in it." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(7, 2))\n", + "boxplot_data = ax.boxplot(\n", + " aupimo_result.aupimos[labels == 1].numpy(),\n", + " vert=False,\n", + " widths=0.4,\n", + ")\n", + "_ = ax.set_yticks([])\n", + "ax.set_xlim(0 - (eps := 2e-2), 1 + eps)\n", + "ax.xaxis.set_major_formatter(PercentFormatter(1))\n", + "ax.set_xlabel(\"AUPIMO [%]\")\n", + "ax.set_title(\"AUPIMO Scores Boxplot\")\n", + "num_images = (labels == 1).sum().item()\n", + "ax.annotate(\n", + " text=f\"Number of images: {num_images}\",\n", + " xy=(0.03, 0.95),\n", + " xycoords=\"axes fraction\",\n", + " xytext=(0, 0),\n", + " textcoords=\"offset points\",\n", + " annotation_clip=False,\n", + " verticalalignment=\"top\",\n", + ")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To get the values in the boxplot (e.g., whiskers, quartiles, etc.), we're going to use `matplotlib`'s internal function `mpl.cbook.boxplot_stats()`." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['mean', 'iqr', 'cilo', 'cihi', 'whishi', 'whislo', 'fliers', 'q1', 'med', 'q3'])\n" + ] + } + ], + "source": [ + "boxplot_data = mpl.cbook.boxplot_stats(aupimo_result.aupimos[labels == 1].numpy())[0]\n", + "print(boxplot_data.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll select 5 of those and find images in the dataset that match them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " statistic value image_index\n", + "0 whislo 0.00 65\n", + "1 q1 0.53 58\n", + "2 med 0.89 63\n", + "3 q3 1.00 22\n", + "4 whishi 1.00 0\n" + ] + } + ], + "source": [ + "image_selection = []\n", + "\n", + "for key in [\"whislo\", \"q1\", \"med\", \"q3\", \"whishi\"]:\n", + " value = boxplot_data[key]\n", + " # find the image that is closest to the value of the statistic\n", + " # `[labels == 1]` is not used here so that the image's\n", + " # indexes are the same as the ones in the dataset\n", + " # we use `argsort()` -- instead of `argmin()` -- so that\n", + " # the `nan`s are not considered (they are at the end)\n", + " closest_image_index = (aupimo_result.aupimos - value).abs().argsort()[0]\n", + " image_selection.append({\"statistic\": key, \"value\": value, \"image_index\": closest_image_index.item()})\n", + "\n", + "image_selection = pd.DataFrame(image_selection)\n", + "print(image_selection)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that they are sorted from the worst to the best AUPIMO score." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualizing the Representative Samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's visualize the heatmaps of these samples."
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# will be used to normalize the anomaly maps to fit a colormap\n", + "global_vmin, global_vmax = torch.quantile(anomaly_maps, torch.tensor([0.02, 0.98]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplots(2, 5, figsize=(16, 7), layout=\"constrained\")\n", + "\n", + "for ax_column, (_, row) in zip(axes.T, image_selection.iterrows(), strict=False):\n", + " ax_above, ax_below = ax_column\n", + " image = cv2.resize(read_image(image_paths[row.image_index]), (256, 256))\n", + " anomaly_map = anomaly_maps[row.image_index].numpy()\n", + " mask = masks[row.image_index].squeeze().numpy()\n", + " ax_above.imshow(image)\n", + " ax_above.contour(mask, levels=[0.5], colors=\"magenta\", linewidths=1)\n", + " ax_below.imshow(image)\n", + " ax_below.imshow(anomaly_map, cmap=\"jet\", vmin=global_vmin, vmax=global_vmax, alpha=0.30)\n", + " ax_below.contour(mask, levels=[0.5], colors=\"magenta\", linewidths=1)\n", + " ax_above.set_title(f\"{row.statistic}: {row.value:.0%} AUPIMO image {row.image_index}\")\n", + "\n", + "for ax in axes.flatten():\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "\n", + "axes[0, 0].set_ylabel(\"Image + GT Mask\")\n", + "axes[1, 0].set_ylabel(\"Image + GT Mask + Anomaly Map\")\n", + "fig.text(\n", + " 0.03,\n", + " -0.01,\n", + " \"Magenta: contours of the ground truth (GT) mask. 
\"\n", + " \"Anomaly maps colored in JET colormap with global (across all images) min-max normalization.\",\n", + " ha=\"left\",\n", + " va=\"top\",\n", + " fontsize=\"small\",\n", + " color=\"dimgray\",\n", + ")\n", + "\n", + "fig.suptitle(\"Anomalous samples from AUPIMO boxplot's statistics\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The heatmaps give the impression that all samples are properly detected, right?\n", + "\n", + "Notice that the lowest AUPIMO (left) is 0, but the heatmap is (contradictorily) showing a good detection.\n", + "\n", + "Why is that?\n", + "\n", + "These heatmaps are colored with a gradient from the minimum to the maximum value in all the heatmaps from the test set.\n", + "\n", + "This is not taking into account the contraints (FPR restriction) in AUPIMO.\n", + "\n", + "Let's compare with the heatmaps from some normal images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplots(2, 5, figsize=(16, 7), layout=\"constrained\")\n", + "\n", + "# random selection of normal images\n", + "rng = np.random.default_rng(42)\n", + "normal_images_selection = rng.choice(np.where(labels == 0)[0], size=5, replace=False)\n", + "\n", + "for ax_column, index in zip(axes.T, normal_images_selection, strict=False):\n", + " ax_above, ax_below = ax_column\n", + " image = cv2.resize(read_image(image_paths[index]), (256, 256))\n", + " anomaly_map = anomaly_maps[index].numpy()\n", + " ax_above.imshow(image)\n", + " ax_below.imshow(image)\n", + " ax_below.imshow(anomaly_map, cmap=\"jet\", vmin=global_vmin, vmax=global_vmax, alpha=0.30)\n", + " ax_above.set_title(f\"image {index}\")\n", + "\n", + "for ax in axes.flatten():\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "\n", + "axes[0, 0].set_ylabel(\"Image\")\n", + "axes[1, 0].set_ylabel(\"Image + Anomaly Map\")\n", + "fig.text(\n", + " 0.03,\n", + " 
-0.01,\n", + " \"Anomaly maps colored in JET colormap with global (across all images) min-max normalization.\",\n", + " ha=\"left\",\n", + " va=\"top\",\n", + " fontsize=\"small\",\n", + " color=\"dimgray\",\n", + ")\n", + "\n", + "fig.suptitle(\"Normal samples (test set)\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how the normal images also have high anomaly scores (\"hot\" colors) although there is no anomaly.\n", + "\n", + "As a matter of fact, the heatmaps can barely differentiate between some normal and anomalous images.\n", + "\n", + "See the two heatmaps below for instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplots(1, 2, figsize=(7, 4), layout=\"constrained\")\n", + "\n", + "for ax, index in zip(axes.flatten(), [87, 65], strict=False):\n", + " image = cv2.resize(read_image(image_paths[index]), (256, 256))\n", + " anomaly_map = anomaly_maps[index].numpy()\n", + " mask = masks[index].squeeze().numpy()\n", + " ax.imshow(image)\n", + " ax.contour(mask, levels=[0.5], colors=\"magenta\", linewidths=1)\n", + " ax.imshow(anomaly_map, cmap=\"jet\", vmin=global_vmin, vmax=global_vmax, alpha=0.30)\n", + " ax.set_title(f\"image {index}\")\n", + "\n", + "for ax in axes.flatten():\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "\n", + "axes[0].set_title(f\"{axes[0].get_title()} (normal)\")\n", + "axes[1].set_title(f\"{axes[1].get_title()} (anomalous)\")\n", + "\n", + "fig.text(\n", + " 0.03,\n", + " -0.01,\n", + " \"Magenta: contours of the ground truth (GT) mask.\\n\"\n", + " \"Anomaly maps colored in JET colormap with global (across all images) min-max normalization.\",\n", + " ha=\"left\",\n", + " va=\"top\",\n", + " fontsize=\"small\",\n", + " color=\"dimgray\",\n", + ")\n", + "\n", + "fig.suptitle(\"Normal vs. 
Anomalous Samples\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "One would expect image 65 (anomalous) to have a 'hotter' heatmap than image 87 (normal), but it is the opposite.\n", + "\n", + "This shows that the model is not doing a great job." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualizing the AUPIMO on the Heatmaps\n", + "\n", + "We will create another visualization to link the heatmaps to AUPIMO.\n", + "\n", + "Recall that AUPIMO computes this integral (simplified):\n", + "\n", + "$$\n", + " \\int_{\\log(L)}^{\\log(U)} \n", + " \\operatorname{TPR}^{i}\\left( \\operatorname{FPR^{-1}}( z ) \\right)\n", + " \\, \n", + " \\mathrm{d}\\log(z) \n", + "$$\n", + "\n", + "The integration bounds -- $L$[ower] and $U$[pper] -- are FPR values.\n", + "\n", + "> More details about their meaning in the next notebook.\n", + "\n", + "We will leverage these two bounds to create a heatmap that shows them in a gradient like this:\n", + "\n", + "![Visualization of AUPIMO on the heatmaps](./pimo_viz.svg)\n", + "\n", + "If the anomaly score is\n", + "1. too low (below the lowest threshold of AUPIMO) $\\rightarrow$ not shown; \n", + "2. between the bounds $\\rightarrow$ shown in a JET gradient;\n", + "3. too high (above the highest threshold of AUPIMO) $\\rightarrow$ shown in a single color.\n", + "\n", + "> Technical detail: the lower/upper bounds of the FPR correspond to the upper/lower bounds of the threshold.\n", + "\n", + "> **Why are low values not shown?**\n", + ">\n", + "> Because the values below the lower (threshold) bound would _never_ be seen as \"anomalous\" by the metric.\n", + ">\n", + "> Analogously, high values are shown in red because they are _always_ seen as \"anomalous\" by the metric."
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "FPR bounds\n", + "Lower bound: 0.00001\n", + "Upper bound: 0.00010\n", + "Thresholds corresponding to the FPR bounds\n", + "Lower threshold: 0.504\n", + "Upper threshold: 0.553\n" + ] + } + ], + "source": [ + "# the fpr bounds are fixed in advance in the metric object\n", + "print(f\"\"\"FPR bounds\n", + "Lower bound: {aupimo.fpr_bounds[0]:.5f}\n", + "Upper bound: {aupimo.fpr_bounds[1]:.5f}\"\"\")\n", + "\n", + "# their corresponding thresholds depend on the model's behavior\n", + "# so they only show in the result object\n", + "print(f\"\"\"Thresholds corresponding to the FPR bounds\n", + "Lower threshold: {aupimo_result.thresh_lower_bound:.3g}\n", + "Upper threshold: {aupimo_result.thresh_upper_bound:.3g}\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# we re-sample other normal images\n", + "# the FPR bounds are so strict that the heatmaps in the normal images\n", + "# become almost invisible with this colormap\n", + "max_anom_score_per_image = anomaly_maps.max(dim=2).values.max(dim=1).values # noqa: PD011\n", + "normal_images_with_highest_max_score = sorted(\n", + " zip(max_anom_score_per_image[labels == 0], torch.where(labels == 0)[0], strict=False),\n", + " reverse=True,\n", + " key=lambda x: x[0],\n", + ")\n", + "normal_images_with_highest_max_score = [idx.item() for _, idx in normal_images_with_highest_max_score[:5]]\n", + "\n", + "fig, axes = plt.subplots(2, 5, figsize=(16, 7), layout=\"constrained\")\n", + "\n", + "for ax, (_, row) in zip(axes[0], image_selection.iterrows(), strict=False):\n", + " image = cv2.resize(read_image(image_paths[row.image_index]), (256, 256))\n", + " anomaly_map = anomaly_maps[row.image_index].numpy()\n", + " mask = masks[row.image_index].squeeze().numpy()\n", + " ax.imshow(image)\n", + " #\n", + 
" # where the magic happens!\n", + " #\n", + " ax.imshow(\n", + " # anything below the lower threshold is set to `nan` so it's not shown\n", + " # because such values would never be detected as anomalies with AUPIMO's contraints\n", + " np.where(anomaly_map < aupimo_result.thresh_lower_bound, np.nan, anomaly_map),\n", + " cmap=\"jet\",\n", + " alpha=0.50,\n", + " # notice that vmin/vmax changed here to use the thresholds from the result object\n", + " vmin=aupimo_result.thresh_lower_bound,\n", + " vmax=aupimo_result.thresh_upper_bound,\n", + " )\n", + " ax.contour(anomaly_map, levels=[aupimo_result.thresh_lower_bound], colors=[\"blue\"], linewidths=1)\n", + " ax.contour(mask, levels=[0.5], colors=\"magenta\", linewidths=1)\n", + " ax.set_title(f\"{row.statistic}: {row.value:.0%}AUPIMO image {row.image_index}\")\n", + "\n", + "for ax, index in zip(axes[1], normal_images_with_highest_max_score, strict=False):\n", + " image = cv2.resize(read_image(image_paths[index]), (256, 256))\n", + " anomaly_map = anomaly_maps[index].numpy()\n", + " mask = masks[index].squeeze().numpy()\n", + " ax.imshow(image)\n", + " ax.imshow(\n", + " np.where(anomaly_map < aupimo_result.thresh_lower_bound, np.nan, anomaly_map),\n", + " cmap=\"jet\",\n", + " alpha=0.30,\n", + " vmin=aupimo_result.thresh_lower_bound,\n", + " vmax=aupimo_result.thresh_upper_bound,\n", + " )\n", + " ax.contour(anomaly_map, levels=[aupimo_result.thresh_lower_bound], colors=[\"blue\"], linewidths=1)\n", + " ax.set_title(f\"image {index}\")\n", + "\n", + "for ax in axes.flatten():\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "\n", + "axes[0, 0].set_ylabel(\"Anomalous\")\n", + "axes[1, 0].set_ylabel(\"Normal\")\n", + "fig.text(\n", + " 0.03,\n", + " -0.01,\n", + " \"Magenta: contours of the ground truth (GT) mask. \"\n", + " \"Anomaly maps colored in JET colormap between the thresholds in AUPIMO's integral. 
\"\n", + " \"Lower values are transparent, higher values are red.\",\n", + " ha=\"left\",\n", + " va=\"top\",\n", + " fontsize=\"small\",\n", + " color=\"dimgray\",\n", + ")\n", + "\n", + "fig.suptitle(\"Visualization linked to AUPIMO's bounds\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now the AUPIMO scores make sense with what you see in the heatmaps.\n", + "\n", + "The samples on the left and right are special cases: \n", + "- left (0% AUPIMO): nothing is seen because the model completely misses the anomaly\\*;\n", + "- right (100% AUPIMO): is practically red only because the detected the anomaly very well. \n", + "\n", + "\\* Because the scores in image 65 are as low as those in normal images." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cite Us\n", + "\n", + "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "\n", + "Our work was accepted to the British Machine Vision Conference 2024 (BMVC 2024).\n", + "\n", + "```bibtex\n", + "@misc{bertoldo2024aupimo,\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", + " author={Joao P. C. 
Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " year={2024},\n", + " eprint={2401.01984},\n", + " archivePrefix={arXiv},\n", + " primaryClass={cs.CV},\n", + " url={https://arxiv.org/abs/2401.01984}, \n", + "}\n", + "```\n", + "\n", + "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", + "\n", + "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Utils\n", + "\n", + "Here we provide some utility functions to reproduce the techniques shown in this notebook.\n", + "\n", + "They are `numpy` compatible and cover edge cases not discussed here (check the examples)."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Representative samples from the boxplot's statistics\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from numpy import ndarray\n", + "from torch import Tensor\n", + "\n", + "\n", + "def _validate_tensor_or_ndarray(x: Tensor | ndarray) -> ndarray:\n", + " if not isinstance(x, Tensor | ndarray):\n", + " msg = f\"Expected argument to be a tensor or ndarray, but got {type(x)}.\"\n", + " raise TypeError(msg)\n", + "\n", + " if isinstance(x, Tensor):\n", + " x = x.cpu().numpy()\n", + "\n", + " return x\n", + "\n", + "\n", + "def _validate_values(values: ndarray) -> None:\n", + " if values.ndim != 1:\n", + " msg = f\"Expected argument `values` to be a 1D, but got {values.ndim}D.\"\n", + " raise ValueError(msg)\n", + "\n", + "\n", + "def _validate_labels(labels: ndarray) -> ndarray:\n", + " if labels.ndim != 1:\n", + " msg = f\"Expected argument `labels` to be a 1D, but got {labels.ndim}D.\"\n", + " raise ValueError(msg)\n", + "\n", + " # if torch.is_floating_point(labels):\n", + " if np.issubdtype(labels.dtype, np.floating):\n", + " msg = f\"Expected argument `labels` to be of int or binary types, but got float: {labels.dtype}.\"\n", + " raise TypeError(msg)\n", + "\n", + " # check if it is binary and convert to int\n", + " if np.issubdtype(labels.dtype, np.bool_):\n", + " labels = labels.astype(int)\n", + "\n", + " unique_values = np.unique(labels)\n", + " nor_0_nor_1 = (unique_values != 0) & (unique_values != 1)\n", + " if nor_0_nor_1.any():\n", + " msg = f\"Expected argument `labels` to have 0s and 1s as ground truth labels, but got values {unique_values}.\"\n", + " raise ValueError(msg)\n", + "\n", + " return labels\n", + "\n", + "\n", + "def boxplot_stats(\n", + " values: Tensor | ndarray,\n", + " labels: Tensor | ndarray,\n", + " only_label: int | None = 1,\n", + " flier_policy: str | None 
= None,\n", + " repeated_policy: str | None = \"avoid\",\n", + ") -> list[dict[str, str | int | float | None]]:\n", + " \"\"\"Compute boxplot statistics of `values` and find the samples that are closest to them.\n", + "\n", + " This function uses `matplotlib.cbook.boxplot_stats`, which is the same function used by `matplotlib.pyplot.boxplot`.\n", + "\n", + " Args:\n", + " values (Tensor | ndarray): Values to compute boxplot statistics from.\n", + " labels (Tensor | ndarray): Labels of the samples (0=normal, 1=anomalous). Must have the same shape as `values`.\n", + " only_label (int | None): If 0 or 1, only use samples of that class. If None, use both. Defaults to 1.\n", + " flier_policy (str | None): What happens with the fliers ('outliers')?\n", + " - None: Do not include fliers.\n", + " - 'high': Include only high fliers.\n", + " - 'low': Include only low fliers.\n", + " - 'both': Include both high and low fliers.\n", + " Defaults to None.\n", + " repeated_policy (str | None): What happens if a sample has already been selected [for another statistic]?\n", + " - None: Don't care, repeat the sample.\n", + " - 'avoid': Avoid selecting the same one, go to the next closest.\n", + " Defaults to 'avoid'.\n", + "\n", + " Returns:\n", + " list[dict[str, str | int | float | None]]: List of boxplot statistics.\n", + " Keys:\n", + " - 'statistic' (str): Name of the statistic.\n", + " - 'value' (float): Value of the statistic (same units as `values`).\n", + " - 'nearest' (float): Value of the sample in `values` that is closest to the statistic.\n", + " Some statistics (e.g. 
'mean') are not guaranteed to be a value in `values`.\n", + " In that case, this is the actual value of the selected sample.\n", + " - 'index': Index in `values` that has the `nearest` value to the statistic.\n", + " \"\"\"\n", + " # operate on numpy arrays only for simplicity\n", + " values = _validate_tensor_or_ndarray(values) # (N,)\n", + " labels = _validate_tensor_or_ndarray(labels) # (N,)\n", + "\n", + " # validate the arguments\n", + " _validate_values(values)\n", + " labels = _validate_labels(labels)\n", + " if values.shape != labels.shape:\n", + " msg = (\n", + " \"Expected arguments `values` and `labels` to have the same shape, \"\n", + " f\"but got {values.shape=} and {labels.shape=}.\"\n", + " )\n", + " raise ValueError(msg)\n", + " assert only_label in {None, 0, 1}, f\"Invalid argument `only_label`: {only_label}\"\n", + " assert flier_policy in {None, \"high\", \"low\", \"both\"}, f\"Invalid argument `flier_policy`: {flier_policy}\"\n", + " assert repeated_policy in {None, \"avoid\"}, f\"Invalid argument `repeated_policy`: {repeated_policy}\"\n", + "\n", + " if only_label is not None and only_label not in labels:\n", + " msg = f\"Argument {only_label=} but `labels` does not contain this class.\"\n", + " raise ValueError(msg)\n", + "\n", + " # only consider samples of the given label\n", + " # `values` and `labels` now have shape (n,) instead of (N,), where n <= N\n", + " label_filter_mask = (labels == only_label) if only_label is not None else np.ones_like(labels, dtype=bool)\n", + " values = values[label_filter_mask] # (n,)\n", + " labels = labels[label_filter_mask] # (n,)\n", + " indexes = np.nonzero(label_filter_mask)[0] # (n,) values are indices in {0, 1, ..., N-1}\n", + "\n", + " indexes_selected = set() # values in {0, 1, ..., N-1}\n", + "\n", + " def append(records_: dict, statistic_: str, value_: float) -> None:\n", + " indices_sorted_by_distance = np.abs(values - value_).argsort() # (n,)\n", + " candidate = indices_sorted_by_distance[0] # idx that 
refers to {0, 1, ..., n-1}\n", + "\n", + " nearest = values[candidate]\n", + " index = indexes[candidate] # index has value in {0, 1, ..., N-1}\n", + " label = labels[candidate]\n", + "\n", + " if index in indexes_selected and repeated_policy == \"avoid\":\n", + " for candidate in indices_sorted_by_distance:\n", + " index_of_candidate = indexes[candidate]\n", + " if index_of_candidate in indexes_selected:\n", + " continue\n", + " # if the code reaches here, it means that `index_of_candidate` is not repeated\n", + " # if this is never reached, the first choice will be kept\n", + " nearest = values[candidate]\n", + " label = labels[candidate]\n", + " index = index_of_candidate\n", + " break\n", + "\n", + " indexes_selected.add(index)\n", + "\n", + " records_.append(\n", + " {\n", + " \"statistic\": statistic_,\n", + " \"value\": float(value_),\n", + " \"nearest\": float(nearest),\n", + " \"index\": int(index),\n", + " \"label\": int(label),\n", + " },\n", + " )\n", + "\n", + " # function used in `matplotlib.boxplot`\n", + " boxplot_stats = mpl.cbook.boxplot_stats(values)[0] # [0] is for the only boxplot\n", + "\n", + " records = []\n", + " for stat, val in boxplot_stats.items():\n", + " if stat in {\"iqr\", \"cilo\", \"cihi\"}:\n", + " continue\n", + "\n", + " if stat != \"fliers\":\n", + " append(records, stat, val)\n", + " continue\n", + "\n", + " if flier_policy is None:\n", + " continue\n", + "\n", + " for val_ in val:\n", + " stat_ = \"flierhi\" if val_ > boxplot_stats[\"med\"] else \"flierlo\"\n", + " if flier_policy == \"high\" and stat_ == \"flierlo\":\n", + " continue\n", + " if flier_policy == \"low\" and stat_ == \"flierhi\":\n", + " continue\n", + " # else means that they match or `fliers == \"both\"`\n", + " append(records, stat_, val_)\n", + "\n", + " return sorted(records, key=lambda r: r[\"value\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " statistic value nearest index label\n", + "0 whislo 0.00 0.00 65 1\n", + "1 q1 0.53 0.53 58 1\n", + "2 mean 0.74 0.75 7 1\n", + "3 med 0.89 0.89 63 1\n", + "4 q3 1.00 1.00 22 1\n", + "5 whishi 1.00 1.00 0 1\n" + ] + } + ], + "source": [ + "# basic usage\n", + "boxplot_statistics = boxplot_stats(aupimo_result.aupimos, labels)\n", + "boxplot_statistics = pd.DataFrame.from_records(boxplot_statistics)\n", + "print(boxplot_statistics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Repeated Statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " statistic value nearest index label\n", + "0 whislo 0.00 0.00 67 1\n", + "1 q1 0.59 0.59 58 1\n", + "2 mean 0.78 0.79 43 1\n", + "3 med 0.98 0.99 9 1\n", + "4 whishi 1.00 1.00 0 1\n", + "5 q3 1.00 1.00 36 1\n" + ] + } + ], + "source": [ + "# repeated values\n", + "# if the distribution is very skewed to one side,\n", + "# some statistics may have the same value\n", + "# e.g. 
the Q3 and the high whisker\n", + "#\n", + "# let's simulate this situation\n", + "\n", + "# increase all values by 10% and clip to [0, 1]\n", + "mock = torch.clip(aupimo_result.aupimos.clone() * 1.10, 0, 1)\n", + "\n", + "# 'avoid' is the default policy\n", + "# notice how Q3 and the high whisker have the same value, but different indexes\n", + "print(pd.DataFrame.from_records(boxplot_stats(mock, labels, repeated_policy=\"avoid\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " statistic value nearest index label\n", + "0 whislo 0.00 0.00 67 1\n", + "1 q1 0.59 0.59 58 1\n", + "2 mean 0.78 0.79 43 1\n", + "3 med 0.98 0.99 9 1\n", + "4 whishi 1.00 1.00 0 1\n", + "5 q3 1.00 1.00 0 1\n" + ] + } + ], + "source": [ + "# this behavior can be changed to allow repeated values\n", + "print(pd.DataFrame.from_records(boxplot_stats(mock, labels, repeated_policy=None)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fliers" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# fliers\n", + "# if the distribution is very skewed to one side,\n", + "# it is possible that some extreme values are considered\n", + "# are considered as outliers, showing as fliers in the boxplot\n", + "#\n", + "# there are two types of fliers: high and low\n", + "# they are defined as:\n", + "# - high: values > high whisker = Q3 + 1.5 * IQR\n", + "# - low: values < low whisker = Q1 - 1.5 * IQR\n", + "# where IQR = Q3 - Q1\n", + "\n", + "# let's artificially simulate this situation\n", + "# we will create a distortion in the values so that\n", + "# high values (close to 1) become even higher\n", + "# and low values (close to 0) become even lower\n", + "\n", + "\n", + "def distortion(vals: Tensor) -> Tensor:\n", + " \"\"\"Artificial distortion to simulate a skewed distribution.\n", + "\n", + " To visualize it:\n", + " ```\n", + " fig, ax = plt.subplots()\n", + " t = np.linspace(0, 1, 100)\n", + " ax.plot(t, np.clip(distortion(t), 0, 1), label=\"distortion\")\n", + " ax.plot(t, t, label=\"identity\", linestyle=\"--\")\n", + " fig\n", + " ```\n", + " \"\"\"\n", + " return vals + 0.12 * (vals * (1 - vals) * 4)\n", + "\n", + "\n", + "mock = torch.clip(distortion(aupimo_result.aupimos.clone()), 0, 1)\n", + "\n", + "fig, ax = plt.subplots(figsize=(7, 2))\n", + "ax.boxplot(\n", + " mock[labels == 1].numpy(),\n", + " vert=False,\n", + " widths=0.4,\n", + ")\n", + "_ = ax.set_yticks([])\n", + "ax.set_xlim(0 - (eps := 2e-2), 1 + eps)\n", + "ax.xaxis.set_major_formatter(PercentFormatter(1))\n", + "ax.set_xlabel(\"AUPIMO [%]\")\n", + "ax.set_title(\"AUPIMO Scores Boxplot\")\n", + "num_images = (labels == 1).sum().item()\n", + "ax.annotate(\n", + " text=f\"Number of images: {num_images}\",\n", + " xy=(0.03, 0.95),\n", + " xycoords=\"axes fraction\",\n", + " xytext=(0, 0),\n", + " textcoords=\"offset points\",\n", + " annotation_clip=False,\n", + " 
verticalalignment=\"top\",\n", + ")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " statistic value nearest index label\n", + "0 whislo 0.24 0.24 44 1\n", + "1 q1 0.65 0.65 58 1\n", + "2 mean 0.79 0.78 29 1\n", + "3 med 0.94 0.93 63 1\n", + "4 q3 1.00 1.00 22 1\n", + "5 whishi 1.00 1.00 0 1\n" + ] + } + ], + "source": [ + "# `None` is the default policy, so the fliers are not returned\n", + "print(pd.DataFrame.from_records(boxplot_stats(mock, labels, flier_policy=None)))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "with option 'low'\n", + " statistic value nearest index label\n", + "0 flierlo 0.00 0.00 65 1\n", + "1 flierlo 0.00 0.00 67 1\n", + "2 flierlo 0.01 0.01 71 1\n", + "3 flierlo 0.09 0.09 64 1\n", + "4 whislo 0.24 0.24 44 1\n", + "5 q1 0.65 0.65 58 1\n", + "6 mean 0.79 0.78 29 1\n", + "7 med 0.94 0.93 63 1\n", + "8 q3 1.00 1.00 22 1\n", + "9 whishi 1.00 1.00 0 1\n", + "with option 'both'\n", + " statistic value nearest index label\n", + "0 flierlo 0.00 0.00 65 1\n", + "1 flierlo 0.00 0.00 67 1\n", + "2 flierlo 0.01 0.01 71 1\n", + "3 flierlo 0.09 0.09 64 1\n", + "4 whislo 0.24 0.24 44 1\n", + "5 q1 0.65 0.65 58 1\n", + "6 mean 0.79 0.78 29 1\n", + "7 med 0.94 0.93 63 1\n", + "8 q3 1.00 1.00 22 1\n", + "9 whishi 1.00 1.00 0 1\n" + ] + } + ], + "source": [ + "# one can choose to include only high or low fliers, or both\n", + "# since there are only low fliers...\n", + "\n", + "# 'low' and 'both' will return the same result\n", + "print(\"with option 'low'\")\n", + "print(pd.DataFrame.from_records(boxplot_stats(mock, labels, flier_policy=\"low\")))\n", + "\n", + "print(\"with option 'both'\")\n", + "print(pd.DataFrame.from_records(boxplot_stats(mock, labels, flier_policy=\"both\")))" + ] + }, + { + 
"cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "with option 'high'\n", + " statistic value nearest index label\n", + "0 whislo 0.24 0.24 44 1\n", + "1 q1 0.65 0.65 58 1\n", + "2 mean 0.79 0.78 29 1\n", + "3 med 0.94 0.93 63 1\n", + "4 q3 1.00 1.00 22 1\n", + "5 whishi 1.00 1.00 0 1\n" + ] + } + ], + "source": [ + "# and 'high' will return no fliers (same as `flier_policy=None` in this case)\n", + "print(\"with option 'high'\")\n", + "print(pd.DataFrame.from_records(boxplot_stats(mock, labels, flier_policy=\"high\")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other applications and `only_label` argument" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "stats for the maximum anomaly score in the anomaly maps\n", + " statistic value nearest index label\n", + "0 whislo 0.46 0.46 65 1\n", + "1 q1 0.63 0.63 48 1\n", + "2 med 0.70 0.71 10 1\n", + "3 mean 0.73 0.73 118 1\n", + "4 q3 0.81 0.81 115 1\n", + "5 whishi 1.00 1.00 22 1\n" + ] + } + ], + "source": [ + "# other applications\n", + "# since the function is agnostic to the meaning of the values\n", + "# we can also use it to find representative samples\n", + "# with another metric or signal\n", + "#\n", + "# in the last plot cell we used the maximum anomaly score per image\n", + "# to select normal images, so let's reuse that criterion here\n", + "\n", + "# recompute it for didactic purposes\n", + "max_anom_score_per_image = anomaly_maps.max(dim=2).values.max(dim=1).values # noqa: PD011\n", + "print(\"stats for the maximum anomaly score in the anomaly maps\")\n", + "print(pd.DataFrame.from_records(boxplot_stats(max_anom_score_per_image, labels)))\n", + "# notice that the indices are not the same as before" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " statistic value nearest index label\n", + "0 whislo 0.42 0.42 90 0\n", + "1 q1 0.43 0.43 80 0\n", + "2 med 0.45 0.45 105 0\n", + "3 mean 0.46 0.46 89 0\n", + "4 q3 0.48 0.48 75 0\n", + "5 whishi 0.52 0.52 95 0\n" + ] + } + ], + "source": [ + "# we can also use the `only_label` argument to select only the\n", + "# samples from the normal class\n", + "print(pd.DataFrame.from_records(boxplot_stats(max_anom_score_per_image, labels, only_label=0)))\n", + "# notice the labels are all 0 now" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " statistic value nearest index label\n", + "0 whislo 0.42 0.42 90 0\n", + "1 q1 0.52 0.52 95 0\n", + "2 med 0.65 0.65 17 1\n", + "3 mean 0.66 0.66 45 1\n", + "4 q3 0.77 0.77 108 1\n", + "5 whishi 1.00 1.00 22 1\n" + ] + } + ], + "source": [ + "# or we can consider data from both classes (`None` option)\n", + "print(pd.DataFrame.from_records(boxplot_stats(max_anom_score_per_image, labels, only_label=None)))\n", + "# notice that the labels are mixed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cite Us\n", + "\n", + "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "\n", + "Our work was accepted to the British Machine Vision Conference 2024 (BMVC 2024).\n", + "\n", + "```bibtex\n", + "@misc{bertoldo2024aupimo,\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", + " author={Joao P. C. 
Bertoldo and Dick Ameln and Ashwin Vaidya and Samet AkΓ§ay},\n", + " year={2024},\n", + " eprint={2401.01984},\n", + " archivePrefix={arXiv},\n", + " primaryClass={cs.CV},\n", + " url={https://arxiv.org/abs/2401.01984}, \n", + "}\n", + "```\n", + "\n", + "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", + "\n", + "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "anomalib-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/700_metrics/pimo_viz.svg b/notebooks/700_metrics/pimo_viz.svg new file mode 100644 index 0000000000..962c95f463 --- /dev/null +++ b/notebooks/700_metrics/pimo_viz.svg @@ -0,0 +1,619 @@ + + + +image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +PIMO + + + + + +i + + + + + +AUPIMO + + + + + +i + + +Recall(t) + + +Upper bound + + +Lower bound + + +FPR(t) + + +β‡’ + +Recall(t) + +Upper bound + +Lower bound + +t [anomaly score threholds] + +Transparent(never detected as anomalous) + +RED(always detectedas anomalous) + +JET(AUPIMO range) + + diff --git a/notebooks/README.md b/notebooks/README.md index 36976a6855..2f93aa5c8c 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -51,3 +51,10 @@ To install 
Python, Git and other required tools, [OpenVINO Notebooks](https://gi | ---------------------- | ------------------------------------------------------------------------------------------------------------- | ----- | | Dobot Dataset Creation | [501a_training](/notebooks/500_use_cases/501_dobot/501a_training_a_model_with_cubes_from_a_robotic_arm.ipynb) | | | Training | [501b_training](/notebooks/500_use_cases/501_dobot/501b_inference_with_a_robotic_arm.ipynb) | | + +## 7. Metrics + +| Notebook | GitHub | Colab | +| ----------------------------------------------- | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| AUPIMO basics | [701a_aupimo](/notebooks/700_metrics/701a_aupimo.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701a_aupimo.ipynb) | +| AUPIMO representative samples and visualization | [701b_aupimo_advanced_i](/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | From f1c734f6c67085fcefe4efd3f9b940399b270416 Mon Sep 17 00:00:00 2001 From: Joao P C Bertoldo <24547377+jpcbertoldo@users.noreply.github.com> Date: Wed, 9 Oct 2024 13:23:54 +0200 Subject: [PATCH 08/32] Pimo tutorials/02 advanced ii (#2347) * uset all padim features to make it deterministic Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add aupimo notebook advanced i Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * update readme Signed-off-by: jpcbertoldo 
<24547377+jpcbertoldo@users.noreply.github.com> * modify changelog Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * correct readme Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * correct again Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * minor corrections Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add aupimo notebook advanced ii (pimo curve and integration bounds) Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * fix links Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * correct change log Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> --------- Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> --- CHANGELOG.md | 5 +- notebooks/700_metrics/701a_aupimo.ipynb | 2 +- .../700_metrics/701b_aupimo_advanced_i.ipynb | 4 +- .../700_metrics/701c_aupimo_advanced_ii.ipynb | 936 ++++++++++++++++++ notebooks/README.md | 9 +- 5 files changed, 947 insertions(+), 9 deletions(-) create mode 100644 notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb diff --git a/CHANGELOG.md b/CHANGELOG.md index e0e0cc955e..340641fb7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added +- Add `AUPIMO` tutorials notebooks in https://github.com/openvinotoolkit/anomalib/pull/2330 and https://github.com/openvinotoolkit/anomalib/pull/2336 +- Add `AUPIMO` metric by [jpcbertoldo](https://github.com/jpcbertoldo) in https://github.com/openvinotoolkit/anomalib/pull/1726 and refactored by [ashwinvaidya17](https://github.com/ashwinvaidya17) in https://github.com/openvinotoolkit/anomalib/pull/2329 + ### Changed ### Deprecated @@ -120,8 +123,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
### Added -- Add `AUPIMO` tutorials notebooks in https://github.com/openvinotoolkit/anomalib/pull/2330 and https://github.com/openvinotoolkit/anomalib/pull/2336 -- Add `AUPIMO` metric by [jpcbertoldo](https://github.com/jpcbertoldo) in https://github.com/openvinotoolkit/anomalib/pull/1726 and refactored by [ashwinvaidya17](https://github.com/ashwinvaidya17) in https://github.com/openvinotoolkit/anomalib/pull/2329 - Add requirements into `pyproject.toml` & Refactor anomalib install `get_requirements` by @harimkang in https://github.com/openvinotoolkit/anomalib/pull/1808 ### Changed diff --git a/notebooks/700_metrics/701a_aupimo.ipynb b/notebooks/700_metrics/701a_aupimo.ipynb index c6831fd1f7..5c5497b3b8 100644 --- a/notebooks/700_metrics/701a_aupimo.ipynb +++ b/notebooks/700_metrics/701a_aupimo.ipynb @@ -127,7 +127,7 @@ "We will use dataset Leather from MVTec AD. \n", "\n", "> See the notebooks below for more details on datamodules. \n", - "> [github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules]((https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules))" + "> [github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules](https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules)" ] }, { diff --git a/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb b/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb index 37876e5bf6..a785075060 100644 --- a/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb +++ b/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb @@ -143,13 +143,13 @@ "source": [ "# Basics\n", "\n", - "This part was covered in the notebook [701a_aupimo.ipynb](701a_aupimo.ipynb), so we'll not discuss it here.\n", + "This part was covered in the notebook [701a_aupimo.ipynb](./701a_aupimo.ipynb), so we'll not discuss it here.\n", "\n", "It will train a model and evaluate it using AUPIMO.\n", "We will use dataset Leather from MVTec AD with `PaDiM` (performance is not the best, but it is fast 
to train).\n", "\n", "> See the notebooks below for more details on:\n", - "> - datamodules: [100_datamodules]((https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules));\n", + "> - datamodules: [100_datamodules](https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules);\n", "> - models: [200_models](https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/200_models)." ] }, diff --git a/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb b/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb new file mode 100644 index 0000000000..ed647ef666 --- /dev/null +++ b/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb @@ -0,0 +1,936 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AUPIMO\n", + "\n", + "Advance use cases of the metric AUPIMO (pronounced \"a-u-pee-mo\").\n", + "\n", + "> For basic usage, please check the notebook [701a_aupimo.ipynb](./701a_aupimo.ipynb).\n", + "\n", + "Includes:\n", + "- visualization of the PIMO curve\n", + "- theoretical AUPIMO of a random classifier (\"baseline\")\n", + "- understanding the x-axis (FPR) bounds\n", + "- customizing the x-axis (FPR) bounds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# What is AUPIMO?\n", + "\n", + "The `Area Under the Per-Image Overlap [curve]` (AUPIMO) is a metric of recall (higher is better) designed for visual anomaly detection.\n", + "\n", + "Inspired by the [ROC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) and [PRO](https://link.springer.com/article/10.1007/s11263-020-01400-4) curves, \n", + "\n", + "> AUPIMO is the area under a curve of True Positive Rate (TPR or _recall_) as a function of False Positive Rate (FPR) restricted to a fixed range. 
\n", + "\n", + "But:\n", + "- the TPR (Y-axis) is *per-image* (1 image = 1 curve/score);\n", + "- the FPR (X-axis) considers the (average of) **normal** images only; \n", + "- the FPR (X-axis) is in log scale and its range is [1e-5, 1e-4]\\* (harder detection task!).\n", + "\n", + "\\* The score (the area under the curve) is normalized to be in [0, 1].\n", + "\n", + "AUPIMO can be interpreted as\n", + "\n", + "> average segmentation recall in an image given that the model (nearly) does not yield false positives in normal images.\n", + "\n", + "References in the last cell.\n", + "\n", + "![AUROC vs. AUPRO vs. AUPIMO](./roc_pro_pimo.svg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install `anomalib` using `pip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO(jpcbertoldo): replace by `pip install anomalib` when AUPIMO is released # noqa: TD003\n", + "%pip install ../.." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change the directory to have access to the datasets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "# NOTE: Provide the path to the dataset root directory.\n", + "# If the datasets is not downloaded, it will be downloaded\n", + "# to this directory.\n", + "dataset_root = Path.cwd().parent.parent / \"datasets\" / \"MVTec\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import numpy as np\n", + "import torch\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.axes import Axes\n", + "from matplotlib.ticker import FixedLocator, PercentFormatter\n", + "from numpy import ndarray\n", + "from scipy import stats\n", + "from torch import Tensor\n", + "\n", + "from anomalib import TaskType\n", + "from anomalib.data import MVTec\n", + "from anomalib.data.utils import read_image\n", + "from anomalib.engine import Engine\n", + "from anomalib.metrics import AUPIMO\n", + "from anomalib.models import Padim" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Basics\n", + "\n", + "This part was covered in the notebook [701a_aupimo.ipynb](./701a_aupimo.ipynb), so we'll not discuss it here.\n", + "\n", + "It will train a model and evaluate it using AUPIMO.\n", + "We will use dataset Leather from MVTec AD with `PaDiM` (performance is not the best, but it is fast to train).\n", + "\n", + "> See the notebooks below for more details on:\n", + "> - datamodules: [100_datamodules](https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/100_datamodules);\n", + "> - models: [200_models](https://github.com/openvinotoolkit/anomalib/tree/main/notebooks/200_models)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# train the model\n", + "task = TaskType.SEGMENTATION\n", + "datamodule = MVTec(\n", + " root=dataset_root,\n", + " category=\"leather\",\n", + " image_size=256,\n", + " train_batch_size=32,\n", + " eval_batch_size=32,\n", + " num_workers=8,\n", + " task=task,\n", + ")\n", + "model = Padim(\n", + " # only use one layer to speed it up\n", + " layers=[\"layer1\"],\n", + " n_features=64,\n", + " backbone=\"resnet18\",\n", + " pre_trained=True,\n", + ")\n", + "engine = Engine(\n", + " pixel_metrics=\"AUPIMO\", # others can be added\n", + " accelerator=\"auto\", # \\<\"cpu\", \"gpu\", \"tpu\", \"ipu\", \"hpu\", \"auto\">,\n", + " devices=1,\n", + " logger=False,\n", + ")\n", + "engine.fit(datamodule=datamodule, model=model)\n", + "# infer\n", + "predictions = engine.predict(dataloaders=datamodule.test_dataloader(), model=model, return_predictions=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compute AUPIMO" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Metric `AUPIMO` will save all targets and predictions in buffer. 
For large datasets this may lead to large memory footprint.\n" + ] + } + ], + "source": [ + "aupimo = AUPIMO(\n", + " # with `False` all the values are returned in a dataclass\n", + " return_average=False,\n", + ")\n", + "\n", + "anomaly_maps = []\n", + "masks = []\n", + "labels = []\n", + "image_paths = []\n", + "for batch in predictions:\n", + " anomaly_maps.append(batch_anomaly_maps := batch[\"anomaly_maps\"].squeeze(dim=1))\n", + " masks.append(batch_masks := batch[\"mask\"])\n", + " labels.append(batch[\"label\"])\n", + " image_paths.append(batch[\"image_path\"])\n", + " aupimo.update(anomaly_maps=batch_anomaly_maps, masks=batch_masks)\n", + "\n", + "# list[list[str]] -> list[str]\n", + "image_paths = [item for sublist in image_paths for item in sublist]\n", + "anomaly_maps = torch.cat(anomaly_maps, dim=0)\n", + "masks = torch.cat(masks, dim=0)\n", + "labels = torch.cat(labels, dim=0)\n", + "\n", + "# `pimo_result` has the PIMO curves of each image\n", + "# `aupimo_result` has the AUPIMO values\n", + "# i.e. their Area Under the Curve (AUC)\n", + "pimo_result, aupimo_result = aupimo.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Statistics and score distribution." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MEAN\n", + "aupimo_result.aupimos[labels == 1].mean().item()=0.742841961578308\n", + "OTHER STATISTICS\n", + "DescribeResult(nobs=92, minmax=(0.0, 1.0), mean=0.742841961578308, variance=0.08757792704451818, skewness=-0.9285678601866053, kurtosis=-0.3299211772047079)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# the normal images have `nan` values because\n", + "# recall is not defined for them so we ignore them\n", + "print(f\"MEAN\\n{aupimo_result.aupimos[labels == 1].mean().item()=}\")\n", + "print(f\"OTHER STATISTICS\\n{stats.describe(aupimo_result.aupimos[labels == 1])}\")\n", + "\n", + "fig, ax = plt.subplots()\n", + "ax.hist(aupimo_result.aupimos[labels == 1].numpy(), bins=np.linspace(0, 1, 11), edgecolor=\"black\")\n", + "ax.set_ylabel(\"Count (number of images)\")\n", + "ax.set_xlim(0, 1)\n", + "ax.set_xlabel(\"AUPIMO [%]\")\n", + "ax.xaxis.set_major_formatter(PercentFormatter(1))\n", + "ax.grid()\n", + "ax.set_title(\"AUPIMO distribution\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Until here we just reproduded the notebook with the basic usage of AUPIMO." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The PIMO curve \n", + "\n", + "We'll select a bunch of images to visualize the PIMO curves.\n", + "\n", + "To make sure we have best and worst detection examples, we'll use the representative samples selected in the previous notebook ([701b_aupimo_advanced_i.ipynb](./701b_aupimo_advanced_i.ipynb)).\n", + "\n", + "> Note the FPR (X-axis) is the average (in-image) FPR of the normal images in the test set. We'll note it as `FPRn`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# representative samples (in terms of the AUPIMO value)\n", + "# from lowest to highest AUPIMO score\n", + "samples = [65, 7, 58, 63, 22]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def fmt_pow10(value: float) -> str:\n", + " \"\"\"Format the power of 10.\"\"\"\n", + " return \"1\" if value == 1 else f\"$10^{{{int(np.log10(value))}}}$\"\n", + "\n", + "\n", + "def plot_pimo_with_auc_zone(\n", + " ax: Axes,\n", + " tpr: ndarray,\n", + " fpr: ndarray,\n", + " lower_bound: float,\n", + " upper_bound: float,\n", + " fpr_in_auc: ndarray,\n", + " tpr_in_auc: ndarray,\n", + ") -> None:\n", + " \"\"\"Helper function to plot the PIMO curve with the AUC zone.\"\"\"\n", + " # plot\n", + " ax.plot(fpr, tpr, linewidth=3.5)\n", + " ax.axvspan(lower_bound, upper_bound, color=\"magenta\", alpha=0.3, zorder=-1)\n", + " ax.fill_between(fpr_in_auc, tpr_in_auc, alpha=1, color=\"tab:purple\", zorder=1)\n", + "\n", + " # config plots\n", + " ax.set_ylabel(\"TPR [%]\")\n", + " ax.yaxis.set_major_locator(FixedLocator(np.linspace(0, 1, 6)))\n", + " ax.yaxis.set_major_formatter(PercentFormatter(1, 0, symbol=\"\"))\n", + " ax.set_ylim(0, 1 + 3e-2)\n", + " ax.set_xlabel(\"FPRn\")\n", + " ax.set_xscale(\"log\")\n", + " ax.xaxis.set_major_locator(FixedLocator(np.logspace(-6, 0, 7)))\n", + " ax.xaxis.set_major_formatter(lambda x, _: fmt_pow10(x))\n", + " ax.set_xlim(1e-6 / (eps := (1 + 3e-1)), 1 * eps)\n", + " ax.grid()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fig, axes = plt.subplots(2, 3, figsize=(10, 5), layout=\"tight\")\n", + "\n", + "for ax, index in zip(axes.flatten(), samples, strict=False):\n", + " score = aupimo_result.aupimos[index].item()\n", + " tpr = pimo_result.per_image_tprs[index]\n", + " fpr = pimo_result.shared_fpr\n", + " lower_bound, upper_bound = aupimo.fpr_bounds\n", + " threshs_auc_mask = (pimo_result.thresholds > aupimo_result.thresh_lower_bound) & (\n", + " pimo_result.thresholds < aupimo_result.thresh_upper_bound\n", + " )\n", + " fpr_in_auc = fpr[threshs_auc_mask]\n", + " tpr_in_auc = tpr[threshs_auc_mask]\n", + "\n", + " plot_pimo_with_auc_zone(ax, tpr, fpr, lower_bound, upper_bound, fpr_in_auc, tpr_in_auc)\n", + " ax.set_title(f\"Image {index} ({score:.0%} AUPIMO)\")\n", + "\n", + "axes[-1, -1].axis(\"off\")\n", + "axes[-1, -1].text(\n", + " -0.08,\n", + " 0,\n", + " \"\"\"\n", + "FPRn: Avg. [in-image] False Positive Rate (FPR)\n", + " on normal images only ('n').\n", + "\n", + "TPR: [in-image] True Positive Rate (TPR),\n", + " or Recall.\n", + "\n", + "Integration zone in light pink, and area\n", + "under the curve (AUC) in purple.\n", + "\n", + "This area is normalized by the range size\n", + "so that AUPIMO is in [0, 1].\n", + "\"\"\",\n", + " ha=\"left\",\n", + " va=\"bottom\",\n", + " fontsize=\"x-small\",\n", + " color=\"dimgray\",\n", + " font=\"monospace\",\n", + ")\n", + "\n", + "fig.suptitle(\"PIMO curves\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Meaning of the FPRn bounds\n", + "\n", + "AUPIMOo only uses _normal images_ in the X-axis -- i.e. 
the $\\operatorname{FPRn}$.\n", + "\n", + "**Why?** \n", + "\n", + "Because the integration range is a validation\\* of \"usable operating thresholds\", so using $\\operatorname{FPRn}$ makes it unbiased (to the anomalies).\n", + "\n", + "> Recall that, in practice, a threshold is set to decide if a pixel/image is anomalous.\n", + "> \n", + "> This strategy was inspired on [AUPRO](https://link.springer.com/article/10.1007/s11263-020-01400-4).\n", + "\n", + "---\n", + "\n", + "**Definition 1**: Average FPR on Normal Images ($\\operatorname{FPRn}$):\n", + "\n", + "$$\n", + " \\operatorname{FPRn} : t \\mapsto \\frac{1}{N} \\sum_{i=1}^{N} \\; \\times \\; \\operatorname{FPR}^{i}(t)\n", + "$$\n", + "\n", + "where $i$ and $N$ are, respectively, the index and the number of normal images in the test set. Note that $\\operatorname{FPRn}$ is the empirical average of $\\operatorname{FPR}^{i}$, so \n", + "\n", + "$$\n", + " \\operatorname{FPRn} \\approx \\mathbb{E} \\left[ \\operatorname{FPR}^{i} \\right]\n", + "$$\n", + "\n", + "**Defintion 2**: FPR of the $i$-th normal image ($\\operatorname{FPR}^{i}$): \n", + "\n", + "$$\n", + " \\operatorname{FPR}^{i} : t \\mapsto \\frac{\\text{Area of } \\mathbb{a}^{i} \\text{ above } t}{\\text{Area of } \\mathbb{a}^{i}}\n", + "$$\n", + "\n", + "where $\\mathbb{a}^{i}$ is the anomaly score map of the $i$-th image.\n", + "\n", + "---\n", + "\n", + "No further ado, let's visualize this $\\operatorname{FPRn}$!\n", + "\n", + "> For more details on this topic, check our paper in the last cell." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualizing the FPR of normal images ($\\operatorname{FPR}^{i}$)\n", + "\n", + "$\\operatorname{FPRn}$ is the average of $\\operatorname{FPR}^{i}$, so let's first visualize the latter." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# visalization of $FPR^i$\n", + "# since normal images do not have anomalous pixels\n", + "# their FPR actually correspond to the ratio of pixels\n", + "# (wrongly) classified as anomalous\n", + "\n", + "# we'll visualize 3 levels of FPR^(i) on some normal images\n", + "FRP_levels = [1e-2, 1e-3, 1e-4]\n", + "# technical detail: decreasing order of FPR --> increasing order of threshold\n", + "\n", + "\n", + "def threshold_from_fpr(anomaly_map: Tensor, fpr_level: float | Tensor) -> float:\n", + " \"\"\"Find the threshold that corresponds to the given FPR level.\n", + "\n", + " Args:\n", + " anomaly_map (torch.Tensor): Anomaly map, assumed to be from a normal image.\n", + " fpr_level (float): Desired FPR level.\n", + "\n", + " Returns:\n", + " float: Threshold such that `(anomaly_map > threshold).mean() == fpr_level`.\n", + " \"\"\"\n", + " # make a dicothomic search\n", + " lower, upper = anomaly_map.min(), anomaly_map.max() # initial bounds\n", + " middle = (lower + upper) / 2\n", + " fpr_level = torch.tensor(fpr_level)\n", + "\n", + " def fpr(threshold: Tensor) -> Tensor:\n", + " return (anomaly_map > threshold).float().mean()\n", + "\n", + " while not torch.isclose(fpr(middle), fpr_level, rtol=1e-2):\n", + " if torch.isclose(lower, upper, rtol=1e-3):\n", + " break\n", + " if fpr(middle) < fpr_level:\n", + " upper = middle\n", + " else:\n", + " lower = middle\n", + " middle = (lower + upper) / 2\n", + " return middle.item()\n", + "\n", + "\n", + "fig, axes = plt.subplots(1, 3, figsize=(13, 5), layout=\"constrained\")\n", + "\n", + "# select normal images with low and high mean anomaly scores\n", + "avg_anom_score_per_image = anomaly_maps.mean(dim=(1, 2))\n", + "# get the indices of the normal images sorted by their mean anomaly score\n", + "argsort = avg_anom_score_per_image.sort().indices\n", + "argsort = argsort[torch.isin(argsort, torch.where(labels 
== 0)[0])]\n", + "# select first, median and last\n", + "normal_images_selection = argsort[[0, len(argsort) // 2, -1]]\n", + "\n", + "# heatmaps will be normalized across *normal* images\n", + "# so the range of thresholds have an exact mapping to the range of [0, 1] in FPRn\n", + "# PS: it is not exactly true because we don't get a min-max, but a quantile-based normalization\n", + "global_normal_vmin, global_normal_vmax = torch.quantile(anomaly_maps[labels == 0], torch.tensor([0.02, 0.98]))\n", + "\n", + "for ax, index in zip(axes, normal_images_selection, strict=False):\n", + " image = cv2.resize(read_image(image_paths[index]), (256, 256))\n", + " anomaly_map = anomaly_maps[index]\n", + " thresholds = [threshold_from_fpr(anomaly_map, fpr_level) for fpr_level in FRP_levels]\n", + " anomaly_map = anomaly_map.numpy()\n", + "\n", + " ax.imshow(image)\n", + " ax.imshow(anomaly_map, cmap=\"jet\", alpha=0.10, vmin=global_normal_vmin, vmax=global_normal_vmax)\n", + " c = ax.contour(anomaly_map, levels=thresholds, linewidths=1, colors=[\"blue\", \"yellow\", \"red\"])\n", + " ax.set_title(f\"image {index}\")\n", + "\n", + "for ax in axes.flatten():\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "\n", + "fig.text(\n", + " 0.03,\n", + " -0.01,\n", + " \"Anomaly maps colored in JET colormap with min-max normalization across all normal images. 
\"\n", + " \" $\\\\operatorname{FPR}^{i}$ levels: \"\n", + " f\"Blue = {fmt_pow10(FRP_levels[0])} Yellow = {fmt_pow10(FRP_levels[1])} Red = {fmt_pow10(FRP_levels[2])}\",\n", + " ha=\"left\",\n", + " va=\"top\",\n", + " color=\"dimgray\",\n", + ")\n", + "\n", + "fig.suptitle(\"Contours of $\\\\operatorname{FPR}^{i}$ levels on normal samples from the test set\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A few notes about the different FPR levels:\n", + "- $10^{-2}$ (blue): images have many and/or quite visible false positive regions;\n", + "- $10^{-3}$ (yellow): most regions disappear, but a few are still visible; \n", + "- $10^{-4}$ (red): usually one or two regions, barely visible." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualizing the Average FPR on Normal Images ($\\operatorname{FPRn}$)\n", + "\n", + "Let's now visualize the $\\operatorname{FPRn}$ and the variance of $\\operatorname{FPR}^{i}$ across the normal images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# visualization of $FPRn$\n", + "# this one is an average behavior of the previous\n", + "# so one should expect a similar behavior but with\n", + "# some variations at each FPR level\n", + "\n", + "# we'll visualize the same FPR levels\n", + "FRP_levels = [1e-2, 1e-3, 1e-4]\n", + "# technical detail: decreasing order of FPR --> increasing order of threshold\n", + "\n", + "fig, axes = plt.subplots(1, 3, figsize=(14, 5.2), layout=\"constrained\")\n", + "\n", + "# function `threshold_from_fpr()` is replaced by an equivalent function\n", + "# for FPRn is already implemented in `pimo_result.thresh_at`\n", + "thresholds = [pimo_result.thresh_at(fpr_level)[1] for fpr_level in FRP_levels]\n", + "# note that all images used the same (ie 'shared') thresholds now\n", + "\n", + "# `normal_images_selection` is the same from the previous cell\n", + "for ax, index in zip(axes, normal_images_selection, strict=False):\n", + " image = cv2.resize(read_image(image_paths[index]), (256, 256))\n", + " anomaly_map = anomaly_maps[index]\n", + " fprs = [(anomaly_map > threshold).float().mean() for threshold in thresholds]\n", + " anomaly_map = anomaly_map.numpy()\n", + "\n", + " ax.imshow(image)\n", + " # `global_normal_vmin` and `global_normal_vmax` are the same from the previous cell\n", + " ax.imshow(anomaly_map, cmap=\"jet\", alpha=0.10, vmin=global_normal_vmin, vmax=global_normal_vmax)\n", + " c = ax.contour(anomaly_map, levels=thresholds, linewidths=1, colors=[\"blue\", \"yellow\", \"red\"])\n", + " ax.set_title(f\"image {index}\")\n", + "\n", + " ax.annotate(\n", + " \"$\\\\operatorname{FPR}^{i}$ levels: \"\n", + " f\"Blue = {fprs[0] * 100:.1g}% Yellow = {fprs[1] * 100:.1g}% Red = {fprs[2] * 100:.1g}%\",\n", + " xy=(0.01, 0.01),\n", + " xycoords=\"axes fraction\",\n", + " ha=\"left\",\n", + " va=\"bottom\",\n", + " color=\"white\",\n", + " )\n", + "\n", + "for ax in 
axes.flatten():\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "\n", + "fig.text(\n", + " 0.03,\n", + " -0.01,\n", + " \"Anomaly maps colored in JET colormap with min-max normalization across all normal images. \"\n", + " \" $\\\\operatorname{FPRn}$ levels: \"\n", + " f\"Blue = {fmt_pow10(FRP_levels[0])} Yellow = {fmt_pow10(FRP_levels[1])} Red = {fmt_pow10(FRP_levels[2])}\",\n", + " ha=\"left\",\n", + " va=\"top\",\n", + " color=\"dimgray\",\n", + ")\n", + "\n", + "fig.suptitle(\"Contours of $\\\\operatorname{FPRn}$ levels on normal samples from the test set\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Discussion\n", + "\n", + "#### Variance\n", + "\n", + "Note that each $\\operatorname{FPR}^{i}$ has a wide variance\\* of visual results across images.\n", + " \n", + "For instance, the blue level ranges from 0.2% to 3%, which visually is a huge difference, and the red level doesn't even show in most images.\n", + "\n", + "This variance is specific to each model-dataset, we observed many state-of-the-art models on the datasets from MVTec-AD and VisA, and we noticed that low levels tend to have a negligible visual variance.\n", + "\n", + "#### Default bounds (L and U)\n", + "\n", + "So how were the default bounds chosen?\n", + "\n", + "> Recall: \n", + "> \n", + "> $$\n", + "> \\text{AUPIMO} \n", + "> \\; = \\; \n", + "> \\frac{1}{\\log(U/L)}\n", + "> \\int_{\\log(L)}^{\\log(U)} \n", + "> \\operatorname{TPR}^{i}\\left( \\operatorname{FPRn}^{-1}( z ) \\right)\n", + "> \\, \n", + "> \\mathrm{d}\\log(z) \n", + "> $$\n", + "\n", + "##### Upper bound U = 10^{-4}\n", + "\n", + "The upper bound $U$ sets the requirement level of the detection task.\n", + "\n", + "The lower the $U$, the harder the task, and ideally we'd like it to be zero (i.e. 
anomalies are detected with no false positives).\n", + "\n", + "Compared to the images' content, the regions at $\\operatorname{FPRn} = 10^{-4}$ are _visually negligible_\\*.\n", + " \n", + "##### Lower bound L = 10^{-5}\n", + "\n", + "The lower bound $L$ has two numerical motivations.\n", + "\n", + "First, AUPIMO's integral is in log scale, so necessarily $L > 0$ and more weight is given to lower FPR levels.\n", + "\n", + "Second, images/masks/anomaly maps have finite resolution ($\\approx 10^{6}$ pixels/image\\*) -- so $\\operatorname{FPR}^{i}$ and $\\operatorname{FPRn}$ have discrete ranges.\n", + "\n", + "At $\\operatorname{FPRn} = 10^{-5}$, the discretization effects are still reasonable.\n", + "\n", + "##### Be careful!\n", + "\n", + "\\* These observations are based on the datasets we analyzed (from MVTec-AD and VisA).\n", + "\n", + "For other datasets, the default bounds may not be the best choice.\n", + "\n", + "Fortunately, AUPIMO allows customizing the bounds!\n", + "\n", + "> More details on these topics in our paper (see the last cell)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Custom FPRn bounds\n", + "\n", + "It's very easy to customize the $\\operatorname{FPRn}$ bounds $L$ and $U$ in AUPIMO.\n", + "\n", + "You can guess from the signature:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0;31mInit signature:\u001b[0m\n", + "\u001b[0mAUPIMO\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mnum_thresholds\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m300000\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mfpr_bounds\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1e-05\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.0001\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mreturn_average\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mforce\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mDocstring:\u001b[0m \n", + "Area Under the Per-Image Overlap (PIMO) curve.\n", + "\n", + "This torchmetrics interface is a wrapper around the functional interface, which is a wrapper around the numpy code.\n", + "The tensors are converted to numpy arrays and then passed and validated in the numpy code.\n", + "The 
results are converted back to tensors and wrapped in an dataclass object.\n", + "\n", + "Scores are computed from the integration of the PIMO curves within the given FPR bounds, then normalized to [0, 1].\n", + "It can be thought of as the average TPR of the PIMO curves within the given FPR bounds.\n", + "\n", + "Details: `anomalib.metrics.per_image.pimo`.\n", + "\n", + "Notation:\n", + " N: number of images\n", + " H: image height\n", + " W: image width\n", + " K: number of thresholds\n", + "\n", + "Attributes:\n", + " anomaly_maps: floating point anomaly score maps of shape (N, H, W)\n", + " masks: binary (bool or int) ground truth masks of shape (N, H, W)\n", + "\n", + "Args:\n", + " num_thresholds: number of thresholds to compute (K)\n", + " fpr_bounds: lower and upper bounds of the FPR integration range\n", + " force: whether to force the computation despite bad conditions\n", + "\n", + "Returns:\n", + " tuple[PIMOResult, AUPIMOResult]: PIMO and AUPIMO results dataclass objects. See `PIMOResult` and `AUPIMOResult`.\n", + "\u001b[0;31mInit docstring:\u001b[0m\n", + "Area Under the Per-Image Overlap (PIMO) curve.\n", + "\n", + "Args:\n", + " num_thresholds: [passed to parent `PIMO`] number of thresholds used to compute the PIMO curve\n", + " fpr_bounds: lower and upper bounds of the FPR integration range\n", + " return_average: if True, return the average AUPIMO score; if False, return all the individual AUPIMO scores\n", + " force: if True, force the computation of the AUPIMO scores even in bad conditions (e.g. few points)\n", + "\u001b[0;31mFile:\u001b[0m ~/miniconda3/envs/anomalib-dev/lib/python3.10/site-packages/anomalib/metrics/pimo/pimo.py\n", + "\u001b[0;31mType:\u001b[0m ABCMeta\n", + "\u001b[0;31mSubclasses:\u001b[0m " + ] + } + ], + "source": [ + "AUPIMO?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's recompute the scores with the following situation: \n", + "- $U = 10^{-2}$ to make the detection task easier;\n", + "- $L = 10^{-4}$ assuming that \"small\" anomalies are not important for the application." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Metric `AUPIMO` will save all targets and predictions in buffer. For large datasets this may lead to large memory footprint.\n" + ] + } + ], + "source": [ + "aupimo_custom = AUPIMO(\n", + " # with `False` all the values are returned in a dataclass\n", + " return_average=False,\n", + " # customized!\n", + " fpr_bounds=(1e-4, 1e-2),\n", + ")\n", + "\n", + "# we already have all of them in concatenated tensors\n", + "# so we don't need to loop over the batches like before\n", + "aupimo_custom.update(anomaly_maps=anomaly_maps, masks=masks)\n", + "pimo_result_custom, aupimo_result_custom = aupimo_custom.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fig, axes = plt.subplots(2, 3, figsize=(10, 5), layout=\"tight\")\n", + "\n", + "for ax, index in zip(axes.flatten(), samples, strict=False):\n", + " score = aupimo_result_custom.aupimos[index].item()\n", + " tpr = pimo_result_custom.per_image_tprs[index]\n", + " fpr = pimo_result_custom.shared_fpr\n", + " lower_bound, upper_bound = aupimo_custom.fpr_bounds\n", + " threshs_auc_mask = (pimo_result_custom.thresholds > aupimo_result_custom.thresh_lower_bound) & (\n", + " pimo_result_custom.thresholds < aupimo_result_custom.thresh_upper_bound\n", + " )\n", + " fpr_in_auc = fpr[threshs_auc_mask]\n", + " tpr_in_auc = tpr[threshs_auc_mask]\n", + "\n", + " plot_pimo_with_auc_zone(ax, tpr, fpr, lower_bound, upper_bound, fpr_in_auc, tpr_in_auc)\n", + " ax.set_title(f\"Image {index} ({score:.0%} AUPIMO)\")\n", + "\n", + "axes[-1, -1].axis(\"off\")\n", + "axes[-1, -1].text(\n", + " -0.08,\n", + " 0,\n", + " \"\"\"\n", + "FPRn: Avg. 
[in-image] False Positive Rate (FPR)\n", + " on normal images only ('n').\n", + "\n", + "TPR: [in-image] True Positive Rate (TPR),\n", + " or Recall.\n", + "\n", + "Integration zone in light pink, and area\n", + "under the curve (AUC) in purple.\n", + "\n", + "This area is normalized by the range size\n", + "so that AUPIMO is in [0, 1].\n", + "\"\"\",\n", + " ha=\"left\",\n", + " va=\"bottom\",\n", + " fontsize=\"x-small\",\n", + " color=\"dimgray\",\n", + " font=\"monospace\",\n", + ")\n", + "\n", + "fig.suptitle(\"PIMO curves\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how the AUPIMO score increased with the easier task :) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cite Us\n", + "\n", + "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "\n", + "Our work was accepted to the British Machine Vision Conference 2024 (BMVC 2024).\n", + "\n", + "```bibtex\n", + "@misc{bertoldo2024aupimo,\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", + " author={Joao P. C. 
Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " year={2024},\n", + " eprint={2401.01984},\n", + " archivePrefix={arXiv},\n", + " primaryClass={cs.CV},\n", + " url={https://arxiv.org/abs/2401.01984}, \n", + "}\n", + "```\n", + "\n", + "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", + "\n", + "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "anomalib-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/README.md b/notebooks/README.md index 2f93aa5c8c..8d8724a228 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -54,7 +54,8 @@ To install Python, Git and other required tools, [OpenVINO Notebooks](https://gi ## 7. 
Metrics -| Notebook | GitHub | Colab | -| ----------------------------------------------- | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| AUPIMO basics | [701a_aupimo](/notebooks/700_metrics/701a_aupimo.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701a_aupimo.ipynb) | -| AUPIMO representative samples and visualization | [701b_aupimo_advanced_i](/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | +| Notebook | GitHub | Colab | +| ----------------------------------------------- | ------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| AUPIMO basics | [701a_aupimo](/notebooks/700_metrics/701a_aupimo.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701a_aupimo.ipynb) | +| AUPIMO representative samples and visualization | [701b_aupimo_advanced_i](/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | +| PIMO curve and 
integration bounds | [701c_aupimo_advanced_ii](/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb) | From 3acf51ad66591d00c0da2aca9f3099ab761affad Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Mon, 14 Oct 2024 09:36:23 +0100 Subject: [PATCH 09/32] Create epic.yaml --- .github/ISSUE_TEMPLATE/epic.yaml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/epic.yaml diff --git a/.github/ISSUE_TEMPLATE/epic.yaml b/.github/ISSUE_TEMPLATE/epic.yaml new file mode 100644 index 0000000000..b3e2cc6b71 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/epic.yaml @@ -0,0 +1,30 @@ +name: Epic +description: A large body of work that can be broken down into smaller stories +title: "[EPIC] " +labels: ["epic"] +assignees: [] +body: + - type: markdown + attributes: + value: "## Epic Description" + - type: textarea + id: description + attributes: + label: Describe the epic + description: Provide a clear and concise description of what this epic encompasses + validations: + required: true + - type: textarea + id: goals + attributes: + label: Goals + description: What are the main goals of this epic? 
+ validations: + required: true + - type: textarea + id: acceptance-criteria + attributes: + label: Acceptance Criteria + description: List the high-level acceptance criteria for this epic + validations: + required: true From 71e48240a55d2b6f96df6900c64be3e443ff520b Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Mon, 14 Oct 2024 09:53:54 +0100 Subject: [PATCH 10/32] =?UTF-8?q?=F0=9F=94=A8=20Update=20the=20issue=20tem?= =?UTF-8?q?plates=20(#2363)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update epic.yaml * Update epic.yaml * Update epic.yaml * Update epic.yaml --- .github/ISSUE_TEMPLATE/epic.yaml | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/epic.yaml b/.github/ISSUE_TEMPLATE/epic.yaml index b3e2cc6b71..e6bdd31e8a 100644 --- a/.github/ISSUE_TEMPLATE/epic.yaml +++ b/.github/ISSUE_TEMPLATE/epic.yaml @@ -1,12 +1,12 @@ -name: Epic +name: 🎯 Epic description: A large body of work that can be broken down into smaller stories -title: "[EPIC] " +title: "🎯 [EPIC] " labels: ["epic"] assignees: [] body: - type: markdown attributes: - value: "## Epic Description" + value: "## 🎯 Epic Description" - type: textarea id: description attributes: @@ -22,9 +22,18 @@ body: validations: required: true - type: textarea - id: acceptance-criteria + id: tasks attributes: - label: Acceptance Criteria - description: List the high-level acceptance criteria for this epic + label: Tasks + description: Break down the epic into smaller tasks. Add or remove tasks as needed. + value: | + - [ ] Task 1: + - [ ] Task 2: + - [ ] Task 3: + - [ ] Task 4: + - [ ] Task 5: validations: required: true + - type: markdown + attributes: + value: "Remember to create separate issues for each task and link them to this epic." 
From e5dd67f2b22877cc945d01007ad79a3c299254e3 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Mon, 14 Oct 2024 10:16:49 +0100 Subject: [PATCH 11/32] Update task.yaml --- .github/ISSUE_TEMPLATE/task.yaml | 74 ++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/task.yaml b/.github/ISSUE_TEMPLATE/task.yaml index 9369e33c96..065712d1dc 100644 --- a/.github/ISSUE_TEMPLATE/task.yaml +++ b/.github/ISSUE_TEMPLATE/task.yaml @@ -1,35 +1,65 @@ -name: Tasks -description: This is used to capture tasks being implemented/to implement such as features, maintenance, refactor, etc. -title: "[Task]: " -labels: ["Task"] +name: 📋 Task +description: A specific piece of work to be completed +title: "📋 [TASK] " +labels: ["task"] +assignees: [] body: - type: markdown attributes: - value: | - We encourage our users to submit feature requests in our [Discussion forum](https://github.com/openvinotoolkit/anomalib/discussions/categories/ideas-feature-requests). You can use this template for consistency. - + value: "## 📋 Task Description" - type: textarea - id: motivation + id: description attributes: - label: What is the motivation for this task? - description: A clear and concise description of what the problem is. - placeholder: | - 1. I'm always frustrated when [...]. It would be better if we could [...] - 2. I would like to have [...] model/dataset to be supported in Anomalib. + label: Describe the task + description: Provide a clear and concise description of the task to be completed validations: required: true - type: textarea - id: solution + id: acceptance-criteria attributes: - label: Describe the solution you'd like - description: A clear and concise description of what you want to happen. Add screenshots or code-blocks if necessary. - placeholder: | - I would like to have [...] to do this we would need to [...] - Here is what I would like to see [...] 
+ label: Acceptance Criteria + description: List the specific criteria that must be met for this task to be considered complete + validations: + required: true + - type: dropdown + id: priority + attributes: + label: Priority + options: + - Low + - Medium + - High + validations: + required: true + - type: input + id: epic-link + attributes: + label: Related Epic + description: "If this task is part of an epic, provide the epic's issue number (e.g., #123)" + validations: + required: false + - type: input + id: estimated-time + attributes: + label: Estimated Time + description: Provide an estimate of how long this task will take (e.g., 2h, 1d) + validations: + required: false + - type: dropdown + id: status + attributes: + label: Current Status + options: + - Not Started + - In Progress + - Blocked + - Ready for Review validations: required: true - type: textarea - id: additional-context + id: additional-info attributes: - label: Additional context - description: Add any other context or screenshots about the feature request here. 
+ label: Additional Information + description: Any other relevant details or context for this task + validations: + required: false From 2c2fac1a32cb2101262971a1991d625d15e61005 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Mon, 14 Oct 2024 11:10:20 +0100 Subject: [PATCH 12/32] Create user_story.yaml --- .github/ISSUE_TEMPLATE/user_story.yaml | 69 ++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/user_story.yaml diff --git a/.github/ISSUE_TEMPLATE/user_story.yaml b/.github/ISSUE_TEMPLATE/user_story.yaml new file mode 100644 index 0000000000..c32d1e45ea --- /dev/null +++ b/.github/ISSUE_TEMPLATE/user_story.yaml @@ -0,0 +1,69 @@ +name: 📖 User Story +description: A small, self-contained unit of development work describing a feature from an end-user perspective +title: "📖 [STORY] " +labels: ["user-story"] +assignees: [] +body: + - type: markdown + attributes: + value: "## 📖 User Story Description" + - type: textarea + id: user-story + attributes: + label: User Story + description: As a [type of user], I want [an action] so that [a benefit/a value] + placeholder: As a computer vision researcher, I want to implement a new anomaly detection algorithm so that I can improve detection accuracy for industrial defect scenarios. + validations: + required: true + - type: textarea + id: acceptance-criteria + attributes: + label: Acceptance Criteria + description: List the acceptance criteria for this user story + placeholder: | + 1. The new algorithm is implemented and integrated into the anomalib framework + 2. Unit tests are written and pass for the new implementation + 3. Performance benchmarks show improvement over existing methods on specified datasets + 4. Documentation is updated to include usage instructions and theory behind the new algorithm + 5. 
An example notebook is provided demonstrating the algorithm's application + validations: + required: true + - type: input + id: story-points + attributes: + label: Story Points + description: Estimate the complexity of this story (e.g., 1, 2, 3, 5, 8, 13) + validations: + required: true + - type: input + id: epic-link + attributes: + label: Related Epic + description: "If this story is part of an epic, provide the epic's issue number (e.g., #123)" + validations: + required: false + - type: dropdown + id: model-category + attributes: + label: Category + description: Select the category this story primarily relates to + options: + - Data + - Anomaly Detection Algorithms + - Pre-processing + - Post-processing + - Evaluation Metrics + - Visualization + - Performance Optimization + - API/Interface + - Documentation + - Others + validations: + required: true + - type: textarea + id: additional-context + attributes: + label: Additional Context + description: Add any other context, background, or relevant research papers about the user story here + validations: + required: false From 4e13e8abc5378bdf84e07af6edac5e3f3314eb65 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 15 Oct 2024 06:02:21 +0100 Subject: [PATCH 13/32] Update epic.yaml --- .github/ISSUE_TEMPLATE/epic.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/epic.yaml b/.github/ISSUE_TEMPLATE/epic.yaml index e6bdd31e8a..23c3bf51d3 100644 --- a/.github/ISSUE_TEMPLATE/epic.yaml +++ b/.github/ISSUE_TEMPLATE/epic.yaml @@ -27,11 +27,11 @@ body: label: Tasks description: Break down the epic into smaller tasks. Add or remove tasks as needed. 
value: | - - [ ] Task 1: - - [ ] Task 2: - - [ ] Task 3: - - [ ] Task 4: - - [ ] Task 5: + - [ ] Task 1: + - [ ] Task 2: + - [ ] Task 3: + - [ ] Task 4: + - [ ] Task 5: validations: required: true - type: markdown From 71cb1365396a36f5423f7cb59c9e22031364b93b Mon Sep 17 00:00:00 2001 From: Joao P C Bertoldo <24547377+jpcbertoldo@users.noreply.github.com> Date: Tue, 15 Oct 2024 16:19:21 +0200 Subject: [PATCH 14/32] Pimo tutorials/03 advanced iii (#2348) * add aupimo notebook advanced iii (aupimo score of a random model) Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add cite us Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * update notebooks readme Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> --------- Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> Co-authored-by: Samet Akcay --- .../701d_aupimo_advanced_iii.ipynb | 372 ++++++++++++++++++ notebooks/README.md | 11 +- 2 files changed, 378 insertions(+), 5 deletions(-) create mode 100644 notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb diff --git a/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb b/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb new file mode 100644 index 0000000000..6d446d171e --- /dev/null +++ b/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb @@ -0,0 +1,372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AUPIMO Score of a Random Model\n", + "\n", + "If model randomly assigns scores to the pixels -- i.e. no discrimination -- its AUROC score will be 50%. \n", + "\n", + "What would be its AUPIMO score?\n", + "\n", + "> AUPIMO is pronounced \"a-u-pee-mo\".\n", + "\n", + "> For basic usage, please check the notebook [701a_aupimo.ipynb](./701a_aupimo.ipynb).\n", + "\n", + "> For PIMO curve plots, please check the notebook [701c_aupimo_advanced_ii.ipynb](./701c_aupimo_advanced_ii.ipynb)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# What is AUPIMO?\n", + "\n", + "The `Area Under the Per-Image Overlap [curve]` (AUPIMO) is a metric of recall (higher is better) designed for visual anomaly detection.\n", + "\n", + "Inspired by the [ROC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) and [PRO](https://link.springer.com/article/10.1007/s11263-020-01400-4) curves, \n", + "\n", + "> AUPIMO is the area under a curve of True Positive Rate (TPR or _recall_) as a function of False Positive Rate (FPR) restricted to a fixed range. \n", + "\n", + "But:\n", + "- the TPR (Y-axis) is *per-image* (1 image = 1 curve/score);\n", + "- the FPR (X-axis) considers the (average of) **normal** images only; \n", + "- the FPR (X-axis) is in log scale and its range is [1e-5, 1e-4]\\* (harder detection task!).\n", + "\n", + "\\* The score (the area under the curve) is normalized to be in [0, 1].\n", + "\n", + "AUPIMO can be interpreted as\n", + "\n", + "> average segmentation recall in an image given that the model (nearly) does not yield false positives in normal images.\n", + "\n", + "References in the last cell.\n", + "\n", + "![AUROC vs. AUPRO vs. AUPIMO](./roc_pro_pimo.svg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install `anomalib` using `pip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO(jpcbertoldo): replace by `pip install anomalib` when AUPIMO is released # noqa: TD003\n", + "%pip install ../.." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.axes import Axes\n", + "from matplotlib.ticker import FixedLocator, PercentFormatter\n", + "from numpy import ndarray" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Random Model\n", + "\n", + "If a model cannot discriminate between normal and anomalous images, the survival functions\\* of the anomaly scores conditioned to each class would be the same.\n", + "\n", + "> \\* https://en.wikipedia.org/wiki/Survival_function\n", + "\n", + "In other words, FPR and TPR would be the same.\n", + "\n", + "Let's simulate this situation." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "thresholds = torch.linspace(0, 1, 1001)\n", + "\n", + "# fpr and tpr as a function of the threshold (i.e. the survival functions)\n", + "# generally look like logistic functions flipped horizontally\n", + "# their actual shapes don't matter much, but rather how they compare to each other\n", + "# in this case, since they're the same, this choice is arbitrary as long as\n", + "# they're monotonically decreasing with the threshold\n", + "fpr = 1 - 1e2 / (1e2 + torch.exp(-20 * (thresholds - 0.5)))\n", + "tpr = fpr.clone()\n", + "\n", + "fig, axes = plt.subplots(1, 2, figsize=(8, 2), constrained_layout=True, sharey=True)\n", + "\n", + "axes[0].plot(thresholds, fpr, label=\"FPR\")\n", + "axes[1].plot(thresholds, tpr, label=\"TPR\")\n", + "\n", + "for ax in axes:\n", + " ax.set_xlabel(\"Threshold\")\n", + " ax.legend(loc=\"upper right\")\n", + " ax.set_yticks([0, 0.5, 1])\n", + " ax.set_xticks([])\n", + " ax.grid()\n", + "\n", + "fig.supylabel(\"FPR or TPR\", x=-0.03)\n", + "fig.suptitle(\"Simulated FPR and TPR curves\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PIMO curve\n", + "\n", + "In the ROC curve, the FPR = TPR looks like a straight line.\n", + "\n", + "What does it look like in the PIMO curve?" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# utility plot functions (from the previous notebook)\n", + "\n", + "\n", + "def fmt_pow10(value: float) -> str:\n", + " \"\"\"Format the power of 10.\"\"\"\n", + " return \"1\" if value == 1 else f\"$10^{{{int(np.log10(value))}}}$\"\n", + "\n", + "\n", + "def plot_pimo_with_auc_zone(\n", + " ax: Axes,\n", + " tpr: ndarray,\n", + " fpr: ndarray,\n", + " lower_bound: float,\n", + " upper_bound: float,\n", + " fpr_in_auc: ndarray,\n", + " tpr_in_auc: ndarray,\n", + ") -> None:\n", + " \"\"\"Helper function to plot the PIMO curve with the AUC zone.\"\"\"\n", + " # plot\n", + " ax.plot(fpr, tpr, linewidth=3.5)\n", + " ax.axvspan(lower_bound, upper_bound, color=\"magenta\", alpha=0.3, zorder=-1)\n", + " ax.fill_between(fpr_in_auc, tpr_in_auc, alpha=1, color=\"tab:purple\", zorder=1)\n", + "\n", + " # config plots\n", + " ax.set_ylabel(\"TPR [%]\")\n", + " ax.yaxis.set_major_locator(FixedLocator(np.linspace(0, 1, 6)))\n", + " ax.yaxis.set_major_formatter(PercentFormatter(1, 0, symbol=\"\"))\n", + " ax.set_ylim(0, 1 + 3e-2)\n", + " ax.set_xlabel(\"FPR\")\n", + " ax.set_xscale(\"log\")\n", + " ax.xaxis.set_major_locator(FixedLocator(np.logspace(-6, 0, 7)))\n", + " ax.xaxis.set_major_formatter(lambda x, _: fmt_pow10(x))\n", + " ax.set_xlim(1e-6 / (eps := (1 + 3e-1)), 1 * eps)\n", + " ax.grid()\n", + "\n", + "\n", + "# simulate a random model's curve\n", + "lower_bound, upper_bound = 1e-5, 1e-4\n", + "threshs_auc_mask = (fpr > lower_bound) & (fpr < upper_bound)\n", + "fpr_in_auc = fpr[threshs_auc_mask]\n", + "tpr_in_auc = tpr[threshs_auc_mask]\n", + "\n", + "fig, ax = plt.subplots(figsize=(6, 4.5))\n", + "plot_pimo_with_auc_zone(ax, tpr, fpr, lower_bound, upper_bound, fpr_in_auc, tpr_in_auc)\n", + "\n", + "fig.text(\n", + " 0.15,\n", + " -0.01,\n", + " \"\"\"\n", + "FPR: Avg. 
[in-image] False Positive Rate (FPR) on normal images only.\n", + "\n", + "TPR: [in-image] True Positive Rate (TPR), or Recall.\n", + "\n", + "Integration zone in light pink, and area under the curve (AUC) in purple.\n", + "\n", + "This area is normalized by the range size so that AUPIMO is in [0, 1].\n", + "\"\"\",\n", + " ha=\"left\",\n", + " va=\"top\",\n", + " fontsize=\"x-small\",\n", + " color=\"dimgray\",\n", + " font=\"monospace\",\n", + ")\n", + "\n", + "fig.suptitle(\"Random model's PIMO curve\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AUPIMO Score\n", + "\n", + "Recall that AUPIMO is computed from this integral:\n", + "\n", + "$$\n", + " \\frac{1}{\\log(U/L)}\n", + " \\int_{\\log(L)}^{\\log(U)} \n", + " \\operatorname{TPR}^{i}\\left( \\operatorname{FPR}^{-1}( z ) \\right)\n", + " \\, \n", + " \\mathrm{d}\\log(z) \n", + "$$\n", + "\n", + "where the integration bounds -- $L$[ower] and $U$[pper] -- are the FPR bounds.\n", + "\n", + "By assuming $\\operatorname{TPR}^{i} = \\operatorname{FPR}$, the AUPIMO score only depends on the FPR bounds:\n", + "\n", + "$$\n", + " \\text{AUPIMO of a random model} = \\frac{U - L}{\\log(U/L)}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "random_model_aupimo(1e-4, 1e-5)=0.004%\n" + ] + } + ], + "source": [ + "def random_model_aupimo(lower_bound: float, upper_bound: float) -> float:\n", + " \"\"\"AUPIMO score obtained by a random model (no class discrimination).\"\"\"\n", + " return (upper_bound - lower_bound) / np.log(upper_bound / lower_bound)\n", + "\n", + "\n", + "print(f\"{random_model_aupimo(1e-4, 1e-5)=:.3%}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how a random model's AUPIMO score of $0.004%$ is numerically negligible in the scale up to 100% -- while its AUROC is 50%.\n", + "\n", +
"It's easier to interpret the meaning of AUPIMO scores: \n", + "- $0$%: random or worse, \n", + "- $100$%: perfect." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cite Us\n", + "\n", + "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "\n", + "Our work was accepted to the British Machine Vision Conference 2024 (BMVC 2024).\n", + "\n", + "```bibtex\n", + "@misc{bertoldo2024aupimo,\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", + " author={Joao P. C. Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " year={2024},\n", + " eprint={2401.01984},\n", + " archivePrefix={arXiv},\n", + " primaryClass={cs.CV},\n", + " url={https://arxiv.org/abs/2401.01984}, \n", + "}\n", + "```\n", + "\n", + "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", + "\n", + "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "anomalib-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/README.md b/notebooks/README.md index 8d8724a228..15935b93cf 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ 
-54,8 +54,9 @@ To install Python, Git and other required tools, [OpenVINO Notebooks](https://gi ## 7. Metrics -| Notebook | GitHub | Colab | -| ----------------------------------------------- | ------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| AUPIMO basics | [701a_aupimo](/notebooks/700_metrics/701a_aupimo.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701a_aupimo.ipynb) | -| AUPIMO representative samples and visualization | [701b_aupimo_advanced_i](/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | -| PIMO curve and integration bounds | [701c_aupimo_advanced_ii](/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb) | +| Notebook | GitHub | Colab | +| ----------------------------------------------- | --------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| AUPIMO basics | [701a_aupimo](/notebooks/700_metrics/701a_aupimo.ipynb) | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701a_aupimo.ipynb) | +| AUPIMO representative samples and visualization | [701b_aupimo_advanced_i](/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | +| PIMO curve and integration bounds | [701c_aupimo_advanced_ii](/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb) | +| (AU)PIMO of a random model | [701d_aupimo_advanced_iii](/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb) | From b36b780bfe447367549109eca22e5d75da8d0f3c Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 15 Oct 2024 15:21:59 +0100 Subject: [PATCH 15/32] =?UTF-8?q?=F0=9F=94=A8=20Deprecate=20try=20import?= =?UTF-8?q?=20and=20replace=20it=20with=20Lightning's=20package=5Favailabl?= =?UTF-8?q?e=20(#2373)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace try_import with lightnings package_available function Signed-off-by: Samet Akcay --- src/anomalib/cli/pipelines.py | 4 ++-- src/anomalib/cli/utils/openvino.py | 5 ++--- src/anomalib/loggers/wandb.py | 5 ++--- src/anomalib/models/components/base/export_mixin.py | 6 +++--- src/anomalib/utils/exceptions/imports.py | 9 +++++++++ 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/anomalib/cli/pipelines.py 
b/src/anomalib/cli/pipelines.py index a76e57c298..8cfb04fd2e 100644 --- a/src/anomalib/cli/pipelines.py +++ b/src/anomalib/cli/pipelines.py @@ -6,13 +6,13 @@ import logging from jsonargparse import Namespace +from lightning_utilities.core.imports import package_available from anomalib.cli.utils.help_formatter import get_short_docstring -from anomalib.utils.exceptions import try_import logger = logging.getLogger(__name__) -if try_import("anomalib.pipelines"): +if package_available("anomalib.pipelines"): from anomalib.pipelines import Benchmark from anomalib.pipelines.components.base import Pipeline diff --git a/src/anomalib/cli/utils/openvino.py b/src/anomalib/cli/utils/openvino.py index 40046ac615..ee54bf09b2 100644 --- a/src/anomalib/cli/utils/openvino.py +++ b/src/anomalib/cli/utils/openvino.py @@ -6,13 +6,12 @@ import logging from jsonargparse import ArgumentParser - -from anomalib.utils.exceptions import try_import +from lightning_utilities.core.imports import package_available logger = logging.getLogger(__name__) -if try_import("openvino"): +if package_available("openvino"): from openvino.tools.ovc.cli_parser import get_common_cli_parser else: get_common_cli_parser = None diff --git a/src/anomalib/loggers/wandb.py b/src/anomalib/loggers/wandb.py index 0a23c25192..55e65e6d54 100644 --- a/src/anomalib/loggers/wandb.py +++ b/src/anomalib/loggers/wandb.py @@ -9,13 +9,12 @@ from lightning.fabric.utilities.types import _PATH from lightning.pytorch.loggers.wandb import WandbLogger from lightning.pytorch.utilities import rank_zero_only +from lightning_utilities.core.imports import package_available from matplotlib.figure import Figure -from anomalib.utils.exceptions import try_import - from .base import ImageLoggerBase -if try_import("wandb"): +if package_available("wandb"): import wandb if TYPE_CHECKING: diff --git a/src/anomalib/models/components/base/export_mixin.py b/src/anomalib/models/components/base/export_mixin.py index 5e7e5e9481..d11b50ff99 100644 --- 
a/src/anomalib/models/components/base/export_mixin.py +++ b/src/anomalib/models/components/base/export_mixin.py @@ -12,6 +12,7 @@ import numpy as np import torch +from lightning_utilities.core.imports import package_available from torch import nn from torchmetrics import Metric from torchvision.transforms.v2 import Transform @@ -20,7 +21,6 @@ from anomalib.data import AnomalibDataModule from anomalib.deploy.export import CompressionType, ExportType, InferenceModel from anomalib.metrics import create_metric_collection -from anomalib.utils.exceptions import try_import if TYPE_CHECKING: from importlib.util import find_spec @@ -245,7 +245,7 @@ def to_openvino( ... task="segmentation", ... ) """ - if not try_import("openvino"): + if not package_available("openvino"): logger.exception("Could not find OpenVINO. Please check OpenVINO installation.") raise ModuleNotFoundError @@ -294,7 +294,7 @@ def _compress_ov_model( Returns: model (CompiledModel): Model in the OpenVINO format compressed with NNCF quantization. """ - if not try_import("nncf"): + if not package_available("nncf"): logger.exception("Could not find NCCF. Please check NNCF installation.") raise ModuleNotFoundError diff --git a/src/anomalib/utils/exceptions/imports.py b/src/anomalib/utils/exceptions/imports.py index ebf6f11c61..dac22ba056 100644 --- a/src/anomalib/utils/exceptions/imports.py +++ b/src/anomalib/utils/exceptions/imports.py @@ -18,6 +18,15 @@ def try_import(import_path: str) -> bool: Returns: bool: True if import succeeds, False otherwise. """ + import warnings + + warnings.warn( + "The 'try_import' function is deprecated and will be removed in v2.0.0. 
" + "Use 'package_available' from lightning-utilities instead.", + DeprecationWarning, + stacklevel=2, + ) + try: import_module(import_path) except ImportError: From c99f8686f68d7c6276ce726cea24d7ca0e079d9b Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Wed, 16 Oct 2024 13:13:40 +0100 Subject: [PATCH 16/32] Refactor folder3d to avoid complex-structure (C901) issue (#2185) * Refactored-make_folder3d_dataset-ruff-error-C901 (#1926) Signed-off-by: sahusiddharth * Simplify folder 3d dataset (#2184) --------- Signed-off-by: sahusiddharth Co-authored-by: Siddharth Sahu <112792547+sahusiddharth@users.noreply.github.com> --- src/anomalib/data/depth/folder_3d.py | 60 +++++++++++++--------------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/src/anomalib/data/depth/folder_3d.py b/src/anomalib/data/depth/folder_3d.py index 41a12fbf40..0fac137850 100644 --- a/src/anomalib/data/depth/folder_3d.py +++ b/src/anomalib/data/depth/folder_3d.py @@ -24,7 +24,7 @@ from anomalib.data.utils.path import _prepare_files_labels, validate_and_resolve_path -def make_folder3d_dataset( # noqa: C901 +def make_folder3d_dataset( normal_dir: str | Path, root: str | Path | None = None, abnormal_dir: str | Path | None = None, @@ -78,37 +78,28 @@ def make_folder3d_dataset( # noqa: C901 msg = "A folder location must be provided in normal_dir." 
raise ValueError(msg) - filenames = [] - labels = [] - dirs = {DirType.NORMAL: normal_dir} - - if abnormal_dir: - dirs[DirType.ABNORMAL] = abnormal_dir - - if normal_test_dir: - dirs[DirType.NORMAL_TEST] = normal_test_dir - - if normal_depth_dir: - dirs[DirType.NORMAL_DEPTH] = normal_depth_dir - - if abnormal_depth_dir: - dirs[DirType.ABNORMAL_DEPTH] = abnormal_depth_dir - - if normal_test_depth_dir: - dirs[DirType.NORMAL_TEST_DEPTH] = normal_test_depth_dir - - if mask_dir: - dirs[DirType.MASK] = mask_dir - - for dir_type, path in dirs.items(): - filename, label = _prepare_files_labels(path, dir_type, extensions) - filenames += filename - labels += label + dirs = { + DirType.NORMAL: normal_dir, + DirType.ABNORMAL: abnormal_dir, + DirType.NORMAL_TEST: normal_test_dir, + DirType.NORMAL_DEPTH: normal_depth_dir, + DirType.ABNORMAL_DEPTH: abnormal_depth_dir, + DirType.NORMAL_TEST_DEPTH: normal_test_depth_dir, + DirType.MASK: mask_dir, + } + + filenames: list[Path] = [] + labels: list[str] = [] + + for dir_type, dir_path in dirs.items(): + if dir_path is not None: + filename, label = _prepare_files_labels(dir_path, dir_type, extensions) + filenames += filename + labels += label samples = DataFrame({"image_path": filenames, "label": labels}) samples = samples.sort_values(by="image_path", ignore_index=True) - # Create label index for normal (0) and abnormal (1) images. samples.loc[ (samples.label == DirType.NORMAL) | (samples.label == DirType.NORMAL_TEST), "label_index", @@ -137,9 +128,12 @@ def make_folder3d_dataset( # noqa: C901 .all() ) if not mismatch: - msg = """Mismatch between anomalous images and depth images. Make sure the mask files - in 'xyz' folder follow the same naming convention as the anomalous images in the dataset - (e.g. image: '000.png', depth: '000.tiff').""" + msg = ( + "Mismatch between anomalous images and depth images. 
" + "Make sure the mask files in 'xyz' folder follow the same naming " + "convention as the anomalous images in the dataset" + "(e.g. image: '000.png', depth: '000.tiff')." + ) raise MisMatchError(msg) missing_depth_files = samples.depth_path.apply( @@ -159,7 +153,7 @@ def make_folder3d_dataset( # noqa: C901 samples["mask_path"] = samples["mask_path"].fillna("") samples = samples.astype({"mask_path": "str"}) - # make sure all the files exist + # Make sure all the files exist if not samples.mask_path.apply( lambda x: Path(x).exists() if x != "" else True, ).all(): @@ -168,7 +162,7 @@ def make_folder3d_dataset( # noqa: C901 else: samples["mask_path"] = "" - # remove all the rows with temporal image samples that have already been assigned + # Remove all the rows with temporal image samples that have already been assigned samples = samples.loc[ (samples.label == DirType.NORMAL) | (samples.label == DirType.ABNORMAL) | (samples.label == DirType.NORMAL_TEST) ] From 0823ab80d937e9c67596a1d27456c04b44fc5b44 Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Thu, 17 Oct 2024 15:36:17 +0200 Subject: [PATCH 17/32] =?UTF-8?q?=F0=9F=9A=80=20Add=20datumaro=20annotatio?= =?UTF-8?q?n=20dataloader=20(#2377)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add datumaro annotation dataloader Signed-off-by: Ashwin Vaidya * Update changelog Signed-off-by: Ashwin Vaidya * Add examples Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya --- CHANGELOG.md | 1 + configs/data/datumaro.yaml | 15 ++ src/anomalib/data/__init__.py | 3 +- src/anomalib/data/image/__init__.py | 10 +- src/anomalib/data/image/datumaro.py | 226 +++++++++++++++++++++++++ tests/helpers/data.py | 38 +++++ tests/unit/data/image/test_datumaro.py | 39 +++++ 7 files changed, 327 insertions(+), 5 deletions(-) create mode 100644 configs/data/datumaro.yaml create mode 100644 src/anomalib/data/image/datumaro.py create mode 100644 tests/unit/data/image/test_datumaro.py 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 340641fb7c..cf9807af26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added +- Add `Datumaro` annotation format support by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2377 - Add `AUPIMO` tutorials notebooks in https://github.com/openvinotoolkit/anomalib/pull/2330 and https://github.com/openvinotoolkit/anomalib/pull/2336 - Add `AUPIMO` metric by [jpcbertoldo](https://github.com/jpcbertoldo) in https://github.com/openvinotoolkit/anomalib/pull/1726 and refactored by [ashwinvaidya17](https://github.com/ashwinvaidya17) in https://github.com/openvinotoolkit/anomalib/pull/2329 diff --git a/configs/data/datumaro.yaml b/configs/data/datumaro.yaml new file mode 100644 index 0000000000..31867f34fa --- /dev/null +++ b/configs/data/datumaro.yaml @@ -0,0 +1,15 @@ +class_path: anomalib.data.Datumaro +init_args: + root: "datasets/datumaro" + train_batch_size: 32 + eval_batch_size: 32 + num_workers: 8 + image_size: null + transform: null + train_transform: null + eval_transform: null + test_split_mode: FROM_DIR + test_split_ratio: 0.2 + val_split_mode: FROM_TEST + val_split_ratio: 0.5 + seed: null diff --git a/src/anomalib/data/__init__.py b/src/anomalib/data/__init__.py index e7eaf11156..0ad469ac69 100644 --- a/src/anomalib/data/__init__.py +++ b/src/anomalib/data/__init__.py @@ -14,7 +14,7 @@ from .base import AnomalibDataModule, AnomalibDataset from .depth import DepthDataFormat, Folder3D, MVTec3D -from .image import BTech, Folder, ImageDataFormat, Kolektor, MVTec, Visa +from .image import BTech, Datumaro, Folder, ImageDataFormat, Kolektor, MVTec, Visa from .predict import PredictDataset from .utils import LabelName from .video import Avenue, ShanghaiTech, UCSDped, VideoDataFormat @@ -70,6 +70,7 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule "VideoDataFormat", 
"get_datamodule", "BTech", + "Datumaro", "Folder", "Folder3D", "PredictDataset", diff --git a/src/anomalib/data/image/__init__.py b/src/anomalib/data/image/__init__.py index 0bea0f07ad..147db09418 100644 --- a/src/anomalib/data/image/__init__.py +++ b/src/anomalib/data/image/__init__.py @@ -9,6 +9,7 @@ from enum import Enum from .btech import BTech +from .datumaro import Datumaro from .folder import Folder from .kolektor import Kolektor from .mvtec import MVTec @@ -18,13 +19,14 @@ class ImageDataFormat(str, Enum): """Supported Image Dataset Types.""" - MVTEC = "mvtec" - MVTEC_3D = "mvtec_3d" BTECH = "btech" - KOLEKTOR = "kolektor" + DATUMARO = "datumaro" FOLDER = "folder" FOLDER_3D = "folder_3d" + KOLEKTOR = "kolektor" + MVTEC = "mvtec" + MVTEC_3D = "mvtec_3d" VISA = "visa" -__all__ = ["BTech", "Folder", "Kolektor", "MVTec", "Visa"] +__all__ = ["BTech", "Datumaro", "Folder", "Kolektor", "MVTec", "Visa"] diff --git a/src/anomalib/data/image/datumaro.py b/src/anomalib/data/image/datumaro.py new file mode 100644 index 0000000000..b4836990ec --- /dev/null +++ b/src/anomalib/data/image/datumaro.py @@ -0,0 +1,226 @@ +"""Dataloader for Datumaro format. + +Note: This currently only works for annotations exported from Intel Geti™. +""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +from pathlib import Path + +import pandas as pd +from torchvision.transforms.v2 import Transform + +from anomalib import TaskType +from anomalib.data.base import AnomalibDataModule, AnomalibDataset +from anomalib.data.utils import LabelName, Split, TestSplitMode, ValSplitMode + + +def make_datumaro_dataset(root: str | Path, split: str | Split | None = None) -> pd.DataFrame: + """Make Datumaro Dataset. + + Assumes the following directory structure: + + dataset + ├── annotations + │ └── default.json + └── images + └── default + ├── image1.jpg + ├── image2.jpg + └── ... 
+ + Args: + root (str | Path): Path to the dataset root directory. + split (str | Split | None): Split of the dataset, usually Split.TRAIN or Split.TEST. + Defaults to ``None``. + + Examples: + >>> root = Path("path/to/dataset") + >>> samples = make_datumaro_dataset(root) + >>> samples.head() + image_path label label_index split mask_path + 0 path/to/dataset... Normal 0 Split.TRAIN + 1 path/to/dataset... Normal 0 Split.TRAIN + 2 path/to/dataset... Normal 0 Split.TRAIN + 3 path/to/dataset... Normal 0 Split.TRAIN + 4 path/to/dataset... Normal 0 Split.TRAIN + + + Returns: + DataFrame: an output dataframe containing samples for the requested split (ie., train or test). + """ + annotation_file = Path(root) / "annotations" / "default.json" + with annotation_file.open() as f: + annotations = json.load(f) + + categories = annotations["categories"] + categories = {idx: label["name"] for idx, label in enumerate(categories["label"]["labels"])} + + samples = [] + for item in annotations["items"]: + image_path = Path(root) / "images" / "default" / item["image"]["path"] + label_index = item["annotations"][0]["label_id"] + label = categories[label_index] + samples.append({ + "image_path": str(image_path), + "label": label, + "label_index": label_index, + "split": None, + "mask_path": "", # mask is provided in the annotation file and is not on disk. + }) + samples_df = pd.DataFrame( + samples, + columns=["image_path", "label", "label_index", "split", "mask_path"], + index=range(len(samples)), + ) + # Create test/train split + # By default assign all "Normal" samples to train and all "Anomalous" samples to test + samples_df.loc[samples_df["label_index"] == LabelName.NORMAL, "split"] = Split.TRAIN + samples_df.loc[samples_df["label_index"] == LabelName.ABNORMAL, "split"] = Split.TEST + + # Get the data frame for the split. 
+ if split: + samples_df = samples_df[samples_df.split == split].reset_index(drop=True) + + return samples_df + + +class DatumaroDataset(AnomalibDataset): + """Datumaro dataset class. + + Args: + task (TaskType): Task type, ``classification``, ``detection`` or ``segmentation``. + root (str | Path): Path to the dataset root directory. + transform (Transform, optional): Transforms that should be applied to the input images. + Defaults to ``None``. + split (str | Split | None): Split of the dataset, usually Split.TRAIN or Split.TEST + Defaults to ``None``. + + + Examples: + .. code-block:: python + + from anomalib.data.image.datumaro import DatumaroDataset + from torchvision.transforms.v2 import Resize + + dataset = DatumaroDataset(root=root, + task="classification", + transform=Resize((256, 256)), + ) + print(dataset[0].keys()) + # Output: dict_keys(['dm_format_version', 'infos', 'categories', 'items']) + + """ + + def __init__( + self, + task: TaskType, + root: str | Path, + transform: Transform | None = None, + split: str | Split | None = None, + ) -> None: + super().__init__(task, transform) + self.split = split + self.samples = make_datumaro_dataset(root, split) + + +class Datumaro(AnomalibDataModule): + """Datumaro datamodule. + + Args: + root (str | Path): Path to the dataset root directory. + train_batch_size (int): Batch size for training dataloader. + Defaults to ``32``. + eval_batch_size (int): Batch size for evaluation dataloader. + Defaults to ``32``. + num_workers (int): Number of workers for dataloaders. + Defaults to ``8``. + task (TaskType): Task type, ``classification``, ``detection`` or ``segmentation``. + Defaults to ``TaskType.CLASSIFICATION``. Currently only supports classification. + image_size (tuple[int, int], optional): Size to which input images should be resized. + Defaults to ``None``. + transform (Transform, optional): Transforms that should be applied to the input images. + Defaults to ``None``. 
+ train_transform (Transform, optional): Transforms that should be applied to the input images during training. + Defaults to ``None``. + eval_transform (Transform, optional): Transforms that should be applied to the input images during evaluation. + Defaults to ``None``. + test_split_mode (TestSplitMode): Setting that determines how the testing subset is obtained. + Defaults to ``TestSplitMode.FROM_DIR``. + test_split_ratio (float): Fraction of images from the train set that will be reserved for testing. + Defaults to ``0.2``. + val_split_mode (ValSplitMode): Setting that determines how the validation subset is obtained. + Defaults to ``ValSplitMode.FROM_TEST``. + val_split_ratio (float): Fraction of train or test images that will be reserved for validation. + Defaults to ``0.5``. + seed (int | None, optional): Seed which may be set to a fixed value for reproducibility. + Defaults to ``None``. + + Examples: + To create a Datumaro datamodule + + >>> from pathlib import Path + >>> from torchvision.transforms.v2 import Resize + >>> root = Path("path/to/dataset") + >>> datamodule = Datumaro(root, transform=Resize((256, 256))) + >>> datamodule.setup() + >>> i, data = next(enumerate(datamodule.train_dataloader())) + >>> data.keys() + dict_keys(['image_path', 'label', 'image']) + + >>> data["image"].shape + torch.Size([32, 3, 256, 256]) + """ + + def __init__( + self, + root: str | Path, + train_batch_size: int = 32, + eval_batch_size: int = 32, + num_workers: int = 8, + task: TaskType = TaskType.CLASSIFICATION, + image_size: tuple[int, int] | None = None, + transform: Transform | None = None, + train_transform: Transform | None = None, + eval_transform: Transform | None = None, + test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR, + test_split_ratio: float = 0.5, + val_split_mode: ValSplitMode | str = ValSplitMode.FROM_TEST, + val_split_ratio: float = 0.5, + seed: int | None = None, + ) -> None: + if task != TaskType.CLASSIFICATION: + msg = "Datumaro 
dataloader currently only supports classification task." + raise ValueError(msg) + super().__init__( + train_batch_size=train_batch_size, + eval_batch_size=eval_batch_size, + num_workers=num_workers, + val_split_mode=val_split_mode, + val_split_ratio=val_split_ratio, + test_split_mode=test_split_mode, + test_split_ratio=test_split_ratio, + image_size=image_size, + transform=transform, + train_transform=train_transform, + eval_transform=eval_transform, + seed=seed, + ) + self.root = root + self.task = task + + def _setup(self, _stage: str | None = None) -> None: + self.train_data = DatumaroDataset( + task=self.task, + root=self.root, + transform=self.train_transform, + split=Split.TRAIN, + ) + self.test_data = DatumaroDataset( + task=self.task, + root=self.root, + transform=self.eval_transform, + split=Split.TEST, + ) diff --git a/tests/helpers/data.py b/tests/helpers/data.py index 0ad699fb2f..60433df9eb 100644 --- a/tests/helpers/data.py +++ b/tests/helpers/data.py @@ -5,6 +5,7 @@ from __future__ import annotations +import json import shutil from contextlib import ContextDecorator from pathlib import Path @@ -319,6 +320,43 @@ def __init__( self.min_size = min_size self.image_generator = DummyImageGenerator(image_shape=image_shape, rng=self.rng) + def _generate_dummy_datumaro_dataset(self) -> None: + """Generates dummy Datumaro dataset in a temporary directory.""" + # generate images + image_root = self.dataset_root / "images" / "default" + image_root.mkdir(parents=True, exist_ok=True) + + file_names: list[str] = [] + + # Create normal images + for i in range(self.num_train + self.num_test): + label = LabelName.NORMAL + image_filename = image_root / f"normal_{i:03}.png" + file_names.append(image_filename) + self.image_generator.generate_image(label, image_filename) + + # Create abnormal images + for i in range(self.num_test): + label = LabelName.ABNORMAL + image_filename = image_root / f"abnormal_{i:03}.png" + file_names.append(image_filename) + 
self.image_generator.generate_image(label, image_filename) + + # create annotation file + annotation_file = self.dataset_root / "annotations" / "default.json" + annotation_file.parent.mkdir(parents=True, exist_ok=True) + annotations = { + "categories": {"label": {"labels": [{"name": "Normal"}, {"name": "Anomalous"}]}}, + "items": [], + } + for file_name in file_names: + annotations["items"].append({ + "annotations": [{"label_id": 1 if "abnormal" in str(file_name) else 0}], + "image": {"path": file_name.name}, + }) + with annotation_file.open("w") as f: + json.dump(annotations, f) + def _generate_dummy_mvtec_dataset( self, normal_dir: str = "good", diff --git a/tests/unit/data/image/test_datumaro.py b/tests/unit/data/image/test_datumaro.py new file mode 100644 index 0000000000..2aef9ae715 --- /dev/null +++ b/tests/unit/data/image/test_datumaro.py @@ -0,0 +1,39 @@ +"""Unit tests - Datumaro Datamodule.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path + +import pytest + +from anomalib import TaskType +from anomalib.data import Datumaro +from tests.unit.data.base.image import _TestAnomalibImageDatamodule + + +class TestDatumaro(_TestAnomalibImageDatamodule): + """Datumaro Datamodule Unit Tests.""" + + @pytest.fixture() + @staticmethod + def datamodule(dataset_path: Path, task_type: TaskType) -> Datumaro: + """Create and return a Datumaro datamodule.""" + if task_type != TaskType.CLASSIFICATION: + pytest.skip("Datumaro only supports classification tasks.") + + _datamodule = Datumaro( + root=dataset_path / "datumaro", + task=task_type, + train_batch_size=4, + eval_batch_size=4, + ) + _datamodule.setup() + + return _datamodule + + @pytest.fixture() + @staticmethod + def fxt_data_config_path() -> str: + """Return the path to the test data config.""" + return "configs/data/datumaro.yaml" From 1465b05fd9ff5c20bfe6df661187e6866e04cec7 Mon Sep 17 00:00:00 2001 From: Joao P C Bertoldo 
<24547377+jpcbertoldo@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:54:24 +0200 Subject: [PATCH 18/32] Pimo tutorials/04 advanced iv (#2352) * add notebook 701e_aupimo_advanced_iv on load/save and statistical comparisons Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * make `AUPIMOResult.num_thresholds` optional Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * add aupimo notebook advanced iv (load/save and statistical tests) Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * simplify cite us and mention intal Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> * fix readme Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> --------- Signed-off-by: jpcbertoldo <24547377+jpcbertoldo@users.noreply.github.com> Co-authored-by: Samet Akcay --- notebooks/700_metrics/701a_aupimo.ipynb | 21 +- .../700_metrics/701b_aupimo_advanced_i.ipynb | 42 +- .../700_metrics/701c_aupimo_advanced_ii.ipynb | 21 +- .../701d_aupimo_advanced_iii.ipynb | 21 +- .../700_metrics/701e_aupimo_advanced_iv.ipynb | 1507 +++++++++++++++++ notebooks/README.md | 1 + src/anomalib/metrics/pimo/dataclasses.py | 6 +- 7 files changed, 1541 insertions(+), 78 deletions(-) create mode 100644 notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb diff --git a/notebooks/700_metrics/701a_aupimo.ipynb b/notebooks/700_metrics/701a_aupimo.ipynb index 5c5497b3b8..d780c5a964 100644 --- a/notebooks/700_metrics/701a_aupimo.ipynb +++ b/notebooks/700_metrics/701a_aupimo.ipynb @@ -492,29 +492,20 @@ "source": [ "# Cite Us\n", "\n", - "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "AUPIMO was developed during [Google Summer of Code 2023 (GSoC 2023)](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd) with the `anomalib` team from Intel's OpenVINO Toolkit.\n", "\n", - "Our work was accepted to the British 
Machine Vision Conference 2024 (BMVC 2024).\n", + "arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", "\n", "```bibtex\n", "@misc{bertoldo2024aupimo,\n", - " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " author={Joao P. C. Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " year={2024},\n", - " eprint={2401.01984},\n", - " archivePrefix={arXiv},\n", - " primaryClass={cs.CV},\n", " url={https://arxiv.org/abs/2401.01984}, \n", "}\n", - "```\n", - "\n", - "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", - "\n", - "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", - "\n", - "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", - "\n", - "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + "```" ] } ], diff --git a/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb b/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb index a785075060..ea322102f8 100644 --- a/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb +++ b/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb @@ -775,29 +775,20 @@ "source": [ "# Cite Us\n", "\n", - "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "AUPIMO was developed during [Google Summer of Code 2023 (GSoC 2023)](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd) with the `anomalib` team from
Intel's OpenVINO Toolkit.\n", "\n", - "Our work was accepted to the British Machine Vision Conference 2024 (BMVC 2024).\n", + "arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", "\n", "```bibtex\n", "@misc{bertoldo2024aupimo,\n", - " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " author={Joao P. C. Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " year={2024},\n", - " eprint={2401.01984},\n", - " archivePrefix={arXiv},\n", - " primaryClass={cs.CV},\n", " url={https://arxiv.org/abs/2401.01984}, \n", "}\n", - "```\n", - "\n", - "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", - "\n", - "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", - "\n", - "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", - "\n", - "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + "```" ] }, { @@ -1382,29 +1373,20 @@ "source": [ "# Cite Us\n", "\n", - "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "AUPIMO was developed during [Google Summer of Code 2023 (GSoC 2023)](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd) with the `anomalib` team from Intel's OpenVINO Toolkit.\n", "\n", - "Our work was accepted to the British Machine Vision Conference 2024 (BMVC 2024).\n", + "arXiv:
[arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", "\n", "```bibtex\n", "@misc{bertoldo2024aupimo,\n", - " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " author={Joao P. C. Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " year={2024},\n", - " eprint={2401.01984},\n", - " archivePrefix={arXiv},\n", - " primaryClass={cs.CV},\n", " url={https://arxiv.org/abs/2401.01984}, \n", "}\n", - "```\n", - "\n", - "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", - "\n", - "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", - "\n", - "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", - "\n", - "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + "```" ] } ], diff --git a/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb b/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb index ed647ef666..6911b9c546 100644 --- a/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb +++ b/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb @@ -885,29 +885,20 @@ "source": [ "# Cite Us\n", "\n", - "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "AUPIMO was developed during [Google Summer of Code 2023 (GSoC 2023)](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd) with the `anomalib` team from Intel's OpenVINO Toolkit.\n", "\n", - "Our work was
accepted to the British Machine Vision Conference 2024 (BMVC 2024).\n", + "arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", "\n", "```bibtex\n", "@misc{bertoldo2024aupimo,\n", - " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " author={Joao P. C. Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " year={2024},\n", - " eprint={2401.01984},\n", - " archivePrefix={arXiv},\n", - " primaryClass={cs.CV},\n", " url={https://arxiv.org/abs/2401.01984}, \n", "}\n", - "```\n", - "\n", - "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", - "\n", - "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", - "\n", - "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", - "\n", - "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + "```" ] } ], diff --git a/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb b/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb index 6d446d171e..7cbd29823b 100644 --- a/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb +++ b/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb @@ -321,29 +321,20 @@ "source": [ "# Cite Us\n", "\n", - "AUPIMO was developed during Google Summer of Code 2023 (GSoC 2023) with the `anomalib` team from OpenVINO Toolkit.\n", + "AUPIMO was developed during [Google Summer of Code 2023 (GSoC
2023)](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd) with the `anomalib` team from Intel's OpenVINO Toolkit.\n", "\n", - "Our work was accepted to the British Machine Vision Conference 2024 (BMVC 2024).\n", + "arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", "\n", "```bibtex\n", "@misc{bertoldo2024aupimo,\n", - " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " author={Joao P. C. Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", " year={2024},\n", - " eprint={2401.01984},\n", - " archivePrefix={arXiv},\n", - " primaryClass={cs.CV},\n", " url={https://arxiv.org/abs/2401.01984}, \n", "}\n", - "```\n", - "\n", - "Paper on arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", - "\n", - "Medium post: [medium.com/p/c653ac30e802](https://medium.com/p/c653ac30e802)\n", - "\n", - "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", - "\n", - "GSoC 2023 page: [summerofcode.withgoogle.com/archive/2023/projects/SPMopugd](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd)" + "```" ] } ], diff --git a/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb b/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb new file mode 100644 index 0000000000..e117006951 --- /dev/null +++ b/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb @@ -0,0 +1,1507 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AUPIMO statistical comparison between two models\n", + "\n", + "Model A has a higher
average AUPIMO than model B. Can you be _sure_ that A is better than B? \n", + "\n", + "We'll use statistical tests here to make informed decisions about this.\n", + "\n", + "This notebook covers:\n", + "- load/save functions to import/export AUPIMO scores;\n", + "- statistical tests between two models, in particular:\n", + " - parametrical test with Student's t-test;\n", + " - non-parametrical test with Wilcoxon signed-rank test;\n", + "\n", + "> AUPIMO is pronounced \"a-u-pee-mo\".\n", + "\n", + "> For basic usage, please check the notebook [701a_aupimo.ipynb](./701a_aupimo.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# What is AUPIMO?\n", + "\n", + "The `Area Under the Per-Image Overlap [curve]` (AUPIMO) is a metric of recall (higher is better) designed for visual anomaly detection.\n", + "\n", + "Inspired by the [ROC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) and [PRO](https://link.springer.com/article/10.1007/s11263-020-01400-4) curves, \n", + "\n", + "> AUPIMO is the area under a curve of True Positive Rate (TPR or _recall_) as a function of False Positive Rate (FPR) restricted to a fixed range. \n", + "\n", + "But:\n", + "- the TPR (Y-axis) is *per-image* (1 image = 1 curve/score);\n", + "- the FPR (X-axis) considers the (average of) **normal** images only; \n", + "- the FPR (X-axis) is in log scale and its range is [1e-5, 1e-4]\\* (harder detection task!).\n", + "\n", + "\\* The score (the area under the curve) is normalized to be in [0, 1].\n", + "\n", + "AUPIMO can be interpreted as\n", + "\n", + "> average segmentation recall in an image given that the model (nearly) does not yield false positives in normal images.\n", + "\n", + "References in the last cell.\n", + "\n", + "![AUROC vs. AUPRO vs. 
AUPIMO](./roc_pro_pimo.svg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install `anomalib` using `pip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO(jpcbertoldo): replace by `pip install anomalib` when AUPIMO is released # noqa: TD003\n", + "%pip install ../.." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import urllib.request\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.ticker import FixedLocator, IndexLocator, MaxNLocator, PercentFormatter\n", + "from scipy import stats\n", + "\n", + "from anomalib.metrics.pimo import AUPIMOResult" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "pd.options.display.float_format = \"{:.3f}\".format" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load AUPIMO scores\n", + "\n", + "Unlike the previous notebooks, we will not train and evaluate the models here.\n", + "\n", + "We'll load the AUPIMO scores from the benchmark presented in our paper (check the reference in the last cell).\n", + "\n", + "These scores can be found in AUPIMO's official repository in [`jpcbertoldo:aupimo/data/experiments/benchmark`](https://github.com/jpcbertoldo/aupimo/tree/main/data/experiments/benchmark). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading benchmark results for model 'patchcore_wr101' and dataset 'mvtec/capsule'\n", + "Dowloading JSON file from https://raw.githubusercontent.com/jpcbertoldo/aupimo/refs/heads/main/data/experiments/benchmark/patchcore_wr101/mvtec/capsule/aupimo/aupimos.json\n", + "Converting payload to dataclass\n", + "Done!\n", + "Loading benchmark results for model 'patchcore_wr50' and dataset 'mvtec/capsule'\n", + "Dowloading JSON file from https://raw.githubusercontent.com/jpcbertoldo/aupimo/refs/heads/main/data/experiments/benchmark/patchcore_wr50/mvtec/capsule/aupimo/aupimos.json\n", + "Converting payload to dataclass\n", + "Done!\n" + ] + } + ], + "source": [ + "def get_benchmark_scores_url(model: str, dataset: str) -> str:\n", + " \"\"\"Generate the URL for the JSON file of a specific model and dataset.\"\"\"\n", + " root_url = \"https://raw.githubusercontent.com/jpcbertoldo/aupimo/refs/heads/main/data/experiments/benchmark\"\n", + " models = {\n", + " \"efficientad_wr101_m_ext\",\n", + " \"efficientad_wr101_s_ext\",\n", + " \"fastflow_cait_m48_448\",\n", + " \"fastflow_wr50\",\n", + " \"padim_r18\",\n", + " \"padim_wr50\",\n", + " \"patchcore_wr101\",\n", + " \"patchcore_wr50\",\n", + " \"pyramidflow_fnf_ext\",\n", + " \"pyramidflow_r18_ext\",\n", + " \"rd++_wr50_ext\",\n", + " \"simplenet_wr50_ext\",\n", + " \"uflow_ext\",\n", + " }\n", + " if model not in models:\n", + " msg = f\"Model '{model}' not available. 
Choose one of {sorted(models)}.\"\n", + " raise ValueError(msg)\n", + " datasets = {\n", + " \"mvtec/bottle\",\n", + " \"mvtec/cable\",\n", + " \"mvtec/capsule\",\n", + " \"mvtec/carpet\",\n", + " \"mvtec/grid\",\n", + " \"mvtec/hazelnut\",\n", + " \"mvtec/leather\",\n", + " \"mvtec/metal_nut\",\n", + " \"mvtec/pill\",\n", + " \"mvtec/screw\",\n", + " \"mvtec/tile\",\n", + " \"mvtec/toothbrush\",\n", + " \"mvtec/transistor\",\n", + " \"mvtec/wood\",\n", + " \"mvtec/zipper\",\n", + " \"visa/candle\",\n", + " \"visa/capsules\",\n", + " \"visa/cashew\",\n", + " \"visa/chewinggum\",\n", + " \"visa/fryum\",\n", + " \"visa/macaroni1\",\n", + " \"visa/macaroni2\",\n", + " \"visa/pcb1\",\n", + " \"visa/pcb2\",\n", + " \"visa/pcb3\",\n", + " \"visa/pcb4\",\n", + " \"visa/pipe_fryum\",\n", + " }\n", + " if dataset not in datasets:\n", + " msg = f\"Dataset '{dataset}' not available. Choose one of {sorted(datasets)}.\"\n", + " raise ValueError(msg)\n", + " return f\"{root_url}/{model}/{dataset}/aupimo/aupimos.json\"\n", + "\n", + "\n", + "def download_json(url_str: str) -> dict[str, str | float | int | list[str]]:\n", + " \"\"\"Download the JSON content from an URL.\"\"\"\n", + " with urllib.request.urlopen(url_str) as url: # noqa: S310\n", + " return json.load(url)\n", + "\n", + "\n", + "def load_aupimo_result_from_json_dict(payload: dict[str, str | float | int | list[str]]) -> AUPIMOResult:\n", + " \"\"\"Convert the JSON payload to an AUPIMOResult dataclass.\"\"\"\n", + " if not isinstance(payload, dict):\n", + " msg = f\"Invalid payload. Must be a dictionary. 
Got {type(payload)}.\"\n", + " raise TypeError(msg)\n", + " try:\n", + " return AUPIMOResult(\n", + " fpr_lower_bound=payload[\"fpr_lower_bound\"],\n", + " fpr_upper_bound=payload[\"fpr_upper_bound\"],\n", + " # `num_threshs` vs `num_thresholds` is an inconsistency with an older version of the JSON file\n", + " num_thresholds=payload[\"num_threshs\"] if \"num_threshs\" in payload else payload[\"num_thresholds\"],\n", + " thresh_lower_bound=payload[\"thresh_lower_bound\"],\n", + " thresh_upper_bound=payload[\"thresh_upper_bound\"],\n", + " aupimos=torch.tensor(payload[\"aupimos\"], dtype=torch.float64),\n", + " )\n", + "\n", + " except KeyError as ex:\n", + " msg = f\"Invalid payload. Missing key {ex}.\"\n", + " raise ValueError(msg) from ex\n", + "\n", + " except (TypeError, ValueError) as ex:\n", + " msg = f\"Invalid payload. Cause: {ex}.\"\n", + " raise ValueError(msg) from ex\n", + "\n", + "\n", + "def get_benchmark_aupimo_scores(model: str, dataset: str, verbose: bool = True) -> AUPIMOResult:\n", + " \"\"\"Get the benchmark AUPIMO scores for a specific model and dataset.\n", + "\n", + " Args:\n", + " model: The model name. See `_get_json_url` for the available models.\n", + " dataset: The \"collection/dataset\", where 'collection' is either 'mvtec' or 'visa', and 'dataset' is\n", + " the name of the dataset within the collection. 
See `_get_json_url` for the available datasets.\n", + " verbose: Whether to print the progress.\n", + "\n", + " Returns:\n", + " A `AUPIMOResult` dataclass with the AUPIMO scores from the benchmark results.\n", + "\n", + " More details in our paper: https://arxiv.org/abs/2401.01984\n", + " \"\"\"\n", + " if verbose:\n", + " print(f\"Loading benchmark results for model '{model}' and dataset '{dataset}'\")\n", + " url = get_benchmark_scores_url(model, dataset)\n", + " if verbose:\n", + " print(f\"Dowloading JSON file from {url}\")\n", + " payload = download_json(url)\n", + " if verbose:\n", + " print(\"Converting payload to dataclass\")\n", + " aupimo_result = load_aupimo_result_from_json_dict(payload)\n", + " if verbose:\n", + " print(\"Done!\")\n", + " return payload, aupimo_result\n", + "\n", + "\n", + "json_model_a, aupimo_result_model_a = get_benchmark_aupimo_scores(\"patchcore_wr101\", \"mvtec/capsule\")\n", + "_, aupimo_result_model_b = get_benchmark_aupimo_scores(\"patchcore_wr50\", \"mvtec/capsule\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's remove the `nan` values from the normal images." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "modela.shape=(109,) modelb.shape=(109,) labels.shape=(109,)\n" + ] + } + ], + "source": [ + "# corresponding paths to the images\n", + "# where the AUPIMO scores were computed from\n", + "paths = json_model_a[\"paths\"]\n", + "\n", + "# extract the labels (i.e. 
anomaly type or 'good')\n", + "labels = np.array([p.split(\"/\")[-2] for p in paths])\n", + "\n", + "# let's extract only the AUPIMO scores from anomalies\n", + "modela = aupimo_result_model_a.aupimos[labels != \"good\"].numpy()\n", + "modelb = aupimo_result_model_b.aupimos[labels != \"good\"].numpy()\n", + "labels = labels[labels != \"good\"]\n", + "print(f\"{modela.shape=} {modelb.shape=} {labels.shape=}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(6, 3))\n", + "ax.boxplot(\n", + " [modela, modelb],\n", + " tick_labels=[f\"A mean: {modela.mean():.0%}\", f\"B mean: {modelb.mean():.0%}\"],\n", + " vert=False,\n", + " showmeans=True,\n", + " meanline=True,\n", + " widths=0.5,\n", + ")\n", + "ax.invert_yaxis()\n", + "ax.set_title(\"AUPIMO scores distributions from two models\")\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Is this difference significant?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image by image comparison\n", + "\n", + "Since we have the scores of each model for each image, we can compare them image by image." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(5, 5))\n", + "modela_is_better = modela > modelb\n", + "ax.scatter(modela[modela_is_better], modelb[modela_is_better], alpha=0.3, s=10, color=\"red\", marker=\"o\")\n", + "ax.scatter(modela[~modela_is_better], modelb[~modela_is_better], alpha=0.3, s=10, color=\"blue\", marker=\"o\")\n", + "ax.plot([0, 1], [0, 1], color=\"black\", linestyle=\"--\")\n", + "ax.set_xlabel(\"Model A\")\n", + "ax.set_ylabel(\"Model B\")\n", + "ax.set_title(\"AUPIMO scores direct comparison\")\n", + "ax.grid()\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The dashed line is where both models have the same AUPIMO score.\n", + "\n", + "Notice that there are images where one performs better than the other and vice-versa." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Parametric Comparison\n", + "\n", + "Before using the statistical test, let's first visualize the data seen by the test.\n", + "\n", + "We'll use a _paired_ t-test, which means we'll compare the AUPIMO scores of the same image one by one." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "num_samples = modela.shape[0]\n", + "indexes = np.arange(num_samples)\n", + "\n", + "fig, ax = plt.subplots(figsize=(18, 4))\n", + "\n", + "# plot sample index vs score and their mean\n", + "ax.scatter(indexes, modela, s=30, color=\"tab:blue\", marker=\"o\", label=\"Model A\", zorder=3, alpha=0.6)\n", + "ax.axhline(modela.mean(), color=\"tab:blue\", linestyle=\"--\", label=\"Mean\", zorder=3)\n", + "ax.scatter(indexes, modelb, s=30, color=\"tab:red\", marker=\"o\", label=\"Model B\", zorder=3, alpha=0.6)\n", + "ax.axhline(modelb.mean(), color=\"tab:red\", linestyle=\"--\", label=\"Mean\", zorder=3)\n", + "\n", + "# configure the x-axis\n", + "ax.set_xlabel(\"Sample index\")\n", + "ax.set_xlim(0 - (eps := 0.01 * num_samples), num_samples + eps)\n", + "ax.xaxis.set_major_locator(IndexLocator(5, 0))\n", + "ax.xaxis.set_minor_locator(IndexLocator(1, 0))\n", + "\n", + "# configure the y-axis\n", + "ax.set_ylabel(\"AUPIMO [%]\")\n", + "ax.set_ylim(0 - 0.05, 1 + 0.05)\n", + "ax.yaxis.set_major_locator(MaxNLocator(6))\n", + "ax.yaxis.set_major_formatter(PercentFormatter(1))\n", + "\n", + "# configure the grid, legend, etc\n", + "ax.grid(axis=\"both\", which=\"major\", linestyle=\"-\")\n", + "ax.grid(axis=\"x\", which=\"minor\", linestyle=\"--\", alpha=0.5)\n", + "ax.legend(ncol=4, loc=\"upper left\", bbox_to_anchor=(0, -0.08))\n", + "ax.set_title(\"AUPIMO scores direct comparison\")\n", + "\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that several images actually have the same AUPIMO score for both models (e.g. from 10 to 15).\n", + "\n", + "Others like 21 show a big difference -- model B didn't detect the anomaly at all, but model A did a good job (60% AUPIMO).\n", + "\n", + "Let's simplify this and only show the differences." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "differences = modela - modelb\n", + "\n", + "fig, ax = plt.subplots(figsize=(9, 3))\n", + "ax.hist(differences, bins=np.linspace(-1, 1, 61), edgecolor=\"black\")\n", + "ax.axvline(differences.mean(), color=\"black\", linestyle=\"--\", label=\"Mean\")\n", + "\n", + "# configure the x-axis\n", + "ax.set_xlabel(\"AUPIMO [%]\")\n", + "ax.set_xlim(-1, 1)\n", + "ax.xaxis.set_major_locator(MaxNLocator(9))\n", + "ax.xaxis.set_minor_locator(MaxNLocator(41))\n", + "ax.xaxis.set_major_formatter(PercentFormatter(1))\n", + "\n", + "# configure the y-axis\n", + "ax.set_ylabel(\"Count\")\n", + "\n", + "# configure the grid, legend, etc\n", + "ax.grid(axis=\"both\", which=\"major\", linestyle=\"-\", alpha=1, linewidth=1.0)\n", + "ax.grid(axis=\"x\", which=\"minor\", linestyle=\"-\", alpha=0.3)\n", + "ax.legend(loc=\"upper right\")\n", + "ax.set_title(\"AUPIMO scores differences distribution (Model A - Model B)\")\n", + "\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It looks like there is a bias to the right indeed (so model A > model B). 
\n", + "\n", + "Is that statistically significant or just random?\n", + "\n", + "> **Dependent t-test for paired samples**\n", + "> \n", + "> - null hypothesis: `average(A) == average(B)` \n", + "> - alternative hypothesis: `average(A) != average(B)`\n", + "> \n", + "> See [`scipy.stats.ttest_rel`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html) and [Wikipedia's page on \"Student's t-test\"](https://en.wikipedia.org/wiki/Student's_t-test#Dependent_t-test_for_paired_samples).\n", + ">\n", + "> **Confidence Level**\n", + "> \n", + "> Instead of reporting the p-value, we'll report the \"confidence level\" [that the null hypothesis is false], which is `1 - pvalue`.\n", + "> \n", + "> *Higher* confidence level means *more confident* that `average(A) > average(B)`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "test_result=TtestResult(statistic=2.8715471705520033, pvalue=0.004917091449731462, df=108)\n", + "confidence=99.5%\n" + ] + } + ], + "source": [ + "test_result = stats.ttest_rel(modela, modelb)\n", + "confidence = 1.0 - float(test_result.pvalue)\n", + "print(f\"{test_result=}\")\n", + "print(f\"{confidence=:.1%}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So, we're very confident that model A has a higher AUPIMO score than model B.\n", + "\n", + "Is that maybe due to some big differences in a few images?\n", + "\n", + "What if we don't give much weight to these big differences?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Non-parametric (rank comparison)\n", + "\n", + "In non-parametric comparison, bigger differences don't matter more than smaller differences. \n", + "\n", + "It's all about their relative position.\n", + "\n", + "Let's look at the analogous plots for this type of comparison." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# the `-` sign is to sort in descending order because higher AUPIMO is better\n", + "# the rank values are 1 or 2 because there are only two models\n", + "# where 1 is the best and 2 is the worst\n", + "# when the scores are the same, 1.5 is assigned to both models\n", + "ranks = stats.rankdata(-np.stack([modela, modelb], axis=1), method=\"average\", axis=1)\n", + "ranksa, ranksb = ranks[:, 0], ranks[:, 1]\n", + "\n", + "num_samples = ranks.shape[0]\n", + "indexes = np.arange(num_samples)\n", + "\n", + "fig, ax = plt.subplots(figsize=(18, 2.5))\n", + "\n", + "# plot sample index vs score and their mean\n", + "ax.scatter(indexes, ranksa, s=30, color=\"tab:blue\", marker=\"o\", label=\"Model A\", zorder=3, alpha=0.6)\n", + "ax.axhline(ranksa.mean(), color=\"tab:blue\", linestyle=\"--\", label=\"Mean\", zorder=3)\n", + "ax.scatter(indexes, ranksb, s=30, color=\"tab:red\", marker=\"o\", label=\"Model B\", zorder=3, alpha=0.6)\n", + "ax.axhline(ranksb.mean(), color=\"tab:red\", linestyle=\"--\", label=\"Mean\", zorder=3)\n", + "\n", + "# configure the x-axis\n", + "ax.set_xlabel(\"Sample index\")\n", + "ax.set_xlim(0 - (eps := 0.01 * num_samples), num_samples + eps)\n", + "ax.xaxis.set_major_locator(IndexLocator(5, 0))\n", + "ax.xaxis.set_minor_locator(IndexLocator(1, 0))\n", + "\n", + "# configure the y-axis\n", + "ax.set_ylabel(\"AUPIMO Rank\")\n", + "ax.set_ylim(1 - 0.1, 2 + 0.1)\n", + "ax.yaxis.set_major_locator(FixedLocator([1, 1.5, 2]))\n", + "ax.invert_yaxis()\n", + "\n", + "# configure the grid, legend, etc\n", + "ax.grid(axis=\"both\", which=\"major\", linestyle=\"-\")\n", + "ax.grid(axis=\"x\", which=\"minor\", linestyle=\"--\", alpha=0.5)\n", + "ax.legend(ncol=4, loc=\"upper left\", bbox_to_anchor=(0, -0.15))\n", + "ax.set_title(\"AUPIMO scores ranks\")\n", + "\n", + "fig.text(\n", + " 0.9,\n", + " -0.1,\n", + " \"Ranks: 1 is the best, 2 is the 
worst, 1.5 when the scores are the same.\",\n", + " ha=\"right\",\n", + " va=\"top\",\n", + " fontsize=\"small\",\n", + ")\n", + "\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Again, blue seems to have a slight advantage, but -- again -- is it significant enough to be sure that model A is better than model B?\n", + "\n", + "Remember that AUPIMO is a recall metric, so it is basically a ratio of the area of anomalies. \n", + "\n", + "Is it relevant if model A has 1% more recall than model B in a given image?\n", + "\n", + "> You can check that out in [`701b_aupimo_advanced_i.ipynb`](./701b_aupimo_advanced_i.ipynb).\n", + "\n", + "We'll -- arbitrarily -- assume that only differences above 5% are relevant." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MIN_ABS_DIFF = 0.05\n", + "scores = np.stack([modela, modelb], axis=1)\n", + "ranks = stats.rankdata(-scores, method=\"average\", axis=1)\n", + "abs_diff = np.abs(np.diff(scores, axis=1)).flatten()\n", + "ranks[abs_diff < MIN_ABS_DIFF, :] = 1.5\n", + "ranksa, ranksb = ranks[:, 0], ranks[:, 1]\n", + "\n", + "num_samples = ranks.shape[0]\n", + "indexes = np.arange(num_samples)\n", + "\n", + "fig, ax = plt.subplots(figsize=(18, 2.5))\n", + "\n", + "# plot sample index vs score and their mean\n", + "ax.scatter(indexes, ranksa, s=30, color=\"tab:blue\", marker=\"o\", label=\"Model A\", zorder=3, alpha=0.6)\n", + "ax.axhline(ranksa.mean(), color=\"tab:blue\", linestyle=\"--\", label=\"Mean\", zorder=3)\n", + "ax.scatter(indexes, ranksb, s=30, color=\"tab:red\", marker=\"o\", label=\"Model B\", zorder=3, alpha=0.6)\n", + "ax.axhline(ranksb.mean(), color=\"tab:red\", linestyle=\"--\", label=\"Mean\", zorder=3)\n", + "\n", + "# configure the x-axis\n", + "ax.set_xlabel(\"Sample index\")\n", + "ax.set_xlim(0 - (eps := 0.01 * num_samples), num_samples + eps)\n", + "ax.xaxis.set_major_locator(IndexLocator(5, 0))\n", + "ax.xaxis.set_minor_locator(IndexLocator(1, 0))\n", + "\n", + "# configure the y-axis\n", + "ax.set_ylabel(\"AUPIMO Rank\")\n", + "ax.set_ylim(1 - 0.1, 2 + 0.1)\n", + "ax.yaxis.set_major_locator(FixedLocator([1, 1.5, 2]))\n", + "ax.invert_yaxis()\n", + "\n", + "# configure the grid, legend, etc\n", + "ax.grid(axis=\"both\", which=\"major\", linestyle=\"-\")\n", + "ax.grid(axis=\"x\", which=\"minor\", linestyle=\"--\", alpha=0.5)\n", + "ax.legend(ncol=4, loc=\"upper left\", bbox_to_anchor=(0, -0.15))\n", + "ax.set_title(\"AUPIMO scores ranks\")\n", + "\n", + "fig.text(\n", + " 0.9,\n", + " -0.1,\n", + " \"Ranks: 1 is the best, 2 is the worst, 1.5 when the scores are the same.\",\n", + " ha=\"right\",\n", + " va=\"top\",\n", + " fontsize=\"small\",\n", 
+ ")\n", + "\n", + "fig # noqa: B018, RUF100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The advantage of A over B is clearer now.\n", + "\n", + "Most of the cases where B was better were within the difference margin of 5%.\n", + "\n", + "The average ranks also got more distant.\n", + "\n", + "Could it be by chance or can we be confident that model A is better than model B?\n", + "\n", + "> **Wilcoxon signed rank test**\n", + "> \n", + "> - null hypothesis: `average(rankA) == average(rankB)` \n", + "> - alternative hypothesis: `average(rankA) != average(rankB)`\n", + "> \n", + "> See [`scipy.stats.wilcoxon`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html#scipy.stats.wilcoxon) and [\"Wilcoxon signed-rank test\" in Wikipedia](https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test).\n", + ">\n", + "> Confidence Level (reminder): a *higher* confidence level means we are *more confident* that `average(rankA) > average(rankB)`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "test_result=WilcoxonResult(statistic=1823.0, pvalue=0.0002876893285960681)\n", + "confidence=100.0%\n" + ] + } + ], + "source": [ + "MIN_ABS_DIFF = 0.05\n", + "differences = modela - modelb\n", + "differences[abs_diff < MIN_ABS_DIFF] = 0.0\n", + "test_result = stats.wilcoxon(differences, zero_method=\"zsplit\")\n", + "confidence = 1.0 - float(test_result.pvalue)\n", + "print(f\"{test_result=}\")\n", + "print(f\"{confidence=:.1%}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We got such a high confidence that we can say for sure that these differences are not due to chance.\n", + "\n", + "So we can say that model A is _consistently_ better than model B -- even though some counter examples exist as we saw in the image by image comparison." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cite Us\n", + "\n", + "AUPIMO was developed during [Google Summer of Code 2023 (GSoC 2023)](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd) with the `anomalib` team from Intel's OpenVINO Toolkit.\n", + "\n", + "arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", + "\n", + "```bibtex\n", + "@misc{bertoldo2024aupimo,\n", + " author={Joao P. C. Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", + " year={2024},\n", + " url={https://arxiv.org/abs/2401.01984}, \n", + "}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Utils\n", + "\n", + "Some utility functions to expand what this notebook shows." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save AUPIMO scores\n", + "\n", + "At the beginning of the notebook we defined a function `load_aupimo_result_from_json_dict()` that deserializes `AUPIMOResult` objects.\n", + "\n", + "Let's define the opposite operator so you can save and publish your AUPIMO scores." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "payload.keys()=dict_keys(['fpr_lower_bound', 'fpr_upper_bound', 'num_thresholds', 'thresh_lower_bound', 'thresh_upper_bound', 'aupimos'])\n" + ] + } + ], + "source": [ + "def save_aupimo_result_to_json_dict(\n", + " aupimo_result: AUPIMOResult,\n", + " paths: list[str | Path] | None = None,\n", + ") -> dict[str, str | float | int | list[str]]:\n", + " \"\"\"Convert the AUPIMOResult dataclass to a JSON payload.\"\"\"\n", + " payload = {\n", + " \"fpr_lower_bound\": aupimo_result.fpr_lower_bound,\n", + " \"fpr_upper_bound\": aupimo_result.fpr_upper_bound,\n", + " \"num_thresholds\": aupimo_result.num_thresholds,\n", + " \"thresh_lower_bound\": aupimo_result.thresh_lower_bound,\n", + " \"thresh_upper_bound\": aupimo_result.thresh_upper_bound,\n", + " \"aupimos\": aupimo_result.aupimos.tolist(),\n", + " }\n", + " if paths is not None:\n", + " if len(paths) != aupimo_result.aupimos.shape[0]:\n", + " msg = (\n", + " \"Invalid paths. It must have the same length as the AUPIMO scores. 
\"\n", + " f\"Got {len(paths)} paths and {aupimo_result.aupimos.shape[0]} scores.\"\n", + " )\n", + " raise ValueError(msg)\n", + " # make sure the paths are strings, not pathlib.Path objects\n", + " payload[\"paths\"] = [str(p) for p in paths]\n", + " return payload\n", + "\n", + "\n", + "payload = save_aupimo_result_to_json_dict(aupimo_result_model_a)\n", + "print(f\"{payload.keys()=}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "payload.keys()=dict_keys(['fpr_lower_bound', 'fpr_upper_bound', 'num_thresholds', 'thresh_lower_bound', 'thresh_upper_bound', 'aupimos'])\n" + ] + } + ], + "source": [ + "payload = save_aupimo_result_to_json_dict(aupimo_result_model_a)\n", + "print(f\"{payload.keys()=}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "payload.keys()=dict_keys(['fpr_lower_bound', 'fpr_upper_bound', 'num_thresholds', 'thresh_lower_bound', 'thresh_upper_bound', 'aupimos', 'paths'])\n" + ] + } + ], + "source": [ + "# you can optionally save the paths to the images\n", + "# where the AUPIMO scores were computed from\n", + "payload = save_aupimo_result_to_json_dict(aupimo_result_model_a, paths)\n", + "print(f\"{payload.keys()=}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "8,0K\t/tmp/tmpsuauy_de/aupimo_result.json\n" + ] + } + ], + "source": [ + "# let's check that it can be saved to a file and loaded back\n", + "\n", + "from tempfile import TemporaryDirectory\n", + "\n", + "with TemporaryDirectory() as tmpdir:\n", + " cache_dir = Path(tmpdir)\n", + "\n", + " with (cache_dir / \"aupimo_result.json\").open(\"w\") as file:\n", + " json.dump(payload, file)\n", + "\n", + " !du -sh {cache_dir / \"aupimo_result.json\"}\n", 
+ "\n", + " with (cache_dir / \"aupimo_result.json\").open(\"r\") as file:\n", + " payload_reloaded = json.load(file)\n", + "\n", + "aupimo_result_reloaded = load_aupimo_result_from_json_dict(payload_reloaded)\n", + "assert torch.allclose(aupimo_result_model_a.aupimos, aupimo_result_reloaded.aupimos, equal_nan=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pairwise statistical tests (multiple models)\n", + "\n", + "What if you have multiple models to compare?\n", + "\n", + "Here we define a functions that will return all the pairwise comparisons between the models." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "import itertools\n", + "from typing import Any, Literal\n", + "\n", + "import numpy as np\n", + "from numpy import ndarray\n", + "from scipy import stats\n", + "from torch import Tensor\n", + "\n", + "\n", + "def _validate_models(models: dict[str, Tensor | ndarray]) -> dict[str, ndarray]:\n", + " \"\"\"Make sure the input `models` is valid and convert all the dict's values to `ndarray`.\n", + "\n", + " Args:\n", + " models (dict[str, Tensor | ndarray]): {\"model name\": sequence of shape (num_images,)}.\n", + " Validations:\n", + " - keys are strings (model names)\n", + " - there are at least two models\n", + " - values are sequences of floats in [0, 1] or `nan`\n", + " - all sequences have the same shape\n", + " - all `nan` values are at the positions\n", + " Returns:\n", + " dict[str, ndarray]: {\"model name\": array (num_images,)}.\n", + " \"\"\"\n", + " if not isinstance(models, dict):\n", + " msg = f\"Expected argument `models` to be a dict, but got {type(models)}.\"\n", + " raise TypeError(msg)\n", + "\n", + " if len(models) < 2:\n", + " msg = \"Expected argument `models` to have at least one key, but got none.\"\n", + " raise ValueError(msg)\n", + "\n", + " ref_num_samples = None\n", + " ref_nans = None\n", + " for key in models:\n", + " if not 
isinstance(key, str):\n", + " msg = f\"Expected argument `models` to have all keys of type str. Found {type(key)}.\"\n", + " raise TypeError(msg)\n", + "\n", + " value = models[key]\n", + "\n", + " if not isinstance(value, Tensor | ndarray):\n", + " msg = (\n", + " \"Expected argument `models` to have all values of type Tensor or ndarray. \"\n", + " f\"Found {type(value)} on {key=}.\"\n", + " )\n", + " raise TypeError(msg)\n", + "\n", + " if isinstance(value, Tensor):\n", + " models[key] = value = value.numpy()\n", + "\n", + " if not np.issubdtype(value.dtype, np.floating):\n", + " msg = f\"Expected argument `models` to have all values of floating type. Found {value.dtype} on {key=}.\"\n", + " raise ValueError(msg)\n", + "\n", + " if value.ndim != 1:\n", + " msg = f\"Expected argument `models` to have all values of 1D arrays. Found {value.ndim} on {key=}.\"\n", + " raise ValueError(msg)\n", + "\n", + " if ref_num_samples is None:\n", + " ref_num_samples = num_samples = value.shape[0]\n", + " ref_nans = nans = np.isnan(value)\n", + "\n", + " if num_samples != ref_num_samples:\n", + " msg = \"Argument `models` has inconsistent number of samples.\"\n", + " raise ValueError(msg)\n", + "\n", + " if (nans != ref_nans).any():\n", + " msg = \"Argument `models` has inconsistent `nan` values (in different positions).\"\n", + " raise ValueError(msg)\n", + "\n", + " if (value[~nans] < 0).any() or (value[~nans] > 1).any():\n", + " msg = (\n", + " \"Expected argument `models` to have all sequences of floats \\\\in [0, 1]. 
\"\n", + " f\"Key {key} has values outside this range.\"\n", + " )\n", + " raise ValueError(msg)\n", + "\n", + " return models\n", + "\n", + "\n", + "def test_pairwise(\n", + " models: dict[str, Tensor | ndarray],\n", + " *,\n", + " test: Literal[\"ttest_rel\", \"wilcoxon\"],\n", + " min_abs_diff: float | None = None,\n", + ") -> list[dict[str, Any]]:\n", + " \"\"\"Compare all pairs of models using statistical tests.\n", + "\n", + " Scores are assumed to be *higher is better*.\n", + "\n", + " General hypothesis in the tests:\n", + " - Null hypothesis: two models are equivalent on average.\n", + " - Alternative hypothesis: one model is better than the other (two-sided test).\n", + "\n", + " Args:\n", + " models (dict[str, Tensor | ndarray]): {\"model name\": sequence of shape (num_images,)}.\n", + " test (Literal[\"ttest_rel\", \"wilcoxon\"]): The statistical test to use.\n", + " - \"ttest_rel\": Paired Student's t-test (parametric).\n", + " - \"wilcoxon\": Wilcoxon signed-rank test (non-parametric).\n", + " min_abs_diff (float | None): Minimum absolute difference to consider in the Wilcoxon test. If `None`, all\n", + " differences are considered. Default is `None`. 
Ignored in the t-test.\n", + " \"\"\"\n", + " models = _validate_models(models)\n", + " if test not in {\"ttest_rel\", \"wilcoxon\"}:\n", + " msg = f\"Expected argument `test` to be 'ttest_rel' or 'wilcoxon', but got '{test}'.\"\n", + " raise ValueError(msg)\n", + " # remove nan values\n", + " models = {k: v[~np.isnan(v)] for k, v in models.items()}\n", + " models_names = sorted(models.keys())\n", + " num_models = len(models)\n", + " comparisons = list(itertools.combinations(range(num_models), 2))\n", + "\n", + " # for each comparison, compute the test and confidence (1 - p-value)\n", + " test_results = []\n", + " for modela_idx, modelb_idx in comparisons: # indices of the sorted model names\n", + " modela = models_names[modela_idx]\n", + " modelb = models_names[modelb_idx]\n", + " modela_scores = models[modela]\n", + " modelb_scores = models[modelb]\n", + " if test == \"ttest_rel\":\n", + " test_result = stats.ttest_rel(modela_scores, modelb_scores, alternative=\"two-sided\")\n", + " else: # test == \"wilcoxon\"\n", + " differences = modela_scores - modelb_scores\n", + " if min_abs_diff is not None:\n", + " differences[np.abs(differences) < min_abs_diff] = 0.0\n", + " # extreme case\n", + " if (differences == 0).all():\n", + " test_result = stats._morestats.WilcoxonResult(np.nan, 1.0) # noqa: SLF001\n", + " else:\n", + " test_result = stats.wilcoxon(differences, zero_method=\"zsplit\", alternative=\"two-sided\")\n", + " test_results.append({\n", + " \"modela\": modela,\n", + " \"modelb\": modelb,\n", + " \"confidence\": 1 - test_result.pvalue,\n", + " \"pvalue\": test_result.pvalue,\n", + " \"statistic\": test_result.statistic,\n", + " })\n", + "\n", + " return test_results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's first test it with the same two models we used before." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
modelamodelbconfidencepvaluestatistic
0AB0.9950.0052.872
\n", + "
" + ], + "text/plain": [ + " modela modelb confidence pvalue statistic\n", + "0 A B 0.995 0.005 2.872" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# parametric test\n", + "pd.DataFrame.from_records(test_pairwise({\"A\": modela, \"B\": modelb}, test=\"ttest_rel\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
modelamodelbconfidencepvaluestatistic
0AB0.9980.0021965.500
\n", + "
" + ], + "text/plain": [ + " modela modelb confidence pvalue statistic\n", + "0 A B 0.998 0.002 1965.500" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# non-parametric test\n", + "pd.DataFrame.from_records(test_pairwise({\"A\": modela, \"B\": modelb}, test=\"wilcoxon\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
modelamodelbconfidencepvaluestatistic
0AB1.0000.0001823.000
\n", + "
" + ], + "text/plain": [ + " modela modelb confidence pvalue statistic\n", + "0 A B 1.000 0.000 1823.000" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# non-parametric test with a minimum absolute difference\n", + "pd.DataFrame.from_records(test_pairwise({\"A\": modela, \"B\": modelb}, test=\"wilcoxon\", min_abs_diff=0.05))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's get the best models from the benchmark in our paper and compare them two by two.\n", + "\n", + "We'll look at the dataset `cashew` from `VisA`.\n", + "\n", + "> More details in the paper (see the last cell)." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 modelamodelbconfidencepvaluestatistic
0efficientad_wr101_s_extpatchcore_wr1010.9994020.0005981580.000000
1efficientad_wr101_s_extrd++_wr50_ext0.7736590.2263412193.500000
2efficientad_wr101_s_extsimplenet_wr50_ext1.0000000.000000690.500000
3efficientad_wr101_s_extuflow_ext0.9994470.0005531550.500000
4patchcore_wr101rd++_wr50_ext0.9999800.0000201333.000000
5patchcore_wr101simplenet_wr50_ext1.0000000.000000351.500000
6patchcore_wr101uflow_ext0.7318750.2681252213.000000
7rd++_wr50_extsimplenet_wr50_ext1.0000000.000000967.000000
8rd++_wr50_extuflow_ext0.9999450.0000551383.000000
9simplenet_wr50_extuflow_ext1.0000000.000000318.500000
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "models = {\n", + " model_name: get_benchmark_aupimo_scores(model_name, \"visa/cashew\", verbose=False)[1].aupimos.numpy()\n", + " for model_name in [\n", + " \"efficientad_wr101_s_ext\",\n", + " \"patchcore_wr101\",\n", + " \"rd++_wr50_ext\",\n", + " \"simplenet_wr50_ext\",\n", + " \"uflow_ext\",\n", + " ]\n", + "}\n", + "models = test_pairwise(models, test=\"wilcoxon\", min_abs_diff=0.1)\n", + "pd.DataFrame.from_records(models).style.background_gradient(cmap=\"jet\", vmin=0, vmax=1, subset=[\"confidence\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compare to the benchmark (coming up)\n", + "\n", + "Compare your freshly trained models to the benchmark datasets in our paper." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO(jpcbertoldo): implement utility function to load and compare to the results from the benchmark # noqa: TD003" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cite Us\n", + "\n", + "AUPIMO was developed during [Google Summer of Code 2023 (GSoC 2023)](https://summerofcode.withgoogle.com/archive/2023/projects/SPMopugd) with the `anomalib` team from Intel's OpenVINO Toolkit.\n", + "\n", + "arXiv: [arxiv.org/abs/2401.01984](https://arxiv.org/abs/2401.01984) (accepted to BMVC 2024)\n", + "\n", + "Official repository: [github.com/jpcbertoldo/aupimo](https://github.com/jpcbertoldo/aupimo) (numpy-only API and numba-accelerated versions available)\n", + "\n", + "```bibtex\n", + "@misc{bertoldo2024aupimo,\n", + " author={Joao P. C. 
Bertoldo and Dick Ameln and Ashwin Vaidya and Samet Akçay},\n", + " title={{AUPIMO: Redefining Visual Anomaly Detection Benchmarks with High Speed and Low Tolerance}}, \n", + " year={2024},\n", + " url={https://arxiv.org/abs/2401.01984}, \n", + "}\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "anomalib-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/README.md b/notebooks/README.md index 15935b93cf..de33e5b7e9 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -60,3 +60,4 @@ To install Python, Git and other required tools, [OpenVINO Notebooks](https://gi | AUPIMO representative samples and visualization | [701b_aupimo_advanced_i](/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb) | | PIMO curve and integration bounds | [701c_aupimo_advanced_ii](/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb) | | (AU)PIMO of a random model | [701d_aupimo_advanced_iii](/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701d_aupimo_advanced_iii.ipynb) | +| AUPIMO load/save, statistical comparison | 
[701e_aupimo_advanced_iv](/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb) | diff --git a/src/anomalib/metrics/pimo/dataclasses.py b/src/anomalib/metrics/pimo/dataclasses.py index 0c5aeb025d..3eaa04cd12 100644 --- a/src/anomalib/metrics/pimo/dataclasses.py +++ b/src/anomalib/metrics/pimo/dataclasses.py @@ -120,7 +120,7 @@ class AUPIMOResult: # metadata fpr_lower_bound: float fpr_upper_bound: float - num_thresholds: int + num_thresholds: int | None # data thresh_lower_bound: float = field(repr=False) @@ -169,7 +169,8 @@ def __post_init__(self) -> None: try: _validate.is_rate_range((self.fpr_lower_bound, self.fpr_upper_bound)) # TODO(jpcbertoldo): warn when it's too low (use parameters from the numpy code) # noqa: TD003 - _validate.is_num_thresholds_gte2(self.num_thresholds) + if self.num_thresholds is not None: + _validate.is_num_thresholds_gte2(self.num_thresholds) _validate.is_rates(self.aupimos, nan_allowed=True) # validate is_aupimos _validate.validate_threshold_bounds((self.thresh_lower_bound, self.thresh_upper_bound)) @@ -194,7 +195,6 @@ def from_pimo_result( num_thresholds_auc: number of thresholds used to effectively compute AUPIMO; NOT the number of thresholds used to compute the PIMO curve! aupimos: AUPIMO scores - paths: paths to the source images to which the AUPIMO scores correspond. 
""" if pimo_result.per_image_tprs.shape[0] != aupimos.shape[0]: msg = ( From 6eeb7f6755d95f7a02481ef76507f9ee3ca07961 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 22 Oct 2024 14:26:16 +0100 Subject: [PATCH 19/32] =?UTF-8?q?=F0=9F=90=9E=20Defer=20OpenVINO=20import?= =?UTF-8?q?=20to=20avoid=20unnecessary=20warnings=20(#2385)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix openvino import issue Signed-off-by: Samet Akcay * Fix pre-commit issues Signed-off-by: Samet Akcay --------- Signed-off-by: Samet Akcay --- .../deploy/inferencers/openvino_inferencer.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/anomalib/deploy/inferencers/openvino_inferencer.py b/src/anomalib/deploy/inferencers/openvino_inferencer.py index bb57a8d65a..8dea77b92e 100644 --- a/src/anomalib/deploy/inferencers/openvino_inferencer.py +++ b/src/anomalib/deploy/inferencers/openvino_inferencer.py @@ -4,12 +4,12 @@ # SPDX-License-Identifier: Apache-2.0 import logging -from importlib.util import find_spec from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import Any import cv2 import numpy as np +from lightning_utilities.core.imports import package_available from omegaconf import DictConfig from PIL import Image @@ -21,14 +21,6 @@ logger = logging.getLogger("anomalib") -if find_spec("openvino") is not None: - import openvino as ov - - if TYPE_CHECKING: - from openvino import CompiledModel -else: - logger.warning("OpenVINO is not installed. Please install OpenVINO to use OpenVINOInferencer.") - class OpenVINOInferencer(Inferencer): """OpenVINO implementation for the inference. @@ -102,6 +94,10 @@ def __init__( task: str | None = None, config: dict | None = None, ) -> None: + if not package_available("openvino"): + msg = "OpenVINO is not installed. Please install OpenVINO to use OpenVINOInferencer." 
+ raise ImportError(msg) + self.device = device self.config = config @@ -110,7 +106,7 @@ def __init__( self.task = TaskType(task) if task else TaskType(self.metadata["task"]) - def load_model(self, path: str | Path | tuple[bytes, bytes]) -> tuple[Any, Any, "CompiledModel"]: + def load_model(self, path: str | Path | tuple[bytes, bytes]) -> tuple[Any, Any, Any]: """Load the OpenVINO model. Args: @@ -121,6 +117,8 @@ def load_model(self, path: str | Path | tuple[bytes, bytes]) -> tuple[Any, Any, [tuple[str, str, ExecutableNetwork]]: Input and Output blob names together with the Executable network. """ + import openvino as ov + core = ov.Core() # If tuple of bytes is passed if isinstance(path, tuple): From 3a403ae8c6cf91d9fb52aa58a198d559a71dfd00 Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Tue, 22 Oct 2024 17:23:32 +0200 Subject: [PATCH 20/32] =?UTF-8?q?=F0=9F=9A=80=20Add=20VLM=20based=20Anomal?= =?UTF-8?q?y=20Model=20(#2344)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [Draft] Llm on (#2165) * Add TaskType Explanation Signed-off-by: Bepitic * Add llm model Signed-off-by: Bepitic * add ollama Signed-off-by: Bepitic * better description for descr in title Signed-off-by: Bepitic * add text of llm into imageResult visualization * add text of llm into imageResult visualization Signed-off-by: Bepitic * latest changes Signed-off-by: Bepitic * add wip llava/llava_next Signed-off-by: Bepitic * add init Signed-off-by: Bepitic * add text of llm into imageResult visualization Signed-off-by: Bepitic * latest changes Signed-off-by: Bepitic * upd Lint Signed-off-by: Bepitic * fix visualization with description Signed-off-by: Bepitic * show the images every batch Signed-off-by: Bepitic * fix docstring and error management Signed-off-by: Bepitic * Add compatibility for TaskType.EXPLANATION. Signed-off-by: Bepitic * Remove, show in the engine-Visualization. * fix visualization and llm openai multishot. 
* fix Circular import problem * Add HugginFace To LLavaNext Signed-off-by: Bepitic --------- Signed-off-by: Bepitic * 🔨 Scaffold for refactor (#2340) * initial scafold Signed-off-by: Ashwin Vaidya * Apply PR comments Signed-off-by: Ashwin Vaidya * rename dir Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * Add ChatGPT (#2341) * initial scafold Signed-off-by: Ashwin Vaidya * Apply PR comments Signed-off-by: Ashwin Vaidya * rename dir Signed-off-by: Ashwin Vaidya * delete llm_ollama Signed-off-by: Ashwin Vaidya * Add ChatGPT Signed-off-by: Ashwin Vaidya * Add ChatGPT Signed-off-by: Ashwin Vaidya * Remove LLM model Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * Add Huggingface (#2343) * initial scafold Signed-off-by: Ashwin Vaidya * Apply PR comments Signed-off-by: Ashwin Vaidya * rename dir Signed-off-by: Ashwin Vaidya * delete llm_ollama Signed-off-by: Ashwin Vaidya * Add ChatGPT Signed-off-by: Ashwin Vaidya * Add ChatGPT Signed-off-by: Ashwin Vaidya * Remove LLM model Signed-off-by: Ashwin Vaidya * Add transformers Signed-off-by: Ashwin Vaidya * Remove llava Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * 🔨 Minor Refactor (#2345) Refactor Signed-off-by: Ashwin Vaidya * undo changes Signed-off-by: Ashwin Vaidya * undo changes Signed-off-by: Ashwin Vaidya * undo changes to image.py Signed-off-by: Ashwin Vaidya * Add explanation visualizer (#2351) * Add explanation visualizer Signed-off-by: Ashwin Vaidya * bug-fix Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * 🔨 Allow setting API keys from env (#2353) Allow setting API keys from env Signed-off-by: Ashwin Vaidya * 🧪 Add tests (#2355) * Add tests Signed-off-by: Ashwin Vaidya * remove explanation task type Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya * minor fixes Signed-off-by: Ashwin Vaidya * Update changelog Signed-off-by: Ashwin Vaidya * Fix tests Signed-off-by: Ashwin Vaidya * Address PR 
comments Signed-off-by: Ashwin Vaidya * update name Signed-off-by: Ashwin Vaidya * Update src/anomalib/models/image/vlm_ad/lightning_model.py Co-authored-by: Samet Akcay * update name Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Bepitic Signed-off-by: Ashwin Vaidya Co-authored-by: Paco Co-authored-by: Samet Akcay --- CHANGELOG.md | 1 + pyproject.toml | 3 +- src/anomalib/callbacks/metrics.py | 5 +- src/anomalib/engine/engine.py | 16 ++- src/anomalib/models/__init__.py | 2 + src/anomalib/models/image/__init__.py | 2 + src/anomalib/models/image/vlm_ad/__init__.py | 8 ++ .../models/image/vlm_ad/backends/__init__.py | 11 ++ .../models/image/vlm_ad/backends/base.py | 30 +++++ .../models/image/vlm_ad/backends/chat_gpt.py | 109 +++++++++++++++++ .../image/vlm_ad/backends/huggingface.py | 96 +++++++++++++++ .../models/image/vlm_ad/backends/ollama.py | 73 +++++++++++ .../models/image/vlm_ad/lightning_model.py | 115 ++++++++++++++++++ src/anomalib/models/image/vlm_ad/utils.py | 25 ++++ src/anomalib/utils/visualization/__init__.py | 2 + .../utils/visualization/explanation.py | 106 ++++++++++++++++ tests/integration/model/test_models.py | 8 +- 17 files changed, 603 insertions(+), 9 deletions(-) create mode 100644 src/anomalib/models/image/vlm_ad/__init__.py create mode 100644 src/anomalib/models/image/vlm_ad/backends/__init__.py create mode 100644 src/anomalib/models/image/vlm_ad/backends/base.py create mode 100644 src/anomalib/models/image/vlm_ad/backends/chat_gpt.py create mode 100644 src/anomalib/models/image/vlm_ad/backends/huggingface.py create mode 100644 src/anomalib/models/image/vlm_ad/backends/ollama.py create mode 100644 src/anomalib/models/image/vlm_ad/lightning_model.py create mode 100644 src/anomalib/models/image/vlm_ad/utils.py create mode 100644 src/anomalib/utils/visualization/explanation.py diff --git a/CHANGELOG.md b/CHANGELOG.md index cf9807af26..b50bf09ecb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/). ### Added +- Add `VlmAd` model by [Bepitic](https://github.com/Bepitic) and refactored by [ashwinvaidya17](https://github.com/ashwinvaidya17) in https://github.com/openvinotoolkit/anomalib/pull/2344 - Add `Datumaro` annotation format support by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2377 - Add `AUPIMO` tutorials notebooks in https://github.com/openvinotoolkit/anomalib/pull/2330 and https://github.com/openvinotoolkit/anomalib/pull/2336 - Add `AUPIMO` metric by [jpcbertoldo](https://github.com/jpcbertoldo) in https://github.com/openvinotoolkit/anomalib/pull/1726 and refactored by [ashwinvaidya17](https://github.com/ashwinvaidya17) in https://github.com/openvinotoolkit/anomalib/pull/2329 diff --git a/pyproject.toml b/pyproject.toml index 2893ad20c4..268544ad2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ core = [ "open-clip-torch>=2.23.0,<2.26.1", ] openvino = ["openvino>=2024.0", "nncf>=2.10.0", "onnx>=1.16.0"] +vlm = ["ollama", "openai", "python-dotenv","transformers"] loggers = [ "comet-ml>=3.31.7", "gradio>=4", @@ -84,7 +85,7 @@ test = [ "coverage[toml]", "tox", ] -full = ["anomalib[core,openvino,loggers,notebooks]"] +full = ["anomalib[core,openvino,loggers,notebooks, vlm]"] dev = ["anomalib[full,docs,test]"] [project.scripts] diff --git a/src/anomalib/callbacks/metrics.py b/src/anomalib/callbacks/metrics.py index 5cee830dad..e09e622d41 100644 --- a/src/anomalib/callbacks/metrics.py +++ b/src/anomalib/callbacks/metrics.py @@ -78,9 +78,8 @@ def setup( elif self.task == TaskType.CLASSIFICATION: pixel_metric_names = [] logger.warning( - "Cannot perform pixel-level evaluation when task type is classification. " - "Ignoring the following pixel-level metrics: %s", - self.pixel_metric_names, + "Cannot perform pixel-level evaluation when task type is {self.task.value}. 
" + f"Ignoring the following pixel-level metrics: {self.pixel_metric_names}", ) else: pixel_metric_names = ( diff --git a/src/anomalib/engine/engine.py b/src/anomalib/engine/engine.py index 83b9714416..b537819729 100644 --- a/src/anomalib/engine/engine.py +++ b/src/anomalib/engine/engine.py @@ -32,7 +32,7 @@ from anomalib.utils.normalization import NormalizationMethod from anomalib.utils.path import create_versioned_dir from anomalib.utils.types import NORMALIZATION, THRESHOLD -from anomalib.utils.visualization import ImageVisualizer +from anomalib.utils.visualization import BaseVisualizer, ExplanationVisualizer, ImageVisualizer logger = logging.getLogger(__name__) @@ -322,7 +322,7 @@ def _setup_trainer(self, model: AnomalyModule) -> None: self._cache.update(model) # Setup anomalib callbacks to be used with the trainer - self._setup_anomalib_callbacks() + self._setup_anomalib_callbacks(model) # Temporarily set devices to 1 to avoid issues with multiple processes self._cache.args["devices"] = 1 @@ -405,7 +405,7 @@ def _setup_transform( if not getattr(dataloader.dataset, "transform", None): dataloader.dataset.transform = transform - def _setup_anomalib_callbacks(self) -> None: + def _setup_anomalib_callbacks(self, model: AnomalyModule) -> None: """Set up callbacks for the trainer.""" _callbacks: list[Callback] = [] @@ -432,9 +432,17 @@ def _setup_anomalib_callbacks(self) -> None: _callbacks.append(_ThresholdCallback(self.threshold)) _callbacks.append(_MetricsCallback(self.task, self.image_metric_names, self.pixel_metric_names)) + visualizer: BaseVisualizer + + # TODO(ashwinvaidya17): temporary # noqa: TD003 ignoring as visualizer is getting a complete overhaul + if model.__class__.__name__ == "VlmAd": + visualizer = ExplanationVisualizer() + else: + visualizer = ImageVisualizer(task=self.task, normalize=self.normalization == NormalizationMethod.NONE) + _callbacks.append( _VisualizationCallback( - visualizers=ImageVisualizer(task=self.task, 
normalize=self.normalization == NormalizationMethod.NONE), + visualizers=visualizer, save=True, root=self._cache.args["default_root_dir"] / "images", ), diff --git a/src/anomalib/models/__init__.py b/src/anomalib/models/__init__.py index b4bb36a875..ea091d1640 100644 --- a/src/anomalib/models/__init__.py +++ b/src/anomalib/models/__init__.py @@ -30,6 +30,7 @@ Rkde, Stfpm, Uflow, + VlmAd, WinClip, ) from .video import AiVad @@ -58,6 +59,7 @@ class UnknownModelError(ModuleNotFoundError): "Stfpm", "Uflow", "AiVad", + "VlmAd", "WinClip", ] diff --git a/src/anomalib/models/image/__init__.py b/src/anomalib/models/image/__init__.py index f3a5435038..b09da8b07b 100644 --- a/src/anomalib/models/image/__init__.py +++ b/src/anomalib/models/image/__init__.py @@ -20,6 +20,7 @@ from .rkde import Rkde from .stfpm import Stfpm from .uflow import Uflow +from .vlm_ad import VlmAd from .winclip import WinClip __all__ = [ @@ -40,5 +41,6 @@ "Rkde", "Stfpm", "Uflow", + "VlmAd", "WinClip", ] diff --git a/src/anomalib/models/image/vlm_ad/__init__.py b/src/anomalib/models/image/vlm_ad/__init__.py new file mode 100644 index 0000000000..46ab8e0fee --- /dev/null +++ b/src/anomalib/models/image/vlm_ad/__init__.py @@ -0,0 +1,8 @@ +"""Visual Anomaly Model.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from .lightning_model import VlmAd + +__all__ = ["VlmAd"] diff --git a/src/anomalib/models/image/vlm_ad/backends/__init__.py b/src/anomalib/models/image/vlm_ad/backends/__init__.py new file mode 100644 index 0000000000..44009f8f83 --- /dev/null +++ b/src/anomalib/models/image/vlm_ad/backends/__init__.py @@ -0,0 +1,11 @@ +"""VLM backends.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from .base import Backend +from .chat_gpt import ChatGPT +from .huggingface import Huggingface +from .ollama import Ollama + +__all__ = ["Backend", "ChatGPT", "Huggingface", "Ollama"] diff --git 
a/src/anomalib/models/image/vlm_ad/backends/base.py b/src/anomalib/models/image/vlm_ad/backends/base.py new file mode 100644 index 0000000000..b4aadf9a22 --- /dev/null +++ b/src/anomalib/models/image/vlm_ad/backends/base.py @@ -0,0 +1,30 @@ +"""Base backend.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from abc import ABC, abstractmethod +from pathlib import Path + +from anomalib.models.image.vlm_ad.utils import Prompt + + +class Backend(ABC): + """Base backend.""" + + @abstractmethod + def __init__(self, model_name: str) -> None: + """Initialize the backend.""" + + @abstractmethod + def add_reference_images(self, image: str | Path) -> None: + """Add reference images for k-shot.""" + + @abstractmethod + def predict(self, image: str | Path, prompt: Prompt) -> str: + """Predict the anomaly label.""" + + @property + @abstractmethod + def num_reference_images(self) -> int: + """Get the number of reference images.""" diff --git a/src/anomalib/models/image/vlm_ad/backends/chat_gpt.py b/src/anomalib/models/image/vlm_ad/backends/chat_gpt.py new file mode 100644 index 0000000000..741288354f --- /dev/null +++ b/src/anomalib/models/image/vlm_ad/backends/chat_gpt.py @@ -0,0 +1,109 @@ +"""ChatGPT backend.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import base64 +import logging +import os +from pathlib import Path +from typing import TYPE_CHECKING + +from dotenv import load_dotenv +from lightning_utilities.core.imports import package_available + +from anomalib.models.image.vlm_ad.utils import Prompt + +from .base import Backend + +if package_available("openai"): + from openai import OpenAI +else: + OpenAI = None + +if TYPE_CHECKING: + from openai.types.chat import ChatCompletion + +logger = logging.getLogger(__name__) + + +class ChatGPT(Backend): + """ChatGPT backend.""" + + def __init__(self, model_name: str, api_key: str | None = None) -> None: + """Initialize the ChatGPT backend.""" + 
self._ref_images_encoded: list[str] = [] + self.model_name: str = model_name + self._client: OpenAI | None = None + self.api_key = self._get_api_key(api_key) + + @property + def client(self) -> OpenAI: + """Get the OpenAI client.""" + if OpenAI is None: + msg = "OpenAI is not installed. Please install it to use ChatGPT backend." + raise ImportError(msg) + if self._client is None: + self._client = OpenAI(api_key=self.api_key) + return self._client + + def add_reference_images(self, image: str | Path) -> None: + """Add reference images for k-shot.""" + self._ref_images_encoded.append(self._encode_image_to_url(image)) + + @property + def num_reference_images(self) -> int: + """Get the number of reference images.""" + return len(self._ref_images_encoded) + + def predict(self, image: str | Path, prompt: Prompt) -> str: + """Predict the anomaly label.""" + image_encoded = self._encode_image_to_url(image) + messages = [] + + # few-shot + if len(self._ref_images_encoded) > 0: + messages.append(self._generate_message(content=prompt.few_shot, images=self._ref_images_encoded)) + + messages.append(self._generate_message(content=prompt.predict, images=[image_encoded])) + + response: ChatCompletion = self.client.chat.completions.create(messages=messages, model=self.model_name) + return response.choices[0].message.content + + @staticmethod + def _generate_message(content: str, images: list[str] | None) -> dict: + """Generate a message.""" + message: dict[str, list[dict] | str] = {"role": "user"} + if images is not None: + _content: list[dict[str, str | dict]] = [{"type": "text", "text": content}] + _content.extend([{"type": "image_url", "image_url": {"url": image}} for image in images]) + message["content"] = _content + else: + message["content"] = content + return message + + def _encode_image_to_url(self, image: str | Path) -> str: + """Encode the image to base64 and embed in url string.""" + image_path = Path(image) + extension = image_path.suffix + base64_encoded = 
self._encode_image_to_base_64(image_path) + return f"data:image/{extension};base64,{base64_encoded}" + + @staticmethod + def _encode_image_to_base_64(image: str | Path) -> str: + """Encode the image to base64.""" + image = Path(image) + return base64.b64encode(image.read_bytes()).decode("utf-8") + + def _get_api_key(self, api_key: str | None = None) -> str: + if api_key is None: + load_dotenv() + api_key = os.getenv("OPENAI_API_KEY") + if api_key is None: + msg = ( + f"OpenAI API key must be provided to use {self.model_name}." + " Please provide the API key in the constructor, or set the OPENAI_API_KEY environment variable" + " or in a `.env` file." + ) + raise ValueError(msg) + return api_key diff --git a/src/anomalib/models/image/vlm_ad/backends/huggingface.py b/src/anomalib/models/image/vlm_ad/backends/huggingface.py new file mode 100644 index 0000000000..c234ecfbc5 --- /dev/null +++ b/src/anomalib/models/image/vlm_ad/backends/huggingface.py @@ -0,0 +1,96 @@ +"""Huggingface backend.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from pathlib import Path + +from lightning_utilities.core.imports import package_available +from PIL import Image +from transformers.modeling_utils import PreTrainedModel + +from anomalib.models.image.vlm_ad.utils import Prompt + +from .base import Backend + +if package_available("transformers"): + import transformers + from transformers.modeling_utils import PreTrainedModel + from transformers.processing_utils import ProcessorMixin +else: + transformers = None + + +logger = logging.getLogger(__name__) + + +class Huggingface(Backend): + """Huggingface backend.""" + + def __init__( + self, + model_name: str, + ) -> None: + """Initialize the Huggingface backend.""" + self.model_name: str = model_name + self._ref_images: list[str] = [] + self._processor: ProcessorMixin | None = None + self._model: PreTrainedModel | None = None + + @property + def processor(self) -> ProcessorMixin: + 
"""Get the Huggingface processor.""" + if self._processor is None: + if transformers is None: + msg = "transformers is not installed." + raise ValueError(msg) + self._processor = transformers.LlavaNextProcessor.from_pretrained(self.model_name) + return self._processor + + @property + def model(self) -> PreTrainedModel: + """Get the Huggingface model.""" + if self._model is None: + if transformers is None: + msg = "transformers is not installed." + raise ValueError(msg) + self._model = transformers.LlavaNextForConditionalGeneration.from_pretrained(self.model_name) + return self._model + + @staticmethod + def _generate_message(content: str, images: list[str] | None) -> dict: + """Generate a message.""" + message: dict[str, str | list[dict]] = {"role": "user"} + _content: list[dict[str, str]] = [{"type": "text", "text": content}] + if images is not None: + _content.extend([{"type": "image"} for _ in images]) + message["content"] = _content + return message + + def add_reference_images(self, image: str | Path) -> None: + """Add reference images for k-shot.""" + self._ref_images.append(Image.open(image)) + + @property + def num_reference_images(self) -> int: + """Get the number of reference images.""" + return len(self._ref_images) + + def predict(self, image_path: str | Path, prompt: Prompt) -> str: + """Predict the anomaly label.""" + image = Image.open(image_path) + messages: list[dict] = [] + + if len(self._ref_images) > 0: + messages.append(self._generate_message(content=prompt.few_shot, images=self._ref_images)) + + messages.append(self._generate_message(content=prompt.predict, images=[image])) + processed_prompt = [self.processor.apply_chat_template(messages, add_generation_prompt=True)] + + images = [*self._ref_images, image] + inputs = self.processor(images, processed_prompt, return_tensors="pt", padding=True).to(self.model.device) + outputs = self.model.generate(**inputs, max_new_tokens=100) + result = self.processor.decode(outputs[0], 
skip_special_tokens=True) + print(result) + return result diff --git a/src/anomalib/models/image/vlm_ad/backends/ollama.py b/src/anomalib/models/image/vlm_ad/backends/ollama.py new file mode 100644 index 0000000000..db5a215bb3 --- /dev/null +++ b/src/anomalib/models/image/vlm_ad/backends/ollama.py @@ -0,0 +1,73 @@ +"""Ollama backend. + +Assumes that the Ollama service is running in the background. +See: https://github.com/ollama/ollama +Ensure that ollama is running. On linux: `ollama serve` +On Mac and Windows ensure that the ollama service is running by launching from the application list. +""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from pathlib import Path + +from lightning_utilities.core.imports import package_available + +from anomalib.models.image.vlm_ad.utils import Prompt + +from .base import Backend + +if package_available("ollama"): + from ollama import chat + from ollama._client import _encode_image +else: + chat = None + +logger = logging.getLogger(__name__) + + +class Ollama(Backend): + """Ollama backend.""" + + def __init__(self, model_name: str) -> None: + """Initialize the Ollama backend.""" + self.model_name: str = model_name + self._ref_images_encoded: list[str] = [] + + def add_reference_images(self, image: str | Path) -> None: + """Encode the image to base64.""" + self._ref_images_encoded.append(_encode_image(image)) + + @property + def num_reference_images(self) -> int: + """Get the number of reference images.""" + return len(self._ref_images_encoded) + + @staticmethod + def _generate_message(content: str, images: list[str] | None) -> dict: + """Generate a message.""" + message: dict[str, str | list[str]] = {"role": "user", "content": content} + if images: + message["images"] = images + return message + + def predict(self, image: str | Path, prompt: Prompt) -> str: + """Predict the anomaly label.""" + if not chat: + msg = "Ollama is not installed. 
Please install it using `pip install ollama`." + raise ImportError(msg) + image_encoded = _encode_image(image) + messages = [] + + # few-shot + if len(self._ref_images_encoded) > 0: + messages.append(self._generate_message(content=prompt.few_shot, images=self._ref_images_encoded)) + + messages.append(self._generate_message(content=prompt.predict, images=[image_encoded])) + + response = chat( + model=self.model_name, + messages=messages, + ) + return response["message"]["content"].strip() diff --git a/src/anomalib/models/image/vlm_ad/lightning_model.py b/src/anomalib/models/image/vlm_ad/lightning_model.py new file mode 100644 index 0000000000..1279f7a31e --- /dev/null +++ b/src/anomalib/models/image/vlm_ad/lightning_model.py @@ -0,0 +1,115 @@ +"""Visual Anomaly Model for Zero/Few-Shot Anomaly Classification.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging + +import torch +from torch.utils.data import DataLoader + +from anomalib import LearningType +from anomalib.models import AnomalyModule + +from .backends import Backend, ChatGPT, Huggingface, Ollama +from .utils import ModelName, Prompt + +logger = logging.getLogger(__name__) + + +class VlmAd(AnomalyModule): + """Visual anomaly model.""" + + def __init__( + self, + model: ModelName | str = ModelName.LLAMA_OLLAMA, + api_key: str | None = None, + k_shot: int = 0, + ) -> None: + super().__init__() + self.k_shot = k_shot + model = ModelName(model) + self.vlm_backend: Backend = self._setup_vlm_backend(model, api_key) + + @staticmethod + def _setup_vlm_backend(model_name: ModelName, api_key: str | None) -> Backend: + if model_name == ModelName.LLAMA_OLLAMA: + return Ollama(model_name=model_name.value) + if model_name == ModelName.GPT_4O_MINI: + return ChatGPT(api_key=api_key, model_name=model_name.value) + if model_name in {ModelName.VICUNA_7B_HF, ModelName.VICUNA_13B_HF, ModelName.MISTRAL_7B_HF}: + return Huggingface(model_name=model_name.value) + + msg = 
f"Unsupported VLM model: {model_name}" + raise ValueError(msg) + + def _setup(self) -> None: + if self.k_shot > 0 and self.vlm_backend.num_reference_images != self.k_shot: + logger.info("Collecting reference images from training dataset.") + dataloader = self.trainer.datamodule.train_dataloader() + self.collect_reference_images(dataloader) + + def collect_reference_images(self, dataloader: DataLoader) -> None: + """Collect reference images for few-shot inference.""" + for batch in dataloader: + for img_path in batch["image_path"]: + self.vlm_backend.add_reference_images(img_path) + if self.vlm_backend.num_reference_images == self.k_shot: + return + + @property + def prompt(self) -> Prompt: + """Get the prompt.""" + return Prompt( + predict=( + "You are given an image. It is either normal or anomalous." + " First say 'YES' if the image is anomalous, or 'NO' if it is normal.\n" + "Then give the reason for your decision.\n" + "For example, 'YES: The image has a crack on the wall.'" + ), + few_shot=( + "These are a few examples of normal picture without any anomalies." + " You have to use these to determine if the image I provide in the next" + " chat is normal or anomalous." + ), + ) + + def validation_step(self, batch: dict[str, str | torch.Tensor], *args, **kwargs) -> dict: + """Validation step.""" + del args, kwargs # These variables are not used. 
+ responses = [(self.vlm_backend.predict(img_path, self.prompt)) for img_path in batch["image_path"]] + batch["explanation"] = responses + batch["pred_scores"] = torch.tensor([1.0 if r.startswith("Y") else 0.0 for r in responses], device=self.device) + return batch + + @property + def learning_type(self) -> LearningType: + """The learning type of the model.""" + return LearningType.ZERO_SHOT if self.k_shot == 0 else LearningType.FEW_SHOT + + @property + def trainer_arguments(self) -> dict[str, int | float]: + """Doesn't need training.""" + return {} + + @staticmethod + def configure_transforms(image_size: tuple[int, int] | None = None) -> None: + """This modes does not require any transforms.""" + if image_size is not None: + logger.warning("Ignoring image_size argument as each backend has its own transforms.") + + @staticmethod + def _export_not_supported_message() -> None: + logging.warning("Exporting the model is not supported for VLM-AD model. Skipping...") + + def to_torch(self, *_, **__) -> None: # type: ignore[override] + """Skip export to torch.""" + return self._export_not_supported_message() + + def to_onnx(self, *_, **__) -> None: # type: ignore[override] + """Skip export to onnx.""" + return self._export_not_supported_message() + + def to_openvino(self, *_, **__) -> None: # type: ignore[override] + """Skip export to openvino.""" + return self._export_not_supported_message() diff --git a/src/anomalib/models/image/vlm_ad/utils.py b/src/anomalib/models/image/vlm_ad/utils.py new file mode 100644 index 0000000000..ce9b9067ac --- /dev/null +++ b/src/anomalib/models/image/vlm_ad/utils.py @@ -0,0 +1,25 @@ +"""Dataclasses.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import dataclass +from enum import Enum + + +@dataclass +class Prompt: + """Prompt.""" + + few_shot: str + predict: str + + +class ModelName(Enum): + """List of supported models.""" + + LLAMA_OLLAMA = "llava" + GPT_4O_MINI = "gpt-4o-mini" + 
VICUNA_7B_HF = "llava-hf/llava-v1.6-vicuna-7b-hf" + VICUNA_13B_HF = "llava-hf/llava-v1.6-vicuna-13b-hf" + MISTRAL_7B_HF = "llava-hf/llava-v1.6-mistral-7b-hf" diff --git a/src/anomalib/utils/visualization/__init__.py b/src/anomalib/utils/visualization/__init__.py index f68036ed78..404036dfad 100644 --- a/src/anomalib/utils/visualization/__init__.py +++ b/src/anomalib/utils/visualization/__init__.py @@ -4,11 +4,13 @@ # SPDX-License-Identifier: Apache-2.0 from .base import BaseVisualizer, GeneratorResult, VisualizationStep +from .explanation import ExplanationVisualizer from .image import ImageResult, ImageVisualizer from .metrics import MetricsVisualizer __all__ = [ "BaseVisualizer", + "ExplanationVisualizer", "ImageResult", "ImageVisualizer", "GeneratorResult", diff --git a/src/anomalib/utils/visualization/explanation.py b/src/anomalib/utils/visualization/explanation.py new file mode 100644 index 0000000000..10904161e3 --- /dev/null +++ b/src/anomalib/utils/visualization/explanation.py @@ -0,0 +1,106 @@ +"""Explanation visualization generator. + +Note: This is a temporary visualizer, and will be replaced with the new visualizer in the future. +""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from collections.abc import Iterator +from pathlib import Path + +import numpy as np +from PIL import Image, ImageDraw, ImageFont + +from .base import BaseVisualizer, GeneratorResult, VisualizationStep + + +class ExplanationVisualizer(BaseVisualizer): + """Explanation visualization generator.""" + + def __init__(self) -> None: + super().__init__(visualize_on=VisualizationStep.BATCH) + self.padding = 3 + self.font = ImageFont.load_default(size=16) + + def generate(self, **kwargs) -> Iterator[GeneratorResult]: + """Generate images and return them as an iterator.""" + outputs = kwargs.get("outputs", None) + if outputs is None: + msg = "Outputs must be provided to generate images." 
+ raise ValueError(msg) + return self._visualize_batch(outputs) + + def _visualize_batch(self, batch: dict) -> Iterator[GeneratorResult]: + """Visualize batch of images.""" + batch_size = batch["image"].shape[0] + height, width = batch["image"].shape[-2:] + for i in range(batch_size): + image = batch["image"][i] + explanation = batch["explanation"][i] + file_name = Path(batch["image_path"][i]) + image = Image.open(file_name) + image = image.resize((width, height)) + image = self._draw_image(width, height, image=image, explanation=explanation) + yield GeneratorResult(image=image, file_name=file_name) + + def _draw_image(self, width: int, height: int, image: Image, explanation: str) -> np.ndarray: + text_canvas: Image = self._get_explanation_image(width, height, image, explanation) + label_canvas: Image = self._get_label_image(explanation) + + final_width = max(text_canvas.size[0], width) + final_height = height + text_canvas.size[1] + combined_image = Image.new("RGB", (final_width, final_height), (255, 255, 255)) + combined_image.paste(image, (self.padding, 0)) + combined_image.paste(label_canvas, (10, 10)) + combined_image.paste(text_canvas, (0, height)) + return np.array(combined_image) + + def _get_label_image(self, explanation: str) -> Image: + # Draw label + # Can't use pred_labels as it is computed from the pred_scores using image_threshold. It gives incorrect value. + # So, using explanation. This will probably change with the new design. 
+ label = "Anomalous" if explanation.startswith("Y") else "Normal" + label_color = "red" if label == "Anomalous" else "green" + label_canvas = Image.new("RGB", (100, 20), color=label_color) + draw = ImageDraw.Draw(label_canvas) + draw.text((0, 0), label, font=self.font, fill="white", align="center") + return label_canvas + + def _get_explanation_image(self, width: int, height: int, image: Image, explanation: str) -> Image: + # compute wrap width + text_canvas = Image.new("RGB", (width, height), color="white") + dummy_image = ImageDraw.Draw(image) + text_bbox = dummy_image.textbbox((0, 0), explanation, font=self.font, align="center") + text_canvas_width = text_bbox[2] - text_bbox[0] + self.padding + + # split lines based on the width + lines = list(explanation.split("\n")) + line_with_max_len = max(lines, key=len) + new_width = int(width * len(line_with_max_len) // text_canvas_width) + + # wrap text based on the new width + lines = [] + current_line: list[str] = [] + for word in explanation.split(" "): + test_line = " ".join([*current_line, word]) + if len(test_line) <= new_width: + current_line.append(word) + else: + lines.append(" ".join(current_line)) + current_line = [word] + lines.append(" ".join(current_line)) + wrapped_lines = "\n".join(lines) + + # recompute height + dummy_image = Image.new("RGB", (new_width, height), color="white") + draw = ImageDraw.Draw(dummy_image) + text_bbox = draw.textbbox((0, 0), wrapped_lines, font=self.font, align="center") + new_width = int(text_bbox[2] - text_bbox[0] + self.padding) + new_height = int(text_bbox[3] - text_bbox[1] + self.padding) + + # Final text image + text_canvas = Image.new("RGB", (new_width, new_height), color="white") + draw = ImageDraw.Draw(text_canvas) + draw.text((self.padding // 2, 0), wrapped_lines, font=self.font, fill="black", align="center") + return text_canvas diff --git a/tests/integration/model/test_models.py b/tests/integration/model/test_models.py index e743cd52f2..eea3d88e66 100644 --- 
a/tests/integration/model/test_models.py +++ b/tests/integration/model/test_models.py @@ -7,6 +7,7 @@ # SPDX-License-Identifier: Apache-2.0 from pathlib import Path +from unittest.mock import MagicMock import pytest @@ -179,7 +180,7 @@ def _get_objects( # select task type if model_name in {"rkde", "ai_vad"}: task_type = TaskType.DETECTION - elif model_name in {"ganomaly", "dfkde"}: + elif model_name in {"ganomaly", "dfkde", "vlm_ad"}: task_type = TaskType.CLASSIFICATION else: task_type = TaskType.SEGMENTATION @@ -209,6 +210,11 @@ def _get_objects( ) model = get_model(model_name, **extra_args) + + if model_name == "vlm_ad": + model.vlm_backend = MagicMock() + model.vlm_backend.predict.return_value = "YES: Because reasons..." + engine = Engine( logger=False, default_root_dir=project_path, From db4c2850047eec8c1665826a2b4d854e102bf975 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Rolih?= <61357777+blaz-r@users.noreply.github.com> Date: Thu, 24 Oct 2024 11:51:39 +0200 Subject: [PATCH 21/32] Add ensembling methods for tiling to Anomalib (#1226) * Fixed broken links in readme * Fixed inference command in readme * Add tiling for ensemble * Add tests for tiling for ensemble * Moved ensemble tiler to separate file * Modify padim config for ensemble * Add tiling to dataset * Revert changes to train * Add tiling to collate fn * Fix tiling in collate * Change val. function to protected * Add tile number logic * Move collate fn to separate file * Update tests for tiler * Add training loop for ensemble * Add model input size setup * Move ens config to separate file * Revert mvtec modifications * Remove unused imports in mvtec * Add batch adjustment to untiling * Add predict step to ensemble * Add comment and docstring to tile joining function * Move tile joining to separate function * Add joining for all tiled data * Add joining for all box data * Refactor pred. 
joining as modular class * Fix box joining * Add label and score joining * Add ensemble visualization * Add end of predict hook * Add metric computation * Fix metric thresholds * Add removal of individual visualization * Add demo1 notebook * Add docstrings and cleanup * Add memory benchmark * Add modular class for storing predictions * Add metric to separate class * Refactor to support prediction data class * Rename predictions class * Add filesystem predictions class * Add resized predictions class * Fix joiner for classification task * Add page peak to memory benchmark * Add global stats calculation * Add docstrings to stats calculation * Refactor joiner for pipeline * Refactor stats into pipeline * Refactor metrics as pipeline block * Refactor visualization as pipeline block * Refactor postprocessing into a pipeline * Add normalization and thresholding on joined predictions * Refactor tiler to accept config file * Add smoothing of tile joins. * Refactor ensemble datamodule preparation * Remove unused changes in dataloader * Fix metric configuration * Fix box coordinates in joining * Add ensemble callbacks preparation function * Fix box prediction bug in postprocess * Add ensemble params to config * Refactor postprocessing. 
* Refactor post-processing * Refactor predictions * Code cleanup * Optimize prediction storage * Make join smoothing configurable * Cleanup before PR * Fix stats pipeline * Fix logging strings * Fix memory benchmark * Fix tiler issues * Fix import issues * Fix naming in metrics and visualization * Fix cyclic import * Make logging lazy * Refactor tiler tests * Added collate tiling tests * Added ensemble helper functions tests * Refactor for dummy ensemble config * Refactor for dummy base config * Add tests for prediction storage * Add tests for prediction joiner * Add tests for visualization * Fix small issues in tests * Add metrics test * Add post-processing tests * Fix tiler to work with different instance * Move seed setting inside train loop * Fix pipeline stats bug * Rename ensemble config fixture * Add pipeline tests * Fix config in pipeline tests * Add training script test * Fix types and docstrings * Move and rename to tiled_ensemble * Fix bug in label joining. * Remove memory benchmark * Cleanup files * Fix metrics setup * Rename collate function * Add license to test files * Rename fixtures * Add more comments to tiled ensemble training * Add start of training log message * Refactor tiler to have explicit arguments * Refactor pred. storage to have explicit arguments * Refactor metrics to have explicit arguments * Refactor visualization to have explicit arguments * Refactor post-processing to have explicit arguments * Sort imports * Add test ensemble script * Fix join smoothing bug * Add more documentation to doc-strings * Remove unused import * Add brief tiled ensemble documentation * Update typehints * Make training args more clear * Revert addition of no threshold option. * Refactor normalization and threshold config * Remove tiled ensemble from docs index * Add comments to clarify parts of ensemble config * Improve ensemble config comments * Add num_tiles attribute to tiler. 
* Fix metrics process docstring * Fix visualization bug and cover with test * Replace strings with enum * Improve comments in joiner. * Fix bug when model doesn't have anomaly maps. * Improve docstrings (types, clarify). * Fix visualization tests * Fix dict membership checks * Add saving of ensemble config file * Update test script args * Cover test script with tests * Update export warning * Fix case when no test or val data * Improve documentation images * Add images for documentation * Add codacy suggestion * Refactor joiner to single class * Refactor storage names and config * Update normalization and threshold stage names * Add transforms independent input size to models Signed-off-by: blaz-r * Make collate function a datamodule attribute Signed-off-by: blaz-r * Refactor tiled ensemble train into pipeline step Signed-off-by: blaz-r * Refactor tiled ensemble prediction into pipeline step Signed-off-by: blaz-r * Refactor tiled ensemble merging into pipeline step Signed-off-by: blaz-r * Refactor tiled ensemble seam smoothing into pipeline step Signed-off-by: blaz-r * Refactor tiled stats calculation into pipeline step Signed-off-by: blaz-r * Fix ckpt loading when predicting on test set. Signed-off-by: blaz-r * Add logging and add tqdm to pipeline steps. 
Signed-off-by: blaz-r * Refactor normalization pipeline step Signed-off-by: blaz-r * Refactor thresholding into new pipeline job * Fix transforms issue when predicting with dataloader * Add visualization as new pipeline step * Add metrics as new pipeline step * Format the code and address some lint problems Signed-off-by: Blaz Rolih * Add code to skip test if test split is none Signed-off-by: Blaz Rolih * Add accelerator to metrics and smoothing Signed-off-by: Blaz Rolih * Make threshold acq helper function and add to threshold to metrics Signed-off-by: Blaz Rolih * Make a separate test pipeline Signed-off-by: Blaz Rolih * Restructure tiled ensemble files into directories Signed-off-by: Blaz Rolih * Pipeline code cleanup Signed-off-by: Blaz Rolih * Remove old tiled ensemble files Signed-off-by: blaz-r * Remove old post processing files Signed-off-by: blaz-r * Fix sigma value read in smoothing Signed-off-by: blaz-r * Update stats calc and normalization Signed-off-by: blaz-r * Update args naming convention Signed-off-by: blaz-r * Refactor code for nice config Signed-off-by: blaz-r * Update docs structure for new system Signed-off-by: blaz-r * Cleanup train code Signed-off-by: blaz-r * Fix test script args Signed-off-by: blaz-r * Update box merging Signed-off-by: blaz-r * Refactor helper function tests Signed-off-by: blaz-r * Small changes in helper and engine Signed-off-by: blaz-r * Refactor merging tests Signed-off-by: blaz-r * Refactor tiling tests Signed-off-by: blaz-r * Refactor metrics test Signed-off-by: blaz-r * Add support for different threshold methods Signed-off-by: blaz-r * Format tests Signed-off-by: blaz-r * Change test to predict Signed-off-by: blaz-r * Refactor stats calculation tests Signed-off-by: blaz-r * Refactor prediction data tests Signed-off-by: blaz-r * Update metrics tests Signed-off-by: blaz-r * Move metrics tests to components Signed-off-by: blaz-r * Refactor seam smoothing tests Signed-off-by: blaz-r * Refactor normalization tests 
Signed-off-by: blaz-r * Move mock stats to conftest Signed-off-by: blaz-r * Fix typehints for generator Signed-off-by: blaz-r * Refactor threshold tests Signed-off-by: blaz-r * Temporarily disable box minmax Signed-off-by: blaz-r * Add tiled ensemble integration test Signed-off-by: blaz-r * Fix normalization tests and add additional merging test Signed-off-by: blaz-r * Add tile collater tests Signed-off-by: blaz-r * Change dataset in tests to dummy Signed-off-by: blaz-r * Format and fix linter errors Signed-off-by: blaz-r * Format and some cleanup Signed-off-by: blaz-r * Rename predict to eval Signed-off-by: blaz-r * Update docs for refactored version of code Signed-off-by: blaz-r * Cleanup the docs Signed-off-by: blaz-r * Update ensemble engine Signed-off-by: blaz-r * Remove boxes from pipelines and tests Signed-off-by: blaz-r * Fix TODO comment issue Signed-off-by: blaz-r * Fix unused model in ens. engine Signed-off-by: blaz-r * Fix path case in test Signed-off-by: blaz-r * Change temporary dir to project_path Signed-off-by: blaz-r * Change mvtec to MVTec in test path Signed-off-by: Blaz Rolih --------- Signed-off-by: blaz-r Signed-off-by: Blaz Rolih Co-authored-by: Samet Akcay --- .../images/tiled_ensemble/ensemble_flow.png | Bin 0 -> 87660 bytes .../how_to/pipelines/custom_pipeline.md | 254 ++++++++++++ .../markdown/guides/how_to/pipelines/index.md | 264 +----------- .../guides/how_to/pipelines/tiled_ensemble.md | 157 +++++++ src/anomalib/data/base/datamodule.py | 7 +- src/anomalib/data/utils/tiler.py | 14 +- .../models/components/base/anomaly_module.py | 6 + .../pipelines/tiled_ensemble/__init__.py | 12 + .../tiled_ensemble/components/__init__.py | 30 ++ .../tiled_ensemble/components/merging.py | 110 +++++ .../components/metrics_calculation.py | 217 ++++++++++ .../components/model_training.py | 192 +++++++++ .../components/normalization.py | 120 ++++++ .../tiled_ensemble/components/prediction.py | 228 +++++++++++ .../tiled_ensemble/components/smoothing.py | 
167 ++++++++ .../components/stats_calculation.py | 180 ++++++++ .../tiled_ensemble/components/thresholding.py | 114 ++++++ .../components/utils/__init__.py | 44 ++ .../components/utils/ensemble_engine.py | 92 +++++ .../components/utils/ensemble_tiling.py | 147 +++++++ .../components/utils/helper_functions.py | 179 ++++++++ .../components/utils/prediction_data.py | 45 ++ .../components/utils/prediction_merging.py | 167 ++++++++ .../components/visualization.py | 125 ++++++ .../pipelines/tiled_ensemble/test_pipeline.py | 124 ++++++ .../tiled_ensemble/train_pipeline.py | 123 ++++++ .../pipelines/test_tiled_ensemble.py | 62 +++ .../integration/pipelines/tiled_ensemble.yaml | 43 ++ tests/unit/pipelines/__init__.py | 4 + .../unit/pipelines/tiled_ensemble/__init__.py | 4 + .../unit/pipelines/tiled_ensemble/conftest.py | 151 +++++++ .../tiled_ensemble/dummy_config.yaml | 52 +++ .../tiled_ensemble/test_components.py | 387 ++++++++++++++++++ .../tiled_ensemble/test_helper_functions.py | 113 +++++ .../tiled_ensemble/test_prediction_data.py | 69 ++++ .../pipelines/tiled_ensemble/test_tiler.py | 119 ++++++ tools/tiled_ensemble/ens_config.yaml | 43 ++ tools/tiled_ensemble/eval.py | 28 ++ tools/tiled_ensemble/train.py | 17 + 39 files changed, 3961 insertions(+), 249 deletions(-) create mode 100644 docs/source/images/tiled_ensemble/ensemble_flow.png create mode 100644 docs/source/markdown/guides/how_to/pipelines/custom_pipeline.md create mode 100644 docs/source/markdown/guides/how_to/pipelines/tiled_ensemble.md create mode 100644 src/anomalib/pipelines/tiled_ensemble/__init__.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/__init__.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/merging.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/metrics_calculation.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/model_training.py create mode 100644 
src/anomalib/pipelines/tiled_ensemble/components/normalization.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/prediction.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/smoothing.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/stats_calculation.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/thresholding.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/__init__.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_engine.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_tiling.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/helper_functions.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_data.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_merging.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/components/visualization.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/test_pipeline.py create mode 100644 src/anomalib/pipelines/tiled_ensemble/train_pipeline.py create mode 100644 tests/integration/pipelines/test_tiled_ensemble.py create mode 100644 tests/integration/pipelines/tiled_ensemble.yaml create mode 100644 tests/unit/pipelines/__init__.py create mode 100644 tests/unit/pipelines/tiled_ensemble/__init__.py create mode 100644 tests/unit/pipelines/tiled_ensemble/conftest.py create mode 100644 tests/unit/pipelines/tiled_ensemble/dummy_config.yaml create mode 100644 tests/unit/pipelines/tiled_ensemble/test_components.py create mode 100644 tests/unit/pipelines/tiled_ensemble/test_helper_functions.py create mode 100644 tests/unit/pipelines/tiled_ensemble/test_prediction_data.py create mode 100644 tests/unit/pipelines/tiled_ensemble/test_tiler.py create mode 100644 tools/tiled_ensemble/ens_config.yaml create mode 100644 tools/tiled_ensemble/eval.py create 
mode 100644 tools/tiled_ensemble/train.py diff --git a/docs/source/images/tiled_ensemble/ensemble_flow.png b/docs/source/images/tiled_ensemble/ensemble_flow.png new file mode 100644 index 0000000000000000000000000000000000000000..7a5a81fa79819261f77be24d3e74b1dc8cd495ac GIT binary patch literal 87660 zcmXtgWmFtZ*DVb0?(PH#?(XgZfZ(3x?|q~?QdL<71(6UD0s;a>PF7MK0s;VnfPh4ThXs$om&g->e;{4dWyB$>Cy9?B zAjl!)B*ip54Ntq^vJA|#h2OK>LGv2q`61+crSRIdL6z;t0d5~rf%FA=x9bMuQwD3A zkQCSy-y!k~N2*Vznk%Af3tY{MKXrvAB&7fOb5{E1KXsGC5t`mMIx&H6&;D>dO1HKC zW#cBtCFkKqcra>*SNdSiItXPNQ1g1nQ0aO6b!D#dOe8oRIkSeg4k6UqWZ?Sjck}SH!*So2`}I{5 zrE8J<<=cgwi1G6qN6+Bf)cSpxe-BT*<=qP_Uk=apOMIhKkI3_3pO^D50Vfum)QA4{$)6ooMw*SU!@zWqj zyzz;S1Cc>rGt-*@;TNo9U$j5RIo~JRd2G9#crKHmK{xvopkAM|)9N|E*&?7c$D4uQ zxZ|yQoYqG&{{7nc^m@I4d-*2w4_3$9@(Y{oUmseRpPuAQ zQtol+c5&}@zp=Xmk6eNG%EtdYN~akd&|wqy;^jn7^wGnFQAXy+?ZuMa!TsI_PXKMr$NxRf%iRT3wh)l` z^E4!gXxMGVHLm_0Q&sOdZ~wb%<|B-K$DAz$zcSyM#-o2A zeN^IvnOF!hW1Ltf@vswmSiUk1q6BYs#fBYdXk-Lla7xU|3fm1bZcjGYSrdGCyA%#IdIOa*(PcDiccIii!ibIt-LD~HIw_1#P z8S-#5cqa7W#QURm=1GwcUq#v_!{^sGZLXcBEW6-1EM;EERC9_LD(&W2-|9f`rji@x zRC#{7;U;uwNbe3G)tW^NJ?`sewTzPgDg7rj?UKKnv_=0a8a!;9P1CGIC* z(t&}2OF@}|w0O^C0oq5%OSQG=AP-oH);&aQs8#w)ONVJ9OrgR-S( z;ei((^C%J9vTtK?#J`*HPk+Ykk-MAp?tf&w1TKy?e{SfUa4^x1gOOx?h&CUnUoH#k zMZ*YZL)U3HWE0o!+|*wL0EcI~hTd(RU>L(DHa^=kisPrhOni?XiBIyotdSkgf1~+$ zdq|p6vgrwMH>QzqYuo9)@0M9LUZFxff34sPDS{)7G{155uOehpr`kP7S zHj*vj^imE+Z}J}1&-$9{@?Ms=-%$O@jt$2>b>@$`T75}jDA0e`Qa0xl-Zxjm$Jq{C z6C314EXvryw5CD@ctZDdT#d+`Jxq08lM6S4ELYCia!FO;fGuamN5o68_Ccsi>%Az# z3m!Z(uKbLk(j0$So2He+=q&OYeFg-4OyoJQKnPtH768`p-{@qAdif^6uoFx>Lc$Jx z@QK&+fF0&p~3Qs$eCd*96@;VRBC>x(8^V1Sd@VeP zx7GFbar64LgQ6yPpzjw7h`nbJ_nR9vx@-b5+%s<6a34(zlmQPE^)d{yBiGJng~c=w z2b}pplieEJr&`p3Ku?2*^n+`DfmXjOOWLN5kLz@qx{1@TYqFkZrw=crUJ~rA$jLjL zA@-y|pw4jv^xA3iX#-)_a3ZV^>Yoelv<32oK;xVm9FR@GI!EewP&wqDe&9n$GntHT z59t%(uS-4?m`+(-;_-#<;Ji3`)NeNg{gda~5&ih6#N^QZ`IPUMHN%Q!{u_{>FK@9M zd>@Vf0UI6|*c7R;sg{I~f0}Oc=0H#|F8a>Ywf{~lOV$X77cYm7dlf3XZOf^U=SZ76 
zXt=B=fc7`;6<7C*HSK1SfGT+s|NAiv$Wy~d09KS&;Wa-g1wePBIAZ^dG<^)JxnoGN z0lZEqWRqVfDKKYfOtU2tA&3n-4U?o1SA^J9Md@>w_5u3P#D2n3%Pp+C@z(xAGA?5n z?o$|&V9u0MeSq249#mm0eC;rN(43*|9ocgxR75v(B2NMk1vfhPmW#-cQ3!%0xOZsA zp8+3+LpY+9S|=lB9I&-@lw@JrtO4&o03@-`Z$9y@hIU2bP3@R9xp`c{yrCz{&l#H& z78_=FDPMVFV5qq^DXLxmkSD(7q^$pZYghK1Fo&--g_Hx;!$5eEEFJKD*|iV~RWp7@ z#@jbe1#MO)!R97fh)0mM#W6j2qFrg*!F~8WS<*Fas8A=E^#n}e)%<;XVtC{-_eJ&9 zjN8Z1v$*`$mMZF`_~%bEB2`o5!6oT8oE3;IUSow!NElF1OI!I|rMt}5;pPv&)WG%G zKu%6$fo5Z@UoNnH&FHXhDE=m>hMrZto!|7raZjhkH7YmJKHP7A96ylMc8M9)WM?-y zA-VRt6B%VGT}=}P#qB8bilxl^eogDmEg6&0TGQsc-|{MiHdWY6!qMSqI_XC{ zK0G?o8jL3o9>?UYZ@06eLp{E;0c?L(6Z;cA3o%n&-APXez%DYOzQtLjxQt00_oh#g zM^L3`qNcNfPV*7!y_dA7h9>b1aw1o+N=+QKP^*C(LBxJ3=eMY@&QG|#r$iM0JpO_9 z^c-Ihnq*2*Dkfq6Lt5N;;^v`KQJn<2>CL7%Ot&h^m!jtAntYpE2wDxf#}ManTUb(n zV)H8(`XzfDtny%iHC1qduW;!G-lP$0Szh6qH8uFHb{n<#X;jL=L~2*jPJ~@fi3$CJ zDo?_6CEAo_C8#Tqw}E5)D;ZvCg5?L@k@5N;rEyW_}%)dN94^b z+d%8%XN&2+k5kzLpii7dfyNN7X1L~SsQo~VFM0L%m9R_?3<^o^T&|F4GtsF7xy{8b zGkvIOzRj(M(d{~jZ`n}jsPe5O5{p}FB@6-|(Ik>h=uTz90s-Eq)DJIlM585-Ij323 zC5r&?`#mz4YZfk)o;w=O^-aphpZA4ai{3Yw{(Dfz47bP--;pSKeG^U+RxLgWai)uc zgv+FxHeh{XuD9pZ(xF=01kf1;zdPN1Uz(ZPf#X-7q{Pb0_AXs_dY znn6?b>9-O9s%Rc1@Ms>vwE9cf;ja`KbdQS?Iq@ccrh- zS@dlgmWo1uv%NdM;jt}9CI6?mI&S`6GzMCOS?ByV6a@r8>AgR)%qg=5Ui%#7^p4$Y z;3;uCQ$1~O8HqFJ&S3jv!)SL})%D(M;3_o>#1>I3yM(wB1OR6I#k@q)Vgno)8(#~sdBIA07o+v?gAJ1-(^2TzKzkk?rKI}VK% z-Tu4OBjDUd6@`#ux^fac8yk2w@X|o}^>4jUXq!dJ%7hQyD{hMhPLq5Gc{MdP#U47A z#iO=CmbreomN{Ou-Omd(ChUe*GnRQsgZD=mV00%wjXwSsKI&rMz!9>f z-Ksv3XcR-nSLaMp&9XaTU=ImT3H-1fA#u`g{yl{?cuS~9%YVl1x2KHDqiKoS5<5>C z6sUKLOp@=tJ(&53zZh^XpTBh*^xk`P-;iW^ta~k4WZFp^H9&j5=0G^yUC^BfbsF~p z?7*}TmtLu4Uk^%WH-33K$c>*fM!JTRBUcPTdhDrr&omynM4EV@RirQ&s(e08 zgL-!nAiWA{m7x%|;I1gzp@YA|Egp3b1z)%ZCU`TbKtIy)e@%W%h~XWAcx^iK zh&9d}fItyvTCRyj)#a{xUy6`+em*K4{~mgvY;*vFv~T)vF$mQ0{QFkRuWY_y%|2gQ zbb|jmibuY-1Wyu+KAnn1u|R+SSy}+>i;~m-SsAIbR7MjO(LhHv?NZK4!KnMi!n8$~*Nq#OTt$ z7LIkRD-vXi)&IJC#iTfELZS+Cdl#}h2F+|JwiZ_D7wc*!88D|Uw7Ge>rSJAGi4QfY 
zr5H6>Vj@~Ao$wZJq48UQQd!uNpB{Ny(qU^ZH)87d+b#uXT}7VR8QWNq_ry-&xQc6L zpwj#31=FgltJMN0{MZc~zQBNDLrv&%d;$Fl-w5Lb8(2Co0slE(?OWRvSvfM=dVFd*$p1EQdgj;0#Y@`>4mTFxzSkh<&|69Ce~u%7+P;b`ua6~o6JCKj491mapB@K~ z+P7$NZQ?i#OdJUrSt99R-iD5NWZEXIBRhO&GjaLiTE;Rqk5Z;r@2FH`CmTjMn zmv!F5hm;!MJ+2pm4v4F{Wy{=H7XPd0$ElCksTn(!#sJa>mnore2~ zzUE+OFStnGlp(k7htX$Uix(Au|2d5(3-PrE)3ZbVCsp3**@?tNO}y4I_9Yy-%G4ge z-6lEtgtjp$+QWb!K-*raL< zapaj()~tOyMO4y3A%JYUiit#Zq=s~A&d{=N$S$%4-RbnQ`56S*+9SW-%H2mFnsAu2zu9pZL(r3xpH?Kgc`v>h=BpqD`PB9@tUy z%>EBFcckm@oh_&c5&D)@0-omuHJjLn7|TMIz%OXZmM4#B?U^G_+u7K;$2!~ zvEy;Q*g~{_G-1q;44}&tqmVSbxZ@BISW$hk(#|rd`Q9hZQ!<7cMV-1#zqnc8tZlbL z5@#-$>uqj4*sjFSo;1c(-7zjiF%0ULY7=W6Oj&RMdMiKb#_R-QAm~c7tbsYRzaO0y z<`6^Z)r)gd_5^ooPTb6}MeKU{A4W~7$>~rJye^f`%DQ9>PR?krT5=-!gWLPZWYEp1!vXW0m3hIg!JeQ z#m27pr^`DK`%7Xn90k9np;FxJ;_HS*+>OB@d^`zqF;whAkAo|ztxa?j+6-+#y311dmKM+>jlUmcGPw(eRXnOr#aUY`MJqV-#3!u4sg}r*KPteOi0l=LU5-zab zxFb>ZwDF6|+*3mht8+Hh%KynN=Mf6HlXa6&p_lEZMZ;+D;OcZ3=`>NRuty1dS2-}R3EAEE~B8iq4L2okvuL6W6-e164T=NgnsCTic{nOpp7RASKU zP8GC!JiG$m>zuwIb^sO+w1}e;XhQR~>f>&-SipFpX6fN}T8M0|$uF!uB{xc-VhmYB zKLeBUrY0HPNC!!C9lY8J+rAxnJ0d*-&q`a7oR~eCzI!LS>><6}Jf9?)P_$G03+EFPpEbkgFCfo^D$skJqfN;ZUq}nkNpO%BL z6~G{l&okG+UXKdF(CauDT$P@T9M-dBPioTt&0*uKx&q zQRH}gSD?=jIj(fVXW(u)5}TTW))e=_DQ^ z;Nr=*&vC&{;pA$g(h#dNZ5#(up_xpKFQ~h$J=x#XbnEpKeDjnbQX<(e5wD!W|F2>B z4)at7#?3gf289Rz8?(0pL(=`Mh*8-4=Ld<6Rlh~VcA}uO9==zLiTWcxtHIX0BEi>X)9k@D@o%JV~Z(s=_U{7ruE@taYMS#>iijfmR$o`kskuvUDo1710 zDJMc9dzNh|cxxfv?v$?*0mxI&p&0LV(ehCys3#v4>eSZ0DW z*MwMum}vh+{0eGnL&0m3*U_}|*WTm8+3AExEh}%}7fRANL;fCuW26Eqi`BY`@yNjt zzxD~%N2HKjV1=b9p%h|gXR_xOYuWL1#L`9X(UIg;vVdQX{cN-G%OjS~ zGWWt#g1A`lzCP*z)voBlmi8_q93Z;#6;t*v)R#|MnwhUhKj%?<(Ew(L^IM+ygrvc? 
z9R-Q_x>C}jKQ|i;A810UaYD|?p}|QREsbFP)HsTu@;&HAH?1GZW1OGpDg_%>n9F5> zk6XL1V`FGVSqZQvz7dQxJJLy`7OD(d(+h)${Hiqzu36AsYX(BvgQiO5dzIJGg-Tz4 zkk31X-+JO)WC5lK+uPfv5Ga~Lhx^^Pyb@a`5SyOjD3H;~R%;ee){2JQI#jDQM&{&x z@w;ihKD`7h@EA-aCfO_*hJcODC-4J=9lfIf%96-Wy1ThBj7*p;qdOxqt|^n|aJ6(8-G z{J{M07Kaa+3`_Q@4MK_J&IN&4qXmhI(1d#V$}v@O@i_d-t_GR;8H$bTD}az8t>r~< z2An^>83r^gA7cT0&={nt3cB}-vwNJc4mlSWyLX39sb8s>r$$z^yN2(C`Ku%6E;o9K zDa--smUpBcMInW!79r7_J)$Nit_b6H2?sSacU&`jZG$r-3P1k^l7^)|(6b5F#6Fk{ zpwIu~N4Yry@gR){A2}osD$ zPo7;4xV_~utA5r+T+>Pti^zmuzIW!LZb#y5BETKQBs_#b>fWdHHPaID$cbdL@pLd$#%(Ws=) zuJ`KH@zuo%qQ)Yat^bai+Pz*cR168OYpu!7nr?IdrCISWxc~X9K3m=DFC3WCeJME9R7H&N6ofwU?-FLHuGAR6 zCc>RQKUPm!jrTP}2n&JWQ&$v(dVQi8F%bG4w!XMZ2I)dfq^hJKl4E&X_&ExRafVIx z@L0T99}~yoPBqJ>jaqf`tQON=GK=_YLebDw+w3-9LI;#<$9VV zD`o&srJb7}pKnPmKUxp_k*3S7lI$kh()DTG5)h2(IB?56zn1wCcRB$?q7oQ)xqK)u zY{EPJj!``Qjthv*IXzvu7FZsLuWzBeOD!8oCK9M`i=B&sl8tT51|9$?R9QZFVp77t z#;F0GL7%4gHeLII$I;O?9x`6?!TD5}B*9b-YOg_@uX&m?)Yb++R7TvBhOKo9g04Ef zL~ckIMcIqAg4S=U^VRs`o&)&D2b(vqYCk`g*3=9i^6^IOHRPQWos6GX?{-g)x)?qL zy&a2kAp7n7rP9^Rd&xSf?V2+#2P=&2EpxQ4lBx`hsIJ>h-}xw({&A)7%*?FNzj^$D zTjm}rk3&Wzu4qCJ9DlNIQV|$I&Wo4GXQw^;0fbdiv^qf5s)F+qJ|)A3f2AL)HV;`N zQfYX-ny_o#74Ny@c6Q4>Jd5H*A?1NIFIMO!8rZ zXQzt3;F{Yn3|LYrM8~xxv|>3Fo0snn(XNbqO^~~ib}-d}x1}f`eDQ@4IO%mZ_2hH- z!B7^AdZ1%HC+DXeJC*jEUn?CGHP{=1(ZmAgBB=dnXIb(?k~zSv50xpD%6#=HAkMlB zjQw%l{t7@v&ZI`1p=agzJGYz&k1qZX-nB_>(wh}gqjm%P=$zTI@CV86SzD-U+wHLU zt`*a^`B%Tn_KOArT}-<1PsA|`>8aCIgz1a#rMkhLEs~1 zP8VBgwmp%BuY0LgseAsYVY2%Vo-w~^f0{6yRa_?YXADK#umNp>V{?Kb0BHn@R0L=< z{NwzXo!G_Dft1J6Xf{ZH;z8|G%JdqDgFm|e?oVa-*6j_=AC^b)9_OpQ9Z?x@t}H~v zCC^HuabWk&`!2~A>k6lMLm0BZB0L16!!mfHiVJQSREH$T+VeL82Q$tm|t3>U0y zg~b(>DxIKYT+x#r06`Q)hRAEdq>`uq9S5Ykp$&wmGHM8g2AyKP#(qkRL^TYdn{wN9 z;h!m1&wj&qWWqH!SVR_Jn6_1vUu7>yY?AJQIbtj5CPbK3X3$Dh* zvZ6X}@C}3gvFI#AAF7#eVDZ&PDc_VBAIZ^2^z=8`I4s%!?*+g#V;7|hi;nPPYs6fn z=Ln48GyX=lZ#)Wl8Uq8hYkgp|TCeR`@A>ZQ`bhkX778B~T+>M;8)&jpvV|3@Bcj;n z?hT4XL7&GPZ#}^qZkoR^`as?AL_33M-Wa(tO7p9UZ0KSJ9WmMJ=Gj?O(`ESr(CZi! 
z7`q#7hHM;l$g>s}k(gT{vWrHQXLp}h|JVXR)`PjAp3T>re=tU#3E@6pn$2aoEc7#q z$XZSDWCt06%n_M{a&teB-S4TsT_EcaCAd~tcMxYSd<6!eUJ{S_Q=r-@bu1|Ju?!!I|oNfen0|9Jr_^wb+0IpQ?L3Z zw;34R9%fP(dY)r1^y`4ep`k`P`Ep zX0#1IyY9@mRaB4;Pb>7~I_g=Q`|QW%N*DTn1PwT_ch8u$x8d4%?UG-$>R~7`ECx1U zVN_JRnHkI(7XA8-%Q#xesW;M|#r11$T)JP;)PY|^RJred+3weEm2&&h$~(b0ReY-8L%J+p?%Ed_dd3~ z+aNDGu>F|vGnNq$Ov2Cea+tbYV}JV-)m`BdzgASPa{0%RN8T~%ohjgU$#-h#n*h31 zhnEVmFN(wAI!zY_^q=K@{j9~1SX`Jab4TOvxZ=4Pd7hk&!>t(F@Ct6)3;gG=Pjx%J z?qEt+x8G@Ak_FTAxw%!#J#iKAOAKwiH2{u3kowhZAN1Ib0b}TUC3Q%xO|!215_64)Ba)yi6+<@!l3uVHOtKx1te5ghA}MhWSFF06n;%s|n#ZFFsqCU5s&Y z41+}UmC2A3)s+)d_WiB&-B8m|!UN&bq2(1C4C@F#^=GrJJdW@kyft@u@gsiKkUOHH z49as7d@(3#+OOb%A_j&QWEG_ZIA6THJVWn3oszfW5`P;>S((@PGAJ$E`KnWP(5;yF zBj3BOe!|+fnGX(v_*O+@*=&71W!_FH{9C?4ybRb?NLk>5&F09>wDlmS_P?_>2=L>nJKW-Y;?~ch z3_w*hh`i&9?)5bXm=svACqpk6Y)VD|Ip{Zqjx z%bUY_$fpbW-bz|g{}!Gb3(l?0UC-ygxjW872{kE7{MizZK+1pMv(YE_If^!tM5B)K zgOmCoDUl*f1xReF_%jK@Vd#=YDs7XK?GC!2kiTRMWWbu`6D&2Z?v|oytl9Ued%{eu z;_JkZ8m7;3XRK<+^Q6<0b>gTzj7Hl*L~M=>5cjr~>qFZ0yJIDN_Ycf%8r|1~*c}AE zH-H^V=vE%Wad=t8BMsD&N(M|>krWfONbLUJgJAMkpbXAi{{(ZX}be*>@UeLr~ZbUElUVv8Z>rX zt24RkVq#}|5O(Oc$Q2%c@&MN9-L7-V^e_}7LlI1y8^SGEx;RS5@1``7DZnNuP%kpC z zJL$^li4Q)TQx-7jUV?&x0+*JRLcb5$g|MRuCop$fk^XfU8N|##{}_oVt|iNjQxuWCY5;Yavv^7B$hJ0 z@g8VTjnHI9B9#nO*pe8=sES+#PEp`xg%Ai2qQC@e+9160wo(UDZ&Ja>$ol3%;lSl$ z>t%?tI<>8hI3|8FlBNHYpOsF$Mh3kA$#_CsWR3e<^{b+NNSGcidMu4Siam*>Sd33K zfzh)4uC(*LO3o0gAXC-+<*Fk_0RMB@mzoEQk%VgGc_;GB&v95H-WZZb9&V#nwtf^6*oNmHagqW_LwoN24+-$ zn_sgvaYj(PE6E*vDq3hU`}zCc)y}gYm~Xn)rR|!W!If8cK3YsGX9ZKKoGLrN>%W9B zAZTvqD_P7}jdbI^(fjFzwPHLH60HB(&5=JiffFxsW!9B&qw{4#@ZdBL^&Q*(0 z4EuqB74bU_PXzNwv%*nm>^-?5QoS7t5Bm42*J{Z7;ZrL@+|iQ7YXeR8|+$1 zU#lTQ(l8w54SaUpM-l0WA?Gzt4tOy&9wpPK@2UE*5(kkV`_g*4em!P-7W?n38@`TzP(9WMw46TxLwtAVA!pMAd zIgwFQ6p~e+iL^|2s;BoVmcEI7HtfOHovYHT6RLYjFwba}K5fO|RZL>v5_WrEXVseW z>c9oTqF~Djgst@5#{!&8e@T&l{+%-Yqu`6gvK$w@pd~6VGFS8@RkyyuQW$1VlKnt= zz_xvVSGe0i+rdLSGbGM}s4Bx&_XlrVv44;7)XAMe%a>)t4cybpp0dVXaBb`9#~gII 
zP7@VjMC#4=X5kv3O1IV7kmg0=~DPD<_qJ$`nY z;L*>)q2^tQFrv~KG11PWT(z*i<8bG^VG(NyL|tBX}PF|5(* zEF#*y9<>){;>u3GoQ(&b3%cE4&ls3%a6iQ(X-WD{YMwxYUR-{V>hu9U6Ej^ew(N`%u0g5>+OLxU;sMnGo%J znOY?zJxHX)6rpWpdjQhuGc3a$IAt)QZOFYQXA}-2VEp1{H9twqGlq)~PS53fCbcai z)SBjHB81ami7`*YcjiU2&j)$YFhhY0}^ckzWzTb|!!nXoq#==S3l=bfkWyCbx)W*y}52P|me> zPE(*!qoxluWB(-eg(lc8s@m#GCPAX-=%+?xy(m*Qx;i3^RF>d{p8dPioISM!V~)mM zeT4(Z>f^Kv@9UX^7}#ZKn=nV5$-za8+eLjX_<HZlL-4iC_v&&z-4tBu z`YcGn{XI5Fgrn`NaNr&&IF)W44XBlg8Iw5U4sibG^F6$yoS*Ucg~qxWme%@`!ju9G zg(O?1I5xhKN-ZShBpzVpp&&?KZ)afgERnF^1CqoINEHWN(d6b7x7;U%mCw`i`v-)uzg=b<%f*wjj(xO^x(ccM;sy&w3m5(>RqzDl=HS#g78iPU~P9|W+FtBf*tk1@^D#X=RBKIn1 zAa3$j+RWE2D@d3qkdIeY>Vv|Y=s^Ep5PQ1;u1Xs5y;J}ap#7k$@r*cCIFqy_lMCo2 zamzIUHl~a+wKCJ>wtT^Hy)sQ&GSw9dUoM~#|s0 zY?eU@eF~i_>k4j2llAu^bs&xWK zZZ2;`guB3WdAkxhRQXTRsY8)L*OsY79G;r;t`KvUeHr7(4&uUw zO$tE?5^gi37PJW-;$_tZBr!?KARLJ&Ek)H*p3FSg#Kk0wHR*qmjiYBoF8{WW`D%wc ziJ4es!ICx2TP^!Lowx82_uX@?PN^d?>j8AYWvYQ11ec=|7a(H8@(+~mzYm#5ESLpZ zp*D`vSeCOX7N@89c^M|}CquK2)=4!7K(E#@F-tAxA4|MP041<8G%e9lr$ow$dR!*& zFV(FsC5#=s)wB_|GWcNT{#Ms5H_9T^Ku3&guHK9%>mEOx%&O4&EGwJ))ax}ibj3Eo zNcY{RTLwt6=xflu{an=c22$(+k_8TvJo>!S4TLpqiU{y+W%?{r?| zhJ%rtn4VCK8ZtMZ9o@_kU5>P+d^{aFimFon;Ew&rL-Xm3qIj9pD!sLIcnUAS>*YH+ zKHkAkK~q}BXIE%8ToY`?k_7_}P13xPWs2?}daboS*wEmGR&s)>zdrsJ)+(>qZCKgWHDOgDC_b&+55Bt8F-0?sEHFh zdMw|xk#JBX#&Xb>?-$t-?D6R#s711kMxxP9P?~LEn~5d@ns5g|MC9HM3=f|KBsA;f z)>t*om+_K%CLp6G?oWay-CclFv-_1hqR@<<1;dD|bb2`btuYWc+mvceQ!?#HH!8OYUd|j?z)f1MmVn`sSsu-7A3Gy`O%Nqj& zXQnM-`2MyyTfKtX%jaYp(_cG7sflOHB6fuGgP5sIZQg~2iFsl@ADPJkX1sma6MVfj zJrDos2~_e^ZCY`mdO(o1A=0EV{?d?q)fT+MPkBbs8QMRa5xOMln;2H=Qwc>(83)FoCG@EfddwcQ=xPlF{u*hIU);R`dy&PFWJ>VO;OvF(N3*%77xizl zT%EWr`9Pm3Zvn8Lh;m^baJrJ@+}!k2?mc7w73S_M2x(%Z1DR1v^nKP*=M$MJ3L!61 z)~Ka4+zz25^$IeIu@HZM=&$02--u0eK(3Vbv7ayaI>O<*`~4w>H;IMh#Lej>u&p=y zR|rW&Aw+VS#@UnKTy-~593XMHfBqfDV<;uYu`g8fEyDk%RfE_D=zdfAh)|w2bJysV zG9DU(9uS159&xdhqKo=$whGRPN(yz%xIXKATxv#c!3Qr>Og*i8;MmMXS*ag?sAXbM zoug2a?Kw;371Y$ArI;*B2GaR=q}WKMXgLod-2Z@Y(@iRe*uShb?EM}}D!w9FY$5p* 
zaDcGz&B{vqO?+W_7iN5eo{*D6ik85M^L?%UGFi`m7VJyGG|`v0*M#r04LHF;IFC<@ zasQM!Zn{p}-^~O{p{JM|zJ$b|jnWjK`o8a!CF$VZm5{R?{z69+?b5<&r9HF|NT#ba z8x8d~VZ!jaCE>e(-6a_3QVUNC5|(nW$7I6(?N_=-*oWB1&3JU@9cW7n0h)jxTpt;8 z|CjsR34j++NU*H@uorqD|3_g0^$u;f-lbi{(JQ+!UqBYB9nbg*9yl;S=_*cm$&4i2WPe1e7tnjO6KXm>#4B6L;lD~##=N}WP@KMPt?BWH ze)ykSey&>7vEH!`dbqgtD1=imKoZeAbK`EeyjbR@Q`3K!5e)cf^L9VKRx@2#8MAyR z;K|SdpLr41M3e1?))}-IEE^iJo`RQ+u{wJJM=nV&dJ-NUHCqpW?KRwMn6BUQm6Q8Q z3-iC7W$3Ae|74iOD&eqXjvj6OInv)5F&52Y2rP7Ws=VFXEAL}~Bm3JZZ`#QlAN1w+ zySOq-NcXzopw-EqbZ?Tj_7(4;!XG>*0bfFdL1mE>U{~iZ4hqsi_KQ%~Y=KphXqE1# z^iaXHZDB176eEy!K7r^#n@(sV3yXdCJ~mvAW9X9!j$M)cs`%sVu4u1gFBOWcUk~kZ z#2~y!?iQD3l~Gx$<`qo>H80NB%wCRb8eNXXwLqnASp;>5?x)m}HCfd6zWClTN2fjV z=K+4)Tt8^AbJustr66>1?r`|QwPQ8rJt!*r^ZgRCu7x4dL&kxsH3z^agL^*jl2t+KD)-mU^}gGEjB^( zugli1fxCSVf9s*QMKAg~Y1fTCC!{}M>-d`L23TXRX_ z)E=uHKMy-YIHG`RHzO(-584zm{hfMeV~}M(%i?KJQ3S68orP}qeVlbdoj3b7inp#0 zIsXA3${v1Or}u5AX_)hK!Yk+rE$T#{l&~+Rn(6)QXUn&B2R$q$%&})pGJ@zS=t4P4 z_!77t$R1WGY#lBB2-&lQWTXZ^US7^Al&2J|UyR4X1hR)>wpKjC@Ux#9cS8mp7xu)J z*%&|(4XDbnYMnO6mX1OVjy5Fm4Mmd#nvX1)}iYYZVC3bS@dEsnAISB9S6a;We&`dEc_TX;X zCWlvtCL~Z~ZZu2QKee?B95)}}0hxeUBb91;e@`X_=1d|3$yO#X;7h~>n>3MkuiIv9 zxUep+*I+ovYYA@ElW>f6i)lv8AdrGkEd>uJCa!Ik-gg>B+)!#+k8U~T#pBuVNAngWD)rmP0<-F9cfYWNJhI4qAJ!aR?De92#W$(l_Len!RJU3NW2hnX@!=x*TnW zSgn9MJGT}HBrLy!1ZC~1nZMj}Oa@QD6;f6yA-nP0{keeW?0(1&5TmnBT7^<3$iRcI z$jJg+Ft`$cbTd0Q5b?*vuCnrXsnMoFAAW`fP1jN4tkj85zAKiD-&1Mm%ecDsRG-+enERuDM%nSrKGBlzr*>h(5-+%44`GxNky1p*_ zdeicb0ako8_LV>O#!p`~B{Yp`n4`?;hCfPs;|&+|Q?28OuN)cD1LMR6nTJorAeQIm z%wZw1PuQFi-&Y2c2oj#5S84gP8?ZZJugeak6g=~wQ!&Y2?tUvzwc;gQd^-5C)Q=(p zT#wz|*SqN7WE`(`EeclYa&YK0FXhPx*NGXD7s^-0rjhXDb2K~9AEO9&+D6Rl3AIbq zjxmF1f{35QdBA6F=Z#`-#3@UtRmCvXa(Q(fYh~Nft@%3FcR%Vck}SlNxW(clJ5{zp zSd-kAA>KaTu4mQSg6ozY=}X6>l|YxMm-Li?LA{n$IMAOZud2KbirC%I%x~r<+B&JK zrmu@FX#MZtA`KTanvzKAIL!B?yXmEinIK6H%qS9`5TdA;Lng`<0AqtNo`ZzX{MH;ZOq2oQtl~V9$H%y?ByT z4DVTwzrk>nxS4_`N_OaXkNjM2HGlQyGW8|Yoz8kzLlhrr5>n2{sOB}5U?`E!%Hj~u&&tkYUO8^T0SoZ0 
zmj^OV9W}WYr@9pHM>$#I>=3*t{TP*J+749mBk*5yDD67o_DIyn7&0{AI+Xd_Z!2!x zNe8mpHCcN1F`<(}@K*!=@4EojmiRN@*pVD(r2ZA()l(LLuu_`92Ja)HQW&ir_)sPD zNaN7Opia%;v&z>ZiKgOo!w2kKMO7iwedHi~8cMAS-5SQ+0Y@k8#mL3jyb$B8#YN<} zKY=le*<@EOniwVUv2(L^pYK? z);(0Lw9^)BvCN-=JfpsL3S!(**;?AMkuC=fK~N~k#MZ4#C1;p)F!wjQntrJdh+;7& zWmp(>fx>(jmGHFZ=s*#^&S@%LmC~;qJvV&nHNoIUrHR0}QD{)b7^Bmf!9hrX1BYp8 zQ^+|iVDv4n10k=9j|4p%zs2tX9jrKu#-khEh_Nk*A@xxc%p_!@LKxO76T`d>y&N%I z0%-zC`zKnjr!C|%L0jJ^y>y;_xe)A}n_7e`Mir&snFR%|dQYfQ$FL3Ohd!;O42XP*PDdrn#WG?2FKoT(L^dfbif|Iu`gVVQp4+pjwtldY*H zo0DzZuF1A-+qUb@wmsRlYnrBd?(hG1Ja4a8*Ku8Y?X}jqKkM8>xDXJ_>*53jsFIIb zWL3x?V;y-WPvm8Tpw29sCM?TXqNG*TVKw&fa3lNtwO+vHS2xUqYW&A1DLtVu=nQ1p z27g8xL|p*3AQTSXBshMSh6uig3gku8yMJJ4F31ZcLjx6fpAprhY*(q9y1KkoXUGLf z73q-c2mif2E=Xn3o$X5w;1t)v@yL1^t~k68Dz2CT_Ve<3LuiZziPFgpNw|s?=N!P~ zQbVrg=Y<2xD?iY_QB&OK2Ji;9O6i+GapcBvrn;I;me~ytEd~|g;9%tyT8%OzJ;#Yf0eICFkW<_|kr)z8TwIpJN)PVH?B8wb zEM{a+e^n7?Fq_~-k&6XF0Y+JdG`%{R*tiO`VAWea2_+5gb;lz9#ym%EAtKsuskV4* z!!daNtQ+FT*iX|Lbq1#M`wOVgc+6Q7<&`9MJMY`?#l`GZxZ6Z4{HyQC;IQTRPl^^W zTZA;Fpc5SalyanaZ#H;bnN!g~eyKP!MLAh+Hd!=uD&TOc`-t51pIofDcd}Fjx1MvM*@OXG1k_5a1~&dSNPzA5UV! 
zHAEYq94Haz72F%Rl*i*7{||D#Ui}Ku=j`v=`HA0UD}X@NS3KWjxfq53iM16G2IfK* zbmQOAWSL?JnS-Zw@yR5t|2cY$(Nyx-bSyjQcKU{gFQ#oy87Ro%xtGVa*=SoA`LDeL zC9`=Rb5tEuH~4N1(P88(j!=)4(+O2Iy8Sf#u=Zz{crANyIZhL1e*KB&zo zhaE>4qd7fy3|L!`=E8%w?aac>`fiV3g(Q}jg}d*6qzhA{*j47Gpn(qU*^rbueG`#Z zJWv$fX|U9Ro&0hDrZS5WStqox%&jnoYp1ehFV>B+NfZ#FVXy2@u>1n%#_ zn)nKeA|9SFAImZ{KyXn67D!$@RN?r|rTCoNu@!E)5>skcKC&S zY8YTgS}sL^C$Q85SG>IcqSNPzF7P_s9tY(=4aUa(Up;d(XY^~ItvAT!!6xS}4iOYd z(kb$Iv0OIfn_M;p5=JSM4M%aGyUUXFbbG150f90us0tI6ZFsTNO48#n?~W{ z58^;S@7pfih8|LI&V3vRx!5)84aB#Ik4{KBPMFkJRT8{x)DXHiE&Ui^KWHy<`X%js?$xG>1kxmcqtJ6+^{s zOC(d1u?DngWNhnpHDIKhjvFV=3$4#D zIF+dsnz~wTiWOJPfoW@J$1Ek5f1u~97qcNd6MBW13kdf+P26oaVokfq{lnImIvc-u-A2`& z%@5k$z5nau1{jDdN=EP24bkS+A%^`TYHZ1GdB&Ao-A5>n^-0tzu1NwXhOxmuoBe@9g|qvYR)EF4!phx(hM^_| zkBC~lCvHg)_s7|gqG8C&h;n6(m7n^~IAf_E=3CekuDO~vCMSaihaLRU=gZSm1&uT* zb}rr@`>2t{-KBsOm;7$(6>aW&&6AXBTTze5sW3-eW)7#A){|ghbu~h&lT^?c8%8+F zmQ$j--)LN-ts;3F*Bg%Yz|jDm9ni$%=;{6DhC|TaQu4jShTMTX^K9WiT(;OQ;&HXoTv-yYHi@ECfv3e3&=RU z=6OSK^uTaITFU^@-Tn*5pNP&xt1`|w7c6=?6tz*tOEm^%redI3xaO&>_-modVa&E| zqG?fb9PtdK5ArZUc4AwGThSd-t3HZyE8R(3Ld*X9CdOaaY5a4|E=-&SZH-6<%9BrD z33A)sz;!MJG;a9eTL2Ghd}pX@uh&F%UK(|Z2I)ZiJM@4`>j5T%*z zr&QlnsmaoMtTpuobSyn&EF6=bIx5~q9rQ;Qc%c@lR+_9hATcuF4=gdF7>2m&E>h)4 z7PPG1H(b|Pjjy5A6omEWgx8=7NT`%#;uw{LBx9KIaLn>~bb;_TQQMU&OQ-f5Ys3D) z<}q+{%b{rF>FJ|JH3e*r%h{r~J*$4X8SrR`V{3kg4yc9 zklSiou1g-XAXfC~x!r_!a|@Nefx+0tw;5(+)_B6T$=71$_R%`FtaA{?(w*zEh3?v4AJTKaaY1 z(uf8&ZoMu&Y=-v5VP$NUnmH!CL;Je0x>|2rE<`PTYf@FvZ5_J_LEH3G{I7NSOW+YY zzC5#@B+KYTq5fFQ`4V&cFHEPmAiq=l;-4E7re+I8Vzw&ld6qoBMflljeV|Q`AjQEv zOwH+`NeazKEe)c|-S;Zl!CK_c%$#3bpfp^)&jB%045HT%UeM`h&^B-v9>#7nxb3=u z9bO9=+RAy%(rk0BsB}wZQ)vCw@vg%AkoSc12PJjk=A|{zo+&hrj1e0dzc+lD~cag!-gOaxjTC=U8NGYLiy5dYh2N;}wC zg#^MywBrnIF_@T;eNPx5XIuS)650rqX~=34D;m>`N&F1!Vzx;`QDY${gnTG|J5r<%*n^Y5wi3=s7;n%x*T0|ezCdwX~NNjb?Z)I-kOzySH} zbUSVj@(*rsn3L0$avc&ls*ePC=58-e?>R8d#<)|%jQnii`?p(dVEV}P10{}$YEhu$aY zS`~|72w+ZGs+{Jt#voOL%QuO;mVOf406K?(!a%{u3)tTZ>6BQhm_3>0*en?uQ{&T& 
zH}^ve02Sn=c6KB*_1G{^FM(zvBvBw|SCuKvHt8CK^5C;{E=EKJmF_aJ$AN27=O!td zWJ3zDMTUnJmVC|R@Xh*DGdj1qww@?4{mjt!?*RzD`3;T&G`|W8ofhO05rp!-bzPGxB36!{Y-rXvrTzkiwH5?>>2#UF^2%Ub|Kx9=whL0o`HP=IuzXzc2K&7i4|H$mxscH@Df^!5n&^ z7{e-kFU@;4e^Go!9)l1T-Sn1M`ZHh?v?ToBkDP{|dVzsKr+bII(*xk%S3#K{z7e9X zk&2Zm_lq)9iYLt^=>suZ*(RegnUSkLC6K*l$Ea=p%((s9x~VS&9hL-!q5w9HbGrva zn{%Tx0)HE+f4%7no!0s>R!a=2tbEAyKn%2C z{`<|7V@;myK|@C{T8-Ly%Y-?t#Yivj zzf3*;JYJ6<A@+UIvgY8hlOGl#C277D9|h zE3O0Hp4Z8zPTb{d{7+BLhLK+D9!SXlJ{+;HC#sStOe(65`|(|zt+`3_`G?m|vj>YV z!LXp?clFdTS)A0%Lf+lb&%E=qIeC$`Rirs{|JM7(Dm)?_f&po)Q0ROK&k^OPxo!BR z9Mq|NF9OHCp$Sj(G{HfSUy?Y+BBUkzvEi=DPL>eTtJvg-K*Gmf(wPzD%XX7{rT+qo z4>2TZt_^U*!VbZpJPE9Qw zZrJL4R1y49_yi#v)2X*aiggZv5Y&K!$zZpv#)3W3&2e|~R zDcRBkdH)=a#jFceiI|IiDw|!na%VBMha!~(!oRKmVtjjy}u3G=y}zHJ@gebvGFvVAME<}_sssjK0#p`S155{^@SJ2BY$9^i>ThbPu+Ne#ve66TEhKKMi8{f3HD8!_Xq{Gl`$^h5IU^pT7^2!U|CY_}FpPik)% zrNdjSS5q!gq=XSu51*z6Cp~Qr`yLHFy&%d9^wnznK7rMclTEqln>md@Z4{qx~Zv4^K2ZrJ2d z(|xf-TXJc2oTF|Ltb^Ap;#JG>?3&R#=g^F2Q>gDjX!^Z?!q3nO{SkTA$2Geff~TjK zffqW_CNwt60NIKZ3-)bZ;f<|OE|cBbEGKdh&kM?tpKl`?s+BudUc@^c+qy2;POu;F zMBabvJ+3mxlxkA!PDFOcK4qu0swNbeW5PqCYE^Eby0=v>>R+>Eo>`EA8J~%E zoTG29meaWAqn*iht_haDv;x^&WJ_Vh>{AG08*#16heu_prVG&SZIIN+VdYwwUX!(- z9jkKHei1>mRoyevKOvZN|Zy~!Yb*X=JR3rX&TM`I@ZsfDaO7c+y0kM+443J#gLjG zhh<}+W?pSp%KP&2V%XNlha-S|TC_?eL_bUNIRC+QSZqJw?Y@hf>4R<8ca7bC@DPfF z`@<-}aBjBP->a{ms{XWQ#+=r?8!x5tIZbCjnfTq?=P5Z~Xmo~}@FvUu3)9c-E12s~ zt#jk6j=Pw@eIHmcq<;>n)Z!1Vxwvl%uD!aP*}VGH9Etc+A`RO`mJgF%#6k>*2g@HX z!+u@2WL?AM`M!7tyoZR0h{yw^{g|PVzFc7%PH_7D|-DPGcN%vzytnG8jB|)Yedw0u;-fqkK9^D7>qXW3_etE|%OQ3dQz(L?LM*C&b*SwqTLiORUvA z0rdSI7$4~Noi~qk8^IB;mFno|>{hf~n1!c#RMe$38Z#xYaBppSrkAVR4=0OtE?(^^ z@-F8kcgBC)-}=_0tmzZ?Z^Qpr$MIc&{0zy%5yXAoDa-h3TNYI^NM1Lr(+olRM&;IO zL-c-NaPoOvQp-)brNJ1J6DmJz^6Gk|N)3{)L@(s*wmiNyGf2JLjl;Tq70_EUtXbvs z_^L`bk^Xi!9s$-MEKSi)!O1zlOt`X#af2g>pEc=-t|{Cgqxju$*S3POsSBNC4o?Cd za2^DHdhUliRy#aavtQr@W|Z_ki3cl5J!&P=DVQQXnmSPa9^q)Jh?ze?_SWr(|Lh+Z zR1#!Op3eG|i4o)MrN1IPHlU`#V1?hnr)2Xi8V~6Nr25N7o<$)@(Hahc3Qdbrqx+*l 
zZZ)I_nF}Bg-3FR}{0fqG!{XnFz{5o0$H4F?!l`8EWJ-G!63UHDg{#oi9c@!pfmox+}x@y`CbF)U4 z?IGbrei%B_S#h?&Ekr+6u?`kQo)S$0NK@H8q*@}ehq@(r$Ui3%V@u_fQPu|Oe!DT2 zP1;q4_1A5KTeu%is~xBD?)!yfM4~zIhS;Eiuye!Apz@aj8L19UG0j*d{F?qsR4|>h z+nX`J+f5W!i@8MK0~6Zn_OD}fI^IF7PZ`bo>0&Q;=#q3LcfoKhELX``csXS%Tr(G9 zQbUxY{x!rTs=5LyiCf{nHoKnJf7)NPwwsx&t<}AYtFYqh4#gLZhioaTYZ=$5&T9$Dga(_br&u|s&SlcHa&3#Lt-&c; zr*{5vJ(7GBQ|MCzO5thtr#efeG9YE|w~z#CD7d$EcEXh~+ffrxG$_kS$GwqDXvD-I zp6hZu?Cla<-Fd`*6xE-*#!+G@4>1ua%bo3!MY9p-$?Thnqc{g9QiyB%Ejq^J?dZ=C z%+u87)_!IAaE2r!?w>DRl%f zmX+{`1`ES|k%>R=$`j_?_vcUqf;Mm33B&(=j3vx%Y673Q7ow@d4z8drR4sQU*~k@w zs0XumUkX8ylVw|NE|$!uBSL_`jeCT;>ZqE%FFz_PYGF|Y|-b5(q%@6vr@{6GJ*+lvuH%=X)nqAh9jWrN`;D&ob~y$+s|;&xzWEX8=8 zgj7>GRNttUVw@XYj|A2hj@f+xnA1i(;XaJhIpPd(P5i4nwMl)a)OfX6_~#!IicXyD?b8ZeII*HcyLtOb@v!{c@njp7EHT;|>8qofuiuuJAkYS);*{QU=Q z=`dAIuJ zzbA7@nN_YLSY70YYTG`roPlcudZCED_s_QlZT_8QQD4XZqVLC5=Jt}`uS0F!*AUgm z#~%c5I9jk>=wMa~jh0Q6OMY=3IgbJ95^PkTtwA|ynBRVIx94^nZ!6bxazc}xD}cub z5i|Ug(R&q~SS+(QKX-e*cRgu`gcL&vTnxN12+1281>b?jjqh2WZ?2h>#~2&mP|U`H zj~7<@mR%73+}Ard^JDj2evp_s5fe)+`8eD#V?FodBs&u{OI^NqWD+mrr_V{PEGmar zatRKggA6-lp%4nrs)GLa+Gj9eo%_388d?v_Fr*7j{x6Xc1xZIasf#+=aws zZP;|SKkYJ@WULgrw>%r6R0yt^ktb|yNH{HyAlyqbDKmv;2|sA&aL`UVlz1~4xOzQS zbX={psT;j4OZ5cJGiG{Jc;}lo%c&Qys##cs+*pP6(_vq~E767R_8XcU(MlU{r$tF& zT_yO}+ZiDaA>|)a#;Xow7uw0t^`da`1Vap+C4I9dWwyHmu@q4fYQw1wTT82#erlYA z{M=xZcs%3>u&rg({@{_-o3VW-U5!#QA+5M{|(C0|6(mhZ;r zPTtM#oxC>*3h1uh&e;?G4;iej&*(c%nu4HTkn#A-sIvk0=2M<-!^c?emyIjryfaj0 zb#W46yaFaxXAk|KyY1IR(2-JSq2h6r`n`_{8YIpWg=`9H;&Ox&dbq>0=KNJj(7fZq zP=-Ea`?=9fny0F#_47O>4ea*b#6-c2Qs8h(RRr_og;SkJ@}F=aZ&M4kK)bNvTXoy@ zjh{6WLZX;LRr>3{D&W)(Hx5eZv2?6Qw8{b2ZqzI)4)tV?vo!0}p=X zqZ>hzYCtr97x8VK~l17i7;JRl-H@_6mUyWZi zpW`;9*^KN=bb$nd{#sHqLM((*M7s%UbpPoiOFur_Mg1S@4Df8QZUVr(of*OaxDq5T8xs<{qnM?G`F?+l>9+1oi z$T`nk)9r@(`uHb<{O^3TnUhl9GsqS3YkMcC4al)4?agW~n?FjDb@HQBR+&?3MWf<= z*G0|=Cfql+)uv6VdWy-_%Q9a*kwUL|sZ8#DDDe7!!F8@L+IO}kDXK%}j>se72^Dcf z5F~C7Ms>oIaJxK&Z8>_n>lYM&9x!4iZ0-jg!$D4ki%uvEk8UP9B*X??O{Cp3{zP1~ 
zpf!VKv)zQ0`dw{))a6WL4O;}67cYv;a45}oO;ZawR|?X~iEIH!y6TcdjD5VJyx)l3 z(W^A;lw`kICw|+SGbyg$jjU*b)E9nD>$LgHFKQ!Bn^+4qjnR}HwKfe9s5#3!;`QS5O%WPk&`mn zq-Z^>irT(2RJou#{fxLDN<;rzZVmWrP!)uRYPiD^N}*_6ygp9SB?l%|L|X;lru9@A z2Tc~?ZU@X*IWuzNvqTCh1jAD*Uo_tcXdn3%DrIk&#~5fDJ~Y6&#SzEd0aSF&4#*iX zH_PVM)=N?>lRFbjb?5oyaHIH!cJT7<{O#zBUM)Ywy`#ArNT zx_ZFZhDpT}xEXgba31fz?MQoZd6|%z8yN@z&C+lv>JD}HmW8IW>|xfm4&Sv6)x185 z2SKg{8O1a0`|6vbaT73=86-iJ6;hMq!`8W7VP!LLLBAR3fsw5Zpx#zSj@FMC`HMla zR3ni)H%3+o?(1cI)IjQeExw3$CP`uS(CI91jOzMGf2gFUhZI~epd^7LBCpRcaK??q zLSU8)xNo%5sa_d0W{h4RcR|IwY}@$+i@o8oWOv;1p6j8exqxz4jrPV$GfJt%V(fWB zpEKG>#A*`-H(9`;Z`biXoG44=5oH4+;#XcUR+6e?aPI&=Qho^3+kB<5CG~h2e?Jpn zTp(jbTcB|5IMY?MpgKb#5s&tW#mWrrL7TZ5a%o0`VraL~uGwtIlTrTwYaR;uNA$*2 ziUZDv35LP$$0y8YO_*QZ&VZboxm7r>G&H^pw}^{jzZEu`1$|Uzb~s!tK4{ZeT-Jyd zD}-JhU19U9A1KcGP;ptf6od|6p;JM+&_za0&yJZqJF@`_EaU+~*WJFv z?GT}5;E3I@5XAdn;*FpxtPq>1;qO0%A=@^#xS?=Ovtc+f4H;MSqb(7h6gJvDxUKB1 z^E80J&llk)FvKuz#A}L&5-o**R?rnVtU?PFk$o*0r0Ar4%@sj%gqpuUn649YBB|%Y zIF^JZDvN}x{e+MYFEdU2f#d&*I}&5?IpeHipEkao?O->^3BC(F@(&JMQ1>G~+4@&cf$E3_Gh;gv3y}qNEoqM`9;olfS<6)+ruqBe4|v zFFF4z2?S>ZKS{xWX|rDMl`B7Nkt^Dn4+`jo(18x}IHDoW#sar>`R2q~C?=>g%_Gsg z1QExdeo?9VT=)^OD(PmuKIxyefGYe+l3!0+Ja(v3kNYjG%tX+R1DJ+!xPd>$ z%6C4kA}{Wy&OsoLZH&72>ZPQjC*PybMZgfJn_dZT#x!v3)aViOLQ79Sa%$iMVbC8f z6)!hXKprPbjV>f-OQ?>KK4{2t*lO<=fB~+o0(m3*G^ZdIU-Oe-u?;&)X3F7VLE;n9 zh2rABD6_mh?TV)KK0bapc-g?O6Xn7X%Ho4c*LCCGBt~B=^^|jt$z zkV3*$>p^z2su_q_XHU=}0HJOVwS~D8-y%DTKJ%ShhAdfkt;U`zM1iLrkf@M& zahpv0!+sG5wW?KYHKbL>;?)9jEfukIj%FqGln+vvTBUoiAXh?j2Dm`H|xH>OzwW`sI87kQ4;-YFyf0IL#Ra+_iEa;#$I-}!j<#Z)| ztfD7|eN?K39iMeX_QE_0c!2XJ`26w^s^p6m8$(te80tg)x)AQs`0CYqAG+u*ir(3^ z*^<*003WZrgm^%C$q#r0)(&U|GAE&cOmsrb%s`f4C%`CAV8^Yk5T#2@RcrD^o9q}S za`^ILAv(;6Y8wNSWlY7fysfL7W#KafVST~hm}*IiBL{<=2)&~@tNOxGBVmCM??dO3`yENC zneh(u@soeGVRPy%1(~iwS4>4@l?ynt)@j>1D)X?V5qb@#_Z*%o5FBry(n;!8j4#?` z8jL5LxE?zP($XSVHmlkn6OLO7{L3IPOEFRiTtbQt4_cf>oxrpo6j;rnLrJC&=p31vsUI>1J)yf{jl5!=`F%It 
ztggfFV(Y2%uY$TVXvK2G2nI#?%MW5UdVJze-|JrNJn8Zl!XKf6C~MMW^~k8K2XqVWRi+<6Xm38zaF5tHkaex z=?8K(N+9LKw2JsCoJ)|KZdOD{1*O!wmfNqGH`g+X{79@5g`C;bnefR!0WEBH%7Ok0!!-?%x_Gw zqpI`LfJQ~w+uq0j>c9!+yQ|(p6K$A}^B2#K3mJ7oI=U3%>jUXdHL3Cen zpgv2`is=Fk9$K5PdIdq~~*)+~&OnaI|UZ)zAyh z(~%sxd7cG)Uqmud<_+$@Rly$A+%ulRyay6w&SrB@>n-S*_N0{SdErfcx&p zIWaeL$}#a*d?r}4((jfw6&5^6bLDP`mVx+*%fk}cE6`ezWLT+cr=9wksMgaCJ~IlKM{D$vpI+D!_mzbge!C|1U_quPq*IG z7Z(6cMcv>HXkz0}RAW`E)zS6HkP51!J5t361Q*2^N}KmpAt_h#bIbpG53$LI)~SYG z4?g2WrZ{{@M2%M|*1bu|!{u>9iV$;mR%}5Gnj!~SOtHG^eL={qNPGi*<+u}bl$s(a z3j%*=IxlSC@}C7=mC!7DYgEKHa3?ZSoGgf7#021o~oq1Vs6|>-tk+`F5RA5j#Fb6uU1PZ&*^S?sy@6`fm2{wh_9HROClc zZXViv15HXNrLOtz>h1ckqU_|p%jdjKK1lC`j9SuT(k@{K95#vWC+#K+}Yx;$9365)@*qdT5_e$n+?XC67e4PgxvLJxv0J^MHFa z;hBJlJ>?-PY6rV+R8UZ46~3NW zURbuTxN{A1IvrFj&aDKHR#niV#*zVg!BkZL1Wx8{3|mv}-ZsEDOwj70;;1eH0=5W* z3t>;Dxg8*r-WTQVZ-q&C`$0i$PF^}71sfuA2je4)S;Q@*njkXUiF#XCKLE3u11Po} zw#fI7;oQjyJ&Yra>%Ou9YSPhMAT=+*2d{$_%f}}rIoa%VV&TPhHY~v5EpxKnaZ~BD z-rujDcqXk(@Pc7U&1yr?*Aw8d{(=bnG-zT@m{ zG<`sl!jyD_by%Ik z1y2H3Y%TAMdT(9@Kn6hpLTd&NV4%{%WM^rkeok2nkm@9syEV0=Ae{NH&aAnNNHnXM z<|qU#V45N=K6;ZFP6?_85CcTO- zs4D`nbdXM5`AocWijmR&jO6v+l%8sT;5oW0o^R9e_rN3jV>5xZ97-Yi^AzqU`rdB` zO9bjP-T6icm{Q6#S^P9rkUnzqaZ@zW&L7vBt#0z%naXq~M&Pw3cmqTg;3cQ}#uFUp zyDPOMasoyHJn`nz$iwvev>aEWpeZivBzk_1ZOFdjGWmncWU10nIU0Pj0l#dc6JH_h ziB|OvUlbrT-*#cA8da=c?`C`6F#ttyp}zR%0ojGeMb_f&W`*`u8YhSP?oV@o?cZM8 z{x)*3j>`;G2VbWHl{L%?VBj(l9|AX5eeC--bO;O*{}q*B^3GwjLhnDgb~}j5`VWg) z`T5jV$=!l8sqf?3p4YeHd{h|dsE=~bA(diz0Q<;k;Ewatt+%R}LivClh7JDuy$|lww%1;DmAGsL*5{ zOX?y%@emO-Vcj65`609AYBJhj@YY@un^7)AX##Nh8P$#j42PkAznnIfz%j&4q#Fls zU&t|3Xz$dsp`__XrP7xl$OFqTQk7OUQL;yhZcH!=6Erw684u8fL0p(2>W*ZDZ%(W> zxr;Wgcn=y+HB@6mNWv}xF_aKwaK%mEvr_S^Wmc zs{(aErKzy4opJG%0^WrCu5bAMP&k!e?RBtX4f%Xymt~R_ThsLC2D1~{{revwan%wN zh7R=l+Ks^1dVXBWU}WpnIuLanPb$EPU7qiTMN>WLI8Pc*O3O}|GD>>J76B=u+3_F| z;zNY|ZfRp>3eFg4Q(O@)us4dm%NV${KUjevzTd(+Yi4)JXYU(&OFJPQZ(Pleih*00 zMhQz%HZv(v(S)v@jH#_P`uf%T)CRuU^<8eUo0M;q&wUG83?|3_({qb?LRLmZ-wxbB zWOa 
zaAw~Nin0vU)~Fx4j(39^t`tfhft68lm^b4UgZr=tct~eLf-C&dBTS8N=_Mf{Z$kx_fo8cDN4&_L1-j+?f52U2 zTRSTD1#_h)pTSphXQIxKLCD$a26PPsS1mdX|JWs=VfKxx+QDn$!Gsj0z`f+O6Ss3i zhDN~)8{8tgd;9LwSVwTYLsFvN3F)~d77VVoS;S6$6Axg9ixL-pV;xEc6(^)?J92E5 zXBbx%KgLj^vB}WkjPK?Lwj5b>YQ`DQkP~_ZWnSvwFD`y5_7>-ap8e}D8TW?X-a}S& z6{FYA`WSa9+mHD+8&HGO&^sG!=u2M(tT)S2%kcqDI7$~e7ij}oQcKviGIr8ig==3T zTmt7$l`JHdBNBx3+-w|L4X##k+Urn%|Kvnre0 zbHLj5F0PG%|Htz$Ois{++4XC+!Fi#0A=H04L4tfANS6t#z>DFE6L81)6@;$+`zvDl zhVS018wIfSYe{fdd15(@Ejhx3^%_SzBT7{%7$X`}yy_%r*IrhrSs>t-0_<-3s-cz@ zCSY0k`|YpTX_~*XO5ryz7}P~r+qHNlH2h8JX02^X3KMQFCZ+W3IXL1zPF|9lVrcYX zY~4D#fg1&o#N5G($AN?XK7>F|tBvXaqv?FkYkvd#}Jg~MHcmo^+ zcQ&JBUTAx|xCZh41de2AF8WTWn>6SQ6n^MY`lCvt>&+TBKr`EB-F-W;#4x%YN2{AUH{5Zv8Ic@ybU5LM5zd_EE#~-Qj|E%<2B#mRHzRh&??-!dkn`bE=ZF5)E>P zV;Oq%B4nO3Kz|X02w@;J^_yVPb+}gq;vBg*6wd~usN0YHJ5YwK6eC;}UY2Y>xC0>4 zI)U!w*>^B!q8B?JMSIh##aS{~F&2nKRr}0g9ziZR@|d(N;aMl?Q?z#vhV!gl^mAm)Z=?*wgdg1o|firI2+_R z6{=JfL%kK$<=K!yvhUS)%a_SjE2ABSe~f)bcd_hnD(QM5$7@du&dd%I#5G^wO`T`{ zpN<`D&ZJ?1}`%Z!Wmm82Yl=^HSv76a{%9@-v+DG)hDlgM-8en@M*oMv2 zOlxQdvAMeKW!t6uz;WY&kMdU#Y!lQ|GJt$6F`t*N-8vIN^Ye1DMtgTDBhV_+<5u>eLmNw5VY^U3ykVNPHB+v@p-0R z3yikybQQ}&x=`TTsJOl_5l^`h-7)^}4|>W0@ud#(|BY?3BzoqbwQUd^dcV$<9jK9s z;cnR5Z%g`}zoWMvMUI*8rTOpVs5WdTmb+`1bsaV@taEMh4!o>r#7Zz>7$WdCx&A`% zF#56rCM7ew-|$L;W-Ltf_>}3ep71@vcd$`{FtAB}nP$+o?K35#e0AS>tkkOk_&*(p z%zOU;Af(gDQ>TGKERt1wNyP2jc|jQAxKN(k4yS!YffX9$zxh6?Ki`I@D$q@1W|f#M z&lF9|%`1vzGPxXS6tdrlj`GCd@71BW0=3vEw#2sLi|cy$%X){3pS!Ey?Z8BBrvbd2kcH-+(9dY<0||%{((YD;)%W_ht9?HX-+eUa z*4pg-^}yy9l-2OB&E507HO#zk8QQ+e6Lp=DgbTrR@X@{pQe&u!eJ7C!;6R1lt8F`R zGUL^fN^Y6NKq7|1t5?2i~(QL=5?H0r1^r!B=s@a(g_Z;_k?=CF|{Uhp%T*-KwFti6M;FN5lIIrP^5LVN8jL>irN)M~`~ z>2LKtMDy(i!BZdu`%no1?nWG+X$~j}<92A7YA3y|D(1!QFlk_0!NY)3;#e+lc<+?58 zwatQ!8AofdM0d5*qUbAGqOjspZ5O&?EYbS;hVG9C9L;ThUT}%8PB1X?#b|X20Pc1a zjCQFVg3EK!Rj@Dl5-AjqG>Cm#aV~tUT}XaGv<)!r=OsOPccZq)_G=lrIzP<9#uV@w z99qw7T!Z^r`RQi}71kMe7c)=eW7!I4{PrIuxr?R>>U`erwg5#Rkn*f9*j82rkV}#jm$U3W|ni!#EBK0B&uBsCVfIaH9_f 
zHX4*K?8JAb~9d&tqRc2{s1)TZu9>KSJBS?(Xu$%SgYkVe7pR-V-B3^!8v z@tdD51xx~+(Y&CB^c^3&Qn+Gkn;WWQbW@Lr7m!IUR@hX zbrx^${^VX9)^hf8dfvE}dS(_Vx^hN}-+fVY(O#)-T0GLi1~t}l(cC$8|GU+8dt9x@ z>e(f8%BQ$de=Vv19<1`w_N>sCltyG&xnxBxbBF2{J8mhfM1%4Nv@ij)NF`%ob8{$3I~gEMej_wH7*c~( zNGBFX31?>>p=c{>K@R(CWDHcItJJp2O4yz-G;U6TPNw%+oNBm{RH?{UypB$|v( z09vtP0@Di4zBsP`Pi}mCR~c&=T!SuARlmlp#4K)mBu>u8Lar>g5#R6SXfaori@d)> zEb)HBap|s&QAT4k`##A~>Ez`Ph#uy~=GHCXy%t?Jv;1D7TL-tIHNWUAxpY!uyRFY( zvw_!+1|loY^s2R*@<|Du{I&j&2^X2u?|rL%USDns!T9t1zAf5|=k)N)vu<{jtvjTm zLFFY+v3^9q!IME-uW-_c}h`BccaOf2%}xVd-8ws{d=iw zwa5&X3BV*Pc*OV($HZyOii5Y`Z#FkT9yegd_zrH7nHQ)-tW#?e zNsh-V&5qGm?m0C)AKDrJG(jHRi*OOt4YA^T8YaMl9MfSyQ7Po})g|1dL=zj|5>pk9 zi@=+mPzA$jDdh}rR0p1friLI?@N=VQUBEo&>&awimd%dG_kj#5DltCncKHia3K@&Q}^E-yN1|0u(=6DQMB98DK9$x}W<# z7dzqau1Csb%#xWtRmNRANyabI)J2;lKg}v@Xz}pW!Pzj#=;)@LNYyNGxyfDuex)<8 z9uE(X(>HKIH!$}s=SMIghfToe!p4z)rSfP6;Qm+?O4ix)=Kp*Tzvu$}tJiClyI>{o ztjBesOYQX(|6>KH=@Q8FpKW4)zeodYL&MIXW5GVh@8$e3PJO9#n_f zZ9@qXdocP(7$THI)GdR8QW-AC%bsq^KlOkZxS6mL+2j%70&M8t?!rMqqTI{;WWk3L zRR$dd-_{y9+%}6ykX1JIF&I=up%h}>Nqkiu=x$|^ja2c4au5PY6W~N_dISNj)Q~&7 z3Tpl78R2ouf?qGn`(CY2D3+zAWr*=6O_B7eH&@e=!aG}_P|s}^omEi;UfCHY%;oJ_ zOKk+}&1O6pX4uc$x1H>osL0wrT%1;V5+wq3d_KXUlA)wVkQHl44{*RpQ647RB)OGw z3rmhh$c8UjY(3u!-H1vCd-9y(anT9$!d4s|m0v1m)pZfsGSOz;Jd@Nk%|Z>@u~X0~ ztil$i!w%_dUScdkVPzfE!Bi93lO{<&a>-ODN$|%SKkzNS^SIC$7gArX%=nT|^eLuwlW)Yk$1D@r_TMl&+qdI^h!&*KgxcHRhtAC>5qD};D{I8`Ucs8{dRiavSLUy z))Qp2&=W`&o)nc9xPDV+=w(nR;NeBQFu0bT;Rv5WUE4=5rA9h4}|Lm-(cyfL>CIUdf#drQMr_*W=bV1D; zFgHQ@c1$svCR*d``9Q`mI1>NIqPk&fKVM^9R1mamyb1Ni-6=im5$`eKf*1pH3V zEN-uW#mfupHQ~wEe4HDKLyOe?4lkdT#PAyZtr|JuB%_sNF@Nl>#~unP znM%Pb_`z%xaKwLzg135or;}S!-H*)H4Uzig9(Vx4ycEJ#uT>sB`k`=cMF{-ukNoa#qJhZb1*bSh zUbkjlVxgFl-@0d*Krve)*O&WW5*cc1B*Z!q(T%vF;;1Czm5fQIW%7G4aE70pN zXLt!1M3>j`Q`AIg5=?PMV?U_)VtMBuupcj^NmHpbM0IbwW%BYtKBKldl}D#PutrJ| zvG0i&--|tn?7aj9`>Jo$^yp!u(3DjUeXJBO}yNXu>E^(jb5PFt;tX;c-^9Roe5%P zx8VEt^2u#1fnRi($TYQW7R*9H@*J^D<;5nuKN-7$Js9x{erL@K|H4?Ym^#&VUHpi} 
z`m=11-#p?p;hkQ$0}Gv62*Q>)fv$-!k1&hMq46BP%TJ56d1L0dm>9vsL@E!@Mh#>A zM0p0J2!T|245ZW&NUg70#igYvW%+_wx*L8>$f&ge4}o)dnf)7c3F^6j2(0BXC`V}d zJMbk;R}jwn;iWiNFw#m8p22{AoBN)5JAbzt5=jz@*20{{@2uWFJNB(&`R z+;xt(SdFHai|6BIU+y@(ZOx=F#cn$(#BRW1cl+r6Ffv|OF_e7jMvBvo+dLuq1GW8) zL*66TndIyW;`pua&q$3NXaD{3U5>lY`F=rok|lWvrrW;7F(F{PkQ;eQ>e%Xc{vA@! zm6 zt4q-H6+VVn05h}Wa6e7ETOEg&U3t|hleHlHmQ`#lRjIP|xdnxgHI7&4B>@Jn`Gl5`c z6L9~9O#mIy-D($~BKu1s3II_G?N_vk*5*y%)e~O<6^{t6fWQ(Ed+)b}1w3N|H0qNS z!qZ!BjZ#x*8&xBNQRPfTNHi4X+yWm35wJ*Cf*gg|gZV_+p<6Tvj9=m%!zfb9N8}?T zf3>I_rJ_0hD?>dkZ5;(m`l&#avf@V6pdF^072a(iHQG)fV7*jMRf3OznH$Xuj_Ar^dE2AC05#Pvw+xEe4F? zdzC$L>Hbk{St_wk2%#>Gsv36?mXqw-UvC7tNDMEpN=*J*R*Ye_CImM*Q!mm7hwl(8y#Qi`;*D0v&JAA}L6$`_E*Fx{ zB%s0BB#dMLfu^<&YRHV)WUc@>;?iiFovG5p<-1d^uc-rkYXnm(`!|ALP;GkOsbo>a z5LJa_3LR%kY}Kr+cO|AB3I{F9D=Vbjk8BVOU}q?zFCkc5*mU=J;-bo#Ee#bA|2pX< zce->U+vfXnXbKFgg+TA^N;xaZNSCmiS+t@{gc<2Taj=owRmrhqM9N#9#Bm8 z&q3vokyKK`o)z)YI`7+`SV=8NOC;pVk9ZV=Per_GwLtiJ^;=VIz@lCwTBENnet2u`PCJVu00!=MRF0cS!rL47y!(aMi09p5 zlymd$g))vP>lWt6&Sijbd38kumabHoa0oexk5HjBX3}6?yC2i9Uq9Vm&V0=VFR?N4 zVS-v~2)KGpqMyZKG-hD*X-0?)L^W(BVkpLdkCD^T)G3yjRariPpC#az-Ki<3IS&7h z)xDwTuJJwNpa`_#C#RPeO;e+jh3rb)o@qz*1fxfgpq0p(i~jNGENQ7BoW}*P6sVBZ zqY}go=MhUVfL!MPUZ$wmXioaqbv@~3N&r5z+e0`351GtBD}vVBWYx5E1oW^mj>gBK z$nA`3)Q~kXO^iByblU|%;~G-TKf@!lZSt4sP^O3VDCqNu0@HME?pLkpRG>QZH!~sq zn~*^jSBV+*GPy0suloY_Y@@JoCc7_c#`U+WF2GVNkOhwWDEkb4XK+y*=$ZAYKChJ&l!>RsRlh zVew4ld^&15vK*zOjQAXfd^8q-_eVHZMq!jC#%Q}js`}HwXzAq3{i-~H)5fJKzp?>I zup4H;zLj{%EW7hG`Sn(b+IytS+D%I8QN)Zx*o1v#{{BxaxyVjT)O=aipiz!k(lV3) z%&YP!0v3J;B6Y)=i2QaD_w9OmN#X@dYbl5vqSw((sPd&Ily@L`94}g$M6HP{9P~+6 zImY*Sd;cQoTP3KiCP@p9&H-N}`F;lOI5;B8xY`0W) z&m54JDTA8ewh@Hr-U|90RkGGB2&13Qy7y57?c;44y8HgOGI1Ofgy^7@Vh+7iLN`91etd)mU?v^| zO~~I9SZOHA$8NGJz2@A!V6tEj020g|Hz5Qb46+J-X&3x^WM-XfCg1kVqoK+4EuD+-f^ZP1TpI;RIXjKW;DZWGvK zErg^YN@*(wOsEwNgwMH^*SK6U^xo^O^eLtO8)x^fW`7(xN^Q~Le5Q1Uc5I+=Q~%M! zoLAyC)U)6#XXAh*5mg$6t=SkrSj<$5uFZIjNyu`ga%+|N)-hD1GjaL&#w0O#9VMh! 
zbJTs^ju`@AQQ%45q(L0TOJaXyFxgY z2LCq#4}y&@;d8!%huHO;2l&PF;#3;J=RDWWNw^l@;bv2xp}nBl;v9svX#xFIm4m}t zBA5Q|xBHbgx>$v@FIa$iry-4lS6I)5g`JyTQv`jqM*4fpWD%hJXmUvm3A6^e+(U7X ze7zun+n5IoE^azFDMaDK_swce-69zZ|gD$ozQdq9_Ma)1x1gK1V4qI5hX+IYFZM2>ZNq92T5x|D;>f zbM1aIfB{M52A=z`o~x+TTP$+WbGjOe=dSFFG3!NrxvstpFi5dbFn5Tu!@>N)8^Rrh z%cN7%@e^&q*cVro)L(GB7TO`kENF{oExke!TsJ?6KE62VIy8puo^G5d0=P;9I zr_GjaR;m-oA9Wpf_ISS{Gpal_m_RrMvS{@9d30KvLBS-1oQCfbMs--kl|^aq1)R1} zfU{ym5ZH{?(IHU(17&x#rb~nC)pQ^$S!g*bRV!YLJ%`u3%XJ$E4N1Wa&sgqf1DkNke_U z>7XOXriQ*S27F>0*03O2)535H&C(jMcnY zkr`opENBo~Q~Zn-H$BvKdA={Y?1y2I4YTZ3u9UuMpPld~DJUqq?PmjEBt^ZLen*Sp zN$bUm&x~v#`xoNgRRzb6;AAE0^o%rd^On>!WC%fwpo$``6UC%yQr5U7Y`^JeQD2D!TI9HJ$b;6cW?3Ts7s6M z^dH<^35L7rpO?;@7xrWu*h+jiFIMdP@AooIkwi4GtbW!FK5faTGX=~P%r)e4o$&l? zsQ+vjED(9$uFvW&M!8t`Bm0~nh{vg#CrqfVolfAhOVLxLQ5i5@Sd=n=^yz117M{AX zK3sAuqF^GPs2XxY05-`HT2>X_zN7169pSqjhHK9qt)2q}wO2XFa{ObN1i`nEB+H*8kQw7&OxifX8cBDLvQ(w!D3F7h;o5uT|?{FZ9-V` zXx$I~Kq89ya!?*ViY*{lY~0b+e>E0i2u+@7EYVdG&q^F0z|5e>?f<)APwSiq!gVS! 
z%AUqI1T{}qJYy`X9?E&caKp=PjnZPU=TIFI?HEO=pua;`g=I}N7p ztcFgV5rm%c0HtZTJ4M(JAp;fR;wdY(+q=Dp72?`{txV4i2Dz4~Mz-n1R~xoko-I%K zfm`q#r=yl_q%U_J)mc~TP>&W^0|wX@FafZr+eq4$lPYMRheDyj=`fWo`Y8n zfKd4^Nx|{ANF4!uv5*8i?HZ`-f_GKW8>!JIzNeud#lF-$BZGgw7KdK}vn+RN0#UC% zywPwJo8}qxgx?gsd}gfVTxdjGK5()L%rLn5n;7t2SGfyk zs(6XNW(^{fUoX-HAPfzq@dNs}o23a;TJu$kOR&v&{zxj6m>qwGE^AoBJZw9EKG8=Z zM3mEuQk>+=Gv;1OB_0dasj@E1t|(v?4;l#wdOA(ZGvWRwFZA|M<>bqKk=*TrMw1Am zPff`>Sbth^b$D7lmb5A_+pYe4l5$AslR18~o}jx`8NLb~7z>Yf2ZzIpc;YeR#scXM z7PcAC#qv2iFBEEzqHxHNicoUEtSx@Y1&1$5-G;^NK5K7BPA~AfV!d)yK zMTG}k2{RZHQJG89H3=XD1LO&x-1V4cNdhLVvsx=)^_MBu+w8d$V4e-bku|CW-uc4T4;L^gMQna{uM)&$iHI+LaZ#zUkxuH1yPvRq9p` z+&6G4XK@og2?(%fRqJUjhXHh2`$`jcE{;t0ozl|rD_npdzj#Azu!TfZ5E9=DR^{wS z4liiv5=N}ycD-uk`{d$L3}`UL?#0VeS6^EVx32t(W*xF(q|7?O4k6@5O(=G!PqNU< z_an4E!B{>d*H7-BF}8nQ21cAE9cqGVzQIPj2-2kB;4lXU2><7fd|zhC4N)Q|BY3_G zwMjO;ed7}Bw)@9=VG!h>?&CcoiMhYXINb7eet;(qr#M50tGHm)PUJ`aWBZk0MLxYBn1oL$b+fn5Su8)sPm6%i;>TbZx8;nu zw<7&->H>}=6WSe6bZYd*99-jEU7czy@nMi6DZ|>WJvnr$jbm4}$&I_Nnj$ZGs>)l`%!*m8wS(55s9d4&`Uc2ds#2pZUcf|eNu`T6+A zlS?W0Ys&6=s|f=VfpW2J4G-LztsK{0(DGgU%ts2y%=ddZH@6Cp_vLf9yvlggnt8+u zXQ1YRy$ziZ zEO_OX9FBjP|2M{cWIqXZQU$fte`QpUr(d5rl1YmVOR|bM`@wRPx_s~$MeYiYkmd_r z%GnwW(zG+f^EO5V^`iDB3FWyOvE6CYLofaBvswCtaCp0q>X zqvi8Ha>!x@AG*5k*~F+%D_WHMZqaQ}d4<~5yQ5hJA&z}!Lf;h9V30J8Sy8$l9ex{V z!x{%O!lCfPq_K&T&GXzbd>Rd>31=j;wzkd^dhq7v)^aEDH_#yxo^X*Ku(FhGQEi$) zTlGKaCg8TiSb4>S-H)U_0h3ZFW}Sv-Z*(c)SXQ>jw1{Gd&9$r&MET+Mquc#X&)Cdc zfo(g#i?Xh+6zBX_>m`pq^k`ioxYmJid)G!*D&2EQE++SnquYC)n`(l2=*~vh=7|sCOWZ~23 zk_>b2{5+0f(4;9Y6|^vY{H9|`NCvbWB*JEoB1p8ckz*qy)<AZVxW{MDGh!TyLLrSvSy!-Chk_IJMwGaCAIdLb+&xR{zwAB0%p?I2002vm?J8+rb~Uc z`YIM+@ITS>fnUIoZ;>3_shVNk9wNaaBNe>P%Ygt5A=NA+pPXhdpU$_pKvi=2Xk+Yx-uf)ohTiN)-o;qC!#CA5|>E z8{}AUXuno5m%e=vgG#izJ?yQECJ~rRLBBP0?MeJ+HF*kwkn1!j#C97wY8sh%o++_} ze^!%bz;jv$UeluYogM?ur7m{LW+JweIeH(dNj6@XNfus|(}}>G+=6$#7!jFzwtNq) z@&RYXvmUwWHSk8On9R}dd5o5Es>9bu3q@e(6Y`)I2)yTfgr;<=v=6UBC=Yo2&(Fbp 
zP>scT*cNQkikh=898loFhL2w2F>PHBo~%zrFhqA$Px_UNHiPhV;t$v3Y}8Ot( z;If<^S}@x`h9g)LD%60n%4&Wbv6o8umKv*VI1ZMRGjUsDIbBkk_wm>$JQn)yITU*? z8M_wIXYzBGL!~L9J^n8h&5-+k%DZ3a8k*AsjK-#ihuL5sH*yQNe|=RKB#}Uri48Up zlqffxC4&c{yTyiNP-g{{cDiB8)~`WE0G`GEgW+N}E2w-#?8AdIhRh2?)UbXMHJ2IhvA~b)*W3!FLbq$1+?_xHg=EbK7>437zKnT_DRUWr?xcsGpxk#^vlR*% z_sDjXDKU|}NCV&>sr!)AJ(A?c4c-|k;IdTy^zLs-d+Xbt-L~R+BxO)x>6jb;VA;hO zy$uO_`dLzxh2==n){|^{px}t7O>fLUlOi~o-W_%ev;E~mm@a)7YI;#{4f2n zgGryA@n9mwp>NNbAo1c4uHR5&jy0Wl#pN;#I9qM59vx<(&Wd503}i+!VxD0F-2cZZ)Qt17LtGUPe zMe_{v-RB8p&b*g@2w8=4;D1IFt zz@@ngOnn;!ss+POvPQ;H1|yn}*tQw7;aYwtlCvM0N;X?6q;psoNqDsIF0WyW}_x{)n_a^)(p8>lexK z+E$O(Qx0pbi!NdN|1CTf@DpifxE@AUYi1+PdQH4(>E%5><<3OObGM{%k)TAI@qWvC z$@xcpiC|>(A>lq&>}vUuzvC<;0A)O(vv{hSl^}>R)wuPAOU`0u*@Ah|Aa36cLp74j z9uIk!e>2n4ZNw^yfvz2&q0qKNslqOz|>;#9>anG?(JK_%67-RwHF~=Dj;na!^Ta9nadFJJB$1NHrj|v z6h#bCfG%JbFGVm*gh`~?ajg;Tx*ljmDLu00%>mC9qg%!qsn!b>4qA)q;uM^@cM5QP zfr=Q|vSWIavogRqdjR*Q$p3lT^3m?bB$HTTOz*?#QscjJgtRR+sMCrA&vg-4>qzsc z@=Y&jMc`irA{}M%0@YFxw`SC}X{*+3W=Z%dGWmvlefYnWqZEdkW;9E@}9`SPX7HAPsf=|Cd*yP+Y~poL!LOS*#UtJ_LqIs zOrwh41K4+sh5#KTvlOLxTx%YLsA9L{9d`RL57U&Lb##Shs(o~GIb)#nA*UucN%i_6 zb`b#ydls`%G}I6U6K6+`ciZXd7Qf48KZ0!W;>>h}U?|SSJ9}|G!}bh9$MZF>_{JNA054?uXx_k9R$v)!qPr$aLd*^xEXK?!Q z6Qf?XI_`};a08M}+yBQ$uHf5xd!7l?azzA*HAmy%y zKkXh9f9wj-!lqWaabG&^JlMl`uRiV)aKe$25I5@UPP-n=Lh)5}xGpO5nqHus$O}`o zgq~kE++TkQBEgH6?CTc;RTO=kE`po zqOgg*>Vn&JC2eJ5arc76n)E!Izu--=+<#Q~1EZWt)(w&^(M1qg?%ZK+Q}2bXa;Y+E zF8aDpcr6??@F}-CPX_c(76~@ify0tWY&OuC=pp>y`peLTPhYr#+@x}%P)3w*pF{qN=5n{LD$A581c0158h;N=8j$6+x6%0wKi6z^Dz#3zTvL&Pg$?FRdox1niAnFr# zTN$Dc9W{$e_%%I^a{l*yK#r-a-kNhTzbL;c28<*3I;<5f!c|6vv$gVWNc#=Zl*6U4g z!kaO-zKT$=Ynb!$8;{Ck&Yr0QNa@h;=mA=%&3TS^&Nk&?}i@T?VTK!hB&{_HFMw@Rt8;N-#zM<)r(v4o&JCCfh(?!gpKH`4MrZ z#ER?K?!-%d3nTai!>e)G(2XSaJ>Li9y_lnrrmUVoTl{z_fNya`9tyx6&lr&BwwTx| zkT#qfXulweX1ef~o|Hj3XC~EOh_-yR1IkXPxtBVg)WIuZH~}z_Z=-Au#B8XSy-TvkK(bO@P(o1AdC7mKpYU?K)Kc2kemknk;beE`@d0H)Vj z_SExdw)?pNu!RM(h{q~CFZwH)Or$t22Iic*xOcGrT?mK$;O4p$h{YP8QZ$lQK3TI+ 
z=#>?Fngdh6R4tq%36&&v36ml{X<@8rd%)0=#q!;PxhtItC+Unh;s^U3aoa&GzIyAq zwHGgWQobG*YD+Y=A(dZ62|8_=dse%d^AN|#xt!L z1Z3OUl=T-!g$N@EDV{e3rU%9S6Be3glzY&LEiuDEcqiP%!K#RI^p=au^|&X3jR=l# zXs(HrYia#9VsI=tfD^7?#*ydgn^KWSv-k$1!`pG6i|Jcj`p}IkbO?P2beLo9G^u~P z5%m1%|KpuIZD*#i3_1iREj)|lZdOT;ASY9MKF0FjZ3;5C?co(APY+2mPzu17+fT@g zUp&#e(_JaXElj|}R#!!ELD1)^Do~*9Y?rC^x{rpP^+#El@}GHC{EE+sHyp_GU5ov9 z#srHYuSFYMaTg}Ahm=jts3WzZPmcc`viQh4+zN3)8)>|Y&_m)}iU)1rLC%cX&HH1# zt4wUCH=M>(U*k>LcH5iLf2NufBJ1}AFBo%Lmf*$#De)Oc{qxd^u39{TGvvD9Fxk&% zhZmtEXRc;B8+C1|$|4s*3iMTrZ}ozX*`-$O?dlphKQ`P*RX5@l(s1sUek3L525~}x zFnahC;Z@dPxUpmK3d_VMUcLSayy3-yl4R>#)&jDK7;$$$bzXYk<%-X4!ih42u+X@Y zt)uyHP)kxKC`>XpL!ewJk%h2i)O^`uc=)K(#;H<#ZTJ(|%;v|0>Ogqhfa+(6r!Vju zchh5&`@V90 z)eLl{`E8Q0*;i-~E4H!XnWWIo_HAc>SL*&JXZdf}_V)9IK3d|$5yd1EL~p$B`~H`= znr0t(7uV~5c6$zW8q@{-%w-4wdT zbt(1b6uAIoqpK;zT3xD>U6?M?CSE)ccq1nPWz%MA4x4S34bdv94*>7NSd6KudL*bl zhyTO4DNqNas;&?yl_$Q!j)BbKkzXrABu}wwO=fyE=VL4ebuJN2`-*z zn$P&o6h~F+^)S%SiCswqbIMJlgk*KPLZe)EDUOw3Y$`muS``?yvKGvGPK^?6-KVFIjV}OUnagw?!PDXJ( z$Eub$dNvmjSJyIw?PkF-5O3E?k|tIMlV^td=%nrE8*r%d5N@w3?SAwatvUV%S)j zk_L@k>U{Z80}#_dixlU>5L_~TyD(H+4gG?|P|R0Kl}XM&RqyxZvJ5SJ{XXA+zUfeO zdc;IdyjV`liQ0k&ScoFxXMQl8|?tfLQ%6bA{tnF?9;5r2~qI&F1In z&p5#&AJpFhR-paPpYUf^(&U)G^fBGAN`X)Fxeax}_KIc~BiQ|9I!gjkHy<&$VK1u)#i4Za$+44q&bGY!e zu$o3`h1O1^N*V1aOa!f&WZYLDlT{vuO1^RFNnZ_l$Grx(5d|#(Op4ze@sgQ^TYvbQ zvLpyWv{aH-K#!ayVlHm<-TM%Y(PjoKum(W!6*00i#{{cXR9(vw3{QpLYyPVzG2fql z4b-SJAgS+>QeY)X@^K@cZ&JydQ$sR7{laypC|H?W1+wJumGso0$@qsTFtG13rY%cc z(F9lMf5V@)B4F5^1;&(^(Wug3Nm600o$xoyeDcB~(OBOCPTXEU zbCD)z$E$kmBv7Vm2>EkBAbVH0_VQEd(d%Aq=&+`mNSxkcl*_T5gF}p|2bh|YGQKyx zwi16w%)1#${wkMy-9A;))1%PAsl?~AS+C1iBtkaq9U4sLpE%V@L-UE@X?x~?_?hsPU#ngd*}NIh)Ck{gSVt~@u`xspc*ez>tY;HsFo z@0VTe7VpVRGn0gj_9G1L4fmy~iN(IWd!YXb?j{f_CapdzF#|ALaWYn!)RS~+3AK1{ zje?Q~dxP&A?OX}rxU9^BP4SGMutF|9#DobD;6+)<{{l`zJkp&Io(jV3r!t?Wth8K? 
zRmNq7@pKbg`gAE(EIz9-$*0HtDsVT>_IX3)2?sATZLx=yueWMW?%#hC%N0NmALIFp z(RcT|%TvA5?h7}s%W<~_qDy$rcnX4D&Ino5yF77=msgR1%l7VrGEO#`H?xhr#-EOk z7K^(M?a@vF7z0FhC_S(^JB>HA?q_4oZpK9en4Or!kzMyR^CL`Vk%*8OzcE%VuEv^s-(@ZSiJ;G{9;{r*?;7tS=dKA}5B7T> zqCGqt8>2f*Z#R_%yQVLxFq&a!AV%yG!QW|MpDwHFi4dRJ-4AVsZ!(LB$TLm%K!VS?be7hu6v?k*YN&-moVnEeYbS0_W>1&}FY4NY z)ug8=@oy#eIk>bFLCs`T_NfS}5Fe*>Jv>?y`oU+xE(4O9rP`b!~P4OdVLcYYUv-sK?`^7@75Bsvl#IsslQ5L z_^}?btr|FgTf#UIDXVB5#hMTXVcDg|h*wHNX-2SgR7^ib@x3@3widRxYe!fhC}P($ z^!VM|y6;D4THXRv?W$`*y>BBUSCQnaVXdcy%o0G&~l{Vrf++Q3HB=6 zSs--Wb9Oe~_tt>BPpiHc1$7>4f*O}gr|W$nMEVc{@~2BVvn4#Q34pb1U63J><1><< znIR;TF?5V7Q3e<%9pCBPXqPleBmHE;1oIaqwwN{CzHHS#`n1j{6Nc(f9s^$b+V_gE zmYwoGupY>n(NfKiydJ*(dqdU?AyRgNMbY>Mut-{gJ@2AUi(O=ftv-Es zq*)r!I*XFQ!6F7ZD>NE>R?=pDQ9F)){u6>$NTQM*i3`sjhlo(zvIBO_C|;%Uhm(Q` zuw*fEQt|@+w>HQGISC~^L?gw`d3w)j*p+ZA2NUne`KC=Ud27MNy865y9&?|vg^!Ho z3M*J+=81WW>x;;!SF)|CN6*D)m1$hMhjI%6c&4@EjS(X%RQlpH? zH5*~P#zoW2gdr4@0DWUysX{*0PP+fFn9Be6fbUDRdXRP9_ZE}19kB_z0W2WN5*{bN zVL7oGG?o8p44%C>C@)DGSXa(w-i^olMK!SvF}inM9$#sO$10lYhmuhtVY??}s%3_E z+s)eGMzxuMzDHj@mMCnMGvK|Sm{XWLZ{sobJ=pf}^Uw^64lyMz?16Xfd9AU>r^)ui z9J_L*?Lv-{tEJEnhMvGZZ#x&~{&ZcVgz?@R&D-zS1foG-yUHN=KsdipB_Jh&X=U?6 z4_J&j$oO(~3B&2?}Y=a&AEH^v5ZJZP!zOuu4dt-d+0?Re= zfv=NCRa@-NNnL~Twbh- z_1R1pL|D-2XTMtsI6l4(FPvL-d0y4vC6thdU8<_Z6MUseHV8mGrou8Vrw^j-1Q~dnpo_eOA29(H!`N<`QK-)4AtpOSXxp63HnewLPxx{<;`_G**^2f=~<9E9GOqI8w0AJG&Im z4|6k-5*Ddpj*inchq>{`Ik8gXP{%l)p|n(D#=VL3--F+|ZwKpij^qo>MX9R+c;I8> zJQtaUf?-wv^aus_4L5~^15TtKb8)pE6>b3?G~y0O9O^GIsITy3TF%baz%N6sJpME~ zA&$Ix(nGW60Jp&T*;sGY%(q|0^zYEN_Q~egu>BvhUbm(7HEi(IP9>sMmL7pzCIQaa zEkBH;q0x7;iDtej7767IrBbh?PNR0!Ik05>AEv&+JI*g^H@0ni!p7#rwrv{?8ry0b z+qN3JjosL`edqUm_pWvSfLSy1zUQ3%><9Z8+%*{VKL$5B-}VNaB0o{Q%}noh>-^}C zkxMsmvuaaL(p-P>_@8ID6o{F?r!Mhi$L%1=V{YQ^%Wt}HQvc5^aSPnKl_rb)G(_8i z8Cy=(#`3W4$Zn;bGlA&?pQD*;cYF%zwr36ns4{SDO3SAykdU3l1Zjgw0KDs@w-c&b z4exg>fzNZ)3;^r-hDdcjBwMbN-|r?1mn}fj(4ByIUNb|s5%$u=ZR{(TXBQ({ECR%Nuje+;`@MO5#Zzgt 
zO}#-51xZjpD5ZN&a0j5hKm?Aaz7ic8RjN#hs@SyhcGjb3rR-!@ZSxA=PZAVzBP!Ci zP;d1h#b6^>clePPKUyWBY8obWp}byNJVzS^)7=8toqpIn5d#B^(ozr$uv3NmNz42- zH4U8~N6dzCl;?&JaJ^^aEKB0}{L*$kdG&#%*Qp9ZaZ%b-;j9-Q)^XB?-e_ZXXxEJl zv=*WYDYmp#^O?@d@aIeo*Lx7Mwupm%(v#%niEhjtsfc*7s(P<1tR;LF#@t*?u4K>~`a;-X z%|pw~5ocA8rf&umyF$lC0@)~j4NZ8Q-Mnjb%}Ik$2k+4c?DZ|+&+P0fwcYD&S5G*o zgE1k_SXm0)i_fOpsNl@%N}94z(ZUtiYeb2=!LxUy=~%EOflx~+`MsZ-=qBrI;CSqD zZ_&{XtG#|!7_)#n8KL8fwgeG-{aWv_+BAma7L7qIK#JVzC>ypG3cl8?;SyF1h|ABi z>(;eVQflDl~x%w7nZqz|EV~b*@?bmW4D*;DxQ#_$I%j zQlF=nl5mgnRGZ6|QC{_H=;sUN=uYj6u$Y>e|KjxOO`k=AL7ceoblOwk+XRM|x&I`1sCItKZ|5UMQdGcq%)A-O+(?wahVWKw+%Zc_s zqsc0?u?)W%g`_!XYCgYNOKa?|s?vT0ElR`#;}z+_6DN-j{1zo8c#E&0<>H$R>_?{m zigBfG8)r_HI zB|mV)pQR?=%%|BL-3nD`q(9IIvK{sPm#i8ZxMPF`9xznH1Ta)Ei%!jW-0$(I3ek(d z;a4A@4^(~Vxbydb6~lYL6Zy|bGj6s+BmT5U|LDtTrCAAzd8p&xeSZ##*qe?q8l1Ol z8$gQoR??$fdb~GLGeU_-unkf9OOXR>Y2bRw3{t_kldVWUu@5)=N%{ErhV4FZd!MO6 zwd>yka|R3zid`SK;)fOowb}FD{GaOuKvfd9Nn!YFeb5fF3Y9#j2_Q7y9p!AEz||6q zDGol{@7NjKdYSQ5(kQ>EPo~C>hw3*B@hpy?uQ~esgqyRkg`Tw*2?hBzffn!FqR7;nzL!|7=mf?0%0MrP^N{ zpuW`^4#vga47Sr~`X_~b8(eW~KguXtOeT-1ItFY4UVIuM|IF+bZB%PiWK`)ZPqK$c zxw~3A|Q1}>pE#Z zkdHssWt@sbeDE%d$eukg8O|Wcn?3{$qM0J(Qp-ppzO_c;)Q@=c%x`%7&=<{JO&%d+ zslmM)q1aWiev)_EH`+Sw7GI##SgT%e!G=n_KQ2_{`{9G|K%|OUgUz6>KEEA!GO~hG zhS3*p|07AkK-yF4^$k+_j!Iq`zsJr#7tY@ zj29+QrHw*CJ}ge7jn(5oLB8myDFAN^__~u+R36F4-ONbymc=AM=!ez&@Dpt1UkztY zBXSXV*#}Xazp7&*NPYEtATFM{@Wi;CM!<(4PQ9ps- z$|#Ohsh3zTokd(uoIR5uGfYik0s-L1_*I`XRms6yf-MDY6LJ=vGBqtWQ&zaB5rNSe zq)kyf$2UBpq*3+q0>3MUu9vGeX0Gy=pAMP`S57y+`Z+0L%9t?V@b#7N{^?1s58j3b zyKgcMW;m_MY7!r$0oQwWdfO7h6z)X+jaa*CM=o2W%iPDivy?9&tspH9an{H5aFOSG zz+m)qQTs)yBZi@)?{_Ydv4cnA|4L6J6u=`C@GfkuW5x1$zfMtni$XZ!><5F(dz*>= zXCyM5k4V_0*}gQOjV6K`uuMT}jmK_@MMMh?xSqYjx{hMI@U|YdTbY|rs3##aiI|yW zmLDH1m{e)j1$k*^;vxb*YbrOS9dmsJNq+1?@3TW|lM{Mvdf@)IJ4=C0(C90zL*hIK zAyl&7cQ&{G-rz~3B-JN+f`zZa3MmG2yY(QRx+){ zy=v*w)Vh?#{FMiem#Ab7Nze)L_-*lIMFW|B)4L~43@$354#}obaVeN#FNsm$h^a9m z4I=HN#ba@mz8MbY3QJ@R7SR!W^CWKlH-1^<)62r^!6thK1(8X#k 
zI}AhvK3irD!oyVs`dpV38lWrRws?}l03o1 zho&KA1EG?7Hblz=qfK^P#%BNe`(=<*O$6sxWAu%8LU$gdy&jA;tVj|j+gm|vh9&zG z`T%Q)3=0hL>Ju8Yy9jp5o!>nW1UDdL4YF8$%-Mxp7pRE*k9m+tmnaAa{bp0BR{Pu} z>gqiQB?Z!MuZ`PK0NH&SQf)hj!yC`fq8=Fy0%0ix`eYkHEw8Nd_?kmC>{bXDmAiOD z!ak1+RO&;6_GN#>)wq5Q#Nj%&u@vrpy32C4a3d?P26WYYc|N7HuXGDPHoq8KgCWA) z-~70ZZ&CsAt#-uVjsN;!$?}^yAa5RLum6Sjd%c3QM=J;lnPx_LB}kjPS)ndHJdv5& z;BBr)K8}1}7#?T@ckUQRN__zIW>xPQyD@|*1}l3K>Z8(&OYxAT%ZFlxbpHjbNssuh zHaOBFgr!e9iH<2c4s45Da^RThpuftey)eBv$g9#Nd-+6k6m3ve{f6O)RB~$Y6(hLUb%#p9P^W3pv4^oG_B!?K5?pzC85c8s)rP;ZMw^5r8u!s7KuKOgW=iR}6y&mVM5r@UwnK~U)@7Lr8 zeyFt=8yl8EXe|-8Vq>B5jBlz3R!bm| z!o&r7-258^BfL5LNot@1gmT4mI#Bw4h^8EXV*qYa7Vs*u-gs^JucnY1e{^B~lUQqm zHIJ28_+4mW&zVwoA4+9-3MLMm{eNiZ&f*?doNR!2Vo{ zv4raZ2H3$F<`+@h?Y7bOUx@Wtdv$cDze*63V(c;}LE2>HO-R19( zUAA|N4;;T{-cT05cIByvHdC>#O zW0PuKWMeJn5G^H^F{7Sfo)ec`X7Z3uQY~wdxX7q&MpHmN8(8eH^KUP+-dc|!${DJy z`gHg@;;{b@F6+xsg7NL~j<)w169vlNk9X$w1}r|Tw!vHse!!XBVpL*frCzcE6F9*M z&01&MxQFLnEa8?5N3v-DBkr-JYAY8Kgn-MN4hR&|L&M00)Indjr)ReEq;Mn<#z?C{ z7-VUZU3=UEB@3oww%e29w3}=x`C{U4xvlmlw?_m;{1+q{Nkq{gTysNW^~6f|3q)Kx zh|^iWsofcenc|_F*=z!7IoV6&iz0@dXX-5l-cJ##&&~#u9OCd6H-m9o?M1H6f_IB_ z*AnXoye&_@3;3RbL5yF8y4-37+=RD8awHc`|Dv@c&O&%q^y zVR}UH@m>w)F#%G*WG2Hmb90f=7~mqWy}mV+R3QpN4UAi^%t}%z5MMdOk>`2N%%BYv zD$sLut|eX>#Dw3N^(p)~YSc>lQ))7^m9#+LnR!f3|E`=Te#muY^gYt9ZI1e7IN5vC zjhL#YERk6RaBy=xaA%kK#%)~S^Dr$9=;$aW-6k~Vm=-$L17Y}>r1w} zY9Sk+{dKrpr?l8qw0rEtBzM+iS`NCrOgLkG@>x1`XlJ}1ISaOekQljMFks-UG;K4bC`t*G9lc#0~-eERDG(@E+1J`oD1cI6-VWb2R_{#~4Gd#_FKQ z1+lTQVX7rUEL&iLtxg_M5vzJ-T(zX><$zt-Dup^;MEsN!Kadvgo}Cdn%6Y)|daz>tsSk z&5svB!W}f%lWJ}1kwWrrr->Ks$G#8km%k}9qeS5zMG3>@-gtSL3$c262I2NSbOYtt zM}=hF<6sImc&){5Vh7}Ki|1(f)`#o9G89bOM)^q6k>_*8{nFW#=_yzAegoW5IwV297qeB>9Q=CZo2K_m>8T z=ezI#B7NKe1HjwNZ6DJ$g3b^MA5osriQx0(K$xqb8~a}sYU|oaA03^e7OVu?(7XvV z1_L6J!I!^hnmRhC8N6;r)ht&*CICMOh%B5Vc^ARknBS|8KIoK0Mvo3-A0CNMKv7(K+|4B&OShYW?xO8{GqB_^_S&}}&bQK;pfSuLpfC30S%14qH@E@rR!%M6y3ajmHf;@Ob9XM(sm6Qq$ znN%UpJT~Td2KQNnIk!eC$iPq>{S9{I7}sB1J`0uZZlRK=m*98xok}WR!oM2Dph}A( 
zg=yj}W?O}Tn=!*1BY6?xQ2@~SkJ{^EC}8vSvJ@*K`miyL0MbF(%j0>bUQDK6BS*;G3ZLV$%mw!Cv6HmT=gi38h8&fBCuh1^bZ4@>X-Kj zuU}6g9ZB6kP93f}Q3?7zhx*Kxb5)wGF?`_4a?}~4%QzEc;t-u(*V@DT3P@hWkG zEyD^TC}d~KPwMrN;cJhCY!qQsofAdS(OICkPzw658ytpJv6|eIRW~>r?#(o^a-(*u z$9$usM36crf|SgYQY*q;?Q=atpat5Ewel_sP+B7h;)+erv?`7@D<}CwmboE+V%y5B z)$gnU=30U;avWY@zF!1^nQ#X7W={7lZ#IYtueenT)@^{-y%jd*Oyk}h0BY0b%Ok1a z`;>TIhy*;p{JOtZiad|7ww%v%gB^NS9A~8;L9@ny-1^26WYPt2DZBFDZ*aoyNJpD)j~K>2eL z5saq1WQLWr)OH;PB2&`cUEbJq|6=(%tt?@P(b#-PY&|$50+$x^-bT;o)i$d=iZ)q-6!e4rW z@g$9y^%uK@5{vs&sj)Vfu5K1vbqT&DPxP>yVYmrYB!X2tX+VAyHknZjGe)+Pz>%{6 zi~+Ywp1nwC7v#gkWF&C#czvl9#p&(;c>$ud1%hgbbMV+qslIhEW-t2$7ArTLj$z5D zC2TLudp@j^{Eag|#VvE(a&EX6`0?onlF8(0G!Uw;_~sr-gm=(79G9H_(Sc>BwT`4) zy1RRrs=qlGzaB4%G-NP2ID5w7?QICTX7HHUulKOKmt0-e>+h`7u55Xq+Xl+4^fv zT9Fm}+^kJr6tt&SyZa*WTcKDF5utBlNx2Gg9rP?3hqaxPSrG0b47X@`TI1l>fzhbZIzWJYuaLujGKb&M+Ev+b zYY@69hrtY|2JfGg)-}j@L4HCRqi)FD+RCvYWC4$h{}62jFJE>yZLmabT>&emSK3Gf z2hnBUutIctH39DtxZmgWMsRgyW^5XkFcNY+iFAuWkpkJv?aa(t1GHYNeJ~}3T}x|9 zy+*Q_3R$pPL)ZRCATg8+iu$z1qPwGCVE%Fcb+k9=ih5i2RH$TrUu&&$qIceob^cd- zEfLy|B}g4;4E;Nmn~Tk##=c~m-q6sO(%e3PsmJ&+m(l5-qPpiP{fnHWlPsUKE9U48 zT;ncRlxp)IPnD}{N;XPCTt>$ZuxlX9r+O2!7|bM~h=4Di9w>XW6$Y{7{#Ab6ej+sq z&0sEN?y?^;TKf8yW(HmZZu3DShx1X3Pm%vT1$E{#r$l#Zl8)b$QP{a!Z(D<-^F6>9!de1~!Gu2= zf`URxEy!Kw(rmF#zlFt)c_$6)+88%*J7im>GUUMfNjB3wEQ?8$U%r8nH{%Y)neyfK zzzExgf^;2eGMfz(RB;aYp*nt|pSU_h0!gh@V_d_CC<-F4|LsL(#M0ACQe>2b!1z4$ zB;2U+@*~Wc*u~=FGx2RFy7A&L?Z7JDOZ3%Zd6y^AfPifzZLY>mdXDH6xh) z5;naZ?PM5rcxZyh*@-MgdMC8b_vI<@@i!SnPcrx1s{*qM>z7bWma2dKS8XOKQsm5T zGXCNoREpE)MYG38bau8!W~|B-m_hVoIHb*;#JW3Bohf=Z-~XxAg7*oJ(vBrG@GS5a zWQ(0pSrp6>RVShi|4v6su1R%zdb-a(OOirp0wK~x0>!SH?+fl5csOC{7B#9Ypa0-?stX}Ei^bVr%A zUwbqVY8{E#{?XX>x>eDoLDCubC~U+>f?~7s-e236vv9xQfZ{Xby~_#>22{o%v&s`E z4T_woWHkH%NE=SHiwpKXIZW0ac)C{a7u9472t(mF#usSa4kR20d6Y=LG}FIR9A;{?^sEa z`oqNq7~HqkOD{BxylN#*9*)SPQQ4Wz4?9#BbBrMsl@fp0MZX4c95i}_>inX(;Y!WoY7qa%0a-|T7Z7Y<+4 zlxzx!VI4c<#yCAr?duO@3Ik6X*o#b2X2?Mj6uF5V6!}@ZTAD9B 
z;r2;-I0#VRJDJHE-RvUNZC4A*?bDw(lG^GzQQnz5I#dbEO@s77g9?3!u6&X@n^VQyja7Zo_FeBZb(4Yn}cN# zMQ#p+?3*rOq{9Y7L!%Bhbu~|`q2|Vgk_#(zT}@)f#3W~OWHtc*Q{XHC{S7_T3HL6! za#+DbyN@7%`NZbk!`bj0CySPL=kRY^DwJJppU z|I%3T?Rw>%o~KoEw%FJxm>pkM!Tz&dk-$VwOs%^Qdjg?XiU03ITr<69bQB|a%y3X4 zYT0%bBd(G7N+~w&M&UxXCH7AZZ%X?@Z3c!fH_bod;K9mZO;Zw6;}afIs5XQ+nVA^U zOUpaItc+5%pD1aFqA#aNs`VRSs*e21Y`HP;t7M^9m#(il%a#~3vD#2s0@3U9)i8uz zcWrR&;4*Z^wN5IESB`IcNAJgO|MgwPx}}q4{(|3c2*If@Zm-9zkmaCzk#FD}_L1Pr z?f$7^MY%-vqiduc9or#3R7z#xf+0)ZwbLKcQ7CNm((fqfq5#yRFjiu3`wYN>GVCQW z*Ev;=E4gGZp3bdX(URZro@+qv5^UoAngAnguJKSn-J^W+5lkQ%3tPl;$cU!ZQB- zKo(;GVm-PTN9lr&q9 zNN;m{?iUiqE~6U8G7LnAgaNVYFZES)@y68yf#;7qp5d#bwti&%`AzGFa=!)W#JhEw zuYzaJ;@#1%F231?1qu!KsRU`B(7WkRFMaoyrjmY{V`r4k0`PmQhtJyCw9Y;Ymqgia^h(nZBAa3Zg3K-lg1AyU}X z(ZB5>Zi`YU4q<253$=JIk4@|$c&##agvJmP4L|TAd?XO`yE0P>Sk!+NgY6VOD{-%P z=&Y0#FvuS-`E7;f4ia~ZX^&wc2;=&Avg|*7Jw#PvTIJT_Qni;H{(N9(shC0@OJS3} zEAT$D;DsfJsoc)j8(sQ0M_&c1laF46ntbVUNh%@PWdj=l3tP>Sv8 z;sZ$5ap$f6yq1E(e~XMl-E}6}ScEBkAh;<@_M&O*n?Lawub!|`l9VWPEBxhgl+_un zj_G7{k^Q}C4`kZ-%^IzrSSgcNWY72)-85G7sNc zlBrvZHl58z40`GoDFY3qVfs3OT6C<5k}a0JT*V>_w7aT{$(43g!JV%ejVkVPri0qQ zl%USBs$Ip?C-=?QO9)MVO$-WRU|s_p@zXIh(CwR|&!pQ#wYGeiVcak^ce(UE$ZNlC*lY1*z7JAt?lp!tN*>t znEhUt_kA!Yk~k-Yxl4`LJ=H;pwBwKi)D4**B@+q^aw|_HcNs3t%|rpM1sTvYmIZoo zFZKNRqMZ%XbM&ecSDO147+YRi?Wx^cI^fHW4J!O!+)m)?ooHUE)J(VFpG(M;nTd2f zUp8rdsT{vgh^*}erwoci23`~}C$3}iG}Kjp!SIy@or9IIvIb5+f(Ez z+pSZ$tQjf;xKQJY2g4QE~Q_F}jE26OvJfKjg% z0pPgG$?E~KO{Mhoh(H8}#_xb97zq9+i(Q}@L|T88iFzTe>emN5V2Ip3a7OQ8Qx~gK z{Z2qXD_$;tLjf#SU&gdlnaF4b+jT*;i->rk%HLaN;7i7k_o*2>yZyMJ;^t}mR!eOE8h5)rmsfkO>GSPQ7ogM!gtS0^a0 z+*KW-wu!dLXEJ59nY0-z))!3%TSaV}<5Rw*Rg`S>nxj1*O#%uNJ; zaR*sYdQEN|xV}yS{@vWyJ6hJcD37bED!#o4ty|rie%~vFS(H2_l7mwrFh?qe55Alp z(UYpv0QwkCI9s?(4<4K^DAC&qAfmI1cfF}w(R0`0gTg(T&H2Cfxu$7Btz*74tXced zr~l}~c=pi{)C#V(0|eDoh6dA{m8B(`U71$*m+j|~Js0q!Zc$b4?ZjQOh44sq-!$dr z#Fv4+lPAS`28u>9G!t_pBNiG3ypWp|n%&K>kGsdz+2E{E+%i9edB;vnLT>w^{=Jdi z?4~cZzQ??;+m-*d#mO;usUHIM+$eSRbX`8fa6ur$+@C^6(YU2pcG^c%M)JU;FH5*i 
zrjr(QYCC;bQ@wh=qTm;Gw`dTPraCQw(1kDG@tCOOu3CEc)w2UMzQjKFTHNveT0YfP z*-;joC5JWCDjX;FKYID7f0dp7u&evsTI%__)zl~mQvY2}HyTB8COT0By!Yt;gLT>Q z3(C>l-|_tbJH^bIXp|C4_USd)E#M}dI>`n#g8--*eaI4sc|lIq#FB}#8Q+|E%ArL4 zXeNzCNvL{4q|ggSf>=;AcaCs*jDPjR| z9Qp3|xhid;A7DZIWR6s{0cVO81j|x;&N0;QCu@2CC99+vDLm_-W&`>oXcQ@JQDn!= zIv{DXr@Hdq1tHkfBwbt`aUG{gfJGR@0H)A)lK)4{*y7c43 zHj#JIZy^YfWM_z{%8ao{rNGcvY5$7|(zoNZy zq?!GXM8TjsBb;4crp>$Qve$s>RX^x4iC*`1yqTq;(-F7DPo4R4LGdB2y>_Ep@d<9WIQt zIDC58?)X%<;9HnOO9^lUH6ch*Co0LzNCYy|pj5Zi9o4i+3L7NTbqv;w2~1IvBRjSf z>?a(E?^6m>8jj6C7Ey9nW?Dj$xn`b23f%`kYIhGgiqkHeK;_OmY`@P9ELWA))Q+ zlSyR-tU+Z|zt3&fjX@5ApBKXnPEf=LfcmHFpeLf>#WSwm#kk+c^SUz$Fn9q_MY{j4 zZwpI~PtFI?VHG48LGll-mhs7YoyLXaZNJkx)6q0DdIjE--xs@WwJn;$$f@<>I5XYi zvqhkSq|`6kNeH+Y0G6^X^#K*I5)PazhI?Pb&22C)nHX{0M+I*DN+mhqh0aO2&?S1y z0P8mU&xBr9GgGj`ssV@Y?79IlO?!d5h6UQ&J9}HUjxi? z)`U<~e$F@Q`TajVE6x(_Y8V#8unR&W#80DWn^89dt(7DZ!e0e_i<`04;bS3MCl=$A z92BINxOdX90p{rYKBE+~u0-vPP85f%62}#Y54yFyeLp+1eYGpSuH3oEmJU;dV<=Bp6p`s6gY(t%|&ICrRr~5m90Sw^i}R>pukFu zb}Bh5?1Yu28$U`>6x$T!4mHi9X@*x|e1RhauI#^!nY&qhGV-JM4O@K?9&X%hY4@CR zA~`wrYccbpY8eTm_I~C0dV&*!G$6l&+dpC9syq8lZXor$k$$x?r!}Hpi$tX=*x!Xhs<0F#3EgX4>KeXYrwzDLc7rMI?>;fm(D2Y?m~p1(MI0SBof>$~mcvPX z9qvqqwPU~)<2W+E$G%g8sm?BitLr&tvbkGRflU2DwS}OAnUPl`SeYT<2uCZk|u5WTL&#o=;b3IVt9pq$9bOHhXgV z+HV%ubXU0FT$YZMMhtrW!TkNPtR zRq)i!s3r%B4CS;MY+_X*{I=%+b&>rOCuQeSMy(NhFPtqLMRhDC$@C{?%*)Xjm*XCa zEW3>s7Qj4FR>gqscK(@@u*Jb!eVUD!Wdy(mx4zd0h)ZeYzv}_HciXR@o16D)ja;fg zsz+R0@!1@B$OvIBhFgqTxHk~6jMSi&Dp**kF>Wahj21&Ysi));5bubCE7B{c0*6=& z>1Sxu2rcNiw$?aGb~O)%-yc_oGFw|~@Eg`Gew*Vlztb=)(D-nrzW%}tp1xcsgKw*w*&DqpzRI^tr z*_Ip-);%6Q4_QcQ&Yqn{uUN)hH{^<5I zR27lu)j#EWAO@h!5H-)d>9HUR8WwZ#e8BQUBTd9LzoAg=za!mMCdyQymMRt;Sw4&072CSy~o)JU>6*u9FfXy9}2`YigRhv}5Wpq@ek@-9(E`3cDgV zB9PDwSB}nvf}s=>X;k0Av$o^)=zj#B?2C(mhs*EgTRSTT?d#T;GVQjK2lJEw$_^?0U0rbOR+ym|(P8p{LL@^7jLp-d*D zT10#QH(7Zb#sVQ6aXB+ckV~3I{|vpR#yI$`(<8_GbL7qodJVGuzW|CIwt}o~162CS zM8bM0N>J;M_w$J$sAj0ppuq9IX2N~T1ZEwmK7n-oXAICViu5yK!N*kyV{WMwm%W>4 
z8mzG=vZdhl?Z$TXRt*!yP^fLR75>^riqHaUk$aINFV%^?f-J9=PyTJm#xk_izPm3& z%K9?nEczrrH2bS}@20KASDz;=`zv+=84yL?9i@eVY9YFMvc-=&5#Re`-fS^icss+> z!^7c=w`1{#FJ0%v*YIeg#h-Ab6%uJWF1(dm&;}zD$uFft3F0wx4x!pWS<~!kgPPtq zV$Uulnq*7|u@36`@b7q}SDnpqR0k44%AN?JbIP|R2*Z=){OHx~(b^h;LflUGznDeZ zJBD2 zxpmfi$0VaMMCQJaQJOF?fR~pYa`67kbdbj4^?)C^#*qz0z(qtdsyxcvsT%WC@NxQi zj4_;_y!`(Eya3D20vhJHvP3c9mD}8jrp)anK_(=B444G6vbKDgD8fAz~ zxPj+<p;6bQcyhV{EUns8?4XNL<8q^mLu64{~irC0qMxZ%>-&;;Ug z(?#_eRTNJnu$@65WgQEX@taX#U_Ccn@#3C^BIPEZuMlWkLIo+jYV4`cr33KDW} zxMMQ6Z$o7N_ir}WDZKWps;ejwW)L_f;wqbw zQ4hF4BP{*2sc1PJqh~Rkdy`lj+`SW^Rc= z;C{lVJPw_AXFs-TGQQE`-g)mH|B8oT{POP`s1(P-MN|#>nkr4ed1Q@89<+OEmJC|0ua>g5eme?V8e{;WG~McEk{ zqkGO1_h?$)m*w@oNMahqMcZtdm+d38f3OJ_=z5_g79>5Q1PRO(iiZ5u?Tx$zixfs# z%Tb~5Q3>2iG$M&C{Q0Z{QpZ8n-;ZFiq)D79R{?D;kSw=ectMcHZ&$3xPDgr|^xA&> z(;jp^%enDem(BB_Gby^INa`mpwQ6TZ@*Ugu-V$|A8U!3&DcM}+`n8+B7x^h_7N+&&Q-QN%p&<_Et0xNsN0k*b zsce%)wpm;lKNYR`L|1-hTa z!Im09ni`G});C!=u$ZdksooiBPJFx9x_dC{p%;AfbTTDF~@7;rcZ#f5-n#0}wY zk;-+6whMhQWLuYirO(yB+Wd! 
z_%&@^j2olM5RB*g<@oZ)_&PaxM6K#D_N`6+{QTyOOJRC3~2e(Cw6dd=*|hnIkj3| zq@i(3jbb-^=O%*o4Pjj|b1XF4zHG0%?#gv(7y``T_#l$SNSYUc*B(q`8C8a&)fj~L zO7kTVum(Ug+CjGX3=_vMKT$^37ahRt<2xgUw74NRCvHy!Yl_|<2&PLUV zIArj4Z-SpToKP#+v}6`OGDn5o_UewgZf*)V)<2X$e{<^1RwBhz=ID*#J|^Iu;#jRU z7%IA$%2{ZRIkhz)vkq~lG=js1++vUVg@ld>3pv&y5)&S6AIS~gh1#{KoG~f+Na`WoAWwgb3Iy?vep0K_)(TygvbX1J96;JD!EKq z$6R2=g6swEP&fc@#+ae@FX(-f_{xvs#Sv z!1&R2#1`Kda<{{6ORw*u{W|Z}1UNlSOZXp& zBl5w#b}vTLXbi6a5N&^L>L{86&LGtH{|{Gh9oKZ&y^kxR(y7uQY=j^o-AIkmqJnhD z1c?y>qd`KtHcCkeMQH{Mqy>cmQj%k&AV`dquHPG<=Xt%pzt2DWb9bC`pL1Q;IrsZk zu(&3!#?=!TUPX8@29^1k{efHE>(RXr4K?i!E7HMl?fTnQGL}<+qlyB*lTSaf{ftty z80}|3<`k3B1wK_98MitK?uaj(+(hlRFb@@~eBu9+G#77ZwiEaLRE7U%4}R*W!@bJre zP*Jp@Ny5hs2EB5{5NHa`r>UCT1&(r z`D)w~16TP#bf+|N#C89wP*OSKh*_Hmifhs5r%D#NVmJiX9G7PD8}*2$f-Ap4K9!V^ zPl@lNJ6Ecqez7~`CfscM53xOc&qeRCyV=sXfxgRl)yOWf7ISIO|J-S#%=p3X*zk(9+Z;{o4zm}AT8xcVz_EqMa)h1;WIx^>AtMMa8z{g zbrG9W;%B4K7_G#JWT9hF$6nazMd-n^3wojTzhZ|5MY-sMnbUeAJHgV|K?-=)S&zx; zgR#?HTH~{U$h_f!^DAt@JXV{dgcb&1#tslpqY%wef4dh@-yju7g7K}}TOvg_;@(#G z$dGdcrMIUj4BoZmi&g3NFzIz04qg+pE4~^@&XJq-`&n7fR=L3VF17-mFMaWBo@G)b0o8%R z^8U-}EB&njyN=SoPyL&jbd)Pxx-qH`$ynq4m2^t!h^xbT2?Guo){ z{GgK5VMBdu`C(ieuLAj#G_jqm_TV_i;0fft$x%7cMBT-eTh+D`@uESUz?0MKVMs-J zl1R-*D4ml&Z6W4vROd>8@nbr??FgAuKsEFB`W&OGKnxOMM9cjyoFTV@1BcMYsH1ou zpyLhIDtM`Xj=l{t>I>-}YHD zL1Lgc%s!VXmma71_=qh)VY7~g7>8)wyqtqd?C%9*Vy1#2XqMM^* ztnU1V?WomZ4n1M-y^9tty+@GPW%#VnOV#jv`JuJ%_*08)Q!u{v{y!Y|XccPM^ z_~^77@si@WQsE!)w#C^IhjwWp(19_II67FDeQ}gE;^2b0_E5Ic6dD^Jc0Y(M;I$qI*Eqa z>a?w8ZmUF?x|MJHel+Oti8Rms=jxQF?E+fCHe5-@USafYJ*ugl13eRl#W41uD)hJZ zz?1kGHbX|Fpq&yYORZP@u(p9xf>n+D5xS;^u`@o9|ME;+4u@JX#N7+MajoqWk0a*Y zBFIp9>eE5O&Ad6`YmraXCO$n54Su9OeWjZPVu-Z9wJ4UKAW;)bR%se-korc!8l1fH zMAqcR+oxZ|EiGxs$v_MK2kSOBY4Qx9Ge^jsLnr0(QE0o)rI+m{ELM{Ujv+cl1NgMt ztOO=F1%06D?!1NCrTGag(eCW4Z*cE!Lf^*kN3Rl>;QGX8?`w*x(>ySt15F#xB&2qA zz`BU%qU#u&JG=CaYHxVZKK9$n$Z%AZ%CXNHjHPv50RJTNxfrq7nFuf1mU>U@=~a}lDvVVdR~gLG`tcN8h&MA}x}P4{XV)wjrxrmrVeNulgA6daRD 
z@aY%el)Il#1*ApUij$Csq!D@-~{@_;fQTg#g^mD0-1-Eg92`Mneel}OKJ?5Zho=_2zH`jJ@xB4A^J!coqIKeTCZ?@N>_JW>?(| znBSiOouA^U?^`xkIAjN10xBQUeRp!JCcFKbSC3g^M(myroGwAOm^+~E%nYKkRe1}9 zPNe4Vv@!6ZyRQ`yPY>G7yXx5XgjzsenU2Hw5q*D6b!j0h>dr_f=yR_vwcg=YsB!+{ z$K&4Mk#{*zL*P|!48zGxhB#@dUUikX0T1$F_JcH?Z&vq>cM3V-?Gnz9%!x-otF}?J z#ao%bCC>jC!8Xzkn=xK~rx_SG%xzM(#}&6T#>lI$!dZ}7>ubzK{ecIfP6fvMNR#)d z$DJkfYhV)+Dqa-Q1;5CU+D_l=##l?xy$b}^`px&wuP~7v-0I|C_Z4GNWHw`WF__u7 zswqq@l;FLem7vjRYvGawij}`MV#Gw6TWaUcZ*ZGbTuz(hqNDJM{8ZeiK*ftz=DX!O zI-$huCEp8A=}Vxd)_>-99m(H+`bbkuy&jopNoo}eMiNK@R{X`U--cz5lo&^w{8{NZ!y_Ioy^#UTb}1>#oR+U7bB z7dj(48|puSbu*BQc@RcTF)f3MbXcDIpGX4+{HMC!1ekr1*BE1}D8E3SO$S70ATCf0 z-Cctx{fH|sm;FIQ)U#`op?#b=f}1@3SY5qTA0v9 z7E-R0tl!Gkx7lA4Yel)~J|R3Xjs1CB&u%t&UiJeiN{@IdL)<}LoT2hnFdvHzFXZ`8 zSI6Mz`CL50wkwJORWde(OxmE2dWxMlcPCegA9|q-X>20Ns>0G4HSB|pacxgsNYWm` zM1D1g(b0~CsS?F#$K-X=^sB>p}??01tsDot|Bl*l? zd^`(5I1G<2T6icRu3XX@DJ>t9HSXFObt`C|6`12MFvo{x$Tdh@hYq^T~xTi(Nzf?U*`8=ZUaOp?1q3>A%BuvdxC+2b0jtKVXEBu zN>Qp2$eoLAtg!oqK&v>^i;I+-n{pzIl;^eNeW#aLTe4WjmEWXAr4VU|=#y@5;|eAY z@p`?S>ylMkq{B5VAEKTQ{T1sbNn=xEQspnUm!D$v%8b9JPN&l!`ty#? z+xz$6B>Kz7@n9?#!vMnr!Hmj5g;cU6C+zlv$^b5&1E#K!g2j;3WLwPyiS_Z&j!%7d zKN$1_OJ58b8A-A1(misDqqYRvAe!!I>xg|OEl=NW``$zGyxH;2_wQN6&bBERwneAU zVU5^NKf(g5ZqB8I7xMTLE54nhzT8Ik;!;s|q6Z=Ly{FudTH&nE5~;iS1Z{kol6r8~ zdtJzvU#pEQqT|3Cr$*rcvgtY@@^bv+Y8=kO&GWW3 z>O=_0Q(gB|D27}j#{er}BW)>oIm>faeJ=vn4RJcWXT$Boaa z^`pOy*Gef}@yTDjt+e*t(X9?X-LubkU~aDhGozEKgBiT4$4Yp|=4R#&6WYFEF%k|a z^_|CFA-HrLZ9yq3N+ZrrEI(bOrsMFs+2bsG)<(;)oBGx;kwiAB1TZtGw)3iBXSyIt zZ-lNo%7;`7oFKwwU}W0Ewd8Y4jE)R^bjSp`@PWhQmY5shKn$UUEr^|N;;0BHe5nY=d-yOUoQhkf zl=bepyp?eqnB!1U9|E7MC)JKytfe^$k!boTll*59M5^?BWZmwNK?Kz&4dXXWI8;2) zaTx`mBHnV{-{~h9I+lVI@TG98&TyO4jboT z^zDLfG%>98LXGs`j<@)QaDu z(J^d@(4WMX$m`rv>z?vPn#6M?S`1|5xNEWQ4R(|Q0>+QcVe(UzVJ>k{4DHqLtBiav z9uzlj4U?0T?;5}2)`BO!EFk`bUZwF*diOV7v#R(KRF!Eaf7bGd1K)X_*V?%V35c>; zLpr|Bus!V%?=tqf_JJB&YJz2o=G@S{(;(^jJ#LSw+ixAFTB>bjI|7x8D%$p^(FT9B z;@R0sCj~urN$Ko3A&&OfN=zhX|IKoAg+r$SGb1ePQjC;VfSD%q7_f)RF#9}SMDho? 
ziR?TuMu9iFQ(W-YO%W+KN@7G*e&`K~CpdPk3P*XtQw>y*8cnnw?4y#!jGVzz`wyHV zHQ&bhFMLKpeAQBZJ>+yOZM(aU{ce!|IF&lO_}bQXN!Y8&gZx=Om%zqpX)uUZT`7q0 zGzlIDvpR`w$-9eRA6G1f(6z?rti?^s!i2Gia*rW`tW)SBLvL&7IlT3;-N#|g3i4ME?gUIN@AkPFfhQ|15K@V~L~#s}Chw$yHzxiDHc+ zf(NN*{tnzAPBdGq!iHm~3mbYhci2J~au93r6f_j*SWFmJP)6-HAI3au`~=}r)Dkz1 zS@$1$*RV%W!>A)aBBEZipyPL>MqWH~o-oy@?khq07$Cou*=54OHJcx3+}4urt*EoQ zjE+LTX;4{d)BdD|!7_pW%P~E(CjHyulyP|dfy+Jgruw}58lKR}$rR0ZyzR6c;e0bB z%?zyzoN?n?n#7ecPWlxZt5e^LEDEWdS>w}C#<~#;h{}m&fCnA!PL~#JHg!;Ugj$k{OOR2CtSxD-N?w$w<7x#d1;YYZ ztR%sITj4_kbMzs0-sy5S_P_u8XIKp1<9TNWDYULA%57t~trW4A^uU0^YWL2wHvd?u z%GQf|e;Mcw8P&L5VjrB;lxy_{)1?KF=HM?PDSPG8bn5DOp_)GS^87rD0>zmv|q4)o3?adR;ax>$GWWD22H?A;G_{(bxi_i_{7q_@JyR1dykA*lw#1@U zs2NDOk{X1YsrKicQuyOnzh1od{v!2Ss9DkoR?I4s%=OfkEH2PRRNQIiakRqPczLLt z%Mo{+&uQWyVckP)f93n3^m`U`88LCU9uNvqNGK8e!IGu);-}|YFtdtOZgknW9}RBI zNugnE;L*v28+2G@z3%8?s!j{Exb`33;HR; z428*Q%*i*zVKEwQ{Of6_u__|mjXx>h|wtoR!)t!p8Ed- zUhM|Df-f#q+(nwiK1hln*QV_oKQbg@P4E>~O8Ymh4lP+{HivA;*YWbynDJ8@%d>I* zd#p>r3gj7{H>7!nXm9K{tCH1)7U;dHQ=BH|6|V@bxO1-b*}d*Z19gIadBkLSYe-97 zd@A#^gFX09k*iXBGdJIGYbc-W=GyrWQY|Z;6D*AD8i^gf?}up*DCQ9GwHNH9PWJKP zhHgBiJ#u1eK|=ieWLrX%n}ed~+)dRZ!>^_*fegM*FN}4WcmdAOARM_~T<@1#_B_PT z&W7{myrcLp$Q)R?;mHEf3r)Sst^+Qn$T-*uN?Rye&{*!7e&{tbrPpcMH&hD7lu+$9 z^T&18UypHaE|H3Z)g2!mkN2B2G0m9NFbS;;-4WWDFeYsN|SOlI!@#~M;4jff$46eZXY`rT_HFtinWuP+diF&>+Os;PF?t;}T zmXPvSDsANJUP(yfWo;zRKmShrX+1^?bI!Gk9hAy>v!y3+KIi3Z>X1IuS{R zgIn!>_G+%zyyGYo@Oj()IW=l`|1?l<$6d}%v_BeASKVJbnrLeV#;dxtUj0#sD;IW_ zoBymaUl!cjPp2UBWoaCbgO@{|*D{g3zX@NOi}$qmb*jHVC@F+wo{j0DZjD^9j>sfF}7=F20q8;4SenarR zp<-reCLr(I^ym!QS-*^V$!uH6ou92$N;6#$>WB3}HqsrNWEuAiF{ zo1e7$Ylx_B%Pbx?)8zZez~HLXjE^m|GjHT`-~u$6dMQ0qBx@J3riUlqWko%I(uWPO zzf^+NrN2CP(l|Gv;B7VSK13K~wrAa4iEwU|VZ0f=9qyi}l?<24Am~JA*u*+7%zSH@ zOt#9t9(>YUFo;%vGDOIzVnY%rw(bsX4CY;{ah+uc-pJqt%uRKj?Yu4I8q+;}o@A{% z+&$S?=|yQ2L3Ri}rRRGk>A3af(@ZQz$~y^|K`}C_jN{1uLDs1w_X1uW9wUH*1o+&h zaZ05_VkI&2x3$$kc%S$fN3b2Kws8s48pcwKmqik=y->@bqZg2m+iwa_;>7x2Gl|)W 
z{OSV<9O9yAL5!S5&+Ld{tmy1G*!0b?tBwc2t2Y*f7Kx*sB!mZ0b+)!~KYA zq~~Ap!QC642FptDpv#6G88J!0WA?hRfW}Xw?x{$(fKsoL?LZGwrN)82-#26&>XIXk zKX3-sVF8 z`}zkp8#_wvVn;r`3|re{!gE<;MPFs$dh{(+EG>=lS2u<9+x7MO!g8}5JC4QvV(gZZ zE4{Yw|CtCTso9tjp5B?u@!BGcUxZ@pXKtIe6BsVVsY3+!#r=fTalsb>jpG>W3rj=L zb_aqPBuC_eMPPmg*UfmodE6}7AAv>eOO(+oR5|Gxz^~Fg(FIJvhO?A8@q6i|!XaJn zrB_ILP#(Y8o@Zh8F$6xe!8=&_`(FFQH3(*CtCHTO6)R~7qMcbP&68=+JvX6}ODzLN z>~YUN=0X#0_I!<>2Oy_Str?olmx)wJ3-gCmz4m)ud7LUH8F?4y;;~whw>0<>O7G&~ zX(m}5Uy=vH*@gQb$Q~l#(x53d%=b+XB{Nj;K0SQ7>e5`_WqY7%@FK(v7mXA;{v`y% z9PL{ot**~kEa9 z&sK^5&bag)H|Ki()Eoq`#-GXB5?9q;=|yLl+-f*W0e&{{P1^=HdzAV!OHGjDiLxoh|Z9db*;ut|gc2PG9Pq20;6 z9dlp90zp%vR5HnN-zfZ2KvU3q0|?j2&ur(&2kmS|=)m1RscT2}I61zrr)XgH$}foA%kM`>db_-=zo3K< zjL5Y&&j&8NF4-aHt-264zqoVXSc#>dFnH4sQJ>N*Wxs9bV?Sh9U#MYLU+-_WZ~1-Q zpfYHGVK8sN?d7b!F+{qF3vkbBf=Zwg^|kp+qd=4YW0c((=L6|#^5IujxYl$dAS2Do z!sLXgnk^a;Nc?n=v?o*KebiltIudAo2_lH-lXpVLnzbm$jE;B+x`{YNRl>8oA~ZFc z&iLK^5FuZ~m0;J`&)YE1Zm)Nf^xDh$wb{${hgK&XQ}oL^B5#GuomUg@8$N9_ut$XP z5Pnm$^$5G%AowLDGY=1R1|`9TVPHnwQiU|J8dmAV_Zg$@Wg=SRQ-1urg~Mp4~~AHv_uW-vFtmDM9CPId6qxGrUu~;6joIH@;rde_Sv0Nq?(Uaqm-C zOYOR{(03&>_M1`y4itUN;t?^;X72-sB`cS=f+n84koPe+o?#0bY4L}c&zQerv=jUs z5i}i>=UR8;F{98)YnSyxmssA~z-HLOz>2V!pd=*nTS4bfh7-3C4w#vj1f3A*K?l88 zJE6mf1V&N;c%CLO`62fx=U6V_6>1w)<`drUu7N9=1=lVi$y#4!b6;g;J|Q5 z0`Qq*THL;<^3QvBRl?0SEQj-IUBb;e>rO&u&s9f79DAPs*$zXvt!kE?F`_!Ru7>=! 
za($yNAo;a&twD7m;(JxJvwaHE7{s^Paw4>jOO}!8jjYY*1k=8pX_AR!meAVaKUor` zGO3M&{b=fb^iH0*Tg@|A>qqZ3Y+dxsTo!%kZu!*kZ7PByC9~uOkyfT)vgrX;uTp4% zWO>M5QO-@dgiSuLvL_3+nJFDdY$t7`f{lDD1G>BhV0=_pgV&mMf0oQ*?zD5l>vF@D z+-V);x`J- zn&S|&qljZEa{Ulujfp=mDAt)kxnqmz!53To9*>@ylT73HA_`d4D>5iH3VT+sZJkf9 z>`5BtK5@A-^HTd6+tpJKzh$m{jiuV)eZEiT$NDd)>={#X&c)tov*BIvb+mU}eaOpK zgPi8V@ziKS5fIU3kL4) zCXe*5STnzmyy`G~P0QksjxRVvSr4Yj(hO%M5q~02xpUsiUyamUy7f|!Pg8za+|n){UHu z%4h9IJdFo;G$6&Omp>Y<`~V`dBNHi#5G!Xv-B^rK%WyL*z>6Bdr}AV`a0?PCB*E>t zlFzK{r&8lr&HrvYAhX~4?S_eE!Sw@W{H8cHOvjhc|CzC@GAbEv{^tKl>t1&+XNE$C z59{A}QLA;aZbv2BGTkSgKMn|2>ik&%RDS}K%aW4{8|Bab5~sS4a@NRp+x>%aero28 zW)q*^4|6`Th!3AVzC}S=v}Od@|L)xd{B#`1PY?;eRJw#*U}p8qq(ZzhyKXLysr}}i zoc`RP2Nf#+QmS?=i>9lr#S8FKH1UUd9<*R{_8+1|+DE|uyyXXQao45f_ITR~khtOn zbVi|EEd&3SvaTKqcEprb@X>FWW9%5Qh)1+8N?6Hm=qp_9b~EHdFd{5J&Xs}fQc*Rs-0o&fF+ciI0d{barxR2%PisgCc?O#Qbwk&om?q^x5dSfYzXTRrd#gf0#dd;?bAB+tqe2a4k24 zb5affkm~&32v0A4bKNVna2j=Y-m=c0^ywHY$#zV1X3nR3VQgzBszLM0eDDUU9@G1g zVU2ok!sHrNOnP6e1V7tQxLOG>NvN<}Zwgomc=^Rf!ddz(V8UcZbBgA=U~f!DJDefz z(O|L%!#|htX$VKInFBk0Eg{a;TP)#EttBtC+^UgmqyBfg$ggcBmAdMIGW#)M`^{|LP=-pQm82^kKi$Im5o-aq!>{ zI`X|{;)RjZ%d{EG6KDCU>0Ug&q1`-;j$p1kp5hni^rLY1)d)jWKE&Mfb=582Y#(;!=QM#$A4$*|4AA~W{Kx0Slz%pf15=dtz9hk!F>1BjvRwF$Zs4S-tnytG zH3RG?cKzS*<)o;oa~%EkN&Iw`vOLE${P=ME!G|djO z17(ugz+2y-#RuBu9!UT`v;6}=Ms}kwQOQl;68_jFXVi1z5y%5`EXLy2v}Q$)puLv!$k>>g4 z?h}RwT?Wjy)JtYNKHA+lqS!GEa<&D)qnz~7p`F%`E!FC>ogfgp42G-SP}>XY_CjBh z3JRcAMVeZG6Ty|Qr^dhy*4rVkgW>&}@8gyTjoDr}1R<45VT5gf4`gc}YK?HF9R075^P@p+RjETsmo+$JU57T_Pn= zgj;2IMxCuZIs{h=wV2R-Cl_1Ib!!h{W3PKr@T^|cz@19j`6cEl#Ic*=CtvxY+Mdgh zLGkI031p|8T)3@nHi-OS#$PxkC1nvUrQ-kJ9p%Cg=uBQ=$PMHQsPuFKC~HQ1<6PK) z)W-wIy8f>AciD-zXBr!jaPchqjq~5Gs=fu3?H=7Dm8b2R>kDtzW<#&9*>`+lJAODo z%FRC`q-WK0e^A%_)SAK~_+VgYN|9c6#vVJHi1n2z1RgHpGR%PQnZ z4SM&?(Qm(;Ar)Ytsh7@@C4z;z2`$0YJbu3S&EbM&YIOUbZUv;l{(v>BlFQ{^U2M*O zMe2`#Uda}~_1l%?zM?H^rUUYsba**)_i}D9RieT^qx|+a`bU2wr zZs;p?JXY<03D+JC=b0)7b6cuo3c#QT$%I(pmWl23uf@guZB~lQKi;Xr%u=F1PJaG7 
zfKnZI5W38tq^hCd{x`{sfTmme%q4L^f+gfTV(g{pruXj0n3|NZ)<}XcrE4Qrl6Q3? zfuj_}59&^wSzui3skYzCPAq0h|SVzwJj!ARVeVao-w})G)s>nE%9v6y#fE?5l7VQtyI@@2^KVhp`MY_DRhlSZj299pK%scXL~2?0a)_%fYF(9D;aiLO*6%{)MP~Juvq!`A?ghv! zyMs4{E}1=hh5w@9G#%KW8XlvdpC=Ski2)hGH13Uifu-GqC_DfFxaK!&oo_%QHC*2V} zHX=2Qm^ITKrf8r<+J87=M{>alh!Ri03W>F*w zu9ik}aBw1UAI2hRq+H*87jIAuY47Gm5qA6K61lYjJA+5pU6qdi6!^h1wRL<#KV<7m zB%7C9EE16SJ$z$KKrVVyo?LiF=?4eBaM9l9hu4%VESrCRr8{TekSb#3^qygQ2xKMO z*#Dx6%StYpJy0ejKzpE5N$|!`akWZCT0E!ZZ7W4%?_Eg$VahdtlU8W3k{+BN=|5B+ zUU+XaZuK$GjSG)}hfNMFxlhvpXZ+o>Asr0T+V(@z11xkSx1L65bM$tGgwSoagvmF=Aqpn~Hl0&1 zM0V109Af|1x?G$6^*^|*s1co5j4af;kV%bKv5!r7t6`WmNsB9Qr1sKFs%f-MauYVG zzDC6=`)f6VtoQo|4b0a!R#w!h>RoN)8dlVqnU-WlF8*W*_%ojSO8Y{Gz+DdMmBwzj zp9#;5GY|AL@(CjV&1^p#_e#bLkfYfQ0jdBO(8SOg47G))uG?|p^}rL9xTTg1?MJUT zPkv_h&4=;<(l#k7IJfZzzthhA)G)pzG~;-PIOMvv!224JtF3=Vkyy-h#y4fia|Rp+ zg#SdquqOdwU<$@kmMiHkNWtzF!Xwn}FNSG8cbYiJ?RI|OZuk4qR4E&4@muP6*^66L zrN_@EU)xr^CDH9~`syF{iuGzRiZyTz_w$)Q0H~9B*@}@{*h&+kS6yu0ZDIrR9U{vu zgVD|&Wf-rJdSW2dbgq^9idZJWSv&kTh}1D3Tuc65cfs!eA2#eE6DEy%xZ-YW^z3Ym19$)|b+RIkIzkUnau3ztOxZg)>P0odHTQyH) zkQw-kCoGqlkDWhBqsP8-~n8kKy7=Gigl`s}#=n`s8hwBg2{(?l#eadW= zyYgh#gmSc&^d-KS>DHaaWfuwyCO#sGTGEMA0z01&Hu7q^XDCKPs@PV4q#Mw8yF;Au z+~afQd051>zs!A7lwHXJ1mnK3^aqnIHKtG;FS(|MDYy?(F74}%8`ILw&u~~%>{Y{5 z={NH8@pogMaLFSH506YToH^tJh_zx^9$H$88i&37n?(!d*?nJpERrI@$A!+OR2Syr z#dX%4$eR@^14O_O@;sd_5wU6>U#OcY`Q@R-W`^(X($#aO2hWKG65p<l>5;uuze;CaBa_BoQxr0a>vlN;4El0$UMc{A`HPFgC&lBFYx^ZD zPKs?MP|@_fb{1CeZg9cydJuHbJjeTbO$|OL(;WBuW+&d!UY#K8ed7B1;XRYS)BlnJ zugQ0bP6Hz$7BK(41w!8boFo(?{Rx$O^L21(g_@>&cda81hbhDIN)D?J~7z zSqoWER?t`2ODXzr@$|%A$~!m8m4PrD9~Tb5Jp(E$Ijy80XuF0axJ6wW0NY2A$>hRK zG(ru3^5e8^p2Ziw8h>njv64JkW3d?*ZpMplK5e1n6S^Y(Wpgv{XuD{w;mjQ8<#R!0 zEV`E?8z}yAimrju=ci$-vSpKP$K%6SW5a_4OsgE-Wu8cZftAaxFjeX#ei%hH#ese8 zVkpPENo$od$BXq&9@<%<$yT7VjsJ`{=GEb(!HN#74hxr}6f zEZSL^&4{SzFFWz=O$7#r_Orbvr4H{BMJ?e?O@?U-4R1v&&o8ykU8DyQIq7XPwv#fG zKL2XsVYF9st9`_ps$Ytk|11zRpK2ELuXw)CXRLJM{%6o$x@J@C{(`yTtgEsSXy_Gi 
z^+G1w`~9R{(Klbig-Hw;JS4(z>c6fqIq(U*gWm)Y~=6K(na(bv2oxGzUV}-ou{&-Yc`N-aI z-+r|Vun88!A#iJo5uiDFr4YKQeK&U8#wRO_b&8&Mcy4IMgKo8Y0Oh^(|A$yKIE&#L z^_^OxXqY+vrrUVCTBLZIqV@_+NLI4Ih=MCI{7P$|rRp^I82 zcFZErAeO#X>~L~)ULv9PTF5QR}I&i970jum{2*vJK>7~UU)`ptqZy8$& z@gmAU{e0dPZ~G_#iyYm3G|d6jKcZGgb5or;4qZr{O~sify}mpaXpdL(Ux*ohZ;|Mq zH#D?UbjxEz6}W?JX$A>)3VpTo4hFUxStqIMhsjZeH!3d_{GFYUG3HaLk9JJ91PSuB8BwP9!j^pq`$!dQ51d8Q=fmK>khvjEjiK&}v2Wpq1mFK6QDxVB_2!7PXr#C`$tlshpW=-PF>!iz4uHoqVG5-97GWEUE=D$95n zFhw)*>(%#ak*g|W?zNWaTD3``ibp z(^#gvp&_^Yeq^J~qyW7K5Af73L!juaJB8fag@+iX`EvwdtM%3hmgs#cU_ZO=p%S(v z3W+t9i)feZfN_z;nsVkyH&-he(jCWxv7|D_%A!wTnJiJX8)dm?Y*ZW*v`b}l z+W6Sanvh-gb5R9vpm`bwu9||#=*zamn>Sp!a=TnpP5CL`MOO7!MGMBKpoc?F{`b2VrwS0juhvU$Imf>5Mr;OMPzt1--As_dP)-GnU)yVYyERf! z8Hm4A|F^aLb-JYw(9^Vtpyk8|LF${qlhTRU9%!iNl5zsPZWi#AFIcGPbcGs*yO~LX zLFF|TL!+a`a(x99#%jiUQR=D8CQDu&w}>t&x2-Y?h+pMd%Hnv}J-MUXu84 z#s}+s*1-5?Ru~0*oqWrEPoCf94Q0V1bp1iYd27ebjyeDHE4_+V<{Atw{^L|;eHfFm z_LJXd^R0b5jEw_l6Vyu&)VeVo;drO~zGb#<;_ka_j!Cv_i$7k_gR7_4)iBkZYPrjy zuN^O&@YYLc3KJ<_IcJ}G>>IwC<@!O2f?|S4)*h$bj}DgN)yh8&qhF_9YB5Z7_7Lr( zzb``zQR`!N1KMQYF=mpAS-KYCmNsotriI8{(>f;tH-bL=_D|x(Thh6-MoIREU@@nP zq@%Z$)q(b{NXN`?W5%N}v%RxB-yOjXv@>_5T365L7R^g-BeR`BUm;(jOp@W;<#xZ~ z0a{jt>Cs<@zLG@4xsUV!WNw;;lJU|hPT|ITksMH$heP_NbH!J9hXC0@FQ*%11_ThG z#3&pneZw?0o1~1HBc1j$wUlm{zyPy(QJ+|{WE*8*qn*0XwPs$YnME5cDjlqEg+#Vyw=kPjtb8YU59_t2>=&VFUEQ~` zUi>Uw0W}*y;#d^3`9{;v?DpNQ}zOyBq%e{6PLc3E>=mp#Ip<@VaehTnO-hXkn3KPCEbk`W;0+ z`*K<3SJVBeB7}wIxtcMhekc${V`t7ssAwPQV!Tq3pLQNB=6YPX40kK71s&YXzT2%? 
z1wh>KEseR)T^z8%Z9X(0ZJ5FZ`j;lB`MdWC-ik?88<=c6{O6xB9R8qZ=rTZX)~y+` z`wctE@!tpmDBxEhqs#+69@b6~#Qp#w8*K9Rs9#6-GBR~CpvKd!f|zoAev$GC3)^o7 zo*ym8F?y){(Buz7Y&IXLXlh2o<<^!&=>J^3cB2hmH*K^mTOEjCiN!l(C<$y5j!&~A zZ_1sN!tf4Mdep$Xcgvn(YHyquPA}6fT*>@}XXit-!bytvs_q{YJAD4v1{`;CyIKS2 zluJ~NRP4wL%(Y?St#Fc{6VvP;?E=csXN|g27o{g`rQ|qBmXEmqmIBE7lpeIo%4e&A z;rMcKur-G*S_2zS&X=K_@tS&BV{ernI?p_%H7>+c;t;C!VRt8ARy<}R^l%lo9Z=3rhM8-KOl@{dNZqw zS>+6Hdkst<%p}4Yk9-ve{J`V(t-)E^nps@<-gMy#`tMTswL5Zbz}+Td&0SV8T7ZG# zrtSy2K-&00RjL|54^4F(aj-8yh8J@ee&?QT5g9;}#OZD2Gp7B*8&jQa{e$BCF$*(g z$F7P-JncE!5_D^(%UXkm8%zLnaUo|9*zH3-a5GOA5arRm0r-w30V~<_Xx-A>^!XV3 ztD|TrOcroDI$9?sJ5sH37+${Iq#8b!03^27`0PtN``%v&c;g5p+jOwuD0J$WvBU`9 zV2@Q|L2Cv?tM&Stjh?6%`O9ei4?V{lYWx737xqhHE<004O8^N>ZL3f#VB2)cFg4Oy zo9cO{GwEB7mIK73rN5xF={9rC z?mecTRN7WAz+6cB9P1Uee0MrME9NCqSsA&as9Hn0a#1w=W|JZP#(u2Ga6Da#*nt-0 zc}@u%P%I+U;VBM$)G$)7ExzG57}R$L_UXEvXW_D!W}Ou2fu6R#>Adnt69a5LTa@JF zvSui(Ti7&u?m6pSvj|4z+#LSqF{Ry}Q7z()ZN!6nTcuTTCy7EH_TtabN z_^HPRT~<8`{_bROQt}hArj+Dx?Jtr~Ja|O78AE;<8x9gypVeWSGtdw|R_e313^WHe zspauAMUN`3b1b)c%>ey3u8$kj?An_h2Lg(#2~_>Y7G)XEYnJigachzzdoFyC>2 zpKxIe(B7=?UpLRMu`t9(6!iMHd$Zgj=4X3QP@EbU&SFp&BTK)Qq;rF6dt$N9>X;u(k=y4-KMJxkwrII+XI60k|YS_*|P}@3> z^3JBm_`?^vidp`C{z^I@9t5aafBi|uC#cgV?C15@ePk|@3(a=Q?Bl5Xs9&B0O(0&( zURVYS;-2Ym>OB5sw~xilnm*6?CeuoQn-6)vBb+yto>CTkydS}?^zvp=&WLG|15!|k z#`t{C$hhV?Jw;==oVbhedVZ0cMe$PzUHRv(uF3aVUuH#7Is#fSImHtT#78yE3)+Hx zep-?qrIM_$Csr;n@Wk@cCRN*rw-G>4NXrskZd^a|4SK6<>+|{{oQu?~6@Trzudm1) zN^fxg4}?7~-!)yU1o=m<&&TWz;FKlI(b!d>W~zKI2mUk};1%CIIDX5ny+8FR-@oBL zoz8;z`jK$+y(E(4xexP`t3=n2js`d0Lfo``$*(uRsQ`qCc{4GGSOD>;VLqhC2$5X5 z3M_uy5q$Nbwkp|Onv~!>BJmkCMB%Twh~LI74ry@cBG$$zWxZBYs_+kNJKO5r7j9AN zP9MH`_^98Um#MpcMSJcWM)$HwL9|^FzG2EJKNl(sPookDslKV0?)4>2mx;J}Gb62* zO=6Jz>U8%$76}SDt+w=se2@TmN2-V0YHwNGH)<9K3Y9it;+>X5;KKo^Ke-pD%S@y| z>}X9>BS-@%Hv9i8>e}O(-rqP`*$U0&kXtt2nbL7Yp-i>8%v_^R3dj8`CpMQOQ!Ww9 zC6Nuuy`-kxj%iC0MVMQil4+(o4!AdD`i|jAhtBM7 zPDcNsdA0>MDO8D^1P%8-6#Z#%>abwa9+4T~^sVrAYYez2pgleMZ@wvxi)>>is(N3Y 
z<9-Zin@nEEqAS=ybpwsuSTjbO6?aq%aOEh)CL1Blm>&Qu6ux-cP z?w7}apI3u+<?8 zN!)1GkUu1Hkyc!I5=JLQ3~vDG;j(cLWrbMSh1|3*8pZ+k_I?E4hwvr3VNbOcT$K=xix-&OUP ze$0Ic4hBvr2W$G9D9>JWe_v@K!j6uIIk`LYR$E@X9%@IKgDakUn5NLxQKK_YK=1eK z^ubfjx_@qi8ZslUsWKI~PouY&&TItI2lug17b+$)B^5~GPN%nr)^W~&j4q`Kbl~+I zxHp1&_=}d97ak6pmkH8<?6YQ-E#sUi%T*NI<8Zop6GC$W#bs6UIhmVN(e|KDtC$KL0R-x}8}<*0Kfq}8%kDdDcw{}&Gr7~iF<^pC@gRIlf{!9*W&W0> zw2>l)Go#mRJWDta6LKpVU?9r;9-sj)L%6${A0m-aiwyvL1(RJ(AF6sN99xi<)1n&1 zfuWf>BoPq(YzraJ!GRY) zr!^!LBpe=j!L|_ZXVrw_!;<|h$zZ>3sc!D_wm!;xoZfX?B1~Wum-6XIwJaw*0x4~U zfstsTf-w19=V%q4Vqx##?s8uyM1d;bw~$l19Z_`WgLLr9ns?ACP=^n4J!B_g(`Ex; zf$8CJAFl+iAJi6L=NzP~DGT$I-5@cUKa|1$v2~7K#bMvg<6T)PW&7gJD_pJA)`=5xf@<0TQ27#;!nc9vZZDaKJm2<$6C?pN z4vvntsmfuXCb-2Qgd)*1RjlXY>q{{e926@*KVMw!{L<&U77#kwspcz}i+4^|HT3*K zP_(_)lhbj9Z>FP$3gpSIv;BQyG)u1RjCgWsqNVaAR$>iITcB#V!0biFe;jzC+b3i? zQsp-ZPHiR^VADv;~#6n43|)A>~qpq4|a&n`y91@Io~-4$5~xX#d1nl-#pD1x7RF> zxNVZC?ypqfPEEB<2R|;ds8JQ?e#_0$G;en{f0YNs%yf zX;>jeRwsU^;cx@qc42=%t$($-Gx;+&V$3SlG%ZYc+!O z8g2sKtA;ki=T(&o&czr+1)HbJ33-l{DxJEyPF>kZ=q&fI&@}covVd-&Y;8|gYMRE% z0F5T0|4;z>A2GDwE!PVm7mi|~cy#N$B^VpJoX7aSJb3T`H(wu~?xs58IW#*g*9kG6tUxaF$rFJ1{csh+$J{ysllIzMUS8r=NlPFi2+m}*pSYrf zVu5PL`7)0kqnseeOV1~5$OFT%&b$G{;C7B-K=3Fe(%lk7AU$(=`%|nM3$!I~53!R7 zJzi`H<=my{8_g_Q)WoNz@$A4MTbs%qKCBsP5zRbEaa91YDFxmwapp^s zK?pm#6$~>(I^?KeMJ4^cKSK{gPvxtyLvR&af*?^?cC9{VCNu6TNun;~4U{`_yi}A3O&SQz&QCGA1e_?r8fbosRqZ>TBn5u dict: + print(f"Training with lr: {self.lr}, backbone: {self.backbone}, stride: {self.stride}") + time.sleep(2) + score = np.random.uniform(0.7, 0.1) + return {"lr": self.lr, "backbone": self.backbone, "stride": self.stride, "score": score} +``` + +Ignore the `task_id` for now. It is used for parallel jobs. We will come back to it later. + +````{note} +The `name` attribute is important and is used to identify the arguments in the job config file. +So, in our case the config `yaml` file will contain an entry like this: + +```yaml +... +train: + lr: + backbone: + stride: +... 
+```` + +Of course, it is up to us to choose what parameters should be shown under the `train` key. + +Let's also add the `collect` method so that we return a nice dict object that can be used by the next job. + +```python +def collect(results: list[dict]) -> dict: + output: dict = {} + for key in results[0]: + output[key] = [] + for result in results: + for key, value in result.items(): + output[key].append(value) + return output +``` + +We can also define a `save` method that writes the dictionary as a csv file. + +```python +@staticmethod +def save(results: dict) -> None: + """Save results in a csv file.""" + results_df = pd.DataFrame(results) + file_path = Path("runs") / TrainJob.name + file_path.mkdir(parents=True, exist_ok=True) + results_df.to_csv(file_path / "results.csv", index=False) +``` + +The entire job class is shown below. + +```{literalinclude} ../../../../snippets/pipelines/dummy/train_job.txt +:language: python +``` + +Now we need a way to generate this job when the pipeline is run. To do this we need to subclass the [JobGenerator](../../reference/pipelines/base/generator.md) class. + +The job generator is the actual object that is attached to a runner and is responsible for parsing the configuration and generating jobs. It has two methods that need to be implemented. + +- `generate_job`: This method accepts the configuration as a dictionary and, optionally, the results of the previous job. For the train job, we don't need results for previous jobs, so we will ignore it. +- `job_class`: This holds the reference to the class of the job that the generator will yield. It is used to inform the runner about the job that is being run, and is used to access the static attributes of the job such as its name, collect method, etc. + +Let's first start by defining the configuration that the generator will accept. The train job requires three parameters: `lr`, `backbone`, and `stride`. 
We will also add another parameter that defines the number of experiments we want to run. One way to define it would be as follows: + +```yaml +train: + experiments: 10 + lr: [0.1, 0.99] + backbone: + - resnet18 + - wide_resnet50 + stride: + - 3 + - 5 +``` + +For this example the specification is defined as follows. + +1. The number of experiments is set to 10. +2. Learning rate is sampled from a uniform distribution in the range `[0.1, 0.99]`. +3. The backbone is chosen from the list `["resnet18", "wide_resnet50"]`. +4. The stride is chosen from the list `[3, 5]`. + +```{note} +While the `[ ]` and `-` syntax in `yaml` both signify a list, for visual disambiguation this example uses `[ ]` to denote closed interval and `-` for a list of options. +``` + +With this defined, we can define the generator class as follows. + +```{literalinclude} ../../../../snippets/pipelines/dummy/train_generator.txt +:language: python +``` + +Since this is a dummy example, we generate the next experiment randomly. In practice, you would use a more sophisticated method that relies on your validation metrics to generate the next experiment. + +```{admonition} Challenge +:class: tip +For a challenge define your own configuration and a generator to parse that configuration. +``` + +Okay, so now we can train the model. We still need a way to find out which parameters contribute the most to the final score. We will do this by computing the Shapley values to find out the contribution of each parameter to the final score. + +Let's first start by adding the library to our environment + +```bash +pip install shap +``` + +The following listing shows the job that computes the Shapley values and saves a plot that shows the contribution of each parameter to the final score. A quick rundown without going into the details of the job (as it is irrelevant to the pipeline) is as follows. We create a `RandomForestRegressor` that is trained on the parameters to predict the final score.
We then compute the Shapley values to identify the parameters that have the most significant impact on the model performance. Finally, the `save` method saves the plot so we can visually inspect the results. + +```{literalinclude} ../../../../snippets/pipelines/dummy/significance_job.txt + +``` + +Great! Now we have the job, as before, we need the generator. Since we only need the results from the previous stage, we don't need to define the config. Let's quickly write that as well. + +```{literalinclude} ../../../../snippets/pipelines/dummy/significance_job_generator.txt + +``` + +## Experiment Pipeline + +So now we have the jobs, and a way to generate them. Let's look at how we can chain them together to achieve what we want. We will use the [Pipeline](../../reference/pipelines/base/pipeline.md) class to define the pipeline. + +When creating a custom pipeline, there is only one important method that we need to implement. That is the `_setup_runners` method. This is where we chain the runners together. + +```{literalinclude} ../../../../snippets/pipelines/dummy/pipeline_serial.txt +:language: python +``` + +In this example we use `SerialRunner` for running each job. It is a simple runner that runs the jobs in a serial manner. For more information on `SerialRunner` look [here](../../reference/pipelines/runners/serial.md). + +Okay, so we have the pipeline. How do we run it? To do this let's create a simple entrypoint in the `tools` folder of Anomalib. + +Here is how the directory looks. + +```{literalinclude} ../../../../snippets/pipelines/dummy/tools_dir_structure.txt +:language: bash +``` + +As you can see, we have the `config.yaml` file in the same directory. Let's quickly populate `experiment.py`. + +```python +from anomalib.pipelines.experiment_pipeline import ExperimentPipeline + +if __name__ == "__main__": + ExperimentPipeline().run() +``` + +Alright! Time to take it on the road.
+ +```bash +python tools/experimental/experiment/experiment.py --config tools/experimental/experiment/config.yaml +``` + +If all goes well you should see the summary plot in `runs/significant_feature/summary_plot.png`. + +## Exposing to the CLI + +Now that you have your shiny new pipeline, you can expose it as a subcommand to `anomalib` by adding an entry to the pipeline registry in `anomalib/cli/pipelines.py`. + +```python +if try_import("anomalib.pipelines"): + ... + from anomalib.pipelines import ExperimentPipeline + +PIPELINE_REGISTRY: dict[str, type[Pipeline]] | None = { + "experiment": ExperimentPipeline, + ... +} +``` + +With this you can now call + +```{literalinclude} ../../../../snippets/pipelines/dummy/anomalib_cli.txt +:language: bash +``` + +Congratulations! You have successfully created a pipeline that trains a model and computes the significance of the parameters to the final score 🎉 + +```{admonition} Challenge +:class: tip +This example used a random model hence the scores were meaningless. Try to implement a real model and compute the scores. Look into which parameters lead to the most significant contribution to your score. +``` + +## Final Tweaks + +Before we end, let's look at a few final tweaks that you can make to the pipeline. + +First, let's run the initial model training in parallel. Since all jobs are independent, we can use the [ParallelRunner](../../reference/pipelines/runners/parallel.md). Since the `TrainJob` is a dummy job in this example, the pool of parallel jobs is set to the number of experiments. + +```{literalinclude} ../../../../snippets/pipelines/dummy/pipeline_parallel.txt + +``` + +You will now notice that the entire pipeline takes less time to run. This is handy when you have a large number of experiments, and when each job takes substantial time to run. + +Now on to the second one. When running the pipeline we don't want our terminal cluttered with the outputs from each run.
Anomalib provides a handy decorator that temporarily hides the output of a function. It suppresses all outputs to the standard out and the standard error unless an exception is raised. Let's add this to the `TrainJob` + +```python +from anomalib.utils.logging import hide_output + +class TrainJob(Job): + ... + + @hide_output + def run(self, task_id: int | None = None) -> dict: + ... +``` + +You will no longer see the output of the `print` statement in the `TrainJob` method in the terminal. diff --git a/docs/source/markdown/guides/how_to/pipelines/index.md b/docs/source/markdown/guides/how_to/pipelines/index.md index ed3d66f81d..c7f2c44706 100644 --- a/docs/source/markdown/guides/how_to/pipelines/index.md +++ b/docs/source/markdown/guides/how_to/pipelines/index.md @@ -1,254 +1,30 @@ -# Pipelines +# Pipeline Tutorials -This guide demonstrates how to create a [Pipeline](../../reference/pipelines/index.md) for your custom task. +This section contains tutorials on how to use different pipelines of Anomalib and how to create your own. -A pipeline is made up of runners. These runners are responsible for running a single type of job. A job is the smallest unit of work that is independent, such as, training a model or statistical comparison of the outputs of two models. Each job should be designed to be independent of other jobs so that they are agnostic to the runner that is running them. This ensures that the job can be run in parallel or serially without any changes to the job itself. The runner does not directly instantiate a job but rather has a job generator that generates the job based on the configuration. This generator is responsible for parsing the config and generating the job. +::::{grid} +:margin: 1 1 0 0 +:gutter: 1 -## Birds Eye View +:::{grid-item-card} {octicon}`stack` Tiled Ensemble +:link: ./tiled_ensemble +:link-type: doc -In this guide we are going to create a dummy significant parameter search pipeline. The pipeline will have two jobs.
The first job trains a model and computes the metric. The second job computes the significance of the parameters to the final score using shapely values. The final output of the pipeline is a plot that shows the contribution of each parameter to the final score. This will help teach you how to create a pipeline, a job, a job generator, and how to expose it to the `anomalib` CLI. The pipeline is going to be named `experiment`. So by the end of this you will be able to generate significance plot using +Learn more about how to use the tiled ensemble pipelines. +::: -```{literalinclude} ../../../../snippets/pipelines/dummy/anomalib_cli.txt -:language: bash -``` - -The final directory structure will look as follows: - -```{literalinclude} ../../../../snippets/pipelines/dummy/src_dir_structure.txt - -``` - -```{literalinclude} ../../../../snippets/pipelines/dummy/tools_dir_structure.txt -:language: bash -``` - -## Creating the Jobs - -Let's first look at the base class for the [jobs](../../reference/pipelines/base/job.md). It has a few methods defined. - -- The `run` method is the main method that is called by the runner. This is where we will train the model and return the model metrics. -- The `collect` method is used to gather the results from all the runs and collate them. This is handy as we want to pass a single object to the next job that contains details of all the runs including the final score. -- The `save` method is used to write any artifacts to the disk. It accepts the gathered results as a parameter. This is useful in a variety of situations. Say, when we want to write the results in a csv file or write the raw anomaly maps for further processing. - -Let's create the first job that trains the model and computes the metric. Since it is a dummy example, we will just return a random number as the metric. 
- -```python -class TrainJob(Job): - name = "train" +:::{grid-item-card} {octicon}`gear` Custom Pipeline +:link: ./custom_pipeline +:link-type: doc - def __init__(self, lr: float, backbone: str, stride: int): - self.lr = lr - self.backbone = backbone - self.stride = stride - - def run(self, task_id: int | None = None) -> dict: - print(f"Training with lr: {self.lr}, backbone: {self.backbone}, stride: {self.stride}") - time.sleep(2) - score = np.random.uniform(0.7, 0.1) - return {"lr": self.lr, "backbone": self.backbone, "stride": self.stride, "score": score} -``` - -Ignore the `task_id` for now. It is used for parallel jobs. We will come back to it later. - -````{note} -The `name` attribute is important and is used to identify the arguments in the job config file. -So, in our case the config `yaml` file will contain an entry like this: - -```yaml -... -train: - lr: - backbone: - stride: -... -```` - -Of course, it is up to us to choose what parameters should be shown under the `train` key. - -Let's also add the `collect` method so that we return a nice dict object that can be used by the next job. - -```python -def collect(results: list[dict]) -> dict: - output: dict = {} - for key in results[0]: - output[key] = [] - for result in results: - for key, value in result.items(): - output[key].append(value) - return output -``` - -We can also define a `save` method that writes the dictionary as a csv file. - -```python -@staticmethod -def save(results: dict) -> None: - """Save results in a csv file.""" - results_df = pd.DataFrame(results) - file_path = Path("runs") / TrainJob.name - file_path.mkdir(parents=True, exist_ok=True) - results_df.to_csv(file_path / "results.csv", index=False) -``` - -The entire job class is shown below. - -```{literalinclude} ../../../../snippets/pipelines/dummy/train_job.txt -:language: python -``` - -Now we need a way to generate this job when the pipeline is run. 
To do this we need to subclass the [JobGenerator](../../reference/pipelines/base/generator.md) class. - -The job generator is the actual object that is attached to a runner and is responsible for parsing the configuration and generating jobs. It has two methods that need to be implemented. - -- `generate_job`: This method accepts the configuration as a dictionary and, optionally, the results of the previous job. For the train job, we don't need results for previous jobs, so we will ignore it. -- `job_class`: This holds the reference to the class of the job that the generator will yield. It is used to inform the runner about the job that is being run, and is used to access the static attributes of the job such as its name, collect method, etc. - -Let's first start by defining the configuration that the generator will accept. The train job requires three parameters: `lr`, `backbone`, and `stride`. We will also add another parameter that defines the number of experiments we want to run. One way to define it would be as follows: - -```yaml -train: - experiments: 10 - lr: [0.1, 0.99] - backbone: - - resnet18 - - wide_resnet50 - stride: - - 3 - - 5 -``` - -For this example the specification is defined as follows. - -1. The number of experiments is set to 10. -2. Learning rate is sampled from a uniform distribution in the range `[0.1, 0.99]`. -3. The backbone is chosen from the list `["resnet18", "wide_resnet50"]`. -4. The stride is chosen from the list `[3, 5]`. - -```{note} -While the `[ ]` and `-` syntax in `yaml` both signify a list, for visual disambiguation this example uses `[ ]` to denote closed interval and `-` for a list of options. -``` - -With this defined, we can define the generator class as follows. - -```{literalinclude} ../../../../snippets/pipelines/dummy/train_generator.txt -:language: python -``` - -Since this is a dummy example, we generate the next experiment randomly. 
In practice, you would use a more sophisticated method that relies on your validation metrics to generate the next experiment. - -```{admonition} Challenge -:class: tip -For a challenge define your own configuration and a generator to parse that configuration. -``` - -Okay, so now we can train the model. We still need a way to find out which parameters contribute the most to the final score. We will do this by computing the shapely values to find out the contribution of each parameter to the final score. - -Let's first start by adding the library to our environment - -```bash -pip install shap -``` +Learn more about how to create a new custom pipeline. +::: -The following listing shows the job that computes the shapely values and saves a plot that shows the contribution of each parameter to the final score. A quick rundown without going into the details of the job (as it is irrelevant to the pipeline) is as follows. We create a `RandomForestRegressor` that is trained on the parameters to predict the final score. We then compute the shapely values to identify the parameters that have the most significant impact on the model performance. Finally, the `save` method saves the plot so we can visually inspect the results. +:::: -```{literalinclude} ../../../../snippets/pipelines/dummy/significance_job.txt +```{toctree} +:caption: Model Tutorials +:hidden: +./feature_extractors ``` - -Great! Now we have the job, as before, we need the generator. Since we only need the results from the previous stage, we don't need to define the config. Let's quickly write that as well. - -```{literalinclude} ../../../../snippets/pipelines/dummy/significance_job_generator.txt - -``` - -## Experiment Pipeline - -So now we have the jobs, and a way to generate them. Let's look at how we can chain them together to achieve what we want. We will use the [Pipeline](../../reference/pipelines/base/pipeline.md) class to define the pipeline. 
- -When creating a custom pipeline, there is only one important method that we need to implement. That is the `_setup_runners` method. This is where we chain the runners together. - -```{literalinclude} ../../../../snippets/pipelines/dummy/pipeline_serial.txt -:language: python -``` - -In this example we use `SerialRunner` for running each job. It is a simple runner that runs the jobs in a serial manner. For more information on `SerialRunner` look [here](../../reference/pipelines/runners/serial.md). - -Okay, so we have the pipeline. How do we run it? To do this let's create a simple entrypoint in `tools` folder of Anomalib. - -Here is how the directory looks. - -```{literalinclude} ../../../../snippets/pipelines/dummy/tools_dir_structure.txt -:language: bash -``` - -As you can see, we have the `config.yaml` file in the same directory. Let's quickly populate `experiment.py`. - -```python -from anomalib.pipelines.experiment_pipeline import ExperimentPipeline - -if __name__ == "__main__": - ExperimentPipeline().run() -``` - -Alright! Time to take it on the road. - -```bash -python tools/experimental/experiment/experiment.py --config tools/experimental/experiment/config.yaml -``` - -If all goes well you should see the summary plot in `runs/significant_feature/summary_plot.png`. - -## Exposing to the CLI - -Now that you have your shiny new pipeline, you can expose it as a subcommand to `anomalib` by adding an entry to the pipeline registry in `anomalib/cli/pipelines.py`. - -```python -if try_import("anomalib.pipelines"): - ... - from anomalib.pipelines import ExperimentPipeline - -PIPELINE_REGISTRY: dict[str, type[Pipeline]] | None = { - "experiment": ExperimentPipeline, - ... -} -``` - -With this you can now call - -```{literalinclude} ../../../../snippets/pipelines/dummy/anomalib_cli.txt -:language: bash -``` - -Congratulations! 
You have successfully created a pipeline that trains a model and computes the significance of the parameters to the final score πŸŽ‰ - -```{admonition} Challenge -:class: tip -This example used a random model hence the scores were meaningless. Try to implement a real model and compute the scores. Look into which parameters lead to the most significant contribution to your score. -``` - -## Final Tweaks - -Before we end, let's look at a few final tweaks that you can make to the pipeline. - -First, let's run the initial model training in parallel. Since all jobs are independent, we can use the [ParallelRunner](../../reference/pipelines/runners/parallel.md). Since the `TrainJob` is a dummy job in this example, the pool of parallel jobs is set to the number of experiments. - -```{literalinclude} ../../../../snippets/pipelines/dummy/pipeline_parallel.txt - -``` - -You now notice that the entire pipeline takes lesser time to run. This is handy when you have large number of experiments, and when each job takes substantial time to run. - -Now on to the second one. When running the pipeline we don't want our terminal cluttered with the outputs from each run. Anomalib provides a handy decorator that temporarily hides the output of a function. It suppresses all outputs to the standard out and the standard error unless an exception is raised. Let's add this to the `TrainJob` - -```python -from anomalib.utils.logging import hide_output - -class TrainJob(Job): - ... - - @hide_output - def run(self, task_id: int | None = None) -> dict: - ... -``` - -You will no longer see the output of the `print` statement in the `TrainJob` method in the terminal. 
diff --git a/docs/source/markdown/guides/how_to/pipelines/tiled_ensemble.md b/docs/source/markdown/guides/how_to/pipelines/tiled_ensemble.md new file mode 100644 index 0000000000..3550efb5fd --- /dev/null +++ b/docs/source/markdown/guides/how_to/pipelines/tiled_ensemble.md @@ -0,0 +1,157 @@ +# Tiled ensemble + +This guide will show you how to use **The Tiled Ensemble** method for anomaly detection. For more details, refer to the official [Paper](https://openaccess.thecvf.com/content/CVPR2024W/VAND/html/Rolih_Divide_and_Conquer_High-Resolution_Industrial_Anomaly_Detection_via_Memory_Efficient_CVPRW_2024_paper.html). + +The tiled ensemble approach reduces memory consumption by dividing input images into a grid of tiles and training a dedicated model for each tile location. +It is compatible with any existing image anomaly detection model without the need for any modification of the underlying architecture. + +![Tiled ensemble flow](../../../../images/tiled_ensemble/ensemble_flow.png) + +```{note} +This feature is experimental and may not work as expected. +For any problems refer to [Issues](https://github.com/openvinotoolkit/anomalib/issues) and feel free to ask any question in [Discussions](https://github.com/openvinotoolkit/anomalib/discussions). +``` + +## Training + +You can train a tiled ensemble using the training script located inside `tools/tiled_ensemble` directory: + +```{code-block} bash + +python tools/tiled_ensemble/train_ensemble.py \ + --config tools/tiled_ensemble/ens_config.yaml +``` + +By default, the Padim model is trained on **MVTec AD bottle** category using image size of 256x256, divided into non-overlapping 128x128 tiles. +You can modify these parameters in the [config file](#ensemble-configuration). 
+ +## Evaluation + +After training, you can evaluate the tiled ensemble on test data using: + +```{code-block} bash + +python tools/tiled_ensemble/eval.py \ + --config tools/tiled_ensemble/ens_config.yaml \ + --root path_to_results_dir + +``` + +Ensure that `root` points to the directory containing the training results, typically `results/padim/mvtec/bottle/runX`. + +## Ensemble configuration + +Tiled ensemble is configured using `ens_config.yaml` file in the `tools/tiled_ensemble` directory. +It contains general settings and tiled ensemble specific settings. + +### General + +General settings at the top of the config file are used to set up the random `seed`, `accelerator` (device) and the path to where results will be saved `default_root_dir`. + +```{code-block} yaml +seed: 42 +accelerator: "gpu" +default_root_dir: "results" +``` + +### Tiling + +This section contains the following settings, used for image tiling: + +```{code-block} yaml + +tiling: + tile_size: 256 + stride: 256 +``` + +These settings determine the tile size and stride. Another important parameter is image_size from `data` section later in the config. It determines the original size of the image. + +Input image is split into tiles, where each tile is of shape set by `tile_size` and tiles are taken with step set by `stride`. +For example: having image_size: 512, tile_size: 256, and stride: 256, results in 4 non-overlapping tile locations. + +### Normalization and thresholding + +Next up are the normalization and thresholding settings: + +```{code-block} yaml +normalization_stage: image +thresholding: + method: F1AdaptiveThreshold + stage: image +``` + +- **Normalization**: Can be applied per each tile location separately (`tile` option), after combining prediction (`image` option), or skipped (`none` option). + +- **Thresholding**: Can also be applied at different stages, but it is limited to `tile` and `image`. Another setting for thresholding is the method used. 
It can be specified as a string or by the class path. + +### Data + +The `data` section is used to configure the input `image_size` and other parameters for the dataset used. + +```{code-block} yaml +data: + class_path: anomalib.data.MVTec + init_args: + root: ./datasets/MVTec + category: bottle + train_batch_size: 32 + eval_batch_size: 32 + num_workers: 8 + task: segmentation + transform: null + train_transform: null + eval_transform: null + test_split_mode: from_dir + test_split_ratio: 0.2 + val_split_mode: same_as_test + val_split_ratio: 0.5 + image_size: [256, 256] +``` + +Refer to [Data](../../reference/data/image/index.md) for more details on parameters. + +### SeamSmoothing + +This section contains settings for `SeamSmoothing` block of pipeline: + +```{code-block} yaml +SeamSmoothing: + apply: True + sigma: 2 + width: 0.1 + +``` + +SeamSmoothing job is responsible for smoothing of regions where tiles meet - called tile seams. + +- **apply**: If True, smoothing will be applied. +- **sigma**: Controls the sigma of Gaussian filter used for smoothing. +- **width**: Sets the percentage of the region around the seam to be smoothed. + +### TrainModels + +The last section `TrainModels` contains the setup for model training: + +```{code-block} yaml +TrainModels: + model: + class_path: Fastflow + + metrics: + pixel: AUROC + image: AUROC + + trainer: + max_epochs: 500 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 42 + monitor: pixel_AUROC + mode: max +``` + +- **Model**: Specifies the model used. Refer to [Models](../../reference/models/image/index.md) for more details on the model parameters. +- **Metrics**: Defines evaluation metrics for pixel and image level. +- **Trainer**: _optional_ parameters, used to control the training process. Refer to [Engine](../../reference/engine/index.md) for more details. 
diff --git a/src/anomalib/data/base/datamodule.py b/src/anomalib/data/base/datamodule.py index cb95ca8171..a9197f6670 100644 --- a/src/anomalib/data/base/datamodule.py +++ b/src/anomalib/data/base/datamodule.py @@ -119,6 +119,8 @@ def __init__( self._is_setup = False # flag to track if setup has been called from the trainer + self.collate_fn = collate_fn + @property def name(self) -> str: """Name of the datamodule.""" @@ -224,6 +226,7 @@ def train_dataloader(self) -> TRAIN_DATALOADERS: shuffle=True, batch_size=self.train_batch_size, num_workers=self.num_workers, + collate_fn=self.collate_fn, ) def val_dataloader(self) -> EVAL_DATALOADERS: @@ -233,7 +236,7 @@ def val_dataloader(self) -> EVAL_DATALOADERS: shuffle=False, batch_size=self.eval_batch_size, num_workers=self.num_workers, - collate_fn=collate_fn, + collate_fn=self.collate_fn, ) def test_dataloader(self) -> EVAL_DATALOADERS: @@ -243,7 +246,7 @@ def test_dataloader(self) -> EVAL_DATALOADERS: shuffle=False, batch_size=self.eval_batch_size, num_workers=self.num_workers, - collate_fn=collate_fn, + collate_fn=self.collate_fn, ) def predict_dataloader(self) -> EVAL_DATALOADERS: diff --git a/src/anomalib/data/utils/tiler.py b/src/anomalib/data/utils/tiler.py index 089aeaae91..2c1e949e45 100644 --- a/src/anomalib/data/utils/tiler.py +++ b/src/anomalib/data/utils/tiler.py @@ -162,11 +162,11 @@ def __init__( remove_border_count: int = 0, mode: ImageUpscaleMode = ImageUpscaleMode.PADDING, ) -> None: - self.tile_size_h, self.tile_size_w = self.__validate_size_type(tile_size) + self.tile_size_h, self.tile_size_w = self.validate_size_type(tile_size) self.random_tile_count = 4 if stride is not None: - self.stride_h, self.stride_w = self.__validate_size_type(stride) + self.stride_h, self.stride_w = self.validate_size_type(stride) self.remove_border_count = remove_border_count self.overlapping = not (self.stride_h == self.tile_size_h and self.stride_w == self.tile_size_w) @@ -201,7 +201,15 @@ def __init__( 
self.num_patches_w: int @staticmethod - def __validate_size_type(parameter: int | Sequence) -> tuple[int, ...]: + def validate_size_type(parameter: int | Sequence) -> tuple[int, ...]: + """Validate size type and return tuple of form [tile_h, tile_w]. + + Args: + parameter (int | Sequence): input tile size parameter. + + Returns: + tuple[int, ...]: Validated tile size in tuple form. + """ if isinstance(parameter, int): output = (parameter, parameter) elif isinstance(parameter, Sequence): diff --git a/src/anomalib/models/components/base/anomaly_module.py b/src/anomalib/models/components/base/anomaly_module.py index 963ce485a3..ecd4c62d13 100644 --- a/src/anomalib/models/components/base/anomaly_module.py +++ b/src/anomalib/models/components/base/anomaly_module.py @@ -266,6 +266,8 @@ def input_size(self) -> tuple[int, int] | None: The effective input size is the size of the input tensor after the transform has been applied. If the transform is not set, or if the transform does not change the shape of the input tensor, this method will return None. """ + if self._input_size: + return self._input_size transform = self.transform or self.configure_transforms() if transform is None: return None @@ -275,6 +277,10 @@ def input_size(self) -> tuple[int, int] | None: return None return output_shape[-2:] + def set_input_size(self, input_size: tuple[int, int]) -> None: + """Update the effective input size of the model.""" + self._input_size = input_size + def on_save_checkpoint(self, checkpoint: dict[str, Any]) -> None: """Called when saving the model to a checkpoint. 
diff --git a/src/anomalib/pipelines/tiled_ensemble/__init__.py b/src/anomalib/pipelines/tiled_ensemble/__init__.py new file mode 100644 index 0000000000..1a068562b7 --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/__init__.py @@ -0,0 +1,12 @@ +"""Tiled ensemble pipelines.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from .test_pipeline import EvalTiledEnsemble +from .train_pipeline import TrainTiledEnsemble + +__all__ = [ + "TrainTiledEnsemble", + "EvalTiledEnsemble", +] diff --git a/src/anomalib/pipelines/tiled_ensemble/components/__init__.py b/src/anomalib/pipelines/tiled_ensemble/components/__init__.py new file mode 100644 index 0000000000..619dc2e673 --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/__init__.py @@ -0,0 +1,30 @@ +"""Tiled ensemble pipeline components.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from .merging import MergeJobGenerator +from .metrics_calculation import MetricsCalculationJobGenerator +from .model_training import TrainModelJobGenerator +from .normalization import NormalizationJobGenerator +from .prediction import PredictJobGenerator +from .smoothing import SmoothingJobGenerator +from .stats_calculation import StatisticsJobGenerator +from .thresholding import ThresholdingJobGenerator +from .utils import NormalizationStage, PredictData, ThresholdStage +from .visualization import VisualizationJobGenerator + +__all__ = [ + "NormalizationStage", + "ThresholdStage", + "PredictData", + "TrainModelJobGenerator", + "PredictJobGenerator", + "MergeJobGenerator", + "SmoothingJobGenerator", + "StatisticsJobGenerator", + "NormalizationJobGenerator", + "ThresholdingJobGenerator", + "VisualizationJobGenerator", + "MetricsCalculationJobGenerator", +] diff --git a/src/anomalib/pipelines/tiled_ensemble/components/merging.py b/src/anomalib/pipelines/tiled_ensemble/components/merging.py new file mode 100644 index 0000000000..6e8d5fc84c --- 
/dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/merging.py @@ -0,0 +1,110 @@ +"""Tiled ensemble - prediction merging job.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from collections.abc import Generator +from typing import Any + +from tqdm import tqdm + +from anomalib.pipelines.components import Job, JobGenerator +from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS + +from .utils.ensemble_tiling import EnsembleTiler +from .utils.helper_functions import get_ensemble_tiler +from .utils.prediction_data import EnsemblePredictions +from .utils.prediction_merging import PredictionMergingMechanism + +logger = logging.getLogger(__name__) + + +class MergeJob(Job): + """Job for merging tile-level predictions into image-level predictions. + + Args: + predictions (EnsemblePredictions): Object containing ensemble predictions. + tiler (EnsembleTiler): Ensemble tiler used for untiling. + """ + + name = "Merge" + + def __init__(self, predictions: EnsemblePredictions, tiler: EnsembleTiler) -> None: + super().__init__() + self.predictions = predictions + self.tiler = tiler + + def run(self, task_id: int | None = None) -> list[Any]: + """Run merging job that merges all batches of tile-level predictions into image-level predictions. + + Args: + task_id: Not used in this case. + + Returns: + list[Any]: List of merged predictions. + """ + del task_id # not needed here + + merger = PredictionMergingMechanism(self.predictions, self.tiler) + + logger.info("Merging predictions.") + + # merge all batches + merged_predictions = [ + merger.merge_tile_predictions(batch_idx) + for batch_idx in tqdm(range(merger.num_batches), desc="Prediction merging") + ] + + return merged_predictions # noqa: RET504 + + @staticmethod + def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: + """Nothing to collect in this job. + + Returns: + list[Any]: List of predictions. 
+ """ + # take the first element as result is list of lists here + return results[0] + + @staticmethod + def save(results: GATHERED_RESULTS) -> None: + """Nothing to save in this job.""" + + +class MergeJobGenerator(JobGenerator): + """Generate MergeJob.""" + + def __init__(self, tiling_args: dict, data_args: dict) -> None: + super().__init__() + self.tiling_args = tiling_args + self.data_args = data_args + + @property + def job_class(self) -> type: + """Return the job class.""" + return MergeJob + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: EnsemblePredictions | None = None, + ) -> Generator[MergeJob, None, None]: + """Return a generator producing a single merging job. + + Args: + args (dict): Tiled ensemble pipeline args. + prev_stage_result (EnsemblePredictions): Ensemble predictions from predict step. + + Returns: + Generator[MergeJob, None, None]: MergeJob generator + """ + del args # args not used here + + tiler = get_ensemble_tiler(self.tiling_args, self.data_args) + if prev_stage_result is not None: + yield MergeJob(prev_stage_result, tiler) + else: + msg = "Merging job requires tile level predictions from previous step." 
+ raise ValueError(msg) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/metrics_calculation.py b/src/anomalib/pipelines/tiled_ensemble/components/metrics_calculation.py new file mode 100644 index 0000000000..530662b1d3 --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/metrics_calculation.py @@ -0,0 +1,217 @@ +"""Tiled ensemble - metrics calculation job.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from collections.abc import Generator +from pathlib import Path +from typing import Any + +import pandas as pd +from tqdm import tqdm + +from anomalib import TaskType +from anomalib.metrics import AnomalibMetricCollection, create_metric_collection +from anomalib.pipelines.components import Job, JobGenerator +from anomalib.pipelines.types import GATHERED_RESULTS, PREV_STAGE_RESULT, RUN_RESULTS + +from .utils import NormalizationStage +from .utils.helper_functions import get_threshold_values + +logger = logging.getLogger(__name__) + + +class MetricsCalculationJob(Job): + """Job for image and pixel metrics calculation. + + Args: + accelerator (str): Accelerator (device) to use. + predictions (list[Any]): List of batch predictions. + root_dir (Path): Root directory to save checkpoints, stats and images. + image_metrics (AnomalibMetricCollection): Collection of all image-level metrics. + pixel_metrics (AnomalibMetricCollection): Collection of all pixel-level metrics. + """ + + name = "Metrics" + + def __init__( + self, + accelerator: str, + predictions: list[Any] | None, + root_dir: Path, + image_metrics: AnomalibMetricCollection, + pixel_metrics: AnomalibMetricCollection, + ) -> None: + super().__init__() + self.accelerator = accelerator + self.predictions = predictions + self.root_dir = root_dir + self.image_metrics = image_metrics + self.pixel_metrics = pixel_metrics + + def run(self, task_id: int | None = None) -> dict: + """Run a job that calculates image and pixel level metrics. 
+ + Args: + task_id: Not used in this case. + + Returns: + dict[str, float]: Dictionary containing calculated metric values. + """ + del task_id # not needed here + + logger.info("Starting metrics calculation.") + + # add predicted data to metrics + for data in tqdm(self.predictions, desc="Calculating metrics"): + self.image_metrics.update(data["pred_scores"], data["label"].int()) + if "mask" in data and "anomaly_maps" in data: + self.pixel_metrics.update(data["anomaly_maps"], data["mask"].int()) + + # compute all metrics on specified accelerator + metrics_dict = {} + for name, metric in self.image_metrics.items(): + metric.to(self.accelerator) + metrics_dict[name] = metric.compute().item() + metric.cpu() + + if self.pixel_metrics.update_called: + for name, metric in self.pixel_metrics.items(): + metric.to(self.accelerator) + metrics_dict[name] = metric.compute().item() + metric.cpu() + + for name, value in metrics_dict.items(): + print(f"{name}: {value:.4f}") + + # save path used in `save` method + metrics_dict["save_path"] = self.root_dir / "metric_results.csv" + + return metrics_dict + + @staticmethod + def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: + """Nothing to collect in this job. + + Returns: + list[Any]: list of predictions. + """ + # take the first element as result is list of dict here + return results[0] + + @staticmethod + def save(results: GATHERED_RESULTS) -> None: + """Save metrics values to csv.""" + logger.info("Saving metrics to csv.") + + # get and remove path from stats dict + results_path: Path = results.pop("save_path") + results_path.parent.mkdir(parents=True, exist_ok=True) + + df_dict = {k: [v] for k, v in results.items()} + metrics_df = pd.DataFrame(df_dict) + metrics_df.to_csv(results_path, index=False) + + +class MetricsCalculationJobGenerator(JobGenerator): + """Generate MetricsCalculationJob. + + Args: + root_dir (Path): Root directory to save checkpoints, stats and images. 
+ """ + + def __init__( + self, + accelerator: str, + root_dir: Path, + task: TaskType, + metrics: dict, + normalization_stage: NormalizationStage, + ) -> None: + self.accelerator = accelerator + self.root_dir = root_dir + self.task = task + self.metrics = metrics + self.normalization_stage = normalization_stage + + @property + def job_class(self) -> type: + """Return the job class.""" + return MetricsCalculationJob + + def configure_ensemble_metrics( + self, + image_metrics: list[str] | dict[str, dict[str, Any]] | None = None, + pixel_metrics: list[str] | dict[str, dict[str, Any]] | None = None, + ) -> tuple[AnomalibMetricCollection, AnomalibMetricCollection]: + """Configure image and pixel metrics and put them into a collection. + + Args: + image_metrics (list[str] | None): List of image-level metric names. + pixel_metrics (list[str] | None): List of pixel-level metric names. + + Returns: + tuple[AnomalibMetricCollection, AnomalibMetricCollection]: + Image-metrics collection and pixel-metrics collection + """ + image_metrics = [] if image_metrics is None else image_metrics + + if pixel_metrics is None: + pixel_metrics = [] + elif self.task == TaskType.CLASSIFICATION: + logger.warning( + "Cannot perform pixel-level evaluation when task type is classification. 
" + "Ignoring the following pixel-level metrics: %s", + pixel_metrics, + ) + pixel_metrics = [] # cleared only after logging so the warning names the ignored metrics + + # if a single metric is passed, transform to list to fit the creation function + if isinstance(image_metrics, str): + image_metrics = [image_metrics] + if isinstance(pixel_metrics, str): + pixel_metrics = [pixel_metrics] + + image_metrics_collection = create_metric_collection(image_metrics, "image_") + pixel_metrics_collection = create_metric_collection(pixel_metrics, "pixel_") + + return image_metrics_collection, pixel_metrics_collection + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: PREV_STAGE_RESULT = None, + ) -> Generator[MetricsCalculationJob, None, None]: + """Make a generator that yields a single metrics calculation job. + + Args: + args: ensemble run config. + prev_stage_result: ensemble predictions from previous step. + + Returns: + Generator[MetricsCalculationJob, None, None]: MetricsCalculationJob generator + """ + del args # args not used here + + image_metrics_config = self.metrics.get("image", None) + pixel_metrics_config = self.metrics.get("pixel", None) + + image_threshold, pixel_threshold = get_threshold_values(self.normalization_stage, self.root_dir) + + image_metrics, pixel_metrics = self.configure_ensemble_metrics( + image_metrics=image_metrics_config, + pixel_metrics=pixel_metrics_config, + ) + + # set thresholds for metrics that need it + image_metrics.set_threshold(image_threshold) + pixel_metrics.set_threshold(pixel_threshold) + + yield MetricsCalculationJob( + accelerator=self.accelerator, + predictions=prev_stage_result, + root_dir=self.root_dir, + image_metrics=image_metrics, + pixel_metrics=pixel_metrics, + ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/model_training.py b/src/anomalib/pipelines/tiled_ensemble/components/model_training.py new file mode 100644 index 0000000000..6bc81c793b --- /dev/null +++ 
b/src/anomalib/pipelines/tiled_ensemble/components/model_training.py @@ -0,0 +1,192 @@ +"""Tiled ensemble 
- ensemble training job.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from collections.abc import Generator +from itertools import product +from pathlib import Path + +from lightning import seed_everything + +from anomalib.data import AnomalibDataModule +from anomalib.models import AnomalyModule +from anomalib.pipelines.components import Job, JobGenerator +from anomalib.pipelines.types import GATHERED_RESULTS, PREV_STAGE_RESULT + +from .utils import NormalizationStage +from .utils.ensemble_engine import TiledEnsembleEngine +from .utils.helper_functions import ( + get_ensemble_datamodule, + get_ensemble_engine, + get_ensemble_model, + get_ensemble_tiler, +) + +logger = logging.getLogger(__name__) + + +class TrainModelJob(Job): + """Job for training of individual models in the tiled ensemble. + + Args: + accelerator (str): Accelerator (device) to use. + seed (int): Random seed for reproducibility. + root_dir (Path): Root directory to save checkpoints, stats and images. + tile_index (tuple[int, int]): Index of tile that this model processes. + normalization_stage (str): Normalization stage flag. + metrics (dict): metrics dict with pixel and image metric names. + trainer_args (dict| None): Additional arguments to pass to the trainer class. + model (AnomalyModule): Model to train. + datamodule (AnomalibDataModule): Datamodule with all dataloaders. 
+ + """ + + name = "TrainModels" + + def __init__( + self, + accelerator: str, + seed: int, + root_dir: Path, + tile_index: tuple[int, int], + normalization_stage: str, + metrics: dict, + trainer_args: dict | None, + model: AnomalyModule, + datamodule: AnomalibDataModule, + ) -> None: + super().__init__() + self.accelerator = accelerator + self.seed = seed + self.root_dir = root_dir + self.tile_index = tile_index + self.normalization_stage = normalization_stage + self.metrics = metrics + self.trainer_args = trainer_args + self.model = model + self.datamodule = datamodule + + def run( + self, + task_id: int | None = None, + ) -> TiledEnsembleEngine: + """Run train job that fits the model for given tile location. + + Args: + task_id: Passed when job is ran in parallel. + + Returns: + TiledEnsembleEngine: Engine containing trained model. + """ + devices: str | list[int] = "auto" + if task_id is not None: + devices = [task_id] + logger.info(f"Running job {self.model.__class__.__name__} with device {task_id}") + + logger.info("Start of training for tile at position %s,", self.tile_index) + seed_everything(self.seed) + + # create engine for specific tile location and fit the model + engine = get_ensemble_engine( + tile_index=self.tile_index, + accelerator=self.accelerator, + devices=devices, + root_dir=self.root_dir, + normalization_stage=self.normalization_stage, + metrics=self.metrics, + trainer_args=self.trainer_args, + ) + engine.fit(model=self.model, datamodule=self.datamodule) + # move model to cpu to avoid memory issues as the engine is returned to be used in validation phase + engine.model.cpu() + + return engine + + @staticmethod + def collect(results: list[TiledEnsembleEngine]) -> dict[tuple[int, int], TiledEnsembleEngine]: + """Collect engines from each tile location into a dict. 
+ + Returns: + dict[tuple[int, int], TiledEnsembleEngine]: Dict has form {tile_index: TiledEnsembleEngine} + """ + return {r.tile_index: r for r in results} + + @staticmethod + def save(results: GATHERED_RESULTS) -> None: + """Skip as checkpoints are already saved by callback.""" + + +class TrainModelJobGenerator(JobGenerator): + """Generator for training job that train model for each tile location. + + Args: + root_dir (Path): Root directory to save checkpoints, stats and images. + """ + + def __init__( + self, + seed: int, + accelerator: str, + root_dir: Path, + tiling_args: dict, + data_args: dict, + normalization_stage: NormalizationStage, + ) -> None: + self.seed = seed + self.accelerator = accelerator + self.root_dir = root_dir + self.tiling_args = tiling_args + self.data_args = data_args + self.normalization_stage = normalization_stage + + @property + def job_class(self) -> type: + """Return the job class.""" + return TrainModelJob + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: PREV_STAGE_RESULT = None, + ) -> Generator[TrainModelJob, None, None]: + """Generate training jobs for each tile location. + + Args: + args (dict): Dict with config passed to training. + prev_stage_result (None): Not used here. + + Returns: + Generator[TrainModelJob, None, None]: TrainModelJob generator + """ + del prev_stage_result # Not needed for this job + if args is None: + msg = "TrainModels job requires config args" + raise ValueError(msg) + + # tiler used for splitting the image and getting the tile count + tiler = get_ensemble_tiler(self.tiling_args, self.data_args) + + logger.info( + "Tiled ensemble training started. 
Separate models will be trained for %d tile locations.", + tiler.num_tiles, + ) + # go over all tile positions + for tile_index in product(range(tiler.num_patches_h), range(tiler.num_patches_w)): + # prepare datamodule with custom collate function that only provides specific tile of image + datamodule = get_ensemble_datamodule(self.data_args, tiler, tile_index) + model = get_ensemble_model(args["model"], tiler) + + # pass root_dir to engine so all models in ensemble have the same root dir + yield TrainModelJob( + accelerator=self.accelerator, + seed=self.seed, + root_dir=self.root_dir, + tile_index=tile_index, + normalization_stage=self.normalization_stage, + metrics=args["metrics"], + trainer_args=args.get("trainer", {}), + model=model, + datamodule=datamodule, + ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/normalization.py b/src/anomalib/pipelines/tiled_ensemble/components/normalization.py new file mode 100644 index 0000000000..8c7a563506 --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/normalization.py @@ -0,0 +1,120 @@ +"""Tiled ensemble - normalization job.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import logging +from collections.abc import Generator +from pathlib import Path +from typing import Any + +from tqdm import tqdm + +from anomalib.pipelines.components import Job, JobGenerator +from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS +from anomalib.utils.normalization.min_max import normalize + +logger = logging.getLogger(__name__) + + +class NormalizationJob(Job): + """Job for normalization of predictions. + + Args: + predictions (list[Any]): List of predictions. + root_dir (Path): Root directory containing statistics needed for normalization. 
+ """ + + name = "Normalize" + + def __init__(self, predictions: list[Any] | None, root_dir: Path) -> None: + super().__init__() + self.predictions = predictions + self.root_dir = root_dir + + def run(self, task_id: int | None = None) -> list[Any] | None: + """Run normalization job which normalizes image, pixel and box scores. + + Args: + task_id: Not used in this case. + + Returns: + list[Any]: List of normalized predictions. + """ + del task_id # not needed here + + # load all statistics needed for normalization + stats_path = self.root_dir / "weights" / "lightning" / "stats.json" + with stats_path.open("r") as f: + stats = json.load(f) + minmax = stats["minmax"] + image_threshold = stats["image_threshold"] + pixel_threshold = stats["pixel_threshold"] + + logger.info("Starting normalization.") + + for data in tqdm(self.predictions, desc="Normalizing"): + data["pred_scores"] = normalize( + data["pred_scores"], + image_threshold, + minmax["pred_scores"]["min"], + minmax["pred_scores"]["max"], + ) + if "anomaly_maps" in data: + data["anomaly_maps"] = normalize( + data["anomaly_maps"], + pixel_threshold, + minmax["anomaly_maps"]["min"], + minmax["anomaly_maps"]["max"], + ) + + return self.predictions + + @staticmethod + def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: + """Nothing to collect in this job. + + Returns: + list[Any]: List of predictions. + """ + # take the first element as result is list of lists here + return results[0] + + @staticmethod + def save(results: GATHERED_RESULTS) -> None: + """Nothing is saved in this job.""" + + +class NormalizationJobGenerator(JobGenerator): + """Generate NormalizationJob. + + Args: + root_dir (Path): Root directory where statistics are saved. 
+ """ + + def __init__(self, root_dir: Path) -> None: + self.root_dir = root_dir + + @property + def job_class(self) -> type: + """Return the job class.""" + return NormalizationJob + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: list[Any] | None = None, + ) -> Generator[NormalizationJob, None, None]: + """Return a generator producing a single normalization job. + + Args: + args: not used here. + prev_stage_result (list[Any]): Ensemble predictions from previous step. + + Returns: + Generator[NormalizationJob, None, None]: NormalizationJob generator. + """ + del args # not needed here + + yield NormalizationJob(prev_stage_result, self.root_dir) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/prediction.py b/src/anomalib/pipelines/tiled_ensemble/components/prediction.py new file mode 100644 index 0000000000..792d86a497 --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/prediction.py @@ -0,0 +1,228 @@ +"""Tiled ensemble - ensemble prediction job.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from collections.abc import Generator +from itertools import product +from pathlib import Path +from typing import Any + +from lightning import seed_everything +from torch.utils.data import DataLoader + +from anomalib.models import AnomalyModule +from anomalib.pipelines.components import Job, JobGenerator +from anomalib.pipelines.types import GATHERED_RESULTS, PREV_STAGE_RESULT + +from .utils import NormalizationStage, PredictData +from .utils.ensemble_engine import TiledEnsembleEngine +from .utils.helper_functions import ( + get_ensemble_datamodule, + get_ensemble_engine, + get_ensemble_model, + get_ensemble_tiler, +) +from .utils.prediction_data import EnsemblePredictions + +logger = logging.getLogger(__name__) + + +class PredictJob(Job): + """Job for generating predictions with individual models in the tiled ensemble. 
+ + Args: + accelerator (str): Accelerator (device) to use. + seed (int): Random seed for reproducibility. + root_dir (Path): Root directory to save checkpoints, stats and images. + tile_index (tuple[int, int]): Index of tile that this model processes. + normalization_stage (str): Normalization stage flag. + dataloader (DataLoader): Dataloader to use for prediction (either val or test). + model (AnomalyModule | None): Model used for prediction. + engine (TiledEnsembleEngine | None): + engine from train job. If job is used standalone, instantiate engine and model from checkpoint. + ckpt_path (Path | None): Path to checkpoint to be loaded if engine doesn't contain correct weights. + + """ + + name = "Predict" + + def __init__( + self, + accelerator: str, + seed: int, + root_dir: Path, + tile_index: tuple[int, int], + normalization_stage: str, + dataloader: DataLoader, + model: AnomalyModule | None, + engine: TiledEnsembleEngine | None, + ckpt_path: Path | None, + ) -> None: + super().__init__() + if engine is None and ckpt_path is None: + msg = "Either engine or checkpoint must be provided to predict job." + raise ValueError(msg) + + self.accelerator = accelerator + self.seed = seed + self.root_dir = root_dir + self.tile_index = tile_index + self.normalization_stage = normalization_stage + self.dataloader = dataloader + self.model = model + self.engine = engine + self.ckpt_path = ckpt_path + + def run( + self, + task_id: int | None = None, + ) -> tuple[tuple[int, int], Any | None]: + """Predict job that predicts the data with specific model for given tile location. + + Args: + task_id: Passed when job is run in parallel. + + Returns: + tuple[tuple[int, int], list[Any]]: Tile index, List of predictions. 
+ """ + devices: str | list[int] = "auto" + if task_id is not None: + devices = [task_id] + logger.info(f"Running job {self.model.__class__.__name__} with device {task_id}") + + logger.info("Start of predicting for tile at position %s,", self.tile_index) + seed_everything(self.seed) + + if self.engine is None: + # in case predict is invoked separately from train job, make new engine instance + self.engine = get_ensemble_engine( + tile_index=self.tile_index, + accelerator=self.accelerator, + devices=devices, + root_dir=self.root_dir, + normalization_stage=self.normalization_stage, + ) + + predictions = self.engine.predict(model=self.model, dataloaders=self.dataloader, ckpt_path=self.ckpt_path) + + # also return tile index as it's needed in collect method + return self.tile_index, predictions + + @staticmethod + def collect(results: list[tuple[tuple[int, int], list[Any]]]) -> EnsemblePredictions: + """Collect predictions from each tile location into the predictions class. + + Returns: + EnsemblePredictions: Object containing all predictions in form ready for merging. + """ + storage = EnsemblePredictions() + + for tile_index, predictions in results: + storage.add_tile_prediction(tile_index, predictions) + + return storage + + @staticmethod + def save(results: GATHERED_RESULTS) -> None: + """This stage doesn't save anything.""" + + +class PredictJobGenerator(JobGenerator): + """Generator for predict job that uses individual models to predict for each tile location. + + Args: + root_dir (Path): Root directory to save checkpoints, stats and images. + data_source (PredictData): Whether to predict on validation set. If false use test set. 
+ """ + + def __init__( + self, + data_source: PredictData, + seed: int, + accelerator: str, + root_dir: Path, + tiling_args: dict, + data_args: dict, + model_args: dict, + normalization_stage: NormalizationStage, + ) -> None: + self.data_source = data_source + self.seed = seed + self.accelerator = accelerator + self.root_dir = root_dir + self.tiling_args = tiling_args + self.data_args = data_args + self.model_args = model_args + self.normalization_stage = normalization_stage + + @property + def job_class(self) -> type: + """Return the job class.""" + return PredictJob + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: PREV_STAGE_RESULT = None, + ) -> Generator[PredictJob, None, None]: + """Generate predict jobs for each tile location. + + Args: + args (dict): Dict with config passed to training. + prev_stage_result (dict[tuple[int, int], TiledEnsembleEngine] | None): + if called after train job this contains engines with individual models, otherwise load from checkpoints. + + Returns: + Generator[PredictJob, None, None]: PredictJob generator. 
+ """ + del args # args not used here + + # tiler used for splitting the image and getting the tile count + tiler = get_ensemble_tiler(self.tiling_args, self.data_args) + + logger.info( + "Tiled ensemble predicting started using %s data.", + self.data_source.value, + ) + # go over all tile positions + for tile_index in product(range(tiler.num_patches_h), range(tiler.num_patches_w)): + # prepare datamodule with custom collate function that only provides specific tile of image + datamodule = get_ensemble_datamodule(self.data_args, tiler, tile_index) + + # check if predict step is positioned after training + if prev_stage_result and tile_index in prev_stage_result: + engine = prev_stage_result[tile_index] + # model is inside engine in this case + model = engine.model + ckpt_path = None + else: + # any other case - predict is called standalone + engine = None + # we need to make new model instance as it's not inside engine + model = get_ensemble_model(self.model_args, tiler) + tile_i, tile_j = tile_index + # prepare checkpoint path for model on current tile location + ckpt_path = self.root_dir / "weights" / "lightning" / f"model{tile_i}_{tile_j}.ckpt" + + # pick the dataloader based on predict data + dataloader = datamodule.test_dataloader() + if self.data_source == PredictData.VAL: + dataloader = datamodule.val_dataloader() + # TODO(blaz-r): - this is tweak to avoid problem in engine:388 + # 2254 + dataloader.dataset.transform = None + + # pass root_dir to engine so all models in ensemble have the same root dir + yield PredictJob( + accelerator=self.accelerator, + seed=self.seed, + root_dir=self.root_dir, + tile_index=tile_index, + normalization_stage=self.normalization_stage, + model=model, + dataloader=dataloader, + engine=engine, + ckpt_path=ckpt_path, + ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/smoothing.py b/src/anomalib/pipelines/tiled_ensemble/components/smoothing.py new file mode 100644 index 0000000000..b3d5a51000 --- /dev/null +++ 
b/src/anomalib/pipelines/tiled_ensemble/components/smoothing.py @@ -0,0 +1,167 @@ +"""Tiled ensemble - seam smoothing job.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from collections.abc import Generator +from typing import Any + +import torch +from tqdm import tqdm + +from anomalib.models.components import GaussianBlur2d +from anomalib.pipelines.components import Job, JobGenerator +from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS + +from .utils.ensemble_tiling import EnsembleTiler +from .utils.helper_functions import get_ensemble_tiler + +logger = logging.getLogger(__name__) + + +class SmoothingJob(Job): + """Job for smoothing the area around the tile seam. + + Args: + accelerator (str): Accelerator used for processing. + predictions (list[Any]): List of image-level predictions. + width_factor (float): Factor multiplied by tile dimension to get the region around seam which will be smoothed. + filter_sigma (float): Sigma of filter used for smoothing the seams. + tiler (EnsembleTiler): Tiler object used to get tile dimension data. + """ + + name = "SeamSmoothing" + + def __init__( + self, + accelerator: str, + predictions: list[Any], + width_factor: float, + filter_sigma: float, + tiler: EnsembleTiler, + ) -> None: + super().__init__() + self.accelerator = accelerator + self.predictions = predictions + + # offset in pixels of region around tile seam that will be smoothed + self.height_offset = int(tiler.tile_size_h * width_factor) + self.width_offset = int(tiler.tile_size_w * width_factor) + self.tiler = tiler + + self.seam_mask = self.prepare_seam_mask() + + self.blur = GaussianBlur2d(sigma=filter_sigma) + + def prepare_seam_mask(self) -> torch.Tensor: + """Prepare boolean mask of regions around the part where tiles seam in ensemble. + + Returns: + torch.Tensor: Representation of boolean mask where filtered seams should be used. 
+ """ + img_h, img_w = self.tiler.image_size + stride_h, stride_w = self.tiler.stride_h, self.tiler.stride_w + + mask = torch.zeros(img_h, img_w, dtype=torch.bool) + + # prepare mask strip on vertical seams + curr_w = stride_w + while curr_w < img_w: + start_i = curr_w - self.width_offset + end_i = curr_w + self.width_offset + mask[:, start_i:end_i] = 1 + curr_w += stride_w + + # prepare mask strip on horizontal seams + curr_h = stride_h + while curr_h < img_h: + start_i = curr_h - self.height_offset + end_i = curr_h + self.height_offset + mask[start_i:end_i, :] = True + curr_h += stride_h + + return mask + + def run(self, task_id: int | None = None) -> list[Any]: + """Run smoothing job. + + Args: + task_id: Not used in this case. + + Returns: + list[Any]: List of predictions. + """ + del task_id # not needed here + + logger.info("Starting seam smoothing.") + + for data in tqdm(self.predictions, desc="Seam smoothing"): + # move to specified accelerator for faster execution + data["anomaly_maps"] = data["anomaly_maps"].to(self.accelerator) + # smooth the anomaly map and take only region around seams delimited by seam_mask + smoothed = self.blur(data["anomaly_maps"]) + data["anomaly_maps"][:, :, self.seam_mask] = smoothed[:, :, self.seam_mask] + data["anomaly_maps"] = data["anomaly_maps"].cpu() + + return self.predictions + + @staticmethod + def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: + """Nothing to collect in this job. + + Returns: + list[Any]: List of predictions. 
+ """ + # take the first element as result is list of lists here + return results[0] + + @staticmethod + def save(results: GATHERED_RESULTS) -> None: + """Nothing to save in this job.""" + + +class SmoothingJobGenerator(JobGenerator): + """Generate SmoothingJob.""" + + def __init__(self, accelerator: str, tiling_args: dict, data_args: dict) -> None: + super().__init__() + self.accelerator = accelerator + self.tiling_args = tiling_args + self.data_args = data_args + + @property + def job_class(self) -> type: + """Return the job class.""" + return SmoothingJob + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: list[Any] | None = None, + ) -> Generator[SmoothingJob, None, None]: + """Return a generator producing a single seam smoothing job. + + Args: + args: Tiled ensemble pipeline args. + prev_stage_result (list[Any]): Ensemble predictions from previous step. + + Returns: + Generator[SmoothingJob, None, None]: SmoothingJob generator + """ + if args is None: + msg = "SeamSmoothing job requires config args" + raise ValueError(msg) + # tiler is used to determine where seams appear + tiler = get_ensemble_tiler(self.tiling_args, self.data_args) + if prev_stage_result is not None: + yield SmoothingJob( + accelerator=self.accelerator, + predictions=prev_stage_result, + width_factor=args["width"], + filter_sigma=args["sigma"], + tiler=tiler, + ) + else: + msg = "Join smoothing job requires tile level predictions from previous step." 
+ raise ValueError(msg) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/stats_calculation.py b/src/anomalib/pipelines/tiled_ensemble/components/stats_calculation.py new file mode 100644 index 0000000000..6c48b639f7 --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/stats_calculation.py @@ -0,0 +1,180 @@ +"""Tiled ensemble - post-processing statistics calculation job.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import logging +from collections.abc import Generator +from pathlib import Path +from typing import Any + +import torch +from omegaconf import DictConfig, ListConfig +from torchmetrics import MetricCollection +from tqdm import tqdm + +from anomalib.callbacks.thresholding import _ThresholdCallback +from anomalib.metrics import MinMax +from anomalib.metrics.threshold import Threshold +from anomalib.pipelines.components import Job, JobGenerator +from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS + +logger = logging.getLogger(__name__) + + +class StatisticsJob(Job): + """Job for calculating min, max and threshold statistics for post-processing. + + Args: + predictions (list[Any]): List of image-level predictions. + root_dir (Path): Root directory to save checkpoints, stats and images. + """ + + name = "Stats" + + def __init__( + self, + predictions: list[Any] | None, + root_dir: Path, + image_threshold: Threshold, + pixel_threshold: Threshold, + ) -> None: + super().__init__() + self.predictions = predictions + self.root_dir = root_dir + self.image_threshold = image_threshold + self.pixel_threshold = pixel_threshold + + def run(self, task_id: int | None = None) -> dict: + """Run job that calculates statistics needed in post-processing steps. + + Args: + task_id: Not used in this case + + Returns: + dict: Statistics dict with min, max and threshold values. 
+ """ + del task_id # not needed here + + minmax = MetricCollection( + { + "anomaly_maps": MinMax().cpu(), + "pred_scores": MinMax().cpu(), + }, + ) + pixel_update_called = False + + logger.info("Starting post-processing statistics calculation.") + + for data in tqdm(self.predictions, desc="Stats calculation"): + # update minmax + if "anomaly_maps" in data: + minmax["anomaly_maps"](data["anomaly_maps"]) + if "pred_scores" in data: + minmax["pred_scores"](data["pred_scores"]) + + # update thresholds + self.image_threshold.update(data["pred_scores"], data["label"].int()) + if "mask" in data and "anomaly_maps" in data: + self.pixel_threshold.update(torch.squeeze(data["anomaly_maps"]), torch.squeeze(data["mask"].int())) + pixel_update_called = True + + self.image_threshold.compute() + if pixel_update_called: + self.pixel_threshold.compute() + else: + self.pixel_threshold.value = self.image_threshold.value + + min_max_vals = {} + for pred_name, pred_metric in minmax.items(): + min_max_vals[pred_name] = { + "min": pred_metric.min.item(), + "max": pred_metric.max.item(), + } + + # return stats with save path that is later used to save statistics. + return { + "minmax": min_max_vals, + "image_threshold": self.image_threshold.value.item(), + "pixel_threshold": self.pixel_threshold.value.item(), + "save_path": (self.root_dir / "weights" / "lightning" / "stats.json"), + } + + @staticmethod + def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: + """Nothing to collect in this job. + + Returns: + dict: statistics dictionary. 
+ """ + # take the first element as result is list of lists here + return results[0] + + @staticmethod + def save(results: GATHERED_RESULTS) -> None: + """Save statistics to file system.""" + # get and remove path from stats dict + stats_path: Path = results.pop("save_path") + stats_path.parent.mkdir(parents=True, exist_ok=True) + + # save statistics next to weights + with stats_path.open("w", encoding="utf-8") as stats_file: + json.dump(results, stats_file, ensure_ascii=False, indent=4) + + +class StatisticsJobGenerator(JobGenerator): + """Generate StatisticsJob. + + Args: + root_dir (Path): Root directory where statistics file will be saved (in weights folder). + """ + + def __init__( + self, + root_dir: Path, + thresholding_method: DictConfig | str | ListConfig | list[dict[str, str | float]], + ) -> None: + self.root_dir = root_dir + self.threshold = thresholding_method + + @property + def job_class(self) -> type: + """Return the job class.""" + return StatisticsJob + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: list[Any] | None = None, + ) -> Generator[StatisticsJob, None, None]: + """Return a generator producing a single stats calculating job. + + Args: + args: Not used here. + prev_stage_result (list[Any]): Ensemble predictions from previous step. + + Returns: + Generator[StatisticsJob, None, None]: StatisticsJob generator. 
+ """ + del args # not needed here + + # get threshold class based config + if isinstance(self.threshold, str | DictConfig): + # single method provided + image_threshold = _ThresholdCallback._get_threshold_from_config(self.threshold) # noqa: SLF001 + pixel_threshold = image_threshold.clone() + elif isinstance(self.threshold, ListConfig | list): + # image and pixel method specified separately + image_threshold = _ThresholdCallback._get_threshold_from_config(self.threshold[0]) # noqa: SLF001 + pixel_threshold = _ThresholdCallback._get_threshold_from_config(self.threshold[1]) # noqa: SLF001 + else: + msg = f"Invalid threshold config {self.threshold}" + raise TypeError(msg) + + yield StatisticsJob( + predictions=prev_stage_result, + root_dir=self.root_dir, + image_threshold=image_threshold, + pixel_threshold=pixel_threshold, + ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/thresholding.py b/src/anomalib/pipelines/tiled_ensemble/components/thresholding.py new file mode 100644 index 0000000000..733c3d99db --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/thresholding.py @@ -0,0 +1,114 @@ +"""Tiled ensemble - thresholding job.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from collections.abc import Generator +from pathlib import Path +from typing import Any + +from tqdm import tqdm + +from anomalib.pipelines.components import Job, JobGenerator +from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS + +from .utils import NormalizationStage +from .utils.helper_functions import get_threshold_values + +logger = logging.getLogger(__name__) + + +class ThresholdingJob(Job): + """Job used to threshold predictions, producing labels from scores. + + Args: + predictions (list[Any]): List of predictions. + image_threshold (float): Threshold used for image-level thresholding. + pixel_threshold (float): Threshold used for pixel-level thresholding. 
+ """ + + name = "Threshold" + + def __init__(self, predictions: list[Any] | None, image_threshold: float, pixel_threshold: float) -> None: + super().__init__() + self.predictions = predictions + self.image_threshold = image_threshold + self.pixel_threshold = pixel_threshold + + def run(self, task_id: int | None = None) -> list[Any] | None: + """Run job that produces prediction labels from scores. + + Args: + task_id: Not used in this case. + + Returns: + list[Any]: List of thresholded predictions. + """ + del task_id # not needed here + + logger.info("Starting thresholding.") + + for data in tqdm(self.predictions, desc="Thresholding"): + if "pred_scores" in data: + data["pred_labels"] = data["pred_scores"] >= self.image_threshold + if "anomaly_maps" in data: + data["pred_masks"] = data["anomaly_maps"] >= self.pixel_threshold + + return self.predictions + + @staticmethod + def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: + """Nothing to collect in this job. + + Returns: + list[Any]: List of predictions. + """ + # take the first element as result is list of lists here + return results[0] + + @staticmethod + def save(results: GATHERED_RESULTS) -> None: + """Nothing is saved in this job.""" + + +class ThresholdingJobGenerator(JobGenerator): + """Generate ThresholdingJob. + + Args: + root_dir (Path): Root directory containing post-processing stats. + """ + + def __init__(self, root_dir: Path, normalization_stage: NormalizationStage) -> None: + self.root_dir = root_dir + self.normalization_stage = normalization_stage + + @property + def job_class(self) -> type: + """Return the job class.""" + return ThresholdingJob + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: list[Any] | None = None, + ) -> Generator[ThresholdingJob, None, None]: + """Return a generator producing a single thresholding job. + + Args: + args: ensemble run args. + prev_stage_result (list[Any]): Ensemble predictions from previous step. 
+ + Returns: + Generator[ThresholdingJob, None, None]: ThresholdingJob generator. + """ + del args # args not used here + + # get threshold values base on normalization + image_threshold, pixel_threshold = get_threshold_values(self.normalization_stage, self.root_dir) + + yield ThresholdingJob( + predictions=prev_stage_result, + image_threshold=image_threshold, + pixel_threshold=pixel_threshold, + ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/__init__.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/__init__.py new file mode 100644 index 0000000000..a010208908 --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/utils/__init__.py @@ -0,0 +1,44 @@ +"""Tiled ensemble utils and helper functions.""" + +from enum import Enum + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +class NormalizationStage(str, Enum): + """Enum signaling at which stage the normalization is done. + + In case of tile, tiles are normalized for each tile position separately. + In case of image, normalization is done at the end when images are joined back together. + In case of none, output is not normalized. + """ + + TILE = "tile" + IMAGE = "image" + NONE = "none" + + +class ThresholdStage(str, Enum): + """Enum signaling at which stage the thresholding is applied. + + In case of tile, thresholding is applied for each tile location separately. + In case of image, thresholding is applied at the end when images are joined back together. 
+ """ + + TILE = "tile" + IMAGE = "image" + + +class PredictData(Enum): + """Enum indicating which data to use in prediction job.""" + + VAL = "val" + TEST = "test" + + +__all__ = [ + "NormalizationStage", + "ThresholdStage", + "PredictData", +] diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_engine.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_engine.py new file mode 100644 index 0000000000..449109ed3f --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_engine.py @@ -0,0 +1,92 @@ +"""Implements custom Anomalib engine for tiled ensemble training.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from pathlib import Path + +from lightning.pytorch.callbacks import Callback, RichModelSummary + +from anomalib.callbacks import ModelCheckpoint, TimerCallback +from anomalib.callbacks.metrics import _MetricsCallback +from anomalib.callbacks.normalization import get_normalization_callback +from anomalib.callbacks.post_processor import _PostProcessorCallback +from anomalib.callbacks.thresholding import _ThresholdCallback +from anomalib.engine import Engine +from anomalib.models import AnomalyModule +from anomalib.utils.path import create_versioned_dir + +logger = logging.getLogger(__name__) + + +class TiledEnsembleEngine(Engine): + """Engine used for training and evaluating tiled ensemble. + + Most of the logic stays the same, but workspace creation and callbacks are adjusted for ensemble. + + Args: + tile_index (tuple[int, int]): index of tile that this engine instance processes. + **kwargs: Engine arguments. 
+ """ + + def __init__(self, tile_index: tuple[int, int], **kwargs) -> None: + self.tile_index = tile_index + super().__init__(**kwargs) + + def _setup_workspace(self, *args, **kwargs) -> None: + """Skip since in case of tiled ensemble, workspace is only setup once at the beginning of training.""" + + @staticmethod + def setup_ensemble_workspace(args: dict, versioned_dir: bool = True) -> Path: + """Set up the workspace at the beginning of tiled ensemble training. + + Args: + args (dict): Tiled ensemble config dict. + versioned_dir (bool, optional): Whether to create a versioned directory. + Defaults to ``True``. + + Returns: + Path: path to new workspace root dir + """ + model_name = args["TrainModels"]["model"]["class_path"].split(".")[-1] + dataset_name = args["data"]["class_path"].split(".")[-1] + category = args["data"]["init_args"]["category"] + root_dir = Path(args["default_root_dir"]) / model_name / dataset_name / category + return create_versioned_dir(root_dir) if versioned_dir else root_dir / "latest" + + def _setup_anomalib_callbacks(self, model: AnomalyModule) -> None: + """Modified method to enable individual model training. It's called when Trainer is being set up.""" + del model # not used here + + _callbacks: list[Callback] = [RichModelSummary()] + + # Add ModelCheckpoint if it is not in the callbacks list. + has_checkpoint_callback = any(isinstance(c, ModelCheckpoint) for c in self._cache.args["callbacks"]) + if not has_checkpoint_callback: + tile_i, tile_j = self.tile_index + _callbacks.append( + ModelCheckpoint( + dirpath=self._cache.args["default_root_dir"] / "weights" / "lightning", + filename=f"model{tile_i}_{tile_j}", + auto_insert_metric_name=False, + ), + ) + + # Add the post-processor callbacks. Used for thresholding and label calculation. + _callbacks.append(_PostProcessorCallback()) + + # Add the normalization callback if tile level normalization was specified (is not none). 
+ normalization_callback = get_normalization_callback(self.normalization) + if normalization_callback is not None: + _callbacks.append(normalization_callback) + + # Add the thresholding and metrics callbacks in all cases, + # because individual model might still need this for early stop. + _callbacks.append(_ThresholdCallback(self.threshold)) + _callbacks.append(_MetricsCallback(self.task, self.image_metric_names, self.pixel_metric_names)) + + _callbacks.append(TimerCallback()) + + # Combine the callbacks, and update the trainer callbacks. + self._cache.args["callbacks"] = _callbacks + self._cache.args["callbacks"] diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_tiling.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_tiling.py new file mode 100644 index 0000000000..db56f88b47 --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_tiling.py @@ -0,0 +1,147 @@ +"""Tiler used with ensemble of models.""" + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from collections.abc import Sequence +from typing import Any + +from torch import Tensor + +from anomalib.data.base.datamodule import collate_fn +from anomalib.data.utils.tiler import Tiler, compute_new_image_size + + +class EnsembleTiler(Tiler): + """Tile Image into (non)overlapping Patches which are then used for ensemble training. + + Args: + tile_size (int | Sequence): Tile dimension for each patch. + stride (int | Sequence): Stride length between patches. + image_size (int | Sequence): Size of input image that will be tiled. 
+ + Examples: + >>> import torch + >>> tiler = EnsembleTiler(tile_size=256, stride=128, image_size=512) + >>> + >>> # random images, shape: [B, C, H, W] + >>> images = torch.rand(32, 5, 512, 512) + >>> # once tiled, the shape is [tile_count_H, tile_count_W, B, C, tile_H, tile_W] + >>> tiled = tiler.tile(images) + >>> tiled.shape + torch.Size([3, 3, 32, 5, 256, 256]) + + >>> # assemble the tiles back together + >>> untiled = tiler.untile(tiled) + >>> untiled.shape + torch.Size([32, 5, 512, 512]) + """ + + def __init__(self, tile_size: int | Sequence, stride: int | Sequence, image_size: int | Sequence) -> None: + super().__init__( + tile_size=tile_size, + stride=stride, + ) + + # calculate final image size + self.image_size = self.validate_size_type(image_size) + self.input_h, self.input_w = self.image_size + self.resized_h, self.resized_w = compute_new_image_size( + image_size=(self.input_h, self.input_w), + tile_size=(self.tile_size_h, self.tile_size_w), + stride=(self.stride_h, self.stride_w), + ) + + # get number of patches in both dimensions + self.num_patches_h = int((self.resized_h - self.tile_size_h) / self.stride_h) + 1 + self.num_patches_w = int((self.resized_w - self.tile_size_w) / self.stride_w) + 1 + self.num_tiles = self.num_patches_h * self.num_patches_w + + def tile(self, image: Tensor, use_random_tiling: bool = False) -> Tensor: + """Tiles an input image to either overlapping or non-overlapping patches. + + Args: + image (Tensor): Input images. + use_random_tiling (bool): Random tiling, which is part of original tiler but is unused here. + + Returns: + Tensor: Tiles generated from images. + Returned shape: [num_h, num_w, batch, channel, tile_height, tile_width]. 
+ """ + # tiles are returned in order [tile_count * batch, channels, tile_height, tile_width] + combined_tiles = super().tile(image, use_random_tiling) + + # rearrange to [num_h, num_w, batch, channel, tile_height, tile_width] + tiles = combined_tiles.contiguous().view( + self.batch_size, + self.num_patches_h, + self.num_patches_w, + self.num_channels, + self.tile_size_h, + self.tile_size_w, + ) + tiles = tiles.permute(1, 2, 0, 3, 4, 5) + + return tiles # noqa: RET504 + + def untile(self, tiles: Tensor) -> Tensor: + """Reassemble the tiled tensor into image level representation. + + Args: + tiles (Tensor): Tiles in shape: [num_h, num_w, batch, channel, tile_height, tile_width]. + + Returns: + Tensor: Image constructed from input tiles. Shape: [B, C, H, W]. + """ + # tiles have shape [num_h, num_w, batch, channel, tile_height, tile_width] + _, _, batch, channels, tile_size_h, tile_size_w = tiles.shape + + # set tilers batch size as it might have been changed by previous tiling + self.batch_size = batch + + # rearrange the tiles in order [tile_count * batch, channels, tile_height, tile_width] + # the required shape for untiling + tiles = tiles.permute(2, 0, 1, 3, 4, 5) + tiles = tiles.contiguous().view(-1, channels, tile_size_h, tile_size_w) + + untiled = super().untile(tiles) + + return untiled # noqa: RET504 + + +class TileCollater: + """Class serving as collate function to perform tiling on batch of images from Dataloader. + + Args: + tiler (EnsembleTiler): Tiler used to split the images to tiles. + tile_index (tuple[int, int]): Index of tile we want to return. + """ + + def __init__(self, tiler: EnsembleTiler, tile_index: tuple[int, int]) -> None: + self.tiler = tiler + self.tile_index = tile_index + + def __call__(self, batch: list) -> dict[str, Any]: + """Collate batch and tile images + masks from batch. + + Args: + batch (list): Batch of elements from data, also including images. + + Returns: + dict[str, Any]: Collated batch dictionary with tiled images. 
+ """ + # use default collate + coll_batch = collate_fn(batch) + + tiled_images = self.tiler.tile(coll_batch["image"]) + # return only tiles at given index + coll_batch["image"] = tiled_images[self.tile_index] + + if "mask" in coll_batch: + # insert channel (as mask has just one) + tiled_masks = self.tiler.tile(coll_batch["mask"].unsqueeze(1)) + + # return only tiled at given index, squeeze to remove previously added channel + coll_batch["mask"] = tiled_masks[self.tile_index].squeeze(1) + + return coll_batch diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/helper_functions.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/helper_functions.py new file mode 100644 index 0000000000..bc1e5f4f55 --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/utils/helper_functions.py @@ -0,0 +1,179 @@ +"""Helper functions for the tiled ensemble training.""" + +import json + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +from pathlib import Path + +from jsonargparse import ArgumentParser, Namespace +from lightning import Trainer + +from anomalib.data import AnomalibDataModule, get_datamodule +from anomalib.models import AnomalyModule, get_model +from anomalib.utils.normalization import NormalizationMethod + +from . import NormalizationStage +from .ensemble_engine import TiledEnsembleEngine +from .ensemble_tiling import EnsembleTiler, TileCollater + + +def get_ensemble_datamodule(data_args: dict, tiler: EnsembleTiler, tile_index: tuple[int, int]) -> AnomalibDataModule: + """Get Anomaly Datamodule adjusted for use in ensemble. + + Datamodule collate function gets replaced by TileCollater in order to tile all images before they are passed on. + + Args: + data_args: tiled ensemble data configuration. + tiler (EnsembleTiler): Tiler used to split the images to tiles for use in ensemble. + tile_index (tuple[int, int]): Index of the tile in the split image. 
+ + Returns: + AnomalibDataModule: Anomalib Lightning DataModule + """ + datamodule = get_datamodule(data_args) + # set custom collate function that does the tiling + datamodule.collate_fn = TileCollater(tiler, tile_index) + datamodule.setup() + + return datamodule + + +def get_ensemble_model(model_args: dict, tiler: EnsembleTiler) -> AnomalyModule: + """Get model prepared for ensemble training. + + Args: + model_args: tiled ensemble model configuration. + tiler (EnsembleTiler): tiler used to get tile dimensions. + + Returns: + AnomalyModule: model with input_size setup + """ + model = get_model(model_args) + # set model input size match tile size + model.set_input_size((tiler.tile_size_h, tiler.tile_size_w)) + + return model + + +def get_ensemble_tiler(tiling_args: dict, data_args: dict) -> EnsembleTiler: + """Get tiler used for image tiling and to obtain tile dimensions. + + Args: + tiling_args: tiled ensemble tiling configuration. + data_args: tiled ensemble data configuration. + + Returns: + EnsembleTiler: tiler object. + """ + tiler = EnsembleTiler( + tile_size=tiling_args["tile_size"], + stride=tiling_args["stride"], + image_size=data_args["init_args"]["image_size"], + ) + + return tiler # noqa: RET504 + + +def parse_trainer_kwargs(trainer_args: dict | None) -> Namespace | dict: + """Parse trainer args and instantiate all needed elements. + + Transforms config into kwargs ready for Trainer, including instantiation of callback etc. + + Args: + trainer_args (dict): Trainer args dictionary. + + Returns: + dict: parsed kwargs with instantiated elements. 
+ """ + if not trainer_args: + return {} + + # try to get trainer args, if not present return empty + parser = ArgumentParser() + + parser.add_class_arguments(Trainer, fail_untyped=False, instantiate=False, sub_configs=True) + config = parser.parse_object(trainer_args) + objects = parser.instantiate_classes(config) + + return objects # noqa: RET504 + + +def get_ensemble_engine( + tile_index: tuple[int, int], + accelerator: str, + devices: list[int] | str | int, + root_dir: Path, + normalization_stage: str, + metrics: dict | None = None, + trainer_args: dict | None = None, +) -> TiledEnsembleEngine: + """Prepare engine for ensemble training or prediction. + + This method makes sure correct normalization is used, prepares metrics and additional trainer kwargs.. + + Args: + tile_index (tuple[int, int]): Index of tile that this model processes. + accelerator (str): Accelerator (device) to use. + devices (list[int] | str | int): device IDs used for training. + root_dir (Path): Root directory to save checkpoints, stats and images. + normalization_stage (str): Config dictionary for ensemble post-processing. + metrics (dict): Dict containing pixel and image metrics names. + trainer_args (dict): Trainer args dictionary. Empty dict if not present. + + Returns: + TiledEnsembleEngine: set up engine for ensemble training/prediction. 
+ """ + # if we want tile level normalization we set it here, otherwise it's done later on joined images + if normalization_stage == NormalizationStage.TILE: + normalization = NormalizationMethod.MIN_MAX + else: + normalization = NormalizationMethod.NONE + + # parse additional trainer args and callbacks if present in config + trainer_kwargs = parse_trainer_kwargs(trainer_args) + # remove keys that we already have + trainer_kwargs.pop("accelerator", None) + trainer_kwargs.pop("default_root_dir", None) + trainer_kwargs.pop("devices", None) + + # create engine for specific tile location + engine = TiledEnsembleEngine( + tile_index=tile_index, + normalization=normalization, + accelerator=accelerator, + devices=devices, + default_root_dir=root_dir, + image_metrics=metrics.get("image", None) if metrics else None, + pixel_metrics=metrics.get("pixel", None) if metrics else None, + **trainer_kwargs, + ) + + return engine # noqa: RET504 + + +def get_threshold_values(normalization_stage: NormalizationStage, root_dir: Path) -> tuple[float, float]: + """Get threshold values for image and pixel level predictions. + + If normalization is not used, get values based on statistics obtained from validation set. + If normalization is used, both image and pixel threshold are 0.5 + + Args: + normalization_stage (NormalizationStage): ensemble run args, used to get normalization stage. + root_dir (Path): path to run root where stats file is saved. + + Returns: + tuple[float, float]: image and pixel threshold. 
+ """ + if normalization_stage == NormalizationStage.NONE: + stats_path = root_dir / "weights" / "lightning" / "stats.json" + with stats_path.open("r") as f: + stats = json.load(f) + image_threshold = stats["image_threshold"] + pixel_threshold = stats["pixel_threshold"] + else: + # normalization transforms the scores so that threshold is at 0.5 + image_threshold = 0.5 + pixel_threshold = 0.5 + + return image_threshold, pixel_threshold diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_data.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_data.py new file mode 100644 index 0000000000..4fe45e9c4a --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_data.py @@ -0,0 +1,45 @@ +"""Classes used to store ensemble predictions.""" + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from torch import Tensor + + +class EnsemblePredictions: + """Basic implementation of EnsemblePredictionData that keeps all predictions in main memory.""" + + def __init__(self) -> None: + super().__init__() + self.all_data: dict[tuple[int, int], list] = {} + + def add_tile_prediction(self, tile_index: tuple[int, int], tile_prediction: list[dict[str, Tensor | list]]) -> None: + """Add tile prediction data at provided index to class dictionary in main memory. + + Args: + tile_index (tuple[int, int]): Index of tile that we are adding in form (row, column). + tile_prediction (list[dict[str, Tensor | list]]): + List of batches containing all predicted data for current tile position. + + """ + self.num_batches = len(tile_prediction) + + self.all_data[tile_index] = tile_prediction + + def get_batch_tiles(self, batch_index: int) -> dict[tuple[int, int], dict]: + """Get all tiles of current batch from class dictionary. + + Called by merging mechanism. + + Args: + batch_index (int): Index of current batch of tiles to be returned. 
+ + Returns: + dict[tuple[int, int], dict]: Dictionary mapping tile index to predicted data, for provided batch index. + """ + batch_data = {} + + for index, batches in self.all_data.items(): + batch_data[index] = batches[batch_index] + + return batch_data diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_merging.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_merging.py new file mode 100644 index 0000000000..7337cc4ffe --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_merging.py @@ -0,0 +1,167 @@ +"""Class used as mechanism to merge ensemble predictions from each tile into complete whole-image representation.""" + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import torch +from torch import Tensor + +from .ensemble_tiling import EnsembleTiler +from .prediction_data import EnsemblePredictions + + +class PredictionMergingMechanism: + """Class used for merging the data predicted by each separate model of tiled ensemble. + + Tiles are stacked in one tensor and untiled using Ensemble Tiler. + Boxes from tiles are either stacked or generated anew from anomaly map. + Labels are combined with OR operator, meaning one anomalous tile -> anomalous image. + Scores are averaged across all tiles. + + Args: + ensemble_predictions (EnsemblePredictions): Object containing predictions on tile level. + tiler (EnsembleTiler): Tiler used to transform tiles back to image level representation. 
+ + Example: + >>> from anomalib.pipelines.tiled_ensemble.components.utils.ensemble_tiling import EnsembleTiler + >>> from anomalib.pipelines.tiled_ensemble.components.utils.prediction_data import EnsemblePredictions + >>> + >>> tiler = EnsembleTiler(tile_size=256, stride=128, image_size=512) + >>> data = EnsemblePredictions() + >>> merger = PredictionMergingMechanism(data, tiler) + >>> + >>> # we can then start merging procedure for each batch + >>> merger.merge_tile_predictions(0) + """ + + def __init__(self, ensemble_predictions: EnsemblePredictions, tiler: EnsembleTiler) -> None: + assert ensemble_predictions.num_batches > 0, "There should be at least one batch for each tile prediction." + assert (0, 0) in ensemble_predictions.get_batch_tiles( + 0, + ), "Tile prediction dictionary should always have at least one tile" + + self.ensemble_predictions = ensemble_predictions + self.num_batches = self.ensemble_predictions.num_batches + + self.tiler = tiler + + def merge_tiles(self, batch_data: dict, tile_key: str) -> Tensor: + """Merge tiles back into one tensor and perform untiling with tiler. + + Args: + batch_data (dict): Dictionary containing all tile predictions of current batch. + tile_key (str): Key used in prediction dictionary for tiles that we want to merge. + + Returns: + Tensor: Tensor of tiles in original (stitched) shape. 
+ """ + # batch of tiles with index (0, 0) always exists, so we use it to get some basic information + first_tiles = batch_data[0, 0][tile_key] + batch_size = first_tiles.shape[0] + device = first_tiles.device + + if tile_key == "mask": + # in case of ground truth masks, we don't have channels + merged_size = [ + self.tiler.num_patches_h, + self.tiler.num_patches_w, + batch_size, + self.tiler.tile_size_h, + self.tiler.tile_size_w, + ] + else: + # all tiles beside masks also have channels + num_channels = first_tiles.shape[1] + merged_size = [ + self.tiler.num_patches_h, + self.tiler.num_patches_w, + batch_size, + int(num_channels), + self.tiler.tile_size_h, + self.tiler.tile_size_w, + ] + + # create new empty tensor for merged tiles + merged_masks = torch.zeros(size=merged_size, device=device) + + # insert tile into merged tensor at right locations + for (tile_i, tile_j), tile_data in batch_data.items(): + merged_masks[tile_i, tile_j, ...] = tile_data[tile_key] + + if tile_key == "mask": + # add channel as tiler needs it + merged_masks = merged_masks.unsqueeze(3) + + # stitch tiles back into whole, output is [B, C, H, W] + merged_output = self.tiler.untile(merged_masks) + + if tile_key == "mask": + # remove previously added channels + merged_output = merged_output.squeeze(1) + + return merged_output + + def merge_labels_and_scores(self, batch_data: dict) -> dict[str, Tensor]: + """Join scores and their corresponding label predictions from all tiles for each image. + + Label merging is done by rule where one anomalous tile in image results in whole image being anomalous. + Scores are averaged over tiles. + + Args: + batch_data (dict): Dictionary containing all tile predictions of current batch. 
+ + Returns: + dict[str, Tensor]: Dictionary with "pred_labels" and "pred_scores" + """ + # create accumulator with same shape as original + labels = torch.zeros(batch_data[0, 0]["pred_labels"].shape, dtype=torch.bool) + scores = torch.zeros(batch_data[0, 0]["pred_scores"].shape) + + for curr_tile_data in batch_data.values(): + curr_labels = curr_tile_data["pred_labels"] + curr_scores = curr_tile_data["pred_scores"] + + labels = labels.logical_or(curr_labels) + scores += curr_scores + + scores /= self.tiler.num_tiles + + return {"pred_labels": labels, "pred_scores": scores} + + def merge_tile_predictions(self, batch_index: int) -> dict[str, Tensor | list]: + """Join predictions from ensemble into whole image level representation for batch at index batch_index. + + Args: + batch_index (int): Index of current batch. + + Returns: + dict[str, Tensor | list]: List of merged predictions for specified batch. + """ + current_batch_data = self.ensemble_predictions.get_batch_tiles(batch_index) + + # take first tile as base prediction, keep items that are the same over all tiles: + # image_path, label, mask_path + merged_predictions = { + "image_path": current_batch_data[0, 0]["image_path"], + "label": current_batch_data[0, 0]["label"], + } + if "mask_path" in current_batch_data[0, 0]: + merged_predictions["mask_path"] = current_batch_data[0, 0]["mask_path"] + if "boxes" in current_batch_data[0, 0]: + merged_predictions["boxes"] = current_batch_data[0, 0]["boxes"] + + tiled_data = ["image", "mask"] + if "anomaly_maps" in current_batch_data[0, 0]: + tiled_data += ["anomaly_maps", "pred_masks"] + + # merge all tiled data + for t_key in tiled_data: + if t_key in current_batch_data[0, 0]: + merged_predictions[t_key] = self.merge_tiles(current_batch_data, t_key) + + # label and score merging + merged_scores_and_labels = self.merge_labels_and_scores(current_batch_data) + merged_predictions["pred_labels"] = merged_scores_and_labels["pred_labels"] + merged_predictions["pred_scores"] 
= merged_scores_and_labels["pred_scores"] + + return merged_predictions diff --git a/src/anomalib/pipelines/tiled_ensemble/components/visualization.py b/src/anomalib/pipelines/tiled_ensemble/components/visualization.py new file mode 100644 index 0000000000..1298ece89f --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/components/visualization.py @@ -0,0 +1,125 @@ +"""Tiled ensemble - visualization job.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from collections.abc import Generator +from pathlib import Path +from typing import Any + +from tqdm import tqdm + +from anomalib import TaskType +from anomalib.data.utils.image import save_image +from anomalib.pipelines.components import Job, JobGenerator +from anomalib.pipelines.tiled_ensemble.components.utils import NormalizationStage +from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS +from anomalib.utils.visualization import ImageVisualizer + +logger = logging.getLogger(__name__) + + +class VisualizationJob(Job): + """Job for visualization of predictions. + + Args: + predictions (list[Any]): list of image-level predictions. + root_dir (Path): Root directory to save checkpoints, stats and images. + task (TaskType): type of task the predictions represent. + normalize (bool): if predictions need to be normalized + """ + + name = "Visualize" + + def __init__(self, predictions: list[Any], root_dir: Path, task: TaskType, normalize: bool) -> None: + super().__init__() + self.predictions = predictions + self.root_dir = root_dir / "images" + self.task = task + self.normalize = normalize + + def run(self, task_id: int | None = None) -> list[Any]: + """Run job that visualizes all prediction data. + + Args: + task_id: Not used in this case. + + Returns: + list[Any]: Unchanged predictions. 
+ """ + del task_id # not needed here + + visualizer = ImageVisualizer(task=self.task, normalize=self.normalize) + + logger.info("Starting visualization.") + + for data in tqdm(self.predictions, desc="Visualizing"): + for result in visualizer(outputs=data): + # Finally image path is root/defect_type/image_name + if result.file_name is not None: + file_path = Path(result.file_name) + else: + msg = "file_path should exist in returned Visualizer." + raise ValueError(msg) + + root = self.root_dir / file_path.parent.name + filename = file_path.name + + save_image(image=result.image, root=root, filename=filename) + + return self.predictions + + @staticmethod + def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: + """Nothing to collect in this job. + + Returns: + list[Any]: Unchanged list of predictions. + """ + # take the first element as result is list of lists here + return results[0] + + @staticmethod + def save(results: GATHERED_RESULTS) -> None: + """This job doesn't save anything.""" + + +class VisualizationJobGenerator(JobGenerator): + """Generate VisualizationJob. + + Args: + root_dir (Path): Root directory where images will be saved (root/images). + """ + + def __init__(self, root_dir: Path, task: TaskType, normalization_stage: NormalizationStage) -> None: + self.root_dir = root_dir + self.task = task + self.normalize = normalization_stage == NormalizationStage.NONE + + @property + def job_class(self) -> type: + """Return the job class.""" + return VisualizationJob + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: list[Any] | None = None, + ) -> Generator[VisualizationJob, None, None]: + """Return a generator producing a single visualization job. + + Args: + args: Ensemble run args. + prev_stage_result (list[Any]): Ensemble predictions from previous step. 
+ + Returns: + Generator[VisualizationJob, None, None]: VisualizationJob generator + """ + del args # args not used here + + if prev_stage_result is not None: + yield VisualizationJob(prev_stage_result, self.root_dir, self.task, self.normalize) + else: + msg = "Visualization job requires tile level predictions from previous step." + raise ValueError(msg) diff --git a/src/anomalib/pipelines/tiled_ensemble/test_pipeline.py b/src/anomalib/pipelines/tiled_ensemble/test_pipeline.py new file mode 100644 index 0000000000..7fdd61e9ff --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/test_pipeline.py @@ -0,0 +1,124 @@ +"""Tiled ensemble test pipeline.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from pathlib import Path + +import torch + +from anomalib.data.utils import TestSplitMode +from anomalib.pipelines.components.base import Pipeline, Runner +from anomalib.pipelines.components.runners import ParallelRunner, SerialRunner +from anomalib.pipelines.tiled_ensemble.components import ( + MergeJobGenerator, + MetricsCalculationJobGenerator, + NormalizationJobGenerator, + PredictJobGenerator, + SmoothingJobGenerator, + ThresholdingJobGenerator, + VisualizationJobGenerator, +) +from anomalib.pipelines.tiled_ensemble.components.utils import NormalizationStage, PredictData, ThresholdStage + +logger = logging.getLogger(__name__) + + +class EvalTiledEnsemble(Pipeline): + """Tiled ensemble evaluation pipeline. + + Args: + root_dir (Path): Path to root dir of run that contains checkpoints. + """ + + def __init__(self, root_dir: Path) -> None: + self.root_dir = Path(root_dir) + + def _setup_runners(self, args: dict) -> list[Runner]: + """Set up the runners for the pipeline. 
+ + This pipeline consists of jobs used to test/evaluate tiled ensemble: + Prediction on test data > merging of predictions > (optional) seam smoothing + > (optional) Normalization > (optional) Thresholding + > Visualisation of predictions > Metrics calculation. + + Returns: + list[Runner]: List of runners executing tiled ensemble testing jobs. + """ + runners: list[Runner] = [] + + if args["data"]["init_args"]["test_split_mode"] == TestSplitMode.NONE: + logger.info("Test split mode set to `none`, skipping test phase.") + return runners + + seed = args["seed"] + accelerator = args["accelerator"] + tiling_args = args["tiling"] + data_args = args["data"] + normalization_stage = NormalizationStage(args["normalization_stage"]) + threshold_stage = ThresholdStage(args["thresholding"]["stage"]) + model_args = args["TrainModels"]["model"] + task = args["data"]["init_args"]["task"] + metrics = args["TrainModels"]["metrics"] + + predict_job_generator = PredictJobGenerator( + PredictData.TEST, + seed=seed, + accelerator=accelerator, + root_dir=self.root_dir, + tiling_args=tiling_args, + data_args=data_args, + model_args=model_args, + normalization_stage=normalization_stage, + ) + # 1. predict using test data + if accelerator == "cuda": + runners.append( + ParallelRunner( + predict_job_generator, + n_jobs=torch.cuda.device_count(), + ), + ) + else: + runners.append( + SerialRunner( + predict_job_generator, + ), + ) + # 2. merge predictions + runners.append(SerialRunner(MergeJobGenerator(tiling_args=tiling_args, data_args=data_args))) + + # 3. (optional) smooth seams + if args["SeamSmoothing"]["apply"]: + runners.append( + SerialRunner( + SmoothingJobGenerator(accelerator=accelerator, tiling_args=tiling_args, data_args=data_args), + ), + ) + + # 4. (optional) normalize + if normalization_stage == NormalizationStage.IMAGE: + runners.append(SerialRunner(NormalizationJobGenerator(self.root_dir))) + # 5. 
(optional) threshold to get labels from scores + if threshold_stage == ThresholdStage.IMAGE: + runners.append(SerialRunner(ThresholdingJobGenerator(self.root_dir, normalization_stage))) + + # 6. visualize predictions + runners.append( + SerialRunner(VisualizationJobGenerator(self.root_dir, task=task, normalization_stage=normalization_stage)), + ) + # calculate metrics + runners.append( + SerialRunner( + MetricsCalculationJobGenerator( + accelerator=accelerator, + root_dir=self.root_dir, + task=task, + metrics=metrics, + normalization_stage=normalization_stage, + ), + ), + ) + + return runners diff --git a/src/anomalib/pipelines/tiled_ensemble/train_pipeline.py b/src/anomalib/pipelines/tiled_ensemble/train_pipeline.py new file mode 100644 index 0000000000..38e4e34e4b --- /dev/null +++ b/src/anomalib/pipelines/tiled_ensemble/train_pipeline.py @@ -0,0 +1,123 @@ +"""Tiled ensemble training pipeline.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import TYPE_CHECKING + +from anomalib.data.utils import ValSplitMode + +if TYPE_CHECKING: + from pathlib import Path + +import logging + +import torch + +from anomalib.pipelines.components.base import Pipeline, Runner +from anomalib.pipelines.components.runners import ParallelRunner, SerialRunner + +from .components import ( + MergeJobGenerator, + PredictJobGenerator, + SmoothingJobGenerator, + StatisticsJobGenerator, + TrainModelJobGenerator, +) +from .components.utils import NormalizationStage, PredictData +from .components.utils.ensemble_engine import TiledEnsembleEngine + +logger = logging.getLogger(__name__) + + +class TrainTiledEnsemble(Pipeline): + """Tiled ensemble training pipeline.""" + + def __init__(self) -> None: + self.root_dir: Path + + def _setup_runners(self, args: dict) -> list[Runner]: + """Setup the runners for the pipeline. 
+ + This pipeline consists of training and validation steps: + Training models > prediction on val data > merging val data > + > (optionally) smoothing seams > calculation of post-processing statistics + + Returns: + list[Runner]: List of runners executing tiled ensemble train + val jobs. + """ + runners: list[Runner] = [] + self.root_dir = TiledEnsembleEngine.setup_ensemble_workspace(args) + + seed = args["seed"] + accelerator = args["accelerator"] + tiling_args = args["tiling"] + data_args = args["data"] + normalization_stage = NormalizationStage(args["normalization_stage"]) + thresholding_method = args["thresholding"]["method"] + model_args = args["TrainModels"]["model"] + + train_job_generator = TrainModelJobGenerator( + seed=seed, + accelerator=accelerator, + root_dir=self.root_dir, + tiling_args=tiling_args, + data_args=data_args, + normalization_stage=normalization_stage, + ) + + predict_job_generator = PredictJobGenerator( + data_source=PredictData.VAL, + seed=seed, + accelerator=accelerator, + root_dir=self.root_dir, + tiling_args=tiling_args, + data_args=data_args, + model_args=model_args, + normalization_stage=normalization_stage, + ) + + # 1. train + if accelerator == "cuda": + runners.append( + ParallelRunner( + train_job_generator, + n_jobs=torch.cuda.device_count(), + ), + ) + else: + runners.append( + SerialRunner( + train_job_generator, + ), + ) + + if data_args["init_args"]["val_split_mode"] == ValSplitMode.NONE: + logger.warning("No validation set provided, skipping statistics calculation.") + return runners + + # 2. predict using validation data + if accelerator == "cuda": + runners.append( + ParallelRunner(predict_job_generator, n_jobs=torch.cuda.device_count()), + ) + else: + runners.append( + SerialRunner(predict_job_generator), + ) + + # 3. merge predictions + runners.append(SerialRunner(MergeJobGenerator(tiling_args=tiling_args, data_args=data_args))) + + # 4. 
(optional) smooth seams + if args["SeamSmoothing"]["apply"]: + runners.append( + SerialRunner( + SmoothingJobGenerator(accelerator=accelerator, tiling_args=tiling_args, data_args=data_args), + ), + ) + + # 5. calculate statistics used for inference + runners.append(SerialRunner(StatisticsJobGenerator(self.root_dir, thresholding_method))) + + return runners diff --git a/tests/integration/pipelines/test_tiled_ensemble.py b/tests/integration/pipelines/test_tiled_ensemble.py new file mode 100644 index 0000000000..2909311276 --- /dev/null +++ b/tests/integration/pipelines/test_tiled_ensemble.py @@ -0,0 +1,62 @@ +"""Test tiled ensemble training and prediction.""" + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path + +import pytest +import yaml + +from anomalib.pipelines.tiled_ensemble import EvalTiledEnsemble, TrainTiledEnsemble + + +@pytest.fixture(scope="session") +def get_mock_environment(dataset_path: Path, project_path: Path) -> Path: + """Return mock directory for testing with datapath setup to dummy data.""" + ens_temp_dir = project_path / "ens_tmp" + ens_temp_dir.mkdir(exist_ok=True) + + with Path("tests/integration/pipelines/tiled_ensemble.yaml").open(encoding="utf-8") as file: + config = yaml.safe_load(file) + + # use separate project temp dir to avoid messing with other tests + config["default_root_dir"] = str(ens_temp_dir) + config["data"]["init_args"]["root"] = str(dataset_path / "mvtec") + + with (Path(ens_temp_dir) / "tiled_ensemble.yaml").open("w", encoding="utf-8") as file: + yaml.safe_dump(config, file) + + return Path(ens_temp_dir) + + +def test_train(get_mock_environment: Path, capsys: pytest.CaptureFixture) -> None: + """Test training of the tiled ensemble.""" + train_pipeline = TrainTiledEnsemble() + train_parser = train_pipeline.get_parser() + args = train_parser.parse_args(["--config", str(get_mock_environment / "tiled_ensemble.yaml")]) + train_pipeline.run(args) + # check that no 
errors were printed -> all stages were successful + out = capsys.readouterr().out + assert not any(line.startswith("There were some errors") for line in out.split("\n")) + + +def test_predict(get_mock_environment: Path, capsys: pytest.CaptureFixture) -> None: + """Test prediction with the tiled ensemble.""" + predict_pipeline = EvalTiledEnsemble(root_dir=get_mock_environment / "Padim" / "MVTec" / "dummy" / "v0") + predict_parser = predict_pipeline.get_parser() + args = predict_parser.parse_args(["--config", str(get_mock_environment / "tiled_ensemble.yaml")]) + predict_pipeline.run(args) + # check that no errors were printed -> all stages were successful + out = capsys.readouterr().out + assert not any(line.startswith("There were some errors") for line in out.split("\n")) + + +def test_visualisation(get_mock_environment: Path) -> None: + """Test that images were produced.""" + assert (get_mock_environment / "Padim/MVTec/dummy/v0/images/bad/000.png").exists() + + +def test_metric_results(get_mock_environment: Path) -> None: + """Test that metrics were saved.""" + assert (get_mock_environment / "Padim/MVTec/dummy/v0/metric_results.csv").exists() diff --git a/tests/integration/pipelines/tiled_ensemble.yaml b/tests/integration/pipelines/tiled_ensemble.yaml new file mode 100644 index 0000000000..8d35be8297 --- /dev/null +++ b/tests/integration/pipelines/tiled_ensemble.yaml @@ -0,0 +1,43 @@ +seed: 42 +accelerator: "cpu" +default_root_dir: "results" + +tiling: + tile_size: [50, 50] + stride: 50 + +normalization_stage: image # on what level we normalize, options: [tile, image, none] +thresholding: + method: F1AdaptiveThreshold # refer to documentation for thresholding methods + stage: image # stage at which we apply threshold, options: [tile, image] + +data: + class_path: anomalib.data.MVTec + init_args: + root: toBeSetup + category: dummy + train_batch_size: 32 + eval_batch_size: 32 + num_workers: 0 + task: segmentation + transform: null + train_transform: null + 
eval_transform: null + test_split_mode: from_dir + test_split_ratio: 0.2 + val_split_mode: same_as_test + val_split_ratio: 0.5 + image_size: [50, 100] + +SeamSmoothing: + apply: True # if this is applied, area around tile seams are is smoothed + sigma: 2 # sigma of gaussian filter used to smooth this area + width: 0.1 # width factor, multiplied by tile dimension gives the region width around seam which will be smoothed + +TrainModels: + model: + class_path: Padim + + metrics: + pixel: AUROC + image: AUROC diff --git a/tests/unit/pipelines/__init__.py b/tests/unit/pipelines/__init__.py new file mode 100644 index 0000000000..46de40af76 --- /dev/null +++ b/tests/unit/pipelines/__init__.py @@ -0,0 +1,4 @@ +"""Pipeline unit tests.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/unit/pipelines/tiled_ensemble/__init__.py b/tests/unit/pipelines/tiled_ensemble/__init__.py new file mode 100644 index 0000000000..a78a1ad659 --- /dev/null +++ b/tests/unit/pipelines/tiled_ensemble/__init__.py @@ -0,0 +1,4 @@ +"""Tiled ensemble unit tests.""" + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/unit/pipelines/tiled_ensemble/conftest.py b/tests/unit/pipelines/tiled_ensemble/conftest.py new file mode 100644 index 0000000000..b4fad61ebb --- /dev/null +++ b/tests/unit/pipelines/tiled_ensemble/conftest.py @@ -0,0 +1,151 @@ +"""Fixtures that are used in tiled ensemble testing.""" + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +from pathlib import Path +from tempfile import TemporaryDirectory + +import pytest +import torch +import yaml + +from anomalib.data import AnomalibDataModule +from anomalib.models import AnomalyModule +from anomalib.pipelines.tiled_ensemble.components.utils.ensemble_tiling import EnsembleTiler +from anomalib.pipelines.tiled_ensemble.components.utils.helper_functions import ( + get_ensemble_datamodule, + 
get_ensemble_model, + get_ensemble_tiler, +) +from anomalib.pipelines.tiled_ensemble.components.utils.prediction_data import EnsemblePredictions +from anomalib.pipelines.tiled_ensemble.components.utils.prediction_merging import PredictionMergingMechanism + + +@pytest.fixture(scope="module") +def get_ensemble_config(dataset_path: Path) -> dict: + """Return ensemble dummy config dict with corrected dataset path to dummy temp dir.""" + with Path("tests/unit/pipelines/tiled_ensemble/dummy_config.yaml").open(encoding="utf-8") as file: + config = yaml.safe_load(file) + # dummy dataset + config["data"]["init_args"]["root"] = dataset_path / "mvtec" + + return config + + +@pytest.fixture(scope="module") +def get_tiler(get_ensemble_config: dict) -> EnsembleTiler: + """Return EnsembleTiler object based on test dummy config.""" + config = get_ensemble_config + + return get_ensemble_tiler(config["tiling"], config["data"]) + + +@pytest.fixture(scope="module") +def get_model(get_ensemble_config: dict, get_tiler: EnsembleTiler) -> AnomalyModule: + """Return model prepared for tiled ensemble training.""" + config = get_ensemble_config + tiler = get_tiler + + return get_ensemble_model(config["TrainModels"]["model"], tiler) + + +@pytest.fixture(scope="module") +def get_datamodule(get_ensemble_config: dict, get_tiler: EnsembleTiler) -> AnomalibDataModule: + """Return ensemble datamodule.""" + config = get_ensemble_config + tiler = get_tiler + datamodule = get_ensemble_datamodule(config, tiler, (0, 0)) + datamodule.setup() + + return datamodule + + +@pytest.fixture(scope="module") +def get_tile_predictions(get_datamodule: AnomalibDataModule) -> EnsemblePredictions: + """Return tile predictions inside EnsemblePredictions object.""" + datamodule = get_datamodule + + data = EnsemblePredictions() + + for tile_index in [(0, 0), (0, 1), (1, 0), (1, 1)]: + datamodule.collate_fn.tile_index = tile_index + + tile_prediction = [] + batch = next(iter(datamodule.test_dataloader())) + + # make mock 
labels and scores + batch["pred_scores"] = torch.rand(batch["label"].shape) + batch["pred_labels"] = batch["pred_scores"] > 0.5 + + # set mock maps to just one channel of image + batch["anomaly_maps"] = batch["image"].clone()[:, 0, :, :].unsqueeze(1) + # set mock pred mask to mask but add channel + batch["pred_masks"] = batch["mask"].clone().unsqueeze(1) + + tile_prediction.append(batch) + + # store to prediction storage object + data.add_tile_prediction(tile_index, tile_prediction) + + return data + + +@pytest.fixture(scope="module") +def get_batch_predictions() -> list[dict]: + """Return mock batched predictions.""" + mock_data = { + "image": torch.rand((5, 3, 100, 100)), + "mask": (torch.rand((5, 100, 100)) > 0.5).type(torch.float32), + "anomaly_maps": torch.rand((5, 1, 100, 100)), + "label": torch.Tensor([0, 1, 1, 0, 1]), + "pred_scores": torch.rand(5), + "pred_labels": torch.ones(5), + "pred_masks": torch.zeros((5, 100, 100)), + } + + return [mock_data, mock_data] + + +@pytest.fixture(scope="module") +def get_merging_mechanism( + get_tile_predictions: EnsemblePredictions, + get_tiler: EnsembleTiler, +) -> PredictionMergingMechanism: + """Return ensemble prediction merging mechanism object.""" + tiler = get_tiler + predictions = get_tile_predictions + return PredictionMergingMechanism(predictions, tiler) + + +@pytest.fixture(scope="module") +def get_mock_stats_dir() -> Path: + """Get temp dir containing statistics.""" + with TemporaryDirectory() as temp_dir: + stats = { + "minmax": { + "anomaly_maps": { + "min": 1.9403648376464844, + "max": 209.91940307617188, + }, + "box_scores": { + "min": 0.5, + "max": 0.45, + }, + "pred_scores": { + "min": 9.390382766723633, + "max": 209.91940307617188, + }, + }, + "image_threshold": 0.1111, + "pixel_threshold": 0.1111, + } + stats_path = Path(temp_dir) / "weights" / "lightning" / "stats.json" + stats_path.parent.mkdir(parents=True) + + # save mock statistics + with stats_path.open("w", encoding="utf-8") as stats_file: + 
json.dump(stats, stats_file, ensure_ascii=False, indent=4) + + yield Path(temp_dir) diff --git a/tests/unit/pipelines/tiled_ensemble/dummy_config.yaml b/tests/unit/pipelines/tiled_ensemble/dummy_config.yaml new file mode 100644 index 0000000000..fcd4b7c716 --- /dev/null +++ b/tests/unit/pipelines/tiled_ensemble/dummy_config.yaml @@ -0,0 +1,52 @@ +seed: 42 +accelerator: "cpu" +default_root_dir: "results" + +tiling: + tile_size: [50, 50] + stride: 50 + +normalization_stage: image # on what level we normalize, options: [tile, image, none] +thresholding: + method: F1AdaptiveThreshold # refer to documentation for thresholding methods + stage: image # stage at which we apply threshold, options: [tile, image] + +data: + class_path: anomalib.data.MVTec + init_args: + root: toBeSetup + category: dummy + train_batch_size: 32 + eval_batch_size: 32 + num_workers: 0 + task: segmentation + transform: null + train_transform: null + eval_transform: null + test_split_mode: from_dir + test_split_ratio: 0.2 + val_split_mode: same_as_test + val_split_ratio: 0.5 + image_size: [100, 100] + +SeamSmoothing: + apply: True # if this is applied, area around tile seams are is smoothed + sigma: 2 # sigma of gaussian filter used to smooth this area + width: 0.1 # width factor, multiplied by tile dimension gives the region width around seam which will be smoothed + +TrainModels: + model: + class_path: Fastflow + + metrics: + pixel: AUROC + image: AUROC + + trainer: + max_epochs: 1 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 1 + monitor: pixel_AUROC + mode: max diff --git a/tests/unit/pipelines/tiled_ensemble/test_components.py b/tests/unit/pipelines/tiled_ensemble/test_components.py new file mode 100644 index 0000000000..0e3c0dcdd4 --- /dev/null +++ b/tests/unit/pipelines/tiled_ensemble/test_components.py @@ -0,0 +1,387 @@ +"""Test working of tiled ensemble pipeline components.""" + +# Copyright (C) 2023-2024 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 + +import copy +from pathlib import Path +from tempfile import TemporaryDirectory + +import pytest +import torch + +from anomalib.data import get_datamodule +from anomalib.metrics import F1AdaptiveThreshold, ManualThreshold +from anomalib.pipelines.tiled_ensemble.components import ( + MergeJobGenerator, + MetricsCalculationJobGenerator, + NormalizationJobGenerator, + SmoothingJobGenerator, + StatisticsJobGenerator, + ThresholdingJobGenerator, +) +from anomalib.pipelines.tiled_ensemble.components.metrics_calculation import MetricsCalculationJob +from anomalib.pipelines.tiled_ensemble.components.smoothing import SmoothingJob +from anomalib.pipelines.tiled_ensemble.components.utils import NormalizationStage +from anomalib.pipelines.tiled_ensemble.components.utils.prediction_data import EnsemblePredictions +from anomalib.pipelines.tiled_ensemble.components.utils.prediction_merging import PredictionMergingMechanism + + +class TestMerging: + """Test merging mechanism and merging job.""" + + @staticmethod + def test_tile_merging(get_ensemble_config: dict, get_merging_mechanism: PredictionMergingMechanism) -> None: + """Test tiled data merging.""" + config = get_ensemble_config + merger = get_merging_mechanism + + # prepared original data + datamodule = get_datamodule(config) + datamodule.prepare_data() + datamodule.setup() + original_data = next(iter(datamodule.test_dataloader())) + + batch = merger.ensemble_predictions.get_batch_tiles(0) + + merged_image = merger.merge_tiles(batch, "image") + assert merged_image.equal(original_data["image"]) + + merged_mask = merger.merge_tiles(batch, "mask") + assert merged_mask.equal(original_data["mask"]) + + @staticmethod + def test_label_and_score_merging(get_merging_mechanism: PredictionMergingMechanism) -> None: + """Test label and score merging.""" + merger = get_merging_mechanism + scores = torch.rand(4, 10) + labels = scores > 0.5 + + mock_data = {(0, 0): {}, (0, 1): {}, (1, 0): {}, (1, 1): 
{}} + + for i, data in enumerate(mock_data.values()): + data["pred_scores"] = scores[i] + data["pred_labels"] = labels[i] + + merged = merger.merge_labels_and_scores(mock_data) + + assert merged["pred_scores"].equal(scores.mean(dim=0)) + + assert merged["pred_labels"].equal(labels.any(dim=0)) + + @staticmethod + def test_merge_job( + get_tile_predictions: EnsemblePredictions, + get_ensemble_config: dict, + get_merging_mechanism: PredictionMergingMechanism, + ) -> None: + """Test merging job execution.""" + config = get_ensemble_config + predictions = copy.deepcopy(get_tile_predictions) + merging_mechanism = get_merging_mechanism + + merging_job_generator = MergeJobGenerator(tiling_args=config["tiling"], data_args=config["data"]) + merging_job = next(merging_job_generator.generate_jobs(prev_stage_result=predictions)) + + merged_direct = merging_mechanism.merge_tile_predictions(0) + merged_with_job = merging_job.run()[0] + + # check that merging by job is same as with the mechanism directly + for key, value in merged_direct.items(): + if isinstance(value, torch.Tensor): + assert merged_with_job[key].equal(value) + elif isinstance(value, list) and isinstance(value[0], torch.Tensor): + # boxes + assert all(j.equal(d) for j, d in zip(merged_with_job[key], value, strict=False)) + else: + assert merged_with_job[key] == value + + +class TestStatsCalculation: + """Test post-processing statistics calculations.""" + + @staticmethod + @pytest.mark.parametrize( + ("threshold_str", "threshold_cls"), + [("F1AdaptiveThreshold", F1AdaptiveThreshold), ("ManualThreshold", ManualThreshold)], + ) + def test_threshold_method(threshold_str: str, threshold_cls: type, get_ensemble_config: dict) -> None: + """Test that correct thresholding method is used.""" + config = copy.deepcopy(get_ensemble_config) + config["thresholding"]["method"] = threshold_str + + stats_job_generator = StatisticsJobGenerator(Path("mock"), threshold_str) + stats_job = next(stats_job_generator.generate_jobs(None, 
None)) + + assert isinstance(stats_job.image_threshold, threshold_cls) + + @staticmethod + def test_stats_run(project_path: Path) -> None: + """Test execution of statistics calc. job.""" + mock_preds = [ + { + "pred_scores": torch.rand(4), + "label": torch.ones(4), + "anomaly_maps": torch.rand(4, 1, 50, 50), + "mask": torch.ones(4, 1, 50, 50), + }, + ] + + stats_job_generator = StatisticsJobGenerator(project_path, "F1AdaptiveThreshold") + stats_job = next(stats_job_generator.generate_jobs(None, mock_preds)) + + results = stats_job.run() + + assert "minmax" in results + assert "image_threshold" in results + assert "pixel_threshold" in results + + # save as it's removed from results + save_path = results["save_path"] + stats_job.save(results) + assert Path(save_path).exists() + + @staticmethod + @pytest.mark.parametrize( + ("key", "values"), + [ + ("anomaly_maps", [torch.rand(5, 1, 50, 50), torch.rand(5, 1, 50, 50)]), + ("pred_scores", [torch.rand(5), torch.rand(5)]), + ], + ) + def test_minmax(key: str, values: list) -> None: + """Test minmax stats calculation.""" + # add given keys to test all possible sources of minmax + data = [ + {"pred_scores": torch.rand(5), "label": torch.ones(5), key: values[0]}, + {"pred_scores": torch.rand(5), "label": torch.ones(5), key: values[1]}, + ] + + stats_job_generator = StatisticsJobGenerator(Path("mock"), "F1AdaptiveThreshold") + stats_job = next(stats_job_generator.generate_jobs(None, data)) + results = stats_job.run() + + if isinstance(values[0], list): + values[0] = torch.cat(values[0]) + values[1] = torch.cat(values[1]) + values = torch.stack(values) + + assert results["minmax"][key]["min"] == torch.min(values) + assert results["minmax"][key]["max"] == torch.max(values) + + @staticmethod + @pytest.mark.parametrize( + ("labels", "preds", "target_threshold"), + [ + (torch.Tensor([0, 0, 0, 1, 1]), torch.Tensor([2.3, 1.6, 2.6, 7.9, 3.3]), 3.3), # standard case + (torch.Tensor([1, 0, 0, 0]), torch.Tensor([4, 3, 2, 1]), 4), # 100% 
recall for all thresholds + ], + ) + def test_threshold(labels: torch.Tensor, preds: torch.Tensor, target_threshold: float) -> None: + """Test threshold calculation job.""" + data = [ + { + "label": labels, + "mask": labels, + "pred_scores": preds, + "anomaly_maps": preds, + }, + ] + + stats_job_generator = StatisticsJobGenerator(Path("mock"), "F1AdaptiveThreshold") + stats_job = next(stats_job_generator.generate_jobs(None, data)) + results = stats_job.run() + + assert round(results["image_threshold"], 5) == target_threshold + assert round(results["pixel_threshold"], 5) == target_threshold + + +class TestMetrics: + """Test ensemble metrics.""" + + @pytest.fixture(scope="class") + @staticmethod + def get_ensemble_metrics_job( + get_ensemble_config: dict, + get_batch_predictions: list[dict], + ) -> tuple[MetricsCalculationJob, str]: + """Return Metrics calculation job and path to directory where metrics csv will be saved.""" + config = get_ensemble_config + with TemporaryDirectory() as tmp_dir: + metrics = MetricsCalculationJobGenerator( + config["accelerator"], + root_dir=Path(tmp_dir), + task=config["data"]["init_args"]["task"], + metrics=config["TrainModels"]["metrics"], + normalization_stage=NormalizationStage(config["normalization_stage"]), + ) + + mock_predictions = get_batch_predictions + + return next(metrics.generate_jobs(prev_stage_result=copy.deepcopy(mock_predictions))), tmp_dir + + @staticmethod + def test_metrics_result(get_ensemble_metrics_job: tuple[MetricsCalculationJob, str]) -> None: + """Test metrics result.""" + metrics_job, _ = get_ensemble_metrics_job + + result = metrics_job.run() + + assert "pixel_AUROC" in result + assert "image_AUROC" in result + + @staticmethod + def test_metrics_saving(get_ensemble_metrics_job: tuple[MetricsCalculationJob, str]) -> None: + """Test metrics saving to csv.""" + metrics_job, tmp_dir = get_ensemble_metrics_job + + result = metrics_job.run() + metrics_job.save(result) + assert (Path(tmp_dir) / 
"metric_results.csv").exists() + + +class TestJoinSmoothing: + """Test JoinSmoothing job responsible for smoothing area at tile seams.""" + + @pytest.fixture(scope="class") + @staticmethod + def get_join_smoothing_job(get_ensemble_config: dict, get_batch_predictions: list[dict]) -> SmoothingJob: + """Make and return SmoothingJob instance.""" + config = get_ensemble_config + job_gen = SmoothingJobGenerator( + accelerator=config["accelerator"], + tiling_args=config["tiling"], + data_args=config["data"], + ) + # copy since smoothing changes data + mock_predictions = copy.deepcopy(get_batch_predictions) + return next(job_gen.generate_jobs(config["SeamSmoothing"], mock_predictions)) + + @staticmethod + def test_mask(get_join_smoothing_job: SmoothingJob) -> None: + """Test seam mask in case where tiles don't overlap.""" + smooth = get_join_smoothing_job + + join_index = smooth.tiler.tile_size_h, smooth.tiler.tile_size_w + + # seam should be covered by True + assert smooth.seam_mask[join_index] + + # non-seam region should be false + assert not smooth.seam_mask[0, 0] + assert not smooth.seam_mask[-1, -1] + + @staticmethod + def test_mask_overlapping(get_ensemble_config: dict, get_batch_predictions: list[dict]) -> None: + """Test seam mask in case where tiles overlap.""" + config = copy.deepcopy(get_ensemble_config) + # tile size = 50, stride = 25 -> overlapping + config["tiling"]["stride"] = 25 + job_gen = SmoothingJobGenerator( + accelerator=config["accelerator"], + tiling_args=config["tiling"], + data_args=config["data"], + ) + mock_predictions = copy.deepcopy(get_batch_predictions) + smooth = next(job_gen.generate_jobs(config["SeamSmoothing"], mock_predictions)) + + join_index = smooth.tiler.stride_h, smooth.tiler.stride_w + + # overlap seam should be covered by True + assert smooth.seam_mask[join_index] + assert smooth.seam_mask[-join_index[0], -join_index[1]] + + # non-seam region should be false + assert not smooth.seam_mask[0, 0] + assert not smooth.seam_mask[-1, 
-1] + + @staticmethod + def test_smoothing(get_join_smoothing_job: SmoothingJob, get_batch_predictions: list[dict]) -> None: + """Test smoothing job run.""" + original_data = get_batch_predictions + # fixture makes a copy of data + smooth = get_join_smoothing_job + + # take first batch + smoothed = smooth.run()[0] + join_index = smooth.tiler.tile_size_h, smooth.tiler.tile_size_w + + # join sections should be processed + assert not smoothed["anomaly_maps"][:, :, join_index].equal(original_data[0]["anomaly_maps"][:, :, join_index]) + + # non-join section shouldn't be changed + assert smoothed["anomaly_maps"][:, :, 0, 0].equal(original_data[0]["anomaly_maps"][:, :, 0, 0]) + + +def test_normalization(get_batch_predictions: list[dict], project_path: Path) -> None: + """Test normalization step.""" + original_predictions = copy.deepcopy(get_batch_predictions) + + for batch in original_predictions: + batch["anomaly_maps"] *= 100 + batch["pred_scores"] *= 100 + + # # get and save stats using stats job on predictions + stats_job_generator = StatisticsJobGenerator(project_path, "F1AdaptiveThreshold") + stats_job = next(stats_job_generator.generate_jobs(prev_stage_result=original_predictions)) + stats = stats_job.run() + stats_job.save(stats) + + # normalize predictions based on obtained stats + norm_job_generator = NormalizationJobGenerator(root_dir=project_path) + # copy as this changes preds + norm_job = next(norm_job_generator.generate_jobs(prev_stage_result=original_predictions)) + normalized_predictions = norm_job.run() + + for batch in normalized_predictions: + assert (batch["anomaly_maps"] >= 0).all() + assert (batch["anomaly_maps"] <= 1).all() + + assert (batch["pred_scores"] >= 0).all() + assert (batch["pred_scores"] <= 1).all() + + +class TestThresholding: + """Test tiled ensemble thresholding stage.""" + + @pytest.fixture(scope="class") + @staticmethod + def get_threshold_job(get_mock_stats_dir: Path) -> callable: + """Return a function that takes prediction data 
and runs threshold job.""" + thresh_job_generator = ThresholdingJobGenerator( + root_dir=get_mock_stats_dir, + normalization_stage=NormalizationStage.IMAGE, + ) + + def thresh_helper(preds: dict) -> list | None: + thresh_job = next(thresh_job_generator.generate_jobs(prev_stage_result=preds)) + return thresh_job.run() + + return thresh_helper + + @staticmethod + def test_score_threshold(get_threshold_job: callable) -> None: + """Test anomaly score thresholding.""" + thresholding = get_threshold_job + + data = [{"pred_scores": torch.tensor([0.7, 0.8, 0.1, 0.33, 0.5])}] + + thresholded = thresholding(data)[0] + + assert thresholded["pred_labels"].equal(torch.tensor([True, True, False, False, True])) + + @staticmethod + def test_anomap_threshold(get_threshold_job: callable) -> None: + """Test anomaly map thresholding.""" + thresholding = get_threshold_job + + data = [ + { + "pred_scores": torch.tensor([0.7, 0.8, 0.1, 0.33, 0.5]), + "anomaly_maps": torch.tensor([[0.7, 0.8, 0.1], [0.33, 0.5, 0.1]]), + }, + ] + + thresholded = thresholding(data)[0] + + assert thresholded["pred_masks"].equal(torch.tensor([[True, True, False], [False, True, False]])) diff --git a/tests/unit/pipelines/tiled_ensemble/test_helper_functions.py b/tests/unit/pipelines/tiled_ensemble/test_helper_functions.py new file mode 100644 index 0000000000..06e5864cef --- /dev/null +++ b/tests/unit/pipelines/tiled_ensemble/test_helper_functions.py @@ -0,0 +1,113 @@ +"""Test ensemble helper functions.""" + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path + +import pytest +from jsonargparse import Namespace +from lightning.pytorch.callbacks import EarlyStopping + +from anomalib.callbacks.normalization import _MinMaxNormalizationCallback +from anomalib.models import AnomalyModule +from anomalib.pipelines.tiled_ensemble.components.utils import NormalizationStage +from anomalib.pipelines.tiled_ensemble.components.utils.ensemble_tiling import 
EnsembleTiler, TileCollater +from anomalib.pipelines.tiled_ensemble.components.utils.helper_functions import ( + get_ensemble_datamodule, + get_ensemble_engine, + get_ensemble_model, + get_ensemble_tiler, + get_threshold_values, + parse_trainer_kwargs, +) + + +class TestHelperFunctions: + """Test ensemble helper functions.""" + + @staticmethod + def test_ensemble_datamodule(get_ensemble_config: dict, get_tiler: EnsembleTiler) -> None: + """Test that datamodule is created and has correct collate function.""" + config = get_ensemble_config + tiler = get_tiler + datamodule = get_ensemble_datamodule(config, tiler, (0, 0)) + + assert isinstance(datamodule.collate_fn, TileCollater) + + @staticmethod + def test_ensemble_model(get_ensemble_config: dict, get_tiler: EnsembleTiler) -> None: + """Test that model is successfully created with correct input shape.""" + config = get_ensemble_config + tiler = get_tiler + model = get_ensemble_model(config["TrainModels"]["model"], tiler) + + assert model.input_size == tuple(config["tiling"]["tile_size"]) + + @staticmethod + def test_tiler(get_ensemble_config: dict) -> None: + """Test that tiler is successfully instantiated.""" + config = get_ensemble_config + + tiler = get_ensemble_tiler(config["tiling"], config["data"]) + assert isinstance(tiler, EnsembleTiler) + + @staticmethod + def test_trainer_kwargs(get_ensemble_config: dict) -> None: + """Test that objects are correctly constructed from kwargs.""" + config = get_ensemble_config + + objects = parse_trainer_kwargs(config["TrainModels"]["trainer"]) + assert isinstance(objects, Namespace) + # verify that early stopping is parsed and added to callbacks + assert isinstance(objects.callbacks[0], EarlyStopping) + + @staticmethod + @pytest.mark.parametrize( + "normalization_stage", + [NormalizationStage.NONE, NormalizationStage.IMAGE, NormalizationStage.TILE], + ) + def test_threshold_values(normalization_stage: NormalizationStage, get_mock_stats_dir: Path) -> None: + """Test that 
threshold values are correctly set based on normalization stage.""" + stats_dir = get_mock_stats_dir + + i_thresh, p_thresh = get_threshold_values(normalization_stage, stats_dir) + + if normalization_stage != NormalizationStage.NONE: + # minmax normalization sets thresholds to 0.5 + assert i_thresh == p_thresh == 0.5 + else: + assert i_thresh == p_thresh == 0.1111 + + +class TestEnsembleEngine: + """Test ensemble engine configuration.""" + + @staticmethod + @pytest.mark.parametrize( + "normalization_stage", + [NormalizationStage.NONE, NormalizationStage.IMAGE, NormalizationStage.TILE], + ) + def test_normalisation(normalization_stage: NormalizationStage, get_model: AnomalyModule) -> None: + """Test that normalization callback is correctly initialized.""" + engine = get_ensemble_engine( + tile_index=(0, 0), + accelerator="cpu", + devices="1", + root_dir=Path("mock"), + normalization_stage=normalization_stage, + ) + + engine._setup_anomalib_callbacks(get_model) # noqa: SLF001 + + # verify that only in case of tile level normalization the callback is present + if normalization_stage == NormalizationStage.TILE: + assert any( + isinstance(x, _MinMaxNormalizationCallback) + for x in engine._cache.args["callbacks"] # noqa: SLF001 + ) + else: + assert not any( + isinstance(x, _MinMaxNormalizationCallback) + for x in engine._cache.args["callbacks"] # noqa: SLF001 + ) diff --git a/tests/unit/pipelines/tiled_ensemble/test_prediction_data.py b/tests/unit/pipelines/tiled_ensemble/test_prediction_data.py new file mode 100644 index 0000000000..7185f1e2ca --- /dev/null +++ b/tests/unit/pipelines/tiled_ensemble/test_prediction_data.py @@ -0,0 +1,69 @@ +"""Test tiled prediction storage class.""" + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import copy +from collections.abc import Callable + +import torch +from torch import Tensor + +from anomalib.data import AnomalibDataModule +from 
anomalib.pipelines.tiled_ensemble.components.utils.prediction_data import EnsemblePredictions + + +class TestPredictionData: + """Test EnsemblePredictions class, used for tiled prediction storage.""" + + @staticmethod + def store_all(data: EnsemblePredictions, datamodule: AnomalibDataModule) -> dict: + """Store the tiled predictions in the EnsemblePredictions object.""" + tile_dict = {} + for tile_index in [(0, 0), (0, 1), (1, 0), (1, 1)]: + datamodule.collate_fn.tile_index = tile_index + + tile_prediction = [] + for batch in iter(datamodule.train_dataloader()): + # set mock maps to just one channel of image + batch["anomaly_maps"] = batch["image"].clone()[:, 0, :, :].unsqueeze(1) + # set mock pred mask to mask but add channel + batch["pred_masks"] = batch["mask"].clone().unsqueeze(1) + tile_prediction.append(batch) + # save original + tile_dict[tile_index] = copy.deepcopy(tile_prediction) + # store to prediction storage object + data.add_tile_prediction(tile_index, tile_prediction) + + return tile_dict + + @staticmethod + def verify_equal(name: str, tile_dict: dict, storage: EnsemblePredictions, eq_funct: Callable) -> bool: + """Verify that all data at same tile index and same batch index matches.""" + batch_num = len(tile_dict[0, 0]) + + for batch_i in range(batch_num): + # batch is dict where key: tile index and val is batched data of that tile + curr_batch = storage.get_batch_tiles(batch_i) + + # go over all indices of current batch of stored data + for tile_index, stored_data_batch in curr_batch.items(): + stored_data = stored_data_batch[name] + # get original data dict at current tile index and batch index + original_data = tile_dict[tile_index][batch_i][name] + if isinstance(original_data, Tensor): + if not eq_funct(original_data, stored_data): + return False + elif original_data != stored_data: + return False + + return True + + def test_prediction_object(self, get_datamodule: AnomalibDataModule) -> None: + """Test prediction storage class.""" + datamodule 
= get_datamodule + storage = EnsemblePredictions() + original = self.store_all(storage, datamodule) + + for name in original[0, 0][0]: + assert self.verify_equal(name, original, storage, torch.equal), f"{name} doesn't match" diff --git a/tests/unit/pipelines/tiled_ensemble/test_tiler.py b/tests/unit/pipelines/tiled_ensemble/test_tiler.py new file mode 100644 index 0000000000..96b6c0e7bc --- /dev/null +++ b/tests/unit/pipelines/tiled_ensemble/test_tiler.py @@ -0,0 +1,119 @@ +"""Tiling related tests for tiled ensemble.""" + +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import copy + +import pytest +import torch + +from anomalib.data import AnomalibDataModule +from anomalib.pipelines.tiled_ensemble.components.utils.helper_functions import get_ensemble_tiler + +tiler_config = { + "tiling": { + "tile_size": 256, + "stride": 256, + }, + "data": {"init_args": {"image_size": 512}}, +} + +tiler_config_overlap = { + "tiling": { + "tile_size": 256, + "stride": 128, + }, + "data": {"init_args": {"image_size": 512}}, +} + + +class TestTiler: + """EnsembleTiler tests.""" + + @staticmethod + @pytest.mark.parametrize( + ("input_shape", "config", "expected_shape"), + [ + (torch.Size([5, 3, 512, 512]), tiler_config, torch.Size([2, 2, 5, 3, 256, 256])), + (torch.Size([5, 3, 512, 512]), tiler_config_overlap, torch.Size([3, 3, 5, 3, 256, 256])), + (torch.Size([5, 3, 500, 500]), tiler_config, torch.Size([2, 2, 5, 3, 256, 256])), + (torch.Size([5, 3, 500, 500]), tiler_config_overlap, torch.Size([3, 3, 5, 3, 256, 256])), + ], + ) + def test_basic_tile_for_ensemble(input_shape: torch.Size, config: dict, expected_shape: torch.Size) -> None: + """Test basic tiling of data.""" + config = copy.deepcopy(config) + config["data"]["init_args"]["image_size"] = input_shape[-1] + tiler = get_ensemble_tiler(config["tiling"], config["data"]) + + images = torch.rand(size=input_shape) + tiled = tiler.tile(images) + + assert tiled.shape == expected_shape + + 
@staticmethod + @pytest.mark.parametrize( + ("input_shape", "config"), + [ + (torch.Size([5, 3, 512, 512]), tiler_config), + (torch.Size([5, 3, 512, 512]), tiler_config_overlap), + (torch.Size([5, 3, 500, 500]), tiler_config), + (torch.Size([5, 3, 500, 500]), tiler_config_overlap), + ], + ) + def test_basic_tile_reconstruction(input_shape: torch.Size, config: dict) -> None: + """Test basic reconstruction of tiled data.""" + config = copy.deepcopy(config) + config["data"]["init_args"]["image_size"] = input_shape[-1] + + tiler = get_ensemble_tiler(config["tiling"], config["data"]) + + images = torch.rand(size=input_shape) + tiled = tiler.tile(images.clone()) + untiled = tiler.untile(tiled) + + assert images.shape == untiled.shape + assert images.equal(untiled) + + @staticmethod + @pytest.mark.parametrize( + ("input_shape", "config"), + [ + (torch.Size([5, 3, 512, 512]), tiler_config), + (torch.Size([5, 3, 500, 500]), tiler_config), + ], + ) + def test_untile_different_instance(input_shape: torch.Size, config: dict) -> None: + """Test untiling with different Tiler instance.""" + config = copy.deepcopy(config) + config["data"]["init_args"]["image_size"] = input_shape[-1] + tiler_1 = get_ensemble_tiler(config["tiling"], config["data"]) + + tiler_2 = get_ensemble_tiler(config["tiling"], config["data"]) + + images = torch.rand(size=input_shape) + tiled = tiler_1.tile(images.clone()) + + untiled = tiler_2.untile(tiled) + + # untiling should work even with different instance of tiler + assert images.shape == untiled.shape + assert images.equal(untiled) + + +class TestTileCollater: + """Test tile collater.""" + + @staticmethod + def test_collate_tile_shape(get_ensemble_config: dict, get_datamodule: AnomalibDataModule) -> None: + """Test that collate function successfully tiles the image.""" + config = get_ensemble_config + # datamodule with tile collater + datamodule = get_datamodule + + tile_w, tile_h = config["tiling"]["tile_size"] + + batch = 
next(iter(datamodule.train_dataloader())) + assert batch["image"].shape[1:] == (3, tile_w, tile_h) + assert batch["mask"].shape[1:] == (tile_w, tile_h) diff --git a/tools/tiled_ensemble/ens_config.yaml b/tools/tiled_ensemble/ens_config.yaml new file mode 100644 index 0000000000..2490b22e9a --- /dev/null +++ b/tools/tiled_ensemble/ens_config.yaml @@ -0,0 +1,43 @@ +seed: 42 +accelerator: "gpu" +default_root_dir: "results" + +tiling: + tile_size: [128, 128] + stride: 128 + +normalization_stage: image # on what level we normalize, options: [tile, image, none] +thresholding: + method: F1AdaptiveThreshold # refer to documentation for thresholding methods + stage: image # stage at which we apply threshold, options: [tile, image] + +data: + class_path: anomalib.data.MVTec + init_args: + root: ./datasets/MVTec + category: bottle + train_batch_size: 32 + eval_batch_size: 32 + num_workers: 8 + task: segmentation + transform: null + train_transform: null + eval_transform: null + test_split_mode: from_dir + test_split_ratio: 0.2 + val_split_mode: same_as_test + val_split_ratio: 0.5 + image_size: [256, 256] + +SeamSmoothing: + apply: True # if this is applied, area around tile seams is smoothed + sigma: 2 # sigma of gaussian filter used to smooth this area + width: 0.1 # width factor, multiplied by tile dimension gives the region width around seam which will be smoothed + +TrainModels: + model: + class_path: Padim + + metrics: + pixel: AUROC + image: AUROC diff --git a/tools/tiled_ensemble/eval.py b/tools/tiled_ensemble/eval.py new file mode 100644 index 0000000000..58be27c25c --- /dev/null +++ b/tools/tiled_ensemble/eval.py @@ -0,0 +1,28 @@ +"""Run tiled ensemble prediction.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path + +from jsonargparse import ArgumentParser + +from anomalib.pipelines.tiled_ensemble import EvalTiledEnsemble + + +def get_parser() -> ArgumentParser: + """Create a new parser if none is 
provided.""" + parser = ArgumentParser() + parser.add_argument("--config", type=str | Path, help="Configuration file path.", required=True) + parser.add_argument("--root", type=str | Path, help="Weights file path.", required=True) + + return parser + + +if __name__ == "__main__": + args = get_parser().parse_args() + + print("Running tiled ensemble test pipeline.") + # pass the path to root dir with checkpoints + test_pipeline = EvalTiledEnsemble(args.root) + test_pipeline.run(args) diff --git a/tools/tiled_ensemble/train.py b/tools/tiled_ensemble/train.py new file mode 100644 index 0000000000..8aed47ea0d --- /dev/null +++ b/tools/tiled_ensemble/train.py @@ -0,0 +1,17 @@ +"""Run tiled ensemble training.""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from anomalib.pipelines.tiled_ensemble import EvalTiledEnsemble, TrainTiledEnsemble + +if __name__ == "__main__": + print("Running tiled ensemble train pipeline") + train_pipeline = TrainTiledEnsemble() + # run training + train_pipeline.run() + + print("Running tiled ensemble test pipeline.") + # pass the root dir from train run to load checkpoints + test_pipeline = EvalTiledEnsemble(train_pipeline.root_dir) + test_pipeline.run() From 0301d591061674008dbab11110ba9f310c9bed07 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 24 Oct 2024 12:19:37 +0100 Subject: [PATCH 22/32] =?UTF-8?q?=F0=9F=93=9A=20Add=20training=20from=20a?= =?UTF-8?q?=20checkpoint=20example=20(#2389)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add training from a checkpoint example Signed-off-by: Samet Akcay * Replace patchcore example with efficient-ad Signed-off-by: Samet Akcay --------- Signed-off-by: Samet Akcay --- docs/source/markdown/get_started/anomalib.md | 30 +++++++++++--------- docs/source/snippets/train/api/default.txt | 11 ++++--- docs/source/snippets/train/cli/default.txt | 7 +++-- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git 
a/docs/source/markdown/get_started/anomalib.md b/docs/source/markdown/get_started/anomalib.md index 37af563b3e..4580c7fae5 100644 --- a/docs/source/markdown/get_started/anomalib.md +++ b/docs/source/markdown/get_started/anomalib.md @@ -17,7 +17,7 @@ The installer can be installed using the following commands: :::{tab-item} API :sync: label-1 -```{literalinclude} ../../snippets/install/pypi.txt +```{literalinclude} /snippets/install/pypi.txt :language: bash ``` @@ -26,7 +26,7 @@ The installer can be installed using the following commands: :::{tab-item} Source :sync: label-2 -```{literalinclude} ../../snippets/install/source.txt +```{literalinclude} /snippets/install/source.txt :language: bash ``` @@ -42,7 +42,7 @@ The next section demonstrates how to install the full package using the CLI inst :::::{dropdown} Installing the Full Package After installing anomalib, you can install the full package using the following commands: -```{literalinclude} ../../snippets/install/anomalib_help.txt +```{literalinclude} /snippets/install/anomalib_help.txt :language: bash ``` @@ -50,14 +50,14 @@ As can be seen above, the only available sub-command is `install` at the moment. The `install` sub-command has options to install either the full package or the specific components of the package. -```{literalinclude} ../../snippets/install/anomalib_install_help.txt +```{literalinclude} /snippets/install/anomalib_install_help.txt :language: bash ``` By default the `install` sub-command installs the full package. If you want to install only the specific components of the package, you can use the `--option` flag. -```{literalinclude} ../../snippets/install/anomalib_install.txt +```{literalinclude} /snippets/install/anomalib_install.txt :language: bash ``` @@ -66,13 +66,15 @@ After following these steps, your environment will be ready to use anomalib! ## {octicon}`mortar-board` Training -Anomalib supports both API and CLI-based training. 
The API is more flexible and allows for more customization, while the CLI training utilizes command line interfaces, and might be easier for those who would like to use anomalib off-the-shelf. +Anomalib supports both API and CLI-based training. The API is more flexible +and allows for more customization, while the CLI training utilizes command line +interfaces, and might be easier for those who would like to use anomalib off-the-shelf. ::::{tab-set} :::{tab-item} API -```{literalinclude} ../../snippets/train/api/default.txt +```{literalinclude} /snippets/train/api/default.txt :language: python ``` @@ -80,7 +82,7 @@ Anomalib supports both API and CLI-based training. The API is more flexible and :::{tab-item} CLI -```{literalinclude} ../../snippets/train/cli/default.txt +```{literalinclude} /snippets/train/cli/default.txt :language: bash ``` @@ -100,7 +102,7 @@ Anomalib includes multiple inferencing scripts, including Torch, Lightning, Grad :::{tab-item} API :sync: label-1 -```{literalinclude} ../../snippets/inference/api/lightning.txt +```{literalinclude} /snippets/inference/api/lightning.txt :language: python ``` @@ -109,7 +111,7 @@ Anomalib includes multiple inferencing scripts, including Torch, Lightning, Grad :::{tab-item} CLI :sync: label-2 -```{literalinclude} ../../snippets/inference/cli/lightning.txt +```{literalinclude} /snippets/inference/cli/lightning.txt :language: bash ``` @@ -201,7 +203,7 @@ Anomalib supports hyper-parameter optimization using [wandb](https://wandb.ai/) :::{tab-item} CLI -```{literalinclude} ../../snippets/pipelines/hpo/cli.txt +```{literalinclude} /snippets/pipelines/hpo/cli.txt :language: bash ``` @@ -209,7 +211,7 @@ Anomalib supports hyper-parameter optimization using [wandb](https://wandb.ai/) :::{tab-item} API -```{literalinclude} ../../snippets/pipelines/hpo/api.txt +```{literalinclude} /snippets/pipelines/hpo/api.txt :language: bash ``` @@ -233,7 +235,7 @@ To run a training experiment with experiment tracking, you will need the 
followi By using the configuration file above, you can run the experiment with the following command: -```{literalinclude} ../../snippets/logging/cli.txt +```{literalinclude} /snippets/logging/cli.txt :language: bash ``` @@ -241,7 +243,7 @@ By using the configuration file above, you can run the experiment with the follo :::{tab-item} API -```{literalinclude} ../../snippets/logging/api.txt +```{literalinclude} /snippets/logging/api.txt :language: bash ``` diff --git a/docs/source/snippets/train/api/default.txt b/docs/source/snippets/train/api/default.txt index 30293cf501..1fe6cb895c 100644 --- a/docs/source/snippets/train/api/default.txt +++ b/docs/source/snippets/train/api/default.txt @@ -1,12 +1,15 @@ # Import the required modules from anomalib.data import MVTec -from anomalib.models import Patchcore from anomalib.engine import Engine +from anomalib.models import EfficientAd # Initialize the datamodule, model and engine -datamodule = MVTec() -model = Patchcore() -engine = Engine() +datamodule = MVTec(train_batch_size=1) +model = EfficientAd() +engine = Engine(max_epochs=5) # Train the model engine.fit(datamodule=datamodule, model=model) + +# Continue from a checkpoint +engine.fit(datamodule=datamodule, model=model, ckpt_path="path/to/checkpoint.ckpt") diff --git a/docs/source/snippets/train/cli/default.txt b/docs/source/snippets/train/cli/default.txt index 3f64f687ad..1990dbf97e 100644 --- a/docs/source/snippets/train/cli/default.txt +++ b/docs/source/snippets/train/cli/default.txt @@ -2,10 +2,13 @@ anomalib train -h # Train by using the default values. -anomalib train --model Patchcore --data anomalib.data.MVTec +anomalib train --model EfficientAd --data anomalib.data.MVTec --data.train_batch_size 1 # Train by overriding arguments. -anomalib train --model Patchcore --data anomalib.data.MVTec --data.category transistor +anomalib train --model EfficientAd --data anomalib.data.MVTec --data.train_batch_size 1 --data.category transistor # Train by using a config file. 
anomalib train --config + +# Continue training from a checkpoint +anomalib train --config --ckpt_path From c00e101f3351b622e407dc097135fe3348860a9a Mon Sep 17 00:00:00 2001 From: Weilin Xu Date: Thu, 24 Oct 2024 07:12:15 -0700 Subject: [PATCH 23/32] Export experiment duration in seconds in CSV. (#2392) * Export experiment duration in seconds in CSV. Signed-off-by: Weilin Xu * Update CHANGELOG Signed-off-by: Weilin Xu * Log fit and test durations separately. Signed-off-by: Weilin Xu --------- Signed-off-by: Weilin Xu Co-authored-by: Samet Akcay --- CHANGELOG.md | 2 ++ src/anomalib/pipelines/benchmark/job.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b50bf09ecb..7760befc7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed +- Add duration of experiments in seconds in the benchmark CSV result by [mzweilin](https://github.com/mzweilin) in https://github.com/openvinotoolkit/anomalib/pull/2392 + ### Deprecated ### Fixed diff --git a/src/anomalib/pipelines/benchmark/job.py b/src/anomalib/pipelines/benchmark/job.py index ab443cfa8a..01822d5f29 100644 --- a/src/anomalib/pipelines/benchmark/job.py +++ b/src/anomalib/pipelines/benchmark/job.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 import logging +import time from datetime import datetime from pathlib import Path from tempfile import TemporaryDirectory @@ -48,6 +49,7 @@ def run( task_id: int | None = None, ) -> dict[str, Any]: """Run the benchmark.""" + job_start_time = time.time() devices: str | list[int] = "auto" if task_id is not None: devices = [task_id] @@ -59,8 +61,16 @@ def run( devices=devices, default_root_dir=temp_dir, ) + fit_start_time = time.time() engine.fit(self.model, self.datamodule) + test_start_time = time.time() test_results = engine.test(self.model, self.datamodule) + job_end_time = time.time() + durations = { + "job_duration": 
job_end_time - job_start_time, + "fit_duration": test_start_time - fit_start_time, + "test_duration": job_end_time - test_start_time, + } # TODO(ashwinvaidya17): Restore throughput # https://github.com/openvinotoolkit/anomalib/issues/2054 output = { @@ -69,6 +79,7 @@ def run( "model": self.model.__class__.__name__, "data": self.datamodule.__class__.__name__, "category": self.datamodule.category, + **durations, **test_results[0], } logger.info(f"Completed with result {output}") From 31952db16eded32017adf2910207e40148272ea1 Mon Sep 17 00:00:00 2001 From: Weilin Xu Date: Thu, 24 Oct 2024 09:37:39 -0700 Subject: [PATCH 24/32] Make single GPU benchmarking 5x more efficient (#2390) * Use SerialRunner if only one CUDA device is available. Signed-off-by: Weilin Xu * Resolve PLR6201. Signed-off-by: Weilin Xu * Update CHANGELOG. Signed-off-by: Weilin Xu * Keep the same logging level in benchmarking. Signed-off-by: Weilin Xu --------- Signed-off-by: Weilin Xu Co-authored-by: Samet Akcay --- CHANGELOG.md | 2 ++ src/anomalib/pipelines/benchmark/pipeline.py | 11 ++++++----- src/anomalib/utils/logging.py | 4 +--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7760befc7a..1c6f07555a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
### Fixed +- Make single GPU benchmarking 5x more efficient by [mzweilin](https://github.com/mzweilin) in https://github.com/openvinotoolkit/anomalib/pull/2390 + ### New Contributors **Full Changelog**: diff --git a/src/anomalib/pipelines/benchmark/pipeline.py b/src/anomalib/pipelines/benchmark/pipeline.py index 730b3ecccc..f68ee5e2a1 100644 --- a/src/anomalib/pipelines/benchmark/pipeline.py +++ b/src/anomalib/pipelines/benchmark/pipeline.py @@ -20,11 +20,12 @@ def _setup_runners(args: dict) -> list[Runner]: accelerators = args["accelerator"] if isinstance(args["accelerator"], list) else [args["accelerator"]] runners: list[Runner] = [] for accelerator in accelerators: - if accelerator == "cpu": - runners.append(SerialRunner(BenchmarkJobGenerator("cpu"))) - elif accelerator == "cuda": - runners.append(ParallelRunner(BenchmarkJobGenerator("cuda"), n_jobs=torch.cuda.device_count())) - else: + if accelerator not in {"cpu", "cuda"}: msg = f"Unsupported accelerator: {accelerator}" raise ValueError(msg) + device_count = torch.cuda.device_count() + if device_count <= 1: + runners.append(SerialRunner(BenchmarkJobGenerator(accelerator))) + else: + runners.append(ParallelRunner(BenchmarkJobGenerator(accelerator), n_jobs=device_count)) return runners diff --git a/src/anomalib/utils/logging.py b/src/anomalib/utils/logging.py index 21f7994fbf..d73ef440c4 100644 --- a/src/anomalib/utils/logging.py +++ b/src/anomalib/utils/logging.py @@ -74,10 +74,8 @@ def redirect_logs(log_file: str) -> None: """ Path(log_file).parent.mkdir(exist_ok=True, parents=True) logger_file_handler = logging.FileHandler(log_file) - root_logger = logging.getLogger() - root_logger.setLevel(logging.DEBUG) format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - logging.basicConfig(format=format_string, level=logging.DEBUG, handlers=[logger_file_handler]) + logging.basicConfig(format=format_string, handlers=[logger_file_handler]) logging.captureWarnings(capture=True) # remove other handlers 
from all loggers loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict] From f4f9b9a28036c5d6a12294c380f7897d1ef1755b Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 24 Oct 2024 21:20:25 +0100 Subject: [PATCH 25/32] =?UTF-8?q?=F0=9F=90=9E=20Fix=20installation=20packa?= =?UTF-8?q?ge=20issues=20(#2395)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update the coverage settings Signed-off-by: Samet Akcay * Remove VlmAd's relative import Signed-off-by: Samet Akcay * Revert relative imports Signed-off-by: Samet Akcay * Add type checking Signed-off-by: Samet Akcay --------- Signed-off-by: Samet Akcay --- pyproject.toml | 11 ++++++++--- src/anomalib/models/__init__.py | 2 +- .../models/image/vlm_ad/backends/huggingface.py | 12 +++++++----- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 268544ad2e..e47f7e55d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "jsonargparse[signatures]>=4.27.7", "docstring_parser", # CLI help-formatter "rich_argparse", # CLI help-formatter + "lightning-utilities", ] [project.optional-dependencies] @@ -293,11 +294,15 @@ pythonpath = "src" # COVERAGE CONFIGURATION # [tool.coverage.report] exclude_lines = [ - "except ImportError", + "pragma: no cover", + "def __repr__", + "raise NotImplementedError", + "if TYPE_CHECKING:", + "@abstractmethod", + "pass", "raise ImportError", - "except ApiException", - "raise ApiException", "raise ValueError", + "except ImportError:", ] [tool.coverage.paths] diff --git a/src/anomalib/models/__init__.py b/src/anomalib/models/__init__.py index ea091d1640..3b32c83367 100644 --- a/src/anomalib/models/__init__.py +++ b/src/anomalib/models/__init__.py @@ -58,9 +58,9 @@ class UnknownModelError(ModuleNotFoundError): "Rkde", "Stfpm", "Uflow", - "AiVad", "VlmAd", "WinClip", + "AiVad", ] logger = logging.getLogger(__name__) diff --git 
a/src/anomalib/models/image/vlm_ad/backends/huggingface.py b/src/anomalib/models/image/vlm_ad/backends/huggingface.py index c234ecfbc5..e25e9dccb3 100644 --- a/src/anomalib/models/image/vlm_ad/backends/huggingface.py +++ b/src/anomalib/models/image/vlm_ad/backends/huggingface.py @@ -5,19 +5,21 @@ import logging from pathlib import Path +from typing import TYPE_CHECKING from lightning_utilities.core.imports import package_available from PIL import Image -from transformers.modeling_utils import PreTrainedModel from anomalib.models.image.vlm_ad.utils import Prompt from .base import Backend -if package_available("transformers"): - import transformers +if TYPE_CHECKING: from transformers.modeling_utils import PreTrainedModel from transformers.processing_utils import ProcessorMixin + +if package_available("transformers"): + import transformers else: transformers = None @@ -39,7 +41,7 @@ def __init__( self._model: PreTrainedModel | None = None @property - def processor(self) -> ProcessorMixin: + def processor(self) -> "ProcessorMixin": """Get the Huggingface processor.""" if self._processor is None: if transformers is None: @@ -49,7 +51,7 @@ def processor(self) -> ProcessorMixin: return self._processor @property - def model(self) -> PreTrainedModel: + def model(self) -> "PreTrainedModel": """Get the Huggingface model.""" if self._model is None: if transformers is None: From 42b3ad5287f8e0833e21fbf43e0972f2439aa1ca Mon Sep 17 00:00:00 2001 From: Weilin Xu Date: Thu, 24 Oct 2024 22:12:48 -0700 Subject: [PATCH 26/32] Export the flattened config in benchmark CSV. (#2391) * Export the flattened config in benchmark CSV. Signed-off-by: Weilin Xu * Update CHANGELOG Signed-off-by: Weilin Xu * Reuse the existing flatten_dict(). 
Signed-off-by: Weilin Xu --------- Signed-off-by: Weilin Xu Co-authored-by: Samet Akcay --- CHANGELOG.md | 1 + src/anomalib/pipelines/benchmark/generator.py | 4 ++++ src/anomalib/pipelines/benchmark/job.py | 16 +++++++++++----- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c6f07555a..8152e202b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - Add duration of experiments in seconds in the benchmark CSV result by [mzweilin](https://github.com/mzweilin) in https://github.com/openvinotoolkit/anomalib/pull/2392 +- Export flat configurations in benchmark CSV results by [mzweilin](https://github.com/mzweilin) in https://github.com/openvinotoolkit/anomalib/pull/2391 ### Deprecated diff --git a/src/anomalib/pipelines/benchmark/generator.py b/src/anomalib/pipelines/benchmark/generator.py index 922dfa06cb..988e0111b7 100644 --- a/src/anomalib/pipelines/benchmark/generator.py +++ b/src/anomalib/pipelines/benchmark/generator.py @@ -10,6 +10,7 @@ from anomalib.pipelines.components import JobGenerator from anomalib.pipelines.components.utils import get_iterator_from_grid_dict from anomalib.pipelines.types import PREV_STAGE_RESULT +from anomalib.utils.config import flatten_dict from anomalib.utils.logging import hide_output from .job import BenchmarkJob @@ -39,9 +40,12 @@ def generate_jobs( """Return iterator based on the arguments.""" del previous_stage_result # Not needed for this job for _container in get_iterator_from_grid_dict(args): + # Pass experimental configs as a flatten dictionary to the job runner. 
+ flat_cfg = flatten_dict(_container) yield BenchmarkJob( accelerator=self.accelerator, seed=_container["seed"], model=get_model(_container["model"]), datamodule=get_datamodule(_container["data"]), + flat_cfg=flat_cfg, ) diff --git a/src/anomalib/pipelines/benchmark/job.py b/src/anomalib/pipelines/benchmark/job.py index 01822d5f29..f56899ac5d 100644 --- a/src/anomalib/pipelines/benchmark/job.py +++ b/src/anomalib/pipelines/benchmark/job.py @@ -32,16 +32,25 @@ class BenchmarkJob(Job): model (AnomalyModule): The model to use. datamodule (AnomalibDataModule): The data module to use. seed (int): The seed to use. + flat_cfg (dict): The flat dictionary of configs with dotted keys. """ name = "benchmark" - def __init__(self, accelerator: str, model: AnomalyModule, datamodule: AnomalibDataModule, seed: int) -> None: + def __init__( + self, + accelerator: str, + model: AnomalyModule, + datamodule: AnomalibDataModule, + seed: int, + flat_cfg: dict, + ) -> None: super().__init__() self.accelerator = accelerator self.model = model self.datamodule = datamodule self.seed = seed + self.flat_cfg = flat_cfg @hide_output def run( @@ -74,12 +83,9 @@ def run( # TODO(ashwinvaidya17): Restore throughput # https://github.com/openvinotoolkit/anomalib/issues/2054 output = { - "seed": self.seed, "accelerator": self.accelerator, - "model": self.model.__class__.__name__, - "data": self.datamodule.__class__.__name__, - "category": self.datamodule.category, **durations, + **self.flat_cfg, **test_results[0], } logger.info(f"Completed with result {output}") From 7d6b89c173e98611dfbcfd0256647598c42dc002 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 31 Oct 2024 08:58:05 +0000 Subject: [PATCH 27/32] `v1.2.0` Release (#2397) Prepare v1.2.0 release (#2396) * Update changelog * Update the version in __init__ --------- Signed-off-by: Samet Akcay --- CHANGELOG.md | 60 +++++++++++++++++++++++++++++++++++----- src/anomalib/__init__.py | 2 +- 2 files changed, 54 insertions(+), 8 deletions(-) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 8152e202b9..dedec2f441 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,21 +8,67 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added -- Add `VlmAd` metric by [Bepitic](https://github.com/Bepitic) and refactored by [ashwinvaidya17](https://github.com/ashwinvaidya17) in https://github.com/openvinotoolkit/anomalib/pull/2344 -- Add `Datumaro` annotation format support by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2377 -- Add `AUPIMO` tutorials notebooks in https://github.com/openvinotoolkit/anomalib/pull/2330 and https://github.com/openvinotoolkit/anomalib/pull/2336 -- Add `AUPIMO` metric by [jpcbertoldo](https://github.com/jpcbertoldo) in https://github.com/openvinotoolkit/anomalib/pull/1726 and refactored by [ashwinvaidya17](https://github.com/ashwinvaidya17) in https://github.com/openvinotoolkit/anomalib/pull/2329 +### Changed + +### Deprecated + +### Fixed + +### New Contributors + +## [v1.2.0] + +### Added + +- 🚀 Add ensembling methods for tiling to Anomalib by @blaz-r in https://github.com/openvinotoolkit/anomalib/pull/1226 +- 📚 optimization/quantization added into 500 series by @paularamo in https://github.com/openvinotoolkit/anomalib/pull/2197 +- 🚀 Add PIMO by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2329 +- 📚 Add PIMO tutorial advanced i (fixed) by @jpcbertoldo in https://github.com/openvinotoolkit/anomalib/pull/2336 +- 🚀 Add VLM based Anomaly Model by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2344 +- 📚 Add PIMO tutorials/02 advanced ii by @jpcbertoldo in https://github.com/openvinotoolkit/anomalib/pull/2347 +- 📚 Add PIMO tutorials/03 advanced iii by @jpcbertoldo in https://github.com/openvinotoolkit/anomalib/pull/2348 +- 📚 Add PIMO tutorials/04 advanced iv by @jpcbertoldo in https://github.com/openvinotoolkit/anomalib/pull/2352 +- 🚀 Add datumaro annotation dataloader by
@ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2377 +- 📚 Add training from a checkpoint example by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2389 ### Changed -- Add duration of experiments in seconds in the benchmark CSV result by [mzweilin](https://github.com/mzweilin) in https://github.com/openvinotoolkit/anomalib/pull/2392 -- Export flat configurations in benchmark CSV results by [mzweilin](https://github.com/mzweilin) in https://github.com/openvinotoolkit/anomalib/pull/2391 +- 🔨 Refactor folder3d to avoid complex-structure (C901) issue by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2185 +- Update open-clip-torch requirement from <2.26.1,>=2.23.0 to >=2.23.0,<2.26.2 by @dependabot in https://github.com/openvinotoolkit/anomalib/pull/2189 +- Update sphinx requirement by @dependabot in https://github.com/openvinotoolkit/anomalib/pull/2235 +- Refactor Lightning's `trainer.model` to `trainer.lightning_module` by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2255 +- Revert "Update open-clip-torch requirement from <2.26.1,>=2.23.0 to >=2.23.0,<2.26.2" by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2270 +- Update ruff configuration by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2269 +- Update timm requirement by @dependabot in https://github.com/openvinotoolkit/anomalib/pull/2274 +- Refactor BaseThreshold to Threshold by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2278 +- 🔨 Lint: Update Ruff Config - Add Missing Copyright Headers by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2281 +- Reduce rich methods by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2283 +- Enable Ruff Rules: PLW1514 and PLR6201 by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2284 +- Update nncf export by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2286 +-
Linting: Enable `PLR6301`, # could be a function, class method or static method by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2288 +- 🐞 Update `setuptools` requirement for PEP 660 support by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2320 +- 🔨 Update the issue templates by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2363 +- 🐞 Defer OpenVINO import to avoid unnecessary warnings by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2385 +- 🔨 Make single GPU benchmarking 5x more efficient by @mzweilin in https://github.com/openvinotoolkit/anomalib/pull/2390 +- 🐞 Export the flattened config in benchmark CSV. by @mzweilin in https://github.com/openvinotoolkit/anomalib/pull/2391 +- 🔨 Export experiment duration in seconds in CSV. by @mzweilin in https://github.com/openvinotoolkit/anomalib/pull/2392 +- 🐞 Fix installation package issues by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2395 ### Deprecated +- 🔨 Deprecate try import and replace it with Lightning's package_available by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2373 + ### Fixed -- Make single GPU benchmarking 5x more efficient by [mzweilin](https://github.com/mzweilin) in https://github.com/openvinotoolkit/anomalib/pull/2390 +- Add check before loading metrics data from checkpoint by @blaz-r in https://github.com/openvinotoolkit/anomalib/pull/2323 +- Fix transforms for draem, dsr and rkde by @blaz-r in https://github.com/openvinotoolkit/anomalib/pull/2324 +- Makes batch size dynamic by @Marcus1506 in https://github.com/openvinotoolkit/anomalib/pull/2339 + +## New Contributors + +- @Marcus1506 made their first contribution in https://github.com/openvinotoolkit/anomalib/pull/2339 + +**Full Changelog**: https://github.com/openvinotoolkit/anomalib/compare/v1.1.1...v1.2.0 ### New Contributors diff --git a/src/anomalib/__init__.py b/src/anomalib/__init__.py index 1b7a30497c..09edd93c22
100644 --- a/src/anomalib/__init__.py +++ b/src/anomalib/__init__.py @@ -5,7 +5,7 @@ from enum import Enum -__version__ = "1.2.0dev" +__version__ = "1.2.0" class LearningType(str, Enum): From 6ed0067813a5b4969e7ec69d22ab617f4e921142 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 31 Oct 2024 11:47:10 +0000 Subject: [PATCH 28/32] Bump Anomalib version to `2.0.0dev` in `main` (#2402) Update __init__.py --- src/anomalib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anomalib/__init__.py b/src/anomalib/__init__.py index 09edd93c22..206e9531a9 100644 --- a/src/anomalib/__init__.py +++ b/src/anomalib/__init__.py @@ -5,7 +5,7 @@ from enum import Enum -__version__ = "1.2.0" +__version__ = "2.0.0dev" class LearningType(str, Enum): From bcc0b439f616b13a8629cb64d8bf0f88fc9083a8 Mon Sep 17 00:00:00 2001 From: Harim Kang Date: Tue, 5 Nov 2024 19:29:23 +0900 Subject: [PATCH 29/32] =?UTF-8?q?=F0=9F=90=9EReplace=20package=5Favailable?= =?UTF-8?q?=20with=20module=5Favailable=20(#2407)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/anomalib/cli/pipelines.py | 4 ++-- src/anomalib/cli/utils/openvino.py | 4 ++-- src/anomalib/deploy/inferencers/openvino_inferencer.py | 4 ++-- src/anomalib/loggers/wandb.py | 4 ++-- src/anomalib/models/components/base/export_mixin.py | 6 +++--- src/anomalib/models/image/vlm_ad/backends/chat_gpt.py | 4 ++-- src/anomalib/models/image/vlm_ad/backends/huggingface.py | 4 ++-- src/anomalib/models/image/vlm_ad/backends/ollama.py | 4 ++-- src/anomalib/utils/exceptions/imports.py | 2 +- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/anomalib/cli/pipelines.py b/src/anomalib/cli/pipelines.py index 8cfb04fd2e..ba6030491b 100644 --- a/src/anomalib/cli/pipelines.py +++ b/src/anomalib/cli/pipelines.py @@ -6,13 +6,13 @@ import logging from jsonargparse import Namespace -from lightning_utilities.core.imports import package_available +from 
lightning_utilities.core.imports import module_available from anomalib.cli.utils.help_formatter import get_short_docstring logger = logging.getLogger(__name__) -if package_available("anomalib.pipelines"): +if module_available("anomalib.pipelines"): from anomalib.pipelines import Benchmark from anomalib.pipelines.components.base import Pipeline diff --git a/src/anomalib/cli/utils/openvino.py b/src/anomalib/cli/utils/openvino.py index ee54bf09b2..50a894c304 100644 --- a/src/anomalib/cli/utils/openvino.py +++ b/src/anomalib/cli/utils/openvino.py @@ -6,12 +6,12 @@ import logging from jsonargparse import ArgumentParser -from lightning_utilities.core.imports import package_available +from lightning_utilities.core.imports import module_available logger = logging.getLogger(__name__) -if package_available("openvino"): +if module_available("openvino"): from openvino.tools.ovc.cli_parser import get_common_cli_parser else: get_common_cli_parser = None diff --git a/src/anomalib/deploy/inferencers/openvino_inferencer.py b/src/anomalib/deploy/inferencers/openvino_inferencer.py index 8dea77b92e..b85df0536c 100644 --- a/src/anomalib/deploy/inferencers/openvino_inferencer.py +++ b/src/anomalib/deploy/inferencers/openvino_inferencer.py @@ -9,7 +9,7 @@ import cv2 import numpy as np -from lightning_utilities.core.imports import package_available +from lightning_utilities.core.imports import module_available from omegaconf import DictConfig from PIL import Image @@ -94,7 +94,7 @@ def __init__( task: str | None = None, config: dict | None = None, ) -> None: - if not package_available("openvino"): + if not module_available("openvino"): msg = "OpenVINO is not installed. Please install OpenVINO to use OpenVINOInferencer." 
raise ImportError(msg) diff --git a/src/anomalib/loggers/wandb.py b/src/anomalib/loggers/wandb.py index 55e65e6d54..ff41a0949e 100644 --- a/src/anomalib/loggers/wandb.py +++ b/src/anomalib/loggers/wandb.py @@ -9,12 +9,12 @@ from lightning.fabric.utilities.types import _PATH from lightning.pytorch.loggers.wandb import WandbLogger from lightning.pytorch.utilities import rank_zero_only -from lightning_utilities.core.imports import package_available +from lightning_utilities.core.imports import module_available from matplotlib.figure import Figure from .base import ImageLoggerBase -if package_available("wandb"): +if module_available("wandb"): import wandb if TYPE_CHECKING: diff --git a/src/anomalib/models/components/base/export_mixin.py b/src/anomalib/models/components/base/export_mixin.py index d11b50ff99..327cb87e02 100644 --- a/src/anomalib/models/components/base/export_mixin.py +++ b/src/anomalib/models/components/base/export_mixin.py @@ -12,7 +12,7 @@ import numpy as np import torch -from lightning_utilities.core.imports import package_available +from lightning_utilities.core.imports import module_available from torch import nn from torchmetrics import Metric from torchvision.transforms.v2 import Transform @@ -245,7 +245,7 @@ def to_openvino( ... task="segmentation", ... ) """ - if not package_available("openvino"): + if not module_available("openvino"): logger.exception("Could not find OpenVINO. Please check OpenVINO installation.") raise ModuleNotFoundError @@ -294,7 +294,7 @@ def _compress_ov_model( Returns: model (CompiledModel): Model in the OpenVINO format compressed with NNCF quantization. """ - if not package_available("nncf"): + if not module_available("nncf"): logger.exception("Could not find NCCF. 
Please check NNCF installation.") raise ModuleNotFoundError diff --git a/src/anomalib/models/image/vlm_ad/backends/chat_gpt.py b/src/anomalib/models/image/vlm_ad/backends/chat_gpt.py index 741288354f..53648e688a 100644 --- a/src/anomalib/models/image/vlm_ad/backends/chat_gpt.py +++ b/src/anomalib/models/image/vlm_ad/backends/chat_gpt.py @@ -10,13 +10,13 @@ from typing import TYPE_CHECKING from dotenv import load_dotenv -from lightning_utilities.core.imports import package_available +from lightning_utilities.core.imports import module_available from anomalib.models.image.vlm_ad.utils import Prompt from .base import Backend -if package_available("openai"): +if module_available("openai"): from openai import OpenAI else: OpenAI = None diff --git a/src/anomalib/models/image/vlm_ad/backends/huggingface.py b/src/anomalib/models/image/vlm_ad/backends/huggingface.py index e25e9dccb3..e8d3c1e84b 100644 --- a/src/anomalib/models/image/vlm_ad/backends/huggingface.py +++ b/src/anomalib/models/image/vlm_ad/backends/huggingface.py @@ -7,7 +7,7 @@ from pathlib import Path from typing import TYPE_CHECKING -from lightning_utilities.core.imports import package_available +from lightning_utilities.core.imports import module_available from PIL import Image from anomalib.models.image.vlm_ad.utils import Prompt @@ -18,7 +18,7 @@ from transformers.modeling_utils import PreTrainedModel from transformers.processing_utils import ProcessorMixin -if package_available("transformers"): +if module_available("transformers"): import transformers else: transformers = None diff --git a/src/anomalib/models/image/vlm_ad/backends/ollama.py b/src/anomalib/models/image/vlm_ad/backends/ollama.py index db5a215bb3..ff680bee3b 100644 --- a/src/anomalib/models/image/vlm_ad/backends/ollama.py +++ b/src/anomalib/models/image/vlm_ad/backends/ollama.py @@ -12,13 +12,13 @@ import logging from pathlib import Path -from lightning_utilities.core.imports import package_available +from lightning_utilities.core.imports 
import module_available from anomalib.models.image.vlm_ad.utils import Prompt from .base import Backend -if package_available("ollama"): +if module_available("ollama"): from ollama import chat from ollama._client import _encode_image else: diff --git a/src/anomalib/utils/exceptions/imports.py b/src/anomalib/utils/exceptions/imports.py index dac22ba056..6ef8dbd89d 100644 --- a/src/anomalib/utils/exceptions/imports.py +++ b/src/anomalib/utils/exceptions/imports.py @@ -22,7 +22,7 @@ def try_import(import_path: str) -> bool: warnings.warn( "The 'try_import' function is deprecated and will be removed in v2.0.0. " - "Use 'package_available' from lightning-utilities instead.", + "Use 'module_available' from lightning-utilities instead.", DeprecationWarning, stacklevel=2, ) From 0e6aa9025e346db89c7769dcc05ec345fee8f570 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Wed, 11 Dec 2024 14:26:32 +0000 Subject: [PATCH 30/32] ignore tiled ensemble for now Signed-off-by: Samet Akcay --- .../markdown/guides/how_to/pipelines/index.md | 7 - .../guides/how_to/pipelines/tiled_ensemble.md | 157 ------------ .../pipelines/tiled_ensemble/__init__.py | 12 - .../tiled_ensemble/components/__init__.py | 30 --- .../tiled_ensemble/components/merging.py | 110 --------- .../components/metrics_calculation.py | 217 ----------------- .../components/model_training.py | 192 --------------- .../components/normalization.py | 120 --------- .../tiled_ensemble/components/prediction.py | 228 ------------------ .../tiled_ensemble/components/smoothing.py | 167 ------------- .../components/stats_calculation.py | 180 -------------- .../tiled_ensemble/components/thresholding.py | 114 --------- .../components/utils/__init__.py | 44 ---- .../components/utils/ensemble_engine.py | 92 ------- .../components/utils/ensemble_tiling.py | 147 ----------- .../components/utils/helper_functions.py | 179 -------------- .../components/utils/prediction_data.py | 45 ---- .../components/utils/prediction_merging.py | 167 
------------- .../components/visualization.py | 125 ---------- .../pipelines/tiled_ensemble/test_pipeline.py | 124 ---------- .../tiled_ensemble/train_pipeline.py | 123 ---------- .../pipelines/test_tiled_ensemble.py | 62 ----- .../integration/pipelines/tiled_ensemble.yaml | 43 ---- tools/tiled_ensemble/ens_config.yaml | 43 ---- tools/tiled_ensemble/eval.py | 28 --- tools/tiled_ensemble/train.py | 17 -- 26 files changed, 2773 deletions(-) delete mode 100644 docs/source/markdown/guides/how_to/pipelines/tiled_ensemble.md delete mode 100644 src/anomalib/pipelines/tiled_ensemble/__init__.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/__init__.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/merging.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/metrics_calculation.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/model_training.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/normalization.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/prediction.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/smoothing.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/stats_calculation.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/thresholding.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/__init__.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_engine.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_tiling.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/helper_functions.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_data.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_merging.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/components/visualization.py delete 
mode 100644 src/anomalib/pipelines/tiled_ensemble/test_pipeline.py delete mode 100644 src/anomalib/pipelines/tiled_ensemble/train_pipeline.py delete mode 100644 tests/integration/pipelines/test_tiled_ensemble.py delete mode 100644 tests/integration/pipelines/tiled_ensemble.yaml delete mode 100644 tools/tiled_ensemble/ens_config.yaml delete mode 100644 tools/tiled_ensemble/eval.py delete mode 100644 tools/tiled_ensemble/train.py diff --git a/docs/source/markdown/guides/how_to/pipelines/index.md b/docs/source/markdown/guides/how_to/pipelines/index.md index c7f2c44706..d70e6be757 100644 --- a/docs/source/markdown/guides/how_to/pipelines/index.md +++ b/docs/source/markdown/guides/how_to/pipelines/index.md @@ -6,13 +6,6 @@ This section contains tutorials on how to use different pipelines of Anomalib an :margin: 1 1 0 0 :gutter: 1 -:::{grid-item-card} {octicon}`stack` Tiled Ensemble -:link: ./tiled_ensemble -:link-type: doc - -Learn more about how to use the tiled ensemble pipelines. -::: - :::{grid-item-card} {octicon}`gear` Custom Pipeline :link: ./custom_pipeline :link-type: doc diff --git a/docs/source/markdown/guides/how_to/pipelines/tiled_ensemble.md b/docs/source/markdown/guides/how_to/pipelines/tiled_ensemble.md deleted file mode 100644 index 3550efb5fd..0000000000 --- a/docs/source/markdown/guides/how_to/pipelines/tiled_ensemble.md +++ /dev/null @@ -1,157 +0,0 @@ -# Tiled ensemble - -This guide will show you how to use **The Tiled Ensemble** method for anomaly detection. For more details, refer to the official [Paper](https://openaccess.thecvf.com/content/CVPR2024W/VAND/html/Rolih_Divide_and_Conquer_High-Resolution_Industrial_Anomaly_Detection_via_Memory_Efficient_CVPRW_2024_paper.html). - -The tiled ensemble approach reduces memory consumption by dividing input images into a grid of tiles and training a dedicated model for each tile location. 
-It is compatible with any existing image anomaly detection model without the need for any modification of the underlying architecture. - -![Tiled ensemble flow](../../../../images/tiled_ensemble/ensemble_flow.png) - -```{note} -This feature is experimental and may not work as expected. -For any problems refer to [Issues](https://github.com/openvinotoolkit/anomalib/issues) and feel free to ask any question in [Discussions](https://github.com/openvinotoolkit/anomalib/discussions). -``` - -## Training - -You can train a tiled ensemble using the training script located inside `tools/tiled_ensemble` directory: - -```{code-block} bash - -python tools/tiled_ensemble/train_ensemble.py \ - --config tools/tiled_ensemble/ens_config.yaml -``` - -By default, the Padim model is trained on **MVTec AD bottle** category using image size of 256x256, divided into non-overlapping 128x128 tiles. -You can modify these parameters in the [config file](#ensemble-configuration). - -## Evaluation - -After training, you can evaluate the tiled ensemble on test data using: - -```{code-block} bash - -python tools/tiled_ensemble/eval.py \ - --config tools/tiled_ensemble/ens_config.yaml \ - --root path_to_results_dir - -``` - -Ensure that `root` points to the directory containing the training results, typically `results/padim/mvtec/bottle/runX`. - -## Ensemble configuration - -Tiled ensemble is configured using `ens_config.yaml` file in the `tools/tiled_ensemble` directory. -It contains general settings and tiled ensemble specific settings. - -### General - -General settings at the top of the config file are used to set up the random `seed`, `accelerator` (device) and the path to where results will be saved `default_root_dir`. 
- -```{code-block} yaml -seed: 42 -accelerator: "gpu" -default_root_dir: "results" -``` - -### Tiling - -This section contains the following settings, used for image tiling: - -```{code-block} yaml - -tiling: - tile_size: 256 - stride: 256 -``` - -These settings determine the tile size and stride. Another important parameter is image_size from `data` section later in the config. It determines the original size of the image. - -Input image is split into tiles, where each tile is of shape set by `tile_size` and tiles are taken with step set by `stride`. -For example: having image_size: 512, tile_size: 256, and stride: 256, results in 4 non-overlapping tile locations. - -### Normalization and thresholding - -Next up are the normalization and thresholding settings: - -```{code-block} yaml -normalization_stage: image -thresholding: - method: F1AdaptiveThreshold - stage: image -``` - -- **Normalization**: Can be applied per each tile location separately (`tile` option), after combining prediction (`image` option), or skipped (`none` option). - -- **Thresholding**: Can also be applied at different stages, but it is limited to `tile` and `image`. Another setting for thresholding is the method used. It can be specified as a string or by the class path. - -### Data - -The `data` section is used to configure the input `image_size` and other parameters for the dataset used. - -```{code-block} yaml -data: - class_path: anomalib.data.MVTec - init_args: - root: ./datasets/MVTec - category: bottle - train_batch_size: 32 - eval_batch_size: 32 - num_workers: 8 - task: segmentation - transform: null - train_transform: null - eval_transform: null - test_split_mode: from_dir - test_split_ratio: 0.2 - val_split_mode: same_as_test - val_split_ratio: 0.5 - image_size: [256, 256] -``` - -Refer to [Data](../../reference/data/image/index.md) for more details on parameters. 
- -### SeamSmoothing - -This section contains settings for `SeamSmoothing` block of pipeline: - -```{code-block} yaml -SeamSmoothing: - apply: True - sigma: 2 - width: 0.1 - -``` - -SeamSmoothing job is responsible for smoothing of regions where tiles meet - called tile seams. - -- **apply**: If True, smoothing will be applied. -- **sigma**: Controls the sigma of Gaussian filter used for smoothing. -- **width**: Sets the percentage of the region around the seam to be smoothed. - -### TrainModels - -The last section `TrainModels` contains the setup for model training: - -```{code-block} yaml -TrainModels: - model: - class_path: Fastflow - - metrics: - pixel: AUROC - image: AUROC - - trainer: - max_epochs: 500 - callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - patience: 42 - monitor: pixel_AUROC - mode: max -``` - -- **Model**: Specifies the model used. Refer to [Models](../../reference/models/image/index.md) for more details on the model parameters. -- **Metrics**: Defines evaluation metrics for pixel and image level. -- **Trainer**: _optional_ parameters, used to control the training process. Refer to [Engine](../../reference/engine/index.md) for more details. 
diff --git a/src/anomalib/pipelines/tiled_ensemble/__init__.py b/src/anomalib/pipelines/tiled_ensemble/__init__.py deleted file mode 100644 index 1a068562b7..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Tiled ensemble pipelines.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from .test_pipeline import EvalTiledEnsemble -from .train_pipeline import TrainTiledEnsemble - -__all__ = [ - "TrainTiledEnsemble", - "EvalTiledEnsemble", -] diff --git a/src/anomalib/pipelines/tiled_ensemble/components/__init__.py b/src/anomalib/pipelines/tiled_ensemble/components/__init__.py deleted file mode 100644 index 619dc2e673..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Tiled ensemble pipeline components.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from .merging import MergeJobGenerator -from .metrics_calculation import MetricsCalculationJobGenerator -from .model_training import TrainModelJobGenerator -from .normalization import NormalizationJobGenerator -from .prediction import PredictJobGenerator -from .smoothing import SmoothingJobGenerator -from .stats_calculation import StatisticsJobGenerator -from .thresholding import ThresholdingJobGenerator -from .utils import NormalizationStage, PredictData, ThresholdStage -from .visualization import VisualizationJobGenerator - -__all__ = [ - "NormalizationStage", - "ThresholdStage", - "PredictData", - "TrainModelJobGenerator", - "PredictJobGenerator", - "MergeJobGenerator", - "SmoothingJobGenerator", - "StatisticsJobGenerator", - "NormalizationJobGenerator", - "ThresholdingJobGenerator", - "VisualizationJobGenerator", - "MetricsCalculationJobGenerator", -] diff --git a/src/anomalib/pipelines/tiled_ensemble/components/merging.py b/src/anomalib/pipelines/tiled_ensemble/components/merging.py deleted file mode 100644 index 
6e8d5fc84c..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/merging.py +++ /dev/null @@ -1,110 +0,0 @@ -"""Tiled ensemble - prediction merging job.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from collections.abc import Generator -from typing import Any - -from tqdm import tqdm - -from anomalib.pipelines.components import Job, JobGenerator -from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS - -from .utils.ensemble_tiling import EnsembleTiler -from .utils.helper_functions import get_ensemble_tiler -from .utils.prediction_data import EnsemblePredictions -from .utils.prediction_merging import PredictionMergingMechanism - -logger = logging.getLogger(__name__) - - -class MergeJob(Job): - """Job for merging tile-level predictions into image-level predictions. - - Args: - predictions (EnsemblePredictions): Object containing ensemble predictions. - tiler (EnsembleTiler): Ensemble tiler used for untiling. - """ - - name = "Merge" - - def __init__(self, predictions: EnsemblePredictions, tiler: EnsembleTiler) -> None: - super().__init__() - self.predictions = predictions - self.tiler = tiler - - def run(self, task_id: int | None = None) -> list[Any]: - """Run merging job that merges all batches of tile-level predictions into image-level predictions. - - Args: - task_id: Not used in this case. - - Returns: - list[Any]: List of merged predictions. - """ - del task_id # not needed here - - merger = PredictionMergingMechanism(self.predictions, self.tiler) - - logger.info("Merging predictions.") - - # merge all batches - merged_predictions = [ - merger.merge_tile_predictions(batch_idx) - for batch_idx in tqdm(range(merger.num_batches), desc="Prediction merging") - ] - - return merged_predictions # noqa: RET504 - - @staticmethod - def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: - """Nothing to collect in this job. - - Returns: - list[Any]: List of predictions. 
- """ - # take the first element as result is list of lists here - return results[0] - - @staticmethod - def save(results: GATHERED_RESULTS) -> None: - """Nothing to save in this job.""" - - -class MergeJobGenerator(JobGenerator): - """Generate MergeJob.""" - - def __init__(self, tiling_args: dict, data_args: dict) -> None: - super().__init__() - self.tiling_args = tiling_args - self.data_args = data_args - - @property - def job_class(self) -> type: - """Return the job class.""" - return MergeJob - - def generate_jobs( - self, - args: dict | None = None, - prev_stage_result: EnsemblePredictions | None = None, - ) -> Generator[MergeJob, None, None]: - """Return a generator producing a single merging job. - - Args: - args (dict): Tiled ensemble pipeline args. - prev_stage_result (EnsemblePredictions): Ensemble predictions from predict step. - - Returns: - Generator[MergeJob, None, None]: MergeJob generator - """ - del args # args not used here - - tiler = get_ensemble_tiler(self.tiling_args, self.data_args) - if prev_stage_result is not None: - yield MergeJob(prev_stage_result, tiler) - else: - msg = "Merging job requires tile level predictions from previous step." 
- raise ValueError(msg) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/metrics_calculation.py b/src/anomalib/pipelines/tiled_ensemble/components/metrics_calculation.py deleted file mode 100644 index 530662b1d3..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/metrics_calculation.py +++ /dev/null @@ -1,217 +0,0 @@ -"""Tiled ensemble - metrics calculation job.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from collections.abc import Generator -from pathlib import Path -from typing import Any - -import pandas as pd -from tqdm import tqdm - -from anomalib import TaskType -from anomalib.metrics import AnomalibMetricCollection, create_metric_collection -from anomalib.pipelines.components import Job, JobGenerator -from anomalib.pipelines.types import GATHERED_RESULTS, PREV_STAGE_RESULT, RUN_RESULTS - -from .utils import NormalizationStage -from .utils.helper_functions import get_threshold_values - -logger = logging.getLogger(__name__) - - -class MetricsCalculationJob(Job): - """Job for image and pixel metrics calculation. - - Args: - accelerator (str): Accelerator (device) to use. - predictions (list[Any]): List of batch predictions. - root_dir (Path): Root directory to save checkpoints, stats and images. - image_metrics (AnomalibMetricCollection): Collection of all image-level metrics. - pixel_metrics (AnomalibMetricCollection): Collection of all pixel-level metrics. 
- """ - - name = "Metrics" - - def __init__( - self, - accelerator: str, - predictions: list[Any] | None, - root_dir: Path, - image_metrics: AnomalibMetricCollection, - pixel_metrics: AnomalibMetricCollection, - ) -> None: - super().__init__() - self.accelerator = accelerator - self.predictions = predictions - self.root_dir = root_dir - self.image_metrics = image_metrics - self.pixel_metrics = pixel_metrics - - def run(self, task_id: int | None = None) -> dict: - """Run a job that calculates image and pixel level metrics. - - Args: - task_id: Not used in this case. - - Returns: - dict[str, float]: Dictionary containing calculated metric values. - """ - del task_id # not needed here - - logger.info("Starting metrics calculation.") - - # add predicted data to metrics - for data in tqdm(self.predictions, desc="Calculating metrics"): - self.image_metrics.update(data["pred_scores"], data["label"].int()) - if "mask" in data and "anomaly_maps" in data: - self.pixel_metrics.update(data["anomaly_maps"], data["mask"].int()) - - # compute all metrics on specified accelerator - metrics_dict = {} - for name, metric in self.image_metrics.items(): - metric.to(self.accelerator) - metrics_dict[name] = metric.compute().item() - metric.cpu() - - if self.pixel_metrics.update_called: - for name, metric in self.pixel_metrics.items(): - metric.to(self.accelerator) - metrics_dict[name] = metric.compute().item() - metric.cpu() - - for name, value in metrics_dict.items(): - print(f"{name}: {value:.4f}") - - # save path used in `save` method - metrics_dict["save_path"] = self.root_dir / "metric_results.csv" - - return metrics_dict - - @staticmethod - def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: - """Nothing to collect in this job. - - Returns: - list[Any]: list of predictions. 
- """ - # take the first element as result is list of dict here - return results[0] - - @staticmethod - def save(results: GATHERED_RESULTS) -> None: - """Save metrics values to csv.""" - logger.info("Saving metrics to csv.") - - # get and remove path from stats dict - results_path: Path = results.pop("save_path") - results_path.parent.mkdir(parents=True, exist_ok=True) - - df_dict = {k: [v] for k, v in results.items()} - metrics_df = pd.DataFrame(df_dict) - metrics_df.to_csv(results_path, index=False) - - -class MetricsCalculationJobGenerator(JobGenerator): - """Generate MetricsCalculationJob. - - Args: - root_dir (Path): Root directory to save checkpoints, stats and images. - """ - - def __init__( - self, - accelerator: str, - root_dir: Path, - task: TaskType, - metrics: dict, - normalization_stage: NormalizationStage, - ) -> None: - self.accelerator = accelerator - self.root_dir = root_dir - self.task = task - self.metrics = metrics - self.normalization_stage = normalization_stage - - @property - def job_class(self) -> type: - """Return the job class.""" - return MetricsCalculationJob - - def configure_ensemble_metrics( - self, - image_metrics: list[str] | dict[str, dict[str, Any]] | None = None, - pixel_metrics: list[str] | dict[str, dict[str, Any]] | None = None, - ) -> tuple[AnomalibMetricCollection, AnomalibMetricCollection]: - """Configure image and pixel metrics and put them into a collection. - - Args: - image_metrics (list[str] | None): List of image-level metric names. - pixel_metrics (list[str] | None): List of pixel-level metric names. - - Returns: - tuple[AnomalibMetricCollection, AnomalibMetricCollection]: - Image-metrics collection and pixel-metrics collection - """ - image_metrics = [] if image_metrics is None else image_metrics - - if pixel_metrics is None: - pixel_metrics = [] - elif self.task == TaskType.CLASSIFICATION: - pixel_metrics = [] - logger.warning( - "Cannot perform pixel-level evaluation when task type is classification. 
" - "Ignoring the following pixel-level metrics: %s", - pixel_metrics, - ) - - # if a single metric is passed, transform to list to fit the creation function - if isinstance(image_metrics, str): - image_metrics = [image_metrics] - if isinstance(pixel_metrics, str): - pixel_metrics = [pixel_metrics] - - image_metrics_collection = create_metric_collection(image_metrics, "image_") - pixel_metrics_collection = create_metric_collection(pixel_metrics, "pixel_") - - return image_metrics_collection, pixel_metrics_collection - - def generate_jobs( - self, - args: dict | None = None, - prev_stage_result: PREV_STAGE_RESULT = None, - ) -> Generator[MetricsCalculationJob, None, None]: - """Make a generator that yields a single metrics calculation job. - - Args: - args: ensemble run config. - prev_stage_result: ensemble predictions from previous step. - - Returns: - Generator[MetricsCalculationJob, None, None]: MetricsCalculationJob generator - """ - del args # args not used here - - image_metrics_config = self.metrics.get("image", None) - pixel_metrics_config = self.metrics.get("pixel", None) - - image_threshold, pixel_threshold = get_threshold_values(self.normalization_stage, self.root_dir) - - image_metrics, pixel_metrics = self.configure_ensemble_metrics( - image_metrics=image_metrics_config, - pixel_metrics=pixel_metrics_config, - ) - - # set thresholds for metrics that need it - image_metrics.set_threshold(image_threshold) - pixel_metrics.set_threshold(pixel_threshold) - - yield MetricsCalculationJob( - accelerator=self.accelerator, - predictions=prev_stage_result, - root_dir=self.root_dir, - image_metrics=image_metrics, - pixel_metrics=pixel_metrics, - ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/model_training.py b/src/anomalib/pipelines/tiled_ensemble/components/model_training.py deleted file mode 100644 index 6bc81c793b..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/model_training.py +++ /dev/null @@ -1,192 +0,0 @@ -"""Tiled 
ensemble - ensemble training job.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from collections.abc import Generator -from itertools import product -from pathlib import Path - -from lightning import seed_everything - -from anomalib.data import AnomalibDataModule -from anomalib.models import AnomalyModule -from anomalib.pipelines.components import Job, JobGenerator -from anomalib.pipelines.types import GATHERED_RESULTS, PREV_STAGE_RESULT - -from .utils import NormalizationStage -from .utils.ensemble_engine import TiledEnsembleEngine -from .utils.helper_functions import ( - get_ensemble_datamodule, - get_ensemble_engine, - get_ensemble_model, - get_ensemble_tiler, -) - -logger = logging.getLogger(__name__) - - -class TrainModelJob(Job): - """Job for training of individual models in the tiled ensemble. - - Args: - accelerator (str): Accelerator (device) to use. - seed (int): Random seed for reproducibility. - root_dir (Path): Root directory to save checkpoints, stats and images. - tile_index (tuple[int, int]): Index of tile that this model processes. - normalization_stage (str): Normalization stage flag. - metrics (dict): metrics dict with pixel and image metric names. - trainer_args (dict| None): Additional arguments to pass to the trainer class. - model (AnomalyModule): Model to train. - datamodule (AnomalibDataModule): Datamodule with all dataloaders. 
- - """ - - name = "TrainModels" - - def __init__( - self, - accelerator: str, - seed: int, - root_dir: Path, - tile_index: tuple[int, int], - normalization_stage: str, - metrics: dict, - trainer_args: dict | None, - model: AnomalyModule, - datamodule: AnomalibDataModule, - ) -> None: - super().__init__() - self.accelerator = accelerator - self.seed = seed - self.root_dir = root_dir - self.tile_index = tile_index - self.normalization_stage = normalization_stage - self.metrics = metrics - self.trainer_args = trainer_args - self.model = model - self.datamodule = datamodule - - def run( - self, - task_id: int | None = None, - ) -> TiledEnsembleEngine: - """Run train job that fits the model for given tile location. - - Args: - task_id: Passed when job is ran in parallel. - - Returns: - TiledEnsembleEngine: Engine containing trained model. - """ - devices: str | list[int] = "auto" - if task_id is not None: - devices = [task_id] - logger.info(f"Running job {self.model.__class__.__name__} with device {task_id}") - - logger.info("Start of training for tile at position %s,", self.tile_index) - seed_everything(self.seed) - - # create engine for specific tile location and fit the model - engine = get_ensemble_engine( - tile_index=self.tile_index, - accelerator=self.accelerator, - devices=devices, - root_dir=self.root_dir, - normalization_stage=self.normalization_stage, - metrics=self.metrics, - trainer_args=self.trainer_args, - ) - engine.fit(model=self.model, datamodule=self.datamodule) - # move model to cpu to avoid memory issues as the engine is returned to be used in validation phase - engine.model.cpu() - - return engine - - @staticmethod - def collect(results: list[TiledEnsembleEngine]) -> dict[tuple[int, int], TiledEnsembleEngine]: - """Collect engines from each tile location into a dict. 
- - Returns: - dict[tuple[int, int], TiledEnsembleEngine]: Dict has form {tile_index: TiledEnsembleEngine} - """ - return {r.tile_index: r for r in results} - - @staticmethod - def save(results: GATHERED_RESULTS) -> None: - """Skip as checkpoints are already saved by callback.""" - - -class TrainModelJobGenerator(JobGenerator): - """Generator for training job that train model for each tile location. - - Args: - root_dir (Path): Root directory to save checkpoints, stats and images. - """ - - def __init__( - self, - seed: int, - accelerator: str, - root_dir: Path, - tiling_args: dict, - data_args: dict, - normalization_stage: NormalizationStage, - ) -> None: - self.seed = seed - self.accelerator = accelerator - self.root_dir = root_dir - self.tiling_args = tiling_args - self.data_args = data_args - self.normalization_stage = normalization_stage - - @property - def job_class(self) -> type: - """Return the job class.""" - return TrainModelJob - - def generate_jobs( - self, - args: dict | None = None, - prev_stage_result: PREV_STAGE_RESULT = None, - ) -> Generator[TrainModelJob, None, None]: - """Generate training jobs for each tile location. - - Args: - args (dict): Dict with config passed to training. - prev_stage_result (None): Not used here. - - Returns: - Generator[TrainModelJob, None, None]: TrainModelJob generator - """ - del prev_stage_result # Not needed for this job - if args is None: - msg = "TrainModels job requires config args" - raise ValueError(msg) - - # tiler used for splitting the image and getting the tile count - tiler = get_ensemble_tiler(self.tiling_args, self.data_args) - - logger.info( - "Tiled ensemble training started. 
Separate models will be trained for %d tile locations.", - tiler.num_tiles, - ) - # go over all tile positions - for tile_index in product(range(tiler.num_patches_h), range(tiler.num_patches_w)): - # prepare datamodule with custom collate function that only provides specific tile of image - datamodule = get_ensemble_datamodule(self.data_args, tiler, tile_index) - model = get_ensemble_model(args["model"], tiler) - - # pass root_dir to engine so all models in ensemble have the same root dir - yield TrainModelJob( - accelerator=self.accelerator, - seed=self.seed, - root_dir=self.root_dir, - tile_index=tile_index, - normalization_stage=self.normalization_stage, - metrics=args["metrics"], - trainer_args=args.get("trainer", {}), - model=model, - datamodule=datamodule, - ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/normalization.py b/src/anomalib/pipelines/tiled_ensemble/components/normalization.py deleted file mode 100644 index 8c7a563506..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/normalization.py +++ /dev/null @@ -1,120 +0,0 @@ -"""Tiled ensemble - normalization job.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import logging -from collections.abc import Generator -from pathlib import Path -from typing import Any - -from tqdm import tqdm - -from anomalib.pipelines.components import Job, JobGenerator -from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS -from anomalib.utils.normalization.min_max import normalize - -logger = logging.getLogger(__name__) - - -class NormalizationJob(Job): - """Job for normalization of predictions. - - Args: - predictions (list[Any]): List of predictions. - root_dir (Path): Root directory containing statistics needed for normalization. 
- """ - - name = "Normalize" - - def __init__(self, predictions: list[Any] | None, root_dir: Path) -> None: - super().__init__() - self.predictions = predictions - self.root_dir = root_dir - - def run(self, task_id: int | None = None) -> list[Any] | None: - """Run normalization job which normalizes image, pixel and box scores. - - Args: - task_id: Not used in this case. - - Returns: - list[Any]: List of normalized predictions. - """ - del task_id # not needed here - - # load all statistics needed for normalization - stats_path = self.root_dir / "weights" / "lightning" / "stats.json" - with stats_path.open("r") as f: - stats = json.load(f) - minmax = stats["minmax"] - image_threshold = stats["image_threshold"] - pixel_threshold = stats["pixel_threshold"] - - logger.info("Starting normalization.") - - for data in tqdm(self.predictions, desc="Normalizing"): - data["pred_scores"] = normalize( - data["pred_scores"], - image_threshold, - minmax["pred_scores"]["min"], - minmax["pred_scores"]["max"], - ) - if "anomaly_maps" in data: - data["anomaly_maps"] = normalize( - data["anomaly_maps"], - pixel_threshold, - minmax["anomaly_maps"]["min"], - minmax["anomaly_maps"]["max"], - ) - - return self.predictions - - @staticmethod - def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: - """Nothing to collect in this job. - - Returns: - list[Any]: List of predictions. - """ - # take the first element as result is list of lists here - return results[0] - - @staticmethod - def save(results: GATHERED_RESULTS) -> None: - """Nothing is saved in this job.""" - - -class NormalizationJobGenerator(JobGenerator): - """Generate NormalizationJob. - - Args: - root_dir (Path): Root directory where statistics are saved. 
- """ - - def __init__(self, root_dir: Path) -> None: - self.root_dir = root_dir - - @property - def job_class(self) -> type: - """Return the job class.""" - return NormalizationJob - - def generate_jobs( - self, - args: dict | None = None, - prev_stage_result: list[Any] | None = None, - ) -> Generator[NormalizationJob, None, None]: - """Return a generator producing a single normalization job. - - Args: - args: not used here. - prev_stage_result (list[Any]): Ensemble predictions from previous step. - - Returns: - Generator[NormalizationJob, None, None]: NormalizationJob generator. - """ - del args # not needed here - - yield NormalizationJob(prev_stage_result, self.root_dir) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/prediction.py b/src/anomalib/pipelines/tiled_ensemble/components/prediction.py deleted file mode 100644 index 792d86a497..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/prediction.py +++ /dev/null @@ -1,228 +0,0 @@ -"""Tiled ensemble - ensemble prediction job.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from collections.abc import Generator -from itertools import product -from pathlib import Path -from typing import Any - -from lightning import seed_everything -from torch.utils.data import DataLoader - -from anomalib.models import AnomalyModule -from anomalib.pipelines.components import Job, JobGenerator -from anomalib.pipelines.types import GATHERED_RESULTS, PREV_STAGE_RESULT - -from .utils import NormalizationStage, PredictData -from .utils.ensemble_engine import TiledEnsembleEngine -from .utils.helper_functions import ( - get_ensemble_datamodule, - get_ensemble_engine, - get_ensemble_model, - get_ensemble_tiler, -) -from .utils.prediction_data import EnsemblePredictions - -logger = logging.getLogger(__name__) - - -class PredictJob(Job): - """Job for generating predictions with individual models in the tiled ensemble. 
- - Args: - accelerator (str): Accelerator (device) to use. - seed (int): Random seed for reproducibility. - root_dir (Path): Root directory to save checkpoints, stats and images. - tile_index (tuple[int, int]): Index of tile that this model processes. - normalization_stage (str): Normalization stage flag. - dataloader (DataLoader): Dataloader to use for training (either val or test). - model (AnomalyModule): Model to train. - engine (TiledEnsembleEngine | None): - engine from train job. If job is used standalone, instantiate engine and model from checkpoint. - ckpt_path (Path | None): Path to checkpoint to be loaded if engine doesn't contain correct weights. - - """ - - name = "Predict" - - def __init__( - self, - accelerator: str, - seed: int, - root_dir: Path, - tile_index: tuple[int, int], - normalization_stage: str, - dataloader: DataLoader, - model: AnomalyModule | None, - engine: TiledEnsembleEngine | None, - ckpt_path: Path | None, - ) -> None: - super().__init__() - if engine is None and ckpt_path is None: - msg = "Either engine or checkpoint must be provided to predict job." - raise ValueError(msg) - - self.accelerator = accelerator - self.seed = seed - self.root_dir = root_dir - self.tile_index = tile_index - self.normalization_stage = normalization_stage - self.dataloader = dataloader - self.model = model - self.engine = engine - self.ckpt_path = ckpt_path - - def run( - self, - task_id: int | None = None, - ) -> tuple[tuple[int, int], Any | None]: - """Predict job that predicts the data with specific model for given tile location. - - Args: - task_id: Passed when job is ran in parallel. - - Returns: - tuple[tuple[int, int], list[Any]]: Tile index, List of predictions. 
- """ - devices: str | list[int] = "auto" - if task_id is not None: - devices = [task_id] - logger.info(f"Running job {self.model.__class__.__name__} with device {task_id}") - - logger.info("Start of predicting for tile at position %s,", self.tile_index) - seed_everything(self.seed) - - if self.engine is None: - # in case predict is invoked separately from train job, make new engine instance - self.engine = get_ensemble_engine( - tile_index=self.tile_index, - accelerator=self.accelerator, - devices=devices, - root_dir=self.root_dir, - normalization_stage=self.normalization_stage, - ) - - predictions = self.engine.predict(model=self.model, dataloaders=self.dataloader, ckpt_path=self.ckpt_path) - - # also return tile index as it's needed in collect method - return self.tile_index, predictions - - @staticmethod - def collect(results: list[tuple[tuple[int, int], list[Any]]]) -> EnsemblePredictions: - """Collect predictions from each tile location into the predictions class. - - Returns: - EnsemblePredictions: Object containing all predictions in form ready for merging. - """ - storage = EnsemblePredictions() - - for tile_index, predictions in results: - storage.add_tile_prediction(tile_index, predictions) - - return storage - - @staticmethod - def save(results: GATHERED_RESULTS) -> None: - """This stage doesn't save anything.""" - - -class PredictJobGenerator(JobGenerator): - """Generator for predict job that uses individual models to predict for each tile location. - - Args: - root_dir (Path): Root directory to save checkpoints, stats and images. - data_source (PredictData): Whether to predict on validation set. If false use test set. 
- """ - - def __init__( - self, - data_source: PredictData, - seed: int, - accelerator: str, - root_dir: Path, - tiling_args: dict, - data_args: dict, - model_args: dict, - normalization_stage: NormalizationStage, - ) -> None: - self.data_source = data_source - self.seed = seed - self.accelerator = accelerator - self.root_dir = root_dir - self.tiling_args = tiling_args - self.data_args = data_args - self.model_args = model_args - self.normalization_stage = normalization_stage - - @property - def job_class(self) -> type: - """Return the job class.""" - return PredictJob - - def generate_jobs( - self, - args: dict | None = None, - prev_stage_result: PREV_STAGE_RESULT = None, - ) -> Generator[PredictJob, None, None]: - """Generate predict jobs for each tile location. - - Args: - args (dict): Dict with config passed to training. - prev_stage_result (dict[tuple[int, int], TiledEnsembleEngine] | None): - if called after train job this contains engines with individual models, otherwise load from checkpoints. - - Returns: - Generator[PredictJob, None, None]: PredictJob generator. 
- """ - del args # args not used here - - # tiler used for splitting the image and getting the tile count - tiler = get_ensemble_tiler(self.tiling_args, self.data_args) - - logger.info( - "Tiled ensemble predicting started using %s data.", - self.data_source.value, - ) - # go over all tile positions - for tile_index in product(range(tiler.num_patches_h), range(tiler.num_patches_w)): - # prepare datamodule with custom collate function that only provides specific tile of image - datamodule = get_ensemble_datamodule(self.data_args, tiler, tile_index) - - # check if predict step is positioned after training - if prev_stage_result and tile_index in prev_stage_result: - engine = prev_stage_result[tile_index] - # model is inside engine in this case - model = engine.model - ckpt_path = None - else: - # any other case - predict is called standalone - engine = None - # we need to make new model instance as it's not inside engine - model = get_ensemble_model(self.model_args, tiler) - tile_i, tile_j = tile_index - # prepare checkpoint path for model on current tile location - ckpt_path = self.root_dir / "weights" / "lightning" / f"model{tile_i}_{tile_j}.ckpt" - - # pick the dataloader based on predict data - dataloader = datamodule.test_dataloader() - if self.data_source == PredictData.VAL: - dataloader = datamodule.val_dataloader() - # TODO(blaz-r): - this is tweak to avoid problem in engine:388 - # 2254 - dataloader.dataset.transform = None - - # pass root_dir to engine so all models in ensemble have the same root dir - yield PredictJob( - accelerator=self.accelerator, - seed=self.seed, - root_dir=self.root_dir, - tile_index=tile_index, - normalization_stage=self.normalization_stage, - model=model, - dataloader=dataloader, - engine=engine, - ckpt_path=ckpt_path, - ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/smoothing.py b/src/anomalib/pipelines/tiled_ensemble/components/smoothing.py deleted file mode 100644 index b3d5a51000..0000000000 --- 
a/src/anomalib/pipelines/tiled_ensemble/components/smoothing.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Tiled ensemble - seam smoothing job.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from collections.abc import Generator -from typing import Any - -import torch -from tqdm import tqdm - -from anomalib.models.components import GaussianBlur2d -from anomalib.pipelines.components import Job, JobGenerator -from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS - -from .utils.ensemble_tiling import EnsembleTiler -from .utils.helper_functions import get_ensemble_tiler - -logger = logging.getLogger(__name__) - - -class SmoothingJob(Job): - """Job for smoothing the area around the tile seam. - - Args: - accelerator (str): Accelerator used for processing. - predictions (list[Any]): List of image-level predictions. - width_factor (float): Factor multiplied by tile dimension to get the region around seam which will be smoothed. - filter_sigma (float): Sigma of filter used for smoothing the seams. - tiler (EnsembleTiler): Tiler object used to get tile dimension data. - """ - - name = "SeamSmoothing" - - def __init__( - self, - accelerator: str, - predictions: list[Any], - width_factor: float, - filter_sigma: float, - tiler: EnsembleTiler, - ) -> None: - super().__init__() - self.accelerator = accelerator - self.predictions = predictions - - # offset in pixels of region around tile seam that will be smoothed - self.height_offset = int(tiler.tile_size_h * width_factor) - self.width_offset = int(tiler.tile_size_w * width_factor) - self.tiler = tiler - - self.seam_mask = self.prepare_seam_mask() - - self.blur = GaussianBlur2d(sigma=filter_sigma) - - def prepare_seam_mask(self) -> torch.Tensor: - """Prepare boolean mask of regions around the part where tiles seam in ensemble. - - Returns: - torch.Tensor: Representation of boolean mask where filtered seams should be used. 
- """ - img_h, img_w = self.tiler.image_size - stride_h, stride_w = self.tiler.stride_h, self.tiler.stride_w - - mask = torch.zeros(img_h, img_w, dtype=torch.bool) - - # prepare mask strip on vertical seams - curr_w = stride_w - while curr_w < img_w: - start_i = curr_w - self.width_offset - end_i = curr_w + self.width_offset - mask[:, start_i:end_i] = 1 - curr_w += stride_w - - # prepare mask strip on horizontal seams - curr_h = stride_h - while curr_h < img_h: - start_i = curr_h - self.height_offset - end_i = curr_h + self.height_offset - mask[start_i:end_i, :] = True - curr_h += stride_h - - return mask - - def run(self, task_id: int | None = None) -> list[Any]: - """Run smoothing job. - - Args: - task_id: Not used in this case. - - Returns: - list[Any]: List of predictions. - """ - del task_id # not needed here - - logger.info("Starting seam smoothing.") - - for data in tqdm(self.predictions, desc="Seam smoothing"): - # move to specified accelerator for faster execution - data["anomaly_maps"] = data["anomaly_maps"].to(self.accelerator) - # smooth the anomaly map and take only region around seams delimited by seam_mask - smoothed = self.blur(data["anomaly_maps"]) - data["anomaly_maps"][:, :, self.seam_mask] = smoothed[:, :, self.seam_mask] - data["anomaly_maps"] = data["anomaly_maps"].cpu() - - return self.predictions - - @staticmethod - def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: - """Nothing to collect in this job. - - Returns: - list[Any]: List of predictions. 
- """ - # take the first element as result is list of lists here - return results[0] - - @staticmethod - def save(results: GATHERED_RESULTS) -> None: - """Nothing to save in this job.""" - - -class SmoothingJobGenerator(JobGenerator): - """Generate SmoothingJob.""" - - def __init__(self, accelerator: str, tiling_args: dict, data_args: dict) -> None: - super().__init__() - self.accelerator = accelerator - self.tiling_args = tiling_args - self.data_args = data_args - - @property - def job_class(self) -> type: - """Return the job class.""" - return SmoothingJob - - def generate_jobs( - self, - args: dict | None = None, - prev_stage_result: list[Any] | None = None, - ) -> Generator[SmoothingJob, None, None]: - """Return a generator producing a single seam smoothing job. - - Args: - args: Tiled ensemble pipeline args. - prev_stage_result (list[Any]): Ensemble predictions from previous step. - - Returns: - Generator[SmoothingJob, None, None]: SmoothingJob generator - """ - if args is None: - msg = "SeamSmoothing job requires config args" - raise ValueError(msg) - # tiler is used to determine where seams appear - tiler = get_ensemble_tiler(self.tiling_args, self.data_args) - if prev_stage_result is not None: - yield SmoothingJob( - accelerator=self.accelerator, - predictions=prev_stage_result, - width_factor=args["width"], - filter_sigma=args["sigma"], - tiler=tiler, - ) - else: - msg = "Join smoothing job requires tile level predictions from previous step." 
- raise ValueError(msg) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/stats_calculation.py b/src/anomalib/pipelines/tiled_ensemble/components/stats_calculation.py deleted file mode 100644 index 6c48b639f7..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/stats_calculation.py +++ /dev/null @@ -1,180 +0,0 @@ -"""Tiled ensemble - post-processing statistics calculation job.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import logging -from collections.abc import Generator -from pathlib import Path -from typing import Any - -import torch -from omegaconf import DictConfig, ListConfig -from torchmetrics import MetricCollection -from tqdm import tqdm - -from anomalib.callbacks.thresholding import _ThresholdCallback -from anomalib.metrics import MinMax -from anomalib.metrics.threshold import Threshold -from anomalib.pipelines.components import Job, JobGenerator -from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS - -logger = logging.getLogger(__name__) - - -class StatisticsJob(Job): - """Job for calculating min, max and threshold statistics for post-processing. - - Args: - predictions (list[Any]): List of image-level predictions. - root_dir (Path): Root directory to save checkpoints, stats and images. - """ - - name = "Stats" - - def __init__( - self, - predictions: list[Any] | None, - root_dir: Path, - image_threshold: Threshold, - pixel_threshold: Threshold, - ) -> None: - super().__init__() - self.predictions = predictions - self.root_dir = root_dir - self.image_threshold = image_threshold - self.pixel_threshold = pixel_threshold - - def run(self, task_id: int | None = None) -> dict: - """Run job that calculates statistics needed in post-processing steps. - - Args: - task_id: Not used in this case - - Returns: - dict: Statistics dict with min, max and threshold values. 
- """ - del task_id # not needed here - - minmax = MetricCollection( - { - "anomaly_maps": MinMax().cpu(), - "pred_scores": MinMax().cpu(), - }, - ) - pixel_update_called = False - - logger.info("Starting post-processing statistics calculation.") - - for data in tqdm(self.predictions, desc="Stats calculation"): - # update minmax - if "anomaly_maps" in data: - minmax["anomaly_maps"](data["anomaly_maps"]) - if "pred_scores" in data: - minmax["pred_scores"](data["pred_scores"]) - - # update thresholds - self.image_threshold.update(data["pred_scores"], data["label"].int()) - if "mask" in data and "anomaly_maps" in data: - self.pixel_threshold.update(torch.squeeze(data["anomaly_maps"]), torch.squeeze(data["mask"].int())) - pixel_update_called = True - - self.image_threshold.compute() - if pixel_update_called: - self.pixel_threshold.compute() - else: - self.pixel_threshold.value = self.image_threshold.value - - min_max_vals = {} - for pred_name, pred_metric in minmax.items(): - min_max_vals[pred_name] = { - "min": pred_metric.min.item(), - "max": pred_metric.max.item(), - } - - # return stats with save path that is later used to save statistics. - return { - "minmax": min_max_vals, - "image_threshold": self.image_threshold.value.item(), - "pixel_threshold": self.pixel_threshold.value.item(), - "save_path": (self.root_dir / "weights" / "lightning" / "stats.json"), - } - - @staticmethod - def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: - """Nothing to collect in this job. - - Returns: - dict: statistics dictionary. 
- """ - # take the first element as result is list of lists here - return results[0] - - @staticmethod - def save(results: GATHERED_RESULTS) -> None: - """Save statistics to file system.""" - # get and remove path from stats dict - stats_path: Path = results.pop("save_path") - stats_path.parent.mkdir(parents=True, exist_ok=True) - - # save statistics next to weights - with stats_path.open("w", encoding="utf-8") as stats_file: - json.dump(results, stats_file, ensure_ascii=False, indent=4) - - -class StatisticsJobGenerator(JobGenerator): - """Generate StatisticsJob. - - Args: - root_dir (Path): Root directory where statistics file will be saved (in weights folder). - """ - - def __init__( - self, - root_dir: Path, - thresholding_method: DictConfig | str | ListConfig | list[dict[str, str | float]], - ) -> None: - self.root_dir = root_dir - self.threshold = thresholding_method - - @property - def job_class(self) -> type: - """Return the job class.""" - return StatisticsJob - - def generate_jobs( - self, - args: dict | None = None, - prev_stage_result: list[Any] | None = None, - ) -> Generator[StatisticsJob, None, None]: - """Return a generator producing a single stats calculating job. - - Args: - args: Not used here. - prev_stage_result (list[Any]): Ensemble predictions from previous step. - - Returns: - Generator[StatisticsJob, None, None]: StatisticsJob generator. 
- """ - del args # not needed here - - # get threshold class based config - if isinstance(self.threshold, str | DictConfig): - # single method provided - image_threshold = _ThresholdCallback._get_threshold_from_config(self.threshold) # noqa: SLF001 - pixel_threshold = image_threshold.clone() - elif isinstance(self.threshold, ListConfig | list): - # image and pixel method specified separately - image_threshold = _ThresholdCallback._get_threshold_from_config(self.threshold[0]) # noqa: SLF001 - pixel_threshold = _ThresholdCallback._get_threshold_from_config(self.threshold[1]) # noqa: SLF001 - else: - msg = f"Invalid threshold config {self.threshold}" - raise TypeError(msg) - - yield StatisticsJob( - predictions=prev_stage_result, - root_dir=self.root_dir, - image_threshold=image_threshold, - pixel_threshold=pixel_threshold, - ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/thresholding.py b/src/anomalib/pipelines/tiled_ensemble/components/thresholding.py deleted file mode 100644 index 733c3d99db..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/thresholding.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Tiled ensemble - thresholding job.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from collections.abc import Generator -from pathlib import Path -from typing import Any - -from tqdm import tqdm - -from anomalib.pipelines.components import Job, JobGenerator -from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS - -from .utils import NormalizationStage -from .utils.helper_functions import get_threshold_values - -logger = logging.getLogger(__name__) - - -class ThresholdingJob(Job): - """Job used to threshold predictions, producing labels from scores. - - Args: - predictions (list[Any]): List of predictions. - image_threshold (float): Threshold used for image-level thresholding. - pixel_threshold (float): Threshold used for pixel-level thresholding. 
- """ - - name = "Threshold" - - def __init__(self, predictions: list[Any] | None, image_threshold: float, pixel_threshold: float) -> None: - super().__init__() - self.predictions = predictions - self.image_threshold = image_threshold - self.pixel_threshold = pixel_threshold - - def run(self, task_id: int | None = None) -> list[Any] | None: - """Run job that produces prediction labels from scores. - - Args: - task_id: Not used in this case. - - Returns: - list[Any]: List of thresholded predictions. - """ - del task_id # not needed here - - logger.info("Starting thresholding.") - - for data in tqdm(self.predictions, desc="Thresholding"): - if "pred_scores" in data: - data["pred_labels"] = data["pred_scores"] >= self.image_threshold - if "anomaly_maps" in data: - data["pred_masks"] = data["anomaly_maps"] >= self.pixel_threshold - - return self.predictions - - @staticmethod - def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: - """Nothing to collect in this job. - - Returns: - list[Any]: List of predictions. - """ - # take the first element as result is list of lists here - return results[0] - - @staticmethod - def save(results: GATHERED_RESULTS) -> None: - """Nothing is saved in this job.""" - - -class ThresholdingJobGenerator(JobGenerator): - """Generate ThresholdingJob. - - Args: - root_dir (Path): Root directory containing post-processing stats. - """ - - def __init__(self, root_dir: Path, normalization_stage: NormalizationStage) -> None: - self.root_dir = root_dir - self.normalization_stage = normalization_stage - - @property - def job_class(self) -> type: - """Return the job class.""" - return ThresholdingJob - - def generate_jobs( - self, - args: dict | None = None, - prev_stage_result: list[Any] | None = None, - ) -> Generator[ThresholdingJob, None, None]: - """Return a generator producing a single thresholding job. - - Args: - args: ensemble run args. - prev_stage_result (list[Any]): Ensemble predictions from previous step. 
- - Returns: - Generator[ThresholdingJob, None, None]: ThresholdingJob generator. - """ - del args # args not used here - - # get threshold values base on normalization - image_threshold, pixel_threshold = get_threshold_values(self.normalization_stage, self.root_dir) - - yield ThresholdingJob( - predictions=prev_stage_result, - image_threshold=image_threshold, - pixel_threshold=pixel_threshold, - ) diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/__init__.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/__init__.py deleted file mode 100644 index a010208908..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/utils/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Tiled ensemble utils and helper functions.""" - -from enum import Enum - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -class NormalizationStage(str, Enum): - """Enum signaling at which stage the normalization is done. - - In case of tile, tiles are normalized for each tile position separately. - In case of image, normalization is done at the end when images are joined back together. - In case of none, output is not normalized. - """ - - TILE = "tile" - IMAGE = "image" - NONE = "none" - - -class ThresholdStage(str, Enum): - """Enum signaling at which stage the thresholding is applied. - - In case of tile, thresholding is applied for each tile location separately. - In case of image, thresholding is applied at the end when images are joined back together. 
- """ - - TILE = "tile" - IMAGE = "image" - - -class PredictData(Enum): - """Enum indicating which data to use in prediction job.""" - - VAL = "val" - TEST = "test" - - -__all__ = [ - "NormalizationStage", - "ThresholdStage", - "PredictData", -] diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_engine.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_engine.py deleted file mode 100644 index 449109ed3f..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_engine.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Implements custom Anomalib engine for tiled ensemble training.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from pathlib import Path - -from lightning.pytorch.callbacks import Callback, RichModelSummary - -from anomalib.callbacks import ModelCheckpoint, TimerCallback -from anomalib.callbacks.metrics import _MetricsCallback -from anomalib.callbacks.normalization import get_normalization_callback -from anomalib.callbacks.post_processor import _PostProcessorCallback -from anomalib.callbacks.thresholding import _ThresholdCallback -from anomalib.engine import Engine -from anomalib.models import AnomalyModule -from anomalib.utils.path import create_versioned_dir - -logger = logging.getLogger(__name__) - - -class TiledEnsembleEngine(Engine): - """Engine used for training and evaluating tiled ensemble. - - Most of the logic stays the same, but workspace creation and callbacks are adjusted for ensemble. - - Args: - tile_index (tuple[int, int]): index of tile that this engine instance processes. - **kwargs: Engine arguments. 
- """ - - def __init__(self, tile_index: tuple[int, int], **kwargs) -> None: - self.tile_index = tile_index - super().__init__(**kwargs) - - def _setup_workspace(self, *args, **kwargs) -> None: - """Skip since in case of tiled ensemble, workspace is only setup once at the beginning of training.""" - - @staticmethod - def setup_ensemble_workspace(args: dict, versioned_dir: bool = True) -> Path: - """Set up the workspace at the beginning of tiled ensemble training. - - Args: - args (dict): Tiled ensemble config dict. - versioned_dir (bool, optional): Whether to create a versioned directory. - Defaults to ``True``. - - Returns: - Path: path to new workspace root dir - """ - model_name = args["TrainModels"]["model"]["class_path"].split(".")[-1] - dataset_name = args["data"]["class_path"].split(".")[-1] - category = args["data"]["init_args"]["category"] - root_dir = Path(args["default_root_dir"]) / model_name / dataset_name / category - return create_versioned_dir(root_dir) if versioned_dir else root_dir / "latest" - - def _setup_anomalib_callbacks(self, model: AnomalyModule) -> None: - """Modified method to enable individual model training. It's called when Trainer is being set up.""" - del model # not used here - - _callbacks: list[Callback] = [RichModelSummary()] - - # Add ModelCheckpoint if it is not in the callbacks list. - has_checkpoint_callback = any(isinstance(c, ModelCheckpoint) for c in self._cache.args["callbacks"]) - if not has_checkpoint_callback: - tile_i, tile_j = self.tile_index - _callbacks.append( - ModelCheckpoint( - dirpath=self._cache.args["default_root_dir"] / "weights" / "lightning", - filename=f"model{tile_i}_{tile_j}", - auto_insert_metric_name=False, - ), - ) - - # Add the post-processor callbacks. Used for thresholding and label calculation. - _callbacks.append(_PostProcessorCallback()) - - # Add the normalization callback if tile level normalization was specified (is not none). 
- normalization_callback = get_normalization_callback(self.normalization) - if normalization_callback is not None: - _callbacks.append(normalization_callback) - - # Add the thresholding and metrics callbacks in all cases, - # because individual model might still need this for early stop. - _callbacks.append(_ThresholdCallback(self.threshold)) - _callbacks.append(_MetricsCallback(self.task, self.image_metric_names, self.pixel_metric_names)) - - _callbacks.append(TimerCallback()) - - # Combine the callbacks, and update the trainer callbacks. - self._cache.args["callbacks"] = _callbacks + self._cache.args["callbacks"] diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_tiling.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_tiling.py deleted file mode 100644 index db56f88b47..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/utils/ensemble_tiling.py +++ /dev/null @@ -1,147 +0,0 @@ -"""Tiler used with ensemble of models.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from collections.abc import Sequence -from typing import Any - -from torch import Tensor - -from anomalib.data.base.datamodule import collate_fn -from anomalib.data.utils.tiler import Tiler, compute_new_image_size - - -class EnsembleTiler(Tiler): - """Tile Image into (non)overlapping Patches which are then used for ensemble training. - - Args: - tile_size (int | Sequence): Tile dimension for each patch. - stride (int | Sequence): Stride length between patches. - image_size (int | Sequence): Size of input image that will be tiled. 
- - Examples: - >>> import torch - >>> tiler = EnsembleTiler(tile_size=256, stride=128, image_size=512) - >>> - >>> # random images, shape: [B, C, H, W] - >>> images = torch.rand(32, 5, 512, 512) - >>> # once tiled, the shape is [tile_count_H, tile_count_W, B, C, tile_H, tile_W] - >>> tiled = tiler.tile(images) - >>> tiled.shape - torch.Size([3, 3, 32, 5, 256, 256]) - - >>> # assemble the tiles back together - >>> untiled = tiler.untile(tiled) - >>> untiled.shape - torch.Size([32, 5, 512, 512]) - """ - - def __init__(self, tile_size: int | Sequence, stride: int | Sequence, image_size: int | Sequence) -> None: - super().__init__( - tile_size=tile_size, - stride=stride, - ) - - # calculate final image size - self.image_size = self.validate_size_type(image_size) - self.input_h, self.input_w = self.image_size - self.resized_h, self.resized_w = compute_new_image_size( - image_size=(self.input_h, self.input_w), - tile_size=(self.tile_size_h, self.tile_size_w), - stride=(self.stride_h, self.stride_w), - ) - - # get number of patches in both dimensions - self.num_patches_h = int((self.resized_h - self.tile_size_h) / self.stride_h) + 1 - self.num_patches_w = int((self.resized_w - self.tile_size_w) / self.stride_w) + 1 - self.num_tiles = self.num_patches_h * self.num_patches_w - - def tile(self, image: Tensor, use_random_tiling: bool = False) -> Tensor: - """Tiles an input image to either overlapping or non-overlapping patches. - - Args: - image (Tensor): Input images. - use_random_tiling (bool): Random tiling, which is part of original tiler but is unused here. - - Returns: - Tensor: Tiles generated from images. - Returned shape: [num_h, num_w, batch, channel, tile_height, tile_width]. 
- """ - # tiles are returned in order [tile_count * batch, channels, tile_height, tile_width] - combined_tiles = super().tile(image, use_random_tiling) - - # rearrange to [num_h, num_w, batch, channel, tile_height, tile_width] - tiles = combined_tiles.contiguous().view( - self.batch_size, - self.num_patches_h, - self.num_patches_w, - self.num_channels, - self.tile_size_h, - self.tile_size_w, - ) - tiles = tiles.permute(1, 2, 0, 3, 4, 5) - - return tiles # noqa: RET504 - - def untile(self, tiles: Tensor) -> Tensor: - """Reassemble the tiled tensor into image level representation. - - Args: - tiles (Tensor): Tiles in shape: [num_h, num_w, batch, channel, tile_height, tile_width]. - - Returns: - Tensor: Image constructed from input tiles. Shape: [B, C, H, W]. - """ - # tiles have shape [num_h, num_w, batch, channel, tile_height, tile_width] - _, _, batch, channels, tile_size_h, tile_size_w = tiles.shape - - # set tilers batch size as it might have been changed by previous tiling - self.batch_size = batch - - # rearrange the tiles in order [tile_count * batch, channels, tile_height, tile_width] - # the required shape for untiling - tiles = tiles.permute(2, 0, 1, 3, 4, 5) - tiles = tiles.contiguous().view(-1, channels, tile_size_h, tile_size_w) - - untiled = super().untile(tiles) - - return untiled # noqa: RET504 - - -class TileCollater: - """Class serving as collate function to perform tiling on batch of images from Dataloader. - - Args: - tiler (EnsembleTiler): Tiler used to split the images to tiles. - tile_index (tuple[int, int]): Index of tile we want to return. - """ - - def __init__(self, tiler: EnsembleTiler, tile_index: tuple[int, int]) -> None: - self.tiler = tiler - self.tile_index = tile_index - - def __call__(self, batch: list) -> dict[str, Any]: - """Collate batch and tile images + masks from batch. - - Args: - batch (list): Batch of elements from data, also including images. - - Returns: - dict[str, Any]: Collated batch dictionary with tiled images. 
- """ - # use default collate - coll_batch = collate_fn(batch) - - tiled_images = self.tiler.tile(coll_batch["image"]) - # return only tiles at given index - coll_batch["image"] = tiled_images[self.tile_index] - - if "mask" in coll_batch: - # insert channel (as mask has just one) - tiled_masks = self.tiler.tile(coll_batch["mask"].unsqueeze(1)) - - # return only tiled at given index, squeeze to remove previously added channel - coll_batch["mask"] = tiled_masks[self.tile_index].squeeze(1) - - return coll_batch diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/helper_functions.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/helper_functions.py deleted file mode 100644 index bc1e5f4f55..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/utils/helper_functions.py +++ /dev/null @@ -1,179 +0,0 @@ -"""Helper functions for the tiled ensemble training.""" - -import json - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -from pathlib import Path - -from jsonargparse import ArgumentParser, Namespace -from lightning import Trainer - -from anomalib.data import AnomalibDataModule, get_datamodule -from anomalib.models import AnomalyModule, get_model -from anomalib.utils.normalization import NormalizationMethod - -from . import NormalizationStage -from .ensemble_engine import TiledEnsembleEngine -from .ensemble_tiling import EnsembleTiler, TileCollater - - -def get_ensemble_datamodule(data_args: dict, tiler: EnsembleTiler, tile_index: tuple[int, int]) -> AnomalibDataModule: - """Get Anomaly Datamodule adjusted for use in ensemble. - - Datamodule collate function gets replaced by TileCollater in order to tile all images before they are passed on. - - Args: - data_args: tiled ensemble data configuration. - tiler (EnsembleTiler): Tiler used to split the images to tiles for use in ensemble. - tile_index (tuple[int, int]): Index of the tile in the split image. 
- - Returns: - AnomalibDataModule: Anomalib Lightning DataModule - """ - datamodule = get_datamodule(data_args) - # set custom collate function that does the tiling - datamodule.collate_fn = TileCollater(tiler, tile_index) - datamodule.setup() - - return datamodule - - -def get_ensemble_model(model_args: dict, tiler: EnsembleTiler) -> AnomalyModule: - """Get model prepared for ensemble training. - - Args: - model_args: tiled ensemble model configuration. - tiler (EnsembleTiler): tiler used to get tile dimensions. - - Returns: - AnomalyModule: model with input_size setup - """ - model = get_model(model_args) - # set model input size match tile size - model.set_input_size((tiler.tile_size_h, tiler.tile_size_w)) - - return model - - -def get_ensemble_tiler(tiling_args: dict, data_args: dict) -> EnsembleTiler: - """Get tiler used for image tiling and to obtain tile dimensions. - - Args: - tiling_args: tiled ensemble tiling configuration. - data_args: tiled ensemble data configuration. - - Returns: - EnsembleTiler: tiler object. - """ - tiler = EnsembleTiler( - tile_size=tiling_args["tile_size"], - stride=tiling_args["stride"], - image_size=data_args["init_args"]["image_size"], - ) - - return tiler # noqa: RET504 - - -def parse_trainer_kwargs(trainer_args: dict | None) -> Namespace | dict: - """Parse trainer args and instantiate all needed elements. - - Transforms config into kwargs ready for Trainer, including instantiation of callback etc. - - Args: - trainer_args (dict): Trainer args dictionary. - - Returns: - dict: parsed kwargs with instantiated elements. 
- """ - if not trainer_args: - return {} - - # try to get trainer args, if not present return empty - parser = ArgumentParser() - - parser.add_class_arguments(Trainer, fail_untyped=False, instantiate=False, sub_configs=True) - config = parser.parse_object(trainer_args) - objects = parser.instantiate_classes(config) - - return objects # noqa: RET504 - - -def get_ensemble_engine( - tile_index: tuple[int, int], - accelerator: str, - devices: list[int] | str | int, - root_dir: Path, - normalization_stage: str, - metrics: dict | None = None, - trainer_args: dict | None = None, -) -> TiledEnsembleEngine: - """Prepare engine for ensemble training or prediction. - - This method makes sure correct normalization is used, prepares metrics and additional trainer kwargs.. - - Args: - tile_index (tuple[int, int]): Index of tile that this model processes. - accelerator (str): Accelerator (device) to use. - devices (list[int] | str | int): device IDs used for training. - root_dir (Path): Root directory to save checkpoints, stats and images. - normalization_stage (str): Config dictionary for ensemble post-processing. - metrics (dict): Dict containing pixel and image metrics names. - trainer_args (dict): Trainer args dictionary. Empty dict if not present. - - Returns: - TiledEnsembleEngine: set up engine for ensemble training/prediction. 
- """ - # if we want tile level normalization we set it here, otherwise it's done later on joined images - if normalization_stage == NormalizationStage.TILE: - normalization = NormalizationMethod.MIN_MAX - else: - normalization = NormalizationMethod.NONE - - # parse additional trainer args and callbacks if present in config - trainer_kwargs = parse_trainer_kwargs(trainer_args) - # remove keys that we already have - trainer_kwargs.pop("accelerator", None) - trainer_kwargs.pop("default_root_dir", None) - trainer_kwargs.pop("devices", None) - - # create engine for specific tile location - engine = TiledEnsembleEngine( - tile_index=tile_index, - normalization=normalization, - accelerator=accelerator, - devices=devices, - default_root_dir=root_dir, - image_metrics=metrics.get("image", None) if metrics else None, - pixel_metrics=metrics.get("pixel", None) if metrics else None, - **trainer_kwargs, - ) - - return engine # noqa: RET504 - - -def get_threshold_values(normalization_stage: NormalizationStage, root_dir: Path) -> tuple[float, float]: - """Get threshold values for image and pixel level predictions. - - If normalization is not used, get values based on statistics obtained from validation set. - If normalization is used, both image and pixel threshold are 0.5 - - Args: - normalization_stage (NormalizationStage): ensemble run args, used to get normalization stage. - root_dir (Path): path to run root where stats file is saved. - - Returns: - tuple[float, float]: image and pixel threshold. 
- """ - if normalization_stage == NormalizationStage.NONE: - stats_path = root_dir / "weights" / "lightning" / "stats.json" - with stats_path.open("r") as f: - stats = json.load(f) - image_threshold = stats["image_threshold"] - pixel_threshold = stats["pixel_threshold"] - else: - # normalization transforms the scores so that threshold is at 0.5 - image_threshold = 0.5 - pixel_threshold = 0.5 - - return image_threshold, pixel_threshold diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_data.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_data.py deleted file mode 100644 index 4fe45e9c4a..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_data.py +++ /dev/null @@ -1,45 +0,0 @@ -"""Classes used to store ensemble predictions.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from torch import Tensor - - -class EnsemblePredictions: - """Basic implementation of EnsemblePredictionData that keeps all predictions in main memory.""" - - def __init__(self) -> None: - super().__init__() - self.all_data: dict[tuple[int, int], list] = {} - - def add_tile_prediction(self, tile_index: tuple[int, int], tile_prediction: list[dict[str, Tensor | list]]) -> None: - """Add tile prediction data at provided index to class dictionary in main memory. - - Args: - tile_index (tuple[int, int]): Index of tile that we are adding in form (row, column). - tile_prediction (list[dict[str, Tensor | list]]): - List of batches containing all predicted data for current tile position. - - """ - self.num_batches = len(tile_prediction) - - self.all_data[tile_index] = tile_prediction - - def get_batch_tiles(self, batch_index: int) -> dict[tuple[int, int], dict]: - """Get all tiles of current batch from class dictionary. - - Called by merging mechanism. - - Args: - batch_index (int): Index of current batch of tiles to be returned. 
- - Returns: - dict[tuple[int, int], dict]: Dictionary mapping tile index to predicted data, for provided batch index. - """ - batch_data = {} - - for index, batches in self.all_data.items(): - batch_data[index] = batches[batch_index] - - return batch_data diff --git a/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_merging.py b/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_merging.py deleted file mode 100644 index 7337cc4ffe..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/utils/prediction_merging.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Class used as mechanism to merge ensemble predictions from each tile into complete whole-image representation.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import torch -from torch import Tensor - -from .ensemble_tiling import EnsembleTiler -from .prediction_data import EnsemblePredictions - - -class PredictionMergingMechanism: - """Class used for merging the data predicted by each separate model of tiled ensemble. - - Tiles are stacked in one tensor and untiled using Ensemble Tiler. - Boxes from tiles are either stacked or generated anew from anomaly map. - Labels are combined with OR operator, meaning one anomalous tile -> anomalous image. - Scores are averaged across all tiles. - - Args: - ensemble_predictions (EnsemblePredictions): Object containing predictions on tile level. - tiler (EnsembleTiler): Tiler used to transform tiles back to image level representation. 
- - Example: - >>> from anomalib.pipelines.tiled_ensemble.components.utils.ensemble_tiling import EnsembleTiler - >>> from anomalib.pipelines.tiled_ensemble.components.utils.prediction_data import EnsemblePredictions - >>> - >>> tiler = EnsembleTiler(tile_size=256, stride=128, image_size=512) - >>> data = EnsemblePredictions() - >>> merger = PredictionMergingMechanism(data, tiler) - >>> - >>> # we can then start merging procedure for each batch - >>> merger.merge_tile_predictions(0) - """ - - def __init__(self, ensemble_predictions: EnsemblePredictions, tiler: EnsembleTiler) -> None: - assert ensemble_predictions.num_batches > 0, "There should be at least one batch for each tile prediction." - assert (0, 0) in ensemble_predictions.get_batch_tiles( - 0, - ), "Tile prediction dictionary should always have at least one tile" - - self.ensemble_predictions = ensemble_predictions - self.num_batches = self.ensemble_predictions.num_batches - - self.tiler = tiler - - def merge_tiles(self, batch_data: dict, tile_key: str) -> Tensor: - """Merge tiles back into one tensor and perform untiling with tiler. - - Args: - batch_data (dict): Dictionary containing all tile predictions of current batch. - tile_key (str): Key used in prediction dictionary for tiles that we want to merge. - - Returns: - Tensor: Tensor of tiles in original (stitched) shape. 
- """ - # batch of tiles with index (0, 0) always exists, so we use it to get some basic information - first_tiles = batch_data[0, 0][tile_key] - batch_size = first_tiles.shape[0] - device = first_tiles.device - - if tile_key == "mask": - # in case of ground truth masks, we don't have channels - merged_size = [ - self.tiler.num_patches_h, - self.tiler.num_patches_w, - batch_size, - self.tiler.tile_size_h, - self.tiler.tile_size_w, - ] - else: - # all tiles beside masks also have channels - num_channels = first_tiles.shape[1] - merged_size = [ - self.tiler.num_patches_h, - self.tiler.num_patches_w, - batch_size, - int(num_channels), - self.tiler.tile_size_h, - self.tiler.tile_size_w, - ] - - # create new empty tensor for merged tiles - merged_masks = torch.zeros(size=merged_size, device=device) - - # insert tile into merged tensor at right locations - for (tile_i, tile_j), tile_data in batch_data.items(): - merged_masks[tile_i, tile_j, ...] = tile_data[tile_key] - - if tile_key == "mask": - # add channel as tiler needs it - merged_masks = merged_masks.unsqueeze(3) - - # stitch tiles back into whole, output is [B, C, H, W] - merged_output = self.tiler.untile(merged_masks) - - if tile_key == "mask": - # remove previously added channels - merged_output = merged_output.squeeze(1) - - return merged_output - - def merge_labels_and_scores(self, batch_data: dict) -> dict[str, Tensor]: - """Join scores and their corresponding label predictions from all tiles for each image. - - Label merging is done by rule where one anomalous tile in image results in whole image being anomalous. - Scores are averaged over tiles. - - Args: - batch_data (dict): Dictionary containing all tile predictions of current batch. 
- - Returns: - dict[str, Tensor]: Dictionary with "pred_labels" and "pred_scores" - """ - # create accumulator with same shape as original - labels = torch.zeros(batch_data[0, 0]["pred_labels"].shape, dtype=torch.bool) - scores = torch.zeros(batch_data[0, 0]["pred_scores"].shape) - - for curr_tile_data in batch_data.values(): - curr_labels = curr_tile_data["pred_labels"] - curr_scores = curr_tile_data["pred_scores"] - - labels = labels.logical_or(curr_labels) - scores += curr_scores - - scores /= self.tiler.num_tiles - - return {"pred_labels": labels, "pred_scores": scores} - - def merge_tile_predictions(self, batch_index: int) -> dict[str, Tensor | list]: - """Join predictions from ensemble into whole image level representation for batch at index batch_index. - - Args: - batch_index (int): Index of current batch. - - Returns: - dict[str, Tensor | list]: List of merged predictions for specified batch. - """ - current_batch_data = self.ensemble_predictions.get_batch_tiles(batch_index) - - # take first tile as base prediction, keep items that are the same over all tiles: - # image_path, label, mask_path - merged_predictions = { - "image_path": current_batch_data[0, 0]["image_path"], - "label": current_batch_data[0, 0]["label"], - } - if "mask_path" in current_batch_data[0, 0]: - merged_predictions["mask_path"] = current_batch_data[0, 0]["mask_path"] - if "boxes" in current_batch_data[0, 0]: - merged_predictions["boxes"] = current_batch_data[0, 0]["boxes"] - - tiled_data = ["image", "mask"] - if "anomaly_maps" in current_batch_data[0, 0]: - tiled_data += ["anomaly_maps", "pred_masks"] - - # merge all tiled data - for t_key in tiled_data: - if t_key in current_batch_data[0, 0]: - merged_predictions[t_key] = self.merge_tiles(current_batch_data, t_key) - - # label and score merging - merged_scores_and_labels = self.merge_labels_and_scores(current_batch_data) - merged_predictions["pred_labels"] = merged_scores_and_labels["pred_labels"] - merged_predictions["pred_scores"] 
= merged_scores_and_labels["pred_scores"] - - return merged_predictions diff --git a/src/anomalib/pipelines/tiled_ensemble/components/visualization.py b/src/anomalib/pipelines/tiled_ensemble/components/visualization.py deleted file mode 100644 index 1298ece89f..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/components/visualization.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Tiled ensemble - visualization job.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from collections.abc import Generator -from pathlib import Path -from typing import Any - -from tqdm import tqdm - -from anomalib import TaskType -from anomalib.data.utils.image import save_image -from anomalib.pipelines.components import Job, JobGenerator -from anomalib.pipelines.tiled_ensemble.components.utils import NormalizationStage -from anomalib.pipelines.types import GATHERED_RESULTS, RUN_RESULTS -from anomalib.utils.visualization import ImageVisualizer - -logger = logging.getLogger(__name__) - - -class VisualizationJob(Job): - """Job for visualization of predictions. - - Args: - predictions (list[Any]): list of image-level predictions. - root_dir (Path): Root directory to save checkpoints, stats and images. - task (TaskType): type of task the predictions represent. - normalize (bool): if predictions need to be normalized - """ - - name = "Visualize" - - def __init__(self, predictions: list[Any], root_dir: Path, task: TaskType, normalize: bool) -> None: - super().__init__() - self.predictions = predictions - self.root_dir = root_dir / "images" - self.task = task - self.normalize = normalize - - def run(self, task_id: int | None = None) -> list[Any]: - """Run job that visualizes all prediction data. - - Args: - task_id: Not used in this case. - - Returns: - list[Any]: Unchanged predictions. 
- """ - del task_id # not needed here - - visualizer = ImageVisualizer(task=self.task, normalize=self.normalize) - - logger.info("Starting visualization.") - - for data in tqdm(self.predictions, desc="Visualizing"): - for result in visualizer(outputs=data): - # Finally image path is root/defect_type/image_name - if result.file_name is not None: - file_path = Path(result.file_name) - else: - msg = "file_path should exist in returned Visualizer." - raise ValueError(msg) - - root = self.root_dir / file_path.parent.name - filename = file_path.name - - save_image(image=result.image, root=root, filename=filename) - - return self.predictions - - @staticmethod - def collect(results: list[RUN_RESULTS]) -> GATHERED_RESULTS: - """Nothing to collect in this job. - - Returns: - list[Any]: Unchanged list of predictions. - """ - # take the first element as result is list of lists here - return results[0] - - @staticmethod - def save(results: GATHERED_RESULTS) -> None: - """This job doesn't save anything.""" - - -class VisualizationJobGenerator(JobGenerator): - """Generate VisualizationJob. - - Args: - root_dir (Path): Root directory where images will be saved (root/images). - """ - - def __init__(self, root_dir: Path, task: TaskType, normalization_stage: NormalizationStage) -> None: - self.root_dir = root_dir - self.task = task - self.normalize = normalization_stage == NormalizationStage.NONE - - @property - def job_class(self) -> type: - """Return the job class.""" - return VisualizationJob - - def generate_jobs( - self, - args: dict | None = None, - prev_stage_result: list[Any] | None = None, - ) -> Generator[VisualizationJob, None, None]: - """Return a generator producing a single visualization job. - - Args: - args: Ensemble run args. - prev_stage_result (list[Any]): Ensemble predictions from previous step. 
- - Returns: - Generator[VisualizationJob, None, None]: VisualizationJob generator - """ - del args # args not used here - - if prev_stage_result is not None: - yield VisualizationJob(prev_stage_result, self.root_dir, self.task, self.normalize) - else: - msg = "Visualization job requires tile level predictions from previous step." - raise ValueError(msg) diff --git a/src/anomalib/pipelines/tiled_ensemble/test_pipeline.py b/src/anomalib/pipelines/tiled_ensemble/test_pipeline.py deleted file mode 100644 index 7fdd61e9ff..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/test_pipeline.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Tiled ensemble test pipeline.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from pathlib import Path - -import torch - -from anomalib.data.utils import TestSplitMode -from anomalib.pipelines.components.base import Pipeline, Runner -from anomalib.pipelines.components.runners import ParallelRunner, SerialRunner -from anomalib.pipelines.tiled_ensemble.components import ( - MergeJobGenerator, - MetricsCalculationJobGenerator, - NormalizationJobGenerator, - PredictJobGenerator, - SmoothingJobGenerator, - ThresholdingJobGenerator, - VisualizationJobGenerator, -) -from anomalib.pipelines.tiled_ensemble.components.utils import NormalizationStage, PredictData, ThresholdStage - -logger = logging.getLogger(__name__) - - -class EvalTiledEnsemble(Pipeline): - """Tiled ensemble evaluation pipeline. - - Args: - root_dir (Path): Path to root dir of run that contains checkpoints. - """ - - def __init__(self, root_dir: Path) -> None: - self.root_dir = Path(root_dir) - - def _setup_runners(self, args: dict) -> list[Runner]: - """Set up the runners for the pipeline. 
- - This pipeline consists of jobs used to test/evaluate tiled ensemble: - Prediction on test data > merging of predictions > (optional) seam smoothing - > (optional) Normalization > (optional) Thresholding - > Visualisation of predictions > Metrics calculation. - - Returns: - list[Runner]: List of runners executing tiled ensemble testing jobs. - """ - runners: list[Runner] = [] - - if args["data"]["init_args"]["test_split_mode"] == TestSplitMode.NONE: - logger.info("Test split mode set to `none`, skipping test phase.") - return runners - - seed = args["seed"] - accelerator = args["accelerator"] - tiling_args = args["tiling"] - data_args = args["data"] - normalization_stage = NormalizationStage(args["normalization_stage"]) - threshold_stage = ThresholdStage(args["thresholding"]["stage"]) - model_args = args["TrainModels"]["model"] - task = args["data"]["init_args"]["task"] - metrics = args["TrainModels"]["metrics"] - - predict_job_generator = PredictJobGenerator( - PredictData.TEST, - seed=seed, - accelerator=accelerator, - root_dir=self.root_dir, - tiling_args=tiling_args, - data_args=data_args, - model_args=model_args, - normalization_stage=normalization_stage, - ) - # 1. predict using test data - if accelerator == "cuda": - runners.append( - ParallelRunner( - predict_job_generator, - n_jobs=torch.cuda.device_count(), - ), - ) - else: - runners.append( - SerialRunner( - predict_job_generator, - ), - ) - # 2. merge predictions - runners.append(SerialRunner(MergeJobGenerator(tiling_args=tiling_args, data_args=data_args))) - - # 3. (optional) smooth seams - if args["SeamSmoothing"]["apply"]: - runners.append( - SerialRunner( - SmoothingJobGenerator(accelerator=accelerator, tiling_args=tiling_args, data_args=data_args), - ), - ) - - # 4. (optional) normalize - if normalization_stage == NormalizationStage.IMAGE: - runners.append(SerialRunner(NormalizationJobGenerator(self.root_dir))) - # 5. 
(optional) threshold to get labels from scores - if threshold_stage == ThresholdStage.IMAGE: - runners.append(SerialRunner(ThresholdingJobGenerator(self.root_dir, normalization_stage))) - - # 6. visualize predictions - runners.append( - SerialRunner(VisualizationJobGenerator(self.root_dir, task=task, normalization_stage=normalization_stage)), - ) - # calculate metrics - runners.append( - SerialRunner( - MetricsCalculationJobGenerator( - accelerator=accelerator, - root_dir=self.root_dir, - task=task, - metrics=metrics, - normalization_stage=normalization_stage, - ), - ), - ) - - return runners diff --git a/src/anomalib/pipelines/tiled_ensemble/train_pipeline.py b/src/anomalib/pipelines/tiled_ensemble/train_pipeline.py deleted file mode 100644 index 38e4e34e4b..0000000000 --- a/src/anomalib/pipelines/tiled_ensemble/train_pipeline.py +++ /dev/null @@ -1,123 +0,0 @@ -"""Tiled ensemble training pipeline.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from typing import TYPE_CHECKING - -from anomalib.data.utils import ValSplitMode - -if TYPE_CHECKING: - from pathlib import Path - -import logging - -import torch - -from anomalib.pipelines.components.base import Pipeline, Runner -from anomalib.pipelines.components.runners import ParallelRunner, SerialRunner - -from .components import ( - MergeJobGenerator, - PredictJobGenerator, - SmoothingJobGenerator, - StatisticsJobGenerator, - TrainModelJobGenerator, -) -from .components.utils import NormalizationStage, PredictData -from .components.utils.ensemble_engine import TiledEnsembleEngine - -logger = logging.getLogger(__name__) - - -class TrainTiledEnsemble(Pipeline): - """Tiled ensemble training pipeline.""" - - def __init__(self) -> None: - self.root_dir: Path - - def _setup_runners(self, args: dict) -> list[Runner]: - """Setup the runners for the pipeline. 
- - This pipeline consists of training and validation steps: - Training models > prediction on val data > merging val data > - > (optionally) smoothing seams > calculation of post-processing statistics - - Returns: - list[Runner]: List of runners executing tiled ensemble train + val jobs. - """ - runners: list[Runner] = [] - self.root_dir = TiledEnsembleEngine.setup_ensemble_workspace(args) - - seed = args["seed"] - accelerator = args["accelerator"] - tiling_args = args["tiling"] - data_args = args["data"] - normalization_stage = NormalizationStage(args["normalization_stage"]) - thresholding_method = args["thresholding"]["method"] - model_args = args["TrainModels"]["model"] - - train_job_generator = TrainModelJobGenerator( - seed=seed, - accelerator=accelerator, - root_dir=self.root_dir, - tiling_args=tiling_args, - data_args=data_args, - normalization_stage=normalization_stage, - ) - - predict_job_generator = PredictJobGenerator( - data_source=PredictData.VAL, - seed=seed, - accelerator=accelerator, - root_dir=self.root_dir, - tiling_args=tiling_args, - data_args=data_args, - model_args=model_args, - normalization_stage=normalization_stage, - ) - - # 1. train - if accelerator == "cuda": - runners.append( - ParallelRunner( - train_job_generator, - n_jobs=torch.cuda.device_count(), - ), - ) - else: - runners.append( - SerialRunner( - train_job_generator, - ), - ) - - if data_args["init_args"]["val_split_mode"] == ValSplitMode.NONE: - logger.warning("No validation set provided, skipping statistics calculation.") - return runners - - # 2. predict using validation data - if accelerator == "cuda": - runners.append( - ParallelRunner(predict_job_generator, n_jobs=torch.cuda.device_count()), - ) - else: - runners.append( - SerialRunner(predict_job_generator), - ) - - # 3. merge predictions - runners.append(SerialRunner(MergeJobGenerator(tiling_args=tiling_args, data_args=data_args))) - - # 4. 
(optional) smooth seams - if args["SeamSmoothing"]["apply"]: - runners.append( - SerialRunner( - SmoothingJobGenerator(accelerator=accelerator, tiling_args=tiling_args, data_args=data_args), - ), - ) - - # 5. calculate statistics used for inference - runners.append(SerialRunner(StatisticsJobGenerator(self.root_dir, thresholding_method))) - - return runners diff --git a/tests/integration/pipelines/test_tiled_ensemble.py b/tests/integration/pipelines/test_tiled_ensemble.py deleted file mode 100644 index 2909311276..0000000000 --- a/tests/integration/pipelines/test_tiled_ensemble.py +++ /dev/null @@ -1,62 +0,0 @@ -"""Test tiled ensemble training and prediction.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from pathlib import Path - -import pytest -import yaml - -from anomalib.pipelines.tiled_ensemble import EvalTiledEnsemble, TrainTiledEnsemble - - -@pytest.fixture(scope="session") -def get_mock_environment(dataset_path: Path, project_path: Path) -> Path: - """Return mock directory for testing with datapath setup to dummy data.""" - ens_temp_dir = project_path / "ens_tmp" - ens_temp_dir.mkdir(exist_ok=True) - - with Path("tests/integration/pipelines/tiled_ensemble.yaml").open(encoding="utf-8") as file: - config = yaml.safe_load(file) - - # use separate project temp dir to avoid messing with other tests - config["default_root_dir"] = str(ens_temp_dir) - config["data"]["init_args"]["root"] = str(dataset_path / "mvtec") - - with (Path(ens_temp_dir) / "tiled_ensemble.yaml").open("w", encoding="utf-8") as file: - yaml.safe_dump(config, file) - - return Path(ens_temp_dir) - - -def test_train(get_mock_environment: Path, capsys: pytest.CaptureFixture) -> None: - """Test training of the tiled ensemble.""" - train_pipeline = TrainTiledEnsemble() - train_parser = train_pipeline.get_parser() - args = train_parser.parse_args(["--config", str(get_mock_environment / "tiled_ensemble.yaml")]) - train_pipeline.run(args) - # check that no 
errors were printed -> all stages were successful - out = capsys.readouterr().out - assert not any(line.startswith("There were some errors") for line in out.split("\n")) - - -def test_predict(get_mock_environment: Path, capsys: pytest.CaptureFixture) -> None: - """Test prediction with the tiled ensemble.""" - predict_pipeline = EvalTiledEnsemble(root_dir=get_mock_environment / "Padim" / "MVTec" / "dummy" / "v0") - predict_parser = predict_pipeline.get_parser() - args = predict_parser.parse_args(["--config", str(get_mock_environment / "tiled_ensemble.yaml")]) - predict_pipeline.run(args) - # check that no errors were printed -> all stages were successful - out = capsys.readouterr().out - assert not any(line.startswith("There were some errors") for line in out.split("\n")) - - -def test_visualisation(get_mock_environment: Path) -> None: - """Test that images were produced.""" - assert (get_mock_environment / "Padim/MVTec/dummy/v0/images/bad/000.png").exists() - - -def test_metric_results(get_mock_environment: Path) -> None: - """Test that metrics were saved.""" - assert (get_mock_environment / "Padim/MVTec/dummy/v0/metric_results.csv").exists() diff --git a/tests/integration/pipelines/tiled_ensemble.yaml b/tests/integration/pipelines/tiled_ensemble.yaml deleted file mode 100644 index 8d35be8297..0000000000 --- a/tests/integration/pipelines/tiled_ensemble.yaml +++ /dev/null @@ -1,43 +0,0 @@ -seed: 42 -accelerator: "cpu" -default_root_dir: "results" - -tiling: - tile_size: [50, 50] - stride: 50 - -normalization_stage: image # on what level we normalize, options: [tile, image, none] -thresholding: - method: F1AdaptiveThreshold # refer to documentation for thresholding methods - stage: image # stage at which we apply threshold, options: [tile, image] - -data: - class_path: anomalib.data.MVTec - init_args: - root: toBeSetup - category: dummy - train_batch_size: 32 - eval_batch_size: 32 - num_workers: 0 - task: segmentation - transform: null - train_transform: null - 
eval_transform: null - test_split_mode: from_dir - test_split_ratio: 0.2 - val_split_mode: same_as_test - val_split_ratio: 0.5 - image_size: [50, 100] - -SeamSmoothing: - apply: True # if this is applied, area around tile seams are is smoothed - sigma: 2 # sigma of gaussian filter used to smooth this area - width: 0.1 # width factor, multiplied by tile dimension gives the region width around seam which will be smoothed - -TrainModels: - model: - class_path: Padim - - metrics: - pixel: AUROC - image: AUROC diff --git a/tools/tiled_ensemble/ens_config.yaml b/tools/tiled_ensemble/ens_config.yaml deleted file mode 100644 index 2490b22e9a..0000000000 --- a/tools/tiled_ensemble/ens_config.yaml +++ /dev/null @@ -1,43 +0,0 @@ -seed: 42 -accelerator: "gpu" -default_root_dir: "results" - -tiling: - tile_size: [128, 128] - stride: 128 - -normalization_stage: image # on what level we normalize, options: [tile, image, none] -thresholding: - method: F1AdaptiveThreshold # refer to documentation for thresholding methods - stage: image # stage at which we apply threshold, options: [tile, image] - -data: - class_path: anomalib.data.MVTec - init_args: - root: ./datasets/MVTec - category: bottle - train_batch_size: 32 - eval_batch_size: 32 - num_workers: 8 - task: segmentation - transform: null - train_transform: null - eval_transform: null - test_split_mode: from_dir - test_split_ratio: 0.2 - val_split_mode: same_as_test - val_split_ratio: 0.5 - image_size: [256, 256] - -SeamSmoothing: - apply: True # if this is applied, area around tile seams are is smoothed - sigma: 2 # sigma of gaussian filter used to smooth this area - width: 0.1 # width factor, multiplied by tile dimension gives the region width around seam which will be smoothed - -TrainModels: - model: - class_path: Padim - - metrics: - pixel: AUROC - image: AUROC diff --git a/tools/tiled_ensemble/eval.py b/tools/tiled_ensemble/eval.py deleted file mode 100644 index 58be27c25c..0000000000 --- a/tools/tiled_ensemble/eval.py +++ 
/dev/null @@ -1,28 +0,0 @@ -"""Run tiled ensemble prediction.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from pathlib import Path - -from jsonargparse import ArgumentParser - -from anomalib.pipelines.tiled_ensemble import EvalTiledEnsemble - - -def get_parser() -> ArgumentParser: - """Create a new parser if none is provided.""" - parser = ArgumentParser() - parser.add_argument("--config", type=str | Path, help="Configuration file path.", required=True) - parser.add_argument("--root", type=str | Path, help="Weights file path.", required=True) - - return parser - - -if __name__ == "__main__": - args = get_parser().parse_args() - - print("Running tiled ensemble test pipeline.") - # pass the path to root dir with checkpoints - test_pipeline = EvalTiledEnsemble(args.root) - test_pipeline.run(args) diff --git a/tools/tiled_ensemble/train.py b/tools/tiled_ensemble/train.py deleted file mode 100644 index 8aed47ea0d..0000000000 --- a/tools/tiled_ensemble/train.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Run tiled ensemble training.""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from anomalib.pipelines.tiled_ensemble import EvalTiledEnsemble, TrainTiledEnsemble - -if __name__ == "__main__": - print("Running tiled ensemble train pipeline") - train_pipeline = TrainTiledEnsemble() - # run training - train_pipeline.run() - - print("Running tiled ensemble test pipeline.") - # pass the root dir from train run to load checkpoints - test_pipeline = EvalTiledEnsemble(train_pipeline.root_dir) - test_pipeline.run() From 90eb35fbc7f23ac9871fbec657e13e738b748046 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Wed, 11 Dec 2024 14:36:33 +0000 Subject: [PATCH 31/32] ollama version Signed-off-by: Samet Akcay --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ebdbaf6b04..805795da40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 
@@ core = [ "open-clip-torch>=2.23.0,<2.26.1", ] openvino = ["openvino>=2024.0", "nncf>=2.10.0", "onnx>=1.16.0"] -vlm = ["ollama", "openai", "python-dotenv","transformers"] +vlm = ["ollama<0.4.0", "openai", "python-dotenv","transformers"] loggers = [ "comet-ml>=3.31.7", "gradio>=4", From 3e9c76de929d959a033617894572d5f37a143e56 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Wed, 11 Dec 2024 14:39:34 +0000 Subject: [PATCH 32/32] Exclude tiled ensemble unit tests Signed-off-by: Samet Akcay --- .../unit/pipelines/tiled_ensemble/__init__.py | 4 - .../unit/pipelines/tiled_ensemble/conftest.py | 151 ------- .../tiled_ensemble/dummy_config.yaml | 52 --- .../tiled_ensemble/test_components.py | 387 ------------------ .../tiled_ensemble/test_helper_functions.py | 113 ----- .../tiled_ensemble/test_prediction_data.py | 69 ---- .../pipelines/tiled_ensemble/test_tiler.py | 119 ------ 7 files changed, 895 deletions(-) delete mode 100644 tests/unit/pipelines/tiled_ensemble/__init__.py delete mode 100644 tests/unit/pipelines/tiled_ensemble/conftest.py delete mode 100644 tests/unit/pipelines/tiled_ensemble/dummy_config.yaml delete mode 100644 tests/unit/pipelines/tiled_ensemble/test_components.py delete mode 100644 tests/unit/pipelines/tiled_ensemble/test_helper_functions.py delete mode 100644 tests/unit/pipelines/tiled_ensemble/test_prediction_data.py delete mode 100644 tests/unit/pipelines/tiled_ensemble/test_tiler.py diff --git a/tests/unit/pipelines/tiled_ensemble/__init__.py b/tests/unit/pipelines/tiled_ensemble/__init__.py deleted file mode 100644 index a78a1ad659..0000000000 --- a/tests/unit/pipelines/tiled_ensemble/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Tiled ensemble unit tests.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/unit/pipelines/tiled_ensemble/conftest.py b/tests/unit/pipelines/tiled_ensemble/conftest.py deleted file mode 100644 index b4fad61ebb..0000000000 --- 
a/tests/unit/pipelines/tiled_ensemble/conftest.py +++ /dev/null @@ -1,151 +0,0 @@ -"""Fixtures that are used in tiled ensemble testing.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -from pathlib import Path -from tempfile import TemporaryDirectory - -import pytest -import torch -import yaml - -from anomalib.data import AnomalibDataModule -from anomalib.models import AnomalyModule -from anomalib.pipelines.tiled_ensemble.components.utils.ensemble_tiling import EnsembleTiler -from anomalib.pipelines.tiled_ensemble.components.utils.helper_functions import ( - get_ensemble_datamodule, - get_ensemble_model, - get_ensemble_tiler, -) -from anomalib.pipelines.tiled_ensemble.components.utils.prediction_data import EnsemblePredictions -from anomalib.pipelines.tiled_ensemble.components.utils.prediction_merging import PredictionMergingMechanism - - -@pytest.fixture(scope="module") -def get_ensemble_config(dataset_path: Path) -> dict: - """Return ensemble dummy config dict with corrected dataset path to dummy temp dir.""" - with Path("tests/unit/pipelines/tiled_ensemble/dummy_config.yaml").open(encoding="utf-8") as file: - config = yaml.safe_load(file) - # dummy dataset - config["data"]["init_args"]["root"] = dataset_path / "mvtec" - - return config - - -@pytest.fixture(scope="module") -def get_tiler(get_ensemble_config: dict) -> EnsembleTiler: - """Return EnsembleTiler object based on test dummy config.""" - config = get_ensemble_config - - return get_ensemble_tiler(config["tiling"], config["data"]) - - -@pytest.fixture(scope="module") -def get_model(get_ensemble_config: dict, get_tiler: EnsembleTiler) -> AnomalyModule: - """Return model prepared for tiled ensemble training.""" - config = get_ensemble_config - tiler = get_tiler - - return get_ensemble_model(config["TrainModels"]["model"], tiler) - - -@pytest.fixture(scope="module") -def get_datamodule(get_ensemble_config: dict, get_tiler: EnsembleTiler) -> 
AnomalibDataModule: - """Return ensemble datamodule.""" - config = get_ensemble_config - tiler = get_tiler - datamodule = get_ensemble_datamodule(config, tiler, (0, 0)) - datamodule.setup() - - return datamodule - - -@pytest.fixture(scope="module") -def get_tile_predictions(get_datamodule: AnomalibDataModule) -> EnsemblePredictions: - """Return tile predictions inside EnsemblePredictions object.""" - datamodule = get_datamodule - - data = EnsemblePredictions() - - for tile_index in [(0, 0), (0, 1), (1, 0), (1, 1)]: - datamodule.collate_fn.tile_index = tile_index - - tile_prediction = [] - batch = next(iter(datamodule.test_dataloader())) - - # make mock labels and scores - batch["pred_scores"] = torch.rand(batch["label"].shape) - batch["pred_labels"] = batch["pred_scores"] > 0.5 - - # set mock maps to just one channel of image - batch["anomaly_maps"] = batch["image"].clone()[:, 0, :, :].unsqueeze(1) - # set mock pred mask to mask but add channel - batch["pred_masks"] = batch["mask"].clone().unsqueeze(1) - - tile_prediction.append(batch) - - # store to prediction storage object - data.add_tile_prediction(tile_index, tile_prediction) - - return data - - -@pytest.fixture(scope="module") -def get_batch_predictions() -> list[dict]: - """Return mock batched predictions.""" - mock_data = { - "image": torch.rand((5, 3, 100, 100)), - "mask": (torch.rand((5, 100, 100)) > 0.5).type(torch.float32), - "anomaly_maps": torch.rand((5, 1, 100, 100)), - "label": torch.Tensor([0, 1, 1, 0, 1]), - "pred_scores": torch.rand(5), - "pred_labels": torch.ones(5), - "pred_masks": torch.zeros((5, 100, 100)), - } - - return [mock_data, mock_data] - - -@pytest.fixture(scope="module") -def get_merging_mechanism( - get_tile_predictions: EnsemblePredictions, - get_tiler: EnsembleTiler, -) -> PredictionMergingMechanism: - """Return ensemble prediction merging mechanism object.""" - tiler = get_tiler - predictions = get_tile_predictions - return PredictionMergingMechanism(predictions, tiler) - - 
-@pytest.fixture(scope="module") -def get_mock_stats_dir() -> Path: - """Get temp dir containing statistics.""" - with TemporaryDirectory() as temp_dir: - stats = { - "minmax": { - "anomaly_maps": { - "min": 1.9403648376464844, - "max": 209.91940307617188, - }, - "box_scores": { - "min": 0.5, - "max": 0.45, - }, - "pred_scores": { - "min": 9.390382766723633, - "max": 209.91940307617188, - }, - }, - "image_threshold": 0.1111, - "pixel_threshold": 0.1111, - } - stats_path = Path(temp_dir) / "weights" / "lightning" / "stats.json" - stats_path.parent.mkdir(parents=True) - - # save mock statistics - with stats_path.open("w", encoding="utf-8") as stats_file: - json.dump(stats, stats_file, ensure_ascii=False, indent=4) - - yield Path(temp_dir) diff --git a/tests/unit/pipelines/tiled_ensemble/dummy_config.yaml b/tests/unit/pipelines/tiled_ensemble/dummy_config.yaml deleted file mode 100644 index fcd4b7c716..0000000000 --- a/tests/unit/pipelines/tiled_ensemble/dummy_config.yaml +++ /dev/null @@ -1,52 +0,0 @@ -seed: 42 -accelerator: "cpu" -default_root_dir: "results" - -tiling: - tile_size: [50, 50] - stride: 50 - -normalization_stage: image # on what level we normalize, options: [tile, image, none] -thresholding: - method: F1AdaptiveThreshold # refer to documentation for thresholding methods - stage: image # stage at which we apply threshold, options: [tile, image] - -data: - class_path: anomalib.data.MVTec - init_args: - root: toBeSetup - category: dummy - train_batch_size: 32 - eval_batch_size: 32 - num_workers: 0 - task: segmentation - transform: null - train_transform: null - eval_transform: null - test_split_mode: from_dir - test_split_ratio: 0.2 - val_split_mode: same_as_test - val_split_ratio: 0.5 - image_size: [100, 100] - -SeamSmoothing: - apply: True # if this is applied, area around tile seams are is smoothed - sigma: 2 # sigma of gaussian filter used to smooth this area - width: 0.1 # width factor, multiplied by tile dimension gives the region width around seam 
which will be smoothed - -TrainModels: - model: - class_path: Fastflow - - metrics: - pixel: AUROC - image: AUROC - - trainer: - max_epochs: 1 - callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - patience: 1 - monitor: pixel_AUROC - mode: max diff --git a/tests/unit/pipelines/tiled_ensemble/test_components.py b/tests/unit/pipelines/tiled_ensemble/test_components.py deleted file mode 100644 index 0e3c0dcdd4..0000000000 --- a/tests/unit/pipelines/tiled_ensemble/test_components.py +++ /dev/null @@ -1,387 +0,0 @@ -"""Test working of tiled ensemble pipeline components.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import copy -from pathlib import Path -from tempfile import TemporaryDirectory - -import pytest -import torch - -from anomalib.data import get_datamodule -from anomalib.metrics import F1AdaptiveThreshold, ManualThreshold -from anomalib.pipelines.tiled_ensemble.components import ( - MergeJobGenerator, - MetricsCalculationJobGenerator, - NormalizationJobGenerator, - SmoothingJobGenerator, - StatisticsJobGenerator, - ThresholdingJobGenerator, -) -from anomalib.pipelines.tiled_ensemble.components.metrics_calculation import MetricsCalculationJob -from anomalib.pipelines.tiled_ensemble.components.smoothing import SmoothingJob -from anomalib.pipelines.tiled_ensemble.components.utils import NormalizationStage -from anomalib.pipelines.tiled_ensemble.components.utils.prediction_data import EnsemblePredictions -from anomalib.pipelines.tiled_ensemble.components.utils.prediction_merging import PredictionMergingMechanism - - -class TestMerging: - """Test merging mechanism and merging job.""" - - @staticmethod - def test_tile_merging(get_ensemble_config: dict, get_merging_mechanism: PredictionMergingMechanism) -> None: - """Test tiled data merging.""" - config = get_ensemble_config - merger = get_merging_mechanism - - # prepared original data - datamodule = get_datamodule(config) - 
datamodule.prepare_data() - datamodule.setup() - original_data = next(iter(datamodule.test_dataloader())) - - batch = merger.ensemble_predictions.get_batch_tiles(0) - - merged_image = merger.merge_tiles(batch, "image") - assert merged_image.equal(original_data["image"]) - - merged_mask = merger.merge_tiles(batch, "mask") - assert merged_mask.equal(original_data["mask"]) - - @staticmethod - def test_label_and_score_merging(get_merging_mechanism: PredictionMergingMechanism) -> None: - """Test label and score merging.""" - merger = get_merging_mechanism - scores = torch.rand(4, 10) - labels = scores > 0.5 - - mock_data = {(0, 0): {}, (0, 1): {}, (1, 0): {}, (1, 1): {}} - - for i, data in enumerate(mock_data.values()): - data["pred_scores"] = scores[i] - data["pred_labels"] = labels[i] - - merged = merger.merge_labels_and_scores(mock_data) - - assert merged["pred_scores"].equal(scores.mean(dim=0)) - - assert merged["pred_labels"].equal(labels.any(dim=0)) - - @staticmethod - def test_merge_job( - get_tile_predictions: EnsemblePredictions, - get_ensemble_config: dict, - get_merging_mechanism: PredictionMergingMechanism, - ) -> None: - """Test merging job execution.""" - config = get_ensemble_config - predictions = copy.deepcopy(get_tile_predictions) - merging_mechanism = get_merging_mechanism - - merging_job_generator = MergeJobGenerator(tiling_args=config["tiling"], data_args=config["data"]) - merging_job = next(merging_job_generator.generate_jobs(prev_stage_result=predictions)) - - merged_direct = merging_mechanism.merge_tile_predictions(0) - merged_with_job = merging_job.run()[0] - - # check that merging by job is same as with the mechanism directly - for key, value in merged_direct.items(): - if isinstance(value, torch.Tensor): - assert merged_with_job[key].equal(value) - elif isinstance(value, list) and isinstance(value[0], torch.Tensor): - # boxes - assert all(j.equal(d) for j, d in zip(merged_with_job[key], value, strict=False)) - else: - assert 
merged_with_job[key] == value - - -class TestStatsCalculation: - """Test post-processing statistics calculations.""" - - @staticmethod - @pytest.mark.parametrize( - ("threshold_str", "threshold_cls"), - [("F1AdaptiveThreshold", F1AdaptiveThreshold), ("ManualThreshold", ManualThreshold)], - ) - def test_threshold_method(threshold_str: str, threshold_cls: type, get_ensemble_config: dict) -> None: - """Test that correct thresholding method is used.""" - config = copy.deepcopy(get_ensemble_config) - config["thresholding"]["method"] = threshold_str - - stats_job_generator = StatisticsJobGenerator(Path("mock"), threshold_str) - stats_job = next(stats_job_generator.generate_jobs(None, None)) - - assert isinstance(stats_job.image_threshold, threshold_cls) - - @staticmethod - def test_stats_run(project_path: Path) -> None: - """Test execution of statistics calc. job.""" - mock_preds = [ - { - "pred_scores": torch.rand(4), - "label": torch.ones(4), - "anomaly_maps": torch.rand(4, 1, 50, 50), - "mask": torch.ones(4, 1, 50, 50), - }, - ] - - stats_job_generator = StatisticsJobGenerator(project_path, "F1AdaptiveThreshold") - stats_job = next(stats_job_generator.generate_jobs(None, mock_preds)) - - results = stats_job.run() - - assert "minmax" in results - assert "image_threshold" in results - assert "pixel_threshold" in results - - # save as it's removed from results - save_path = results["save_path"] - stats_job.save(results) - assert Path(save_path).exists() - - @staticmethod - @pytest.mark.parametrize( - ("key", "values"), - [ - ("anomaly_maps", [torch.rand(5, 1, 50, 50), torch.rand(5, 1, 50, 50)]), - ("pred_scores", [torch.rand(5), torch.rand(5)]), - ], - ) - def test_minmax(key: str, values: list) -> None: - """Test minmax stats calculation.""" - # add given keys to test all possible sources of minmax - data = [ - {"pred_scores": torch.rand(5), "label": torch.ones(5), key: values[0]}, - {"pred_scores": torch.rand(5), "label": torch.ones(5), key: values[1]}, - ] - - 
stats_job_generator = StatisticsJobGenerator(Path("mock"), "F1AdaptiveThreshold") - stats_job = next(stats_job_generator.generate_jobs(None, data)) - results = stats_job.run() - - if isinstance(values[0], list): - values[0] = torch.cat(values[0]) - values[1] = torch.cat(values[1]) - values = torch.stack(values) - - assert results["minmax"][key]["min"] == torch.min(values) - assert results["minmax"][key]["max"] == torch.max(values) - - @staticmethod - @pytest.mark.parametrize( - ("labels", "preds", "target_threshold"), - [ - (torch.Tensor([0, 0, 0, 1, 1]), torch.Tensor([2.3, 1.6, 2.6, 7.9, 3.3]), 3.3), # standard case - (torch.Tensor([1, 0, 0, 0]), torch.Tensor([4, 3, 2, 1]), 4), # 100% recall for all thresholds - ], - ) - def test_threshold(labels: torch.Tensor, preds: torch.Tensor, target_threshold: float) -> None: - """Test threshold calculation job.""" - data = [ - { - "label": labels, - "mask": labels, - "pred_scores": preds, - "anomaly_maps": preds, - }, - ] - - stats_job_generator = StatisticsJobGenerator(Path("mock"), "F1AdaptiveThreshold") - stats_job = next(stats_job_generator.generate_jobs(None, data)) - results = stats_job.run() - - assert round(results["image_threshold"], 5) == target_threshold - assert round(results["pixel_threshold"], 5) == target_threshold - - -class TestMetrics: - """Test ensemble metrics.""" - - @pytest.fixture(scope="class") - @staticmethod - def get_ensemble_metrics_job( - get_ensemble_config: dict, - get_batch_predictions: list[dict], - ) -> tuple[MetricsCalculationJob, str]: - """Return Metrics calculation job and path to directory where metrics csv will be saved.""" - config = get_ensemble_config - with TemporaryDirectory() as tmp_dir: - metrics = MetricsCalculationJobGenerator( - config["accelerator"], - root_dir=Path(tmp_dir), - task=config["data"]["init_args"]["task"], - metrics=config["TrainModels"]["metrics"], - normalization_stage=NormalizationStage(config["normalization_stage"]), - ) - - mock_predictions = 
get_batch_predictions - - return next(metrics.generate_jobs(prev_stage_result=copy.deepcopy(mock_predictions))), tmp_dir - - @staticmethod - def test_metrics_result(get_ensemble_metrics_job: tuple[MetricsCalculationJob, str]) -> None: - """Test metrics result.""" - metrics_job, _ = get_ensemble_metrics_job - - result = metrics_job.run() - - assert "pixel_AUROC" in result - assert "image_AUROC" in result - - @staticmethod - def test_metrics_saving(get_ensemble_metrics_job: tuple[MetricsCalculationJob, str]) -> None: - """Test metrics saving to csv.""" - metrics_job, tmp_dir = get_ensemble_metrics_job - - result = metrics_job.run() - metrics_job.save(result) - assert (Path(tmp_dir) / "metric_results.csv").exists() - - -class TestJoinSmoothing: - """Test JoinSmoothing job responsible for smoothing area at tile seams.""" - - @pytest.fixture(scope="class") - @staticmethod - def get_join_smoothing_job(get_ensemble_config: dict, get_batch_predictions: list[dict]) -> SmoothingJob: - """Make and return SmoothingJob instance.""" - config = get_ensemble_config - job_gen = SmoothingJobGenerator( - accelerator=config["accelerator"], - tiling_args=config["tiling"], - data_args=config["data"], - ) - # copy since smoothing changes data - mock_predictions = copy.deepcopy(get_batch_predictions) - return next(job_gen.generate_jobs(config["SeamSmoothing"], mock_predictions)) - - @staticmethod - def test_mask(get_join_smoothing_job: SmoothingJob) -> None: - """Test seam mask in case where tiles don't overlap.""" - smooth = get_join_smoothing_job - - join_index = smooth.tiler.tile_size_h, smooth.tiler.tile_size_w - - # seam should be covered by True - assert smooth.seam_mask[join_index] - - # non-seam region should be false - assert not smooth.seam_mask[0, 0] - assert not smooth.seam_mask[-1, -1] - - @staticmethod - def test_mask_overlapping(get_ensemble_config: dict, get_batch_predictions: list[dict]) -> None: - """Test seam mask in case where tiles overlap.""" - config = 
copy.deepcopy(get_ensemble_config) - # tile size = 50, stride = 25 -> overlapping - config["tiling"]["stride"] = 25 - job_gen = SmoothingJobGenerator( - accelerator=config["accelerator"], - tiling_args=config["tiling"], - data_args=config["data"], - ) - mock_predictions = copy.deepcopy(get_batch_predictions) - smooth = next(job_gen.generate_jobs(config["SeamSmoothing"], mock_predictions)) - - join_index = smooth.tiler.stride_h, smooth.tiler.stride_w - - # overlap seam should be covered by True - assert smooth.seam_mask[join_index] - assert smooth.seam_mask[-join_index[0], -join_index[1]] - - # non-seam region should be false - assert not smooth.seam_mask[0, 0] - assert not smooth.seam_mask[-1, -1] - - @staticmethod - def test_smoothing(get_join_smoothing_job: SmoothingJob, get_batch_predictions: list[dict]) -> None: - """Test smoothing job run.""" - original_data = get_batch_predictions - # fixture makes a copy of data - smooth = get_join_smoothing_job - - # take first batch - smoothed = smooth.run()[0] - join_index = smooth.tiler.tile_size_h, smooth.tiler.tile_size_w - - # join sections should be processed - assert not smoothed["anomaly_maps"][:, :, join_index].equal(original_data[0]["anomaly_maps"][:, :, join_index]) - - # non-join section shouldn't be changed - assert smoothed["anomaly_maps"][:, :, 0, 0].equal(original_data[0]["anomaly_maps"][:, :, 0, 0]) - - -def test_normalization(get_batch_predictions: list[dict], project_path: Path) -> None: - """Test normalization step.""" - original_predictions = copy.deepcopy(get_batch_predictions) - - for batch in original_predictions: - batch["anomaly_maps"] *= 100 - batch["pred_scores"] *= 100 - - # # get and save stats using stats job on predictions - stats_job_generator = StatisticsJobGenerator(project_path, "F1AdaptiveThreshold") - stats_job = next(stats_job_generator.generate_jobs(prev_stage_result=original_predictions)) - stats = stats_job.run() - stats_job.save(stats) - - # normalize predictions based on obtained 
stats - norm_job_generator = NormalizationJobGenerator(root_dir=project_path) - # copy as this changes preds - norm_job = next(norm_job_generator.generate_jobs(prev_stage_result=original_predictions)) - normalized_predictions = norm_job.run() - - for batch in normalized_predictions: - assert (batch["anomaly_maps"] >= 0).all() - assert (batch["anomaly_maps"] <= 1).all() - - assert (batch["pred_scores"] >= 0).all() - assert (batch["pred_scores"] <= 1).all() - - -class TestThresholding: - """Test tiled ensemble thresholding stage.""" - - @pytest.fixture(scope="class") - @staticmethod - def get_threshold_job(get_mock_stats_dir: Path) -> callable: - """Return a function that takes prediction data and runs threshold job.""" - thresh_job_generator = ThresholdingJobGenerator( - root_dir=get_mock_stats_dir, - normalization_stage=NormalizationStage.IMAGE, - ) - - def thresh_helper(preds: dict) -> list | None: - thresh_job = next(thresh_job_generator.generate_jobs(prev_stage_result=preds)) - return thresh_job.run() - - return thresh_helper - - @staticmethod - def test_score_threshold(get_threshold_job: callable) -> None: - """Test anomaly score thresholding.""" - thresholding = get_threshold_job - - data = [{"pred_scores": torch.tensor([0.7, 0.8, 0.1, 0.33, 0.5])}] - - thresholded = thresholding(data)[0] - - assert thresholded["pred_labels"].equal(torch.tensor([True, True, False, False, True])) - - @staticmethod - def test_anomap_threshold(get_threshold_job: callable) -> None: - """Test anomaly map thresholding.""" - thresholding = get_threshold_job - - data = [ - { - "pred_scores": torch.tensor([0.7, 0.8, 0.1, 0.33, 0.5]), - "anomaly_maps": torch.tensor([[0.7, 0.8, 0.1], [0.33, 0.5, 0.1]]), - }, - ] - - thresholded = thresholding(data)[0] - - assert thresholded["pred_masks"].equal(torch.tensor([[True, True, False], [False, True, False]])) diff --git a/tests/unit/pipelines/tiled_ensemble/test_helper_functions.py b/tests/unit/pipelines/tiled_ensemble/test_helper_functions.py 
deleted file mode 100644 index 06e5864cef..0000000000 --- a/tests/unit/pipelines/tiled_ensemble/test_helper_functions.py +++ /dev/null @@ -1,113 +0,0 @@ -"""Test ensemble helper functions.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from pathlib import Path - -import pytest -from jsonargparse import Namespace -from lightning.pytorch.callbacks import EarlyStopping - -from anomalib.callbacks.normalization import _MinMaxNormalizationCallback -from anomalib.models import AnomalyModule -from anomalib.pipelines.tiled_ensemble.components.utils import NormalizationStage -from anomalib.pipelines.tiled_ensemble.components.utils.ensemble_tiling import EnsembleTiler, TileCollater -from anomalib.pipelines.tiled_ensemble.components.utils.helper_functions import ( - get_ensemble_datamodule, - get_ensemble_engine, - get_ensemble_model, - get_ensemble_tiler, - get_threshold_values, - parse_trainer_kwargs, -) - - -class TestHelperFunctions: - """Test ensemble helper functions.""" - - @staticmethod - def test_ensemble_datamodule(get_ensemble_config: dict, get_tiler: EnsembleTiler) -> None: - """Test that datamodule is created and has correct collate function.""" - config = get_ensemble_config - tiler = get_tiler - datamodule = get_ensemble_datamodule(config, tiler, (0, 0)) - - assert isinstance(datamodule.collate_fn, TileCollater) - - @staticmethod - def test_ensemble_model(get_ensemble_config: dict, get_tiler: EnsembleTiler) -> None: - """Test that model is successfully created with correct input shape.""" - config = get_ensemble_config - tiler = get_tiler - model = get_ensemble_model(config["TrainModels"]["model"], tiler) - - assert model.input_size == tuple(config["tiling"]["tile_size"]) - - @staticmethod - def test_tiler(get_ensemble_config: dict) -> None: - """Test that tiler is successfully instantiated.""" - config = get_ensemble_config - - tiler = get_ensemble_tiler(config["tiling"], config["data"]) - assert isinstance(tiler, 
EnsembleTiler) - - @staticmethod - def test_trainer_kwargs(get_ensemble_config: dict) -> None: - """Test that objects are correctly constructed from kwargs.""" - config = get_ensemble_config - - objects = parse_trainer_kwargs(config["TrainModels"]["trainer"]) - assert isinstance(objects, Namespace) - # verify that early stopping is parsed and added to callbacks - assert isinstance(objects.callbacks[0], EarlyStopping) - - @staticmethod - @pytest.mark.parametrize( - "normalization_stage", - [NormalizationStage.NONE, NormalizationStage.IMAGE, NormalizationStage.TILE], - ) - def test_threshold_values(normalization_stage: NormalizationStage, get_mock_stats_dir: Path) -> None: - """Test that threshold values are correctly set based on normalization stage.""" - stats_dir = get_mock_stats_dir - - i_thresh, p_thresh = get_threshold_values(normalization_stage, stats_dir) - - if normalization_stage != NormalizationStage.NONE: - # minmax normalization sets thresholds to 0.5 - assert i_thresh == p_thresh == 0.5 - else: - assert i_thresh == p_thresh == 0.1111 - - -class TestEnsembleEngine: - """Test ensemble engine configuration.""" - - @staticmethod - @pytest.mark.parametrize( - "normalization_stage", - [NormalizationStage.NONE, NormalizationStage.IMAGE, NormalizationStage.TILE], - ) - def test_normalisation(normalization_stage: NormalizationStage, get_model: AnomalyModule) -> None: - """Test that normalization callback is correctly initialized.""" - engine = get_ensemble_engine( - tile_index=(0, 0), - accelerator="cpu", - devices="1", - root_dir=Path("mock"), - normalization_stage=normalization_stage, - ) - - engine._setup_anomalib_callbacks(get_model) # noqa: SLF001 - - # verify that only in case of tile level normalization the callback is present - if normalization_stage == NormalizationStage.TILE: - assert any( - isinstance(x, _MinMaxNormalizationCallback) - for x in engine._cache.args["callbacks"] # noqa: SLF001 - ) - else: - assert not any( - isinstance(x, 
_MinMaxNormalizationCallback) - for x in engine._cache.args["callbacks"] # noqa: SLF001 - ) diff --git a/tests/unit/pipelines/tiled_ensemble/test_prediction_data.py b/tests/unit/pipelines/tiled_ensemble/test_prediction_data.py deleted file mode 100644 index 7185f1e2ca..0000000000 --- a/tests/unit/pipelines/tiled_ensemble/test_prediction_data.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Test tiled prediction storage class.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import copy -from collections.abc import Callable - -import torch -from torch import Tensor - -from anomalib.data import AnomalibDataModule -from anomalib.pipelines.tiled_ensemble.components.utils.prediction_data import EnsemblePredictions - - -class TestPredictionData: - """Test EnsemblePredictions class, used for tiled prediction storage.""" - - @staticmethod - def store_all(data: EnsemblePredictions, datamodule: AnomalibDataModule) -> dict: - """Store the tiled predictions in the EnsemblePredictions object.""" - tile_dict = {} - for tile_index in [(0, 0), (0, 1), (1, 0), (1, 1)]: - datamodule.collate_fn.tile_index = tile_index - - tile_prediction = [] - for batch in iter(datamodule.train_dataloader()): - # set mock maps to just one channel of image - batch["anomaly_maps"] = batch["image"].clone()[:, 0, :, :].unsqueeze(1) - # set mock pred mask to mask but add channel - batch["pred_masks"] = batch["mask"].clone().unsqueeze(1) - tile_prediction.append(batch) - # save original - tile_dict[tile_index] = copy.deepcopy(tile_prediction) - # store to prediction storage object - data.add_tile_prediction(tile_index, tile_prediction) - - return tile_dict - - @staticmethod - def verify_equal(name: str, tile_dict: dict, storage: EnsemblePredictions, eq_funct: Callable) -> bool: - """Verify that all data at same tile index and same batch index matches.""" - batch_num = len(tile_dict[0, 0]) - - for batch_i in range(batch_num): - # batch is dict where key: tile index and val 
is batched data of that tile - curr_batch = storage.get_batch_tiles(batch_i) - - # go over all indices of current batch of stored data - for tile_index, stored_data_batch in curr_batch.items(): - stored_data = stored_data_batch[name] - # get original data dict at current tile index and batch index - original_data = tile_dict[tile_index][batch_i][name] - if isinstance(original_data, Tensor): - if not eq_funct(original_data, stored_data): - return False - elif original_data != stored_data: - return False - - return True - - def test_prediction_object(self, get_datamodule: AnomalibDataModule) -> None: - """Test prediction storage class.""" - datamodule = get_datamodule - storage = EnsemblePredictions() - original = self.store_all(storage, datamodule) - - for name in original[0, 0][0]: - assert self.verify_equal(name, original, storage, torch.equal), f"{name} doesn't match" diff --git a/tests/unit/pipelines/tiled_ensemble/test_tiler.py b/tests/unit/pipelines/tiled_ensemble/test_tiler.py deleted file mode 100644 index 96b6c0e7bc..0000000000 --- a/tests/unit/pipelines/tiled_ensemble/test_tiler.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Tiling related tests for tiled ensemble.""" - -# Copyright (C) 2023-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import copy - -import pytest -import torch - -from anomalib.data import AnomalibDataModule -from anomalib.pipelines.tiled_ensemble.components.utils.helper_functions import get_ensemble_tiler - -tiler_config = { - "tiling": { - "tile_size": 256, - "stride": 256, - }, - "data": {"init_args": {"image_size": 512}}, -} - -tiler_config_overlap = { - "tiling": { - "tile_size": 256, - "stride": 128, - }, - "data": {"init_args": {"image_size": 512}}, -} - - -class TestTiler: - """EnsembleTiler tests.""" - - @staticmethod - @pytest.mark.parametrize( - ("input_shape", "config", "expected_shape"), - [ - (torch.Size([5, 3, 512, 512]), tiler_config, torch.Size([2, 2, 5, 3, 256, 256])), - (torch.Size([5, 3, 512, 512]), 
tiler_config_overlap, torch.Size([3, 3, 5, 3, 256, 256])), - (torch.Size([5, 3, 500, 500]), tiler_config, torch.Size([2, 2, 5, 3, 256, 256])), - (torch.Size([5, 3, 500, 500]), tiler_config_overlap, torch.Size([3, 3, 5, 3, 256, 256])), - ], - ) - def test_basic_tile_for_ensemble(input_shape: torch.Size, config: dict, expected_shape: torch.Size) -> None: - """Test basic tiling of data.""" - config = copy.deepcopy(config) - config["data"]["init_args"]["image_size"] = input_shape[-1] - tiler = get_ensemble_tiler(config["tiling"], config["data"]) - - images = torch.rand(size=input_shape) - tiled = tiler.tile(images) - - assert tiled.shape == expected_shape - - @staticmethod - @pytest.mark.parametrize( - ("input_shape", "config"), - [ - (torch.Size([5, 3, 512, 512]), tiler_config), - (torch.Size([5, 3, 512, 512]), tiler_config_overlap), - (torch.Size([5, 3, 500, 500]), tiler_config), - (torch.Size([5, 3, 500, 500]), tiler_config_overlap), - ], - ) - def test_basic_tile_reconstruction(input_shape: torch.Size, config: dict) -> None: - """Test basic reconstruction of tiled data.""" - config = copy.deepcopy(config) - config["data"]["init_args"]["image_size"] = input_shape[-1] - - tiler = get_ensemble_tiler(config["tiling"], config["data"]) - - images = torch.rand(size=input_shape) - tiled = tiler.tile(images.clone()) - untiled = tiler.untile(tiled) - - assert images.shape == untiled.shape - assert images.equal(untiled) - - @staticmethod - @pytest.mark.parametrize( - ("input_shape", "config"), - [ - (torch.Size([5, 3, 512, 512]), tiler_config), - (torch.Size([5, 3, 500, 500]), tiler_config), - ], - ) - def test_untile_different_instance(input_shape: torch.Size, config: dict) -> None: - """Test untiling with different Tiler instance.""" - config = copy.deepcopy(config) - config["data"]["init_args"]["image_size"] = input_shape[-1] - tiler_1 = get_ensemble_tiler(config["tiling"], config["data"]) - - tiler_2 = get_ensemble_tiler(config["tiling"], config["data"]) - - images = 
torch.rand(size=input_shape) - tiled = tiler_1.tile(images.clone()) - - untiled = tiler_2.untile(tiled) - - # untiling should work even with different instance of tiler - assert images.shape == untiled.shape - assert images.equal(untiled) - - -class TestTileCollater: - """Test tile collater.""" - - @staticmethod - def test_collate_tile_shape(get_ensemble_config: dict, get_datamodule: AnomalibDataModule) -> None: - """Test that collate function successfully tiles the image.""" - config = get_ensemble_config - # datamodule with tile collater - datamodule = get_datamodule - - tile_w, tile_h = config["tiling"]["tile_size"] - - batch = next(iter(datamodule.train_dataloader())) - assert batch["image"].shape[1:] == (3, tile_w, tile_h) - assert batch["mask"].shape[1:] == (tile_w, tile_h)