diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py
index decc2e82..0693b450 100644
--- a/src/spatialdata_io/__init__.py
+++ b/src/spatialdata_io/__init__.py
@@ -1,6 +1,7 @@
 from importlib.metadata import version
 
 from spatialdata_io.readers.cosmx import cosmx
+from spatialdata_io.readers.steinbock import steinbock
 from spatialdata_io.readers.visium import visium
 from spatialdata_io.readers.xenium import xenium
 
@@ -8,6 +9,7 @@
     "visium",
     "xenium",
     "cosmx",
+    "steinbock",
 ]
 
 __version__ = version("spatialdata-io")
diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index 10682629..df51e07b 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -83,3 +83,18 @@ class VisiumKeys(ModeEnum):
     SPOTS_FILE = "spatial/tissue_positions.csv"
     SPOTS_X = "pxl_row_in_fullres"
     SPOTS_Y = "pxl_col_in_fullres"
+
+
+@unique
+class SteinbockKeys(ModeEnum):
+    """Keys for *Steinbock* formatted dataset."""
+
+    # files and directories
+    CELLS_FILE = "cells.h5ad"
+    DEEPCELL_MASKS_DIR = "masks_deepcell"
+    ILASTIK_MASKS_DIR = "masks_ilastik"
+    IMAGES_DIR = "ome"
+
+    # suffixes for images and labels
+    IMAGE_SUFFIX = ".ome.tiff"
+    LABEL_SUFFIX = ".tiff"
diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py
new file mode 100644
index 00000000..469c5c06
--- /dev/null
+++ b/src/spatialdata_io/readers/steinbock.py
@@ -0,0 +1,113 @@
+from __future__ import annotations
+
+import os
+from collections.abc import Mapping
+from pathlib import Path
+from types import MappingProxyType
+from typing import Any, Literal, Union
+
+import anndata as ad
+from dask_image.imread import imread
+from multiscale_spatial_image.multiscale_spatial_image import MultiscaleSpatialImage
+from spatial_image import SpatialImage
+from spatialdata import Image2DModel, Labels2DModel, SpatialData, TableModel
+from spatialdata._logging import logger
+
+from spatialdata_io._constants._constants import SteinbockKeys
+
+__all__ = ["steinbock"]
+
+
+def _list_samples(directory: Path, suffix: str) -> list[str]:
+    """Return the sorted names of the files in ``directory`` ending with ``suffix``, without that suffix."""
+    # Match the suffix at the end of the name only, so unrelated files (e.g. hidden files) are skipped.
+    return sorted(name[: -len(suffix)] for name in os.listdir(directory) if name.endswith(suffix))
+
+
+def _get_images(
+    path: Path,
+    sample: str,
+    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
+) -> Union[SpatialImage, MultiscaleSpatialImage]:
+    """Read the image of ``sample`` from the images directory and parse it with ``Image2DModel``."""
+    image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs)
+    return Image2DModel.parse(image, **image_models_kwargs)
+
+
+def _get_labels(
+    path: Path,
+    sample: str,
+    labels_kind: str,
+    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
+) -> Union[SpatialImage, MultiscaleSpatialImage]:
+    """Read the mask of ``sample`` from the ``labels_kind`` directory and parse it with ``Labels2DModel``."""
+    # Singleton axes are squeezed out before the array is handed to the labels model.
+    image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze()
+    return Labels2DModel.parse(image, **image_models_kwargs)
+
+
+def steinbock(
+    path: str | Path,
+    labels_kind: Literal["deepcell", "ilastik"] = "deepcell",
+    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
+) -> SpatialData:
+    """
+    Read a *Steinbock* output into a SpatialData object.
+
+    .. seealso::
+
+        - `Steinbock pipeline <https://bodenmillergroup.github.io/steinbock/latest/>`_.
+
+    Parameters
+    ----------
+    path
+        Path to the dataset.
+    labels_kind
+        Kind of labels to use. Either ``deepcell`` or ``ilastik``.
+    imread_kwargs
+        Keyword arguments to pass to the image reader.
+    image_models_kwargs
+        Keyword arguments to pass to the image models.
+
+    Returns
+    -------
+    :class:`spatialdata.SpatialData`
+
+    Raises
+    ------
+    ValueError
+        If a sample that has both an image and labels is missing from the table.
+    """
+    path = Path(path)
+    labels_dir = SteinbockKeys(f"masks_{labels_kind}")
+
+    samples = _list_samples(path / SteinbockKeys.IMAGES_DIR, SteinbockKeys.IMAGE_SUFFIX)
+    labelled = set(_list_samples(path / labels_dir, SteinbockKeys.LABEL_SUFFIX))
+
+    unlabelled = set(samples) - labelled
+    if unlabelled:
+        logger.warning(f"Samples {unlabelled} have images but no labels. They will be ignored.")
+        # Honour the warning: trying to read the missing label files would fail.
+        samples = [sample for sample in samples if sample in labelled]
+
+    images = {}
+    labels = {}
+    for sample in samples:
+        images[sample] = _get_images(path, sample, imread_kwargs, image_models_kwargs)
+        labels[sample] = _get_labels(path, sample, labels_dir, imread_kwargs, image_models_kwargs)
+
+    # `ad.read` is a deprecated alias of `ad.read_h5ad`.
+    adata = ad.read_h5ad(path / SteinbockKeys.CELLS_FILE)
+    # NOTE(review): assumes observation names look like "<prefix> <cell id>" - confirm against steinbock exports.
+    adata.obs["cell_id"] = adata.obs.index.str.split(" ").map(lambda parts: parts[1])
+    # Strip the ".tiff" extension from the `image` column to obtain the region (sample) names.
+    regions = adata.obs.image.str.replace(".tiff", "", regex=False)
+    adata.obs["region"] = regions
+    if set(samples).difference(regions.unique()):
+        raise ValueError("Samples in table and images are inconsistent, please check.")
+    table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id")
+
+    return SpatialData(images=images, labels=labels, table=table)