Skip to content

add steinbock #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/spatialdata_io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from importlib.metadata import version

from spatialdata_io.readers.cosmx import cosmx
from spatialdata_io.readers.steinbock import steinbock
from spatialdata_io.readers.visium import visium
from spatialdata_io.readers.xenium import xenium

# Names exported by ``from spatialdata_io import *``: one entry per reader
# function imported above.
__all__ = [
    "visium",
    "xenium",
    "cosmx",
    "steinbock",
]

# Version of the installed "spatialdata-io" distribution, looked up through
# ``importlib.metadata.version`` rather than hard-coded here.
__version__ = version("spatialdata-io")
15 changes: 15 additions & 0 deletions src/spatialdata_io/_constants/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,18 @@ class VisiumKeys(ModeEnum):
SPOTS_FILE = "spatial/tissue_positions.csv"
SPOTS_X = "pxl_row_in_fullres"
SPOTS_Y = "pxl_col_in_fullres"


@unique
class SteinbockKeys(ModeEnum):
    """Keys for *Steinbock* formatted dataset.

    The members are file names, directory names and file suffixes, all
    relative to the root directory of the dataset. ``@unique`` guarantees
    that no two members share the same value.
    """

    # files and directories
    # AnnData file holding the cell table.
    CELLS_FILE = "cells.h5ad"
    # Directories holding the label masks; the values follow the
    # ``masks_<kind>`` pattern.
    DEEPCELL_MASKS_DIR = "masks_deepcell"
    ILASTIK_MASKS_DIR = "masks_ilastik"
    # Directory holding the images.
    IMAGES_DIR = "ome"

    # suffixes for images and labels
    # NOTE: ".ome.tiff" also ends with ".tiff", so the two suffixes only tell
    # images and labels apart together with the directory they are found in.
    IMAGE_SUFFIX = ".ome.tiff"
    LABEL_SUFFIX = ".tiff"
107 changes: 107 additions & 0 deletions src/spatialdata_io/readers/steinbock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from __future__ import annotations

import os
from collections.abc import Mapping
from pathlib import Path
from types import MappingProxyType
from typing import Any, Literal, Union

import anndata as ad
from dask_image.imread import imread
from multiscale_spatial_image.multiscale_spatial_image import MultiscaleSpatialImage
from spatial_image import SpatialImage
from spatialdata import Image2DModel, Labels2DModel, SpatialData, TableModel
from spatialdata._logging import logger

from spatialdata_io._constants._constants import SteinbockKeys

# Only the reader entry point is exported; the underscore-prefixed helpers
# defined in this module are private.
__all__ = ["steinbock"]


def _get_images(
    path: Path,
    sample: str,
    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
) -> Union[SpatialImage, MultiscaleSpatialImage]:
    """
    Read the image of one sample and parse it with ``Image2DModel``.

    Parameters
    ----------
    path
        Root directory of the dataset.
    sample
        Sample name, i.e. the image file name without its suffix.
    imread_kwargs
        Keyword arguments forwarded to ``imread``.
    image_models_kwargs
        Keyword arguments forwarded to ``Image2DModel.parse``.

    Returns
    -------
    The value returned by ``Image2DModel.parse``.
    """
    file_name = f"{sample}{SteinbockKeys.IMAGE_SUFFIX}"
    image_file = path / SteinbockKeys.IMAGES_DIR / file_name
    pixels = imread(image_file, **imread_kwargs)
    return Image2DModel.parse(pixels, **image_models_kwargs)


def _get_labels(
    path: Path,
    sample: str,
    labels_kind: str,
    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
) -> Union[SpatialImage, MultiscaleSpatialImage]:
    """
    Read the label mask of one sample and parse it with ``Labels2DModel``.

    Parameters
    ----------
    path
        Root directory of the dataset.
    sample
        Sample name, i.e. the mask file name without its suffix.
    labels_kind
        Name of the directory (relative to ``path``) that holds the masks.
    imread_kwargs
        Keyword arguments forwarded to ``imread``.
    image_models_kwargs
        Keyword arguments forwarded to ``Labels2DModel.parse``.

    Returns
    -------
    The value returned by ``Labels2DModel.parse``.
    """
    file_name = f"{sample}{SteinbockKeys.LABEL_SUFFIX}"
    mask_file = path / labels_kind / file_name
    loaded = imread(mask_file, **imread_kwargs)
    # Drop every length-1 axis from the loaded array before parsing it.
    mask = loaded.squeeze()
    return Labels2DModel.parse(mask, **image_models_kwargs)


def steinbock(
    path: str | Path,
    labels_kind: Literal["deepcell", "ilastik"] = "deepcell",
    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
) -> SpatialData:
    """
    Read a *Steinbock* output into a SpatialData object.

    .. seealso::

        - `Steinbock pipeline <https://bodenmillergroup.github.io/steinbock/latest/>`_.

    Only files whose name ends with the expected image/label suffix are
    treated as samples; any other entry of the image and mask directories is
    skipped. Samples that have an image but no label mask are reported with a
    warning and left out of the result.

    Parameters
    ----------
    path
        Path to the dataset.
    labels_kind
        Kind of labels to use. Either ``deepcell`` or ``ilastik``.
    imread_kwargs
        Keyword arguments to pass to the image reader.
    image_models_kwargs
        Keyword arguments to pass to the image models.

    Returns
    -------
    :class:`spatialdata.SpatialData`

    Raises
    ------
    ValueError
        If a sample that is read from disk does not appear in the table.
    """
    path = Path(path)

    # Map the user-facing kind ("deepcell"/"ilastik") onto the matching masks
    # directory key. A dedicated local is used instead of rebinding the
    # ``labels_kind`` parameter to a value of a different type.
    labels_dir = SteinbockKeys(f"masks_{labels_kind}")

    image_suffix = SteinbockKeys.IMAGE_SUFFIX
    label_suffix = SteinbockKeys.LABEL_SUFFIX
    # Keep only entries that really end with the expected suffix and strip
    # exactly that suffix, so stray files (e.g. ``.DS_Store``) are not mistaken
    # for samples. Sorting makes the element order independent of the
    # arbitrary order returned by ``os.listdir``.
    samples = sorted(
        name[: -len(image_suffix)]
        for name in os.listdir(path / SteinbockKeys.IMAGES_DIR)
        if name.endswith(image_suffix)
    )
    samples_labels = {
        name[: -len(label_suffix)] for name in os.listdir(path / labels_dir) if name.endswith(label_suffix)
    }

    missing_labels = set(samples).difference(samples_labels)
    if missing_labels:
        logger.warning(f"Samples {missing_labels} have images but no labels. " "They will be ignored.")
        # Actually ignore them, as the warning promises: reading their
        # (non-existent) mask file below would otherwise fail.
        samples = [sample for sample in samples if sample in samples_labels]

    images = {}
    labels = {}
    for sample in samples:
        images[sample] = _get_images(
            path,
            sample,
            imread_kwargs,
            image_models_kwargs,
        )
        labels[sample] = _get_labels(
            path,
            sample,
            labels_dir,
            imread_kwargs,
            image_models_kwargs,
        )

    # ``ad.read`` is a deprecated alias of ``ad.read_h5ad``; call the latter directly.
    adata = ad.read_h5ad(path / SteinbockKeys.CELLS_FILE)
    # The second space-separated token of each obs name is used as the cell id.
    # NOTE(review): the ids are kept as strings here - confirm whether the
    # instance key is expected to be an integer matching the mask values.
    idx = adata.obs.index.str.split(" ").map(lambda x: x[1])
    # The ``image`` column holds file names; dropping ".tiff" yields the region name.
    regions = adata.obs.image.str.replace(".tiff", "", regex=False)
    adata.obs["cell_id"] = idx
    adata.obs["region"] = regions
    # Every sample that was read must be annotated by the table.
    if set(samples).difference(regions.unique()):
        raise ValueError("Samples in table and images are inconsistent, please check.")
    table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id")

    return SpatialData(images=images, labels=labels, table=table)