From a485be606d0426f62efe030fd54cc5b86b05f89e Mon Sep 17 00:00:00 2001 From: giovp Date: Sat, 14 Jan 2023 18:29:20 +0100 Subject: [PATCH 01/19] add steinbock --- src/spatialdata_io/_constants/_constants.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index 10682629..b62ef8e5 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -83,3 +83,18 @@ class VisiumKeys(ModeEnum): SPOTS_FILE = "spatial/tissue_positions.csv" SPOTS_X = "pxl_row_in_fullres" SPOTS_Y = "pxl_col_in_fullres" + + +@unique +class SteinbockKeys(ModeEnum): + """Keys for *Steinbock* formatted dataset.""" + + # files and directories + CELLS_FILE = "cells.h5ad" + DEEPCELL_MASKS_DIR = "masks_deepcell" + ILASTIK_MASKS_DIR = "masks_ilastik" + IMAGES_DIR = "ome" + + # suffixes for images and labels + IMAGE_SUFFIX = ".ome.tif" + LABEL_SUFFIX = ".tiff" From 382dec003880ac3be5f2c66d71c640dc79c8b417 Mon Sep 17 00:00:00 2001 From: giovp Date: Sat, 14 Jan 2023 18:30:57 +0100 Subject: [PATCH 02/19] add steinbock --- src/spatialdata_io/readers/steinbock.py | 108 ++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 src/spatialdata_io/readers/steinbock.py diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py new file mode 100644 index 00000000..71ef7b5d --- /dev/null +++ b/src/spatialdata_io/readers/steinbock.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import os +from collections.abc import Mapping +from pathlib import Path +from types import MappingProxyType +from typing import Any, Literal, Union + +import anndata as ad +from dask_image.imread import imread +from multiscale_spatial_image.multiscale_spatial_image import MultiscaleSpatialImage +from spatial_image import SpatialImage +from spatialdata import Image2DModel, Labels2DModel, SpatialData, TableModel +from spatialdata._logging import logger + +from spatialdata_io._constants._constants import SteinbockKeys + +__all__ = ["steinbock"] + + +def _get_images( + path: Path, + sample: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs) + return Image2DModel.parse(image, **image_models_kwargs) + + +def _get_labels( + path: Path, + sample: str, + labels_kind: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze() + return Labels2DModel.parse(image, **image_models_kwargs) + + +def steinbock( + path: str | Path, + labels_kind: Literal["deepcell", "ilastik"] = "deepcell", + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> SpatialData: + """ + Read a *Steinbock* output into a SpatialData object. + + .. seealso:: + + - `Steinbock pipeline `_. + + Parameters + ---------- + path + Path to the dataset. + labels_kind + Kind of labels to use. Either ``deepcell`` or ``ilastik``. + imread_kwargs + Keyword arguments to pass to the image reader. + image_models_kwargs + Keyword arguments to pass to the image models. 
+ + Returns + ------- + :class:`spatialdata.SpatialData` + """ + path = Path(path) + + labels_kind = SteinbockKeys(f"masks_{labels_kind}") # type: ignore[assignment] + + samples = [i.replace(SteinbockKeys.IMAGE_SUFFIX, "") for i in os.listdir(path / SteinbockKeys.IMAGES_DIR)] + samples_labels = [i.replace(SteinbockKeys.LABEL_SUFFIX, "") for i in os.listdir(path / labels_kind)] + images = {} + labels = {} + if len(set(samples).difference(set(samples_labels))): + logger.warning( + f"Samples {set(samples).difference(set(samples_labels))} have images but no labels. " + "They will be ignored." + ) + for sample in samples: + images[sample] = _get_images( + path, + sample, + imread_kwargs, + image_models_kwargs, + ) + labels[sample] = _get_labels( + path, + sample, + labels_kind, + imread_kwargs, + image_models_kwargs, + ) + + adata = ad.read(path / SteinbockKeys.CELLS_FILE) + idx = adata.obs.index.str.split(" ").map(lambda x: x[1]) + regions = adata.obs.image.str.replace(".tiff", "", regex=False) + adata.obs["cell_id"] = idx + adata.obs["region"] = regions + if len(set(samples).difference(set(regions.unique()))): + raise ValueError("Samples in table and images are inconsistent, please check.") + + table = TableModel.parse(adata, region=regions.unique(), region_key="region", instance_key="cell_id") + + return SpatialData(images=images, labels=labels, table=table) From e26095236288f1acc10647ecd87fa46827f5125b Mon Sep 17 00:00:00 2001 From: giovp Date: Sat, 14 Jan 2023 18:40:57 +0100 Subject: [PATCH 03/19] updates --- src/spatialdata_io/__init__.py | 1 + src/spatialdata_io/_constants/_constants.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index decc2e82..ce126e54 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -1,6 +1,7 @@ from importlib.metadata import version from spatialdata_io.readers.cosmx import cosmx +from spatialdata_io.readers.steinbock import steinbock from spatialdata_io.readers.visium import visium from spatialdata_io.readers.xenium import xenium diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index b62ef8e5..df51e07b 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -96,5 +96,5 @@ class SteinbockKeys(ModeEnum): IMAGES_DIR = "ome" # suffixes for images and labels - IMAGE_SUFFIX = ".ome.tif" + IMAGE_SUFFIX = ".ome.tiff" LABEL_SUFFIX = ".tiff" From 5631648888f60a7160b08403de88aa77e4b3f3c3 Mon Sep 17 00:00:00 2001 From: giovp Date: Sat, 14 Jan 2023 18:55:45 +0100 Subject: [PATCH 04/19] fixes --- src/spatialdata_io/readers/steinbock.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py index 71ef7b5d..469c5c06 100644 --- a/src/spatialdata_io/readers/steinbock.py +++ b/src/spatialdata_io/readers/steinbock.py @@ -102,7 +102,6 @@ def steinbock( adata.obs["region"] = regions if len(set(samples).difference(set(regions.unique()))): raise ValueError("Samples in table and images are inconsistent, please check.") - - table = TableModel.parse(adata, region=regions.unique(), region_key="region", instance_key="cell_id") + table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id") return SpatialData(images=images, labels=labels, table=table) From 4cd565bbc0ce19ce835cd3016f363bb1edb8e330 Mon Sep 17 00:00:00 2001 From: 
giovp Date: Sun, 15 Jan 2023 11:25:41 +0100 Subject: [PATCH 05/19] update constants and init --- src/spatialdata_io/__init__.py | 7 ++----- src/spatialdata_io/_constants/_constants.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index ce126e54..cf4baf5f 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -1,14 +1,11 @@ from importlib.metadata import version from spatialdata_io.readers.cosmx import cosmx +from spatialdata_io.readers.mcmicro import mcmicro from spatialdata_io.readers.steinbock import steinbock from spatialdata_io.readers.visium import visium from spatialdata_io.readers.xenium import xenium -__all__ = [ - "visium", - "xenium", - "cosmx", -] +__all__ = ["visium", "xenium", "cosmx", "mcmicro"] __version__ = version("spatialdata-io") diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index df51e07b..05c731b3 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -98,3 +98,22 @@ class SteinbockKeys(ModeEnum): # suffixes for images and labels IMAGE_SUFFIX = ".ome.tiff" LABEL_SUFFIX = ".tiff" + + +@unique +class McmicroKeys(ModeEnum): + """Keys for *Mcmicro* formatted dataset.""" + + # files and directories + CELL_FEATURES_SUFFIX = "--unmicst_cell.csv" + QUANTIFICATION_DIR = "quantification" + MARKERS_FILE = "markers.csv" + IMAGES_DIR = "registration" + IMAGE_SUFFIX = ".ome.tif" + LABELS_DIR = "segmentation" + LABELS_PREFIX = "unmicst-" + + # metadata + COORDS_X = "X_centroid" + COORDS_Y = "Y_centroid" + INSTANCE_KEY = "CellID" From 6c60a02624da35e5f8aa4a9f2507ee90c44d953b Mon Sep 17 00:00:00 2001 From: giovp Date: Sun, 15 Jan 2023 11:26:05 +0100 Subject: [PATCH 06/19] fix init --- src/spatialdata_io/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index ce126e54..0693b450 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -9,6 +9,7 @@ "visium", "xenium", "cosmx", + "steinbock", ] __version__ = version("spatialdata-io") From 9f16a34a2bd21e6400d31ab0796cd90cc08ab842 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 11:16:41 +0100 Subject: [PATCH 07/19] update visium and xenium --- pyproject.toml | 1 + src/spatialdata_io/readers/visium.py | 20 +++--- src/spatialdata_io/readers/xenium.py | 94 ++++++++++------------------ 3 files changed, 42 insertions(+), 73 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6aa35ef7..aad9280f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "joblib", "imagecodecs", "dask-image", + "pyarrow", ] [project.optional-dependencies] diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 1d891005..6939a7df 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -96,19 +96,19 @@ def visium( adata.obs = pd.merge(adata.obs, coords, how="left", left_index=True, right_index=True) coords = adata.obs[[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y]].values adata.obs.drop(columns=[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y], inplace=True) - adata.obs["visium_spot_id"] = adata.obs_names + adata.obs["barcode"] = adata.obs.index.copy() + adata.var_names_make_unique() scalefactors = json.loads((path / VisiumKeys.SCALEFACTORS_FILE).read_bytes()) shapes = {} circles = ShapesModel.parse( coords, - 
shape_type="Circle", - shape_size=scalefactors["spot_diameter_fullres"], - index=adata.obs_names, + geometry=0, + radius=np.repeat(scalefactors["spot_diameter_fullres"], len(adata)), transformations={"global": Identity()}, ) shapes[dataset_id] = circles - table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="visium_spot_id") + table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="barcode") transform_original = Identity() transform_lowres = Scale( @@ -133,16 +133,12 @@ def visium( full_image_parsed = Image2DModel.parse( full_image, - multiscale_factors=[2, 2, 2, 2], + scale_factors=[2, 2, 2, 2], transformations={"global": transform_original}, **image_models_kwargs, ) - image_hires_parsed = Image2DModel.parse( - image_hires, transformations={"downscaled": transform_hires} - ) - image_lowres_parsed = Image2DModel.parse( - image_lowres, transformations={"downscaled": transform_lowres} - ) + image_hires_parsed = Image2DModel.parse(image_hires, transformations={"downscaled": transform_hires}) + image_lowres_parsed = Image2DModel.parse(image_lowres, transformations={"downscaled": transform_lowres}) images = { dataset_id + "_full_image": full_image_parsed, diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index ec919bdb..e705c3f9 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -10,23 +10,15 @@ import pandas as pd import pyarrow.parquet as pq from anndata import AnnData -from dask_image.imread import imread -from dask.dataframe.core import DataFrame as DaskDataFrame from dask.dataframe import read_parquet +from dask_image.imread import imread from geopandas import GeoDataFrame from joblib import Parallel, delayed from multiscale_spatial_image.multiscale_spatial_image import MultiscaleSpatialImage from pyarrow import Table from shapely import Polygon from spatial_image import SpatialImage -from spatialdata import ( - Image2DModel, - PointsModel, - PolygonsModel, - ShapesModel, - SpatialData, - TableModel, -) +from spatialdata import Image2DModel, PointsModel, ShapesModel, SpatialData, TableModel from spatialdata._core.transformations import Identity, Scale from spatialdata._types import ArrayLike @@ -40,7 +32,6 @@ @inject_docs(xx=XeniumKeys) def xenium( path: str | Path, - # dataset_id: str, n_jobs: int = 1, nucleus_boundaries: bool = True, cell_boundaries: bool = True, @@ -98,7 +89,12 @@ def xenium( image_models_kwargs = {} assert isinstance(image_models_kwargs, dict) image_models_kwargs["chunks"] = (1, 4096, 4096) - image_models_kwargs["multiscale_factors"] = [2, 2, 2, 2] + if "scale_factors" not in image_models_kwargs: + if isinstance(image_models_kwargs, MappingProxyType): + image_models_kwargs = {} + assert isinstance(image_models_kwargs, dict) + image_models_kwargs["scale_factors"] = [2, 2, 2, 2] + path = Path(path) with open(path / XeniumKeys.XENIUM_SPECS) as f: specs = json.load(f) @@ -122,28 +118,27 @@ def xenium( if transcripts: points["transcripts"] = _get_points(path, specs) - images = {} - if morphology_mip: - images["morphology_mip"] = _get_images( - path, - XeniumKeys.MORPHOLOGY_MIP_FILE, - specs, - imread_kwargs, - image_models_kwargs, - ) - if morphology_focus: - images["morphology_focus"] = _get_images( - path, - XeniumKeys.MORPHOLOGY_MIP_FILE, - specs, - imread_kwargs, - image_models_kwargs, - ) - - circles = {} - table, circles["circles"] = _get_tables(path, specs) - - return SpatialData(images=images, 
polygons=polygons, points=points, shapes=circles, table=table) + # images = {} + # if morphology_mip: + # images["morphology_mip"] = _get_images( + # path, + # XeniumKeys.MORPHOLOGY_MIP_FILE, + # specs, + # imread_kwargs, + # image_models_kwargs, + # ) + # if morphology_focus: + # images["morphology_focus"] = _get_images( + # path, + # XeniumKeys.MORPHOLOGY_MIP_FILE, + # specs, + # imread_kwargs, + # image_models_kwargs, + # ) + + table = _get_tables(path, specs) + # return SpatialData(images=images, shapes=polygons, points=points, table=table) + return SpatialData(shapes=polygons, points=points, table=table) def _get_polygons(path: Path, file: str, specs: dict[str, Any], n_jobs: int) -> GeoDataFrame: @@ -159,27 +154,16 @@ def _poly(arr: ArrayLike) -> Polygon: ) geo_df = GeoDataFrame({"geometry": out}) scale = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) - return PolygonsModel.parse(geo_df, transformations={"global": scale}) + return ShapesModel.parse(geo_df, transformations={"global": scale}) def _get_points(path: Path, specs: dict[str, Any]) -> Table: table = read_parquet(path / XeniumKeys.TRANSCRIPTS_FILE) - # table = pq.read_table(path / XeniumKeys.TRANSCRIPTS_FILE) - # arr = ( - # table.select([XeniumKeys.TRANSCRIPTS_X, XeniumKeys.TRANSCRIPTS_Y, XeniumKeys.TRANSCRIPTS_Z]) - # .to_pandas() - # .to_numpy() - # ) - # annotations = table.select((XeniumKeys.OVERLAPS_NUCLEUS, XeniumKeys.QUALITY_VALUE, XeniumKeys.CELL_ID)) - # annotations = annotations.add_column( - # 3, XeniumKeys.FEATURE_NAME, table.column(XeniumKeys.FEATURE_NAME).cast("string").dictionary_encode() - # ) transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) - # points = PointsModel.parse(coords=arr, annotations=annotations, transformations={"global": transform}) points = PointsModel.parse( table, - coordinates={"x": XeniumKeys.TRANSCRIPTS_X, "y": XeniumKeys.TRANSCRIPTS_Y}, + coordinates={"x": XeniumKeys.TRANSCRIPTS_X, "y": XeniumKeys.TRANSCRIPTS_Y, "z": XeniumKeys.TRANSCRIPTS_Y}, feature_key=XeniumKeys.FEATURE_NAME, instance_key=XeniumKeys.CELL_ID, transformations={"global": transform}, @@ -187,26 +171,14 @@ def _get_points(path: Path, specs: dict[str, Any]) -> Table: return points -def _get_tables(path: Path, specs: dict[str, Any]) -> tuple[AnnData, AnnData]: +def _get_tables(path: Path, specs: dict[str, Any]) -> AnnData: adata = _read_10x_h5(path / XeniumKeys.CELL_FEATURE_MATRIX_FILE) metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE) np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values) - - circ = metadata[[XeniumKeys.CELL_X, XeniumKeys.CELL_Y]].to_numpy() - metadata.drop([XeniumKeys.CELL_X, XeniumKeys.CELL_Y], axis=1, inplace=True) metadata[XeniumKeys.CELL_ID] = metadata[XeniumKeys.CELL_ID].astype(str) adata.obs = metadata - transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) - diameters = 2 * np.sqrt(adata.obs[XeniumKeys.CELL_AREA].to_numpy() / np.pi) / specs["pixel_size"] - circles = ShapesModel.parse( - circ, - shape_type="Circle", - shape_size=diameters, - transformations={"global": transform}, - index=adata.obs[XeniumKeys.CELL_ID], - ) table = TableModel.parse(adata, region="/shapes/circles", instance_key=str(XeniumKeys.CELL_ID)) - return table, circles + return table def _get_images( From 3447c9ed4c4134bdd63e422dce8c1b1ba33837e6 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 11:25:35 +0100 Subject: [PATCH 08/19] readd images in xenium --- 
src/spatialdata_io/readers/xenium.py | 37 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index e705c3f9..f63e0fa3 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -118,27 +118,26 @@ def xenium( if transcripts: points["transcripts"] = _get_points(path, specs) - # images = {} - # if morphology_mip: - # images["morphology_mip"] = _get_images( - # path, - # XeniumKeys.MORPHOLOGY_MIP_FILE, - # specs, - # imread_kwargs, - # image_models_kwargs, - # ) - # if morphology_focus: - # images["morphology_focus"] = _get_images( - # path, - # XeniumKeys.MORPHOLOGY_MIP_FILE, - # specs, - # imread_kwargs, - # image_models_kwargs, - # ) + images = {} + if morphology_mip: + images["morphology_mip"] = _get_images( + path, + XeniumKeys.MORPHOLOGY_MIP_FILE, + specs, + imread_kwargs, + image_models_kwargs, + ) + if morphology_focus: + images["morphology_focus"] = _get_images( + path, + XeniumKeys.MORPHOLOGY_MIP_FILE, + specs, + imread_kwargs, + image_models_kwargs, + ) table = _get_tables(path, specs) - # return SpatialData(images=images, shapes=polygons, points=points, table=table) - return SpatialData(shapes=polygons, points=points, table=table) + return SpatialData(images=images, shapes=polygons, points=points, table=table) def _get_polygons(path: Path, file: str, specs: dict[str, Any], n_jobs: int) -> GeoDataFrame: From da512d68575b7a5135faeae98b4cd1d41330bd5e Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 13:39:13 +0100 Subject: [PATCH 09/19] fix steinbock --- src/spatialdata_io/readers/steinbock.py | 42 ++++++++++++------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py index 469c5c06..5a2e63be 100644 --- a/src/spatialdata_io/readers/steinbock.py +++ b/src/spatialdata_io/readers/steinbock.py @@ -18,27 +18,6 @@ __all__ = ["steinbock"] -def _get_images( - path: Path, - sample: str, - imread_kwargs: Mapping[str, Any] = MappingProxyType({}), - image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: - image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs) - return Image2DModel.parse(image, **image_models_kwargs) - - -def _get_labels( - path: Path, - sample: str, - labels_kind: str, - imread_kwargs: Mapping[str, Any] = MappingProxyType({}), - image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: - image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze() - return Labels2DModel.parse(image, **image_models_kwargs) - - def steinbock( path: str | Path, labels_kind: Literal["deepcell", "ilastik"] = "deepcell", @@ -105,3 +84,24 @@ def steinbock( table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id") return SpatialData(images=images, labels=labels, table=table) + + +def _get_images( + path: Path, + sample: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs) + return Image2DModel.parse(image, **image_models_kwargs) + + +def _get_labels( + 
path: Path, + sample: str, + labels_kind: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze() + return Labels2DModel.parse(image, **image_models_kwargs) From aac84f0158c135119896ff2a6e107ba27b79bce1 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 13:51:57 +0100 Subject: [PATCH 10/19] add mcmicro --- src/spatialdata_io/readers/mcmicro.py | 96 +++++++++++++-------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py index 9015501c..a8f513bc 100644 --- a/src/spatialdata_io/readers/mcmicro.py +++ b/src/spatialdata_io/readers/mcmicro.py @@ -19,54 +19,6 @@ __all__ = ["mcmicro"] -def _get_images( - path: Path, - sample: str, - imread_kwargs: Mapping[str, Any] = MappingProxyType({}), - image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: - image = imread(path / McmicroKeys.IMAGES_DIR / f"{sample}{McmicroKeys.IMAGE_SUFFIX}", **imread_kwargs) - return Image2DModel.parse(image, **image_models_kwargs) - - -def _get_labels( - path: Path, - sample: str, - labels_kind: str, - imread_kwargs: Mapping[str, Any] = MappingProxyType({}), - image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: - image = imread( - path - / McmicroKeys.LABELS_DIR - / f"{McmicroKeys.LABELS_PREFIX}{sample}" - / f"{labels_kind}{McmicroKeys.IMAGE_SUFFIX}", - **imread_kwargs, - ).squeeze() - return Labels2DModel.parse(image, **image_models_kwargs) - - -def _get_table( - path: Path, - sample: str, -) -> AnnData: - - table = pd.read_csv(path / McmicroKeys.QUANTIFICATION_DIR / f"{sample}{McmicroKeys.CELL_FEATURES_SUFFIX}") - markers = pd.read_csv(path / McmicroKeys.MARKERS_FILE) - markers.index = markers.marker_name - var = markers.marker_name.tolist() - coords = [McmicroKeys.COORDS_X, McmicroKeys.COORDS_Y] - adata = AnnData( - table[var].to_numpy(), - obs=table.drop(columns=var + coords), - var=markers, - obsm={"spatial": table[coords].to_numpy()}, - dtype=np.float_, - ) - - return TableModel.parse(adata, region=sample, instance_key=McmicroKeys.INSTANCE_KEY) - - def mcmicro( path: str | Path, dataset_id: str, @@ -129,3 +81,51 @@ def mcmicro( table = _get_table(path, dataset_id) return SpatialData(images=images, labels=labels, table=table) + + +def _get_images( + path: Path, + sample: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / McmicroKeys.IMAGES_DIR / f"{sample}{McmicroKeys.IMAGE_SUFFIX}", **imread_kwargs) + return Image2DModel.parse(image, **image_models_kwargs) + + +def _get_labels( + path: Path, + sample: str, + labels_kind: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread( + path + / McmicroKeys.LABELS_DIR + / f"{McmicroKeys.LABELS_PREFIX}{sample}" + / f"{labels_kind}{McmicroKeys.IMAGE_SUFFIX}", + **imread_kwargs, + ).squeeze() + return Labels2DModel.parse(image, **image_models_kwargs) + + +def _get_table( + path: Path, + sample: str, +) -> AnnData: + + table = pd.read_csv(path 
/ McmicroKeys.QUANTIFICATION_DIR / f"{sample}{McmicroKeys.CELL_FEATURES_SUFFIX}") + markers = pd.read_csv(path / McmicroKeys.MARKERS_FILE) + markers.index = markers.marker_name + var = markers.marker_name.tolist() + coords = [McmicroKeys.COORDS_X.value, McmicroKeys.COORDS_Y.value] + adata = AnnData( + table[var].to_numpy(), + obs=table.drop(columns=var + coords), + var=markers, + obsm={"spatial": table[coords].to_numpy()}, + dtype=np.float_, + ) + + return TableModel.parse(adata, region=sample, instance_key=McmicroKeys.INSTANCE_KEY.value) From e761e85bea00aa28b9ecf5288c536dedf1197d79 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 14:01:59 +0100 Subject: [PATCH 11/19] pre-commit fixes --- .pre-commit-config.yaml | 8 +++---- src/spatialdata_io/_constants/_constants.py | 2 +- src/spatialdata_io/readers/cosmx.py | 25 +++++++++++++-------- src/spatialdata_io/readers/mcmicro.py | 1 - 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c1c46add..0548fbff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ default_stages: minimum_pre_commit_version: 2.16.0 repos: - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 23.1.0 hooks: - id: black - repo: https://github.com/pre-commit/mirrors-prettier @@ -19,11 +19,11 @@ repos: hooks: - id: blacken-docs - repo: https://github.com/PyCQA/isort - rev: 5.11.4 + rev: 5.12.0 hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.991 + rev: v1.0.1 hooks: - id: mypy additional_dependencies: [numpy==1.24.0] @@ -50,7 +50,7 @@ repos: - id: trailing-whitespace - id: check-case-conflict - repo: https://github.com/PyCQA/autoflake - rev: v2.0.0 + rev: v2.0.1 hooks: - id: autoflake args: diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index f3d36041..c213b315 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -57,7 +57,7 @@ class XeniumKeys(ModeEnum): CELL_METADATA_FILE = "cells.parquet" CELL_X = "x_centroid" CELL_Y = "y_centroid" - CELL_AREA = 'cell_area' + CELL_AREA = "cell_area" # morphology iamges MORPHOLOGY_MIP_FILE = "morphology_mip.ome.tif" diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 7083631f..5d06cf14 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -7,21 +7,26 @@ from pathlib import Path from types import MappingProxyType from typing import Any, Optional -import pyarrow as pa -import pyarrow.parquet as pq +import dask.array as da import numpy as np import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq from anndata import AnnData -from dask_image.imread import imread -import dask.array as da from dask.dataframe.core import DataFrame as DaskDataFrame +from dask_image.imread import imread from scipy.sparse import csr_matrix # from spatialdata._core.core_utils import xy_cs from skimage.transform import estimate_transform from spatialdata import SpatialData -from spatialdata._core.models import Image2DModel, Labels2DModel, TableModel, PointsModel +from spatialdata._core.models import ( + Image2DModel, + Labels2DModel, + PointsModel, + TableModel, +) # from spatialdata._core.ngff.ngff_coordinate_system import NgffAxis # , CoordinateSystem from spatialdata._core.transformations import Affine, Identity @@ -163,7 
+168,10 @@ def cosmx( table.obsm["global"] = table.obs[[CosmxKeys.X_GLOBAL_CELL, CosmxKeys.Y_GLOBAL_CELL]].to_numpy() table.obsm["spatial"] = table.obs[[CosmxKeys.X_LOCAL_CELL, CosmxKeys.Y_LOCAL_CELL]].to_numpy() - table.obs.drop(columns=[CosmxKeys.X_LOCAL_CELL, CosmxKeys.Y_LOCAL_CELL, CosmxKeys.X_GLOBAL_CELL, CosmxKeys.Y_GLOBAL_CELL], inplace=True) + table.obs.drop( + columns=[CosmxKeys.X_LOCAL_CELL, CosmxKeys.Y_LOCAL_CELL, CosmxKeys.X_GLOBAL_CELL, CosmxKeys.Y_GLOBAL_CELL], + inplace=True, + ) # prepare to read images and labels file_extensions = (".jpg", ".png", ".jpeg", ".tif", ".tiff") @@ -251,9 +259,9 @@ def cosmx( for fov in fovs_counts: aff = affine_transforms_to_global[fov] sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() - sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype('category') + sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") # we rename z because we want to treat the data as 2d - sub_table.rename(columns={'z': 'z_raw'}, inplace=True) + sub_table.rename(columns={"z": "z_raw"}, inplace=True) points[fov] = PointsModel.parse( sub_table, coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, @@ -266,7 +274,6 @@ def cosmx( }, ) - # TODO: what to do with fov file? # if fov_file is not None: # fov_positions = pd.read_csv(path / fov_file, header=0, index_col=CosmxKeys.FOV) diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py index a8f513bc..8a04a274 100644 --- a/src/spatialdata_io/readers/mcmicro.py +++ b/src/spatialdata_io/readers/mcmicro.py @@ -114,7 +114,6 @@ def _get_table( path: Path, sample: str, ) -> AnnData: - table = pd.read_csv(path / McmicroKeys.QUANTIFICATION_DIR / f"{sample}{McmicroKeys.CELL_FEATURES_SUFFIX}") markers = pd.read_csv(path / McmicroKeys.MARKERS_FILE) markers.index = markers.marker_name From 2615da13144a4ef839f2c8f22b67c0402dd6b0e9 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 15:47:50 +0100 Subject: [PATCH 12/19] minor fixes cosmx --- src/spatialdata_io/readers/cosmx.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 5d06cf14..d3e21f67 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -146,12 +146,6 @@ def cosmx( fovs_counts = list(map(str, adata.obs.fov.astype(int).unique())) - # TODO(giovp): uncomment once transform is ready - # input_cs = CoordinateSystem("cxy", axes=[c_axis, y_axis, x_axis]) - # input_cs_labels = CoordinateSystem("cxy", axes=[y_axis, x_axis]) - # output_cs = CoordinateSystem("global", axes=[c_axis, y_axis, x_axis]) - # output_cs_labels = CoordinateSystem("global", axes=[y_axis, x_axis]) - affine_transforms_to_global = {} for fov in fovs_counts: @@ -208,7 +202,6 @@ def cosmx( flipped_im = da.flip(im, axis=0) parsed_im = Image2DModel.parse( flipped_im, - name=fov, transformations={ fov: Identity(), "global": aff, @@ -232,7 +225,6 @@ def cosmx( flipped_la = da.flip(la, axis=0) parsed_la = Labels2DModel.parse( flipped_la, - name=fov, transformations={ fov: Identity(), "global": aff, @@ -249,11 +241,11 @@ def cosmx( if transcripts: # let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting with tempfile.TemporaryDirectory() as tmpdir: - print("converting .csv to .parquet... ", end="") + logger.info("converting .csv to .parquet... 
", end="") assert transcripts_file is not None transcripts_data = pd.read_csv(path / transcripts_file, header=0) transcripts_data.to_parquet(Path(tmpdir) / "transcripts.parquet") - print("done") + logger.info("done") ptable = pq.read_table(Path(tmpdir) / "transcripts.parquet") for fov in fovs_counts: From cfd12f23e6803d243378b2fb75809c6fd52d0aaa Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 15:56:35 +0100 Subject: [PATCH 13/19] remove temp dir --- src/spatialdata_io/readers/cosmx.py | 52 ++++++++++++----------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index d3e21f67..ee7aaf6b 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -2,7 +2,6 @@ import os import re -import tempfile from collections.abc import Mapping from pathlib import Path from types import MappingProxyType @@ -12,7 +11,6 @@ import numpy as np import pandas as pd import pyarrow as pa -import pyarrow.parquet as pq from anndata import AnnData from dask.dataframe.core import DataFrame as DaskDataFrame from dask_image.imread import imread @@ -46,7 +44,6 @@ def cosmx( path: str | Path, dataset_id: Optional[str] = None, - # shape_size: float | int = 1, transcripts: bool = True, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), @@ -72,8 +69,8 @@ def cosmx( Path to the root directory containing *Nanostring* files. dataset_id Name of the dataset. - shape_size - Size of the shape to be used for the centroids of the labels. + transcripts + Whether to also read in transcripts information. imread_kwargs Keyword arguments passed to :func:`dask_image.imread.imread`. image_models_kwargs @@ -239,32 +236,25 @@ def cosmx( points: dict[str, DaskDataFrame] = {} if transcripts: - # let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting - with tempfile.TemporaryDirectory() as tmpdir: - logger.info("converting .csv to .parquet... 
", end="") - assert transcripts_file is not None - transcripts_data = pd.read_csv(path / transcripts_file, header=0) - transcripts_data.to_parquet(Path(tmpdir) / "transcripts.parquet") - logger.info("done") - - ptable = pq.read_table(Path(tmpdir) / "transcripts.parquet") - for fov in fovs_counts: - aff = affine_transforms_to_global[fov] - sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() - sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") - # we rename z because we want to treat the data as 2d - sub_table.rename(columns={"z": "z_raw"}, inplace=True) - points[fov] = PointsModel.parse( - sub_table, - coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, - feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, - instance_key=CosmxKeys.INSTANCE_KEY, - transformations={ - fov: Identity(), - "global": aff, - "global_only_labels": aff, - }, - ) + assert transcripts_file is not None + ptable = pa.read_table(path / transcripts_file, header=0) + for fov in fovs_counts: + aff = affine_transforms_to_global[fov] + sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() + sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") + # we rename z because we want to treat the data as 2d + sub_table.rename(columns={"z": "z_raw"}, inplace=True) + points[fov] = PointsModel.parse( + sub_table, + coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, + feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, + instance_key=CosmxKeys.INSTANCE_KEY, + transformations={ + fov: Identity(), + "global": aff, + "global_only_labels": aff, + }, + ) # TODO: what to do with fov file? # if fov_file is not None: From 31b1d2a4f4ea530009d808a0a633798485ee820c Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 21:48:51 +0100 Subject: [PATCH 14/19] io fixes --- src/spatialdata_io/readers/cosmx.py | 4 +++- src/spatialdata_io/readers/visium.py | 5 +++-- src/spatialdata_io/readers/xenium.py | 31 ++++++++++++++-------------- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index ee7aaf6b..c3b09015 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -237,7 +237,9 @@ def cosmx( points: dict[str, DaskDataFrame] = {} if transcripts: assert transcripts_file is not None - ptable = pa.read_table(path / transcripts_file, header=0) + from pyarrow.csv import read_csv + + ptable = read_csv(path / transcripts_file) # , header=0) for fov in fovs_counts: aff = affine_transforms_to_global[fov] sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 6939a7df..270c89e0 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -96,7 +96,7 @@ def visium( adata.obs = pd.merge(adata.obs, coords, how="left", left_index=True, right_index=True) coords = adata.obs[[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y]].values adata.obs.drop(columns=[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y], inplace=True) - adata.obs["barcode"] = adata.obs.index.copy() + adata.obs["spot_id"] = np.arange(len(adata)) adata.var_names_make_unique() scalefactors = json.loads((path / VisiumKeys.SCALEFACTORS_FILE).read_bytes()) @@ -105,10 +105,11 @@ def visium( coords, geometry=0, 
radius=np.repeat(scalefactors["spot_diameter_fullres"], len(adata)), + index=adata.obs["spot_id"].copy(), transformations={"global": Identity()}, ) shapes[dataset_id] = circles - table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="barcode") + table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="spot_id") transform_original = Identity() transform_lowres = Scale( diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index f63e0fa3..d9fdd4b0 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -4,7 +4,7 @@ from collections.abc import Mapping from pathlib import Path from types import MappingProxyType -from typing import Any +from typing import Any, Optional import numpy as np import pandas as pd @@ -34,7 +34,6 @@ def xenium( path: str | Path, n_jobs: int = 1, nucleus_boundaries: bool = True, - cell_boundaries: bool = True, transcripts: bool = True, morphology_mip: bool = True, morphology_focus: bool = True, @@ -67,8 +66,6 @@ def xenium( Number of jobs to use for parallel processing. nucleus_boundaries Whether to read nucleus boundaries. - cell_boundaries - Whether to read cell boundaries. transcripts Whether to read transcripts. morphology_mip @@ -99,7 +96,10 @@ def xenium( with open(path / XeniumKeys.XENIUM_SPECS) as f: specs = json.load(f) + specs["region"] = "cell_boundaries" + table = _get_tables(path, specs) polygons = {} + if nucleus_boundaries: polygons["nucleus_boundaries"] = _get_polygons( path, @@ -107,13 +107,11 @@ def xenium( specs, n_jobs, ) - if cell_boundaries: - polygons["cell_boundaries"] = _get_polygons( - path, - XeniumKeys.CELL_BOUNDARIES_FILE, - specs, - n_jobs, - ) + + polygons["cell_boundaries"] = _get_polygons( + path, XeniumKeys.CELL_BOUNDARIES_FILE, specs, n_jobs, idx=table.obs[str(XeniumKeys.CELL_ID)].copy() + ) + points = {} if transcripts: points["transcripts"] = _get_points(path, specs) @@ -136,11 +134,12 @@ def xenium( image_models_kwargs, ) - table = _get_tables(path, specs) return SpatialData(images=images, shapes=polygons, points=points, table=table) -def _get_polygons(path: Path, file: str, specs: dict[str, Any], n_jobs: int) -> GeoDataFrame: +def _get_polygons( + path: Path, file: str, specs: dict[str, Any], n_jobs: int, idx: Optional[ArrayLike] = None +) -> GeoDataFrame: def _poly(arr: ArrayLike) -> Polygon: return Polygon(arr[:-1]) @@ -152,6 +151,8 @@ def _poly(arr: ArrayLike) -> Polygon: for _, i in df.groupby(XeniumKeys.CELL_ID)[[XeniumKeys.BOUNDARIES_VERTEX_X, XeniumKeys.BOUNDARIES_VERTEX_Y]] ) geo_df = GeoDataFrame({"geometry": out}) + if idx is not None: + geo_df.index = idx scale = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) return ShapesModel.parse(geo_df, transformations={"global": scale}) @@ -174,9 +175,9 @@ def _get_tables(path: Path, specs: dict[str, Any]) -> AnnData: adata = _read_10x_h5(path / XeniumKeys.CELL_FEATURE_MATRIX_FILE) metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE) np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values) - metadata[XeniumKeys.CELL_ID] = metadata[XeniumKeys.CELL_ID].astype(str) + metadata[XeniumKeys.CELL_ID] = np.arange(len(metadata[XeniumKeys.CELL_ID])) adata.obs = metadata - table = TableModel.parse(adata, region="/shapes/circles", instance_key=str(XeniumKeys.CELL_ID)) + table = TableModel.parse(adata, region=specs["region"], instance_key=str(XeniumKeys.CELL_ID)) 
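(Editor's aside, not part of the hunk: the `region`/`instance_key` pair being set on this line is the contract that ties each table row to a spatial element, and the same pattern recurs in the steinbock and mcmicro readers in this series. A minimal, self-contained sketch of that pattern on a toy AnnData — the shape, element name and column names are illustrative, not from the patch:

import numpy as np
from anndata import AnnData
from spatialdata import TableModel

adata = AnnData(np.ones((5, 2)))     # 5 cells, 2 features
adata.obs["region"] = "cells"        # which element each row annotates
adata.obs["cell_id"] = np.arange(5)  # instance id within that element
table = TableModel.parse(adata, region="cells", region_key="region", instance_key="cell_id")

End of aside; the hunk continues below.)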
return table From 40ca0b8b4aab1137b1357d1bad9e8d7203d690ca Mon Sep 17 00:00:00 2001 From: Luca Marconato <2664412+LucaMarconato@users.noreply.github.com> Date: Tue, 28 Feb 2023 16:52:49 +0100 Subject: [PATCH 15/19] tested all technologies with napari and added fixes --- src/spatialdata_io/_constants/_constants.py | 1 + src/spatialdata_io/readers/cosmx.py | 70 ++++++++++++++------- src/spatialdata_io/readers/mcmicro.py | 2 +- src/spatialdata_io/readers/steinbock.py | 10 +-- src/spatialdata_io/readers/visium.py | 4 +- src/spatialdata_io/readers/xenium.py | 23 +++++-- 6 files changed, 76 insertions(+), 34 deletions(-) diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index c213b315..aee085d5 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -58,6 +58,7 @@ class XeniumKeys(ModeEnum): CELL_X = "x_centroid" CELL_Y = "y_centroid" CELL_AREA = "cell_area" + CELL_NUCLEUS_AREA = "nucleus_area" # morphology iamges MORPHOLOGY_MIP_FILE = "morphology_mip.ome.tif" diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index c3b09015..22c7cfd3 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -236,27 +236,55 @@ def cosmx( points: dict[str, DaskDataFrame] = {} if transcripts: - assert transcripts_file is not None - from pyarrow.csv import read_csv - - ptable = read_csv(path / transcripts_file) # , header=0) - for fov in fovs_counts: - aff = affine_transforms_to_global[fov] - sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() - sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") - # we rename z because we want to treat the data as 2d - sub_table.rename(columns={"z": "z_raw"}, inplace=True) - points[fov] = PointsModel.parse( - sub_table, - coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, - feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, - instance_key=CosmxKeys.INSTANCE_KEY, - transformations={ - fov: Identity(), - "global": aff, - "global_only_labels": aff, - }, - ) + # assert transcripts_file is not None + # from pyarrow.csv import read_csv + # + # ptable = read_csv(path / transcripts_file) # , header=0) + # for fov in fovs_counts: + # aff = affine_transforms_to_global[fov] + # sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() + # sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") + # # we rename z because we want to treat the data as 2d + # sub_table.rename(columns={"z": "z_raw"}, inplace=True) + # points[fov] = PointsModel.parse( + # sub_table, + # coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, + # feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, + # instance_key=CosmxKeys.INSTANCE_KEY, + # transformations={ + # fov: Identity(), + # "global": aff, + # "global_only_labels": aff, + # }, + # ) + # let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting + import tempfile + import pyarrow.parquet as pq + with tempfile.TemporaryDirectory() as tmpdir: + print("converting .csv to .parquet to improve the speed of the slicing operations... 
", end="") + assert transcripts_file is not None + transcripts_data = pd.read_csv(path / transcripts_file, header=0) + transcripts_data.to_parquet(Path(tmpdir) / "transcripts.parquet") + print("done") + + ptable = pq.read_table(Path(tmpdir) / "transcripts.parquet") + for fov in fovs_counts: + aff = affine_transforms_to_global[fov] + sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() + sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype('category') + # we rename z because we want to treat the data as 2d + sub_table.rename(columns={'z': 'z_raw'}, inplace=True) + points[fov] = PointsModel.parse( + sub_table, + coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, + feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, + instance_key=CosmxKeys.INSTANCE_KEY, + transformations={ + fov: Identity(), + "global": aff, + "global_only_labels": aff, + }, + ) # TODO: what to do with fov file? # if fov_file is not None: diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py index 8a04a274..aa77661e 100644 --- a/src/spatialdata_io/readers/mcmicro.py +++ b/src/spatialdata_io/readers/mcmicro.py @@ -127,4 +127,4 @@ def _get_table( dtype=np.float_, ) - return TableModel.parse(adata, region=sample, instance_key=McmicroKeys.INSTANCE_KEY.value) + return TableModel.parse(adata, region=f'/labels/{sample}', instance_key=McmicroKeys.INSTANCE_KEY.value) diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py index 5a2e63be..8858e578 100644 --- a/src/spatialdata_io/readers/steinbock.py +++ b/src/spatialdata_io/readers/steinbock.py @@ -11,6 +11,7 @@ from multiscale_spatial_image.multiscale_spatial_image import MultiscaleSpatialImage from spatial_image import SpatialImage from spatialdata import Image2DModel, Labels2DModel, SpatialData, TableModel +from spatialdata._core.transformations import Identity from spatialdata._logging import logger from spatialdata_io._constants._constants import SteinbockKeys @@ -75,11 +76,12 @@ def steinbock( ) adata = ad.read(path / SteinbockKeys.CELLS_FILE) - idx = adata.obs.index.str.split(" ").map(lambda x: x[1]) + idx = adata.obs.index.str.split(" ").map(lambda x: int(x[1])) regions = adata.obs.image.str.replace(".tiff", "", regex=False) + regions = regions.apply(lambda x: f'/labels/{x}') adata.obs["cell_id"] = idx adata.obs["region"] = regions - if len(set(samples).difference(set(regions.unique()))): + if len(set([f'/labels/{s}' for s in samples]).difference(set(regions.unique()))): raise ValueError("Samples in table and images are inconsistent, please check.") table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id") @@ -93,7 +95,7 @@ def _get_images( image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> Union[SpatialImage, MultiscaleSpatialImage]: image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs) - return Image2DModel.parse(image, **image_models_kwargs) + return Image2DModel.parse(image, transformations={sample: Identity()}, **image_models_kwargs) def _get_labels( @@ -104,4 +106,4 @@ def _get_labels( image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> Union[SpatialImage, MultiscaleSpatialImage]: image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze() - return Labels2DModel.parse(image, **image_models_kwargs) + return 
Labels2DModel.parse(image, transformations={sample: Identity()}, **image_models_kwargs) diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 270c89e0..c9d9aa61 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -104,7 +104,7 @@ def visium( circles = ShapesModel.parse( coords, geometry=0, - radius=np.repeat(scalefactors["spot_diameter_fullres"], len(adata)), + radius=scalefactors["spot_diameter_fullres"] / 2., index=adata.obs["spot_id"].copy(), transformations={"global": Identity()}, ) @@ -147,4 +147,4 @@ def visium( dataset_id + "_lowres_image": image_lowres_parsed, } - return SpatialData(table=table, shapes=shapes, images=images) + return SpatialData(images=images, shapes=shapes, table=table) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index d9fdd4b0..68a43360 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -96,8 +96,8 @@ def xenium( with open(path / XeniumKeys.XENIUM_SPECS) as f: specs = json.load(f) - specs["region"] = "cell_boundaries" - table = _get_tables(path, specs) + specs["region"] = "/shapes/nucleus_circles" + table, circles = _get_tables_and_circles(path, specs) polygons = {} if nucleus_boundaries: @@ -134,7 +134,7 @@ def xenium( image_models_kwargs, ) - return SpatialData(images=images, shapes=polygons, points=points, table=table) + return SpatialData(images=images, shapes=polygons | {specs['region']: circles}, points=points, table=table) def _get_polygons( @@ -171,14 +171,25 @@ def _get_points(path: Path, specs: dict[str, Any]) -> Table: return points -def _get_tables(path: Path, specs: dict[str, Any]) -> AnnData: +def _get_tables_and_circles(path: Path, specs: dict[str, Any]) -> tuple[AnnData, AnnData]: adata = _read_10x_h5(path / XeniumKeys.CELL_FEATURE_MATRIX_FILE) metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE) np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values) + circ = metadata[[XeniumKeys.CELL_X, XeniumKeys.CELL_Y]].to_numpy() + metadata.drop([XeniumKeys.CELL_X, XeniumKeys.CELL_Y], axis=1, inplace=True) metadata[XeniumKeys.CELL_ID] = np.arange(len(metadata[XeniumKeys.CELL_ID])) adata.obs = metadata - table = TableModel.parse(adata, region=specs["region"], instance_key=str(XeniumKeys.CELL_ID)) - return table + transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) + radii = np.sqrt(adata.obs[XeniumKeys.CELL_NUCLEUS_AREA].to_numpy() / np.pi) + circles = ShapesModel.parse( + circ, + geometry=0, + radius=radii, + transformations={"global": transform}, + index=adata.obs[XeniumKeys.CELL_ID].copy(), + ) + table = TableModel.parse(adata, region=f'/shapes/{specs["region"]}', instance_key=str(XeniumKeys.CELL_ID)) + return table, circles def _get_images( From 0560498080c325de71691035d9ed440dac4db329 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Feb 2023 15:53:40 +0000 Subject: [PATCH 16/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata_io/readers/cosmx.py | 6 ++++-- src/spatialdata_io/readers/mcmicro.py | 2 +- src/spatialdata_io/readers/steinbock.py | 4 ++-- src/spatialdata_io/readers/visium.py | 2 +- src/spatialdata_io/readers/xenium.py | 2 +- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py 
b/src/spatialdata_io/readers/cosmx.py index 22c7cfd3..b54dbbe0 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -259,7 +259,9 @@ def cosmx( # ) # let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting import tempfile + import pyarrow.parquet as pq + with tempfile.TemporaryDirectory() as tmpdir: print("converting .csv to .parquet to improve the speed of the slicing operations... ", end="") assert transcripts_file is not None @@ -271,9 +273,9 @@ def cosmx( for fov in fovs_counts: aff = affine_transforms_to_global[fov] sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() - sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype('category') + sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") # we rename z because we want to treat the data as 2d - sub_table.rename(columns={'z': 'z_raw'}, inplace=True) + sub_table.rename(columns={"z": "z_raw"}, inplace=True) points[fov] = PointsModel.parse( sub_table, coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py index aa77661e..ff081fd7 100644 --- a/src/spatialdata_io/readers/mcmicro.py +++ b/src/spatialdata_io/readers/mcmicro.py @@ -127,4 +127,4 @@ def _get_table( dtype=np.float_, ) - return TableModel.parse(adata, region=f'/labels/{sample}', instance_key=McmicroKeys.INSTANCE_KEY.value) + return TableModel.parse(adata, region=f"/labels/{sample}", instance_key=McmicroKeys.INSTANCE_KEY.value) diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py index 8858e578..df24f1f4 100644 --- a/src/spatialdata_io/readers/steinbock.py +++ b/src/spatialdata_io/readers/steinbock.py @@ -78,10 +78,10 @@ def steinbock( adata = ad.read(path / SteinbockKeys.CELLS_FILE) idx = adata.obs.index.str.split(" ").map(lambda x: int(x[1])) regions = adata.obs.image.str.replace(".tiff", "", regex=False) - regions = regions.apply(lambda x: f'/labels/{x}') + regions = regions.apply(lambda x: f"/labels/{x}") adata.obs["cell_id"] = idx adata.obs["region"] = regions - if len(set([f'/labels/{s}' for s in samples]).difference(set(regions.unique()))): + if len({f"/labels/{s}" for s in samples}.difference(set(regions.unique()))): raise ValueError("Samples in table and images are inconsistent, please check.") table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id") diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index c9d9aa61..67dde757 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -104,7 +104,7 @@ def visium( circles = ShapesModel.parse( coords, geometry=0, - radius=scalefactors["spot_diameter_fullres"] / 2., + radius=scalefactors["spot_diameter_fullres"] / 2.0, index=adata.obs["spot_id"].copy(), transformations={"global": Identity()}, ) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 68a43360..4a6d0d21 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -134,7 +134,7 @@ def xenium( image_models_kwargs, ) - return SpatialData(images=images, shapes=polygons | {specs['region']: circles}, points=points, table=table) + return SpatialData(images=images, shapes=polygons | {specs["region"]: circles}, points=points, 
table=table) def _get_polygons( From b05fd8de23196c6dfc43904218ffd8ce57371aac Mon Sep 17 00:00:00 2001 From: giovp Date: Fri, 3 Mar 2023 14:25:10 +0100 Subject: [PATCH 17/19] minor fixes --- src/spatialdata_io/readers/xenium.py | 43 +++++++++++++++++----------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 4a6d0d21..a7640a7a 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -33,6 +33,7 @@ def xenium( path: str | Path, n_jobs: int = 1, + cells_as_shapes: bool = False, nucleus_boundaries: bool = True, transcripts: bool = True, morphology_mip: bool = True, @@ -64,6 +65,8 @@ def xenium( Path to the dataset. n_jobs Number of jobs to use for parallel processing. + cells_as_shapes + Whether to read cells also as shapes. Useful for visualization. nucleus_boundaries Whether to read nucleus boundaries. transcripts @@ -96,8 +99,8 @@ def xenium( with open(path / XeniumKeys.XENIUM_SPECS) as f: specs = json.load(f) - specs["region"] = "/shapes/nucleus_circles" - table, circles = _get_tables_and_circles(path, specs) + specs["region"] = "cell_circles" if cells_as_shapes else "cell_boundaries" + table, circles = _get_tables_and_circles(path, cells_as_shapes, specs) polygons = {} if nucleus_boundaries: @@ -133,8 +136,9 @@ def xenium( imread_kwargs, image_models_kwargs, ) - - return SpatialData(images=images, shapes=polygons | {specs["region"]: circles}, points=points, table=table) + if cells_as_shapes: + return SpatialData(images=images, shapes=polygons | {specs["region"]: circles}, points=points, table=table) + return SpatialData(images=images, shapes=polygons, points=points, table=table) def _get_polygons( @@ -159,6 +163,7 @@ def _poly(arr: ArrayLike) -> Polygon: def _get_points(path: Path, specs: dict[str, Any]) -> Table: table = read_parquet(path / XeniumKeys.TRANSCRIPTS_FILE) + table[XeniumKeys.CELL_ID.value] = pd.Categorical(table[XeniumKeys.CELL_ID.value]) transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) points = PointsModel.parse( @@ -171,25 +176,29 @@ def _get_points(path: Path, specs: dict[str, Any]) -> Table: return points -def _get_tables_and_circles(path: Path, specs: dict[str, Any]) -> tuple[AnnData, AnnData]: +def _get_tables_and_circles( + path: Path, cells_as_shapes: bool, specs: dict[str, Any] +) -> AnnData | tuple[AnnData, AnnData]: adata = _read_10x_h5(path / XeniumKeys.CELL_FEATURE_MATRIX_FILE) metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE) np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values) circ = metadata[[XeniumKeys.CELL_X, XeniumKeys.CELL_Y]].to_numpy() metadata.drop([XeniumKeys.CELL_X, XeniumKeys.CELL_Y], axis=1, inplace=True) - metadata[XeniumKeys.CELL_ID] = np.arange(len(metadata[XeniumKeys.CELL_ID])) adata.obs = metadata - transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) - radii = np.sqrt(adata.obs[XeniumKeys.CELL_NUCLEUS_AREA].to_numpy() / np.pi) - circles = ShapesModel.parse( - circ, - geometry=0, - radius=radii, - transformations={"global": transform}, - index=adata.obs[XeniumKeys.CELL_ID].copy(), - ) - table = TableModel.parse(adata, region=f'/shapes/{specs["region"]}', instance_key=str(XeniumKeys.CELL_ID)) - return table, circles + adata.obs["region"] = specs["region"] + table = TableModel.parse(adata, region=specs["region"], region_key="region", instance_key=str(XeniumKeys.CELL_ID)) + if 
From f66f522783c41486f421c2cb897788740309eb4b Mon Sep 17 00:00:00 2001
From: giovp
Date: Fri, 3 Mar 2023 18:43:22 +0100
Subject: [PATCH 18/19] fixes

---
 src/spatialdata_io/readers/cosmx.py  |  8 ++++----
 src/spatialdata_io/readers/visium.py | 30 +++++++++++++++++-----------
 src/spatialdata_io/readers/xenium.py |  1 -
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py
index b54dbbe0..3a7d6ca3 100644
--- a/src/spatialdata_io/readers/cosmx.py
+++ b/src/spatialdata_io/readers/cosmx.py
@@ -120,7 +120,7 @@ def cosmx(
     obs = pd.read_csv(path / meta_file, header=0, index_col=CosmxKeys.INSTANCE_KEY)
     obs[CosmxKeys.FOV] = pd.Categorical(obs[CosmxKeys.FOV].astype(str))
-    obs[CosmxKeys.REGION_KEY] = pd.Categorical(obs[CosmxKeys.FOV].astype(str).apply(lambda s: "/labels/" + s))
+    obs[CosmxKeys.REGION_KEY] = pd.Categorical(obs[CosmxKeys.FOV].astype(str).apply(lambda s: s + "_labels"))
     obs[CosmxKeys.INSTANCE_KEY] = obs.index.astype(np.int64)
     obs.rename_axis(None, inplace=True)
     obs.index = obs.index.astype(str).str.cat(obs[CosmxKeys.FOV].values, sep="_")
@@ -207,7 +207,7 @@ def cosmx(
                 dims=("y", "x", "c"),
                 **image_models_kwargs,
             )
-            images[fov] = parsed_im
+            images[f"{fov}_image"] = parsed_im
         else:
             logger.warning(f"FOV {fov} not found in counts file. Skipping image {fname}.")
 
@@ -230,7 +230,7 @@ def cosmx(
                 dims=("y", "x"),
                 **image_models_kwargs,
             )
-            labels[fov] = parsed_la
+            labels[f"{fov}_labels"] = parsed_la
         else:
             logger.warning(f"FOV {fov} not found in counts file. Skipping labels {fname}.")
 
@@ -276,7 +276,7 @@ def cosmx(
                 sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category")
                 # we rename z because we want to treat the data as 2d
                 sub_table.rename(columns={"z": "z_raw"}, inplace=True)
-                points[fov] = PointsModel.parse(
+                points[f"{fov}_points"] = PointsModel.parse(
                     sub_table,
                     coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT},
                     feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT,
diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py
index 67dde757..d7ffa578 100644
--- a/src/spatialdata_io/readers/visium.py
+++ b/src/spatialdata_io/readers/visium.py
@@ -100,16 +100,6 @@ def visium(
     adata.var_names_make_unique()
 
     scalefactors = json.loads((path / VisiumKeys.SCALEFACTORS_FILE).read_bytes())
-    shapes = {}
-    circles = ShapesModel.parse(
-        coords,
-        geometry=0,
-        radius=scalefactors["spot_diameter_fullres"] / 2.0,
-        index=adata.obs["spot_id"].copy(),
-        transformations={"global": Identity()},
-    )
-    shapes[dataset_id] = circles
-    table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="spot_id")
 
     transform_original = Identity()
     transform_lowres = Scale(
@@ -121,6 +111,22 @@ def visium(
         axes=("y", "x"),
     )
 
+    shapes = {}
+    circles = ShapesModel.parse(
+        coords,
+        geometry=0,
+        radius=scalefactors["spot_diameter_fullres"] / 2.0,
+        index=adata.obs["spot_id"].copy(),
+        transformations={
+            "global": Identity(),
+            "downscaled_hires": transform_hires,
+            "downscaled_lowres": transform_lowres,
+        },
+    )
+    shapes[dataset_id] = circles
+    adata.obs["region"] = dataset_id
+    table = TableModel.parse(adata, region=dataset_id, region_key="region", instance_key="spot_id")
+
     full_image = (
         imread(path / f"{dataset_id}{VisiumKeys.IMAGE_TIF_SUFFIX}", **imread_kwargs).squeeze().transpose(2, 0, 1)
     )
@@ -138,8 +144,8 @@ def visium(
         transformations={"global": transform_original},
         **image_models_kwargs,
     )
-    image_hires_parsed = Image2DModel.parse(image_hires, transformations={"downscaled": transform_hires})
-    image_lowres_parsed = Image2DModel.parse(image_lowres, transformations={"downscaled": transform_lowres})
+    image_hires_parsed = Image2DModel.parse(image_hires, transformations={"downscaled_hires": transform_hires})
+    image_lowres_parsed = Image2DModel.parse(image_lowres, transformations={"downscaled_lowres": transform_lowres})
 
     images = {
         dataset_id + "_full_image": full_image_parsed,
diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py
index a7640a7a..30f8d8e9 100644
--- a/src/spatialdata_io/readers/xenium.py
+++ b/src/spatialdata_io/readers/xenium.py
@@ -163,7 +163,6 @@ def _poly(arr: ArrayLike) -> Polygon:
 
 def _get_points(path: Path, specs: dict[str, Any]) -> Table:
     table = read_parquet(path / XeniumKeys.TRANSCRIPTS_FILE)
-    table[XeniumKeys.CELL_ID.value] = pd.Categorical(table[XeniumKeys.CELL_ID.value])
 
     transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y"))
     points = PointsModel.parse(
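With this patch the visium circles carry three named transformations instead of one, so the same shapes element can be drawn over the full-resolution image or either downscaled image without manual rescaling. A hedged usage sketch (the path is a placeholder; coordinate-system names follow the patch):

    from spatialdata_io import visium

    sdata = visium("path/to/visium_output")
    # the circles and the three images now share the coordinate systems
    # "global", "downscaled_hires" and "downscaled_lowres"
    print(sdata)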
From 3907d6839dacb669aab54bfc6e85d42c72bdc5a9 Mon Sep 17 00:00:00 2001
From: giovp
Date: Sat, 4 Mar 2023 19:46:16 +0100
Subject: [PATCH 19/19] add spatial key in obsm

---
 src/spatialdata_io/readers/mcmicro.py   |  9 ++++++---
 src/spatialdata_io/readers/steinbock.py | 13 +++++++------
 src/spatialdata_io/readers/visium.py    |  1 +
 src/spatialdata_io/readers/xenium.py    |  1 +
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py
index ff081fd7..98f7647b 100644
--- a/src/spatialdata_io/readers/mcmicro.py
+++ b/src/spatialdata_io/readers/mcmicro.py
@@ -56,14 +56,14 @@ def mcmicro(
         raise ValueError("Dataset id is not consistent with sample name.")
 
     images = {}
-    images[dataset_id] = _get_images(
+    images[f"{dataset_id}_image"] = _get_images(
         path,
         dataset_id,
         imread_kwargs,
         image_models_kwargs,
     )
     labels = {}
-    labels[dataset_id] = _get_labels(
+    labels[f"{dataset_id}_cells"] = _get_labels(
         path,
         dataset_id,
         "cell",
@@ -126,5 +126,8 @@ def _get_table(
         obsm={"spatial": table[coords].to_numpy()},
         dtype=np.float_,
     )
+    adata.obs["region"] = f"{sample}_cells"
 
-    return TableModel.parse(adata, region=f"/labels/{sample}", instance_key=McmicroKeys.INSTANCE_KEY.value)
+    return TableModel.parse(
+        adata, region=f"{sample}_cells", region_key="region", instance_key=McmicroKeys.INSTANCE_KEY.value
+    )
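The mcmicro hunks above tie the table to the renamed "{sample}_cells" labels element. A schematic of the region/region_key/instance_key contract these readers all follow (values are illustrative, not taken from a real dataset):

    import anndata as ad
    import numpy as np

    adata = ad.AnnData(X=np.zeros((3, 1)))
    adata.obs["region"] = "sample1_cells"  # which labels element each row annotates
    adata.obs["cell_id"] = [1, 2, 3]       # which label value within that element
    # TableModel.parse(adata, region="sample1_cells", region_key="region",
    #                  instance_key="cell_id") validates exactly this pairing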
diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py
index df24f1f4..70074736 100644
--- a/src/spatialdata_io/readers/steinbock.py
+++ b/src/spatialdata_io/readers/steinbock.py
@@ -61,13 +61,13 @@ def steinbock(
             "They will be ignored."
         )
     for sample in samples:
-        images[sample] = _get_images(
+        images[f"{sample}_image"] = _get_images(
             path,
             sample,
             imread_kwargs,
             image_models_kwargs,
         )
-        labels[sample] = _get_labels(
+        labels[f"{sample}_labels"] = _get_labels(
             path,
             sample,
             labels_kind,
@@ -78,10 +78,11 @@ def steinbock(
     adata = ad.read(path / SteinbockKeys.CELLS_FILE)
     idx = adata.obs.index.str.split(" ").map(lambda x: int(x[1]))
     regions = adata.obs.image.str.replace(".tiff", "", regex=False)
-    regions = regions.apply(lambda x: f"/labels/{x}")
+    regions = regions.apply(lambda x: f"{x}_labels")
     adata.obs["cell_id"] = idx
     adata.obs["region"] = regions
+    adata.obsm["spatial"] = adata.obs[["centroid-0", "centroid-1"]].to_numpy()
-    if len({f"/labels/{s}" for s in samples}.difference(set(regions.unique()))):
+    if len({f"{s}_labels" for s in samples}.difference(set(regions.unique()))):
         raise ValueError("Samples in table and images are inconsistent, please check.")
     table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id")
@@ -95,7 +96,7 @@ def _get_images(
     image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
 ) -> Union[SpatialImage, MultiscaleSpatialImage]:
     image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs)
-    return Image2DModel.parse(image, transformations={sample: Identity()}, **image_models_kwargs)
+    return Image2DModel.parse(data=image, transformations={sample: Identity()}, **image_models_kwargs)
 
 
 def _get_labels(
@@ -106,4 +107,4 @@ def _get_labels(
     image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
 ) -> Union[SpatialImage, MultiscaleSpatialImage]:
     image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze()
-    return Labels2DModel.parse(image, transformations={sample: Identity()}, **image_models_kwargs)
+    return Labels2DModel.parse(data=image, transformations={sample: Identity()}, **image_models_kwargs)
diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py
index d7ffa578..64aac258 100644
--- a/src/spatialdata_io/readers/visium.py
+++ b/src/spatialdata_io/readers/visium.py
@@ -95,6 +95,7 @@ def visium(
     adata.obs = pd.merge(adata.obs, coords, how="left", left_index=True, right_index=True)
     coords = adata.obs[[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y]].values
+    adata.obsm["spatial"] = coords
     adata.obs.drop(columns=[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y], inplace=True)
     adata.obs["spot_id"] = np.arange(len(adata))
     adata.var_names_make_unique()
diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py
index 30f8d8e9..18dc70dd 100644
--- a/src/spatialdata_io/readers/xenium.py
+++ b/src/spatialdata_io/readers/xenium.py
@@ -182,6 +182,7 @@ def _get_tables_and_circles(
     metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE)
     np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values)
     circ = metadata[[XeniumKeys.CELL_X, XeniumKeys.CELL_Y]].to_numpy()
+    adata.obsm["spatial"] = circ
     metadata.drop([XeniumKeys.CELL_X, XeniumKeys.CELL_Y], axis=1, inplace=True)
     adata.obs = metadata
     adata.obs["region"] = specs["region"]
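These final hunks mirror the cell centroids into adata.obsm["spatial"], the scanpy/squidpy convention, so the returned tables stay usable outside SpatialData as well. A quick check, using steinbock as an example reader (the path is a placeholder):

    from spatialdata_io import steinbock

    sdata = steinbock("path/to/steinbock_output")
    # an (n_cells, 2) array of centroids, row-aligned with sdata.table.obs
    print(sdata.table.obsm["spatial"].shape)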