From a485be606d0426f62efe030fd54cc5b86b05f89e Mon Sep 17 00:00:00 2001 From: giovp Date: Sat, 14 Jan 2023 18:29:20 +0100 Subject: [PATCH 01/19] add steinbock --- src/spatialdata_io/_constants/_constants.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index 10682629..b62ef8e5 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -83,3 +83,18 @@ class VisiumKeys(ModeEnum): SPOTS_FILE = "spatial/tissue_positions.csv" SPOTS_X = "pxl_row_in_fullres" SPOTS_Y = "pxl_col_in_fullres" + + +@unique +class SteinbockKeys(ModeEnum): + """Keys for *Steinbock* formatted dataset.""" + + # files and directories + CELLS_FILE = "cells.h5ad" + DEEPCELL_MASKS_DIR = "masks_deepcell" + ILASTIK_MASKS_DIR = "masks_ilastik" + IMAGES_DIR = "ome" + + # suffixes for images and labels + IMAGE_SUFFIX = ".ome.tif" + LABEL_SUFFIX = ".tiff" From 382dec003880ac3be5f2c66d71c640dc79c8b417 Mon Sep 17 00:00:00 2001 From: giovp Date: Sat, 14 Jan 2023 18:30:57 +0100 Subject: [PATCH 02/19] add steinbock --- src/spatialdata_io/readers/steinbock.py | 108 ++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 src/spatialdata_io/readers/steinbock.py diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py new file mode 100644 index 00000000..71ef7b5d --- /dev/null +++ b/src/spatialdata_io/readers/steinbock.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import os +from collections.abc import Mapping +from pathlib import Path +from types import MappingProxyType +from typing import Any, Literal, Union + +import anndata as ad +from dask_image.imread import imread +from multiscale_spatial_image.multiscale_spatial_image import MultiscaleSpatialImage +from spatial_image import SpatialImage +from spatialdata import Image2DModel, Labels2DModel, SpatialData, TableModel +from spatialdata._logging import logger + +from spatialdata_io._constants._constants import SteinbockKeys + +__all__ = ["steinbock"] + + +def _get_images( + path: Path, + sample: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs) + return Image2DModel.parse(image, **image_models_kwargs) + + +def _get_labels( + path: Path, + sample: str, + labels_kind: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze() + return Labels2DModel.parse(image, **image_models_kwargs) + + +def steinbock( + path: str | Path, + labels_kind: Literal["deepcell", "ilastik"] = "deepcell", + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> SpatialData: + """ + Read a *Steinbock* output into a SpatialData object. + + .. seealso:: + + - `Steinbock pipeline `_. + + Parameters + ---------- + path + Path to the dataset. + labels_kind + Kind of labels to use. Either ``deepcell`` or ``ilastik``. + imread_kwargs + Keyword arguments to pass to the image reader. + image_models_kwargs + Keyword arguments to pass to the image models. 
+ + Returns + ------- + :class:`spatialdata.SpatialData` + """ + path = Path(path) + + labels_kind = SteinbockKeys(f"masks_{labels_kind}") # type: ignore[assignment] + + samples = [i.replace(SteinbockKeys.IMAGE_SUFFIX, "") for i in os.listdir(path / SteinbockKeys.IMAGES_DIR)] + samples_labels = [i.replace(SteinbockKeys.LABEL_SUFFIX, "") for i in os.listdir(path / labels_kind)] + images = {} + labels = {} + if len(set(samples).difference(set(samples_labels))): + logger.warning( + f"Samples {set(samples).difference(set(samples_labels))} have images but no labels. " + "They will be ignored." + ) + for sample in samples: + images[sample] = _get_images( + path, + sample, + imread_kwargs, + image_models_kwargs, + ) + labels[sample] = _get_labels( + path, + sample, + labels_kind, + imread_kwargs, + image_models_kwargs, + ) + + adata = ad.read(path / SteinbockKeys.CELLS_FILE) + idx = adata.obs.index.str.split(" ").map(lambda x: x[1]) + regions = adata.obs.image.str.replace(".tiff", "", regex=False) + adata.obs["cell_id"] = idx + adata.obs["region"] = regions + if len(set(samples).difference(set(regions.unique()))): + raise ValueError("Samples in table and images are inconsistent, please check.") + + table = TableModel.parse(adata, region=regions.unique(), region_key="region", instance_key="cell_id") + + return SpatialData(images=images, labels=labels, table=table) From e26095236288f1acc10647ecd87fa46827f5125b Mon Sep 17 00:00:00 2001 From: giovp Date: Sat, 14 Jan 2023 18:40:57 +0100 Subject: [PATCH 03/19] updates --- src/spatialdata_io/__init__.py | 1 + src/spatialdata_io/_constants/_constants.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index decc2e82..ce126e54 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -1,6 +1,7 @@ from importlib.metadata import version from spatialdata_io.readers.cosmx import cosmx +from spatialdata_io.readers.steinbock import steinbock from spatialdata_io.readers.visium import visium from spatialdata_io.readers.xenium import xenium diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index b62ef8e5..df51e07b 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -96,5 +96,5 @@ class SteinbockKeys(ModeEnum): IMAGES_DIR = "ome" # suffixes for images and labels - IMAGE_SUFFIX = ".ome.tif" + IMAGE_SUFFIX = ".ome.tiff" LABEL_SUFFIX = ".tiff" From 5631648888f60a7160b08403de88aa77e4b3f3c3 Mon Sep 17 00:00:00 2001 From: giovp Date: Sat, 14 Jan 2023 18:55:45 +0100 Subject: [PATCH 04/19] fixes --- src/spatialdata_io/readers/steinbock.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py index 71ef7b5d..469c5c06 100644 --- a/src/spatialdata_io/readers/steinbock.py +++ b/src/spatialdata_io/readers/steinbock.py @@ -102,7 +102,6 @@ def steinbock( adata.obs["region"] = regions if len(set(samples).difference(set(regions.unique()))): raise ValueError("Samples in table and images are inconsistent, please check.") - - table = TableModel.parse(adata, region=regions.unique(), region_key="region", instance_key="cell_id") + table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id") return SpatialData(images=images, labels=labels, table=table) From 4cd565bbc0ce19ce835cd3016f363bb1edb8e330 Mon Sep 17 00:00:00 2001 From: 
giovp Date: Sun, 15 Jan 2023 11:25:41 +0100 Subject: [PATCH 05/19] update constants and init --- src/spatialdata_io/__init__.py | 7 ++----- src/spatialdata_io/_constants/_constants.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index ce126e54..cf4baf5f 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -1,14 +1,11 @@ from importlib.metadata import version from spatialdata_io.readers.cosmx import cosmx +from spatialdata_io.readers.mcmicro import mcmicro from spatialdata_io.readers.steinbock import steinbock from spatialdata_io.readers.visium import visium from spatialdata_io.readers.xenium import xenium -__all__ = [ - "visium", - "xenium", - "cosmx", -] +__all__ = ["visium", "xenium", "cosmx", "mcmicro"] __version__ = version("spatialdata-io") diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index df51e07b..05c731b3 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -98,3 +98,22 @@ class SteinbockKeys(ModeEnum): # suffixes for images and labels IMAGE_SUFFIX = ".ome.tiff" LABEL_SUFFIX = ".tiff" + + +@unique +class McmicroKeys(ModeEnum): + """Keys for *Mcmicro* formatted dataset.""" + + # files and directories + CELL_FEATURES_SUFFIX = "--unmicst_cell.csv" + QUANTIFICATION_DIR = "quantification" + MARKERS_FILE = "markers.csv" + IMAGES_DIR = "registration" + IMAGE_SUFFIX = ".ome.tif" + LABELS_DIR = "segmentation" + LABELS_PREFIX = "unmicst-" + + # metadata + COORDS_X = "X_centroid" + COORDS_Y = "Y_centroid" + INSTANCE_KEY = "CellID" From 6c60a02624da35e5f8aa4a9f2507ee90c44d953b Mon Sep 17 00:00:00 2001 From: giovp Date: Sun, 15 Jan 2023 11:26:05 +0100 Subject: [PATCH 06/19] fix init --- src/spatialdata_io/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index ce126e54..0693b450 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -9,6 +9,7 @@ "visium", "xenium", "cosmx", + "steinbock", ] __version__ = version("spatialdata-io") From 9f16a34a2bd21e6400d31ab0796cd90cc08ab842 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 11:16:41 +0100 Subject: [PATCH 07/19] update visium and xenium --- pyproject.toml | 1 + src/spatialdata_io/readers/visium.py | 20 +++--- src/spatialdata_io/readers/xenium.py | 94 ++++++++++------------------ 3 files changed, 42 insertions(+), 73 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6aa35ef7..aad9280f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "joblib", "imagecodecs", "dask-image", + "pyarrow", ] [project.optional-dependencies] diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 1d891005..6939a7df 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -96,19 +96,19 @@ def visium( adata.obs = pd.merge(adata.obs, coords, how="left", left_index=True, right_index=True) coords = adata.obs[[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y]].values adata.obs.drop(columns=[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y], inplace=True) - adata.obs["visium_spot_id"] = adata.obs_names + adata.obs["barcode"] = adata.obs.index.copy() + adata.var_names_make_unique() scalefactors = json.loads((path / VisiumKeys.SCALEFACTORS_FILE).read_bytes()) shapes = {} circles = ShapesModel.parse( coords, - 
shape_type="Circle", - shape_size=scalefactors["spot_diameter_fullres"], - index=adata.obs_names, + geometry=0, + radius=np.repeat(scalefactors["spot_diameter_fullres"], len(adata)), transformations={"global": Identity()}, ) shapes[dataset_id] = circles - table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="visium_spot_id") + table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="barcode") transform_original = Identity() transform_lowres = Scale( @@ -133,16 +133,12 @@ def visium( full_image_parsed = Image2DModel.parse( full_image, - multiscale_factors=[2, 2, 2, 2], + scale_factors=[2, 2, 2, 2], transformations={"global": transform_original}, **image_models_kwargs, ) - image_hires_parsed = Image2DModel.parse( - image_hires, transformations={"downscaled": transform_hires} - ) - image_lowres_parsed = Image2DModel.parse( - image_lowres, transformations={"downscaled": transform_lowres} - ) + image_hires_parsed = Image2DModel.parse(image_hires, transformations={"downscaled": transform_hires}) + image_lowres_parsed = Image2DModel.parse(image_lowres, transformations={"downscaled": transform_lowres}) images = { dataset_id + "_full_image": full_image_parsed, diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index ec919bdb..e705c3f9 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -10,23 +10,15 @@ import pandas as pd import pyarrow.parquet as pq from anndata import AnnData -from dask_image.imread import imread -from dask.dataframe.core import DataFrame as DaskDataFrame from dask.dataframe import read_parquet +from dask_image.imread import imread from geopandas import GeoDataFrame from joblib import Parallel, delayed from multiscale_spatial_image.multiscale_spatial_image import MultiscaleSpatialImage from pyarrow import Table from shapely import Polygon from spatial_image import SpatialImage -from spatialdata import ( - Image2DModel, - PointsModel, - PolygonsModel, - ShapesModel, - SpatialData, - TableModel, -) +from spatialdata import Image2DModel, PointsModel, ShapesModel, SpatialData, TableModel from spatialdata._core.transformations import Identity, Scale from spatialdata._types import ArrayLike @@ -40,7 +32,6 @@ @inject_docs(xx=XeniumKeys) def xenium( path: str | Path, - # dataset_id: str, n_jobs: int = 1, nucleus_boundaries: bool = True, cell_boundaries: bool = True, @@ -98,7 +89,12 @@ def xenium( image_models_kwargs = {} assert isinstance(image_models_kwargs, dict) image_models_kwargs["chunks"] = (1, 4096, 4096) - image_models_kwargs["multiscale_factors"] = [2, 2, 2, 2] + if "scale_factors" not in image_models_kwargs: + if isinstance(image_models_kwargs, MappingProxyType): + image_models_kwargs = {} + assert isinstance(image_models_kwargs, dict) + image_models_kwargs["scale_factors"] = [2, 2, 2, 2] + path = Path(path) with open(path / XeniumKeys.XENIUM_SPECS) as f: specs = json.load(f) @@ -122,28 +118,27 @@ def xenium( if transcripts: points["transcripts"] = _get_points(path, specs) - images = {} - if morphology_mip: - images["morphology_mip"] = _get_images( - path, - XeniumKeys.MORPHOLOGY_MIP_FILE, - specs, - imread_kwargs, - image_models_kwargs, - ) - if morphology_focus: - images["morphology_focus"] = _get_images( - path, - XeniumKeys.MORPHOLOGY_MIP_FILE, - specs, - imread_kwargs, - image_models_kwargs, - ) - - circles = {} - table, circles["circles"] = _get_tables(path, specs) - - return SpatialData(images=images, 
polygons=polygons, points=points, shapes=circles, table=table) + # images = {} + # if morphology_mip: + # images["morphology_mip"] = _get_images( + # path, + # XeniumKeys.MORPHOLOGY_MIP_FILE, + # specs, + # imread_kwargs, + # image_models_kwargs, + # ) + # if morphology_focus: + # images["morphology_focus"] = _get_images( + # path, + # XeniumKeys.MORPHOLOGY_MIP_FILE, + # specs, + # imread_kwargs, + # image_models_kwargs, + # ) + + table = _get_tables(path, specs) + # return SpatialData(images=images, shapes=polygons, points=points, table=table) + return SpatialData(shapes=polygons, points=points, table=table) def _get_polygons(path: Path, file: str, specs: dict[str, Any], n_jobs: int) -> GeoDataFrame: @@ -159,27 +154,16 @@ def _poly(arr: ArrayLike) -> Polygon: ) geo_df = GeoDataFrame({"geometry": out}) scale = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) - return PolygonsModel.parse(geo_df, transformations={"global": scale}) + return ShapesModel.parse(geo_df, transformations={"global": scale}) def _get_points(path: Path, specs: dict[str, Any]) -> Table: table = read_parquet(path / XeniumKeys.TRANSCRIPTS_FILE) - # table = pq.read_table(path / XeniumKeys.TRANSCRIPTS_FILE) - # arr = ( - # table.select([XeniumKeys.TRANSCRIPTS_X, XeniumKeys.TRANSCRIPTS_Y, XeniumKeys.TRANSCRIPTS_Z]) - # .to_pandas() - # .to_numpy() - # ) - # annotations = table.select((XeniumKeys.OVERLAPS_NUCLEUS, XeniumKeys.QUALITY_VALUE, XeniumKeys.CELL_ID)) - # annotations = annotations.add_column( - # 3, XeniumKeys.FEATURE_NAME, table.column(XeniumKeys.FEATURE_NAME).cast("string").dictionary_encode() - # ) transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) - # points = PointsModel.parse(coords=arr, annotations=annotations, transformations={"global": transform}) points = PointsModel.parse( table, - coordinates={"x": XeniumKeys.TRANSCRIPTS_X, "y": XeniumKeys.TRANSCRIPTS_Y}, + coordinates={"x": XeniumKeys.TRANSCRIPTS_X, "y": XeniumKeys.TRANSCRIPTS_Y, "z": XeniumKeys.TRANSCRIPTS_Y}, feature_key=XeniumKeys.FEATURE_NAME, instance_key=XeniumKeys.CELL_ID, transformations={"global": transform}, @@ -187,26 +171,14 @@ def _get_points(path: Path, specs: dict[str, Any]) -> Table: return points -def _get_tables(path: Path, specs: dict[str, Any]) -> tuple[AnnData, AnnData]: +def _get_tables(path: Path, specs: dict[str, Any]) -> AnnData: adata = _read_10x_h5(path / XeniumKeys.CELL_FEATURE_MATRIX_FILE) metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE) np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values) - - circ = metadata[[XeniumKeys.CELL_X, XeniumKeys.CELL_Y]].to_numpy() - metadata.drop([XeniumKeys.CELL_X, XeniumKeys.CELL_Y], axis=1, inplace=True) metadata[XeniumKeys.CELL_ID] = metadata[XeniumKeys.CELL_ID].astype(str) adata.obs = metadata - transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) - diameters = 2 * np.sqrt(adata.obs[XeniumKeys.CELL_AREA].to_numpy() / np.pi) / specs["pixel_size"] - circles = ShapesModel.parse( - circ, - shape_type="Circle", - shape_size=diameters, - transformations={"global": transform}, - index=adata.obs[XeniumKeys.CELL_ID], - ) table = TableModel.parse(adata, region="/shapes/circles", instance_key=str(XeniumKeys.CELL_ID)) - return table, circles + return table def _get_images( From 3447c9ed4c4134bdd63e422dce8c1b1ba33837e6 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 11:25:35 +0100 Subject: [PATCH 08/19] readd images in xenium --- 
src/spatialdata_io/readers/xenium.py | 37 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index e705c3f9..f63e0fa3 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -118,27 +118,26 @@ def xenium( if transcripts: points["transcripts"] = _get_points(path, specs) - # images = {} - # if morphology_mip: - # images["morphology_mip"] = _get_images( - # path, - # XeniumKeys.MORPHOLOGY_MIP_FILE, - # specs, - # imread_kwargs, - # image_models_kwargs, - # ) - # if morphology_focus: - # images["morphology_focus"] = _get_images( - # path, - # XeniumKeys.MORPHOLOGY_MIP_FILE, - # specs, - # imread_kwargs, - # image_models_kwargs, - # ) + images = {} + if morphology_mip: + images["morphology_mip"] = _get_images( + path, + XeniumKeys.MORPHOLOGY_MIP_FILE, + specs, + imread_kwargs, + image_models_kwargs, + ) + if morphology_focus: + images["morphology_focus"] = _get_images( + path, + XeniumKeys.MORPHOLOGY_MIP_FILE, + specs, + imread_kwargs, + image_models_kwargs, + ) table = _get_tables(path, specs) - # return SpatialData(images=images, shapes=polygons, points=points, table=table) - return SpatialData(shapes=polygons, points=points, table=table) + return SpatialData(images=images, shapes=polygons, points=points, table=table) def _get_polygons(path: Path, file: str, specs: dict[str, Any], n_jobs: int) -> GeoDataFrame: From da512d68575b7a5135faeae98b4cd1d41330bd5e Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 13:39:13 +0100 Subject: [PATCH 09/19] fix steinbock --- src/spatialdata_io/readers/steinbock.py | 42 ++++++++++++------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py index 469c5c06..5a2e63be 100644 --- a/src/spatialdata_io/readers/steinbock.py +++ b/src/spatialdata_io/readers/steinbock.py @@ -18,27 +18,6 @@ __all__ = ["steinbock"] -def _get_images( - path: Path, - sample: str, - imread_kwargs: Mapping[str, Any] = MappingProxyType({}), - image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: - image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs) - return Image2DModel.parse(image, **image_models_kwargs) - - -def _get_labels( - path: Path, - sample: str, - labels_kind: str, - imread_kwargs: Mapping[str, Any] = MappingProxyType({}), - image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: - image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze() - return Labels2DModel.parse(image, **image_models_kwargs) - - def steinbock( path: str | Path, labels_kind: Literal["deepcell", "ilastik"] = "deepcell", @@ -105,3 +84,24 @@ def steinbock( table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id") return SpatialData(images=images, labels=labels, table=table) + + +def _get_images( + path: Path, + sample: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs) + return Image2DModel.parse(image, **image_models_kwargs) + + +def _get_labels( + 
path: Path, + sample: str, + labels_kind: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze() + return Labels2DModel.parse(image, **image_models_kwargs) From aac84f0158c135119896ff2a6e107ba27b79bce1 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 13:51:57 +0100 Subject: [PATCH 10/19] add mcmicro --- src/spatialdata_io/readers/mcmicro.py | 96 +++++++++++++-------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py index 9015501c..a8f513bc 100644 --- a/src/spatialdata_io/readers/mcmicro.py +++ b/src/spatialdata_io/readers/mcmicro.py @@ -19,54 +19,6 @@ __all__ = ["mcmicro"] -def _get_images( - path: Path, - sample: str, - imread_kwargs: Mapping[str, Any] = MappingProxyType({}), - image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: - image = imread(path / McmicroKeys.IMAGES_DIR / f"{sample}{McmicroKeys.IMAGE_SUFFIX}", **imread_kwargs) - return Image2DModel.parse(image, **image_models_kwargs) - - -def _get_labels( - path: Path, - sample: str, - labels_kind: str, - imread_kwargs: Mapping[str, Any] = MappingProxyType({}), - image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: - image = imread( - path - / McmicroKeys.LABELS_DIR - / f"{McmicroKeys.LABELS_PREFIX}{sample}" - / f"{labels_kind}{McmicroKeys.IMAGE_SUFFIX}", - **imread_kwargs, - ).squeeze() - return Labels2DModel.parse(image, **image_models_kwargs) - - -def _get_table( - path: Path, - sample: str, -) -> AnnData: - - table = pd.read_csv(path / McmicroKeys.QUANTIFICATION_DIR / f"{sample}{McmicroKeys.CELL_FEATURES_SUFFIX}") - markers = pd.read_csv(path / McmicroKeys.MARKERS_FILE) - markers.index = markers.marker_name - var = markers.marker_name.tolist() - coords = [McmicroKeys.COORDS_X, McmicroKeys.COORDS_Y] - adata = AnnData( - table[var].to_numpy(), - obs=table.drop(columns=var + coords), - var=markers, - obsm={"spatial": table[coords].to_numpy()}, - dtype=np.float_, - ) - - return TableModel.parse(adata, region=sample, instance_key=McmicroKeys.INSTANCE_KEY) - - def mcmicro( path: str | Path, dataset_id: str, @@ -129,3 +81,51 @@ def mcmicro( table = _get_table(path, dataset_id) return SpatialData(images=images, labels=labels, table=table) + + +def _get_images( + path: Path, + sample: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread(path / McmicroKeys.IMAGES_DIR / f"{sample}{McmicroKeys.IMAGE_SUFFIX}", **imread_kwargs) + return Image2DModel.parse(image, **image_models_kwargs) + + +def _get_labels( + path: Path, + sample: str, + labels_kind: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> Union[SpatialImage, MultiscaleSpatialImage]: + image = imread( + path + / McmicroKeys.LABELS_DIR + / f"{McmicroKeys.LABELS_PREFIX}{sample}" + / f"{labels_kind}{McmicroKeys.IMAGE_SUFFIX}", + **imread_kwargs, + ).squeeze() + return Labels2DModel.parse(image, **image_models_kwargs) + + +def _get_table( + path: Path, + sample: str, +) -> AnnData: + + table = pd.read_csv(path 
/ McmicroKeys.QUANTIFICATION_DIR / f"{sample}{McmicroKeys.CELL_FEATURES_SUFFIX}") + markers = pd.read_csv(path / McmicroKeys.MARKERS_FILE) + markers.index = markers.marker_name + var = markers.marker_name.tolist() + coords = [McmicroKeys.COORDS_X.value, McmicroKeys.COORDS_Y.value] + adata = AnnData( + table[var].to_numpy(), + obs=table.drop(columns=var + coords), + var=markers, + obsm={"spatial": table[coords].to_numpy()}, + dtype=np.float_, + ) + + return TableModel.parse(adata, region=sample, instance_key=McmicroKeys.INSTANCE_KEY.value) From e761e85bea00aa28b9ecf5288c536dedf1197d79 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 14:01:59 +0100 Subject: [PATCH 11/19] pre-commit fixes --- .pre-commit-config.yaml | 8 +++---- src/spatialdata_io/_constants/_constants.py | 2 +- src/spatialdata_io/readers/cosmx.py | 25 +++++++++++++-------- src/spatialdata_io/readers/mcmicro.py | 1 - 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c1c46add..0548fbff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ default_stages: minimum_pre_commit_version: 2.16.0 repos: - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 23.1.0 hooks: - id: black - repo: https://github.com/pre-commit/mirrors-prettier @@ -19,11 +19,11 @@ repos: hooks: - id: blacken-docs - repo: https://github.com/PyCQA/isort - rev: 5.11.4 + rev: 5.12.0 hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.991 + rev: v1.0.1 hooks: - id: mypy additional_dependencies: [numpy==1.24.0] @@ -50,7 +50,7 @@ repos: - id: trailing-whitespace - id: check-case-conflict - repo: https://github.com/PyCQA/autoflake - rev: v2.0.0 + rev: v2.0.1 hooks: - id: autoflake args: diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index f3d36041..c213b315 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -57,7 +57,7 @@ class XeniumKeys(ModeEnum): CELL_METADATA_FILE = "cells.parquet" CELL_X = "x_centroid" CELL_Y = "y_centroid" - CELL_AREA = 'cell_area' + CELL_AREA = "cell_area" # morphology iamges MORPHOLOGY_MIP_FILE = "morphology_mip.ome.tif" diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 7083631f..5d06cf14 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -7,21 +7,26 @@ from pathlib import Path from types import MappingProxyType from typing import Any, Optional -import pyarrow as pa -import pyarrow.parquet as pq +import dask.array as da import numpy as np import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq from anndata import AnnData -from dask_image.imread import imread -import dask.array as da from dask.dataframe.core import DataFrame as DaskDataFrame +from dask_image.imread import imread from scipy.sparse import csr_matrix # from spatialdata._core.core_utils import xy_cs from skimage.transform import estimate_transform from spatialdata import SpatialData -from spatialdata._core.models import Image2DModel, Labels2DModel, TableModel, PointsModel +from spatialdata._core.models import ( + Image2DModel, + Labels2DModel, + PointsModel, + TableModel, +) # from spatialdata._core.ngff.ngff_coordinate_system import NgffAxis # , CoordinateSystem from spatialdata._core.transformations import Affine, Identity @@ -163,7 
+168,10 @@ def cosmx( table.obsm["global"] = table.obs[[CosmxKeys.X_GLOBAL_CELL, CosmxKeys.Y_GLOBAL_CELL]].to_numpy() table.obsm["spatial"] = table.obs[[CosmxKeys.X_LOCAL_CELL, CosmxKeys.Y_LOCAL_CELL]].to_numpy() - table.obs.drop(columns=[CosmxKeys.X_LOCAL_CELL, CosmxKeys.Y_LOCAL_CELL, CosmxKeys.X_GLOBAL_CELL, CosmxKeys.Y_GLOBAL_CELL], inplace=True) + table.obs.drop( + columns=[CosmxKeys.X_LOCAL_CELL, CosmxKeys.Y_LOCAL_CELL, CosmxKeys.X_GLOBAL_CELL, CosmxKeys.Y_GLOBAL_CELL], + inplace=True, + ) # prepare to read images and labels file_extensions = (".jpg", ".png", ".jpeg", ".tif", ".tiff") @@ -251,9 +259,9 @@ def cosmx( for fov in fovs_counts: aff = affine_transforms_to_global[fov] sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() - sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype('category') + sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") # we rename z because we want to treat the data as 2d - sub_table.rename(columns={'z': 'z_raw'}, inplace=True) + sub_table.rename(columns={"z": "z_raw"}, inplace=True) points[fov] = PointsModel.parse( sub_table, coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, @@ -266,7 +274,6 @@ def cosmx( }, ) - # TODO: what to do with fov file? # if fov_file is not None: # fov_positions = pd.read_csv(path / fov_file, header=0, index_col=CosmxKeys.FOV) diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py index a8f513bc..8a04a274 100644 --- a/src/spatialdata_io/readers/mcmicro.py +++ b/src/spatialdata_io/readers/mcmicro.py @@ -114,7 +114,6 @@ def _get_table( path: Path, sample: str, ) -> AnnData: - table = pd.read_csv(path / McmicroKeys.QUANTIFICATION_DIR / f"{sample}{McmicroKeys.CELL_FEATURES_SUFFIX}") markers = pd.read_csv(path / McmicroKeys.MARKERS_FILE) markers.index = markers.marker_name From 2615da13144a4ef839f2c8f22b67c0402dd6b0e9 Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 15:47:50 +0100 Subject: [PATCH 12/19] minor fixes cosmx --- src/spatialdata_io/readers/cosmx.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 5d06cf14..d3e21f67 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -146,12 +146,6 @@ def cosmx( fovs_counts = list(map(str, adata.obs.fov.astype(int).unique())) - # TODO(giovp): uncomment once transform is ready - # input_cs = CoordinateSystem("cxy", axes=[c_axis, y_axis, x_axis]) - # input_cs_labels = CoordinateSystem("cxy", axes=[y_axis, x_axis]) - # output_cs = CoordinateSystem("global", axes=[c_axis, y_axis, x_axis]) - # output_cs_labels = CoordinateSystem("global", axes=[y_axis, x_axis]) - affine_transforms_to_global = {} for fov in fovs_counts: @@ -208,7 +202,6 @@ def cosmx( flipped_im = da.flip(im, axis=0) parsed_im = Image2DModel.parse( flipped_im, - name=fov, transformations={ fov: Identity(), "global": aff, @@ -232,7 +225,6 @@ def cosmx( flipped_la = da.flip(la, axis=0) parsed_la = Labels2DModel.parse( flipped_la, - name=fov, transformations={ fov: Identity(), "global": aff, @@ -249,11 +241,11 @@ def cosmx( if transcripts: # let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting with tempfile.TemporaryDirectory() as tmpdir: - print("converting .csv to .parquet... ", end="") + logger.info("converting .csv to .parquet... 
", end="") assert transcripts_file is not None transcripts_data = pd.read_csv(path / transcripts_file, header=0) transcripts_data.to_parquet(Path(tmpdir) / "transcripts.parquet") - print("done") + logger.info("done") ptable = pq.read_table(Path(tmpdir) / "transcripts.parquet") for fov in fovs_counts: From cfd12f23e6803d243378b2fb75809c6fd52d0aaa Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 15:56:35 +0100 Subject: [PATCH 13/19] remove temp dir --- src/spatialdata_io/readers/cosmx.py | 52 ++++++++++++----------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index d3e21f67..ee7aaf6b 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -2,7 +2,6 @@ import os import re -import tempfile from collections.abc import Mapping from pathlib import Path from types import MappingProxyType @@ -12,7 +11,6 @@ import numpy as np import pandas as pd import pyarrow as pa -import pyarrow.parquet as pq from anndata import AnnData from dask.dataframe.core import DataFrame as DaskDataFrame from dask_image.imread import imread @@ -46,7 +44,6 @@ def cosmx( path: str | Path, dataset_id: Optional[str] = None, - # shape_size: float | int = 1, transcripts: bool = True, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), @@ -72,8 +69,8 @@ def cosmx( Path to the root directory containing *Nanostring* files. dataset_id Name of the dataset. - shape_size - Size of the shape to be used for the centroids of the labels. + transcripts + Whether to also read in transcripts information. imread_kwargs Keyword arguments passed to :func:`dask_image.imread.imread`. image_models_kwargs @@ -239,32 +236,25 @@ def cosmx( points: dict[str, DaskDataFrame] = {} if transcripts: - # let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting - with tempfile.TemporaryDirectory() as tmpdir: - logger.info("converting .csv to .parquet... 
", end="") - assert transcripts_file is not None - transcripts_data = pd.read_csv(path / transcripts_file, header=0) - transcripts_data.to_parquet(Path(tmpdir) / "transcripts.parquet") - logger.info("done") - - ptable = pq.read_table(Path(tmpdir) / "transcripts.parquet") - for fov in fovs_counts: - aff = affine_transforms_to_global[fov] - sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() - sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") - # we rename z because we want to treat the data as 2d - sub_table.rename(columns={"z": "z_raw"}, inplace=True) - points[fov] = PointsModel.parse( - sub_table, - coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, - feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, - instance_key=CosmxKeys.INSTANCE_KEY, - transformations={ - fov: Identity(), - "global": aff, - "global_only_labels": aff, - }, - ) + assert transcripts_file is not None + ptable = pa.read_table(path / transcripts_file, header=0) + for fov in fovs_counts: + aff = affine_transforms_to_global[fov] + sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() + sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") + # we rename z because we want to treat the data as 2d + sub_table.rename(columns={"z": "z_raw"}, inplace=True) + points[fov] = PointsModel.parse( + sub_table, + coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, + feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, + instance_key=CosmxKeys.INSTANCE_KEY, + transformations={ + fov: Identity(), + "global": aff, + "global_only_labels": aff, + }, + ) # TODO: what to do with fov file? # if fov_file is not None: From 31b1d2a4f4ea530009d808a0a633798485ee820c Mon Sep 17 00:00:00 2001 From: giovp Date: Mon, 27 Feb 2023 21:48:51 +0100 Subject: [PATCH 14/19] io fixes --- src/spatialdata_io/readers/cosmx.py | 4 +++- src/spatialdata_io/readers/visium.py | 5 +++-- src/spatialdata_io/readers/xenium.py | 31 ++++++++++++++-------------- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index ee7aaf6b..c3b09015 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -237,7 +237,9 @@ def cosmx( points: dict[str, DaskDataFrame] = {} if transcripts: assert transcripts_file is not None - ptable = pa.read_table(path / transcripts_file, header=0) + from pyarrow.csv import read_csv + + ptable = read_csv(path / transcripts_file) # , header=0) for fov in fovs_counts: aff = affine_transforms_to_global[fov] sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 6939a7df..270c89e0 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -96,7 +96,7 @@ def visium( adata.obs = pd.merge(adata.obs, coords, how="left", left_index=True, right_index=True) coords = adata.obs[[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y]].values adata.obs.drop(columns=[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y], inplace=True) - adata.obs["barcode"] = adata.obs.index.copy() + adata.obs["spot_id"] = np.arange(len(adata)) adata.var_names_make_unique() scalefactors = json.loads((path / VisiumKeys.SCALEFACTORS_FILE).read_bytes()) @@ -105,10 +105,11 @@ def visium( coords, geometry=0, 
radius=np.repeat(scalefactors["spot_diameter_fullres"], len(adata)), + index=adata.obs["spot_id"].copy(), transformations={"global": Identity()}, ) shapes[dataset_id] = circles - table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="barcode") + table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="spot_id") transform_original = Identity() transform_lowres = Scale( diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index f63e0fa3..d9fdd4b0 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -4,7 +4,7 @@ from collections.abc import Mapping from pathlib import Path from types import MappingProxyType -from typing import Any +from typing import Any, Optional import numpy as np import pandas as pd @@ -34,7 +34,6 @@ def xenium( path: str | Path, n_jobs: int = 1, nucleus_boundaries: bool = True, - cell_boundaries: bool = True, transcripts: bool = True, morphology_mip: bool = True, morphology_focus: bool = True, @@ -67,8 +66,6 @@ def xenium( Number of jobs to use for parallel processing. nucleus_boundaries Whether to read nucleus boundaries. - cell_boundaries - Whether to read cell boundaries. transcripts Whether to read transcripts. morphology_mip @@ -99,7 +96,10 @@ def xenium( with open(path / XeniumKeys.XENIUM_SPECS) as f: specs = json.load(f) + specs["region"] = "cell_boundaries" + table = _get_tables(path, specs) polygons = {} + if nucleus_boundaries: polygons["nucleus_boundaries"] = _get_polygons( path, @@ -107,13 +107,11 @@ def xenium( specs, n_jobs, ) - if cell_boundaries: - polygons["cell_boundaries"] = _get_polygons( - path, - XeniumKeys.CELL_BOUNDARIES_FILE, - specs, - n_jobs, - ) + + polygons["cell_boundaries"] = _get_polygons( + path, XeniumKeys.CELL_BOUNDARIES_FILE, specs, n_jobs, idx=table.obs[str(XeniumKeys.CELL_ID)].copy() + ) + points = {} if transcripts: points["transcripts"] = _get_points(path, specs) @@ -136,11 +134,12 @@ def xenium( image_models_kwargs, ) - table = _get_tables(path, specs) return SpatialData(images=images, shapes=polygons, points=points, table=table) -def _get_polygons(path: Path, file: str, specs: dict[str, Any], n_jobs: int) -> GeoDataFrame: +def _get_polygons( + path: Path, file: str, specs: dict[str, Any], n_jobs: int, idx: Optional[ArrayLike] = None +) -> GeoDataFrame: def _poly(arr: ArrayLike) -> Polygon: return Polygon(arr[:-1]) @@ -152,6 +151,8 @@ def _poly(arr: ArrayLike) -> Polygon: for _, i in df.groupby(XeniumKeys.CELL_ID)[[XeniumKeys.BOUNDARIES_VERTEX_X, XeniumKeys.BOUNDARIES_VERTEX_Y]] ) geo_df = GeoDataFrame({"geometry": out}) + if idx is not None: + geo_df.index = idx scale = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) return ShapesModel.parse(geo_df, transformations={"global": scale}) @@ -174,9 +175,9 @@ def _get_tables(path: Path, specs: dict[str, Any]) -> AnnData: adata = _read_10x_h5(path / XeniumKeys.CELL_FEATURE_MATRIX_FILE) metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE) np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values) - metadata[XeniumKeys.CELL_ID] = metadata[XeniumKeys.CELL_ID].astype(str) + metadata[XeniumKeys.CELL_ID] = np.arange(len(metadata[XeniumKeys.CELL_ID])) adata.obs = metadata - table = TableModel.parse(adata, region="/shapes/circles", instance_key=str(XeniumKeys.CELL_ID)) + table = TableModel.parse(adata, region=specs["region"], instance_key=str(XeniumKeys.CELL_ID)) 
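(Editor's aside, not part of the hunk: the `region`/`instance_key` pair being set on this line is the contract that ties each table row to a spatial element, and the same pattern recurs in the steinbock and mcmicro readers in this series. A minimal, self-contained sketch of that pattern on a toy AnnData — the shape, element name and column names are illustrative, not from the patch:

import numpy as np
from anndata import AnnData
from spatialdata import TableModel

adata = AnnData(np.ones((5, 2)))     # 5 cells, 2 features
adata.obs["region"] = "cells"        # which element each row annotates
adata.obs["cell_id"] = np.arange(5)  # instance id within that element
table = TableModel.parse(adata, region="cells", region_key="region", instance_key="cell_id")

End of aside; the hunk continues below.)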
return table From 40ca0b8b4aab1137b1357d1bad9e8d7203d690ca Mon Sep 17 00:00:00 2001 From: Luca Marconato <2664412+LucaMarconato@users.noreply.github.com> Date: Tue, 28 Feb 2023 16:52:49 +0100 Subject: [PATCH 15/19] tested all technologies with napari and added fixes --- src/spatialdata_io/_constants/_constants.py | 1 + src/spatialdata_io/readers/cosmx.py | 70 ++++++++++++++------- src/spatialdata_io/readers/mcmicro.py | 2 +- src/spatialdata_io/readers/steinbock.py | 10 +-- src/spatialdata_io/readers/visium.py | 4 +- src/spatialdata_io/readers/xenium.py | 23 +++++-- 6 files changed, 76 insertions(+), 34 deletions(-) diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index c213b315..aee085d5 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -58,6 +58,7 @@ class XeniumKeys(ModeEnum): CELL_X = "x_centroid" CELL_Y = "y_centroid" CELL_AREA = "cell_area" + CELL_NUCLEUS_AREA = "nucleus_area" # morphology iamges MORPHOLOGY_MIP_FILE = "morphology_mip.ome.tif" diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index c3b09015..22c7cfd3 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -236,27 +236,55 @@ def cosmx( points: dict[str, DaskDataFrame] = {} if transcripts: - assert transcripts_file is not None - from pyarrow.csv import read_csv - - ptable = read_csv(path / transcripts_file) # , header=0) - for fov in fovs_counts: - aff = affine_transforms_to_global[fov] - sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() - sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") - # we rename z because we want to treat the data as 2d - sub_table.rename(columns={"z": "z_raw"}, inplace=True) - points[fov] = PointsModel.parse( - sub_table, - coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, - feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, - instance_key=CosmxKeys.INSTANCE_KEY, - transformations={ - fov: Identity(), - "global": aff, - "global_only_labels": aff, - }, - ) + # assert transcripts_file is not None + # from pyarrow.csv import read_csv + # + # ptable = read_csv(path / transcripts_file) # , header=0) + # for fov in fovs_counts: + # aff = affine_transforms_to_global[fov] + # sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() + # sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") + # # we rename z because we want to treat the data as 2d + # sub_table.rename(columns={"z": "z_raw"}, inplace=True) + # points[fov] = PointsModel.parse( + # sub_table, + # coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, + # feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, + # instance_key=CosmxKeys.INSTANCE_KEY, + # transformations={ + # fov: Identity(), + # "global": aff, + # "global_only_labels": aff, + # }, + # ) + # let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting + import tempfile + import pyarrow.parquet as pq + with tempfile.TemporaryDirectory() as tmpdir: + print("converting .csv to .parquet to improve the speed of the slicing operations... 
", end="") + assert transcripts_file is not None + transcripts_data = pd.read_csv(path / transcripts_file, header=0) + transcripts_data.to_parquet(Path(tmpdir) / "transcripts.parquet") + print("done") + + ptable = pq.read_table(Path(tmpdir) / "transcripts.parquet") + for fov in fovs_counts: + aff = affine_transforms_to_global[fov] + sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() + sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype('category') + # we rename z because we want to treat the data as 2d + sub_table.rename(columns={'z': 'z_raw'}, inplace=True) + points[fov] = PointsModel.parse( + sub_table, + coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, + feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, + instance_key=CosmxKeys.INSTANCE_KEY, + transformations={ + fov: Identity(), + "global": aff, + "global_only_labels": aff, + }, + ) # TODO: what to do with fov file? # if fov_file is not None: diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py index 8a04a274..aa77661e 100644 --- a/src/spatialdata_io/readers/mcmicro.py +++ b/src/spatialdata_io/readers/mcmicro.py @@ -127,4 +127,4 @@ def _get_table( dtype=np.float_, ) - return TableModel.parse(adata, region=sample, instance_key=McmicroKeys.INSTANCE_KEY.value) + return TableModel.parse(adata, region=f'/labels/{sample}', instance_key=McmicroKeys.INSTANCE_KEY.value) diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py index 5a2e63be..8858e578 100644 --- a/src/spatialdata_io/readers/steinbock.py +++ b/src/spatialdata_io/readers/steinbock.py @@ -11,6 +11,7 @@ from multiscale_spatial_image.multiscale_spatial_image import MultiscaleSpatialImage from spatial_image import SpatialImage from spatialdata import Image2DModel, Labels2DModel, SpatialData, TableModel +from spatialdata._core.transformations import Identity from spatialdata._logging import logger from spatialdata_io._constants._constants import SteinbockKeys @@ -75,11 +76,12 @@ def steinbock( ) adata = ad.read(path / SteinbockKeys.CELLS_FILE) - idx = adata.obs.index.str.split(" ").map(lambda x: x[1]) + idx = adata.obs.index.str.split(" ").map(lambda x: int(x[1])) regions = adata.obs.image.str.replace(".tiff", "", regex=False) + regions = regions.apply(lambda x: f'/labels/{x}') adata.obs["cell_id"] = idx adata.obs["region"] = regions - if len(set(samples).difference(set(regions.unique()))): + if len(set([f'/labels/{s}' for s in samples]).difference(set(regions.unique()))): raise ValueError("Samples in table and images are inconsistent, please check.") table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id") @@ -93,7 +95,7 @@ def _get_images( image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> Union[SpatialImage, MultiscaleSpatialImage]: image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs) - return Image2DModel.parse(image, **image_models_kwargs) + return Image2DModel.parse(image, transformations={sample: Identity()}, **image_models_kwargs) def _get_labels( @@ -104,4 +106,4 @@ def _get_labels( image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> Union[SpatialImage, MultiscaleSpatialImage]: image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze() - return Labels2DModel.parse(image, **image_models_kwargs) + return 
Labels2DModel.parse(image, transformations={sample: Identity()}, **image_models_kwargs) diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 270c89e0..c9d9aa61 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -104,7 +104,7 @@ def visium( circles = ShapesModel.parse( coords, geometry=0, - radius=np.repeat(scalefactors["spot_diameter_fullres"], len(adata)), + radius=scalefactors["spot_diameter_fullres"] / 2., index=adata.obs["spot_id"].copy(), transformations={"global": Identity()}, ) @@ -147,4 +147,4 @@ def visium( dataset_id + "_lowres_image": image_lowres_parsed, } - return SpatialData(table=table, shapes=shapes, images=images) + return SpatialData(images=images, shapes=shapes, table=table) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index d9fdd4b0..68a43360 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -96,8 +96,8 @@ def xenium( with open(path / XeniumKeys.XENIUM_SPECS) as f: specs = json.load(f) - specs["region"] = "cell_boundaries" - table = _get_tables(path, specs) + specs["region"] = "/shapes/nucleus_circles" + table, circles = _get_tables_and_circles(path, specs) polygons = {} if nucleus_boundaries: @@ -134,7 +134,7 @@ def xenium( image_models_kwargs, ) - return SpatialData(images=images, shapes=polygons, points=points, table=table) + return SpatialData(images=images, shapes=polygons | {specs['region']: circles}, points=points, table=table) def _get_polygons( @@ -171,14 +171,25 @@ def _get_points(path: Path, specs: dict[str, Any]) -> Table: return points -def _get_tables(path: Path, specs: dict[str, Any]) -> AnnData: +def _get_tables_and_circles(path: Path, specs: dict[str, Any]) -> tuple[AnnData, AnnData]: adata = _read_10x_h5(path / XeniumKeys.CELL_FEATURE_MATRIX_FILE) metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE) np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values) + circ = metadata[[XeniumKeys.CELL_X, XeniumKeys.CELL_Y]].to_numpy() + metadata.drop([XeniumKeys.CELL_X, XeniumKeys.CELL_Y], axis=1, inplace=True) metadata[XeniumKeys.CELL_ID] = np.arange(len(metadata[XeniumKeys.CELL_ID])) adata.obs = metadata - table = TableModel.parse(adata, region=specs["region"], instance_key=str(XeniumKeys.CELL_ID)) - return table + transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) + radii = np.sqrt(adata.obs[XeniumKeys.CELL_NUCLEUS_AREA].to_numpy() / np.pi) + circles = ShapesModel.parse( + circ, + geometry=0, + radius=radii, + transformations={"global": transform}, + index=adata.obs[XeniumKeys.CELL_ID].copy(), + ) + table = TableModel.parse(adata, region=f'/shapes/{specs["region"]}', instance_key=str(XeniumKeys.CELL_ID)) + return table, circles def _get_images( From 0560498080c325de71691035d9ed440dac4db329 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Feb 2023 15:53:40 +0000 Subject: [PATCH 16/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata_io/readers/cosmx.py | 6 ++++-- src/spatialdata_io/readers/mcmicro.py | 2 +- src/spatialdata_io/readers/steinbock.py | 4 ++-- src/spatialdata_io/readers/visium.py | 2 +- src/spatialdata_io/readers/xenium.py | 2 +- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py 
b/src/spatialdata_io/readers/cosmx.py index 22c7cfd3..b54dbbe0 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -259,7 +259,9 @@ def cosmx( # ) # let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting import tempfile + import pyarrow.parquet as pq + with tempfile.TemporaryDirectory() as tmpdir: print("converting .csv to .parquet to improve the speed of the slicing operations... ", end="") assert transcripts_file is not None @@ -271,9 +273,9 @@ def cosmx( for fov in fovs_counts: aff = affine_transforms_to_global[fov] sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas() - sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype('category') + sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category") # we rename z because we want to treat the data as 2d - sub_table.rename(columns={'z': 'z_raw'}, inplace=True) + sub_table.rename(columns={"z": "z_raw"}, inplace=True) points[fov] = PointsModel.parse( sub_table, coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py index aa77661e..ff081fd7 100644 --- a/src/spatialdata_io/readers/mcmicro.py +++ b/src/spatialdata_io/readers/mcmicro.py @@ -127,4 +127,4 @@ def _get_table( dtype=np.float_, ) - return TableModel.parse(adata, region=f'/labels/{sample}', instance_key=McmicroKeys.INSTANCE_KEY.value) + return TableModel.parse(adata, region=f"/labels/{sample}", instance_key=McmicroKeys.INSTANCE_KEY.value) diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py index 8858e578..df24f1f4 100644 --- a/src/spatialdata_io/readers/steinbock.py +++ b/src/spatialdata_io/readers/steinbock.py @@ -78,10 +78,10 @@ def steinbock( adata = ad.read(path / SteinbockKeys.CELLS_FILE) idx = adata.obs.index.str.split(" ").map(lambda x: int(x[1])) regions = adata.obs.image.str.replace(".tiff", "", regex=False) - regions = regions.apply(lambda x: f'/labels/{x}') + regions = regions.apply(lambda x: f"/labels/{x}") adata.obs["cell_id"] = idx adata.obs["region"] = regions - if len(set([f'/labels/{s}' for s in samples]).difference(set(regions.unique()))): + if len({f"/labels/{s}" for s in samples}.difference(set(regions.unique()))): raise ValueError("Samples in table and images are inconsistent, please check.") table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id") diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index c9d9aa61..67dde757 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -104,7 +104,7 @@ def visium( circles = ShapesModel.parse( coords, geometry=0, - radius=scalefactors["spot_diameter_fullres"] / 2., + radius=scalefactors["spot_diameter_fullres"] / 2.0, index=adata.obs["spot_id"].copy(), transformations={"global": Identity()}, ) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 68a43360..4a6d0d21 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -134,7 +134,7 @@ def xenium( image_models_kwargs, ) - return SpatialData(images=images, shapes=polygons | {specs['region']: circles}, points=points, table=table) + return SpatialData(images=images, shapes=polygons | {specs["region"]: circles}, points=points, 
table=table) def _get_polygons( From b05fd8de23196c6dfc43904218ffd8ce57371aac Mon Sep 17 00:00:00 2001 From: giovp Date: Fri, 3 Mar 2023 14:25:10 +0100 Subject: [PATCH 17/19] minor fixes --- src/spatialdata_io/readers/xenium.py | 43 +++++++++++++++++----------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 4a6d0d21..a7640a7a 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -33,6 +33,7 @@ def xenium( path: str | Path, n_jobs: int = 1, + cells_as_shapes: bool = False, nucleus_boundaries: bool = True, transcripts: bool = True, morphology_mip: bool = True, @@ -64,6 +65,8 @@ def xenium( Path to the dataset. n_jobs Number of jobs to use for parallel processing. + cells_as_shapes + Whether to read cells also as shapes. Useful for visualization. nucleus_boundaries Whether to read nucleus boundaries. transcripts @@ -96,8 +99,8 @@ def xenium( with open(path / XeniumKeys.XENIUM_SPECS) as f: specs = json.load(f) - specs["region"] = "/shapes/nucleus_circles" - table, circles = _get_tables_and_circles(path, specs) + specs["region"] = "cell_circles" if cells_as_shapes else "cell_boundaries" + table, circles = _get_tables_and_circles(path, cells_as_shapes, specs) polygons = {} if nucleus_boundaries: @@ -133,8 +136,9 @@ def xenium( imread_kwargs, image_models_kwargs, ) - - return SpatialData(images=images, shapes=polygons | {specs["region"]: circles}, points=points, table=table) + if cells_as_shapes: + return SpatialData(images=images, shapes=polygons | {specs["region"]: circles}, points=points, table=table) + return SpatialData(images=images, shapes=polygons, points=points, table=table) def _get_polygons( @@ -159,6 +163,7 @@ def _poly(arr: ArrayLike) -> Polygon: def _get_points(path: Path, specs: dict[str, Any]) -> Table: table = read_parquet(path / XeniumKeys.TRANSCRIPTS_FILE) + table[XeniumKeys.CELL_ID.value] = pd.Categorical(table[XeniumKeys.CELL_ID.value]) transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) points = PointsModel.parse( @@ -171,25 +176,29 @@ def _get_points(path: Path, specs: dict[str, Any]) -> Table: return points -def _get_tables_and_circles(path: Path, specs: dict[str, Any]) -> tuple[AnnData, AnnData]: +def _get_tables_and_circles( + path: Path, cells_as_shapes: bool, specs: dict[str, Any] +) -> AnnData | tuple[AnnData, AnnData]: adata = _read_10x_h5(path / XeniumKeys.CELL_FEATURE_MATRIX_FILE) metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE) np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values) circ = metadata[[XeniumKeys.CELL_X, XeniumKeys.CELL_Y]].to_numpy() metadata.drop([XeniumKeys.CELL_X, XeniumKeys.CELL_Y], axis=1, inplace=True) - metadata[XeniumKeys.CELL_ID] = np.arange(len(metadata[XeniumKeys.CELL_ID])) adata.obs = metadata - transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y")) - radii = np.sqrt(adata.obs[XeniumKeys.CELL_NUCLEUS_AREA].to_numpy() / np.pi) - circles = ShapesModel.parse( - circ, - geometry=0, - radius=radii, - transformations={"global": transform}, - index=adata.obs[XeniumKeys.CELL_ID].copy(), - ) - table = TableModel.parse(adata, region=f'/shapes/{specs["region"]}', instance_key=str(XeniumKeys.CELL_ID)) - return table, circles + adata.obs["region"] = specs["region"] + table = TableModel.parse(adata, region=specs["region"], region_key="region", instance_key=str(XeniumKeys.CELL_ID)) + if 
From f66f522783c41486f421c2cb897788740309eb4b Mon Sep 17 00:00:00 2001
From: giovp
Date: Fri, 3 Mar 2023 18:43:22 +0100
Subject: [PATCH 18/19] fixes

---
 src/spatialdata_io/readers/cosmx.py  |  8 ++++----
 src/spatialdata_io/readers/visium.py | 30 +++++++++++++++++-----------
 src/spatialdata_io/readers/xenium.py |  1 -
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py
index b54dbbe0..3a7d6ca3 100644
--- a/src/spatialdata_io/readers/cosmx.py
+++ b/src/spatialdata_io/readers/cosmx.py
@@ -120,7 +120,7 @@ def cosmx(
     obs = pd.read_csv(path / meta_file, header=0, index_col=CosmxKeys.INSTANCE_KEY)
     obs[CosmxKeys.FOV] = pd.Categorical(obs[CosmxKeys.FOV].astype(str))
-    obs[CosmxKeys.REGION_KEY] = pd.Categorical(obs[CosmxKeys.FOV].astype(str).apply(lambda s: "/labels/" + s))
+    obs[CosmxKeys.REGION_KEY] = pd.Categorical(obs[CosmxKeys.FOV].astype(str).apply(lambda s: s + "_labels"))
     obs[CosmxKeys.INSTANCE_KEY] = obs.index.astype(np.int64)
     obs.rename_axis(None, inplace=True)
     obs.index = obs.index.astype(str).str.cat(obs[CosmxKeys.FOV].values, sep="_")
@@ -207,7 +207,7 @@ def cosmx(
                 dims=("y", "x", "c"),
                 **image_models_kwargs,
             )
-            images[fov] = parsed_im
+            images[f"{fov}_image"] = parsed_im
         else:
             logger.warning(f"FOV {fov} not found in counts file. Skipping image {fname}.")
 
@@ -230,7 +230,7 @@ def cosmx(
                 dims=("y", "x"),
                 **image_models_kwargs,
             )
-            labels[fov] = parsed_la
+            labels[f"{fov}_labels"] = parsed_la
         else:
             logger.warning(f"FOV {fov} not found in counts file. Skipping labels {fname}.")
 
@@ -276,7 +276,7 @@ def cosmx(
                 sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category")
                 # we rename z because we want to treat the data as 2d
                 sub_table.rename(columns={"z": "z_raw"}, inplace=True)
-                points[fov] = PointsModel.parse(
+                points[f"{fov}_points"] = PointsModel.parse(
                     sub_table,
                     coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT},
                     feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT,
diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py
index 67dde757..d7ffa578 100644
--- a/src/spatialdata_io/readers/visium.py
+++ b/src/spatialdata_io/readers/visium.py
@@ -100,16 +100,6 @@ def visium(
     adata.var_names_make_unique()
 
     scalefactors = json.loads((path / VisiumKeys.SCALEFACTORS_FILE).read_bytes())
-    shapes = {}
-    circles = ShapesModel.parse(
-        coords,
-        geometry=0,
-        radius=scalefactors["spot_diameter_fullres"] / 2.0,
-        index=adata.obs["spot_id"].copy(),
-        transformations={"global": Identity()},
-    )
-    shapes[dataset_id] = circles
-    table = TableModel.parse(adata, region=f"/shapes/{dataset_id}", region_key=None, instance_key="spot_id")
 
     transform_original = Identity()
     transform_lowres = Scale(
@@ -121,6 +111,22 @@ def visium(
         axes=("y", "x"),
     )
 
+    shapes = {}
+    circles = ShapesModel.parse(
+        coords,
+        geometry=0,
+        radius=scalefactors["spot_diameter_fullres"] / 2.0,
+        index=adata.obs["spot_id"].copy(),
+        transformations={
+            "global": Identity(),
+            "downscaled_hires": transform_hires,
+            "downscaled_lowres": transform_lowres,
+        },
+    )
+    shapes[dataset_id] = circles
+    adata.obs["region"] = dataset_id
+    table = TableModel.parse(adata, region=dataset_id, region_key="region", instance_key="spot_id")
+
     full_image = (
         imread(path / f"{dataset_id}{VisiumKeys.IMAGE_TIF_SUFFIX}", **imread_kwargs).squeeze().transpose(2, 0, 1)
     )
@@ -138,8 +144,8 @@ def visium(
         transformations={"global": transform_original},
         **image_models_kwargs,
     )
-    image_hires_parsed = Image2DModel.parse(image_hires, transformations={"downscaled": transform_hires})
-    image_lowres_parsed = Image2DModel.parse(image_lowres, transformations={"downscaled": transform_lowres})
+    image_hires_parsed = Image2DModel.parse(image_hires, transformations={"downscaled_hires": transform_hires})
+    image_lowres_parsed = Image2DModel.parse(image_lowres, transformations={"downscaled_lowres": transform_lowres})
 
     images = {
         dataset_id + "_full_image": full_image_parsed,
diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py
index a7640a7a..30f8d8e9 100644
--- a/src/spatialdata_io/readers/xenium.py
+++ b/src/spatialdata_io/readers/xenium.py
@@ -163,7 +163,6 @@ def _poly(arr: ArrayLike) -> Polygon:
 
 def _get_points(path: Path, specs: dict[str, Any]) -> Table:
     table = read_parquet(path / XeniumKeys.TRANSCRIPTS_FILE)
-    table[XeniumKeys.CELL_ID.value] = pd.Categorical(table[XeniumKeys.CELL_ID.value])
 
     transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y"))
     points = PointsModel.parse(
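With this patch the visium circles carry three named transformations instead of one, so the same shapes element can be drawn over the full-resolution image or either downscaled image without manual rescaling. A hedged usage sketch (the path is a placeholder; coordinate-system names follow the patch):

    from spatialdata_io import visium

    sdata = visium("path/to/visium_output")
    # the circles and the three images now share the coordinate systems
    # "global", "downscaled_hires" and "downscaled_lowres"
    print(sdata)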
From 3907d6839dacb669aab54bfc6e85d42c72bdc5a9 Mon Sep 17 00:00:00 2001
From: giovp
Date: Sat, 4 Mar 2023 19:46:16 +0100
Subject: [PATCH 19/19] add spatial key in obsm

---
 src/spatialdata_io/readers/mcmicro.py   |  9 ++++++---
 src/spatialdata_io/readers/steinbock.py | 13 +++++++------
 src/spatialdata_io/readers/visium.py    |  1 +
 src/spatialdata_io/readers/xenium.py    |  1 +
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/spatialdata_io/readers/mcmicro.py b/src/spatialdata_io/readers/mcmicro.py
index ff081fd7..98f7647b 100644
--- a/src/spatialdata_io/readers/mcmicro.py
+++ b/src/spatialdata_io/readers/mcmicro.py
@@ -56,14 +56,14 @@ def mcmicro(
         raise ValueError("Dataset id is not consistent with sample name.")
 
     images = {}
-    images[dataset_id] = _get_images(
+    images[f"{dataset_id}_image"] = _get_images(
         path,
         dataset_id,
         imread_kwargs,
         image_models_kwargs,
     )
     labels = {}
-    labels[dataset_id] = _get_labels(
+    labels[f"{dataset_id}_cells"] = _get_labels(
         path,
         dataset_id,
         "cell",
@@ -126,5 +126,8 @@ def _get_table(
         obsm={"spatial": table[coords].to_numpy()},
         dtype=np.float_,
     )
+    adata.obs["region"] = f"{sample}_cells"
 
-    return TableModel.parse(adata, region=f"/labels/{sample}", instance_key=McmicroKeys.INSTANCE_KEY.value)
+    return TableModel.parse(
+        adata, region=f"{sample}_cells", region_key="region", instance_key=McmicroKeys.INSTANCE_KEY.value
+    )
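The mcmicro hunks above tie the table to the renamed "{sample}_cells" labels element. A schematic of the region/region_key/instance_key contract these readers all follow (values are illustrative, not taken from a real dataset):

    import anndata as ad
    import numpy as np

    adata = ad.AnnData(X=np.zeros((3, 1)))
    adata.obs["region"] = "sample1_cells"  # which labels element each row annotates
    adata.obs["cell_id"] = [1, 2, 3]       # which label value within that element
    # TableModel.parse(adata, region="sample1_cells", region_key="region",
    #                  instance_key="cell_id") validates exactly this pairing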
diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py
index df24f1f4..70074736 100644
--- a/src/spatialdata_io/readers/steinbock.py
+++ b/src/spatialdata_io/readers/steinbock.py
@@ -61,13 +61,13 @@ def steinbock(
             "They will be ignored."
         )
     for sample in samples:
-        images[sample] = _get_images(
+        images[f"{sample}_image"] = _get_images(
             path,
             sample,
             imread_kwargs,
             image_models_kwargs,
         )
-        labels[sample] = _get_labels(
+        labels[f"{sample}_labels"] = _get_labels(
             path,
             sample,
             labels_kind,
@@ -78,10 +78,11 @@ def steinbock(
     adata = ad.read(path / SteinbockKeys.CELLS_FILE)
     idx = adata.obs.index.str.split(" ").map(lambda x: int(x[1]))
     regions = adata.obs.image.str.replace(".tiff", "", regex=False)
-    regions = regions.apply(lambda x: f"/labels/{x}")
+    regions = regions.apply(lambda x: f"{x}_labels")
     adata.obs["cell_id"] = idx
     adata.obs["region"] = regions
+    adata.obsm["spatial"] = adata.obs[["centroid-0", "centroid-1"]].to_numpy()
-    if len({f"/labels/{s}" for s in samples}.difference(set(regions.unique()))):
+    if len({f"{s}_labels" for s in samples}.difference(set(regions.unique()))):
         raise ValueError("Samples in table and images are inconsistent, please check.")
     table = TableModel.parse(adata, region=regions.unique().tolist(), region_key="region", instance_key="cell_id")
@@ -95,7 +96,7 @@ def _get_images(
     image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
 ) -> Union[SpatialImage, MultiscaleSpatialImage]:
     image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs)
-    return Image2DModel.parse(image, transformations={sample: Identity()}, **image_models_kwargs)
+    return Image2DModel.parse(data=image, transformations={sample: Identity()}, **image_models_kwargs)
 
 
 def _get_labels(
@@ -106,4 +107,4 @@ def _get_labels(
     image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
 ) -> Union[SpatialImage, MultiscaleSpatialImage]:
     image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze()
-    return Labels2DModel.parse(image, transformations={sample: Identity()}, **image_models_kwargs)
+    return Labels2DModel.parse(data=image, transformations={sample: Identity()}, **image_models_kwargs)
diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py
index d7ffa578..64aac258 100644
--- a/src/spatialdata_io/readers/visium.py
+++ b/src/spatialdata_io/readers/visium.py
@@ -95,6 +95,7 @@ def visium(
     adata.obs = pd.merge(adata.obs, coords, how="left", left_index=True, right_index=True)
     coords = adata.obs[[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y]].values
+    adata.obsm["spatial"] = coords
     adata.obs.drop(columns=[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y], inplace=True)
     adata.obs["spot_id"] = np.arange(len(adata))
     adata.var_names_make_unique()
diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py
index 30f8d8e9..18dc70dd 100644
--- a/src/spatialdata_io/readers/xenium.py
+++ b/src/spatialdata_io/readers/xenium.py
@@ -182,6 +182,7 @@ def _get_tables_and_circles(
     metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE)
     np.testing.assert_array_equal(metadata.cell_id.astype(str).values, adata.obs_names.values)
     circ = metadata[[XeniumKeys.CELL_X, XeniumKeys.CELL_Y]].to_numpy()
+    adata.obsm["spatial"] = circ
     metadata.drop([XeniumKeys.CELL_X, XeniumKeys.CELL_Y], axis=1, inplace=True)
     adata.obs = metadata
     adata.obs["region"] = specs["region"]
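These final hunks mirror the cell centroids into adata.obsm["spatial"], the scanpy/squidpy convention, so the returned tables stay usable outside SpatialData as well. A quick check, using steinbock as an example reader (the path is a placeholder):

    from spatialdata_io import steinbock

    sdata = steinbock("path/to/steinbock_output")
    # an (n_cells, 2) array of centroids, row-aligned with sdata.table.obs
    print(sdata.table.obsm["spatial"].shape)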