Skip to content

Commit 2e9e27e

Browse files
giovp, LucaMarconato, and pre-commit-ci[bot]
authored
various fixes and additions for IO (scverse#19)
Co-authored-by: Luca Marconato <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 2ab16ca commit 2e9e27e

File tree

9 files changed

+406
-106
lines changed

9 files changed

+406
-106
lines changed

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ default_stages:
77
minimum_pre_commit_version: 2.16.0
88
repos:
99
- repo: https://github.com/psf/black
10-
rev: 22.12.0
10+
rev: 23.1.0
1111
hooks:
1212
- id: black
1313
- repo: https://github.com/pre-commit/mirrors-prettier
@@ -19,11 +19,11 @@ repos:
1919
hooks:
2020
- id: blacken-docs
2121
- repo: https://github.com/PyCQA/isort
22-
rev: 5.11.4
22+
rev: 5.12.0
2323
hooks:
2424
- id: isort
2525
- repo: https://github.com/pre-commit/mirrors-mypy
26-
rev: v0.991
26+
rev: v1.0.1
2727
hooks:
2828
- id: mypy
2929
additional_dependencies: [numpy==1.24.0]
@@ -50,7 +50,7 @@ repos:
5050
- id: trailing-whitespace
5151
- id: check-case-conflict
5252
- repo: https://github.com/PyCQA/autoflake
53-
rev: v2.0.0
53+
rev: v2.0.1
5454
hooks:
5555
- id: autoflake
5656
args:

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ dependencies = [
2626
"joblib",
2727
"imagecodecs",
2828
"dask-image",
29+
"pyarrow",
2930
]
3031

3132
[project.optional-dependencies]

src/spatialdata_io/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
from importlib.metadata import version
22

33
from spatialdata_io.readers.cosmx import cosmx
4+
from spatialdata_io.readers.mcmicro import mcmicro
5+
from spatialdata_io.readers.steinbock import steinbock
46
from spatialdata_io.readers.visium import visium
57
from spatialdata_io.readers.xenium import xenium
68

79
__all__ = [
810
"visium",
911
"xenium",
1012
"cosmx",
13+
"mcmicro",
14+
"steinbock",
1115
]
1216

1317
__version__ = version("spatialdata-io")

src/spatialdata_io/_constants/_constants.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ class XeniumKeys(ModeEnum):
5757
CELL_METADATA_FILE = "cells.parquet"
5858
CELL_X = "x_centroid"
5959
CELL_Y = "y_centroid"
60-
CELL_AREA = 'cell_area'
60+
CELL_AREA = "cell_area"
61+
CELL_NUCLEUS_AREA = "nucleus_area"
6162

6263
# morphology images
6364
MORPHOLOGY_MIP_FILE = "morphology_mip.ome.tif"
@@ -85,3 +86,37 @@ class VisiumKeys(ModeEnum):
8586
SPOTS_FILE = "spatial/tissue_positions.csv"
8687
SPOTS_X = "pxl_row_in_fullres"
8788
SPOTS_Y = "pxl_col_in_fullres"
89+
90+
91+
@unique
92+
class SteinbockKeys(ModeEnum):
93+
"""Keys for *Steinbock* formatted dataset."""
94+
95+
# files and directories
96+
CELLS_FILE = "cells.h5ad"
97+
DEEPCELL_MASKS_DIR = "masks_deepcell"
98+
ILASTIK_MASKS_DIR = "masks_ilastik"
99+
IMAGES_DIR = "ome"
100+
101+
# suffixes for images and labels
102+
IMAGE_SUFFIX = ".ome.tiff"
103+
LABEL_SUFFIX = ".tiff"
104+
105+
106+
@unique
107+
class McmicroKeys(ModeEnum):
108+
"""Keys for *Mcmicro* formatted dataset."""
109+
110+
# files and directories
111+
CELL_FEATURES_SUFFIX = "--unmicst_cell.csv"
112+
QUANTIFICATION_DIR = "quantification"
113+
MARKERS_FILE = "markers.csv"
114+
IMAGES_DIR = "registration"
115+
IMAGE_SUFFIX = ".ome.tif"
116+
LABELS_DIR = "segmentation"
117+
LABELS_PREFIX = "unmicst-"
118+
119+
# metadata
120+
COORDS_X = "X_centroid"
121+
COORDS_Y = "Y_centroid"
122+
INSTANCE_KEY = "CellID"

src/spatialdata_io/readers/cosmx.py

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,29 @@
22

33
import os
44
import re
5-
import tempfile
65
from collections.abc import Mapping
76
from pathlib import Path
87
from types import MappingProxyType
98
from typing import Any, Optional
10-
import pyarrow as pa
11-
import pyarrow.parquet as pq
129

10+
import dask.array as da
1311
import numpy as np
1412
import pandas as pd
13+
import pyarrow as pa
1514
from anndata import AnnData
16-
from dask_image.imread import imread
17-
import dask.array as da
1815
from dask.dataframe.core import DataFrame as DaskDataFrame
16+
from dask_image.imread import imread
1917
from scipy.sparse import csr_matrix
2018

2119
# from spatialdata._core.core_utils import xy_cs
2220
from skimage.transform import estimate_transform
2321
from spatialdata import SpatialData
24-
from spatialdata._core.models import Image2DModel, Labels2DModel, TableModel, PointsModel
22+
from spatialdata._core.models import (
23+
Image2DModel,
24+
Labels2DModel,
25+
PointsModel,
26+
TableModel,
27+
)
2528

2629
# from spatialdata._core.ngff.ngff_coordinate_system import NgffAxis # , CoordinateSystem
2730
from spatialdata._core.transformations import Affine, Identity
@@ -41,7 +44,6 @@
4144
def cosmx(
4245
path: str | Path,
4346
dataset_id: Optional[str] = None,
44-
# shape_size: float | int = 1,
4547
transcripts: bool = True,
4648
imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
4749
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
@@ -67,8 +69,8 @@ def cosmx(
6769
Path to the root directory containing *Nanostring* files.
6870
dataset_id
6971
Name of the dataset.
70-
shape_size
71-
Size of the shape to be used for the centroids of the labels.
72+
transcripts
73+
Whether to also read in transcripts information.
7274
imread_kwargs
7375
Keyword arguments passed to :func:`dask_image.imread.imread`.
7476
image_models_kwargs
@@ -118,7 +120,7 @@ def cosmx(
118120

119121
obs = pd.read_csv(path / meta_file, header=0, index_col=CosmxKeys.INSTANCE_KEY)
120122
obs[CosmxKeys.FOV] = pd.Categorical(obs[CosmxKeys.FOV].astype(str))
121-
obs[CosmxKeys.REGION_KEY] = pd.Categorical(obs[CosmxKeys.FOV].astype(str).apply(lambda s: "/labels/" + s))
123+
obs[CosmxKeys.REGION_KEY] = pd.Categorical(obs[CosmxKeys.FOV].astype(str).apply(lambda s: s + "_labels"))
122124
obs[CosmxKeys.INSTANCE_KEY] = obs.index.astype(np.int64)
123125
obs.rename_axis(None, inplace=True)
124126
obs.index = obs.index.astype(str).str.cat(obs[CosmxKeys.FOV].values, sep="_")
@@ -141,12 +143,6 @@ def cosmx(
141143

142144
fovs_counts = list(map(str, adata.obs.fov.astype(int).unique()))
143145

144-
# TODO(giovp): uncomment once transform is ready
145-
# input_cs = CoordinateSystem("cxy", axes=[c_axis, y_axis, x_axis])
146-
# input_cs_labels = CoordinateSystem("cxy", axes=[y_axis, x_axis])
147-
# output_cs = CoordinateSystem("global", axes=[c_axis, y_axis, x_axis])
148-
# output_cs_labels = CoordinateSystem("global", axes=[y_axis, x_axis])
149-
150146
affine_transforms_to_global = {}
151147

152148
for fov in fovs_counts:
@@ -163,7 +159,10 @@ def cosmx(
163159

164160
table.obsm["global"] = table.obs[[CosmxKeys.X_GLOBAL_CELL, CosmxKeys.Y_GLOBAL_CELL]].to_numpy()
165161
table.obsm["spatial"] = table.obs[[CosmxKeys.X_LOCAL_CELL, CosmxKeys.Y_LOCAL_CELL]].to_numpy()
166-
table.obs.drop(columns=[CosmxKeys.X_LOCAL_CELL, CosmxKeys.Y_LOCAL_CELL, CosmxKeys.X_GLOBAL_CELL, CosmxKeys.Y_GLOBAL_CELL], inplace=True)
162+
table.obs.drop(
163+
columns=[CosmxKeys.X_LOCAL_CELL, CosmxKeys.Y_LOCAL_CELL, CosmxKeys.X_GLOBAL_CELL, CosmxKeys.Y_GLOBAL_CELL],
164+
inplace=True,
165+
)
167166

168167
# prepare to read images and labels
169168
file_extensions = (".jpg", ".png", ".jpeg", ".tif", ".tiff")
@@ -200,7 +199,6 @@ def cosmx(
200199
flipped_im = da.flip(im, axis=0)
201200
parsed_im = Image2DModel.parse(
202201
flipped_im,
203-
name=fov,
204202
transformations={
205203
fov: Identity(),
206204
"global": aff,
@@ -209,7 +207,7 @@ def cosmx(
209207
dims=("y", "x", "c"),
210208
**image_models_kwargs,
211209
)
212-
images[fov] = parsed_im
210+
images[f"{fov}_image"] = parsed_im
213211
else:
214212
logger.warning(f"FOV {fov} not found in counts file. Skipping image {fname}.")
215213

@@ -224,7 +222,6 @@ def cosmx(
224222
flipped_la = da.flip(la, axis=0)
225223
parsed_la = Labels2DModel.parse(
226224
flipped_la,
227-
name=fov,
228225
transformations={
229226
fov: Identity(),
230227
"global": aff,
@@ -233,15 +230,40 @@ def cosmx(
233230
dims=("y", "x"),
234231
**image_models_kwargs,
235232
)
236-
labels[fov] = parsed_la
233+
labels[f"{fov}_labels"] = parsed_la
237234
else:
238235
logger.warning(f"FOV {fov} not found in counts file. Skipping labels {fname}.")
239236

240237
points: dict[str, DaskDataFrame] = {}
241238
if transcripts:
239+
# assert transcripts_file is not None
240+
# from pyarrow.csv import read_csv
241+
#
242+
# ptable = read_csv(path / transcripts_file) # , header=0)
243+
# for fov in fovs_counts:
244+
# aff = affine_transforms_to_global[fov]
245+
# sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas()
246+
# sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category")
247+
# # we rename z because we want to treat the data as 2d
248+
# sub_table.rename(columns={"z": "z_raw"}, inplace=True)
249+
# points[fov] = PointsModel.parse(
250+
# sub_table,
251+
# coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT},
252+
# feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT,
253+
# instance_key=CosmxKeys.INSTANCE_KEY,
254+
# transformations={
255+
# fov: Identity(),
256+
# "global": aff,
257+
# "global_only_labels": aff,
258+
# },
259+
# )
242260
# let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting
261+
import tempfile
262+
263+
import pyarrow.parquet as pq
264+
243265
with tempfile.TemporaryDirectory() as tmpdir:
244-
print("converting .csv to .parquet... ", end="")
266+
print("converting .csv to .parquet to improve the speed of the slicing operations... ", end="")
245267
assert transcripts_file is not None
246268
transcripts_data = pd.read_csv(path / transcripts_file, header=0)
247269
transcripts_data.to_parquet(Path(tmpdir) / "transcripts.parquet")
@@ -251,10 +273,10 @@ def cosmx(
251273
for fov in fovs_counts:
252274
aff = affine_transforms_to_global[fov]
253275
sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas()
254-
sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype('category')
276+
sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category")
255277
# we rename z because we want to treat the data as 2d
256-
sub_table.rename(columns={'z': 'z_raw'}, inplace=True)
257-
points[fov] = PointsModel.parse(
278+
sub_table.rename(columns={"z": "z_raw"}, inplace=True)
279+
points[f"{fov}_points"] = PointsModel.parse(
258280
sub_table,
259281
coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT},
260282
feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT,
@@ -266,7 +288,6 @@ def cosmx(
266288
},
267289
)
268290

269-
270291
# TODO: what to do with fov file?
271292
# if fov_file is not None:
272293
# fov_positions = pd.read_csv(path / fov_file, header=0, index_col=CosmxKeys.FOV)

0 commit comments

Comments
 (0)