2
2
3
3
import os
4
4
import re
5
- import tempfile
6
5
from collections .abc import Mapping
7
6
from pathlib import Path
8
7
from types import MappingProxyType
9
8
from typing import Any , Optional
10
- import pyarrow as pa
11
- import pyarrow .parquet as pq
12
9
10
+ import dask .array as da
13
11
import numpy as np
14
12
import pandas as pd
13
+ import pyarrow as pa
15
14
from anndata import AnnData
16
- from dask_image .imread import imread
17
- import dask .array as da
18
15
from dask .dataframe .core import DataFrame as DaskDataFrame
16
+ from dask_image .imread import imread
19
17
from scipy .sparse import csr_matrix
20
18
21
19
# from spatialdata._core.core_utils import xy_cs
22
20
from skimage .transform import estimate_transform
23
21
from spatialdata import SpatialData
24
- from spatialdata ._core .models import Image2DModel , Labels2DModel , TableModel , PointsModel
22
+ from spatialdata ._core .models import (
23
+ Image2DModel ,
24
+ Labels2DModel ,
25
+ PointsModel ,
26
+ TableModel ,
27
+ )
25
28
26
29
# from spatialdata._core.ngff.ngff_coordinate_system import NgffAxis # , CoordinateSystem
27
30
from spatialdata ._core .transformations import Affine , Identity
41
44
def cosmx (
42
45
path : str | Path ,
43
46
dataset_id : Optional [str ] = None ,
44
- # shape_size: float | int = 1,
45
47
transcripts : bool = True ,
46
48
imread_kwargs : Mapping [str , Any ] = MappingProxyType ({}),
47
49
image_models_kwargs : Mapping [str , Any ] = MappingProxyType ({}),
@@ -67,8 +69,8 @@ def cosmx(
67
69
Path to the root directory containing *Nanostring* files.
68
70
dataset_id
69
71
Name of the dataset.
70
- shape_size
71
- Size of the shape to be used for the centroids of the labels .
72
+ transcripts
73
+ Whether to also read in transcripts information .
72
74
imread_kwargs
73
75
Keyword arguments passed to :func:`dask_image.imread.imread`.
74
76
image_models_kwargs
@@ -118,7 +120,7 @@ def cosmx(
118
120
119
121
obs = pd .read_csv (path / meta_file , header = 0 , index_col = CosmxKeys .INSTANCE_KEY )
120
122
obs [CosmxKeys .FOV ] = pd .Categorical (obs [CosmxKeys .FOV ].astype (str ))
121
- obs [CosmxKeys .REGION_KEY ] = pd .Categorical (obs [CosmxKeys .FOV ].astype (str ).apply (lambda s : "/labels/" + s ))
123
+ obs [CosmxKeys .REGION_KEY ] = pd .Categorical (obs [CosmxKeys .FOV ].astype (str ).apply (lambda s : s + "_labels" ))
122
124
obs [CosmxKeys .INSTANCE_KEY ] = obs .index .astype (np .int64 )
123
125
obs .rename_axis (None , inplace = True )
124
126
obs .index = obs .index .astype (str ).str .cat (obs [CosmxKeys .FOV ].values , sep = "_" )
@@ -141,12 +143,6 @@ def cosmx(
141
143
142
144
fovs_counts = list (map (str , adata .obs .fov .astype (int ).unique ()))
143
145
144
- # TODO(giovp): uncomment once transform is ready
145
- # input_cs = CoordinateSystem("cxy", axes=[c_axis, y_axis, x_axis])
146
- # input_cs_labels = CoordinateSystem("cxy", axes=[y_axis, x_axis])
147
- # output_cs = CoordinateSystem("global", axes=[c_axis, y_axis, x_axis])
148
- # output_cs_labels = CoordinateSystem("global", axes=[y_axis, x_axis])
149
-
150
146
affine_transforms_to_global = {}
151
147
152
148
for fov in fovs_counts :
@@ -163,7 +159,10 @@ def cosmx(
163
159
164
160
table .obsm ["global" ] = table .obs [[CosmxKeys .X_GLOBAL_CELL , CosmxKeys .Y_GLOBAL_CELL ]].to_numpy ()
165
161
table .obsm ["spatial" ] = table .obs [[CosmxKeys .X_LOCAL_CELL , CosmxKeys .Y_LOCAL_CELL ]].to_numpy ()
166
- table .obs .drop (columns = [CosmxKeys .X_LOCAL_CELL , CosmxKeys .Y_LOCAL_CELL , CosmxKeys .X_GLOBAL_CELL , CosmxKeys .Y_GLOBAL_CELL ], inplace = True )
162
+ table .obs .drop (
163
+ columns = [CosmxKeys .X_LOCAL_CELL , CosmxKeys .Y_LOCAL_CELL , CosmxKeys .X_GLOBAL_CELL , CosmxKeys .Y_GLOBAL_CELL ],
164
+ inplace = True ,
165
+ )
167
166
168
167
# prepare to read images and labels
169
168
file_extensions = (".jpg" , ".png" , ".jpeg" , ".tif" , ".tiff" )
@@ -200,7 +199,6 @@ def cosmx(
200
199
flipped_im = da .flip (im , axis = 0 )
201
200
parsed_im = Image2DModel .parse (
202
201
flipped_im ,
203
- name = fov ,
204
202
transformations = {
205
203
fov : Identity (),
206
204
"global" : aff ,
@@ -209,7 +207,7 @@ def cosmx(
209
207
dims = ("y" , "x" , "c" ),
210
208
** image_models_kwargs ,
211
209
)
212
- images [fov ] = parsed_im
210
+ images [f" { fov } _image" ] = parsed_im
213
211
else :
214
212
logger .warning (f"FOV { fov } not found in counts file. Skipping image { fname } ." )
215
213
@@ -224,7 +222,6 @@ def cosmx(
224
222
flipped_la = da .flip (la , axis = 0 )
225
223
parsed_la = Labels2DModel .parse (
226
224
flipped_la ,
227
- name = fov ,
228
225
transformations = {
229
226
fov : Identity (),
230
227
"global" : aff ,
@@ -233,15 +230,40 @@ def cosmx(
233
230
dims = ("y" , "x" ),
234
231
** image_models_kwargs ,
235
232
)
236
- labels [fov ] = parsed_la
233
+ labels [f" { fov } _labels" ] = parsed_la
237
234
else :
238
235
logger .warning (f"FOV { fov } not found in counts file. Skipping labels { fname } ." )
239
236
240
237
points : dict [str , DaskDataFrame ] = {}
241
238
if transcripts :
239
+ # assert transcripts_file is not None
240
+ # from pyarrow.csv import read_csv
241
+ #
242
+ # ptable = read_csv(path / transcripts_file) # , header=0)
243
+ # for fov in fovs_counts:
244
+ # aff = affine_transforms_to_global[fov]
245
+ # sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas()
246
+ # sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype("category")
247
+ # # we rename z because we want to treat the data as 2d
248
+ # sub_table.rename(columns={"z": "z_raw"}, inplace=True)
249
+ # points[fov] = PointsModel.parse(
250
+ # sub_table,
251
+ # coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT},
252
+ # feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT,
253
+ # instance_key=CosmxKeys.INSTANCE_KEY,
254
+ # transformations={
255
+ # fov: Identity(),
256
+ # "global": aff,
257
+ # "global_only_labels": aff,
258
+ # },
259
+ # )
242
260
# let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting
261
+ import tempfile
262
+
263
+ import pyarrow .parquet as pq
264
+
243
265
with tempfile .TemporaryDirectory () as tmpdir :
244
- print ("converting .csv to .parquet... " , end = "" )
266
+ print ("converting .csv to .parquet to improve the speed of the slicing operations ... " , end = "" )
245
267
assert transcripts_file is not None
246
268
transcripts_data = pd .read_csv (path / transcripts_file , header = 0 )
247
269
transcripts_data .to_parquet (Path (tmpdir ) / "transcripts.parquet" )
@@ -251,10 +273,10 @@ def cosmx(
251
273
for fov in fovs_counts :
252
274
aff = affine_transforms_to_global [fov ]
253
275
sub_table = ptable .filter (pa .compute .equal (ptable .column (CosmxKeys .FOV ), int (fov ))).to_pandas ()
254
- sub_table [CosmxKeys .INSTANCE_KEY ] = sub_table [CosmxKeys .INSTANCE_KEY ].astype (' category' )
276
+ sub_table [CosmxKeys .INSTANCE_KEY ] = sub_table [CosmxKeys .INSTANCE_KEY ].astype (" category" )
255
277
# we rename z because we want to treat the data as 2d
256
- sub_table .rename (columns = {'z' : ' z_raw' }, inplace = True )
257
- points [fov ] = PointsModel .parse (
278
+ sub_table .rename (columns = {"z" : " z_raw" }, inplace = True )
279
+ points [f" { fov } _points" ] = PointsModel .parse (
258
280
sub_table ,
259
281
coordinates = {"x" : CosmxKeys .X_LOCAL_TRANSCRIPT , "y" : CosmxKeys .Y_LOCAL_TRANSCRIPT },
260
282
feature_key = CosmxKeys .TARGET_OF_TRANSCRIPT ,
@@ -266,7 +288,6 @@ def cosmx(
266
288
},
267
289
)
268
290
269
-
270
291
# TODO: what to do with fov file?
271
292
# if fov_file is not None:
272
293
# fov_positions = pd.read_csv(path / fov_file, header=0, index_col=CosmxKeys.FOV)
0 commit comments