Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/spatialdata_io/_constants/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class CosmxKeys(ModeEnum):
FOV_SUFFIX = "fov_positions_file.csv"
IMAGES_DIR = "CellComposite"
LABELS_DIR = "CellLabels"

# metadata
FOV = "fov"
REGION_KEY = "fov_labels"
Expand All @@ -26,6 +27,14 @@ class CosmxKeys(ModeEnum):
Y_LOCAL_TRANSCRIPT = "y_local_px"
TARGET_OF_TRANSCRIPT = "target"

# transcripts
TRANSCRIPTS_SUFFIX = "tx_file.csv"
TRANSCRIPTS_X = "x_local_px"
TRANSCRIPTS_Y = "y_local_px"
TRANSCRIPTS_Z = "z"
FEATURE_NAME = "target"
CELL_COMP = "CellComp"


@unique
class XeniumKeys(ModeEnum):
Expand Down
55 changes: 25 additions & 30 deletions src/spatialdata_io/readers/cosmx.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import numpy as np
import pandas as pd
import pyarrow as pa
from anndata import AnnData
from dask_image.imread import imread
import dask.array as da
Expand Down Expand Up @@ -47,7 +48,7 @@ def cosmx(
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
) -> SpatialData:
"""
Read *Cosmx Nanostring* data.
Read *Nanostring Cosmx* data.

This function reads the following files:

Expand Down Expand Up @@ -112,6 +113,9 @@ def cosmx(
labels_dir = path / CosmxKeys.LABELS_DIR
if not labels_dir.exists():
raise FileNotFoundError(f"Labels directory not found: {labels_dir}.")
transcripts_file = path / f"{dataset_id}_{CosmxKeys.TRANSCRIPTS_SUFFIX}"
if not transcripts_file.exists():
raise FileNotFoundError(f"Transcripts file not found: {transcripts_file}.")

counts = pd.read_csv(path / counts_file, header=0, index_col=CosmxKeys.INSTANCE_KEY)
counts.index = counts.index.astype(str).str.cat(counts.pop(CosmxKeys.FOV).astype(str).values, sep="_")
Expand Down Expand Up @@ -237,35 +241,10 @@ def cosmx(
else:
logger.warning(f"FOV {fov} not found in counts file. Skipping labels {fname}.")

points: dict[str, DaskDataFrame] = {}
if transcripts:
# let's convert the .csv to .parquet and let's read it with pyarrow.parquet for faster subsetting
with tempfile.TemporaryDirectory() as tmpdir:
print("converting .csv to .parquet... ", end="")
assert transcripts_file is not None
transcripts_data = pd.read_csv(path / transcripts_file, header=0)
transcripts_data.to_parquet(Path(tmpdir) / "transcripts.parquet")
print("done")

ptable = pq.read_table(Path(tmpdir) / "transcripts.parquet")
for fov in fovs_counts:
aff = affine_transforms_to_global[fov]
sub_table = ptable.filter(pa.compute.equal(ptable.column(CosmxKeys.FOV), int(fov))).to_pandas()
sub_table[CosmxKeys.INSTANCE_KEY] = sub_table[CosmxKeys.INSTANCE_KEY].astype('category')
# we rename z because we want to treat the data as 2d
sub_table.rename(columns={'z': 'z_raw'}, inplace=True)
points[fov] = PointsModel.parse(
sub_table,
coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT},
feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT,
instance_key=CosmxKeys.INSTANCE_KEY,
transformations={
fov: Identity(),
"global": aff,
"global_only_labels": aff,
},
)

points = {}
points_table = _get_points(transcripts_file)
for i in points_table[CosmxKeys.REGION_KEY].unique():
points[i] = points_table.filter(pa.compute.equal(points_table[CosmxKeys.REGION_KEY], i))

# TODO: what to do with fov file?
# if fov_file is not None:
Expand All @@ -278,3 +257,19 @@ def cosmx(
# continue

return SpatialData(images=images, labels=labels, points=points, table=table)


def _get_points(path: Path) -> Table:
    """
    Load a transcripts CSV file and parse it with ``PointsModel``.

    Parameters
    ----------
    path
        Location of the transcripts ``.csv`` file; it is read with ``pyarrow.csv.read_csv``.

    Returns
    -------
    The object returned by ``PointsModel.parse``, built from the x/y/z coordinate columns and the
    cell-compartment, region, instance and (dictionary-encoded) feature-name annotation columns.
    """
    from pyarrow.csv import read_csv

    transcripts = read_csv(path)

    # Coordinates: pull the three positional columns and densify them through pandas.
    coord_columns = [CosmxKeys.TRANSCRIPTS_X, CosmxKeys.TRANSCRIPTS_Y, CosmxKeys.TRANSCRIPTS_Z]
    coords = transcripts.select(coord_columns).to_pandas().to_numpy()

    # Annotations: three columns taken verbatim from the CSV ...
    annotation_columns = (CosmxKeys.CELL_COMP, CosmxKeys.REGION_KEY, CosmxKeys.INSTANCE_KEY)
    annotations = transcripts.select(annotation_columns)

    # ... plus the feature name, cast to string and dictionary-encoded, inserted as the fourth column.
    feature_column = transcripts.column(CosmxKeys.FEATURE_NAME).cast("string").dictionary_encode()
    annotations = annotations.add_column(3, CosmxKeys.FEATURE_NAME, feature_column)

    return PointsModel.parse(coords=coords, annotations=annotations)
2 changes: 1 addition & 1 deletion src/spatialdata_io/readers/visium.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def visium(
**kwargs: Any,
) -> SpatialData:
"""
Read *10x Genomics* Visium formatted dataset.
Read *10x Genomics Visium* data.

This function reads the following files:

Expand Down
2 changes: 1 addition & 1 deletion src/spatialdata_io/readers/xenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def xenium(
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
) -> SpatialData:
"""
Read a *10X Genomics Xenium* dataset into a SpatialData object.
Read *10X Genomics Xenium* data.

This function reads the following files:

Expand Down