Skip to content

Commit bfbde13

Browse files
Bento007 and ebezzi authored
[builder] Upgrade to CELLxGENE schema 5.1 (#1192)
* feat: upgrade census builder to schema 5.1 * feat: upgrade COG version * update tests * Apply suggestions from code review * bump census schema version --------- Co-authored-by: Emanuele Bezzi <[email protected]>
1 parent e42bd1f commit bfbde13

File tree

5 files changed

+12
-12
lines changed

5 files changed

+12
-12
lines changed

tools/cellxgene_census_builder/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ dependencies= [
3636
# https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md
3737
"tiledbsoma==1.9.3",
3838
"cellxgene-census==1.12.0",
39-
"cellxgene-ontology-guide==0.6.1",
39+
"cellxgene-ontology-guide==1.0.0",
4040
"scipy==1.12.0",
4141
"fsspec[http]==2024.3.1",
4242
"s3fs==2024.3.1",

tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
# DataFrame columns. True is enabled, False is disabled.
1212
USE_ARROW_DICTIONARY = True
1313

14-
CENSUS_SCHEMA_VERSION = "2.0.1"
14+
CENSUS_SCHEMA_VERSION = "2.1.0"
1515

16-
CXG_SCHEMA_VERSION = "5.0.0" # the CELLxGENE schema version supported
16+
CXG_SCHEMA_VERSION = "5.1.0" # the CELLxGENE schema version supported
1717

1818
# Columns expected in the census_datasets dataframe
1919
CENSUS_DATASETS_TABLE_SPEC = TableSpec.create(

tools/cellxgene_census_builder/tests/anndata/test_anndata.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ def test_empty_estimated_density(tmp_path: pathlib.Path) -> None:
265265
adata = anndata.AnnData(
266266
obs=pd.DataFrame(), var=pd.DataFrame({"feature_id": [0, 1, 2]}), X=sparse.csr_matrix((0, 3), dtype=np.float32)
267267
)
268-
adata.uns["schema_version"] = "5.0.0"
268+
adata.uns["schema_version"] = "5.1.0"
269269
adata.write_h5ad(path)
270270

271271
with open_anndata(path) as ad:
@@ -297,7 +297,7 @@ def test_open_anndata_raw_X(tmp_path: pathlib.Path) -> None:
297297
var=pd.DataFrame({"feature_id": [0, 1, 2]}),
298298
X=sparse.csr_matrix((2, 3), dtype=np.float32),
299299
raw={"X": sparse.csr_matrix((2, 4), dtype=np.float32)},
300-
uns={"schema_version": "5.0.0"},
300+
uns={"schema_version": "5.1.0"},
301301
)
302302
adata.write_h5ad(path)
303303

@@ -410,7 +410,7 @@ def test_multi_species_filter(
410410
index=[f"feature_{i}" for i in range(n_vars)],
411411
),
412412
X=sparse.random(n_obs, n_vars, format="csr", dtype=np.float32),
413-
uns={"schema_version": "5.0.0"},
413+
uns={"schema_version": "5.1.0"},
414414
)
415415
path = (tmp_path / "species.h5ad").as_posix()
416416
adata.write_h5ad(path)

tools/cellxgene_census_builder/tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def get_anndata(
116116
uns["batch_condition"] = np.array(["a", "b"], dtype="object")
117117

118118
# Need to carefully set the corpora schema versions in order for tests to pass.
119-
uns["schema_version"] = "5.0.0" # type: ignore
119+
uns["schema_version"] = "5.1.0" # type: ignore
120120

121121
return anndata.AnnData(X=X, obs=obs, var=var, obsm=obsm, uns=uns)
122122

tools/cellxgene_census_builder/tests/test_manifest.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def test_load_manifest_from_cxg(empty_blocklist: str) -> None:
6565
"collection_doi_label": "Publication 1",
6666
"citation": "citation",
6767
"title": "dataset #1",
68-
"schema_version": "5.0.0",
68+
"schema_version": "5.1.0",
6969
"assets": [
7070
{
7171
"filesize": 123,
@@ -90,7 +90,7 @@ def test_load_manifest_from_cxg(empty_blocklist: str) -> None:
9090
"collection_doi_label": "Publication 2",
9191
"citation": "citation",
9292
"title": "dataset #2",
93-
"schema_version": "5.0.0",
93+
"schema_version": "5.1.0",
9494
"assets": [{"filesize": 456, "filetype": "H5AD", "url": "https://fake.url/dataset_id_2.h5ad"}],
9595
"dataset_version_id": "dataset_id_2",
9696
"cell_count": 11,
@@ -122,7 +122,7 @@ def test_load_manifest_from_cxg_errors_on_datasets_with_old_schema(
122122
"collection_doi_label": "Publication 1",
123123
"citation": "citation",
124124
"title": "dataset #1",
125-
"schema_version": "5.0.0",
125+
"schema_version": "5.1.0",
126126
"assets": [{"filesize": 123, "filetype": "H5AD", "url": "https://fake.url/dataset_id_1.h5ad"}],
127127
"dataset_version_id": "dataset_id_1",
128128
"cell_count": 10,
@@ -166,7 +166,7 @@ def test_load_manifest_from_cxg_excludes_datasets_with_no_assets(
166166
"collection_doi": None,
167167
"citation": "citation",
168168
"title": "dataset #1",
169-
"schema_version": "5.0.0",
169+
"schema_version": "5.1.0",
170170
"assets": [{"filesize": 123, "filetype": "H5AD", "url": "https://fake.url/dataset_id_1.h5ad"}],
171171
"dataset_version_id": "dataset_id_1",
172172
"cell_count": 10,
@@ -179,7 +179,7 @@ def test_load_manifest_from_cxg_excludes_datasets_with_no_assets(
179179
"collection_doi": None,
180180
"citation": "citation",
181181
"title": "dataset #2",
182-
"schema_version": "5.0.0",
182+
"schema_version": "5.1.0",
183183
"assets": [],
184184
"dataset_version_id": "dataset_id_2",
185185
"cell_count": 10,

0 commit comments

Comments
 (0)