Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tools/cellxgene_census_builder/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ dependencies= [
# https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md
"tiledbsoma==1.9.3",
"cellxgene-census==1.12.0",
"cellxgene-ontology-guide==0.6.1",
"cellxgene-ontology-guide==1.0.0",
"scipy==1.12.0",
"fsspec[http]==2024.3.1",
"s3fs==2024.3.1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
# DataFrame columns. True is enabled, False is disabled.
USE_ARROW_DICTIONARY = True

CENSUS_SCHEMA_VERSION = "2.0.1"
CENSUS_SCHEMA_VERSION = "2.1.0"

CXG_SCHEMA_VERSION = "5.0.0" # the CELLxGENE schema version supported
CXG_SCHEMA_VERSION = "5.1.0" # the CELLxGENE schema version supported
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ebezzi @atolopko-czi - Should CENSUS_SCHEMA_VERSION also be bumped? If so, is it a patch or minor version bump?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would only bump it if there are noticeable and traceable changes (e.g., major ontology changes or new/removed assays). If we do bump it, we should be able to add corresponding doc changes.

Copy link
Collaborator

@pablo-gar pablo-gar Jun 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we need to bump the minor version number; see my comment:
#1194 (comment)


# Columns expected in the census_datasets dataframe
CENSUS_DATASETS_TABLE_SPEC = TableSpec.create(
Expand Down
6 changes: 3 additions & 3 deletions tools/cellxgene_census_builder/tests/anndata/test_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def test_empty_estimated_density(tmp_path: pathlib.Path) -> None:
adata = anndata.AnnData(
obs=pd.DataFrame(), var=pd.DataFrame({"feature_id": [0, 1, 2]}), X=sparse.csr_matrix((0, 3), dtype=np.float32)
)
adata.uns["schema_version"] = "5.0.0"
adata.uns["schema_version"] = "5.1.0"
adata.write_h5ad(path)

with open_anndata(path) as ad:
Expand Down Expand Up @@ -297,7 +297,7 @@ def test_open_anndata_raw_X(tmp_path: pathlib.Path) -> None:
var=pd.DataFrame({"feature_id": [0, 1, 2]}),
X=sparse.csr_matrix((2, 3), dtype=np.float32),
raw={"X": sparse.csr_matrix((2, 4), dtype=np.float32)},
uns={"schema_version": "5.0.0"},
uns={"schema_version": "5.1.0"},
)
adata.write_h5ad(path)

Expand Down Expand Up @@ -410,7 +410,7 @@ def test_multi_species_filter(
index=[f"feature_{i}" for i in range(n_vars)],
),
X=sparse.random(n_obs, n_vars, format="csr", dtype=np.float32),
uns={"schema_version": "5.0.0"},
uns={"schema_version": "5.1.0"},
)
path = (tmp_path / "species.h5ad").as_posix()
adata.write_h5ad(path)
Expand Down
2 changes: 1 addition & 1 deletion tools/cellxgene_census_builder/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def get_anndata(
uns["batch_condition"] = np.array(["a", "b"], dtype="object")

# Need to carefully set the corpora schema versions in order for tests to pass.
uns["schema_version"] = "5.0.0" # type: ignore
uns["schema_version"] = "5.1.0" # type: ignore

return anndata.AnnData(X=X, obs=obs, var=var, obsm=obsm, uns=uns)

Expand Down
10 changes: 5 additions & 5 deletions tools/cellxgene_census_builder/tests/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_load_manifest_from_cxg(empty_blocklist: str) -> None:
"collection_doi": None,
"citation": "citation",
"title": "dataset #1",
"schema_version": "5.0.0",
"schema_version": "5.1.0",
"assets": [
{
"filesize": 123,
Expand All @@ -88,7 +88,7 @@ def test_load_manifest_from_cxg(empty_blocklist: str) -> None:
"collection_doi": None,
"citation": "citation",
"title": "dataset #2",
"schema_version": "5.0.0",
"schema_version": "5.1.0",
"assets": [{"filesize": 456, "filetype": "H5AD", "url": "https://fake.url/dataset_id_2.h5ad"}],
"dataset_version_id": "dataset_id_2",
"cell_count": 11,
Expand Down Expand Up @@ -119,7 +119,7 @@ def test_load_manifest_from_cxg_errors_on_datasets_with_old_schema(
"collection_doi": None,
"citation": "citation",
"title": "dataset #1",
"schema_version": "5.0.0",
"schema_version": "5.1.0",
"assets": [{"filesize": 123, "filetype": "H5AD", "url": "https://fake.url/dataset_id_1.h5ad"}],
"dataset_version_id": "dataset_id_1",
"cell_count": 10,
Expand Down Expand Up @@ -162,7 +162,7 @@ def test_load_manifest_from_cxg_excludes_datasets_with_no_assets(
"collection_doi": None,
"citation": "citation",
"title": "dataset #1",
"schema_version": "5.0.0",
"schema_version": "5.1.0",
"assets": [{"filesize": 123, "filetype": "H5AD", "url": "https://fake.url/dataset_id_1.h5ad"}],
"dataset_version_id": "dataset_id_1",
"cell_count": 10,
Expand All @@ -175,7 +175,7 @@ def test_load_manifest_from_cxg_excludes_datasets_with_no_assets(
"collection_doi": None,
"citation": "citation",
"title": "dataset #2",
"schema_version": "5.0.0",
"schema_version": "5.1.0",
"assets": [],
"dataset_version_id": "dataset_id_2",
"cell_count": 10,
Expand Down