Skip to content

Commit 949c22e

Browse files
authored
fix: handle KeyError for missing ontology terms in CellGuide pipeline (#7685)
1 parent 56d7cb5 commit 949c22e

File tree

5 files changed

+156
-17
lines changed

5 files changed

+156
-17
lines changed

backend/cellguide/pipeline/metadata/metadata_generator.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
import logging
22

33
from backend.cellguide.pipeline.metadata.types import CellMetadata, TissueMetadata
4-
from backend.common.census_cube.utils import ontology_parser
4+
from backend.common.census_cube.data.ontology_labels import (
5+
is_ontology_term_deprecated,
6+
ontology_term_description,
7+
ontology_term_label,
8+
ontology_term_synonyms,
9+
)
510

611
logger = logging.getLogger(__name__)
712

@@ -27,20 +32,20 @@ def generate_cellguide_card_metadata(all_cell_type_ids_in_corpus: list[str]) ->
2732

2833
for id in all_cell_type_ids_in_corpus:
2934

30-
if ontology_parser.is_term_deprecated(id):
35+
if is_ontology_term_deprecated(id):
3136
obsolete_cell_ids.append(id)
3237
else:
33-
description = ontology_parser.get_term_description(id)
38+
description = ontology_term_description(id)
3439
if description is not None:
3540
cell_ids_with_cl_description += 1
3641
else:
3742
cell_ids_without_cl_description += 1
3843

3944
metadata = CellMetadata(
40-
name=ontology_parser.get_term_label(id),
45+
name=ontology_term_label(id),
4146
id=id,
4247
clDescription=description,
43-
synonyms=ontology_parser.get_term_synonyms(id),
48+
synonyms=ontology_term_synonyms(id),
4449
)
4550
cellguide_card_metadata[id] = metadata
4651

@@ -71,20 +76,20 @@ def generate_cellguide_tissue_card_metadata(all_tissue_ids_in_corpus: list[str])
7176
uberon_ids_without_description = 0
7277

7378
for id in all_tissue_ids_in_corpus:
74-
if ontology_parser.is_term_deprecated(id):
79+
if is_ontology_term_deprecated(id):
7580
obsolete_uberon_ids.append(id)
7681
else:
77-
description = ontology_parser.get_term_description(id)
82+
description = ontology_term_description(id)
7883
if description is not None:
7984
uberon_ids_with_description += 1
8085
else:
8186
uberon_ids_without_description += 1
8287

8388
metadata = TissueMetadata(
84-
name=ontology_parser.get_term_label(id),
89+
name=ontology_term_label(id),
8590
id=id,
8691
uberonDescription=description,
87-
synonyms=ontology_parser.get_term_synonyms(id),
92+
synonyms=ontology_term_synonyms(id),
8893
)
8994
cellguide_tissue_card_metadata[id] = metadata
9095

backend/cellguide/pipeline/ontology_tree/tree_builder.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from backend.cellguide.pipeline.constants import CELLGUIDE_PIPELINE_NUM_CPUS
1111
from backend.cellguide.pipeline.ontology_tree.types import OntologyTree, OntologyTreeState
12+
from backend.common.census_cube.data.ontology_labels import ontology_term_label
1213
from backend.common.census_cube.utils import ontology_parser, rollup_across_cell_type_descendants, to_dict
1314

1415
logger = logging.getLogger(__name__)
@@ -98,7 +99,7 @@ def __init__(self, cell_counts_df, root_node="CL:0000000"):
9899
self.all_cell_type_ids_to_labels_in_corpus = dict(
99100
zip(
100101
self.all_cell_type_ids_in_corpus,
101-
[self.ontology.get_term_label(c) for c in self.all_cell_type_ids_in_corpus],
102+
[ontology_term_label(c) for c in self.all_cell_type_ids_in_corpus],
102103
strict=False,
103104
)
104105
)
@@ -390,7 +391,7 @@ def _process_tissue__parallel(self, tissueId: str) -> OntologyTreeState:
390391
The number of cells is a dictionary with keys "n_cells" and "n_cells_rollup".
391392
"""
392393
end_nodes = self.uberon_by_celltype[tissueId]
393-
tissue_label = self.ontology.get_term_label(tissueId)
394+
tissue_label = ontology_term_label(tissueId)
394395
uberon_ancestors = self.ontology.get_term_ancestors(tissueId, include_self=True)
395396

396397
# filter out hematopoietic cell types from non-whitelisted tissues

backend/cellguide/pipeline/source_collections/source_collections_generator.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
from pandas import DataFrame
33

44
from backend.cellguide.pipeline.source_collections.types import SourceCollectionsData
5-
from backend.common.census_cube.utils import descendants, ontology_parser
5+
from backend.common.census_cube.data.ontology_labels import ontology_term_label
6+
from backend.common.census_cube.utils import descendants
67

78

89
def generate_source_collections_data(
@@ -40,7 +41,7 @@ def generate_source_collections_data(
4041
# We need tissue, disease, and organism labels AND ontology term ids for each cell type id
4142
df_dict = {
4243
df_agg.index[i]: [
43-
{"label": ontology_parser.get_term_label(cell_type_id), "ontology_term_id": cell_type_id}
44+
{"label": ontology_term_label(cell_type_id), "ontology_term_id": cell_type_id}
4445
for cell_type_id in df_agg.values[i][0].split(",")
4546
]
4647
for i in range(len(df_agg))

backend/common/census_cube/data/ontology_labels.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,46 @@ def ontology_term_label(ontology_term_id: str) -> Optional[str]:
2020
"""
2121
try:
2222
return ontology_parser.get_term_label(ontology_term_id)
23-
# If the ontology term id is invalid, return the ontology term id itself
24-
# This is useful for cases like publication citation.
25-
except ValueError:
23+
# If the ontology term id is invalid or not found in the ontology schema,
24+
# return the ontology term id itself. This is useful for cases like publication
25+
# citation and newly added cell types not yet in the ontology schema.
26+
except (ValueError, KeyError):
2627
return ontology_term_id
2728

2829

30+
def is_ontology_term_deprecated(ontology_term_id: str) -> bool:
31+
"""
32+
Returns whether an ontology term is deprecated. Returns False for unknown terms.
33+
"""
34+
try:
35+
return ontology_parser.is_term_deprecated(ontology_term_id)
36+
except (ValueError, KeyError):
37+
# Assume unknown terms are not deprecated
38+
return False
39+
40+
41+
def ontology_term_description(ontology_term_id: str) -> Optional[str]:
42+
"""
43+
Returns the description for an ontology term, given its id. Returns None if the term is not found.
44+
"""
45+
try:
46+
return ontology_parser.get_term_description(ontology_term_id)
47+
except (ValueError, KeyError):
48+
# Return None for unknown terms (no description available)
49+
return None
50+
51+
52+
def ontology_term_synonyms(ontology_term_id: str) -> list[str]:
53+
"""
54+
Returns the synonyms for an ontology term, given its id. Returns an empty list if the term is not found.
55+
"""
56+
try:
57+
return ontology_parser.get_term_synonyms(ontology_term_id)
58+
except (ValueError, KeyError):
59+
# Return empty list for unknown terms (no synonyms available)
60+
return []
61+
62+
2963
def gene_term_label(gene_ontology_term_id: str) -> Optional[str]:
3064
"""
3165
Returns the label for a gene ontology term, given its id. Returns None if the ontology term id is invalid.

tests/unit/wmg_processing/test_ontology_labels.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
import unittest
2+
from unittest.mock import patch
23

3-
from backend.common.census_cube.data.ontology_labels import gene_term_label, ontology_term_label
4+
from backend.common.census_cube.data.ontology_labels import (
5+
gene_term_label,
6+
is_ontology_term_deprecated,
7+
ontology_term_description,
8+
ontology_term_label,
9+
ontology_term_synonyms,
10+
)
411

512

613
class OntologyLabelTests(unittest.TestCase):
@@ -36,6 +43,97 @@ def test__gene_label(self):
3643
with self.subTest(gene_id):
3744
self.assertEqual(gene_term_label(gene_id), expected_gene_label)
3845

46+
@patch("backend.common.census_cube.data.ontology_labels.ontology_parser")
47+
def test_ontology_term_label_handles_key_error(self, mock_ontology_parser):
48+
"""Test that ontology_term_label handles KeyError for missing terms in ontology."""
49+
# Simulate the error that occurred in production with CL:4033085
50+
mock_ontology_parser.get_term_label.side_effect = KeyError("CL:4033085")
51+
52+
result = ontology_term_label("CL:4033085")
53+
54+
# Should return the term ID itself as a fallback instead of crashing
55+
self.assertEqual(result, "CL:4033085")
56+
57+
@patch("backend.common.census_cube.data.ontology_labels.ontology_parser")
58+
def test_ontology_term_label_handles_value_error(self, mock_ontology_parser):
59+
"""Test that ontology_term_label handles ValueError gracefully."""
60+
mock_ontology_parser.get_term_label.side_effect = ValueError("Invalid term")
61+
62+
result = ontology_term_label("CL:INVALID")
63+
64+
# Should return the term ID itself as a fallback
65+
self.assertEqual(result, "CL:INVALID")
66+
67+
@patch("backend.common.census_cube.data.ontology_labels.ontology_parser")
68+
def test_ontology_term_label_success_path(self, mock_ontology_parser):
69+
"""Test that ontology_term_label returns the correct label when term exists."""
70+
mock_ontology_parser.get_term_label.return_value = "native cell"
71+
72+
result = ontology_term_label("CL:0000003")
73+
74+
self.assertEqual(result, "native cell")
75+
mock_ontology_parser.get_term_label.assert_called_once_with("CL:0000003")
76+
77+
@patch("backend.common.census_cube.data.ontology_labels.ontology_parser")
78+
def test_is_ontology_term_deprecated_handles_key_error(self, mock_ontology_parser):
79+
"""Test that is_ontology_term_deprecated handles KeyError for missing terms."""
80+
mock_ontology_parser.is_term_deprecated.side_effect = KeyError("CL:4033085")
81+
82+
result = is_ontology_term_deprecated("CL:4033085")
83+
84+
# Should return False for unknown terms (assume not deprecated)
85+
self.assertEqual(result, False)
86+
87+
@patch("backend.common.census_cube.data.ontology_labels.ontology_parser")
88+
def test_is_ontology_term_deprecated_success_path(self, mock_ontology_parser):
89+
"""Test that is_ontology_term_deprecated returns the correct value when term exists."""
90+
mock_ontology_parser.is_term_deprecated.return_value = True
91+
92+
result = is_ontology_term_deprecated("CL:0000001")
93+
94+
self.assertEqual(result, True)
95+
mock_ontology_parser.is_term_deprecated.assert_called_once_with("CL:0000001")
96+
97+
@patch("backend.common.census_cube.data.ontology_labels.ontology_parser")
98+
def test_ontology_term_description_handles_key_error(self, mock_ontology_parser):
99+
"""Test that ontology_term_description handles KeyError for missing terms."""
100+
mock_ontology_parser.get_term_description.side_effect = KeyError("CL:4033085")
101+
102+
result = ontology_term_description("CL:4033085")
103+
104+
# Should return None for unknown terms (no description available)
105+
self.assertIsNone(result)
106+
107+
@patch("backend.common.census_cube.data.ontology_labels.ontology_parser")
108+
def test_ontology_term_description_success_path(self, mock_ontology_parser):
109+
"""Test that ontology_term_description returns the correct description when term exists."""
110+
mock_ontology_parser.get_term_description.return_value = "A cell description"
111+
112+
result = ontology_term_description("CL:0000003")
113+
114+
self.assertEqual(result, "A cell description")
115+
mock_ontology_parser.get_term_description.assert_called_once_with("CL:0000003")
116+
117+
@patch("backend.common.census_cube.data.ontology_labels.ontology_parser")
118+
def test_ontology_term_synonyms_handles_key_error(self, mock_ontology_parser):
119+
"""Test that ontology_term_synonyms handles KeyError for missing terms."""
120+
mock_ontology_parser.get_term_synonyms.side_effect = KeyError("CL:4033085")
121+
122+
result = ontology_term_synonyms("CL:4033085")
123+
124+
# Should return empty list for unknown terms (no synonyms available)
125+
self.assertEqual(result, [])
126+
127+
@patch("backend.common.census_cube.data.ontology_labels.ontology_parser")
128+
def test_ontology_term_synonyms_success_path(self, mock_ontology_parser):
129+
"""Test that ontology_term_synonyms returns the correct synonyms when term exists."""
130+
mock_ontology_parser.get_term_synonyms.return_value = ["synonym1", "synonym2"]
131+
132+
result = ontology_term_synonyms("CL:0000003")
133+
134+
self.assertEqual(result, ["synonym1", "synonym2"])
135+
mock_ontology_parser.get_term_synonyms.assert_called_once_with("CL:0000003")
136+
39137

40138
if __name__ == "__main__":
41139
unittest.main()

0 commit comments

Comments
 (0)