Skip to content

Commit 25dbbf5

Browse files
committed
Revert "update rmc browser globals"
This reverts commit 19d3b5e.
1 parent 19d3b5e commit 25dbbf5

File tree

1 file changed

+26
-21
lines changed

1 file changed

+26
-21
lines changed

rmc/utils/constraint.py

Lines changed: 26 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -2089,45 +2089,50 @@ def _missing_aa_check(ht: hl.Table) -> Union[hl.Table, None]:
20892089

20902090

20912091
def add_globals_rmc_browser(
2092-
ht: hl.Table,
2093-
filter_to_canonical: bool = True,
2092+
ht: hl.Table, filter_to_canonical: bool, keep_outliers: bool = True
20942093
) -> hl.Table:
20952094
"""
20962095
Annotate HT globals with RMC transcript information.
20972096
20982097
Function is used when reformatting RMC results for browser release.
2099-
2100-
Annotates two structs:
2101-
- `transcript_counts`: Counts of total transcripts and transcripts with/without evidence of RMC (QC pass only).
2102-
- `transcript_counts_all`: Counts of all transcripts, transcripts with/without evidence of RMC, and outlier transcripts.
2103-
2098+
Annotates:
2099+
- transcripts not searched for RMC
2100+
- transcripts without evidence of RMC
2101+
- outlier transcripts
21042102
:param ht: Input Table. Should be RMC regions HT annotated with amino acid
21052103
information for region starts and stops.
2106-
:param filter_to_canonical: Whether to filter to canonical transcripts only. Default is True.
2104+
:param filter_to_canonical: Whether to filter to canonical transcripts only.
2105+
:param keep_outliers: Whether to keep outlier transcripts.
2106+
Default is True.
21072107
:return: RMC regions HT with updated globals annotations.
21082108
"""
2109-
# Get all transcripts with evidence of RMC
2109+
# Get transcripts with evidence of RMC
21102110
rmc_transcripts = hl.literal(ht.aggregate(hl.agg.collect_as_set(ht.transcript)))
21112111

2112-
# Get all transcripts from constraint HT
2113-
all_transcripts = get_constraint_transcripts(
2114-
all_transcripts=True, filter_to_canonical=filter_to_canonical
2112+
# Get all QC pass transcripts
2113+
qc_pass_transcripts = get_constraint_transcripts(
2114+
filter_to_canonical=filter_to_canonical, outlier=False
21152115
)
21162116
outlier_transcripts = get_constraint_transcripts(
21172117
filter_to_canonical=filter_to_canonical, outlier=True
21182118
)
21192119

2120+
# Get all transcripts from constraint HT
2121+
all_transcripts = get_constraint_transcripts(
2122+
all_transcripts=True, filter_to_canonical=filter_to_canonical
2123+
)
2124+
if keep_outliers:
2125+
transcripts_no_rmc = all_transcripts.difference(rmc_transcripts)
2126+
transcripts_not_searched = hl.empty_array(hl.tstr)
2127+
else:
2128+
transcripts_no_rmc = qc_pass_transcripts.difference(rmc_transcripts)
2129+
transcripts_not_searched = all_transcripts.difference(qc_pass_transcripts)
2130+
21202131
ht = ht.select_globals()
21212132
return ht.annotate_globals(
2122-
transcript_counts=hl.struct(
2123-
all_transcripts=all_transcripts.difference(outlier_transcripts),
2124-
rmc_transcripts=rmc_transcripts.difference(outlier_transcripts),
2125-
),
2126-
transcript_counts_all=hl.struct(
2127-
all_transcripts=all_transcripts,
2128-
rmc_transcripts=rmc_transcripts,
2129-
outlier_transcripts=outlier_transcripts,
2130-
),
2133+
transcripts_not_searched=transcripts_not_searched,
2134+
transcripts_no_rmc=transcripts_no_rmc,
2135+
outlier_transcripts=outlier_transcripts,
21312136
)
21322137

21332138

0 commit comments

Comments
 (0)