@@ -2089,45 +2089,50 @@ def _missing_aa_check(ht: hl.Table) -> Union[hl.Table, None]:
20892089
20902090
def add_globals_rmc_browser(
    ht: hl.Table, filter_to_canonical: bool, keep_outliers: bool = True
) -> hl.Table:
    """
    Annotate HT globals with RMC transcript information.

    Function is used when reformatting RMC results for browser release.

    All existing globals on `ht` are dropped and replaced with three
    transcript sets:
        - `transcripts_not_searched`: Transcripts not searched for RMC. Empty when
          `keep_outliers` is True; otherwise all constraint transcripts minus the
          QC pass transcripts.
        - `transcripts_no_rmc`: Transcripts without evidence of RMC. Computed
          against all constraint transcripts when `keep_outliers` is True, and
          against QC pass transcripts only otherwise.
        - `outlier_transcripts`: Outlier transcripts.

    :param ht: Input Table. Should be RMC regions HT annotated with amino acid
        information for region starts and stops.
    :param filter_to_canonical: Whether to filter to canonical transcripts only.
    :param keep_outliers: Whether to keep outlier transcripts.
        Default is True.
    :return: RMC regions HT with updated globals annotations.
    """
    # Get transcripts with evidence of RMC (every transcript present in `ht`)
    rmc_transcripts = hl.literal(ht.aggregate(hl.agg.collect_as_set(ht.transcript)))

    outlier_transcripts = get_constraint_transcripts(
        filter_to_canonical=filter_to_canonical, outlier=True
    )

    # Get all transcripts from constraint HT
    all_transcripts = get_constraint_transcripts(
        all_transcripts=True, filter_to_canonical=filter_to_canonical
    )

    if keep_outliers:
        transcripts_no_rmc = all_transcripts.difference(rmc_transcripts)
        # Use an empty *set* (not an array) so this global has the same type
        # regardless of `keep_outliers`; the other branch produces a set.
        transcripts_not_searched = hl.empty_set(hl.tstr)
    else:
        # Get all QC pass transcripts; only needed when outliers are excluded
        qc_pass_transcripts = get_constraint_transcripts(
            filter_to_canonical=filter_to_canonical, outlier=False
        )
        transcripts_no_rmc = qc_pass_transcripts.difference(rmc_transcripts)
        transcripts_not_searched = all_transcripts.difference(qc_pass_transcripts)

    # Drop any pre-existing globals before adding the browser annotations
    ht = ht.select_globals()
    return ht.annotate_globals(
        transcripts_not_searched=transcripts_not_searched,
        transcripts_no_rmc=transcripts_no_rmc,
        outlier_transcripts=outlier_transcripts,
    )
21322137
21332138
0 commit comments