@@ -1309,32 +1309,21 @@ def split_interleaved_fastq(fastq_filename, output_filename_r1, output_filename_
13091309# allele modification functions
13101310######
13111311
1312- def get_row_around_cut (row , cut_point , offset ):
1313- cut_idx = row ['ref_positions' ].index (cut_point )
1314- return row ['Aligned_Sequence' ][cut_idx - offset + 1 :cut_idx + offset + 1 ], row ['Reference_Sequence' ][
1315- cut_idx - offset + 1 :cut_idx + offset + 1 ], \
1316- row ['Read_Status' ] == 'UNMODIFIED' , row ['n_deleted' ], row ['n_inserted' ], row ['n_mutated' ], row ['#Reads' ], \
1317- row ['%Reads' ]
1312+ def get_row_around_cut_asymmetrical (row ,cut_point ,plot_left ,plot_right ):
1313+ cut_idx = row ['ref_positions' ].index (cut_point )
1314+ return row ['Aligned_Sequence' ][cut_idx - plot_left + 1 :cut_idx + plot_right + 1 ],row ['Reference_Sequence' ][cut_idx - plot_left + 1 :cut_idx + plot_right + 1 ],row ['Read_Status' ]== 'UNMODIFIED' ,row ['n_deleted' ],row ['n_inserted' ],row ['n_mutated' ],row ['#Reads' ], row ['%Reads' ]
13181315
13191316
1320- def get_dataframe_around_cut (df_alleles , cut_point , offset , collapse_by_sequence = True ):
1317+ def get_dataframe_around_cut_asymmetrical (df_alleles , cut_point ,plot_left , plot_right , collapse_by_sequence = True ):
13211318 if df_alleles .shape [0 ] == 0 :
13221319 return df_alleles
13231320 ref1 = df_alleles ['Reference_Sequence' ].iloc [0 ]
1324- ref1 = ref1 .replace ('-' , '' )
1325- if (cut_point + offset + 1 > len (ref1 )):
1326- raise (BadParameterException (
1327- 'The plotting window cannot extend past the end of the amplicon. Amplicon length is ' + str (
1328- len (ref1 )) + ' but plot extends to ' + str (cut_point + offset + 1 )))
1329-
1330- df_alleles_around_cut = pd .DataFrame (
1331- list (df_alleles .apply (lambda row : get_row_around_cut (row , cut_point , offset ), axis = 1 ).values ),
1332- columns = ['Aligned_Sequence' , 'Reference_Sequence' , 'Unedited' , 'n_deleted' , 'n_inserted' , 'n_mutated' , '#Reads' ,
1333- '%Reads' ])
1321+ ref1 = ref1 .replace ('-' ,'' )
1322+
1323+ df_alleles_around_cut = pd .DataFrame (list (df_alleles .apply (lambda row : get_row_around_cut_asymmetrical (row ,cut_point ,plot_left ,plot_right ),axis = 1 ).values ),
1324+ columns = ['Aligned_Sequence' ,'Reference_Sequence' ,'Unedited' ,'n_deleted' ,'n_inserted' ,'n_mutated' ,'#Reads' ,'%Reads' ])
13341325
1335- df_alleles_around_cut = df_alleles_around_cut .groupby (
1336- ['Aligned_Sequence' , 'Reference_Sequence' , 'Unedited' , 'n_deleted' , 'n_inserted' ,
1337- 'n_mutated' ]).sum ().reset_index ().set_index ('Aligned_Sequence' )
1326+ df_alleles_around_cut = df_alleles_around_cut .groupby (['Aligned_Sequence' ,'Reference_Sequence' ,'Unedited' ,'n_deleted' ,'n_inserted' ,'n_mutated' ]).sum ().reset_index ().set_index ('Aligned_Sequence' )
13381327
13391328 df_alleles_around_cut .sort_values (by = ['#Reads' , 'Aligned_Sequence' , 'Reference_Sequence' ], inplace = True , ascending = [False , True , True ])
13401329 df_alleles_around_cut ['Unedited' ] = df_alleles_around_cut ['Unedited' ] > 0
@@ -1631,13 +1620,11 @@ def get_amplicon_info_for_guides(ref_seq, guides, guide_mismatches, guide_names,
16311620 if this_sgRNA_cut_points and plot_window_size > 0 :
16321621 for cut_p in this_sgRNA_cut_points :
16331622 if cut_p - window_around_cut + 1 < 0 :
1634- raise BadParameterException (
1635- 'Offset around cut would extend to the left of the amplicon. Please decrease plot_window_size parameter. Cut point: ' + str (
1636- cut_p ) + ' window: ' + str (window_around_cut ) + ' reference: ' + str (ref_seq_length ))
1623+ logging .warning ('Offset around cut would extend to the left of the amplicon. Window will be truncated.' )
1624+ window_around_cut = cut_p + 1
16371625 if cut_p + window_around_cut > ref_seq_length - 1 :
1638- raise BadParameterException (
1639- 'Offset around cut would be greater than reference sequence length. Please decrease plot_window_size parameter. Cut point: ' + str (
1640- cut_p ) + ' window: ' + str (window_around_cut ) + ' reference: ' + str (ref_seq_length ))
1626+ logging .warning ('Offset around cut would be greater than reference sequence length. Window will be truncated.' )
1627+ window_around_cut = ref_seq_length - cut_p - 1
16411628 st = max (0 , cut_p - window_around_cut + 1 )
16421629 en = min (ref_seq_length - 1 , cut_p + window_around_cut + 1 )
16431630 this_sgRNA_plot_idxs .append (sorted (list (range (st , en ))))
0 commit comments