Skip to content

Commit 56ff6f0

Browse files
Colelyman and mbowcut2 committed
use get_row_around_cut_asymmetrical for allele plot (#117) (pinellolab#527)
* use get_row_around_cut_asymmetrical for allele plot
* tests branch
* removed commented code
* update to use asym function
* white space
* spaces
* integration tests
* Move test branch back to master

---------

Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
1 parent 7c5f159 commit 56ff6f0

4 files changed

Lines changed: 59 additions & 33 deletions

File tree

.github/workflows/integration_tests.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,18 @@ jobs:
133133
if: success() || failure()
134134
run: |
135135
make aggregate test print
136+
137+
- name: Run Asym Allele Plot Left
138+
if: success() || failure()
139+
run: |
140+
make asym-left test print
141+
142+
- name: Run Asym Allele Plot Right
143+
if: success() || failure()
144+
run: |
145+
make asym-right test print
146+
147+
- name: Run Asym Allele Plot Both
148+
if: success() || failure()
149+
run: |
150+
make asym-both test print

CRISPResso2/CRISPRessoCORE.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4581,8 +4581,30 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
45814581
sgRNA_legend = sgRNA_name + " (" + sgRNA +")"
45824582
sgRNA_label = CRISPRessoShared.slugify(sgRNA_label)
45834583

4584-
plot_half_window = max(1, args.plot_window_size)
4585-
df_alleles_around_cut=CRISPRessoShared.get_dataframe_around_cut(df_alleles.loc[df_alleles['Reference_Name'] == ref_name], cut_point, plot_half_window)
4584+
pass_cut_point = False
4585+
4586+
#Set left window size
4587+
if cut_point - args.plot_window_size + 1 >= 0:
4588+
plot_half_window_left = args.plot_window_size
4589+
else:
4590+
plot_half_window_left = cut_point + 1
4591+
pass_cut_point = True
4592+
warn(f'sgRNA {0} is too close to the start of the amplicon to plot the full window. Truncating the window.')
4593+
4594+
#Set right window size
4595+
if cut_point + args.plot_window_size < ref_len:
4596+
plot_half_window_right = args.plot_window_size
4597+
else:
4598+
plot_half_window_right = ref_len - cut_point - 1
4599+
pass_cut_point = True
4600+
warn(f'sgRNA {0} is too close to the end of the amplicon to plot the full window. Truncating the window.')
4601+
4602+
df_alleles_around_cut = CRISPRessoShared.get_dataframe_around_cut_asymmetrical(
4603+
df_alleles.loc[df_alleles['Reference_Name'] == ref_name],
4604+
cut_point,
4605+
plot_half_window_left,
4606+
plot_half_window_right,
4607+
)
45864608
count_total = counts_total[ref_name]
45874609
if args.allele_plot_pcts_only_for_assigned_reference:
45884610
df_alleles_around_cut['%AllReads']=df_alleles_around_cut['%Reads']
@@ -4593,7 +4615,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
45934615
df_alleles_around_cut.to_csv(allele_filename, sep='\t', header=True)
45944616
crispresso2_info['results']['refs'][ref_name]['allele_frequency_files'].append(os.path.basename(allele_filename))
45954617

4596-
ref_seq_around_cut=refs[ref_name]['sequence'][cut_point-plot_half_window+1:cut_point+plot_half_window+1]
4618+
ref_seq_around_cut=refs[ref_name]['sequence'][cut_point-plot_half_window_left+1:cut_point+plot_half_window_right+1]
45974619
fig_filename_root = _jp('9.'+ref_plot_name+'Alleles_frequency_table_around_'+sgRNA_label)
45984620
n_good = df_alleles_around_cut[df_alleles_around_cut['%Reads']>=args.min_frequency_alleles_around_cut_to_plot].shape[0]
45994621
if not args.suppress_plots and n_good > 0:
@@ -4605,11 +4627,12 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
46054627

46064628
new_sgRNA_intervals = []
46074629
#adjust coordinates of sgRNAs
4608-
new_sel_cols_start = cut_point - plot_half_window
4630+
new_sel_cols_start = cut_point - plot_half_window_left
46094631
for (int_start, int_end) in refs[ref_name]['sgRNA_intervals']:
46104632
new_sgRNA_intervals += [(int_start - new_sel_cols_start - 1, int_end - new_sel_cols_start - 1)]
4611-
4612-
4633+
if int_start <= cut_point and cut_point <= int_end:
4634+
new_cut_point = cut_point - new_sel_cols_start - 1
4635+
46134636
prepped_df_alleles, annotations, y_labels, insertion_dict, per_element_annot_kws, is_reference = CRISPRessoPlot.prep_alleles_table(
46144637
df_to_plot,
46154638
ref_seq_around_cut,
@@ -4628,6 +4651,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
46284651
'custom_colors': custom_config["colors"],
46294652
'SAVE_ALSO_PNG': save_png,
46304653
'plot_cut_point': plot_cut_point,
4654+
'cut_point_ind': new_cut_point if pass_cut_point else None,
46314655
'sgRNA_intervals': new_sgRNA_intervals,
46324656
'sgRNA_names': sgRNA_names,
46334657
'sgRNA_mismatches': sgRNA_mismatches,

CRISPResso2/CRISPRessoShared.py

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,32 +1309,21 @@ def split_interleaved_fastq(fastq_filename, output_filename_r1, output_filename_
13091309
# allele modification functions
13101310
######
13111311

1312-
def get_row_around_cut(row, cut_point, offset):
1313-
cut_idx = row['ref_positions'].index(cut_point)
1314-
return row['Aligned_Sequence'][cut_idx - offset + 1:cut_idx + offset + 1], row['Reference_Sequence'][
1315-
cut_idx - offset + 1:cut_idx + offset + 1], \
1316-
row['Read_Status'] == 'UNMODIFIED', row['n_deleted'], row['n_inserted'], row['n_mutated'], row['#Reads'], \
1317-
row['%Reads']
1312+
def get_row_around_cut_asymmetrical(row,cut_point,plot_left,plot_right):
1313+
cut_idx=row['ref_positions'].index(cut_point)
1314+
return row['Aligned_Sequence'][cut_idx-plot_left+1:cut_idx+plot_right+1],row['Reference_Sequence'][cut_idx-plot_left+1:cut_idx+plot_right+1],row['Read_Status']=='UNMODIFIED',row['n_deleted'],row['n_inserted'],row['n_mutated'],row['#Reads'], row['%Reads']
13181315

13191316

1320-
def get_dataframe_around_cut(df_alleles, cut_point, offset, collapse_by_sequence=True):
1317+
def get_dataframe_around_cut_asymmetrical(df_alleles, cut_point,plot_left,plot_right,collapse_by_sequence=True):
13211318
if df_alleles.shape[0] == 0:
13221319
return df_alleles
13231320
ref1 = df_alleles['Reference_Sequence'].iloc[0]
1324-
ref1 = ref1.replace('-', '')
1325-
if (cut_point + offset + 1 > len(ref1)):
1326-
raise (BadParameterException(
1327-
'The plotting window cannot extend past the end of the amplicon. Amplicon length is ' + str(
1328-
len(ref1)) + ' but plot extends to ' + str(cut_point + offset + 1)))
1329-
1330-
df_alleles_around_cut = pd.DataFrame(
1331-
list(df_alleles.apply(lambda row: get_row_around_cut(row, cut_point, offset), axis=1).values),
1332-
columns=['Aligned_Sequence', 'Reference_Sequence', 'Unedited', 'n_deleted', 'n_inserted', 'n_mutated', '#Reads',
1333-
'%Reads'])
1321+
ref1 = ref1.replace('-','')
1322+
1323+
df_alleles_around_cut=pd.DataFrame(list(df_alleles.apply(lambda row: get_row_around_cut_asymmetrical(row,cut_point,plot_left,plot_right),axis=1).values),
1324+
columns=['Aligned_Sequence','Reference_Sequence','Unedited','n_deleted','n_inserted','n_mutated','#Reads','%Reads'])
13341325

1335-
df_alleles_around_cut = df_alleles_around_cut.groupby(
1336-
['Aligned_Sequence', 'Reference_Sequence', 'Unedited', 'n_deleted', 'n_inserted',
1337-
'n_mutated']).sum().reset_index().set_index('Aligned_Sequence')
1326+
df_alleles_around_cut=df_alleles_around_cut.groupby(['Aligned_Sequence','Reference_Sequence','Unedited','n_deleted','n_inserted','n_mutated']).sum().reset_index().set_index('Aligned_Sequence')
13381327

13391328
df_alleles_around_cut.sort_values(by=['#Reads', 'Aligned_Sequence', 'Reference_Sequence'], inplace=True, ascending=[False, True, True])
13401329
df_alleles_around_cut['Unedited'] = df_alleles_around_cut['Unedited'] > 0
@@ -1631,13 +1620,11 @@ def get_amplicon_info_for_guides(ref_seq, guides, guide_mismatches, guide_names,
16311620
if this_sgRNA_cut_points and plot_window_size > 0:
16321621
for cut_p in this_sgRNA_cut_points:
16331622
if cut_p - window_around_cut + 1 < 0:
1634-
raise BadParameterException(
1635-
'Offset around cut would extend to the left of the amplicon. Please decrease plot_window_size parameter. Cut point: ' + str(
1636-
cut_p) + ' window: ' + str(window_around_cut) + ' reference: ' + str(ref_seq_length))
1623+
logging.warning('Offset around cut would extend to the left of the amplicon. Window will be truncated.')
1624+
window_around_cut = cut_p + 1
16371625
if cut_p + window_around_cut > ref_seq_length - 1:
1638-
raise BadParameterException(
1639-
'Offset around cut would be greater than reference sequence length. Please decrease plot_window_size parameter. Cut point: ' + str(
1640-
cut_p) + ' window: ' + str(window_around_cut) + ' reference: ' + str(ref_seq_length))
1626+
logging.warning('Offset around cut would be greater than reference sequence length. Window will be truncated.')
1627+
window_around_cut = ref_seq_length - cut_p - 1
16411628
st = max(0, cut_p - window_around_cut + 1)
16421629
en = min(ref_seq_length - 1, cut_p + window_around_cut + 1)
16431630
this_sgRNA_plot_idxs.append(sorted(list(range(st, en))))

scripts/plotAmbiguous.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def plot_ambiguous_alleles_tables_from_folder(crispresso_output_folder,fig_filen
104104
ref_seq_around_cut=refs[ref_name]['sequence'][cut_point-plot_half_window+1:cut_point+plot_half_window+1]
105105

106106
ambiguous_ref_name = "AMBIGUOUS_"+ref_name
107-
df_alleles_around_cut=CRISPRessoShared.get_dataframe_around_cut(df_alleles.loc[df_alleles['Reference_Name'] == ambiguous_ref_name],cut_point,plot_half_window)
107+
df_alleles_around_cut=CRISPRessoShared.get_dataframe_around_cut_asymmetrical(df_alleles.loc[df_alleles['Reference_Name'] == ambiguous_ref_name],cut_point,plot_half_window, plot_half_window)
108108
this_ambig_allele_count = len(df_alleles_around_cut.index)
109109
if this_ambig_allele_count < 1:
110110
print('No ambiguous reads found for ' + ref_name)

0 commit comments

Comments
 (0)