From d9bd36ae5615d2882a1c987a72c48788687db235 Mon Sep 17 00:00:00 2001
From: Luca Pinello <lucapinello@gmail.com>
Date: Fri, 29 Jun 2018 20:18:14 -0400
Subject: [PATCH] Fixed alleles plot when frequent alleles contain N

---
 CRISPResso/CRISPRessoCORE.py |  9 +++++----
 Changelog.txt                | 13 ++++++++-----
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/CRISPResso/CRISPRessoCORE.py b/CRISPResso/CRISPRessoCORE.py
index 1d6669a..15a40e2 100755
--- a/CRISPResso/CRISPRessoCORE.py
+++ b/CRISPResso/CRISPRessoCORE.py
@@ -8,7 +8,7 @@
 '''
 
 
-__version__ = "1.0.12"
+__version__ = "1.0.13"
 
 import sys
 import errno
@@ -696,11 +696,12 @@ def plot_alleles_table(reference_seq,cut_point,df_alleles,sgRNA_name,OUTPUT_DIRE
     T_color=get_color(190,174,212)
     C_color=get_color(253,192,134)
     G_color=get_color(255,255,153)
+    N_color=get_color(255,255,255)
     INDEL_color=get_color(230,230,230)
 
-    cmap = colors_mpl.ListedColormap([INDEL_color, A_color,T_color,C_color,G_color])
+    cmap = colors_mpl.ListedColormap([INDEL_color, A_color,T_color,C_color,G_color,N_color])
 
-    dna_to_numbers={'-':0,'A':1,'T':2,'C':3,'G':4}
+    dna_to_numbers={'-':0,'A':1,'T':2,'C':3,'G':4,'N':5}
     seq_to_numbers= lambda seq: [dna_to_numbers[x] for x in seq]
 
     X=[]
@@ -857,7 +858,7 @@ def print_stacktrace_if_debug():
              parser.add_argument('--trim_sequences',help='Enable the trimming of Illumina adapters with Trimmomatic',action='store_true')
              parser.add_argument('--trimmomatic_options_string', type=str, help='Override options for Trimmomatic',default=' ILLUMINACLIP:%s:0:90:10:0:true MINLEN:40' % get_data('NexteraPE-PE.fa'))
              parser.add_argument('--min_paired_end_reads_overlap',  type=int, help='Parameter for the FLASH read merging step. Minimum required overlap length between two reads to provide a confident overlap. ', default=4)
-             parser.add_argument('--max_paired_end_reads_overlap',  type=int, help='Parameter for the FLASH merging step. Maximum overlap length expected in approximately 90%% of read pairs. Please see the FLASH manual for more information.', default=100)    
+             parser.add_argument('--max_paired_end_reads_overlap',  type=int, help='Parameter for the FLASH merging step. Maximum overlap length expected in approximately 90%% of read pairs. Please see the FLASH manual for more information.', default=100)
              parser.add_argument('--hide_mutations_outside_window_NHEJ',help='This parameter allows to visualize only the mutations overlapping the cleavage site and used to classify a read as NHEJ. This parameter has no effect on the quanitification of the NHEJ. It  may be helpful to mask a pre-existing and known mutations or sequencing errors outside the window used for quantification of NHEJ events.',action='store_true')
              parser.add_argument('-w','--window_around_sgrna', type=int, help='Window(s) in bp around the cleavage position (half on on each side) as determined by the provide guide RNA sequence to quantify the indels. Any indels outside this window are excluded. A value of 0 disables this filter.', default=1)
              parser.add_argument('--cleavage_offset', type=int, help="Cleavage offset to use within respect to the 3' end of the provided sgRNA sequence. Remember that the sgRNA sequence must be entered without the PAM. The default is -3 and is suitable for the SpCas9 system. For alternate nucleases, other cleavage offsets may be appropriate, for example, if using Cpf1 this parameter would be set to 1.", default=-3)
diff --git a/Changelog.txt b/Changelog.txt
index f6db76f..20b6957 100644
--- a/Changelog.txt
+++ b/Changelog.txt
@@ -1,8 +1,11 @@
+[1.0.13]
+> Fixed alleles plot failing with a KeyError when common alleles contain 'N'
+
 [1.0.12]
 > Added --max_paired_end_reads_overlap for FLASH merging step
 
 [1.0.11]
-> CRISPRessoPooled looks for cleaned/slugified names (produced by CRISPResso) for combining pools. 
+> CRISPRessoPooled looks for cleaned/slugified names (produced by CRISPResso) for combining pools.
 > Fixed a Warning: invalid value encountered in double_scalars  y_label_values=np.arange(0,y_max,y_max/6.0)
 > Fixed a problem with Pandas #ILovePandas where if a subset returns a single
 row, it returns a scalar instead of a DataFrame.  This resulted in an
@@ -21,11 +24,11 @@ where less than half of reads align in CRISPRessoPooled
 > Fixed  allels around cut site plot with seaborn >= 0.8.0
 > Added the option --allow-outies to flash to also try combining read pairs in the "outie" orientation
 [1.0.7]
-> Alleles around cut sites fixed if multiple cuts are present 
+> Alleles around cut sites fixed if multiple cuts are present
 
 [1.0.6]
 > Fixed plots when more than 2 sgRNA sequences are provided
-> Introduced new summary table for alleles around each cut site 
+> Introduced new summary table for alleles around each cut site
 > Introduced a new graphical report for allele around each cut site (Figures files starting with 9.)
 
 [1.0.5]
@@ -34,7 +37,7 @@ where less than half of reads align in CRISPRessoPooled
 [1.0.4]
 > Fixed quantification of insertion in reads that align in the reverse complement of the amplicon
 > Fixed window not symmetric for w=1
-> Fixed cut point visualization if the guide is in the reverse amplicon 
+> Fixed cut point visualization if the guide is in the reverse amplicon
 
 
 [1.0.3]
@@ -49,5 +52,5 @@ where less than half of reads align in CRISPRessoPooled
 > New option --split_paired_end to support paired end reads encoded in a single file (for example data obtained trough the MGH core in Boston: )
 
 [1.0.0] First stable release!
-> Introduced a new report in output: allelic frequency table (Alleles_frequency_table.txt). Table with a summary of all the alleles detected, % of reads supporting them and 
+> Introduced a new report in output: allelic frequency table (Alleles_frequency_table.txt). Table with a summary of all the alleles detected, % of reads supporting them and
 classification (unmodified, NHEJ, HDR and mixed). You can open this file with Excel.