From d9bd36ae5615d2882a1c987a72c48788687db235 Mon Sep 17 00:00:00 2001 From: Luca Pinello Date: Fri, 29 Jun 2018 20:18:14 -0400 Subject: [PATCH] Fixed alleles plot when frequent alleles contain N --- CRISPResso/CRISPRessoCORE.py | 9 +++++---- Changelog.txt | 13 ++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/CRISPResso/CRISPRessoCORE.py b/CRISPResso/CRISPRessoCORE.py index 1d6669a..15a40e2 100755 --- a/CRISPResso/CRISPRessoCORE.py +++ b/CRISPResso/CRISPRessoCORE.py @@ -8,7 +8,7 @@ ''' -__version__ = "1.0.12" +__version__ = "1.0.13" import sys import errno @@ -696,11 +696,12 @@ def plot_alleles_table(reference_seq,cut_point,df_alleles,sgRNA_name,OUTPUT_DIRE T_color=get_color(190,174,212) C_color=get_color(253,192,134) G_color=get_color(255,255,153) + N_color=get_color(255,255,255) INDEL_color=get_color(230,230,230) - cmap = colors_mpl.ListedColormap([INDEL_color, A_color,T_color,C_color,G_color]) + cmap = colors_mpl.ListedColormap([INDEL_color, A_color,T_color,C_color,G_color,N_color]) - dna_to_numbers={'-':0,'A':1,'T':2,'C':3,'G':4} + dna_to_numbers={'-':0,'A':1,'T':2,'C':3,'G':4,'N':5} seq_to_numbers= lambda seq: [dna_to_numbers[x] for x in seq] X=[] @@ -857,7 +858,7 @@ def print_stacktrace_if_debug(): parser.add_argument('--trim_sequences',help='Enable the trimming of Illumina adapters with Trimmomatic',action='store_true') parser.add_argument('--trimmomatic_options_string', type=str, help='Override options for Trimmomatic',default=' ILLUMINACLIP:%s:0:90:10:0:true MINLEN:40' % get_data('NexteraPE-PE.fa')) parser.add_argument('--min_paired_end_reads_overlap', type=int, help='Parameter for the FLASH read merging step. Minimum required overlap length between two reads to provide a confident overlap. ', default=4) - parser.add_argument('--max_paired_end_reads_overlap', type=int, help='Parameter for the FLASH merging step. Maximum overlap length expected in approximately 90%% of read pairs. Please see the FLASH manual for more information.', default=100) + parser.add_argument('--max_paired_end_reads_overlap', type=int, help='Parameter for the FLASH merging step. Maximum overlap length expected in approximately 90%% of read pairs. Please see the FLASH manual for more information.', default=100) parser.add_argument('--hide_mutations_outside_window_NHEJ',help='This parameter allows to visualize only the mutations overlapping the cleavage site and used to classify a read as NHEJ. This parameter has no effect on the quanitification of the NHEJ. It may be helpful to mask a pre-existing and known mutations or sequencing errors outside the window used for quantification of NHEJ events.',action='store_true') parser.add_argument('-w','--window_around_sgrna', type=int, help='Window(s) in bp around the cleavage position (half on on each side) as determined by the provide guide RNA sequence to quantify the indels. Any indels outside this window are excluded. A value of 0 disables this filter.', default=1) parser.add_argument('--cleavage_offset', type=int, help="Cleavage offset to use within respect to the 3' end of the provided sgRNA sequence. Remember that the sgRNA sequence must be entered without the PAM. The default is -3 and is suitable for the SpCas9 system. For alternate nucleases, other cleavage offsets may be appropriate, for example, if using Cpf1 this parameter would be set to 1.", default=-3) diff --git a/Changelog.txt b/Changelog.txt index f6db76f..20b6957 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,8 +1,11 @@ +[1.0.13] +> Fixed alleles plot when common alleles contain 'N' + [1.0.12] > Added --max_paired_end_reads_overlap for FLASH merging step [1.0.11] -> CRISPRessoPooled looks for cleaned/slugified names (produced by CRISPResso) for combining pools. +> CRISPRessoPooled looks for cleaned/slugified names (produced by CRISPResso) for combining pools. > Fixed a Warning: invalid value encountered in double_scalars y_label_values=np.arange(0,y_max,y_max/6.0) > Fixed a problem with Pandas #ILovePandas where if a subset returns a single row, it returns a scalar instead of a DataFrame. This resulted in an @@ -21,11 +24,11 @@ where less than half of reads align in CRISPRessoPooled > Fixed allels around cut site plot with seaborn >= 0.8.0 > Added the option --allow-outies to flash to also try combining read pairs in the "outie" orientation [1.0.7] -> Alleles around cut sites fixed if multiple cuts are present +> Alleles around cut sites fixed if multiple cuts are present [1.0.6] > Fixed plots when more than 2 sgRNA sequences are provided -> Introduced new summary table for alleles around each cut site +> Introduced new summary table for alleles around each cut site > Introduced a new graphical report for allele around each cut site (Figures files starting with 9.) [1.0.5] @@ -34,7 +37,7 @@ where less than half of reads align in CRISPRessoPooled [1.0.4] > Fixed quantification of insertion in reads that align in the reverse complement of the amplicon > Fixed window not symmetric for w=1 -> Fixed cut point visualization if the guide is in the reverse amplicon +> Fixed cut point visualization if the guide is in the reverse amplicon [1.0.3] @@ -49,5 +52,5 @@ where less than half of reads align in CRISPRessoPooled > New option --split_paired_end to support paired end reads encoded in a single file (for example data obtained trough the MGH core in Boston: ) [1.0.0] First stable release! -> Introduced a new report in output: allelic frequency table (Alleles_frequency_table.txt). Table with a summary of all the alleles detected, % of reads supporting them and +> Introduced a new report in output: allelic frequency table (Alleles_frequency_table.txt). Table with a summary of all the alleles detected, % of reads supporting them and classification (unmodified, NHEJ, HDR and mixed). You can open this file with Excel.