From 319cc0b524db54c66d730ee26243eefe0114ee56 Mon Sep 17 00:00:00 2001 From: Stephen Kazakoff Date: Mon, 27 Jul 2020 00:34:21 +1000 Subject: [PATCH 1/6] Die unless a FASTA file is provided in offline mode --- Downstream.pm | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Downstream.pm b/Downstream.pm index 218a2ae5..4c829913 100755 --- a/Downstream.pm +++ b/Downstream.pm @@ -59,6 +59,18 @@ use POSIX qw(ceil); use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); +sub new { + my $class = shift; + + my $self = $class->SUPER::new(@_); + + if($self->{config}{offline} && !$self->{config}{fasta}) { + die("ERROR: cannot function in offline mode without a FASTA file\n"); + } + + return $self; +} + sub version { return '2.3'; } From dfdac88b5d67c305f6f44b30d4888938f917f0a2 Mon Sep 17 00:00:00 2001 From: Stephen Kazakoff Date: Mon, 27 Jul 2020 00:37:58 +1000 Subject: [PATCH 2/6] Wrap long lines --- Downstream.pm | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Downstream.pm b/Downstream.pm index 4c829913..160d4bca 100755 --- a/Downstream.pm +++ b/Downstream.pm @@ -102,7 +102,10 @@ sub run { my $tv = $tva->transcript_variation; my $tr = $tv->transcript; - my $cds_seq = defined($tr->{_variation_effect_feature_cache}) ? $tr->{_variation_effect_feature_cache}->{translateable_seq} : $tr->translateable_seq; + + my $cds_seq = defined($tr->{_variation_effect_feature_cache}) + ? $tr->{_variation_effect_feature_cache}->{translateable_seq} + : $tr->translateable_seq; # get the sequence to translate my ($low_pos, $high_pos) = sort {$a <=> $b} ($tv->cds_start, $tv->cds_end); @@ -137,8 +140,13 @@ sub run { $new_pep =~ s/\*.*//; # compare lengths - my $translation = defined($tr->{_variation_effect_feature_cache}) && defined($tr->{_variation_effect_feature_cache}->{peptide}) ? $tr->{_variation_effect_feature_cache}->{peptide} : $tr->translation->seq; - my $new_length = ($tv->translation_start < $tv->translation_end ?
$tv->translation_start : $tv->translation_end) + length($new_pep); + my $translation = defined($tr->{_variation_effect_feature_cache}->{peptide}) + ? $tr->{_variation_effect_feature_cache}->{peptide} + : $tr->translation->seq; + + my ($pep_start, $pep_end) = ($tv->translation_start, $tv->translation_end); + + my $new_length = ($pep_start < $pep_end ? $pep_start : $pep_end) + length($new_pep); return { DownstreamProtein => $new_pep, From 078896c095785c656925f741b43b658aafae3b0d Mon Sep 17 00:00:00 2001 From: Stephen Kazakoff Date: Mon, 27 Jul 2020 01:06:22 +1000 Subject: [PATCH 3/6] Reduce indentation --- Downstream.pm | 100 ++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 52 deletions(-) diff --git a/Downstream.pm b/Downstream.pm index 160d4bca..2f4e9075 100755 --- a/Downstream.pm +++ b/Downstream.pm @@ -93,68 +93,64 @@ sub get_header_info { sub run { my ($self, $tva) = @_; - my @ocs = @{$tva->get_all_OverlapConsequences}; - - if(grep {$_->SO_term eq 'frameshift_variant'} @ocs) { - - # can't do it for splice sites - return {} if grep {$_->SO_term =~ /splice/} @ocs; + my @SO_terms = map { $_->SO_term } @{$tva->get_all_OverlapConsequences}; + + return {} unless grep { $_ eq 'frameshift_variant' } @SO_terms; - my $tv = $tva->transcript_variation; - my $tr = $tv->transcript; + return {} if grep { /splice/ } @SO_terms; - my $cds_seq = defined($tr->{_variation_effect_feature_cache}) - ? $tr->{_variation_effect_feature_cache}->{translateable_seq} - : $tr->translateable_seq; + my $tv = $tva->transcript_variation; + my $tr = $tv->transcript; + + my $cds_seq = defined($tr->{_variation_effect_feature_cache}) + ? $tr->{_variation_effect_feature_cache}->{translateable_seq} + : $tr->translateable_seq; - # get the sequence to translate - my ($low_pos, $high_pos) = sort {$a <=> $b} ($tv->cds_start, $tv->cds_end); - my $is_insertion = $tv->cds_start > $tv->cds_end ? 
1 : 0; - my $last_complete_codon = (ceil($low_pos / 3) - 1) * 3; - my $before_var_seq = substr $cds_seq, $last_complete_codon, $low_pos - $last_complete_codon - ($is_insertion ? 0 : 1); - my $after_var_seq = substr $cds_seq, $high_pos - ($is_insertion ? 1 : 0); - my $to_translate = $before_var_seq.$tva->feature_seq.$after_var_seq; - my $three_prime_utr_seq = $tr->three_prime_utr->seq() if ($tr->three_prime_utr); - $to_translate = $to_translate.$three_prime_utr_seq if ($three_prime_utr_seq); - $to_translate =~ s/\-//g; + # get the sequence to translate + my ($low_pos, $high_pos) = sort {$a <=> $b} ($tv->cds_start, $tv->cds_end); + my $is_insertion = $tv->cds_start > $tv->cds_end ? 1 : 0; + my $last_complete_codon = (ceil($low_pos / 3) - 1) * 3; + my $before_var_seq = substr $cds_seq, $last_complete_codon, $low_pos - $last_complete_codon - ($is_insertion ? 0 : 1); + my $after_var_seq = substr $cds_seq, $high_pos - ($is_insertion ? 1 : 0); + my $to_translate = $before_var_seq.$tva->feature_seq.$after_var_seq; + my $three_prime_utr_seq = $tr->three_prime_utr->seq() if ($tr->three_prime_utr); + $to_translate = $to_translate.$three_prime_utr_seq if ($three_prime_utr_seq); + $to_translate =~ s/\-//g; - # create a bioperl object - my $codon_seq = Bio::Seq->new( - -seq => $to_translate, - -moltype => 'dna', - -alphabet => 'dna' - ); + # create a bioperl object + my $codon_seq = Bio::Seq->new( + -seq => $to_translate, + -moltype => 'dna', + -alphabet => 'dna' + ); - # get codon table - my $codon_table; - if(defined($tr->{_variation_effect_feature_cache})) { - $codon_table = $tr->{_variation_effect_feature_cache}->{codon_table} || 1; - } - else { - my ($attrib) = @{$tr->slice->get_all_Attributes('codon_table')}; - $codon_table = $attrib ? 
$attrib->value || 1 : 1; - } + # get codon table + my $codon_table; + if(defined($tr->{_variation_effect_feature_cache})) { + $codon_table = $tr->{_variation_effect_feature_cache}->{codon_table} || 1; + } + else { + my ($attrib) = @{$tr->slice->get_all_Attributes('codon_table')}; + $codon_table = $attrib ? $attrib->value || 1 : 1; + } - # translate - my $new_pep = $codon_seq->translate(undef, undef, undef, $codon_table)->seq(); - $new_pep =~ s/\*.*//; + # translate + my $new_pep = $codon_seq->translate(undef, undef, undef, $codon_table)->seq(); + $new_pep =~ s/\*.*//; - # compare lengths - my $translation = defined($tr->{_variation_effect_feature_cache}->{peptide}) - ? $tr->{_variation_effect_feature_cache}->{peptide} - : $tr->translation->seq; + # compare lengths + my $translation = defined($tr->{_variation_effect_feature_cache}->{peptide}) + ? $tr->{_variation_effect_feature_cache}->{peptide} + : $tr->translation->seq; - my ($pep_start, $pep_end) = ($tv->translation_start, $tv->translation_end); + my ($pep_start, $pep_end) = ($tv->translation_start, $tv->translation_end); - my $new_length = ($pep_start < $pep_end ? $pep_start : $pep_end) + length($new_pep); + my $new_length = ($pep_start < $pep_end ? 
$pep_start : $pep_end) + length($new_pep); - return { - DownstreamProtein => $new_pep, - ProteinLengthChange => $new_length - length($translation), - }; - } - - return {}; + return { + DownstreamProtein => $new_pep, + ProteinLengthChange => $new_length - length($translation), + }; } 1; From 4c196f8e5f8e410c80b577874e1e442e697a4f5c Mon Sep 17 00:00:00 2001 From: Stephen Kazakoff Date: Mon, 27 Jul 2020 01:10:03 +1000 Subject: [PATCH 4/6] Remove superfluous whitespace and comments --- Downstream.pm | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/Downstream.pm b/Downstream.pm index 2f4e9075..403702de 100755 --- a/Downstream.pm +++ b/Downstream.pm @@ -92,11 +92,11 @@ sub get_header_info { sub run { my ($self, $tva) = @_; - + my @SO_terms = map { $_->SO_term } @{$tva->get_all_OverlapConsequences}; return {} unless grep { $_ eq 'frameshift_variant' } @SO_terms; - + return {} if grep { /splice/ } @SO_terms; my $tv = $tva->transcript_variation; @@ -105,8 +105,7 @@ sub run { my $cds_seq = defined($tr->{_variation_effect_feature_cache}) ? $tr->{_variation_effect_feature_cache}->{translateable_seq} : $tr->translateable_seq; - - # get the sequence to translate + my ($low_pos, $high_pos) = sort {$a <=> $b} ($tv->cds_start, $tv->cds_end); my $is_insertion = $tv->cds_start > $tv->cds_end ? 
1 : 0; my $last_complete_codon = (ceil($low_pos / 3) - 1) * 3; @@ -116,15 +115,13 @@ sub run { my $three_prime_utr_seq = $tr->three_prime_utr->seq() if ($tr->three_prime_utr); $to_translate = $to_translate.$three_prime_utr_seq if ($three_prime_utr_seq); $to_translate =~ s/\-//g; - - # create a bioperl object + my $codon_seq = Bio::Seq->new( -seq => $to_translate, -moltype => 'dna', -alphabet => 'dna' ); - - # get codon table + my $codon_table; if(defined($tr->{_variation_effect_feature_cache})) { $codon_table = $tr->{_variation_effect_feature_cache}->{codon_table} || 1; @@ -133,12 +130,10 @@ sub run { my ($attrib) = @{$tr->slice->get_all_Attributes('codon_table')}; $codon_table = $attrib ? $attrib->value || 1 : 1; } - - # translate + my $new_pep = $codon_seq->translate(undef, undef, undef, $codon_table)->seq(); $new_pep =~ s/\*.*//; - - # compare lengths + my $translation = defined($tr->{_variation_effect_feature_cache}->{peptide}) ? $tr->{_variation_effect_feature_cache}->{peptide} : $tr->translation->seq; @@ -146,7 +141,7 @@ sub run { my ($pep_start, $pep_end) = ($tv->translation_start, $tv->translation_end); my $new_length = ($pep_start < $pep_end ? $pep_start : $pep_end) + length($new_pep); - + return { DownstreamProtein => $new_pep, ProteinLengthChange => $new_length - length($translation), From 6305a9d1f1c49a61b7092e1168166255a0d9569b Mon Sep 17 00:00:00 2001 From: Stephen Kazakoff Date: Fri, 7 Aug 2020 09:52:55 +1000 Subject: [PATCH 5/6] Remove ceil dependency --- Downstream.pm | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/Downstream.pm b/Downstream.pm index 403702de..5bbcb416 100755 --- a/Downstream.pm +++ b/Downstream.pm @@ -55,7 +55,6 @@ use strict; use warnings; use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin; -use POSIX qw(ceil); use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); @@ -106,18 +105,18 @@ sub run { ? 
$tr->{_variation_effect_feature_cache}->{translateable_seq} : $tr->translateable_seq; - my ($low_pos, $high_pos) = sort {$a <=> $b} ($tv->cds_start, $tv->cds_end); - my $is_insertion = $tv->cds_start > $tv->cds_end ? 1 : 0; - my $last_complete_codon = (ceil($low_pos / 3) - 1) * 3; - my $before_var_seq = substr $cds_seq, $last_complete_codon, $low_pos - $last_complete_codon - ($is_insertion ? 0 : 1); - my $after_var_seq = substr $cds_seq, $high_pos - ($is_insertion ? 1 : 0); - my $to_translate = $before_var_seq.$tva->feature_seq.$after_var_seq; - my $three_prime_utr_seq = $tr->three_prime_utr->seq() if ($tr->three_prime_utr); - $to_translate = $to_translate.$three_prime_utr_seq if ($three_prime_utr_seq); - $to_translate =~ s/\-//g; + my ($start, $end) = ($tv->cds_start, $tv->cds_end); + + substr($cds_seq, $start - 1, $end - $start + 1) = $tva->seq_length > 0 ? $tva->feature_seq : ''; + + my $low_pos = $start > $end ? $end : $start; + my $last_complete_codon = $low_pos - ( ( ( $low_pos - 1 ) % 3 ) + 1 ); + + my $downstream_seq = substr($cds_seq, $last_complete_codon > 0 ? $last_complete_codon : 0); + my $three_prime_utr = $tr->three_prime_utr ? $tr->three_prime_utr->seq() : ''; my $codon_seq = Bio::Seq->new( - -seq => $to_translate, + -seq => $downstream_seq . $three_prime_utr, -moltype => 'dna', -alphabet => 'dna' ); From 657fed80504d16f0b831de3a2d396a547c25dfd7 Mon Sep 17 00:00:00 2001 From: Stephen Kazakoff Date: Fri, 7 Aug 2020 09:53:36 +1000 Subject: [PATCH 6/6] Bump plugin version to 2.4 --- Downstream.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Downstream.pm b/Downstream.pm index 5bbcb416..621d217c 100755 --- a/Downstream.pm +++ b/Downstream.pm @@ -71,7 +71,7 @@ sub new { } sub version { - return '2.3'; + return '2.4'; } sub feature_types {