Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
tbrittoborges committed Jan 11, 2022
1 parent 430a8fe commit 36a3279
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 23 deletions.
44 changes: 21 additions & 23 deletions benchmark_scripts/parse_suppa_sup.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
library(tidyverse)
library(GenomicFeatures)
library(rtracklayer)
library(GenomicRanges)

parse_suppa_sup = function(url, sheet, event_id=TRUE, startRow = 3){
x = openxlsx::read.xlsx(url, sheet = sheet, startRow = startRow)
if (isTRUE(event_id)) {
Expand All @@ -18,18 +23,10 @@ url='https://static-content.springer.com/esm/art%3A10.1186%2Fs13059-018-1417-1/M
# 44 negative
pos = parse_suppa_sup(url, 4, event_id = TRUE, startRow = 3)
neg = parse_suppa_sup(url, 10, event_id = FALSE, 3)
pos$score = 1
neg$score = 0

library(tidyverse)

neg <- neg %>%
rownames_to_column('event_n') %>%
dplyr::rename(chr=Chr, Gene=Gene_symbol) %>%
mutate(
coord = str_glue_data(., "{Exon_start}-{Exon_end}"),
Exon_start = NULL,
Exon_end = NULL)
dplyr::rename(chr=Chr, Gene=Gene_symbol)

reference <- rtracklayer::import.gff("/biodb/genomes/homo_sapiens/GRCh38_102/GRCh38.102.SIRV.gtf")

Expand All @@ -53,23 +50,24 @@ get_introns <- function(ex_tx) {
introns <- unlist(introns)
introns
}
ch <- rtracklayer::import.chain('~/GRCh37_to_GRCh38_2.chain')

ref_ex_tx <- filter_multi_exon(reference)
ref_introns <- get_introns(ref_ex_tx)
ref_introns <- ref_introns[width(ref_introns) > 2, ]

neg_gr <- GRanges(
seqnames = neg$Chr,
ranges = IRanges(neg$Exon_start, pos$Exon_end))
seqnames = neg$chr,
ranges = IRanges(neg$Exon_start, neg$Exon_end))
seqlevelsStyle(neg_gr) <- 'ensembl'
neg_gr <- rtracklayer::liftOver(neg_gr, ch)
neg_gr <- unlist(neg_gr)

neg_introns <- subsetByOverlaps(ref_introns, neg_gr)
perc_olaps <- width(pintersect(ref_introns[from(neg_olaps)], neg_gr[to(neg_olaps)])) / width(neg_gr[to(neg_olaps)])
neg_introns <- neg_introns[ perc_olaps == 1 ]
# perc_olaps <- width(pintersect(ref_introns[from(neg_olaps)], neg_gr[to(neg_olaps)])) / width(neg_gr[to(neg_olaps)])
# neg_introns <- neg_introns[ perc_olaps == 1 ]
score(neg_introns) <- 0

library(GenomicFeatures)
library(rtracklayer)

pos <- pos %>%
rownames_to_column('event_n') %>%
mutate(
Expand All @@ -88,17 +86,17 @@ pos <- pos %>%
gr <- GRanges(seqnames = pos$chr, ranges = IRanges(pos$coord), strand = pos$strand)
seqlevelsStyle(gr) <- 'ensembl'
# https://gist.github.com/tbrittoborges/f0bc89cd7192d0955679d5105fd39475
ch <- rtracklayer::import.chain('~/GRCh37_to_GRCh38_2.chain')
gr <- rtracklayer::liftOver(gr, ch)
gr <- unlist(gr)

pairs <- findOverlapPairs(ref_introns, gr)
same_end <- end(pairs@first) - end(pairs@second) == -1
same_start <- start(pairs@first) - start(pairs@second) == 1
length(unique(pairs[same_start & same_end]@second))
pairs <- pairs[same_start & same_end]
gr <- unique(pairs@first)
mcols(gr)$score <- 1
names(gr) <- seq_along(gr)

neg_gr <- rtracklayer::liftOver(neg_gr, ch)
neg_gr <- unlist(neg_gr)

gr <- c(neg_introns, gr)
export.bed(gr, '/prj/Niels_Gehring/baltica_benchmark/best_et_al/baltica/ortho.bed')
head(gr)


51 changes: 51 additions & 0 deletions benchmark_scripts/roc_calling_suppa.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
library(ROCR)
library(cowplot)

source("~/Baltica/benchmark_scripts/load_suppa_data.R")

length(unique(coordinates[!is.na(df$orthogonal) ]))
table(df$orthogonal)

compute_prediction <- function(col, ref) {
col <- df[, col]
ref <- df[, ref]
col <- col[!is.na(ref)]
ref <- ref[!is.na(ref)]
col <- replace(col, is.na(col), 0)
prediction(col, ref)
}

pars <- tibble(
col = c("majiq", "leafcutter", "rmats", "junctionseq"),
ref = rep("orthogonal", 4)
)
prediction <- purrr::pmap(pars, compute_prediction)
names(prediction) <- pars$col
tpr_fpr <- lapply(prediction, performance, measure = "tpr", x.measure = "fpr")
auc <- lapply(prediction, performance, measure = "auc")

method <- unlist(lapply(tpr_fpr, function(x) length(slot(x, "x.values")[[1]])))
auc <- lapply(auc, function(x) round(slot(x, "y.values")[[1]][[1]], 2))

roc_data <- tibble(
FPR = unlist(lapply(tpr_fpr, function(x) slot(x, "x.values")[[1]])),
TPR = unlist(lapply(tpr_fpr, function(x) slot(x, "y.values")[[1]])),
Method = rep(names(method), method),
)

p <- ggplot(roc_data, aes(x = FPR, y = TPR, color = Method)) +
geom_line() +
geom_point(size=0.5) +
geom_abline(aes(slope = 1, intercept = 0), linetype = "dashed") +
theme_cowplot(14) +
theme(legend.position="bottom") +
scale_color_manual(
name = NULL,
labels = setNames(nm=names(auc), paste(names(auc), ' AUCROC=', auc, sep="")),
values = color_list$method[1:4]) +
guides(color=guide_legend(nrow=2,byrow=TRUE))

p

ggsave(
"results/roc_suppa_calling.pdf", width = 15, height = 15, units = 'cm')

0 comments on commit 36a3279

Please sign in to comment.