Skip to content

Commit

Permalink
initial commit for script to plot and train the meta predictor
Browse files Browse the repository at this point in the history
  • Loading branch information
tbrittoborges committed Sep 7, 2021
1 parent e8ec3f5 commit 76c1def
Show file tree
Hide file tree
Showing 16 changed files with 998 additions and 109 deletions.
26 changes: 26 additions & 0 deletions benchmark_scripts/confmat_calling_nanopore.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
source("load_nanopore_data.R")

cm <- lapply(
seq_along(df),
function(x) {
caret::confusionMatrix(
as.factor(ifelse(df[[x]] > 0.95, 1, 0)),
as.factor(ifelse(df$orthogonal > 0.95, 1, 0)),
positive = "1"
)
}
)

names(cm) <- colnames(df)

lapply(
names(cm),
function(x) {
writeLines(
capture.output(
cm[[x]]
),
paste0("../nanopore_benchmark/results/confmat_", x, ".tab")
)
}
)
27 changes: 27 additions & 0 deletions benchmark_scripts/confmat_calling_sirv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
source("load_sirv_data.R")


cm <- lapply(
seq_along(df),
function(x) {
caret::confusionMatrix(
as.factor(ifelse(df[[x]] > 0.95, 1, 0)),
as.factor(df$orthogonal),
positive = "1"
)
}
)

names(cm) <- colnames(df)

lapply(
names(cm),
function(x) {
writeLines(
capture.output(
cm[[x]]
),
paste0("../sirv_benchmark/results/confmat_", x, ".tab")
)
}
)
53 changes: 53 additions & 0 deletions benchmark_scripts/eulerr_calling_sirv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
library(eulerr)

source("~/Baltica/benchmark_scripts/load_sirv_data.R")

mat <- df %>%
mutate(
across(everything(), ~ replace_na(.x, 0)),
comparison = NULL
) %>%
as.matrix(df)

mat[mat > 0.95] <- 1
mat[mat < 0.95] <- 0
mat[is.na(mat)] <- 0
mat <- mat[rowSums(mat) >= 1, ]

fit <- euler(mat, shape = "ellipse")

plot(
fit,
fills = color_list$method,
edges = F,
fontsize = 14,
quantities = list(fontsize = 16),
labels = FALSE,
legend = list(nrow = 1, ncol = 5, side = "bottom", fontsize = 16),
main = "Intesection of SJ per method"
)

library(VennDiagram)

input_sj <- lapply(
as.data.frame(mat),
function(i) unique(coordinates[i])
)

venn.diagram(
x = as.list(mat),
category.names = colnames(mat),
filename = "venn_sirv_calling.png"
# output=TRUE
)

input <- lapply(as.data.frame(mat), function(i) unique(gene_name[i]))

input$junctionseq

venn.diagram(
x = input,
# category.names = colnames(mat),
filename = "venn_sirv_calling.png",
output = TRUE
)
66 changes: 66 additions & 0 deletions benchmark_scripts/heatmap_calling_nanopore.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
source("load_nanopore_data.R")

library(ComplexHeatmap)
library(RColorBrewer)

set.seed(123)

methods <- c("rmats", "junctionseq", "majiq", "leafcutter", "orthogonal")
methods <- str_extract(colnames(df), str_c(methods, collapse = "|"))
methods <- methods[!is.na(methods)]

mat <- df %>%
mutate(
across(everything(), ~ replace_na(.x, 0)),
comparison = NULL
) %>%
as.matrix(df)

idx <- data.frame(
is=ifelse(df$orthogonal>0.95, 1, 0)
)

idx <- idx %>%
mutate(n=row_number()) %>%
group_by(is) %>%
sample_n(500)

method <- str_extract(colnames(mat), str_c(methods, collapse = "|"))
method_cols <- setNames(brewer.pal(n = length(methods), name = "Set2"), methods)

mat <- mat[idx$n, ]
ca=HeatmapAnnotation(
method_names=anno_block(
gp = gpar(fill = method_cols[method]),
labels = method,
labels_gp = gpar(col = "white", fontsize = 9))
)

lengend_method = Legend(
labels = unique(method),
title = "method",
legend_gp = gpar(
fontsize = 14,
fontface = "bold",
fill = method_cols[unique(method)]))

col_fun = circlize::colorRamp2(c(0, 0.95, 1), c("blue", "white", "red"))

hm=Heatmap(
mat,
name='score',
col = col_fun,
top_annotation = ca,
cluster_columns = F,
cluster_rows = F,
column_split = 1:5,
border = TRUE,
column_title = NULL,
show_column_names=F)

png("~/Baltica/nanopore_benchmark/results/heatmap_calling.pdf", width = 8, height = 4, units = "in", res = 200)
draw(hm,
heatmap_legend_list = list(lengend_method))
dev.off()


74 changes: 74 additions & 0 deletions benchmark_scripts/heatmap_calling_sirv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
source("load_sirv_data.R")

library(ComplexHeatmap)
library(RColorBrewer)

set.seed(123)

methods <- c("rmats", "junctionseq", "majiq", "leafcutter", "orthogonal")
methods <- str_extract(colnames(df), str_c(methods, collapse = "|"))
methods <- methods[!is.na(methods)]

mat <- df %>%
mutate(
across(everything(), ~ replace_na(.x, 0)),
comparison = NULL
) %>%
as.matrix(df)

is_SIRV <- startsWith(coordinates, 'SIRV')

idx <- data_frame(
is_SIRV=is_SIRV)

idx <- idx %>%
mutate(n=row_number()) %>%
group_by(is_SIRV) %>%
sample_n(504)

method <- str_extract(colnames(mat), str_c(methods, collapse = "|"))
method_cols <- setNames(brewer.pal(n = length(methods), name = "Set2"), methods)

mat <- mat[idx$n, ]
ca=HeatmapAnnotation(
method_names=anno_block(
gp = gpar(fill = method_cols[method]),
labels = method,
labels_gp = gpar(col = "white", fontsize = 9))
)

lengend_method = Legend(
labels = unique(method),
title = "method",
legend_gp = gpar(
fontsize = 14,
fontface = "bold",
fill = method_cols[unique(method)]))

ra = HeatmapAnnotation(
SIRV = is_SIRV[idx$n],
col=list(SIRV=c('TRUE'='black', 'FALSE'='white')),
which = 'row')

col_fun = circlize::colorRamp2(c(0, 0.95, 1), c("blue", "white", "red"))

hm=Heatmap(
mat,
name='score',
col = col_fun,
left_annotation = ra,
top_annotation = ca,
cluster_columns = F,
cluster_rows = F,
column_split = 1:5,
row_split = is_SIRV[idx$n],
border = TRUE,
column_title = NULL,
show_column_names=F)

png("~/Baltica/sirv_benchmark/results/heatmap_calling.pdf", width = 8, height = 4, units = "in", res = 200)
draw(hm,
heatmap_legend_list = list(lengend_method))
dev.off()


31 changes: 31 additions & 0 deletions benchmark_scripts/load_nanopore_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
library(tidyverse)

df <- readr::read_csv(
"../nanopore_benchmark/results/SJ_annotated.csv",
col_types = cols(
.default = col_double(),
is_novel = col_logical(),
gene_name = col_character(),
transcript_name = col_character(),
class_code = col_character(),
exon_number = col_character(),
coordinates = col_character()
)
)

gene_name <- df$gene_name
coordinates <- df$coordinates
is_novel <- df$is_novel

colnames(df) <- gsub(x=colnames(df), pattern = "_SMG7KO2SMG6KD-vs-control", '')
colnames(df) <- gsub(x=colnames(df), pattern = "_NA", '')

methods <- c("rmats", "junctionseq", "majiq", "leafcutter", "orthogonal")
df <- df %>% dplyr::select(matches(methods))

color_list <- list(
method = setNames(
scales::brewer_pal(palette = "Set2")(length(methods)),
methods
)
)
36 changes: 36 additions & 0 deletions benchmark_scripts/load_sirv_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
library(tidyverse)

df <- readr::read_csv(
"../sirv_benchmark/results/SJ_annotated.csv",
col_types = cols(
.default = col_double(),
is_novel = col_logical(),
gene_name = col_character(),
transcript_name = col_character(),
class_code = col_character(),
exon_number = col_character(),
coordinates = col_character()
)
)

df <- df %>%
# pivot to remove the mixes
pivot_longer(matches("-vs-"),
names_to = c(".value", 'comparison'),
names_pattern = "(.+)_(.+)"
)

gene_name <- df$gene_name
coordinates <- df$coordinates
is_novel <- df$is_novel

methods <- c("rmats", "junctionseq", "majiq", "leafcutter", "orthogonal")

df <- df %>% dplyr::select(matches(methods))

color_list <- list(
method = setNames(
scales::brewer_pal(palette = "Set2")(length(methods)),
methods
)
)
59 changes: 59 additions & 0 deletions benchmark_scripts/pearson_calling_sirv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
library(tidyverse)
library(reshape2)

df <- readr::read_csv(
"../sirv_benchmark/results/SJ_annotated.csv",
col_types = cols(
.default = col_double(),
is_novel = col_logical(),
gene_name = col_character(),
transcript_name = col_character(),
class_code = col_character(),
exon_number = col_character(),
coordinates = col_character()
)
)

df <- df %>%
dplyr::select(matches("-vs-"), "coordinates", "gene_name")

coordinates <- df$coordinates
gene_name <- df$gene_name
df$coordinates <- NULL
df$gene_name <- NULL
df <- df %>% mutate(
across(everything(), ~ replace_na(.x, 0))
)

cormat <- apply(df, 2, function(x){ -log10(x+1e-10) })
cormat <- round(
cor(cormat, method='pearson', use="pairwise.complete.obs"),
2)
cormat[lower.tri(cormat)]=NA
cormat=melt(cormat)
cormat=cormat[!is.na(cormat$value),]
ggplot(cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed() +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(1, 0),
legend.position = c(0.7, 0.7),
legend.direction = "horizontal")+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
ggsave('../sirv_benchmark/results/heatmap_pearson.png')
Loading

0 comments on commit 76c1def

Please sign in to comment.