Fixed missing APOBEC1

dieterich-lab · Dec 21, 2020 · 2017f47 · 2017f47
1 parent a73d51d
commit 2017f47
Show file tree

Hide file tree

Showing 11 changed files with 194 additions and 105 deletions.
diff --git a/R/data.R b/R/data.R
@@ -294,20 +294,20 @@
 #' \itemize{
 #'   \item TODO
 #' }
-"APOBEC1"
+#"APOBEC1"
 
-#' head -n 10000 call2_APOBEC1_APOBEC1YTHmut_RC14_C2T_call2_result.out
-#' 
-#' TODO
-#'
-#' @docType data
-#' 
-#' @usage data(TEST)
-#' 
-#' @references TODO
-#' 
-#' @format TODO:
-#' \itemize{
-#'   \item TODO
-#' }
+##' head -n 10000 call2_APOBEC1_APOBEC1YTHmut_RC14_C2T_call2_result.out
+##' 
+##' TODO
+##'
+##' @docType data
+##' 
+##' @usage data(TEST)
+##' 
+##' @references TODO
+##' 
+##' @format TODO:
+##' \itemize{
+##'   \item TODO
+##' }
 "TEST"
diff --git a/data-raw/DATASET.R b/data-raw/DATASET.R
@@ -33,6 +33,6 @@ inputs <- paste0(prefix, meta_conds, ".out")
 Zhou2018 <- read_results(inputs, meta_conds, cond_descs)
 usethis::use_data(Zhou2018, overwrite = TRUE)
 
-TEST <-read_result("test.out", unpack = TRUE, cores = 2)
-TEST$tag <- clean_tag(TEST$tag)
-usethis::use_data(TEST, overwrite = TRUE)
+#TEST <-read_result("test.out", unpack = TRUE, cores = 2)
+#TEST$tag <- clean_tag(TEST$tag)
+#usethis::use_data(TEST, overwrite = TRUE)
diff --git a/man/APOBEC1.Rd b/man/APOBEC1.Rd
diff --git a/man/Allnum.Rd b/man/Allnum.Rd
diff --git a/man/TEST.Rd b/man/TEST.Rd
diff --git a/man/extract_info.Rd b/man/extract_info.Rd
diff --git a/man/extract_score.Rd b/man/extract_score.Rd
diff --git a/man/read_result.Rd b/man/read_result.Rd
diff --git a/man/sub_counts.Rd b/man/sub_counts.Rd
diff --git a/man/sub_ratio.Rd b/man/sub_ratio.Rd
diff --git a/vignettes/JACUSA2helper.Rmd b/vignettes/JACUSA2helper.Rmd
@@ -391,63 +391,63 @@ Where available, each observations for each site (=contig, start, stop, strand)
 distinguished by "tag=A2G" or "tag=*" in the info column, respectively.
 Use `result$tag <- clean_tag(result$tag)` to convert base substitutions such as "A2G" to "A->G".
 
-```{r}
-# load data
-data(APOBEC1)
-result <- APOBEC1
-barplot(table(result$tag), ylab="# of sites", main="Distribution of tagged sites")
-```
-We calculate the total base call count with `lapply_cond` and `Reduce("+")`.
-
-```{r}
-# sum base call counts of conditions and replicates
-result$total_bases <- lapply_cond(
-  result$bases, function(bases) { Reduce("+", bases) } 
-) %>% Reduce("+", .)
-```
-
-
-```{r}
-# mark "valid" sites 
-result <- result %>%
-  dplyr::mutate(
-    keep = (
-      # restrict to total reads
-      tag == "*" & 
-      # require less than 2 different base calls
-      base_count(total_bases, ref) <= 2 & 
-      # require 10 reads for each replicate of each condition
-      All(cov$cond1 >= 10) & All(cov$cond2 >= 10) &
-      score >= 2 &
-      robust(bases)
-    )
-  )
-# raw data
-# add structured columns: tagged_bases and not_tagged_bases
-expanded <- expand_tag(result)
-filtered <- dplyr::filter(expanded, keep == TRUE)
-```
-
-```{r}
-# plot
-count <- c(nrow(result), nrow(expanded), nrow(filtered))
-b <- barplot(count, names.arg = c("raw", "expaded", "filtered"), ylim=c(0, max(count) + max(count) * 15 / 100))
-text(b, count + count * 10 / 100, count)
-```
-
-```{r} 
-# add observed non reference base substitution, e.g.: A->G
-filtered[["base_sub"]] <- base_sub(filtered$total_bases, filtered$ref)
-# ratio of non reference base calls
-filtered[["non_ref_ratio"]] <- lapply_repl(filtered$bases, function(cond) non_ref_ratio(cond, filtered$ref))
-
-# tag specific base substitution for each condition
-filtered[["tagged_base_sub"]] <- lapply_cond(filtered$tagged_bases, function(bases) { Reduce("+", bases) %>% base_sub(filtered$ref)} )
-filtered[["tagged_non_ref_ratio"]] <- lapply_repl(filtered$tagged_bases, function(cond) non_ref_ratio(cond, filtered$ref))
-
-filtered[["not_tagged_base_sub"]] <- lapply_cond(filtered$not_tagged_bases, function(bases) { Reduce("+", bases) %>% base_sub(filtered$ref)} )
-filtered[["not_tagged_non_ref_ratio"]] <- lapply_repl(filtered$not_tagged_bases, function(cond) non_ref_ratio(cond, filtered$ref))
-```
+#```{r}
+## load data
+#data(APOBEC1)
+#result <- APOBEC1
+#barplot(table(result$tag), ylab="# of sites", main="Distribution of tagged sites")
+#```
+#We calculate the total base call count with `lapply_cond` and `Reduce("+")`.
+
+#```{r}
+## sum base call counts of conditions and replicates
+#result$total_bases <- lapply_cond(
+#  result$bases, function(bases) { Reduce("+", bases) } 
+#) %>% Reduce("+", .)
+#```
+
+
+#```{r}
+## mark "valid" sites 
+#result <- result %>%
+#  dplyr::mutate(
+#    keep = (
+#      # restrict to total reads
+#      tag == "*" & 
+#      # require at most 2 different base calls
+#      base_count(total_bases, ref) <= 2 & 
+#      # require 10 reads for each replicate of each condition
+#      All(cov$cond1 >= 10) & All(cov$cond2 >= 10) &
+#      score >= 2 &
+#      robust(bases)
+#    )
+#  )
+## raw data
+## add structured columns: tagged_bases and not_tagged_bases
+#expanded <- expand_tag(result)
+#filtered <- dplyr::filter(expanded, keep == TRUE)
+#```
+
+#```{r}
+## plot
+#count <- c(nrow(result), nrow(expanded), nrow(filtered))
+#b <- barplot(count, names.arg = c("raw", "expanded", "filtered"), ylim=c(0, max(count) + max(count) * 15 / 100))
+#text(b, count + count * 10 / 100, count)
+#```
+
+#```{r} 
+## add observed non reference base substitution, e.g.: A->G
+#filtered[["base_sub"]] <- base_sub(filtered$total_bases, filtered$ref)
+## ratio of non reference base calls
+#filtered[["non_ref_ratio"]] <- lapply_repl(filtered$bases, function(cond) non_ref_ratio(cond, filtered$ref))
+#
+## tag specific base substitution for each condition
+#filtered[["tagged_base_sub"]] <- lapply_cond(filtered$tagged_bases, function(bases) { Reduce("+", bases) %>% base_sub(filtered$ref)} )
+#filtered[["tagged_non_ref_ratio"]] <- lapply_repl(filtered$tagged_bases, function(cond) non_ref_ratio(cond, filtered$ref))
+#
+#filtered[["not_tagged_base_sub"]] <- lapply_cond(filtered$not_tagged_bases, function(bases) { Reduce("+", bases) %>% base_sub(filtered$ref)} )
+#filtered[["not_tagged_non_ref_ratio"]] <- lapply_repl(filtered$not_tagged_bases, function(cond) non_ref_ratio(cond, filtered$ref))
+#```
 
 
 <!--- # Experimental features