Adapted to new R version and changed vignette

dieterich-lab · May 21, 2021 · 9729174 · 9729174
1 parent 38558a2
commit 9729174
Show file tree

Hide file tree

Showing 39 changed files with 51,738 additions and 146,217 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,16 +1,17 @@
 Package: JACUSA2helper
 Type: Package
 Title: Post-processing for JACUSA2 output
-Version: 1.99-6
+Version: 1.99-7
 Depends: R (>= 3.5)
-Date: 2020-10-26
+Date: 2021-05-19
 Author: Michael Piechotta
 Maintainer: <[email protected]>
 Description: This package enables post-processing of JACUSA2 output: read, filter, plot, write.
-License: MIT + file LICENSE
+License: GPL-3 + file LICENSE
 Encoding: UTF-8
 RoxygenNote: 7.1.1
 LazyData: true
+LazyDataCompression: bzip2
 Suggests: 
     testthat (>= 2.1.0),
     knitr,
@@ -26,5 +27,5 @@ Imports:
   scales,
   sticky,
   methods, 
-  stringr 
+  stringr
 VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
@@ -21,12 +21,16 @@ export(lapply_cond)
 export(lapply_repl)
 export(mapply_repl)
 export(mask_sub)
+export(max_observed_bc)
 export(merge_sub)
 export(non_ref_ratio)
+export(observed_bc)
 export(read_result)
 export(read_results)
 export(robust)
 export(sub_counts)
 export(sub_ratio)
+export(unpack_info)
+export(variant_bc)
 export(write_bedGraph)
 importFrom(magrittr,"%>%")
diff --git a/R/common.R b/R/common.R
@@ -1,6 +1,6 @@
 #' JACUSA2helper: A package for post-processing JACUSA2 result files.
 #'
-#' TODO
+#' Auxiliary R package for the assessment of JACUSA1.x and JACUSA2.x results.
 #'
 #' @section Description:
 #' A package that provides functions to post-process result files of JACUSA2.
@@ -18,7 +18,7 @@
 #' 
 #' When working with stranded RNA-Seq data, inverting base calls is not necessary because
 #' JACUSA2 will automatically invert Single End (SE) and Paired End (PE) depending on the
-#' provided library type option "-P" UNSTRANDED|FR_FIRSTSTRAND|RF_SECOND_STRAND".
+#' provided library type option "-P UNSTRANDED|FR_FIRSTSTRAND|RF_SECONDSTRAND".
 #' 
 #' The central data structure in JACUSA2helper is the JACUSA2 result object that follows the 
 #' tidy data approach to feature easy interaction with dplyr and ggplot2.

diff --git a/R/data.R b/R/data.R
@@ -240,7 +240,7 @@
 #'		\item name: Character string. Currently, name of used method (\emph{call-\{1,2\}}, \emph{pileup}, \emph{rt-arrest}, or \emph{lrt-arrest})
 #'		\item score: Numeric value representing the test-statistic. Higher values indicate more divergent pileups
 #'		\item strand: Character representing strand information; "+", "-", or "."(no strand information available)
-#'		\item info: Character string separated with ";" provding additional data for this specific site. Empty field is equal to "*"
+#'		\item info: Character string separated with ";" providing additional data for this specific site. Empty field is equal to "*"
 #'		\item filter: ";"-separated character string showing feature filter information. Empty field is equal to "*"
 #'		\item ref: Character "A", "C", "G", "T", or "N" representing the reference base for this site - inverted when strand is "-".
 #'		\item bases: tibble representing counts for A, C, G, and T base calls for all reads (=arrest + through).
@@ -283,7 +283,7 @@
 #' call2_APOBEC1_APOBEC1YTHmut_RC14_C2T_call2_result.out
 #' 
 #' TODO
-#'
+#' 
 #' @docType data
 #' 
 #' @usage data(APOBEC1)
@@ -294,20 +294,4 @@
 #' \itemize{
 #'   \item TODO
 #' }
-#"APOBEC1"
-
-##' head -n 10000 call2_APOBEC1_APOBEC1YTHmut_RC14_C2T_call2_result.out
-##' 
-##' TODO
-##'
-##' @docType data
-##' 
-##' @usage data(TEST)
-##' 
-##' @references TODO
-##' 
-##' @format TODO:
-##' \itemize{
-##'   \item TODO
-##' }
-"TEST"
+#"APOBEC1"
diff --git a/R/indel-ratio.R b/R/indel-ratio.R
@@ -6,10 +6,10 @@
 #' @export
 extract_info <- function (s, f) 
 {
-    d = stringr::str_extract(s, paste0(f, "=[\\d\\,]+"))
-    d[!is.na(d)] = .eend(d[!is.na(d)], nchar(f) + 2)
-    d[is.na(d)] = "0,0"
-    .sp_mat(d)
+  d = stringr::str_extract(s, paste0(f, "=[\\d\\,]+"))
+  d[!is.na(d)] = .eend(d[!is.na(d)], nchar(f) + 2)
+  d[is.na(d)] = "0,0"
+  .sp_mat(d)
 }
 #' Extract indel scores from the JACUSA2 info.
 #' 
@@ -18,12 +18,16 @@ extract_info <- function (s, f)
 #' @return vector of the extracted scores
 #' @export
 extract_score <- function(s,f){ 
-f = match.arg(f, c("insertion_score", "deletion_score"))
-d = stringr::str_extract(s, paste0(f,'=[\\w-\\.]+'))
-d[!is.na(d)] = .eend(d[!is.na(d)], nchar(f)+2)
-d[is.na(d)]  = '0'
-as.numeric(d)
+  f = match.arg(f, c("insertion_score", "deletion_score"))
+  d = stringr::str_extract(s, paste0(f,'=[\\w-\\.]+'))
+  d[!is.na(d)] = .eend(d[!is.na(d)], nchar(f)+2)
+  d[is.na(d)]  = '0'
+  as.numeric(d)
 }
 
-.eend <- function(s,i) substr(s,i,nchar(s))
-.sp_mat <- function(d)read.table(textConnection(d), sep=',')
+.eend <- function(s,i){
+  substr(s,i,nchar(s))
+} 
+.sp_mat <- function(d){
+  utils::read.table(textConnection(d), sep=',')
+}
diff --git a/R/io.R b/R/io.R
@@ -1,6 +1,6 @@
 #' Read JACUSA2 result file
 #'
-#' \code{read_result()} Reads data that was generated by JACUSA2 and creates a JACUSA2 result object.
+#' \code{read_result()} reads data that was generated by JACUSA2 and creates a JACUSA2 result object.
 #'
 #' @param file String that represents the file name of the JACUSA2 output.
 #' @param cond_desc Vector of strings that represent names/descriptions for conditions.
@@ -161,12 +161,7 @@ coord <- function(result) {
 
   # unpack info field
   if (unpack) {
-    browser()
-    n <- cores
-    nr <- nrow(df)
-    tmp <- split(df, rep(1:ceiling(nr/n), each=n, length.out=nr))
-    browser()
-    result <- .unpack_info(result, cond_count, cores)
+    result <- unpack_info(result, cond_count, cores)
   }
 
   result
@@ -213,7 +208,17 @@ base_call <-function(bases) {
   df
 }
 
-.unpack_info <- function(result, cond_count, cores) {
+#' Unpack info field
+#' 
+#' Unpacks info field.
+#' 
+#' @param result JACUSA2 result object.
+#' @param cond_count integer Number of conditions.
+#' @param cores integer Number of compute cores to use. Default: 1.
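+#' @return Data frame in which the ";"-separated entries of the info field are unpacked; matching entries are merged into one tibble column per prefix.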
+#' 
+#' @export
+unpack_info <- function(result, cond_count, cores = 1) {
   info <- tidyr::separate_rows(result[, c("id", .INFO_COL)], info, sep = ";")
 
   . <- key <- value <- NULL
@@ -259,8 +264,7 @@ base_call <-function(bases) {
 
   cols <- unique(matches$col)
   df <- .fill_empty(df, cols, new_cols)
-
-  . <- NULL
+
   unpacked <- parallel::mclapply(
     df[cols], 
     .unpack, new_cols=new_cols, 
@@ -271,8 +275,10 @@ base_call <-function(bases) {
 
   for (prefix in prefixes) {
     i <- matches$prefix == prefix
-    merged <- .merge_cond(unpacked[matches[i, "col"]], matches[i, ])
-    df[[prefix]] <- tidyr::as_tibble(merged)
+    if (any(i)) {
+      merged <- .merge_cond(unpacked[matches[i, "col"]], matches[i, ])
+      df[[prefix]] <- tidyr::as_tibble(merged)
+    }
   }
 
   df