Skip to content

Commit

Permalink
Adapted to new R version and changed vignette
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Piechotta committed May 21, 2021
1 parent 38558a2 commit 9729174
Show file tree
Hide file tree
Showing 39 changed files with 51,738 additions and 146,217 deletions.
9 changes: 5 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
Package: JACUSA2helper
Type: Package
Title: Post-processing for JACUSA2 output
Version: 1.99-6
Version: 1.99-7
Depends: R (>= 3.5)
Date: 2020-10-26
Date: 2021-05-19
Author: Michael Piechotta
Maintainer: <[email protected]>
Description: This package enables post-processing of JACUSA2 output: read, filter, plot, write.
License: MIT + file LICENSE
License: GPL-3 + file LICENSE
Encoding: UTF-8
RoxygenNote: 7.1.1
LazyData: true
LazyDataCompression: bzip2
Suggests:
testthat (>= 2.1.0),
knitr,
Expand All @@ -26,5 +27,5 @@ Imports:
scales,
sticky,
methods,
stringr
stringr
VignetteBuilder: knitr
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,16 @@ export(lapply_cond)
export(lapply_repl)
export(mapply_repl)
export(mask_sub)
export(max_observed_bc)
export(merge_sub)
export(non_ref_ratio)
export(observed_bc)
export(read_result)
export(read_results)
export(robust)
export(sub_counts)
export(sub_ratio)
export(unpack_info)
export(variant_bc)
export(write_bedGraph)
importFrom(magrittr,"%>%")
4 changes: 2 additions & 2 deletions R/common.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' JACUSA2helper: A package for post-processing JACUSA2 result files.
#'
#' TODO
#' Auxiliary R package for the assessment of JACUSA1.x and JACUSA2.x results.
#'
#' @section Description:
#' A package that provides functions to post-process result files of JACUSA2.
Expand All @@ -18,7 +18,7 @@
#'
#' When working with stranded RNA-Seq data, inverting base calls is not necessary because
#' JACUSA2 will automatically invert Single End (SE) and Paired End (PE) depending on the
#' provided library type option "-P" UNSTRANDED|FR_FIRSTSTRAND|RF_SECOND_STRAND".
#' provided library type option "-P UNSTRANDED|FR_FIRSTSTRAND|RF_SECONDSTRAND".
#'
#' The central data structure in JACUSA2helper is the JACUSA2 result object that follows the
#' tidy data approach to feature easy interaction with dplyr and ggplot2.
Expand Down
22 changes: 3 additions & 19 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@
#' \item name: Character string. Currently, name of used method (\emph{call-\{1,2\}}, \emph{pileup}, \emph{rt-arrest}, or \emph{lrt-arrest})
#' \item score: Numeric value representing the test statistic. Higher values indicate more divergent pileups
#' \item strand: Character representing strand information; "+", "-", or "."(no strand information available)
#' \item info: Character string separated with ";" provding additional data for this specific site. Empty field is equal to "*"
#' \item info: Character string separated with ";" providing additional data for this specific site. Empty field is equal to "*"
#' \item filter: ";"-separated character string showing feature filter information. Empty field is equal to "*"
#' \item ref: Character "A", "C", "G", "T", or "N" representing the reference base for this site - inverted when strand is "-".
#' \item bases: tibble representing counts for A, C, G, and T base calls for all reads (=arrest + through).
Expand Down Expand Up @@ -283,7 +283,7 @@
#' call2_APOBEC1_APOBEC1YTHmut_RC14_C2T_call2_result.out
#'
#' TODO
#'
#'
#' @docType data
#'
#' @usage data(APOBEC1)
Expand All @@ -294,20 +294,4 @@
#' \itemize{
#' \item TODO
#' }
#"APOBEC1"

##' head -n 10000 call2_APOBEC1_APOBEC1YTHmut_RC14_C2T_call2_result.out
##'
##' TODO
##'
##' @docType data
##'
##' @usage data(TEST)
##'
##' @references TODO
##'
##' @format TODO:
##' \itemize{
##' \item TODO
##' }
"TEST"
#"APOBEC1"
26 changes: 15 additions & 11 deletions R/indel-ratio.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
#' @export
extract_info <- function(s, f) {
  # Pull the "<f>=<digits and commas>" token out of every element of `s`;
  # elements without such a token yield NA.
  d <- stringr::str_extract(s, paste0(f, "=[\\d\\,]+"))
  found <- !is.na(d)
  # Drop the leading "<f>=": the value starts at character nchar(f) + 2.
  d[found] <- .eend(d[found], nchar(f) + 2)
  # Elements lacking the field are represented by a pair of zeros.
  d[!found] <- "0,0"
  # Split the comma-separated values into columns (single pass; the
  # statements were previously executed twice with the first result dropped).
  .sp_mat(d)
}
#' Extract indel scores from the JACUSA2 info.
#'
Expand All @@ -18,12 +18,16 @@ extract_info <- function (s, f)
#' @return vector of the extracted scores
#' @export
extract_score <- function(s, f) {
  # Only the two indel score keys are accepted; anything else is an error.
  f <- match.arg(f, c("insertion_score", "deletion_score"))
  # Pull the "<f>=<value>" token out of every element of `s`;
  # elements without such a token yield NA.
  d <- stringr::str_extract(s, paste0(f, "=[\\w-\\.]+"))
  found <- !is.na(d)
  # Drop the leading "<f>=": the value starts at character nchar(f) + 2.
  d[found] <- .eend(d[found], nchar(f) + 2)
  # Elements lacking the score are reported as 0.
  d[!found] <- "0"
  as.numeric(d)
}

# Return the tail of each string in `s`, starting at character position `i`.
.eend <- function(s, i) {
  substr(s, i, nchar(s))
}

# Parse a character vector of comma-separated values into a data.frame,
# one row per input line and one column per value.
.sp_mat <- function(d) {
  # `text =` makes read.table() open and close its own connection; the
  # previous textConnection(d) was never closed and leaked a connection
  # on every call.
  utils::read.table(text = d, sep = ",")
}
30 changes: 18 additions & 12 deletions R/io.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Read JACUSA2 result file
#'
#' \code{read_result()} Reads data that was generated by JACUSA2 and creates a JACUSA2 result object.
#' \code{read_result()} reads data that was generated by JACUSA2 and creates a JACUSA2 result object.
#'
#' @param file String that represents the file name of the JACUSA2 output.
#' @param cond_desc Vector of strings that represent names/descriptions for conditions.
Expand Down Expand Up @@ -161,12 +161,7 @@ coord <- function(result) {

# unpack info field
if (unpack) {
browser()
n <- cores
nr <- nrow(df)
tmp <- split(df, rep(1:ceiling(nr/n), each=n, length.out=nr))
browser()
result <- .unpack_info(result, cond_count, cores)
result <- unpack_info(result, cond_count, cores)
}

result
Expand Down Expand Up @@ -213,7 +208,17 @@ base_call <-function(bases) {
df
}

.unpack_info <- function(result, cond_count, cores) {
#' Unpack info field
#'
#' Unpacks info field.
#'
#' @param result JACUSA2 result object.
#' @param cond_count integer Number of conditions.
#' @param cores integer Number of compute cores to use. Default: 1.
#' @return TODO
#'
#' @export
unpack_info <- function(result, cond_count, cores = 1) {
info <- tidyr::separate_rows(result[, c("id", .INFO_COL)], info, sep = ";")

. <- key <- value <- NULL
Expand Down Expand Up @@ -259,8 +264,7 @@ base_call <-function(bases) {

cols <- unique(matches$col)
df <- .fill_empty(df, cols, new_cols)

. <- NULL

unpacked <- parallel::mclapply(
df[cols],
.unpack, new_cols=new_cols,
Expand All @@ -271,8 +275,10 @@ base_call <-function(bases) {

for (prefix in prefixes) {
i <- matches$prefix == prefix
merged <- .merge_cond(unpacked[matches[i, "col"]], matches[i, ])
df[[prefix]] <- tidyr::as_tibble(merged)
if (any(i)) {
merged <- .merge_cond(unpacked[matches[i, "col"]], matches[i, ])
df[[prefix]] <- tidyr::as_tibble(merged)
}
}

df
Expand Down
Loading

0 comments on commit 9729174

Please sign in to comment.