Skip to content

Commit

Permalink
Fixed missing APOBEC1
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Piechotta committed Dec 21, 2020
1 parent a73d51d commit 2017f47
Show file tree
Hide file tree
Showing 11 changed files with 194 additions and 105 deletions.
30 changes: 15 additions & 15 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -294,20 +294,20 @@
#' \itemize{
#' \item TODO
#' }
"APOBEC1"
#"APOBEC1"

#' head -n 10000 call2_APOBEC1_APOBEC1YTHmut_RC14_C2T_call2_result.out
#'
#' TODO
#'
#' @docType data
#'
#' @usage data(TEST)
#'
#' @references TODO
#'
#' @format TODO:
#' \itemize{
#' \item TODO
#' }
##' head -n 10000 call2_APOBEC1_APOBEC1YTHmut_RC14_C2T_call2_result.out
##'
##' TODO
##'
##' @docType data
##'
##' @usage data(TEST)
##'
##' @references TODO
##'
##' @format TODO:
##' \itemize{
##' \item TODO
##' }
"TEST"
6 changes: 3 additions & 3 deletions data-raw/DATASET.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ inputs <- paste0(prefix, meta_conds, ".out")
Zhou2018 <- read_results(inputs, meta_conds, cond_descs)
usethis::use_data(Zhou2018, overwrite = TRUE)

TEST <-read_result("test.out", unpack = TRUE, cores = 2)
TEST$tag <- clean_tag(TEST$tag)
usethis::use_data(TEST, overwrite = TRUE)
#TEST <-read_result("test.out", unpack = TRUE, cores = 2)
#TEST$tag <- clean_tag(TEST$tag)
#usethis::use_data(TEST, overwrite = TRUE)
22 changes: 0 additions & 22 deletions man/APOBEC1.Rd

This file was deleted.

17 changes: 17 additions & 0 deletions man/Allnum.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 13 additions & 1 deletion man/TEST.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/extract_info.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/extract_score.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/read_result.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions man/sub_counts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions man/sub_ratio.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

114 changes: 57 additions & 57 deletions vignettes/JACUSA2helper.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -391,63 +391,63 @@ Where available, the observations for each site (=contig, start, stop, strand) are
distinguished by "tag=A2G" or "tag=*" in the info column, respectively.
Use `result$tag <- clean_tag(result$tag)` to convert base substitutions such as "A2G" to "A->G".

```{r}
# load data
data(APOBEC1)
result <- APOBEC1
barplot(table(result$tag), ylab="# of sites", main="Distribution of tagged sites")
```
We calculate the total base call count with `lapply_cond` and `Reduce("+")`.

```{r}
# sum base call counts of conditions and replicates
result$total_bases <- lapply_cond(
result$bases, function(bases) { Reduce("+", bases) }
) %>% Reduce("+", .)
```


```{r}
# mark "valid" sites
result <- result %>%
dplyr::mutate(
keep = (
# restrict to total reads
tag == "*" &
# require at most 2 different base calls
base_count(total_bases, ref) <= 2 &
# require 10 reads for each replicate of each condition
All(cov$cond1 >= 10) & All(cov$cond2 >= 10) &
score >= 2 &
robust(bases)
)
)
# raw data
# add structured columns: tagged_bases and not_tagged_bases
expanded <- expand_tag(result)
filtered <- dplyr::filter(expanded, keep == TRUE)
```

```{r}
# plot
count <- c(nrow(result), nrow(expanded), nrow(filtered))
b <- barplot(count, names.arg = c("raw", "expaded", "filtered"), ylim=c(0, max(count) + max(count) * 15 / 100))
text(b, count + count * 10 / 100, count)
```

```{r}
# add observed non reference base substitution, e.g.: A->G
filtered[["base_sub"]] <- base_sub(filtered$total_bases, filtered$ref)
# ratio of non reference base calls
filtered[["non_ref_ratio"]] <- lapply_repl(filtered$bases, function(cond) non_ref_ratio(cond, filtered$ref))
# tag specific base substitution for each condition
filtered[["tagged_base_sub"]] <- lapply_cond(filtered$tagged_bases, function(bases) { Reduce("+", bases) %>% base_sub(filtered$ref)} )
filtered[["tagged_non_ref_ratio"]] <- lapply_repl(filtered$tagged_bases, function(cond) non_ref_ratio(cond, filtered$ref))
filtered[["not_tagged_base_sub"]] <- lapply_cond(filtered$not_tagged_bases, function(bases) { Reduce("+", bases) %>% base_sub(filtered$ref)} )
filtered[["not_tagged_non_ref_ratio"]] <- lapply_repl(filtered$not_tagged_bases, function(cond) non_ref_ratio(cond, filtered$ref))
```
#```{r}
## load data
#data(APOBEC1)
#result <- APOBEC1
#barplot(table(result$tag), ylab="# of sites", main="Distribution of tagged sites")
#```
#We calculate the total base call count with `lapply_cond` and `Reduce("+")`.

#```{r}
## sum base call counts of conditions and replicates
#result$total_bases <- lapply_cond(
# result$bases, function(bases) { Reduce("+", bases) }
#) %>% Reduce("+", .)
#```


#```{r}
## mark "valid" sites
#result <- result %>%
# dplyr::mutate(
# keep = (
# # restrict to total reads
# tag == "*" &
# # require at most 2 different base calls
# base_count(total_bases, ref) <= 2 &
# # require 10 reads for each replicate of each condition
# All(cov$cond1 >= 10) & All(cov$cond2 >= 10) &
# score >= 2 &
# robust(bases)
# )
# )
## raw data
## add structured columns: tagged_bases and not_tagged_bases
#expanded <- expand_tag(result)
#filtered <- dplyr::filter(expanded, keep == TRUE)
#```

#```{r}
## plot
#count <- c(nrow(result), nrow(expanded), nrow(filtered))
#b <- barplot(count, names.arg = c("raw", "expaded", "filtered"), ylim=c(0, max(count) + max(count) * 15 / 100))
#text(b, count + count * 10 / 100, count)
#```

#```{r}
## add observed non reference base substitution, e.g.: A->G
#filtered[["base_sub"]] <- base_sub(filtered$total_bases, filtered$ref)
## ratio of non reference base calls
#filtered[["non_ref_ratio"]] <- lapply_repl(filtered$bases, function(cond) non_ref_ratio(cond, filtered$ref))
#
## tag specific base substitution for each condition
#filtered[["tagged_base_sub"]] <- lapply_cond(filtered$tagged_bases, function(bases) { Reduce("+", bases) %>% base_sub(filtered$ref)} )
#filtered[["tagged_non_ref_ratio"]] <- lapply_repl(filtered$tagged_bases, function(cond) non_ref_ratio(cond, filtered$ref))
#
#filtered[["not_tagged_base_sub"]] <- lapply_cond(filtered$not_tagged_bases, function(bases) { Reduce("+", bases) %>% #base_sub(filtered$ref)} )
#filtered[["not_tagged_non_ref_ratio"]] <- lapply_repl(filtered$not_tagged_bases, function(cond) non_ref_ratio(cond, filtered$ref))
#```


<!--- # Experimental features
Expand Down

0 comments on commit 2017f47

Please sign in to comment.