diff --git a/DESCRIPTION b/DESCRIPTION index 5659be3..509025f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,34 +1,66 @@ -Package: SC3 Type: Package +Package: SC3 Title: Single-Cell Consensus Clustering Version: 1.15.1 +Authors@R: + c(person(given = "Vladimir", + family = "Kiselev", + role = c("cre", "aut"), + email = "vladimir.yu.kiselev@gmail.com"), + person(given = "Andrew", + family = "Yiu", + role = "ctb"), + person(given = "Tallulah", + family = "Andrews", + role = "ctb"), + person(given = "Martin", + family = "Hemberg", + role = "aut")) Author: Vladimir Kiselev Maintainer: Vladimir Kiselev -Authors@R: c(person("Vladimir", "Kiselev", - email = "vladimir.yu.kiselev@gmail.com", - role=c("cre", "aut")), - person("Andrew", "Yiu", - role=c("ctb")), - person("Tallulah", "Andrews", - role=c("ctb")), - person("Martin", "Hemberg", - role=c("aut"))) -Description: A tool for unsupervised clustering and analysis of single cell RNA-Seq data. +Description: A tool for unsupervised clustering and analysis of single + cell RNA-Seq data. 
License: GPL-3 -Imports: graphics, stats, utils, methods, e1071, parallel, foreach, - doParallel, doRNG, shiny, ggplot2, pheatmap (>= 1.0.8), - ROCR, robustbase, rrcov, cluster, WriteXLS, - Rcpp (>= 0.11.1), SummarizedExperiment, SingleCellExperiment, - BiocGenerics, S4Vectors -Depends: R(>= 3.3) -LinkingTo: Rcpp, RcppArmadillo -LazyData: TRUE -RoxygenNote: 6.0.1 -Suggests: knitr, rmarkdown, mclust, scater -VignetteBuilder: knitr -biocViews: ImmunoOncology, SingleCell, Software, Classification, Clustering, DimensionReduction, - SupportVectorMachine, RNASeq, Visualization, Transcriptomics, - DataRepresentation, GUI, DifferentialExpression, Transcription -NeedsCompilation: no URL: https://github.com/hemberg-lab/SC3 BugReports: https://support.bioconductor.org/t/sc3/ +Depends: + R (>= 3.3) +Imports: + BiocGenerics, + BiocParallel, + cluster, + e1071, + ggplot2, + graphics, + methods, + pheatmap (>= 1.0.8), + Rcpp (>= 0.11.1), + robustbase, + ROCR, + rrcov, + S4Vectors, + shiny, + SingleCellExperiment, + stats, + SummarizedExperiment, + utils, + WriteXLS +Suggests: + BiocStyle, + knitr, + mclust, + rmarkdown, + scater +LinkingTo: + Rcpp, + RcppArmadillo +VignetteBuilder: + knitr +biocViews: ImmunoOncology, SingleCell, Software, Classification, + Clustering, DimensionReduction, SupportVectorMachine, RNASeq, + Visualization, Transcriptomics, DataRepresentation, GUI, + DifferentialExpression, Transcription +Encoding: UTF-8 +LazyData: TRUE +NeedsCompilation: no +RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE index 167add9..7df85a5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,11 +39,7 @@ importFrom(SummarizedExperiment,assayNames) importFrom(SummarizedExperiment,colData) importFrom(SummarizedExperiment,rowData) importFrom(WriteXLS,WriteXLS) -importFrom(doParallel,registerDoParallel) -importFrom(doRNG,"%dorng%") importFrom(e1071,svm) -importFrom(foreach,"%dopar%") -importFrom(foreach,foreach) importFrom(ggplot2,aes) importFrom(ggplot2,geom_bar) 
importFrom(ggplot2,ggplot) @@ -53,9 +49,6 @@ importFrom(ggplot2,ylim) importFrom(graphics,plot) importFrom(methods,as) importFrom(methods,new) -importFrom(parallel,detectCores) -importFrom(parallel,makeCluster) -importFrom(parallel,stopCluster) importFrom(pheatmap,pheatmap) importFrom(robustbase,covMcd) importFrom(rrcov,PcaHubert) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 60cb602..b3ee6ea 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -1,49 +1,48 @@ #' @export -setGeneric("sc3", signature = "object", function(object, ks = NULL, - gene_filter = TRUE, pct_dropout_min = 10, pct_dropout_max = 90, - d_region_min = 0.04, d_region_max = 0.07, svm_num_cells = NULL, - svm_train_inds = NULL, svm_max = 5000, n_cores = NULL, kmeans_nstart = NULL, - kmeans_iter_max = 1e+09, k_estimator = FALSE, biology = FALSE, rand_seed = 1) { +setGeneric("sc3", signature = "object", function(object, ks = NULL, + gene_filter = TRUE, pct_dropout_min = 10, pct_dropout_max = 90, + d_region_min = 0.04, d_region_max = 0.07, svm_num_cells = NULL, + svm_train_inds = NULL, svm_max = 5000, kmeans_nstart = NULL, + kmeans_iter_max = 1e+09, k_estimator = FALSE, biology = FALSE, + BPPARAM = BiocParallel::bpparam()) { standardGeneric("sc3") }) #' @export -setGeneric("sc3_estimate_k", signature = "object", function(object) { +setGeneric("sc3_estimate_k", signature = "object", function(object, BPPARAM = BiocParallel::bpparam()) { standardGeneric("sc3_estimate_k") }) #' @export -setGeneric("sc3_prepare", function(object, gene_filter = TRUE, - pct_dropout_min = 10, pct_dropout_max = 90, d_region_min = 0.04, - d_region_max = 0.07, svm_num_cells = NULL, svm_train_inds = NULL, - svm_max = 5000, n_cores = NULL, kmeans_nstart = NULL, - kmeans_iter_max = 1e+09, rand_seed = 1) { +setGeneric("sc3_prepare", function(object, gene_filter = TRUE, + pct_dropout_min = 10, pct_dropout_max = 90, d_region_min = 0.04, + d_region_max = 0.07, svm_num_cells = NULL, svm_train_inds = NULL, + svm_max = 5000, 
kmeans_nstart = NULL, kmeans_iter_max = 1e+09) { standardGeneric("sc3_prepare") }) #' @export -setGeneric("sc3_calc_dists", signature = "object", function(object) { +setGeneric("sc3_calc_dists", signature = "object", function(object, BPPARAM = BiocParallel::bpparam()) { standardGeneric("sc3_calc_dists") }) #' @export -setGeneric("sc3_calc_transfs", signature = "object", function(object) { +setGeneric("sc3_calc_transfs", signature = "object", function(object, BPPARAM = BiocParallel::bpparam()) { standardGeneric("sc3_calc_transfs") }) #' @export -setGeneric("sc3_kmeans", signature = "object", function(object, ks = NULL) { +setGeneric("sc3_kmeans", signature = "object", function(object, ks = NULL, BPPARAM = BiocParallel::bpparam()) { standardGeneric("sc3_kmeans") }) #' @export -setGeneric("sc3_calc_consens", signature = "object", function(object) { +setGeneric("sc3_calc_consens", signature = "object", function(object, BPPARAM = BiocParallel::bpparam()) { standardGeneric("sc3_calc_consens") }) #' @export -setGeneric("sc3_calc_biology", signature = "object", function(object, ks = NULL, - regime = NULL) { +setGeneric("sc3_calc_biology", signature = "object", function(object, ks = NULL, regime = NULL, BPPARAM = BiocParallel::bpparam()) { standardGeneric("sc3_calc_biology") }) @@ -58,7 +57,7 @@ setGeneric("sc3_run_svm", signature = "object", function(object, ks = NULL) { }) #' @export -setGeneric("sc3_plot_consensus", signature = "object", function(object, k, +setGeneric("sc3_plot_consensus", signature = "object", function(object, k, show_pdata = NULL) { standardGeneric("sc3_plot_consensus") }) @@ -74,13 +73,13 @@ setGeneric("sc3_plot_expression", signature = "object", function(object, k, show }) #' @export -setGeneric("sc3_plot_de_genes", signature = "object", function(object, +setGeneric("sc3_plot_de_genes", signature = "object", function(object, k, p.val = 0.01, show_pdata = NULL) { standardGeneric("sc3_plot_de_genes") }) #' @export -setGeneric("sc3_plot_markers", 
signature = "object", function(object, k, auroc = 0.85, +setGeneric("sc3_plot_markers", signature = "object", function(object, k, auroc = 0.85, p.val = 0.01, show_pdata = NULL) { standardGeneric("sc3_plot_markers") }) @@ -91,7 +90,7 @@ setGeneric("sc3_plot_cluster_stability", signature = "object", function(object, }) #' @export -setGeneric("sc3_export_results_xls", signature = "object", function(object, +setGeneric("sc3_export_results_xls", signature = "object", function(object, filename = "sc3_results.xls") { standardGeneric("sc3_export_results_xls") }) diff --git a/R/CoreMethods.R b/R/CoreMethods.R index 46b12f4..1243654 100644 --- a/R/CoreMethods.R +++ b/R/CoreMethods.R @@ -1,49 +1,46 @@ #' Run all steps of \code{SC3} in one go -#' +#' #' This function is a wrapper that executes all steps of \code{SC3} analysis in one go. -#' +#' #' @param object an object of \code{SingleCellExperiment} class. #' @param ks a range of the number of clusters \code{k} used for \code{SC3} clustering. #' Can also be a single integer. -#' @param gene_filter a boolen variable which defines whether to perform gene +#' @param gene_filter a boolen variable which defines whether to perform gene #' filtering before SC3 clustering. -#' @param pct_dropout_min if \code{gene_filter = TRUE}, then genes with percent of dropouts smaller than +#' @param pct_dropout_min if \code{gene_filter = TRUE}, then genes with percent of dropouts smaller than #' \code{pct_dropout_min} are filtered out before clustering. -#' @param pct_dropout_max if \code{gene_filter = TRUE}, then genes with percent of dropouts larger than +#' @param pct_dropout_max if \code{gene_filter = TRUE}, then genes with percent of dropouts larger than #' \code{pct_dropout_max} are filtered out before clustering. -#' @param d_region_min defines the minimum number of eigenvectors used for +#' @param d_region_min defines the minimum number of eigenvectors used for #' kmeans clustering as a fraction of the total number of cells. 
Default is \code{0.04}. #' See \code{SC3} paper for more details. -#' @param d_region_max defines the maximum number of eigenvectors used for +#' @param d_region_max defines the maximum number of eigenvectors used for #' kmeans clustering as a fraction of the total number of cells. Default is \code{0.07}. #' See \code{SC3} paper for more details. -#' @param svm_num_cells number of randomly selected training cells to be used +#' @param svm_num_cells number of randomly selected training cells to be used #' for SVM prediction. The default is \code{NULL}. -#' @param svm_train_inds a numeric vector defining indeces of training cells +#' @param svm_train_inds a numeric vector defining indeces of training cells #' that should be used for SVM training. The default is \code{NULL}. #' @param svm_max define the maximum number of cells below which SVM is not run. -#' @param n_cores defines the number of cores to be used on the user's machine. If not set, `SC3` will use all but one cores of your machine. -#' @param kmeans_nstart nstart parameter passed to \code{\link[stats]{kmeans}} function. Can be set manually. By default it is +#' @param kmeans_nstart nstart parameter passed to \code{\link[stats]{kmeans}} function. Can be set manually. By default it is #' \code{1000} for up to \code{2000} cells and \code{50} for more than \code{2000} cells. -#' @param kmeans_iter_max iter.max parameter passed to \code{\link[stats]{kmeans}} +#' @param kmeans_iter_max iter.max parameter passed to \code{\link[stats]{kmeans}} #' function. #' @param k_estimator boolean parameter, defines whether to estimate an optimal number of clusters \code{k}. If user has already defined the ks parameter the estimation does not affect the user's paramater. -#' @param biology boolean parameter, defines whether to compute differentially expressed genes, marker +#' @param biology boolean parameter, defines whether to compute differentially expressed genes, marker #' genes and cell outliers. 
-#' @param rand_seed sets the seed of the random number generator. \code{SC3} is a stochastic -#' method, so setting the \code{rand_seed} to a fixed values can be used for reproducibility -#' purposes. -#' +#' @param BPPARAM a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +#' including a seed for random number generator. +#' #' @name sc3 #' @aliases sc3 -#' +#' #' @return an object of \code{SingleCellExperiment} class -sc3.SingleCellExperiment <- function(object, ks, gene_filter, pct_dropout_min, pct_dropout_max, d_region_min, - d_region_max, svm_num_cells, svm_train_inds, svm_max, n_cores, kmeans_nstart, kmeans_iter_max, - k_estimator, biology, rand_seed) { - object <- sc3_prepare(object, gene_filter, pct_dropout_min, pct_dropout_max, - d_region_min, d_region_max, svm_num_cells, svm_train_inds, svm_max, n_cores, kmeans_nstart, - kmeans_iter_max, rand_seed) +sc3.SingleCellExperiment <- function(object, ks, gene_filter, pct_dropout_min, pct_dropout_max, d_region_min, + d_region_max, svm_num_cells, svm_train_inds, svm_max, kmeans_nstart, kmeans_iter_max, + k_estimator, biology, BPPARAM) { + object <- sc3_prepare(object, gene_filter, pct_dropout_min, pct_dropout_max, + d_region_min, d_region_max, svm_num_cells, svm_train_inds, svm_max, kmeans_nstart, kmeans_iter_max) if (k_estimator) { object <- sc3_estimate_k(object) # Do not override cluster if user has set a k @@ -52,12 +49,17 @@ sc3.SingleCellExperiment <- function(object, ks, gene_filter, pct_dropout_min, p ks <- metadata(object)$sc3$k_estimation } } - object <- sc3_calc_dists(object) - object <- sc3_calc_transfs(object) - object <- sc3_kmeans(object, ks) - object <- sc3_calc_consens(object) + + if (!BiocParallel::bpisup(BPPARAM)) { + BiocParallel::bpstart(BPPARAM) + } + + object <- sc3_calc_dists(object, BPPARAM = BPPARAM) + object <- sc3_calc_transfs(object, BPPARAM = BPPARAM) + object <- sc3_kmeans(object, ks = ks, BPPARAM = BPPARAM) + object <- 
sc3_calc_consens(object, BPPARAM = BPPARAM) if (biology) { - object <- sc3_calc_biology(object, ks) + object <- sc3_calc_biology(object, ks, BPPARAM = BPPARAM) } return(object) } @@ -67,7 +69,7 @@ sc3.SingleCellExperiment <- function(object, ks, gene_filter, pct_dropout_min, p setMethod("sc3", signature(object = "SingleCellExperiment"), sc3.SingleCellExperiment) #' Prepare the \code{SingleCellExperiment} object for \code{SC3} clustering. -#' +#' #' This function prepares an object of \code{SingleCellExperiment} class for \code{SC3} clustering. It #' creates and populates the following items of the \code{sc3} slot of the \code{metadata(object)}: #' \itemize{ @@ -75,55 +77,48 @@ setMethod("sc3", signature(object = "SingleCellExperiment"), sc3.SingleCellExper #' \item \code{kmeans_nstart} - the same as the \code{kmeans_nstart} argument. #' \item \code{n_dim} - contains numbers of the number of eigenvectors to be used #' in \code{\link[stats]{kmeans}} clustering. -#' \item \code{rand_seed} - the same as the \code{rand_seed} argument. -#' \item \code{svm_train_inds} - if SVM is used this item contains indexes of the +#' \item \code{svm_train_inds} - if SVM is used this item contains indexes of the #' training cells to be used for SC3 clustering and further SVM prediction. #' \item \code{svm_study_inds} - if SVM is used this item contains indexes of the #' cells to be predicted by SVM. -#' \item \code{n_cores} - the same as the \code{n_cores} argument. #' } -#' +#' #' @param object an object of \code{SingleCellExperiment} class. -#' @param gene_filter a boolen variable which defines whether to perform gene +#' @param gene_filter a boolen variable which defines whether to perform gene #' filtering before SC3 clustering. 
-#' @param pct_dropout_min if \code{gene_filter = TRUE}, then genes with percent of dropouts smaller than +#' @param pct_dropout_min if \code{gene_filter = TRUE}, then genes with percent of dropouts smaller than #' \code{pct_dropout_min} are filtered out before clustering. -#' @param pct_dropout_max if \code{gene_filter = TRUE}, then genes with percent of dropouts larger than +#' @param pct_dropout_max if \code{gene_filter = TRUE}, then genes with percent of dropouts larger than #' \code{pct_dropout_max} are filtered out before clustering. -#' @param d_region_min defines the minimum number of eigenvectors used for +#' @param d_region_min defines the minimum number of eigenvectors used for #' kmeans clustering as a fraction of the total number of cells. Default is \code{0.04}. #' See \code{SC3} paper for more details. -#' @param d_region_max defines the maximum number of eigenvectors used for +#' @param d_region_max defines the maximum number of eigenvectors used for #' kmeans clustering as a fraction of the total number of cells. Default is \code{0.07}. #' See \code{SC3} paper for more details. -#' @param svm_num_cells number of randomly selected training cells to be used +#' @param svm_num_cells number of randomly selected training cells to be used #' for SVM prediction. The default is \code{NULL}. -#' @param svm_train_inds a numeric vector defining indeces of training cells +#' @param svm_train_inds a numeric vector defining indeces of training cells #' that should be used for SVM training. The default is \code{NULL}. #' @param svm_max define the maximum number of cells below which SVM is not run. -#' @param n_cores defines the number of cores to be used on the user's machine. If not set, `SC3` will use all but one cores of your machine. -#' @param kmeans_nstart nstart parameter passed to \code{\link[stats]{kmeans}} function. Default is +#' @param kmeans_nstart nstart parameter passed to \code{\link[stats]{kmeans}} function. 
Default is #' \code{1000} for up to \code{2000} cells and \code{50} for more than \code{2000} cells. -#' @param kmeans_iter_max iter.max parameter passed to \code{\link[stats]{kmeans}} +#' @param kmeans_iter_max iter.max parameter passed to \code{\link[stats]{kmeans}} #' function. Default is \code{1e+09}. -#' @param rand_seed sets the seed of the random number generator. \code{SC3} is a stochastic -#' method, so setting the \code{rand_seed} to a fixed values can be used for reproducibility -#' purposes. -#' +#' #' @name sc3_prepare #' @aliases sc3_prepare sc3_prepare,SingleCellExperiment-method -#' +#' #' @return an object of \code{SingleCellExperiment} class -#' -#' @importFrom parallel detectCores +#' #' @importFrom SummarizedExperiment colData colData<- rowData rowData<- assayNames #' @importFrom S4Vectors metadata metadata<- #' @importFrom utils capture.output #' @importFrom methods new #' @importFrom BiocGenerics counts -sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_min, pct_dropout_max, - d_region_min, d_region_max, svm_num_cells, svm_train_inds, svm_max, n_cores, kmeans_nstart, - kmeans_iter_max, rand_seed) { +sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_min, pct_dropout_max, + d_region_min, d_region_max, svm_num_cells, svm_train_inds, svm_max, kmeans_nstart, kmeans_iter_max) { + if (is.null(rowData(object)$feature_symbol)) { stop("There is no `feature_symbol` column in the `rowData` slot of your dataset! Please write your gene/transcript names to `rowData(object)$feature_symbol`!") return(object) @@ -136,14 +131,14 @@ sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_mi stop("There is no `logcounts` slot in your input SingleCellExperiment object! SC3 operates on `logcounts` slot, which is supposed to contain both normalised and log-transformed expression values! 
Please write these values the slot by setting `logcounts(object) <- log_norm_counts`!") return(object) } - + message("Setting SC3 parameters...") - + # clean up after the previous SC3 run sc3 slot metadata(object)$sc3 <- list() colData(object) <- colData(object)[, !grepl("sc3_", colnames(colData(object))), drop = FALSE] rowData(object) <- rowData(object)[, !grepl("sc3_", colnames(rowData(object))), drop = FALSE] - + # gene filter f_data <- rowData(object) f_data$sc3_gene_filter <- TRUE @@ -156,7 +151,7 @@ sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_mi } } rowData(object) <- as(f_data, "DataFrame") - + metadata(object)$sc3$kmeans_iter_max <- kmeans_iter_max if (is.null(kmeans_nstart)) { if (ncol(object) > 2000) { @@ -168,14 +163,14 @@ sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_mi } else { metadata(object)$sc3$kmeans_nstart <- kmeans_nstart } - + # define number of cells and region of dimensions n_dim <- floor(d_region_min * ncol(object)):ceiling(d_region_max * ncol(object)) # for large datasets restrict the region of dimensions to 15 if (length(n_dim) > 15) { n_dim <- sample(n_dim, 15) } - + # prepare for SVM if (!is.null(svm_num_cells) | !is.null(svm_train_inds) | ncol(object) > svm_max) { # handle all possible errors @@ -183,7 +178,7 @@ sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_mi if (!is.null(svm_train_inds)) { return(message("You have set both svm_num_cells and svm_train_inds parameters for SVM training. Please set only one of them and rerun sc3_prepare().")) } - if (svm_num_cells >= ncol(object) - 1) + if (svm_num_cells >= ncol(object) - 1) return(message("Number of cells used for SVM training is larger (or equal) than the total number of cells in your dataset. Please make svm_num_cells parameter smaller and rerun sc3_prepare().")) if (svm_num_cells < 10) { return(message("Number of cells used for SVM training is less than 10. 
Please make sure the number of clusters k is smaller than 10 or increase the number of training cells.")) @@ -199,10 +194,10 @@ sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_mi } # run SVM tmp <- prepare_for_svm(ncol(object), svm_num_cells, svm_train_inds, svm_max) - + metadata(object)$sc3$svm_train_inds <- tmp$svm_train_inds metadata(object)$sc3$svm_study_inds <- tmp$svm_study_inds - + # update kmeans_nstart after defining SVM training indeces if (is.null(kmeans_nstart)) { if (length(tmp$svm_train_inds) <= 2000) { @@ -211,7 +206,7 @@ sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_mi } else { metadata(object)$sc3$kmeans_nstart <- kmeans_nstart } - + # update the region of dimensions n_dim <- floor(d_region_min * length(tmp$svm_train_inds)):ceiling(d_region_max * length(tmp$svm_train_inds)) # for large datasets restrict the region of dimensions to 15 @@ -219,25 +214,9 @@ sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_mi n_dim <- sample(n_dim, 15) } } - + metadata(object)$sc3$n_dim <- n_dim - - metadata(object)$sc3$rand_seed <- rand_seed - - # register computing cluster (N-1 CPUs) on a local machine - if (is.null(n_cores)) { - n_cores <- parallel::detectCores() - if (is.null(n_cores)) { - return("Cannot define a number of available CPU cores that can be used by SC3. Try to set the n_cores parameter in the sc3() function call.") - } - # leave one core for the user - if (n_cores > 1) { - n_cores <- n_cores - 1 - } - } - - metadata(object)$sc3$n_cores <- n_cores - + return(object) } @@ -246,14 +225,14 @@ sc3_prepare.SingleCellExperiment <- function(object, gene_filter, pct_dropout_mi setMethod("sc3_prepare", signature(object = "SingleCellExperiment"), sc3_prepare.SingleCellExperiment) #' Estimate the optimal number of cluster \code{k} for a scRNA-Seq expression matrix -#' +#' #' Uses Tracy-Widom theory on random matrices to estimate the optimal number of #' clusters \code{k}. 
It creates and populates the \code{k_estimation} item of the #' \code{sc3} slot of the \code{metadata(object)}. -#' +#' #' @name sc3_estimate_k #' @aliases sc3_estimate_k sc3_estimate_k,SingleCellExperiment-method -#' +#' #' @param object an object of \code{SingleCellExperiment} class #' @return an estimated value of k sc3_estimate_k.SingleCellExperiment <- function(object) { @@ -269,61 +248,46 @@ sc3_estimate_k.SingleCellExperiment <- function(object) { setMethod("sc3_estimate_k", signature(object = "SingleCellExperiment"), sc3_estimate_k.SingleCellExperiment) #' Calculate distances between the cells. -#' +#' #' This function calculates distances between the cells. It #' creates and populates the following items of the \code{sc3} slot of the \code{metadata(object)}: #' \itemize{ #' \item \code{distances} - contains a list of distance matrices corresponding to #' Euclidean, Pearson and Spearman distances. #' } -#' +#' #' @name sc3_calc_dists #' @aliases sc3_calc_dists, sc3_calc_dists,SingleCellExperiment-method -#' +#' #' @param object an object of \code{SingleCellExperiment} class -#' +#' @param BPPARAM a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +#' including a seed for random number generator. 
+#' #' @return an object of \code{SingleCellExperiment} class -#' -#' @importFrom doRNG %dorng% -#' @importFrom foreach foreach %dopar% -#' @importFrom parallel makeCluster stopCluster -#' @importFrom doParallel registerDoParallel -sc3_calc_dists.SingleCellExperiment <- function(object) { +sc3_calc_dists.SingleCellExperiment <- function(object, BPPARAM) { dataset <- get_processed_dataset(object) - + # check whether in the SVM regime if (!is.null(metadata(object)$sc3$svm_train_inds)) { dataset <- dataset[, metadata(object)$sc3$svm_train_inds] } - + # NULLing the variables to avoid notes in R CMD CHECK i <- NULL - + distances <- c("euclidean", "pearson", "spearman") - + message("Calculating distances between the cells...") - - if (metadata(object)$sc3$n_cores > length(distances)) { - n_cores <- length(distances) - } else { - n_cores <- metadata(object)$sc3$n_cores - } - - cl <- parallel::makeCluster(n_cores, outfile = "") - doParallel::registerDoParallel(cl, cores = n_cores) - + # calculate distances in parallel - dists <- foreach::foreach(i = distances) %dorng% { + dists <- BiocParallel::bplapply(distances, BPPARAM = BPPARAM, FUN = function(i, dataset) { try({ calculate_distance(dataset, i) }) - } - - # stop local cluster - parallel::stopCluster(cl) - + }, dataset = dataset) + names(dists) <- distances - + metadata(object)$sc3$distances <- dists return(object) } @@ -333,69 +297,54 @@ sc3_calc_dists.SingleCellExperiment <- function(object) { setMethod("sc3_calc_dists", signature(object = "SingleCellExperiment"), sc3_calc_dists.SingleCellExperiment) #' Calculate transformations of the distance matrices. 
-#' -#' This function transforms all \code{distances} items of the \code{sc3} slot of -#' the \code{metadata(object)} using either principal component analysis (PCA) +#' +#' This function transforms all \code{distances} items of the \code{sc3} slot of +#' the \code{metadata(object)} using either principal component analysis (PCA) #' or by calculating the eigenvectors of the associated graph Laplacian. -#' The columns of the resulting matrices are then sorted in descending order -#' by their corresponding eigenvalues. The first \code{d} columns -#' (where \code{d = max(metadata(object)$sc3$n_dim)}) of each transformation are then +#' The columns of the resulting matrices are then sorted in descending order +#' by their corresponding eigenvalues. The first \code{d} columns +#' (where \code{d = max(metadata(object)$sc3$n_dim)}) of each transformation are then #' written to the \code{transformations} item of the \code{sc3} slot. #' Additionally, this function also removes the previously calculated \code{distances} from #' the \code{sc3} slot, as they are not needed for further analysis. -#' +#' #' @name sc3_calc_transfs #' @aliases sc3_calc_transfs, sc3_calc_transfs,SingleCellExperiment-method -#' +#' #' @param object an object of \code{SingleCellExperiment} class -#' +#' @param BPPARAM a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +#' including a seed for random number generator. 
+#' #' @return an object of \code{SingleCellExperiment} class -#' -#' @importFrom doRNG %dorng% -#' @importFrom foreach foreach -#' @importFrom parallel makeCluster stopCluster -#' @importFrom doParallel registerDoParallel -sc3_calc_transfs.SingleCellExperiment <- function(object) { +sc3_calc_transfs.SingleCellExperiment <- function(object, BPPARAM) { dists <- metadata(object)$sc3$distances if (is.null(dists)) { stop(paste0("Please run sc3_calc_dists() first!")) return(object) } - + # NULLing the variables to avoid notes in R CMD CHECK i <- NULL - + distances <- names(dists) transformations <- c("pca", "laplacian") - + n_dim <- metadata(object)$sc3$n_dim - + hash.table <- expand.grid(dists = distances, transfs = transformations, stringsAsFactors = FALSE) - + message("Performing transformations and calculating eigenvectors...") - - if (metadata(object)$sc3$n_cores > nrow(hash.table)) { - n_cores <- nrow(hash.table) - } else { - n_cores <- metadata(object)$sc3$n_cores - } - - cl <- parallel::makeCluster(n_cores, outfile = "") - doParallel::registerDoParallel(cl, cores = n_cores) - + # calculate the 6 distinct transformations in parallel - transfs <- foreach::foreach(i = 1:nrow(hash.table)) %dorng% { + transfs <- BiocParallel::bplapply(1:nrow(hash.table), BPPARAM = BPPARAM, FUN = function(i, hash.table, dists, n_dim) { try({ tmp <- transformation(get(hash.table[i, 1], dists), hash.table[i, 2]) tmp[, 1:max(n_dim)] }) - } - - # stop local cluster - parallel::stopCluster(cl) - + }, hash.table = hash.table, dists = dists, n_dim = n_dim) + names(transfs) <- paste(hash.table[, 1], hash.table[, 2], sep = "_") - + metadata(object)$sc3$transformations <- transfs # remove distances after calculating transformations metadata(object)$sc3$distances <- NULL @@ -412,77 +361,67 @@ sc3_calc_transfs.SingleCellExperiment <- function(object) { setMethod("sc3_calc_transfs", signature(object = "SingleCellExperiment"), sc3_calc_transfs.SingleCellExperiment) #' \code{kmeans} clustering of 
cells. -#' -#' This function performs \code{\link[stats]{kmeans}} clustering of the matrices +#' +#' This function performs \code{\link[stats]{kmeans}} clustering of the matrices #' contained in the \code{transformations} item of the \code{sc3} slot of the \code{metadata(object)}. It then #' creates and populates the following items of the \code{sc3} slot: #' \itemize{ #' \item \code{kmeans} - contains a list of kmeans clusterings. #' } -#' +#' #' @name sc3_kmeans #' @aliases sc3_kmeans, sc3_kmeans,SingleCellExperiment-method -#' +#' #' @param object an object of \code{SingleCellExperiment} class #' @param ks a continuous range of integers - the number of clusters \code{k} to be used for SC3 clustering. #' Can also be a single integer. -#' +#' #' @return an object of \code{SingleCellExperiment} class -#' -#' @importFrom doRNG %dorng% -#' @importFrom foreach foreach -#' @importFrom parallel makeCluster stopCluster -#' @importFrom doParallel registerDoParallel +#' @param BPPARAM a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +#' including a seed for random number generator. 
+#' #' @importFrom utils setTxtProgressBar txtProgressBar #' @importFrom stats kmeans -sc3_kmeans.SingleCellExperiment <- function(object, ks) { +sc3_kmeans.SingleCellExperiment <- function(object, ks, BPPARAM) { if (is.null(ks)) { stop(paste0("Please provide a range of the number of clusters `ks` to be used by SC3!")) return(object) } - + transfs <- metadata(object)$sc3$transformations if (is.null(transfs)) { stop(paste0("Please run sc3_calc_transfs() first!")) return(object) } - + # NULLing the variables to avoid notes in R CMD CHECK i <- NULL - + n_dim <- metadata(object)$sc3$n_dim - + hash.table <- expand.grid(transf = names(transfs), ks = ks, n_dim = n_dim, stringsAsFactors = FALSE) - + message("Performing k-means clustering...") - - n_cores <- metadata(object)$sc3$n_cores - + kmeans_iter_max <- metadata(object)$sc3$kmeans_iter_max kmeans_nstart <- metadata(object)$sc3$kmeans_nstart - - cl <- parallel::makeCluster(n_cores, outfile = "") - doParallel::registerDoParallel(cl, cores = n_cores) - + pb <- utils::txtProgressBar(min = 1, max = nrow(hash.table), style = 3) - + # calculate the 6 distinct transformations in parallel - labs <- foreach::foreach(i = 1:nrow(hash.table)) %dorng% { + labs <- BiocParallel::bplapply(1:nrow(hash.table), BPPARAM = BPPARAM, FUN = function(i, hash.table, transfs, n_dim, kmeans_iter_max, kmeans_nstart, pb) { try({ utils::setTxtProgressBar(pb, i) transf <- get(hash.table$transf[i], transfs) - stats::kmeans(transf[, 1:hash.table$n_dim[i]], hash.table$ks[i], iter.max = kmeans_iter_max, - nstart = kmeans_nstart)$cluster + stats::kmeans(transf[, 1:hash.table$n_dim[i]], hash.table$ks[i], iter.max = kmeans_iter_max, + nstart = kmeans_nstart)$cluster }) - } - + }, hash.table = hash.table, transfs = transfs, n_dim = n_dim, kmeans_iter_max = kmeans_iter_max, kmeans_nstart = kmeans_nstart, pb = pb) + close(pb) - - # stop local cluster - parallel::stopCluster(cl) - + names(labs) <- paste(hash.table$transf, hash.table$ks, hash.table$n_dim, sep = 
"_") - + metadata(object)$sc3$kmeans <- labs return(object) } @@ -492,60 +431,49 @@ sc3_kmeans.SingleCellExperiment <- function(object, ks) { setMethod("sc3_kmeans", signature(object = "SingleCellExperiment"), sc3_kmeans.SingleCellExperiment) #' Calculate consensus matrix. -#' +#' #' This function calculates consensus matrices based on the clustering solutions #' contained in the \code{kmeans} item of the \code{sc3} slot of the \code{metadata(object)}. It then -#' creates and populates the \code{consensus} item of the \code{sc3} slot with +#' creates and populates the \code{consensus} item of the \code{sc3} slot with #' consensus matrices, their hierarchical clusterings in \code{hclust} objects, -#' and Silhouette indeces of the clusters. It also removes the previously +#' and Silhouette indeces of the clusters. It also removes the previously #' calculated \code{kmeans} clusterings from #' the \code{sc3} slot, as they are not needed for further analysis. -#' +#' #' Additionally, it also adds new columns to the \code{colData} slot of the #' input \code{object}. The column names correspond to the consensus cell labels -#' and have the following format: \code{sc3_k_clusters}, where \code{k} is the +#' and have the following format: \code{sc3_k_clusters}, where \code{k} is the #' number of clusters. -#' +#' #' @name sc3_calc_consens #' @aliases sc3_calc_consens, sc3_calc_consens,SingleCellExperiment-method -#' +#' #' @param object an object of \code{SingleCellExperiment} class -#' +#' @param BPPARAM a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +#' including a seed for random number generator. 
+#' #' @return an object of \code{SingleCellExperiment} class -#' -#' @importFrom doRNG %dorng% -#' @importFrom foreach foreach -#' @importFrom parallel makeCluster stopCluster -#' @importFrom doParallel registerDoParallel +#' #' @import cluster #' @importFrom stats hclust dist as.dist -#' +#' #' @useDynLib SC3 #' @import Rcpp -sc3_calc_consens.SingleCellExperiment <- function(object) { +sc3_calc_consens.SingleCellExperiment <- function(object, BPPARAM) { k.means <- metadata(object)$sc3$kmeans if (is.null(k.means)) { stop(paste0("Please run sc3_kmeans() first!")) return(object) } - + # NULLing the variables to avoid notes in R CMD CHECK i <- NULL - + ks <- as.numeric(unique(unlist(lapply(strsplit(names(k.means), "_"), "[[", 3)))) - - if (metadata(object)$sc3$n_cores > length(ks)) { - n_cores <- length(ks) - } else { - n_cores <- metadata(object)$sc3$n_cores - } - + message("Calculating consensus matrix...") - - cl <- parallel::makeCluster(n_cores, outfile = "") - doParallel::registerDoParallel(cl, cores = n_cores) - - cons <- foreach::foreach(i = ks) %dorng% { + + cons <- BiocParallel::bplapply(ks, BPPARAM = BPPARAM, FUN = function(i, k.means) { try({ d <- k.means[grep(paste0("_", i, "_"), names(k.means))] d <- matrix(unlist(d), nrow = length(d[[1]])) @@ -556,16 +484,13 @@ sc3_calc_consens.SingleCellExperiment <- function(object) { diss <- stats::as.dist(as.matrix(stats::as.dist(tmp))) hc <- stats::hclust(diss) clusts <- reindex_clusters(hc, i) - + silh <- cluster::silhouette(clusts, diss) - + list(consensus = dat, hc = hc, silhouette = silh) }) - } - - # stop local cluster - parallel::stopCluster(cl) - + }, k.means = k.means) + names(cons) <- ks if(is.null(metadata(object)$sc3$consensus)) { metadata(object)$sc3$consensus <- list() @@ -573,10 +498,10 @@ sc3_calc_consens.SingleCellExperiment <- function(object) { for (n in names(cons)) { metadata(object)$sc3$consensus[[n]] <- cons[[n]] } - + # remove kmeans results after calculating consensus 
metadata(object)$sc3$kmeans <- NULL - + p_data <- colData(object) for (k in ks) { hc <- metadata(object)$sc3$consensus[[as.character(k)]]$hc @@ -590,7 +515,7 @@ sc3_calc_consens.SingleCellExperiment <- function(object) { p_data[, paste0("sc3_", k, "_clusters")] <- factor(clusts, levels = sort(unique(clusts))) } colData(object) <- as(p_data, "DataFrame") - + return(object) } @@ -600,54 +525,52 @@ setMethod("sc3_calc_consens", signature(object = "SingleCellExperiment"), sc3_ca #' Calculate DE genes, marker genes and cell outliers. -#' -#' This function calculates differentially expressed (DE) genes, marker genes +#' +#' This function calculates differentially expressed (DE) genes, marker genes #' and cell outliers based on the consensus \code{SC3} clusterings. -#' -#' DE genes are calculated using \code{\link{get_de_genes}}. Results of the DE -#' analysis are saved as new columns in the -#' \code{featureData} slot of the input \code{object}. The column names correspond -#' to the adjusted \code{p-value}s of the genes and have the following format: +#' +#' DE genes are calculated using \code{\link{get_de_genes}}. Results of the DE +#' analysis are saved as new columns in the +#' \code{featureData} slot of the input \code{object}. The column names correspond +#' to the adjusted \code{p-value}s of the genes and have the following format: #' \code{sc3_k_de_padj}, where \code{k} is the number of clusters. -#' -#' Marker genes are calculated using \code{\link{get_marker_genes}}. -#' Results of the marker gene analysis are saved as three new -#' columns (for each \code{k}) to the -#' \code{featureData} slot of the input \code{object}. The column names correspond -#' to the \code{SC3} cluster labels, to the adjusted \code{p-value}s of the genes +#' +#' Marker genes are calculated using \code{\link{get_marker_genes}}. +#' Results of the marker gene analysis are saved as three new +#' columns (for each \code{k}) to the +#' \code{featureData} slot of the input \code{object}. 
The column names correspond +#' to the \code{SC3} cluster labels, to the adjusted \code{p-value}s of the genes #' and to the area under the ROC curve -#' and have the following format: \code{sc3_k_markers_clusts}, -#' \code{sc3_k_markers_padj} and \code{sc3_k_markers_auroc}, where \code{k} is +#' and have the following format: \code{sc3_k_markers_clusts}, +#' \code{sc3_k_markers_padj} and \code{sc3_k_markers_auroc}, where \code{k} is #' the number of clusters. -#' -#' Outlier cells are calculated using \code{\link{get_outl_cells}}. Results of the -#' cell outlier analysis are saved as new columns in the -#' \code{phenoData} slot of the input \code{object}. The column names correspond -#' to the \code{log2(outlier_score)} and have the following format: +#' +#' Outlier cells are calculated using \code{\link{get_outl_cells}}. Results of the +#' cell outlier analysis are saved as new columns in the +#' \code{phenoData} slot of the input \code{object}. The column names correspond +#' to the \code{log2(outlier_score)} and have the following format: #' \code{sc3_k_log2_outlier_score}, where \code{k} is the number of clusters. -#' +#' #' Additionally, \code{biology} item is added to the \code{sc3} slot and is set to #' \code{TRUE} indicating that the biological analysis of the dataset has been #' performed. -#' +#' #' @name sc3_calc_biology #' @aliases sc3_calc_biology, sc3_calc_biology,SingleCellExperiment-method -#' +#' #' @param object an object of \code{SingleCellExperiment} class #' @param ks a continuous range of integers - the number of clusters \code{k} to be used for SC3 clustering. #' Can also be a single integer. #' @param regime defines what biological analysis to perform. "marker" for #' marker genes, "de" for differentiall expressed genes and "outl" for outlier #' cells -#' +#' @param BPPARAM a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +#' including a seed for random number generator. 
+#' #' @return an object of \code{SingleCellExperiment} class -#' -#' @importFrom doRNG %dorng% -#' @importFrom foreach foreach -#' @importFrom parallel makeCluster stopCluster -#' @importFrom doParallel registerDoParallel +#' #' @importFrom methods as -sc3_calc_biology.SingleCellExperiment <- function(object, ks, regime) { +sc3_calc_biology.SingleCellExperiment <- function(object, ks, regime, BPPARAM) { if (is.null(metadata(object)$sc3$consensus)) { stop(paste0("Please run sc3_consensus() first!")) return(object) @@ -667,11 +590,11 @@ sc3_calc_biology.SingleCellExperiment <- function(object, ks, regime) { stop(paste0("Regime value must be either 'marker', 'de' or 'outl', or any combination of these three!")) return(object) } - + message("Calculating biology...") - + hash.table <- expand.grid(ks = ks, regime = regime, stringsAsFactors = FALSE) - + dataset <- get_processed_dataset(object) p_data <- colData(object) clusts <- as.data.frame(p_data[, grep("sc3_.*_clusters", colnames(p_data))]) @@ -682,30 +605,18 @@ sc3_calc_biology.SingleCellExperiment <- function(object, ks, regime) { dataset <- dataset[, metadata(object)$sc3$svm_train_inds] clusts <- clusts[metadata(object)$sc3$svm_train_inds, ] } - + # NULLing the variables to avoid notes in R CMD CHECK i <- NULL - - if (metadata(object)$sc3$n_cores > nrow(hash.table)) { - n_cores <- nrow(hash.table) - } else { - n_cores <- metadata(object)$sc3$n_cores - } - - cl <- parallel::makeCluster(n_cores, outfile = "") - doParallel::registerDoParallel(cl, cores = n_cores) - - biol <- foreach::foreach(i = 1:nrow(hash.table)) %dorng% { + + biol <- BiocParallel::bplapply(1:nrow(hash.table), BPPARAM = BPPARAM, FUN = function(i, dataset, hash.table, clusts) { try({ get_biolgy(dataset, clusts[, paste0("sc3_", hash.table[i, 1], "_clusters")], hash.table[i, 2]) }) - } - - # stop local cluster - parallel::stopCluster(cl) - + }, dataset = dataset, hash.table = hash.table, clusts = clusts) + names(biol) <- paste(hash.table$ks, 
hash.table$regime, sep = "_") - + f_data <- as.data.frame(rowData(object)) p_data <- as.data.frame(colData(object)) for (b in names(biol)) { @@ -721,11 +632,11 @@ sc3_calc_biology.SingleCellExperiment <- function(object, ks, regime) { f_data[, paste0("sc3_", k, "_markers_clusts")] <- NA f_data[, paste0("sc3_", k, "_markers_padj")] <- NA f_data[, paste0("sc3_", k, "_markers_auroc")] <- NA - f_data[, paste0("sc3_", k, "_markers_clusts")][which(f_data$sc3_gene_filter)] <- biol[[b]][, + f_data[, paste0("sc3_", k, "_markers_clusts")][which(f_data$sc3_gene_filter)] <- biol[[b]][, 2] - f_data[, paste0("sc3_", k, "_markers_padj")][which(f_data$sc3_gene_filter)] <- biol[[b]][, + f_data[, paste0("sc3_", k, "_markers_padj")][which(f_data$sc3_gene_filter)] <- biol[[b]][, 3] - f_data[, paste0("sc3_", k, "_markers_auroc")][which(f_data$sc3_gene_filter)] <- biol[[b]][, + f_data[, paste0("sc3_", k, "_markers_auroc")][which(f_data$sc3_gene_filter)] <- biol[[b]][, 1] } # save cell outliers @@ -742,9 +653,9 @@ sc3_calc_biology.SingleCellExperiment <- function(object, ks, regime) { } rowData(object) <- as(f_data, "DataFrame") colData(object) <- as(p_data, "DataFrame") - + metadata(object)$sc3$biology <- TRUE - + return(object) } @@ -753,24 +664,24 @@ sc3_calc_biology.SingleCellExperiment <- function(object, ks, regime) { setMethod("sc3_calc_biology", signature(object = "SingleCellExperiment"), sc3_calc_biology.SingleCellExperiment) #' Run the hybrid \code{SVM} approach. -#' +#' #' This method parallelize \code{SVM} prediction for each \code{k} (the number -#' of clusters). Namely, for each \code{k}, \code{\link{support_vector_machines}} +#' of clusters). Namely, for each \code{k}, \code{\link{support_vector_machines}} #' function is utilized to predict the labels of study cells. Training cells are #' selected using \code{svm_train_inds} item of the \code{sc3} slot of the #' \code{metadata(object)}. 
-#' -#' Results are written to the \code{sc3_k_clusters} columns to the -#' \code{colData} slot of the input \code{object}, where \code{k} is the +#' +#' Results are written to the \code{sc3_k_clusters} columns to the +#' \code{colData} slot of the input \code{object}, where \code{k} is the #' number of clusters. -#' +#' #' @name sc3_run_svm #' @aliases sc3_run_svm, sc3_run_svm,SingleCellExperiment-method -#' +#' #' @param object an object of \code{SingleCellExperiment} class #' @param ks a continuous range of integers - the number of clusters \code{k} to be used for SC3 clustering. #' Can also be a single integer. -#' +#' #' @return an object of \code{SingleCellExperiment} class sc3_run_svm.SingleCellExperiment <- function(object, ks) { if (is.null(metadata(object)$sc3$svm_train_inds)) { @@ -781,23 +692,23 @@ sc3_run_svm.SingleCellExperiment <- function(object, ks) { stop(paste0("Please provide a range of the number of clusters `ks` to be used by SC3!")) return(object) } - + dataset <- get_processed_dataset(object) p_data <- colData(object) svm_train_inds <- metadata(object)$sc3$svm_train_inds svm_study_inds <- metadata(object)$sc3$svm_study_inds - + for (k in ks) { clusts <- p_data[, paste0("sc3_", k, "_clusters")] clusts <- clusts[svm_train_inds] - + train.dataset <- dataset[, svm_train_inds] colnames(train.dataset) <- clusts - + study.labs <- support_vector_machines(train.dataset, dataset[, svm_study_inds], "linear") svm.labs <- c(clusts, study.labs) ord <- order(c(svm_train_inds, svm_study_inds)) - + p_data[, paste0("sc3_", k, "_clusters")] <- svm.labs[ord] } colData(object) <- as(p_data, "DataFrame") @@ -809,26 +720,26 @@ sc3_run_svm.SingleCellExperiment <- function(object, ks) { setMethod("sc3_run_svm", signature(object = "SingleCellExperiment"), sc3_run_svm.SingleCellExperiment) #' Write \code{SC3} results to Excel file -#' +#' #' This function writes all \code{SC3} results to an excel file. 
-#' +#' #' @param object an object of \code{SingleCellExperiment} class #' @param filename name of the excel file, to which the results will be written -#' +#' #' @name sc3_export_results_xls #' @aliases sc3_export_results_xls -#' +#' #' @importFrom WriteXLS WriteXLS sc3_export_results_xls.SingleCellExperiment <- function(object, filename) { if (is.null(metadata(object)$sc3$consensus)) { stop(paste0("Please run sc3_consensus() first!")) } - + p_data <- colData(object) f_data <- rowData(object) - + res <- list() - + if(length(grep("sc3_", colnames(p_data))) != 0) { cells <- as.data.frame(p_data[, grep("sc3_", colnames(p_data))]) colnames(cells) <- colnames(p_data)[grep("sc3_", colnames(p_data))] @@ -845,9 +756,9 @@ sc3_export_results_xls.SingleCellExperiment <- function(object, filename) { } else { warning("There is no gene data provided by SC3!") } - + if(length(res) != 0) { - WriteXLS(res, ExcelFileName = filename, SheetNames = names(res), + WriteXLS(res, ExcelFileName = filename, SheetNames = names(res), row.names = TRUE, AdjWidth = TRUE) } else { warning("There are no SC3 results in your data object, the Excel file will not be produced. Please run SC3 first!") diff --git a/man/ann.Rd b/man/ann.Rd index 74a7ba0..3cf9f87 100644 --- a/man/ann.Rd +++ b/man/ann.Rd @@ -4,7 +4,9 @@ \name{ann} \alias{ann} \title{Cell type annotations for data extracted from a publication by Yan et al.} -\format{An object of class \code{data.frame} with 90 rows and 1 columns.} +\format{ +An object of class \code{data.frame} with 90 rows and 1 columns. 
+} \source{ \url{http://dx.doi.org/10.1038/nsmb.2660} diff --git a/man/sc3.Rd b/man/sc3.Rd index ca4c06f..3700de5 100644 --- a/man/sc3.Rd +++ b/man/sc3.Rd @@ -1,24 +1,46 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3} \alias{sc3} \alias{sc3.SingleCellExperiment} \alias{sc3,SingleCellExperiment-method} -\alias{sc3} \title{Run all steps of \code{SC3} in one go} \usage{ -sc3.SingleCellExperiment(object, ks, gene_filter, pct_dropout_min, - pct_dropout_max, d_region_min, d_region_max, svm_num_cells, svm_train_inds, - svm_max, n_cores, kmeans_nstart, kmeans_iter_max, k_estimator, biology, - rand_seed) - -\S4method{sc3}{SingleCellExperiment}(object, ks = NULL, gene_filter = TRUE, - pct_dropout_min = 10, pct_dropout_max = 90, d_region_min = 0.04, - d_region_max = 0.07, svm_num_cells = NULL, svm_train_inds = NULL, - svm_max = 5000, n_cores = NULL, kmeans_nstart = NULL, - kmeans_iter_max = 1e+09, k_estimator = FALSE, biology = FALSE, - rand_seed = 1) +sc3.SingleCellExperiment( + object, + ks, + gene_filter, + pct_dropout_min, + pct_dropout_max, + d_region_min, + d_region_max, + svm_num_cells, + svm_train_inds, + svm_max, + kmeans_nstart, + kmeans_iter_max, + k_estimator, + biology, + BPPARAM +) + +\S4method{sc3}{SingleCellExperiment}( + object, + ks = NULL, + gene_filter = TRUE, + pct_dropout_min = 10, + pct_dropout_max = 90, + d_region_min = 0.04, + d_region_max = 0.07, + svm_num_cells = NULL, + svm_train_inds = NULL, + svm_max = 5000, + kmeans_nstart = NULL, + kmeans_iter_max = 1e+09, + k_estimator = FALSE, + biology = FALSE, + BPPARAM = BiocParallel::bpparam() +) } \arguments{ \item{object}{an object of \code{SingleCellExperiment} class.} @@ -26,47 +48,44 @@ sc3.SingleCellExperiment(object, ks, gene_filter, pct_dropout_min, \item{ks}{a range of the number of clusters \code{k} used for \code{SC3} clustering. 
Can also be a single integer.} -\item{gene_filter}{a boolen variable which defines whether to perform gene +\item{gene_filter}{a boolen variable which defines whether to perform gene filtering before SC3 clustering.} -\item{pct_dropout_min}{if \code{gene_filter = TRUE}, then genes with percent of dropouts smaller than +\item{pct_dropout_min}{if \code{gene_filter = TRUE}, then genes with percent of dropouts smaller than \code{pct_dropout_min} are filtered out before clustering.} -\item{pct_dropout_max}{if \code{gene_filter = TRUE}, then genes with percent of dropouts larger than +\item{pct_dropout_max}{if \code{gene_filter = TRUE}, then genes with percent of dropouts larger than \code{pct_dropout_max} are filtered out before clustering.} -\item{d_region_min}{defines the minimum number of eigenvectors used for +\item{d_region_min}{defines the minimum number of eigenvectors used for kmeans clustering as a fraction of the total number of cells. Default is \code{0.04}. See \code{SC3} paper for more details.} -\item{d_region_max}{defines the maximum number of eigenvectors used for +\item{d_region_max}{defines the maximum number of eigenvectors used for kmeans clustering as a fraction of the total number of cells. Default is \code{0.07}. See \code{SC3} paper for more details.} -\item{svm_num_cells}{number of randomly selected training cells to be used +\item{svm_num_cells}{number of randomly selected training cells to be used for SVM prediction. The default is \code{NULL}.} -\item{svm_train_inds}{a numeric vector defining indeces of training cells +\item{svm_train_inds}{a numeric vector defining indeces of training cells that should be used for SVM training. The default is \code{NULL}.} \item{svm_max}{define the maximum number of cells below which SVM is not run.} -\item{n_cores}{defines the number of cores to be used on the user's machine. 
If not set, `SC3` will use all but one cores of your machine.} - -\item{kmeans_nstart}{nstart parameter passed to \code{\link[stats]{kmeans}} function. Can be set manually. By default it is +\item{kmeans_nstart}{nstart parameter passed to \code{\link[stats]{kmeans}} function. Can be set manually. By default it is \code{1000} for up to \code{2000} cells and \code{50} for more than \code{2000} cells.} -\item{kmeans_iter_max}{iter.max parameter passed to \code{\link[stats]{kmeans}} +\item{kmeans_iter_max}{iter.max parameter passed to \code{\link[stats]{kmeans}} function.} \item{k_estimator}{boolean parameter, defines whether to estimate an optimal number of clusters \code{k}. If user has already defined the ks parameter the estimation does not affect the user's paramater.} -\item{biology}{boolean parameter, defines whether to compute differentially expressed genes, marker +\item{biology}{boolean parameter, defines whether to compute differentially expressed genes, marker genes and cell outliers.} -\item{rand_seed}{sets the seed of the random number generator. 
\code{SC3} is a stochastic -method, so setting the \code{rand_seed} to a fixed values can be used for reproducibility -purposes.} +\item{BPPARAM}{a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +including a seed for random number generator.} } \value{ an object of \code{SingleCellExperiment} class diff --git a/man/sc3_calc_biology.Rd b/man/sc3_calc_biology.Rd index ee02197..7a887f8 100644 --- a/man/sc3_calc_biology.Rd +++ b/man/sc3_calc_biology.Rd @@ -1,19 +1,20 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3_calc_biology} \alias{sc3_calc_biology} \alias{sc3_calc_biology.SingleCellExperiment} \alias{sc3_calc_biology,} \alias{sc3_calc_biology,SingleCellExperiment-method} -\alias{sc3_calc_biology,SingleCellExperiment-method} -\alias{sc3_calc_biology} \title{Calculate DE genes, marker genes and cell outliers.} \usage{ -sc3_calc_biology.SingleCellExperiment(object, ks, regime) +sc3_calc_biology.SingleCellExperiment(object, ks, regime, BPPARAM) -\S4method{sc3_calc_biology}{SingleCellExperiment}(object, ks = NULL, - regime = NULL) +\S4method{sc3_calc_biology}{SingleCellExperiment}( + object, + ks = NULL, + regime = NULL, + BPPARAM = BiocParallel::bpparam() +) } \arguments{ \item{object}{an object of \code{SingleCellExperiment} class} @@ -24,35 +25,38 @@ Can also be a single integer.} \item{regime}{defines what biological analysis to perform. 
"marker" for marker genes, "de" for differentiall expressed genes and "outl" for outlier cells} + +\item{BPPARAM}{a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +including a seed for random number generator.} } \value{ an object of \code{SingleCellExperiment} class } \description{ -This function calculates differentially expressed (DE) genes, marker genes +This function calculates differentially expressed (DE) genes, marker genes and cell outliers based on the consensus \code{SC3} clusterings. } \details{ -DE genes are calculated using \code{\link{get_de_genes}}. Results of the DE -analysis are saved as new columns in the -\code{featureData} slot of the input \code{object}. The column names correspond -to the adjusted \code{p-value}s of the genes and have the following format: +DE genes are calculated using \code{\link{get_de_genes}}. Results of the DE +analysis are saved as new columns in the +\code{featureData} slot of the input \code{object}. The column names correspond +to the adjusted \code{p-value}s of the genes and have the following format: \code{sc3_k_de_padj}, where \code{k} is the number of clusters. -Marker genes are calculated using \code{\link{get_marker_genes}}. -Results of the marker gene analysis are saved as three new -columns (for each \code{k}) to the -\code{featureData} slot of the input \code{object}. The column names correspond -to the \code{SC3} cluster labels, to the adjusted \code{p-value}s of the genes +Marker genes are calculated using \code{\link{get_marker_genes}}. +Results of the marker gene analysis are saved as three new +columns (for each \code{k}) to the +\code{featureData} slot of the input \code{object}. 
The column names correspond +to the \code{SC3} cluster labels, to the adjusted \code{p-value}s of the genes and to the area under the ROC curve -and have the following format: \code{sc3_k_markers_clusts}, -\code{sc3_k_markers_padj} and \code{sc3_k_markers_auroc}, where \code{k} is +and have the following format: \code{sc3_k_markers_clusts}, +\code{sc3_k_markers_padj} and \code{sc3_k_markers_auroc}, where \code{k} is the number of clusters. -Outlier cells are calculated using \code{\link{get_outl_cells}}. Results of the -cell outlier analysis are saved as new columns in the -\code{phenoData} slot of the input \code{object}. The column names correspond -to the \code{log2(outlier_score)} and have the following format: +Outlier cells are calculated using \code{\link{get_outl_cells}}. Results of the +cell outlier analysis are saved as new columns in the +\code{phenoData} slot of the input \code{object}. The column names correspond +to the \code{log2(outlier_score)} and have the following format: \code{sc3_k_log2_outlier_score}, where \code{k} is the number of clusters. 
Additionally, \code{biology} item is added to the \code{sc3} slot and is set to diff --git a/man/sc3_calc_consens.Rd b/man/sc3_calc_consens.Rd index 7a7c4b1..443b8ec 100644 --- a/man/sc3_calc_consens.Rd +++ b/man/sc3_calc_consens.Rd @@ -1,21 +1,21 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3_calc_consens} \alias{sc3_calc_consens} \alias{sc3_calc_consens.SingleCellExperiment} \alias{sc3_calc_consens,} \alias{sc3_calc_consens,SingleCellExperiment-method} -\alias{sc3_calc_consens,SingleCellExperiment-method} -\alias{sc3_calc_consens} \title{Calculate consensus matrix.} \usage{ -sc3_calc_consens.SingleCellExperiment(object) +sc3_calc_consens.SingleCellExperiment(object, BPPARAM) -\S4method{sc3_calc_consens}{SingleCellExperiment}(object) +\S4method{sc3_calc_consens}{SingleCellExperiment}(object, BPPARAM = BiocParallel::bpparam()) } \arguments{ \item{object}{an object of \code{SingleCellExperiment} class} + +\item{BPPARAM}{a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +including a seed for random number generator.} } \value{ an object of \code{SingleCellExperiment} class @@ -23,15 +23,15 @@ an object of \code{SingleCellExperiment} class \description{ This function calculates consensus matrices based on the clustering solutions contained in the \code{kmeans} item of the \code{sc3} slot of the \code{metadata(object)}. It then -creates and populates the \code{consensus} item of the \code{sc3} slot with +creates and populates the \code{consensus} item of the \code{sc3} slot with consensus matrices, their hierarchical clusterings in \code{hclust} objects, -and Silhouette indeces of the clusters. It also removes the previously +and Silhouette indeces of the clusters. It also removes the previously calculated \code{kmeans} clusterings from the \code{sc3} slot, as they are not needed for further analysis. 
} \details{ Additionally, it also adds new columns to the \code{colData} slot of the input \code{object}. The column names correspond to the consensus cell labels -and have the following format: \code{sc3_k_clusters}, where \code{k} is the +and have the following format: \code{sc3_k_clusters}, where \code{k} is the number of clusters. } diff --git a/man/sc3_calc_dists.Rd b/man/sc3_calc_dists.Rd index 53337a0..6c439a6 100644 --- a/man/sc3_calc_dists.Rd +++ b/man/sc3_calc_dists.Rd @@ -1,21 +1,21 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3_calc_dists} \alias{sc3_calc_dists} \alias{sc3_calc_dists.SingleCellExperiment} \alias{sc3_calc_dists,} \alias{sc3_calc_dists,SingleCellExperiment-method} -\alias{sc3_calc_dists,SingleCellExperiment-method} -\alias{sc3_calc_dists} \title{Calculate distances between the cells.} \usage{ -sc3_calc_dists.SingleCellExperiment(object) +sc3_calc_dists.SingleCellExperiment(object, BPPARAM) -\S4method{sc3_calc_dists}{SingleCellExperiment}(object) +\S4method{sc3_calc_dists}{SingleCellExperiment}(object, BPPARAM = BiocParallel::bpparam()) } \arguments{ \item{object}{an object of \code{SingleCellExperiment} class} + +\item{BPPARAM}{a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +including a seed for random number generator.} } \value{ an object of \code{SingleCellExperiment} class diff --git a/man/sc3_calc_transfs.Rd b/man/sc3_calc_transfs.Rd index 04b8a37..5da93eb 100644 --- a/man/sc3_calc_transfs.Rd +++ b/man/sc3_calc_transfs.Rd @@ -1,32 +1,32 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3_calc_transfs} \alias{sc3_calc_transfs} \alias{sc3_calc_transfs.SingleCellExperiment} \alias{sc3_calc_transfs,} \alias{sc3_calc_transfs,SingleCellExperiment-method} -\alias{sc3_calc_transfs,SingleCellExperiment-method} 
-\alias{sc3_calc_transfs} \title{Calculate transformations of the distance matrices.} \usage{ -sc3_calc_transfs.SingleCellExperiment(object) +sc3_calc_transfs.SingleCellExperiment(object, BPPARAM) -\S4method{sc3_calc_transfs}{SingleCellExperiment}(object) +\S4method{sc3_calc_transfs}{SingleCellExperiment}(object, BPPARAM = BiocParallel::bpparam()) } \arguments{ \item{object}{an object of \code{SingleCellExperiment} class} + +\item{BPPARAM}{a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +including a seed for random number generator.} } \value{ an object of \code{SingleCellExperiment} class } \description{ -This function transforms all \code{distances} items of the \code{sc3} slot of -the \code{metadata(object)} using either principal component analysis (PCA) +This function transforms all \code{distances} items of the \code{sc3} slot of +the \code{metadata(object)} using either principal component analysis (PCA) or by calculating the eigenvectors of the associated graph Laplacian. -The columns of the resulting matrices are then sorted in descending order -by their corresponding eigenvalues. The first \code{d} columns -(where \code{d = max(metadata(object)$sc3$n_dim)}) of each transformation are then +The columns of the resulting matrices are then sorted in descending order +by their corresponding eigenvalues. The first \code{d} columns +(where \code{d = max(metadata(object)$sc3$n_dim)}) of each transformation are then written to the \code{transformations} item of the \code{sc3} slot. Additionally, this function also removes the previously calculated \code{distances} from the \code{sc3} slot, as they are not needed for further analysis. 
diff --git a/man/sc3_estimate_k.Rd b/man/sc3_estimate_k.Rd index 46dc3e6..9b4a7ec 100644 --- a/man/sc3_estimate_k.Rd +++ b/man/sc3_estimate_k.Rd @@ -1,12 +1,9 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3_estimate_k} \alias{sc3_estimate_k} \alias{sc3_estimate_k.SingleCellExperiment} \alias{sc3_estimate_k,SingleCellExperiment-method} -\alias{sc3_estimate_k,SingleCellExperiment-method} -\alias{sc3_estimate_k} \title{Estimate the optimal number of cluster \code{k} for a scRNA-Seq expression matrix} \usage{ sc3_estimate_k.SingleCellExperiment(object) diff --git a/man/sc3_export_results_xls.Rd b/man/sc3_export_results_xls.Rd index 6de31ef..b6dca7b 100644 --- a/man/sc3_export_results_xls.Rd +++ b/man/sc3_export_results_xls.Rd @@ -1,17 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3_export_results_xls} \alias{sc3_export_results_xls} \alias{sc3_export_results_xls.SingleCellExperiment} \alias{sc3_export_results_xls,SingleCellExperiment-method} -\alias{sc3_export_results_xls} \title{Write \code{SC3} results to Excel file} \usage{ sc3_export_results_xls.SingleCellExperiment(object, filename) -\S4method{sc3_export_results_xls}{SingleCellExperiment}(object, - filename = "sc3_results.xls") +\S4method{sc3_export_results_xls}{SingleCellExperiment}(object, filename = "sc3_results.xls") } \arguments{ \item{object}{an object of \code{SingleCellExperiment} class} diff --git a/man/sc3_interactive.Rd b/man/sc3_interactive.Rd index f2434b1..ad39254 100644 --- a/man/sc3_interactive.Rd +++ b/man/sc3_interactive.Rd @@ -1,13 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ShinyMethods.R -\docType{methods} \name{sc3_interactive} \alias{sc3_interactive} \alias{sc3_interactive.SingleCellExperiment} \alias{sc3_interactive,} \alias{sc3_interactive,SingleCellExperiment-method} 
-\alias{sc3_interactive,SingleCellExperiment-method} -\alias{sc3_interactive} \title{Opens \code{SC3} results in an interactive session in a web browser.} \usage{ sc3_interactive.SingleCellExperiment(object) diff --git a/man/sc3_kmeans.Rd b/man/sc3_kmeans.Rd index aa48727..92776ee 100644 --- a/man/sc3_kmeans.Rd +++ b/man/sc3_kmeans.Rd @@ -1,30 +1,30 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3_kmeans} \alias{sc3_kmeans} \alias{sc3_kmeans.SingleCellExperiment} \alias{sc3_kmeans,} \alias{sc3_kmeans,SingleCellExperiment-method} -\alias{sc3_kmeans,SingleCellExperiment-method} -\alias{sc3_kmeans} \title{\code{kmeans} clustering of cells.} \usage{ -sc3_kmeans.SingleCellExperiment(object, ks) +sc3_kmeans.SingleCellExperiment(object, ks, BPPARAM) -\S4method{sc3_kmeans}{SingleCellExperiment}(object, ks = NULL) +\S4method{sc3_kmeans}{SingleCellExperiment}(object, ks = NULL, BPPARAM = BiocParallel::bpparam()) } \arguments{ \item{object}{an object of \code{SingleCellExperiment} class} \item{ks}{a continuous range of integers - the number of clusters \code{k} to be used for SC3 clustering. Can also be a single integer.} + +\item{BPPARAM}{a \code{\link[BiocParallel]{BiocParallelParam}} object specifying a type of parallelism and allocated resources, +including a seed for random number generator.} } \value{ an object of \code{SingleCellExperiment} class } \description{ -This function performs \code{\link[stats]{kmeans}} clustering of the matrices +This function performs \code{\link[stats]{kmeans}} clustering of the matrices contained in the \code{transformations} item of the \code{sc3} slot of the \code{metadata(object)}. 
It then creates and populates the following items of the \code{sc3} slot: \itemize{ diff --git a/man/sc3_plot_cluster_stability.Rd b/man/sc3_plot_cluster_stability.Rd index 3bb2b41..e963375 100644 --- a/man/sc3_plot_cluster_stability.Rd +++ b/man/sc3_plot_cluster_stability.Rd @@ -1,13 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/PlotMethods.R -\docType{methods} \name{sc3_plot_cluster_stability} \alias{sc3_plot_cluster_stability} \alias{sc3_plot_cluster_stability.SingleCellExperiment} \alias{sc3_plot_cluster_stability,} \alias{sc3_plot_cluster_stability,SingleCellExperiment-method} -\alias{sc3_plot_cluster_stability,SingleCellExperiment-method} -\alias{sc3_plot_cluster_stability} \title{Plot stability of the clusters} \usage{ sc3_plot_cluster_stability.SingleCellExperiment(object, k) diff --git a/man/sc3_plot_consensus.Rd b/man/sc3_plot_consensus.Rd index b359342..8a9c3c3 100644 --- a/man/sc3_plot_consensus.Rd +++ b/man/sc3_plot_consensus.Rd @@ -1,19 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/PlotMethods.R -\docType{methods} \name{sc3_plot_consensus} \alias{sc3_plot_consensus} \alias{sc3_plot_consensus.SingleCellExperiment} \alias{sc3_plot_consensus,} \alias{sc3_plot_consensus,SingleCellExperiment-method} -\alias{sc3_plot_consensus,SingleCellExperiment-method} -\alias{sc3_plot_consensus} \title{Plot consensus matrix as a heatmap} \usage{ sc3_plot_consensus.SingleCellExperiment(object, k, show_pdata) -\S4method{sc3_plot_consensus}{SingleCellExperiment}(object, k, - show_pdata = NULL) +\S4method{sc3_plot_consensus}{SingleCellExperiment}(object, k, show_pdata = NULL) } \arguments{ \item{object}{an object of 'SingleCellExperiment' class} diff --git a/man/sc3_plot_de_genes.Rd b/man/sc3_plot_de_genes.Rd index 8e7d97a..bff4d5f 100644 --- a/man/sc3_plot_de_genes.Rd +++ b/man/sc3_plot_de_genes.Rd @@ -1,19 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in 
R/PlotMethods.R -\docType{methods} \name{sc3_plot_de_genes} \alias{sc3_plot_de_genes} \alias{sc3_plot_de_genes.SingleCellExperiment} \alias{sc3_plot_de_genes,} \alias{sc3_plot_de_genes,SingleCellExperiment-method} -\alias{sc3_plot_de_genes,SingleCellExperiment-method} -\alias{sc3_plot_de_genes} \title{Plot expression of DE genes of the clusters identified by \code{SC3} as a heatmap} \usage{ sc3_plot_de_genes.SingleCellExperiment(object, k, p.val, show_pdata) -\S4method{sc3_plot_de_genes}{SingleCellExperiment}(object, k, p.val = 0.01, - show_pdata = NULL) +\S4method{sc3_plot_de_genes}{SingleCellExperiment}(object, k, p.val = 0.01, show_pdata = NULL) } \arguments{ \item{object}{an object of 'SingleCellExperiment' class} diff --git a/man/sc3_plot_expression.Rd b/man/sc3_plot_expression.Rd index b728c28..6b91783 100644 --- a/man/sc3_plot_expression.Rd +++ b/man/sc3_plot_expression.Rd @@ -1,19 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/PlotMethods.R -\docType{methods} \name{sc3_plot_expression} \alias{sc3_plot_expression} \alias{sc3_plot_expression.SingleCellExperiment} \alias{sc3_plot_expression,} \alias{sc3_plot_expression,SingleCellExperiment-method} -\alias{sc3_plot_expression,SingleCellExperiment-method} -\alias{sc3_plot_expression} \title{Plot expression matrix used for SC3 clustering as a heatmap} \usage{ sc3_plot_expression.SingleCellExperiment(object, k, show_pdata) -\S4method{sc3_plot_expression}{SingleCellExperiment}(object, k, - show_pdata = NULL) +\S4method{sc3_plot_expression}{SingleCellExperiment}(object, k, show_pdata = NULL) } \arguments{ \item{object}{an object of 'SingleCellExperiment' class} diff --git a/man/sc3_plot_markers.Rd b/man/sc3_plot_markers.Rd index 50a0028..29c203e 100644 --- a/man/sc3_plot_markers.Rd +++ b/man/sc3_plot_markers.Rd @@ -1,19 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/PlotMethods.R -\docType{methods} \name{sc3_plot_markers} 
\alias{sc3_plot_markers} \alias{sc3_plot_markers.SingleCellExperiment} \alias{sc3_plot_markers,} \alias{sc3_plot_markers,SingleCellExperiment-method} -\alias{sc3_plot_markers,SingleCellExperiment-method} -\alias{sc3_plot_markers} \title{Plot expression of marker genes identified by \code{SC3} as a heatmap.} \usage{ sc3_plot_markers.SingleCellExperiment(object, k, auroc, p.val, show_pdata) -\S4method{sc3_plot_markers}{SingleCellExperiment}(object, k, auroc = 0.85, - p.val = 0.01, show_pdata = NULL) +\S4method{sc3_plot_markers}{SingleCellExperiment}(object, k, auroc = 0.85, p.val = 0.01, show_pdata = NULL) } \arguments{ \item{object}{an object of 'SingleCellExperiment' class} diff --git a/man/sc3_plot_silhouette.Rd b/man/sc3_plot_silhouette.Rd index 0f0f0c9..76467f2 100644 --- a/man/sc3_plot_silhouette.Rd +++ b/man/sc3_plot_silhouette.Rd @@ -1,13 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/PlotMethods.R -\docType{methods} \name{sc3_plot_silhouette} \alias{sc3_plot_silhouette} \alias{sc3_plot_silhouette.SingleCellExperiment} \alias{sc3_plot_silhouette,} \alias{sc3_plot_silhouette,SingleCellExperiment-method} -\alias{sc3_plot_silhouette,SingleCellExperiment-method} -\alias{sc3_plot_silhouette} \title{Plot silhouette indexes of the cells} \usage{ sc3_plot_silhouette.SingleCellExperiment(object, k) diff --git a/man/sc3_prepare.Rd b/man/sc3_prepare.Rd index c24bf56..0b15fee 100644 --- a/man/sc3_prepare.Rd +++ b/man/sc3_prepare.Rd @@ -1,63 +1,72 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3_prepare} \alias{sc3_prepare} \alias{sc3_prepare.SingleCellExperiment} \alias{sc3_prepare,SingleCellExperiment-method} -\alias{sc3_prepare,SingleCellExperiment-method} -\alias{sc3_prepare} \title{Prepare the \code{SingleCellExperiment} object for \code{SC3} clustering.} \usage{ -sc3_prepare.SingleCellExperiment(object, gene_filter, pct_dropout_min, - pct_dropout_max, 
d_region_min, d_region_max, svm_num_cells, svm_train_inds, - svm_max, n_cores, kmeans_nstart, kmeans_iter_max, rand_seed) +sc3_prepare.SingleCellExperiment( + object, + gene_filter, + pct_dropout_min, + pct_dropout_max, + d_region_min, + d_region_max, + svm_num_cells, + svm_train_inds, + svm_max, + kmeans_nstart, + kmeans_iter_max +) -\S4method{sc3_prepare}{SingleCellExperiment}(object, gene_filter = TRUE, - pct_dropout_min = 10, pct_dropout_max = 90, d_region_min = 0.04, - d_region_max = 0.07, svm_num_cells = NULL, svm_train_inds = NULL, - svm_max = 5000, n_cores = NULL, kmeans_nstart = NULL, - kmeans_iter_max = 1e+09, rand_seed = 1) +\S4method{sc3_prepare}{SingleCellExperiment}( + object, + gene_filter = TRUE, + pct_dropout_min = 10, + pct_dropout_max = 90, + d_region_min = 0.04, + d_region_max = 0.07, + svm_num_cells = NULL, + svm_train_inds = NULL, + svm_max = 5000, + kmeans_nstart = NULL, + kmeans_iter_max = 1e+09 +) } \arguments{ \item{object}{an object of \code{SingleCellExperiment} class.} -\item{gene_filter}{a boolen variable which defines whether to perform gene +\item{gene_filter}{a boolen variable which defines whether to perform gene filtering before SC3 clustering.} -\item{pct_dropout_min}{if \code{gene_filter = TRUE}, then genes with percent of dropouts smaller than +\item{pct_dropout_min}{if \code{gene_filter = TRUE}, then genes with percent of dropouts smaller than \code{pct_dropout_min} are filtered out before clustering.} -\item{pct_dropout_max}{if \code{gene_filter = TRUE}, then genes with percent of dropouts larger than +\item{pct_dropout_max}{if \code{gene_filter = TRUE}, then genes with percent of dropouts larger than \code{pct_dropout_max} are filtered out before clustering.} -\item{d_region_min}{defines the minimum number of eigenvectors used for +\item{d_region_min}{defines the minimum number of eigenvectors used for kmeans clustering as a fraction of the total number of cells. Default is \code{0.04}. 
See \code{SC3} paper for more details.} -\item{d_region_max}{defines the maximum number of eigenvectors used for +\item{d_region_max}{defines the maximum number of eigenvectors used for kmeans clustering as a fraction of the total number of cells. Default is \code{0.07}. See \code{SC3} paper for more details.} -\item{svm_num_cells}{number of randomly selected training cells to be used +\item{svm_num_cells}{number of randomly selected training cells to be used for SVM prediction. The default is \code{NULL}.} -\item{svm_train_inds}{a numeric vector defining indeces of training cells +\item{svm_train_inds}{a numeric vector defining indeces of training cells that should be used for SVM training. The default is \code{NULL}.} \item{svm_max}{define the maximum number of cells below which SVM is not run.} -\item{n_cores}{defines the number of cores to be used on the user's machine. If not set, `SC3` will use all but one cores of your machine.} - -\item{kmeans_nstart}{nstart parameter passed to \code{\link[stats]{kmeans}} function. Default is +\item{kmeans_nstart}{nstart parameter passed to \code{\link[stats]{kmeans}} function. Default is \code{1000} for up to \code{2000} cells and \code{50} for more than \code{2000} cells.} -\item{kmeans_iter_max}{iter.max parameter passed to \code{\link[stats]{kmeans}} +\item{kmeans_iter_max}{iter.max parameter passed to \code{\link[stats]{kmeans}} function. Default is \code{1e+09}.} - -\item{rand_seed}{sets the seed of the random number generator. \code{SC3} is a stochastic -method, so setting the \code{rand_seed} to a fixed values can be used for reproducibility -purposes.} } \value{ an object of \code{SingleCellExperiment} class @@ -70,11 +79,9 @@ creates and populates the following items of the \code{sc3} slot of the \code{me \item \code{kmeans_nstart} - the same as the \code{kmeans_nstart} argument. \item \code{n_dim} - contains numbers of the number of eigenvectors to be used in \code{\link[stats]{kmeans}} clustering. 
- \item \code{rand_seed} - the same as the \code{rand_seed} argument. - \item \code{svm_train_inds} - if SVM is used this item contains indexes of the + \item \code{svm_train_inds} - if SVM is used this item contains indexes of the training cells to be used for SC3 clustering and further SVM prediction. \item \code{svm_study_inds} - if SVM is used this item contains indexes of the cells to be predicted by SVM. - \item \code{n_cores} - the same as the \code{n_cores} argument. } } diff --git a/man/sc3_run_svm.Rd b/man/sc3_run_svm.Rd index 11bd707..dc8a961 100644 --- a/man/sc3_run_svm.Rd +++ b/man/sc3_run_svm.Rd @@ -1,13 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoreMethods.R -\docType{methods} \name{sc3_run_svm} \alias{sc3_run_svm} \alias{sc3_run_svm.SingleCellExperiment} \alias{sc3_run_svm,} \alias{sc3_run_svm,SingleCellExperiment-method} -\alias{sc3_run_svm,SingleCellExperiment-method} -\alias{sc3_run_svm} \title{Run the hybrid \code{SVM} approach.} \usage{ sc3_run_svm.SingleCellExperiment(object, ks) @@ -25,13 +22,13 @@ an object of \code{SingleCellExperiment} class } \description{ This method parallelize \code{SVM} prediction for each \code{k} (the number -of clusters). Namely, for each \code{k}, \code{\link{support_vector_machines}} +of clusters). Namely, for each \code{k}, \code{\link{support_vector_machines}} function is utilized to predict the labels of study cells. Training cells are selected using \code{svm_train_inds} item of the \code{sc3} slot of the \code{metadata(object)}. } \details{ -Results are written to the \code{sc3_k_clusters} columns to the -\code{colData} slot of the input \code{object}, where \code{k} is the +Results are written to the \code{sc3_k_clusters} columns to the +\code{colData} slot of the input \code{object}, where \code{k} is the number of clusters. 
} diff --git a/man/yan.Rd b/man/yan.Rd index e02a0ee..a0db34d 100644 --- a/man/yan.Rd +++ b/man/yan.Rd @@ -4,7 +4,9 @@ \name{yan} \alias{yan} \title{Single cell RNA-Seq data extracted from a publication by Yan et al.} -\format{An object of class \code{data.frame} with 20214 rows and 90 columns.} +\format{ +An object of class \code{data.frame} with 20214 rows and 90 columns. +} \source{ \url{http://dx.doi.org/10.1038/nsmb.2660} diff --git a/vignettes/SC3.Rmd b/vignettes/SC3.Rmd index 40eec2d..aa356bf 100644 --- a/vignettes/SC3.Rmd +++ b/vignettes/SC3.Rmd @@ -54,7 +54,7 @@ sce <- SingleCellExperiment( assays = list( counts = as.matrix(yan), logcounts = log2(as.matrix(yan) + 1) - ), + ), colData = ann ) @@ -70,6 +70,23 @@ sce <- runPCA(sce) plotPCA(sce, colour_by = "cell_type1") ``` +## Speeding up the computations + +`SC3` uses the [BiocParallel](https://bioconductor.org/packages/release/bioc/html/BiocParallel.html) package for parallel computations. +To utilize its capabilities, you have to create an object of class `BiocParallelParam`, which defines the parallel backend, its allocated +resources, and a seed for the random number generator. The most common backend is `SnowParam()`: +```{r} +library(BiocParallel) +BPPARAM <- SnowParam(workers = 4, type = "SOCK", RNGseed = 1, progressbar = TRUE) +bpstart(BPPARAM) +BiocParallel::register(BPPARAM) +``` + +Instead of calling the `register()` function you can also directly pass `BPPARAM` to `SC3` methods which support it. + +> `type = "SOCK"` works on all platforms, because it uses external R processes to distribute the parallel computations. +> On the other hand, `type = "FORK"` works only on Unix platforms, and could be faster in exchange for increased memory consumption.
+ ## Run SC3 If you would like to explore clustering of your data in the range of `k`s (the number of clusters) from 2 to 4, you just need to run the main `sc3` method and define the range of `k`s using the `ks` parameter (here we also ask `SC3` to calculate biological features based on the identified cell clusters): @@ -77,8 +94,6 @@ If you would like to explore clustering of your data in the range of `k`s (the n sce <- sc3(sce, ks = 2:4, biology = TRUE) ``` -> By default `SC3` will use all but one cores of your machine. You can manually set the number of cores to be used by setting the `n_cores` parameter in the `sc3` call. - To quickly and easily explore the `SC3` solutions using an interactive Shiny application use the following method: ```{r, eval=FALSE} sc3_interactive(sce) @@ -104,8 +119,8 @@ Additionally, having `SC3` results stored in the same slot makes it possible to ```{r} sce <- runPCA(sce) plotPCA( - sce, - colour_by = "sc3_3_clusters", + sce, + colour_by = "sc3_3_clusters", size_by = "sc3_3_log2_outlier_score" ) ``` @@ -123,7 +138,7 @@ Because the biological features were also calculated for each `k`, one can find # Number of Сells -The default settings of `SC3` allow to cluster (using a single `k`) a dataset of 2,000 cells in about 20-30 minutes. +The default settings of `SC3` allow to cluster (using a single `k`) a dataset of 2,000 cells in about 20-30 minutes. For datasets with more than 2,000 cells `SC3` automatically adjusts some of its parameters (see below). This allows to cluster a dataset of 5,000 cells in about 20-30 minutes. The parameters can also be manually adjusted for datasets with any number of cells. @@ -143,11 +158,11 @@ sc3_plot_consensus(sce, k = 3) It is also possible to annotate cells (columns of the consensus matrix) with any column of the `colData` slot of the `sce` object. 
```{r, fig.height=6, fig.width=8} sc3_plot_consensus( - sce, k = 3, + sce, k = 3, show_pdata = c( - "cell_type1", + "cell_type1", "log10_total_features", - "sc3_3_clusters", + "sc3_3_clusters", "sc3_3_log2_outlier_score" ) ) @@ -170,11 +185,11 @@ sc3_plot_expression(sce, k = 3) It is also possible to annotate cells (columns of the expression matrix) with any column of the `colData` slot of the `sce` object. ```{r, fig.height=6, fig.width=8} sc3_plot_expression( - sce, k = 3, + sce, k = 3, show_pdata = c( - "cell_type1", + "cell_type1", "log10_total_features", - "sc3_3_clusters", + "sc3_3_clusters", "sc3_3_log2_outlier_score" ) ) @@ -197,11 +212,11 @@ sc3_plot_de_genes(sce, k = 3) It is also possible to annotate cells (columns of the matrix containing DE genes) with any column of the `colData` slot of the `sce` object. ```{r, fig.height=9, fig.width=8} sc3_plot_de_genes( - sce, k = 3, + sce, k = 3, show_pdata = c( - "cell_type1", + "cell_type1", "log10_total_features", - "sc3_3_clusters", + "sc3_3_clusters", "sc3_3_log2_outlier_score" ) ) @@ -217,11 +232,11 @@ sc3_plot_markers(sce, k = 3) It is also possible to annotate cells (columns of the matrix containing marker genes) with any column of the `colData` slot of the `sce` object. ```{r, fig.height=6, fig.width=8} sc3_plot_markers( - sce, k = 3, + sce, k = 3, show_pdata = c( - "cell_type1", + "cell_type1", "log10_total_features", - "sc3_3_clusters", + "sc3_3_clusters", "sc3_3_log2_outlier_score" ) ) @@ -243,15 +258,13 @@ Let us go through each of them independently. ## `sc3_prepare` -We start with `sc3_prepare`. This method prepares an object of `sce` class for `SC3` clustering. This method also defines all parameters needed for clustering and stores them in the `sc3` slot. The parameters have their own defaults but can be manually changed. For more information on the parameters please use `?sc3_prepare`. +We start with `sc3_prepare`. This method prepares an object of `sce` class for `SC3` clustering. 
This method also defines all parameters needed for clustering and stores them in the `sc3` slot. The parameters have their own defaults but can be manually changed. For more information on the parameters please use `?sc3_prepare`. ```{r} sce <- sc3_prepare(sce) str(metadata(sce)$sc3) ``` -> By default `SC3` will use all but one cores of your machine. You can manually set the number of cores to be used by setting the `n_cores` parameter in the `sc3_prepare` call. - ## _(optional)_ `sc3_estimate_k` When the `sce` object is prepared for clustering, `SC3` can also estimate the optimal number of clusters `k` in the dataset. `SC3` utilizes the Tracy-Widom theory on random matrices to estimate `k`. `sc3_estimate_k` method creates and populates the following items of the `sc3` slot: @@ -274,7 +287,7 @@ names(metadata(sce)$sc3$distances) ## `sc3_calc_transfs` -Next the distance matrices are transformed using PCA and graph Laplacian. Method `sc3_calc_transfs` calculates transforamtions of the distance matrices contained in +Next the distance matrices are transformed using PCA and graph Laplacian. Method `sc3_calc_transfs` calculates transformations of the distance matrices contained in the `distances` item of the `sc3` slot. It then creates and populates the following items of the `sc3` slot: * `transformations` - contains a list of transformations of the distance matrices corresponding to PCA and graph Laplacian transformations.