Skip to content

Commit

Permalink
Merge branch 'main' into update-license
Browse files Browse the repository at this point in the history
  • Loading branch information
davidpross authored Apr 5, 2023
2 parents 2973a51 + c06abd0 commit 0973e6b
Show file tree
Hide file tree
Showing 52 changed files with 685 additions and 284 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.Rproj.user
*.Rproj
.Rhistory
.RData
.Ruserdata
Expand Down
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Title: An R package for performing cell typing in SMI and other single cell data
Version: 1.0.0
Authors@R: c(person("Patrick", "Danaher", email = "[email protected]", role = c("aut", "cre")),
person("Zhi", "Yang", email = "[email protected]", role = c("aut")),
person("David", "Ross", email = "[email protected]", role = c("aut")))
person("David", "Ross", email = "[email protected]", role = c("aut", "cre")))
Description: Insitutype is an algorithm for performing cell typing in single cell
spatial transcriptomics data, such as is generated by the CosMx platform.
It can perform supervised cell typing from reference profiles, unsupervised clustering,
Expand All @@ -15,7 +15,6 @@ Imports:
scales,
umap,
ggplot2,
lsa,
irlba,
mclust,
sparseMatrixStats,
Expand All @@ -38,5 +37,5 @@ Suggests:
VignetteBuilder: knitr
Depends:
R (>= 3.5.0)
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
LinkingTo: Rcpp, RcppArmadillo
17 changes: 0 additions & 17 deletions InSituType.Rproj

This file was deleted.

3 changes: 1 addition & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ export(chooseClusterNumber)
export(choose_anchors_from_stats)
export(colorCellTypes)
export(fastCohorting)
export(fast_lldist)
export(find_anchor_cells)
export(flightpath_layout)
export(flightpath_plot)
export(get_anchor_stats)
export(insitutype)
export(insitutypeML)
export(lls)
export(numCores)
export(refineClusters)
export(updateProfilesFromAnchors)
export(updateReferenceProfiles)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
* License updated
* lldist parallelized with OpenMP

# InSituType 0.99.4

* Re-submission to Bioconductor

# InSituType 0.99.3

* Merge subclustering fix
Expand Down
8 changes: 4 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
#' Probability density function of the negative binomial distribution (written in C++)
#'
#' @param mat dgCMatrix expression counts
#' @param s numeric scaling factor
#' @param x numeric expression for reference profile
#' @param bgsub vector of background expression per cell
#' @param x numeric expression for reference profiles
#' @param bg numeric background level
#' @param size_dnb int Dispersion parameter
#'
Expand All @@ -16,7 +16,7 @@
#' @importFrom Rcpp evalCpp
#' @exportPattern "^[[:alpha:]]+"
#' @export
lls <- function(mat, s, x, bg, size_dnb) {
.Call(`_InSituType_lls`, mat, s, x, bg, size_dnb)
fast_lldist <- function(mat, bgsub, x, bg, size_dnb) {
.Call(`_InSituType_fast_lldist`, mat, bgsub, x, bg, size_dnb)
}

15 changes: 8 additions & 7 deletions R/chooseClusterNumber.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
#' \itemize{
#' \item
#' }
#' @examples
#' data("mini_nsclc")
#' chooseClusterNumber(mini_nsclc$counts, Matrix::rowMeans(mini_nsclc$neg),
#' n_clust = 2:5)
chooseClusterNumber <-
function(counts,
neg,
Expand Down Expand Up @@ -101,13 +105,10 @@ chooseClusterNumber <-
max_iters = max_iters)

# get the loglik of the clustering result:
loglik_thisclust <- parallel::mclapply(asplit(tempclust$profiles, 2),
lldist,
mat = counts,
bg = bg,
size = nb_size,
mc.cores = numCores())
loglik_thisclust <- do.call(cbind, loglik_thisclust)
loglik_thisclust <- lldist(x = tempclust$profiles,
mat = counts,
bg = bg,
size = nb_size)
total_loglik_this_clust <- sum(apply(loglik_thisclust, 1, max))
return(total_loglik_this_clust)
})
Expand Down
13 changes: 13 additions & 0 deletions R/colorCellTypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@
#' @return A named color vector
#' @importFrom grDevices col2rgb colors
#' @export
#' @examples
#' data("mini_nsclc")
#' unsup <- insitutype(
#' x = mini_nsclc$counts,
#' neg = Matrix::rowMeans(mini_nsclc$neg),
#' n_clusts = 8,
#' n_phase1 = 200,
#' n_phase2 = 500,
#' n_phase3 = 2000,
#' n_starts = 1,
#' max_iters = 5
#' ) # choosing inadvisably low numbers to speed the vignette; using the defaults in recommended.
#' colorCellTypes(freqs = table(unsup$clust), palette = "brewers")
#'
colorCellTypes <- function(names = NULL, freqs = NULL, init_colors = NULL, max_sum_rgb = 600,
palette = "earthplus") {
Expand Down
7 changes: 7 additions & 0 deletions R/fastCohorting.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
#' @importFrom mclust predict.Mclust
#' @importFrom mclust mclustBIC
#' @importFrom stats qnorm
#' @examples
#' data("mini_nsclc")
#' ## simulate immunofluorescence data:
#' immunofluordata <- matrix(rpois(n = nrow(mini_nsclc$counts) * 4, lambda = 100),
#' nrow(mini_nsclc$counts))
#' cohort <- fastCohorting(immunofluordata, gaussian_transform = TRUE)
#' table(cohort)
fastCohorting <- function(mat, n_cohorts = NULL, gaussian_transform = TRUE) {

if (any(is.na(mat))) {
Expand Down
50 changes: 44 additions & 6 deletions R/find_anchor_cells.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,13 @@
#' @param min_cosine Cells must have at least this much cosine similarity to a fixed profile to be used as an anchor.
#' @return A list with two elements: cos, the matrix of cosine distances;
#' and llr, the matrix of log likelihood ratios of each cell under each cell type vs. the 2nd best cell type.
#' @importFrom lsa cosine
#' @export
#' @examples
#' data("ioprofiles")
#' data("mini_nsclc")
#' get_anchor_stats(counts = mini_nsclc$counts,
#' neg = Matrix::rowMeans(mini_nsclc$neg),
#' profiles = ioprofiles)
get_anchor_stats <- function(counts, neg = NULL, bg = NULL, align_genes = TRUE,
profiles, size = 10,
min_cosine = 0.3) {
Expand Down Expand Up @@ -64,11 +69,13 @@ get_anchor_stats <- function(counts, neg = NULL, bg = NULL, align_genes = TRUE,
}

# get cosine distances:
cos <- matrix(NA, nrow(counts), ncol(profiles),
dimnames = list(rownames(counts), colnames(profiles)))
cos <- sapply(colnames(profiles), function(cell) {
cos[, cell] <- apply(counts, 1, cosine, profiles[, cell])
})
cos_numerator <- counts %*% profiles
counts2 <- as(counts, "dgCMatrix")
counts2@x <- counts2@x^2
rs <- sqrt(Matrix::rowSums(counts2))
ps <- sqrt(Matrix::colSums(profiles^2))
cos_denominator <- (t(t(rs))) %*% ps
cos <- as.matrix(cos_numerator / cos_denominator)

# stats for which cells to get loglik on:
# get 3rd hightest cosines of each cell:
Expand Down Expand Up @@ -128,6 +135,30 @@ get_anchor_stats <- function(counts, neg = NULL, bg = NULL, align_genes = TRUE,
#' @return A vector holding anchor cell assignments (or NA) for each cell in the
#' counts matrix
#' @export
#' @examples
#' data("ioprofiles")
#' data("mini_nsclc")
#' counts <- mini_nsclc$counts
#' astats <- get_anchor_stats(counts = counts,
#' neg = Matrix::rowMeans(mini_nsclc$neg),
#' profiles = ioprofiles)
#'
#' ## estimate per-cell bg as a fraction of total counts:
#' negmean.per.totcount <- mean(rowMeans(mini_nsclc$neg)) / mean(rowSums(counts))
#' per.cell.bg <- rowSums(counts) * negmean.per.totcount
#'
#' # now choose anchors:
#' choose_anchors_from_stats(counts = counts,
#' neg = Matrix::rowMeans(mini_nsclc$neg),
#' bg = per.cell.bg,
#' anchorstats = astats,
#' # a very low value chosen for the mini
#' # dataset. Typically hundreds of cells
#' # would be better.
#' n_cells = 50,
#' min_cosine = 0.4,
#' min_scaled_llr = 0.03,
#' insufficient_anchors_thresh = 5)
choose_anchors_from_stats <-
function(counts,
neg = NULL,
Expand Down Expand Up @@ -230,6 +261,13 @@ choose_anchors_from_stats <-
#' counts matrix
#' @importFrom lsa cosine
#' @export
#' @examples
#' data("ioprofiles")
#' data("mini_nsclc")
#' sharedgenes <- intersect(colnames(mini_nsclc$counts), rownames(ioprofiles))
#' find_anchor_cells(counts = mini_nsclc$counts[, sharedgenes],
#' neg = Matrix::rowMeans(mini_nsclc$neg),
#' profiles = ioprofiles)
find_anchor_cells <- function(counts, neg = NULL, bg = NULL, align_genes = TRUE,
profiles, size = 10, n_cells = 500,
min_cosine = 0.3, min_scaled_llr = 0.01,
Expand Down
33 changes: 32 additions & 1 deletion R/flightpath_layout.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,19 @@
#' @importFrom umap umap
#' @importFrom stats rnorm
#' @export
#' @examples
#' data("mini_nsclc")
#' unsup <- insitutype(
#' x = mini_nsclc$counts,
#' neg = Matrix::rowMeans(mini_nsclc$neg),
#' n_clusts = 8,
#' n_phase1 = 200,
#' n_phase2 = 500,
#' n_phase3 = 2000,
#' n_starts = 1,
#' max_iters = 5
#' ) # choosing inadvisably low numbers to speed the vignette; using the defaults in recommended.
#' flightpath_layout(logliks = unsup$logliks, profiles = unsup$profiles)
flightpath_layout <- function(logliks = NULL, probs = NULL, profiles = NULL, cluster_xpos = NULL, cluster_ypos = NULL) {

if (is.null(probs) && is.null(logliks)) {
Expand Down Expand Up @@ -90,7 +103,19 @@ flightpath_layout <- function(logliks = NULL, probs = NULL, profiles = NULL, clu
#'@return a ggplot object
#'
#'@export
#'
#'@examples
#' data("ioprofiles")
#' unsup <- insitutype(
#' x = mini_nsclc$counts,
#' neg = Matrix::rowMeans(mini_nsclc$neg),
#' n_clusts = 8,
#' n_phase1 = 200,
#' n_phase2 = 500,
#' n_phase3 = 2000,
#' n_starts = 1,
#' max_iters = 5
#' ) # choosing inadvisably low numbers to speed the vignette; using the defaults in recommended.
#' flightpath_plot(insitutype_result = unsup)
flightpath_plot <- function(flightpath_result = NULL, insitutype_result = NULL, col = NULL, showclusterconfidence = TRUE){

# get the flightpath results to use
Expand Down Expand Up @@ -163,6 +188,12 @@ flightpath_plot <- function(flightpath_result = NULL, insitutype_result = NULL,
#' Calculate the mean confidence of the cell calls from each cluster
#' @param probs Matrix of probabilities
#' @return a vector of mean confidences, with values of 1 corresponding to clusters with only prob == 1
#' @examples
#' data("mini_nsclc")
#' probs <- sapply(rownames(mini_nsclc$counts), function(x) {a = runif(10); a/sum(a)})
#' dimnames(probs)[[1]] <- letters[1:10]
#' probs <- t(probs)
#' getMeanClusterConfidence(probs)
getMeanClusterConfidence <- function(probs) {

maxprobs <- apply(probs, 1, max, na.rm = TRUE)
Expand Down
41 changes: 16 additions & 25 deletions R/geoSketch.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
#' @param counts Counts matrix
#' @return A matrix of data for geoSketch, with cells in rows and features in columns
#' @importFrom irlba prcomp_irlba
#' @examples
#' data("mini_nsclc")
#' prepDataForSketching(mini_nsclc$counts)
prepDataForSketching <- function(counts) {
# get PCs:
scaling_factors <- pmax(sparseMatrixStats::colQuantiles(counts, probs = 0.99), 5)
Expand All @@ -37,20 +40,16 @@ prepDataForSketching <- function(counts) {
#' cell
#' @param minCellsPerBin the minimum number of cells required for a bin to be
#' considered for sampling
#' @param seed set seed for random sampling
#'
#' @return Plaid, a named vector of binIDs where names correspond to cellIDs
#' @examples
#' data("mini_nsclc")
#' geoSketch_get_plaid(mini_nsclc$counts, 100)
geoSketch_get_plaid <- function(X, N,
alpha=0.1,
max_iter=200,
returnBins=FALSE,
minCellsPerBin = 1,
seed=NULL) {
# Define seed for sampling if given
if (!is.null(seed)) {
set.seed(seed)
}

minCellsPerBin = 1) {
# Determine the total number of cells and compare it to the desired sample size
nCells <- nrow(X)

Expand Down Expand Up @@ -120,17 +119,14 @@ geoSketch_get_plaid <- function(X, N,
#' Sample cells, trying to give each plaid equal representation
#' @param Plaid Vector of cells' plaid IDs
#' @param N desired sample size
#' @param seed set seed for random sampling
#'
#' @return Plaid, a named vector of binIDs where names correspond to cellIDs
#' @return sampledCells, a vector of cellIDs sampled using the geometric sketching method
geoSketch_sample_from_plaids <- function(Plaid, N, seed=NULL) {

# Define seed for sampling if given
if (!is.null(seed)){
set.seed(seed)
}

#' @examples
#' data("mini_nsclc")
#' plaids <- geoSketch_get_plaid(mini_nsclc$counts, 100)
#' geoSketch_sample_from_plaids(plaids, 5)
geoSketch_sample_from_plaids <- function(Plaid, N) {
# define cells' sampling probabilities as the inverse of their plaid size:
PlaidCounts <- table(Plaid) # Count the number of cells per bin
prob <- 1 / PlaidCounts[Plaid]
Expand All @@ -152,22 +148,17 @@ geoSketch_sample_from_plaids <- function(Plaid, N, seed=NULL) {
#' @param max_iter maximum number of iterations used to achieve an acceptable minimum number of bins
#' @param returnBins determines whether or not to pass back bin labels for each cell
#' @param minCellsPerBin the minimum number of cells required for a bin to be considered for sampling
#' @param seed set seed for random sampling
#'
#' @return sampledCells, a vector of cellIDs sampled using the geometric sketching method
#' @return Plaid, a named vector of binIDs where names correspond to cellIDs
#' @examples
#' data("mini_nsclc")
#' geoSketch(mini_nsclc$counts, 200)
geoSketch <- function(X, N,
alpha=0.1,
max_iter=200,
returnBins=FALSE,
minCellsPerBin = 1,
seed=NULL) {

# Define seed for sampling if given
if (!is.null(seed)) {
set.seed(seed)
}

minCellsPerBin = 1) {
# Determine the total number of cells and compare it to the desired sample size
nCells <- nrow(X)

Expand Down
Loading

0 comments on commit 0973e6b

Please sign in to comment.