Skip to content

Commit

Permalink
Package update during manuscript revision
Browse files Browse the repository at this point in the history
  • Loading branch information
mvfki committed May 21, 2024
1 parent e654702 commit 7e89f55
Show file tree
Hide file tree
Showing 23 changed files with 3,017 additions and 540 deletions.
11 changes: 8 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CytoSimplex
Type: Package
Title: Simplex Visualization of Cell Fate Similarity in Single-Cell Data
Version: 0.1.1
Version: 0.1.99
Authors@R: c(
person(given = 'Yichen', family = 'Wang', email = '[email protected]',
role = c('aut', 'cre'), comment = c(ORCID = "0000-0003-4347-5199")),
Expand All @@ -22,26 +22,31 @@ LazyData: true
Depends:
stats,
methods,
grDevices,
R (>= 3.6)
LinkingTo: Rcpp, RcppArmadillo
biocViews:
Imports:
cli,
ggplot2,
Matrix,
plot3D,
plotly,
RColorBrewer,
Rcpp,
rlang,
viridis
Suggests:
hdf5r,
knitr,
magick,
patchwork,
rgl,
rmarkdown,
Seurat,
SeuratObject,
SingleCellExperiment,
SummarizedExperiment,
testthat (>= 3.0.0)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Config/testthat/edition: 3
VignetteBuilder: knitr
10 changes: 6 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(`[`,colorArg)
S3method(colNormalize,Seurat)
S3method(colNormalize,SingleCellExperiment)
S3method(colNormalize,default)
Expand All @@ -11,22 +12,23 @@ S3method(plotBinary,simMat)
S3method(plotQuaternary,Seurat)
S3method(plotQuaternary,SingleCellExperiment)
S3method(plotQuaternary,default)
S3method(plotQuaternary,simMat)
S3method(plotTernary,Seurat)
S3method(plotTernary,SingleCellExperiment)
S3method(plotTernary,default)
S3method(plotTernary,simMat)
S3method(print,plist)
S3method(print,quatPlot)
S3method(selectTopFeatures,Seurat)
S3method(selectTopFeatures,SingleCellExperiment)
S3method(selectTopFeatures,default)
export(colNormalize)
export(plotBinary)
export(plotQuaternary)
export(plotTernary)
export(readH5ADObsNames)
export(readH5ADObsVar)
export(readH5ADUnsSpMat)
export(readVelocytoLoom)
export(selectTopFeatures)
export(writeQuaternaryGIF)
exportMethods(show)
import(ggplot2)
importFrom(Matrix,colSums)
importFrom(Matrix,rowMeans)
Expand Down
14 changes: 11 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
## CytoSimplex 0.1.0
## CytoSimplex 0.1.99

- Initial release version.
- Added support for coloring dots by categorical or continuous variable, with options to use customized or built-in color palettes.
- Changed interactive 3D support of quaternary simplex plot from *rgl* to *plotly*. Now the default is `interactive = TRUE` for `plotQuaternary()` and it returns a `plotly` object.
- Added interactive support for ternary simplex plot, using plotly, triggered with `interactive = TRUE`
- Fixed a bug in the Wilcoxon test
- Added `readH5ADObsNames()`, `readH5ADObsVar()`, `readH5ADUnsSpMat()`, `readVelocytoLoom()` for loading commonly needed information from H5AD and LOOM files produced by RNA velocity analysis
- Added example tutorial for HSPC analysis in the manuscript

## CytoSimplex 0.1.1

- Fix vignette GIF picture linking issue
- Fixed vignette GIF picture linking issue

## CytoSimplex 0.1.0

- Initial release version.
218 changes: 218 additions & 0 deletions R/hdf5Access.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
#' Extract a variable from adata.obs stored in an H5AD file
#' @description
#' Primarily designed for fetching the annotation used for visualization.
#' @param filename File path to the H5AD file.
#' @param obsKey The variable name to extract, must use only one character
#' string.
#' @param named Logical, whether to name the vector with cell IDs that came from
#' \code{adata.obs_names}. Default \code{TRUE}.
#' @param categoricalAsFactor Logical, whether to convert categorical variables
#' to factors. Default \code{TRUE}.
#' @return A vector of the extracted variable, or a factor if the variable is
#' encoded to be categorical and \code{categoricalAsFactor = TRUE}. Entries of
#' a categorical variable that are stored with a negative code (i.e. missing
#' values) are returned as \code{NA}.
#' @export
#' @family H5AD-reader
#' @examples
#' \dontrun{
#' h5adFile <- "path/to/analysis.h5ad"
#' cluster <- readH5ADObsVar(h5adFile, "leiden")
#' }
readH5ADObsVar <- function(
    filename,
    obsKey,
    named = TRUE,
    categoricalAsFactor = TRUE
) {
  # Validate the cheap argument first so that no file handle is opened when
  # the call is malformed.
  if (length(obsKey) != 1 || !is.character(obsKey)) {
    cli::cli_abort("{.field obsKey} must be a single character string.")
  }
  adataObj <- .openH5AD(filename, ".h5ad")
  # Guarantee the HDF5 handle is released on every exit path, including the
  # aborts below.
  on.exit(adataObj$close_all(), add = TRUE)

  obs <- adataObj[['obs']]
  if (!obsKey %in% names(obs)) {
    cli::cli_abort("obs key {.val {obsKey}} not found in {.code adata.obs.columns}.")
  }
  variable <- obs[[obsKey]]

  encodingType <- hdf5r::h5attr(variable, "encoding-type")
  if (encodingType %in% c("array", "string-array")) {
    value <- variable[]
  } else if (encodingType == "categorical") {
    categories <- variable[['categories']][]
    codes <- variable[['codes']][]
    # Negative codes mark missing entries (see the AnnData on-disk format
    # specification). Without this, `code + 1` becomes 0 and R silently drops
    # zero indices, which would shorten the result and misalign it with the
    # cell IDs.
    codes[codes < 0] <- NA
    # +1 to address Python 0-based indexing
    value <- categories[codes + 1]
    if (isTRUE(categoricalAsFactor)) {
      value <- factor(value, levels = categories)
    }
  } else {
    cli::cli_abort("Unsupported encoding type {.val {encodingType}}.")
  }

  if (isTRUE(named)) {
    # The "_index" attribute of `obs` holds the name of the dataset that
    # stores the cell IDs.
    idsCol <- hdf5r::h5attr(obs, "_index")
    names(value) <- obs[[idsCol]][]
  }
  return(value)
}

#' Extract a sparse matrix from adata.uns stored in an H5AD file
#' @description
#' Primarily designed for fetching the velocity data presented as a cell-cell
#' transition graph.
#' @inheritParams readH5ADObsVar
#' @param unsKey The \code{adata.uns} key to extract, must use only one
#' character string.
#' @return A CSC-matrix of "dgCMatrix" class. Because the CSR-encoded data is
#' reinterpreted as CSC without conversion, the returned matrix is the
#' transpose of the matrix stored in the file.
#' @export
#' @family H5AD-reader
#' @examples
#' \dontrun{
#' h5adFile <- "path/to/analysis.h5ad"
#' velo <- readH5ADUnsSpMat(h5adFile, "velo_s_norm_graph")
#' }
readH5ADUnsSpMat <- function(
    filename,
    unsKey
) {
  # Validate the argument before any file handle is opened.
  if (length(unsKey) != 1 || !is.character(unsKey)) {
    cli::cli_abort("{.field unsKey} must be a single character string.")
  }
  adataObj <- .openH5AD(filename, ".h5ad")
  # Release the HDF5 handle on every exit path, including the aborts below.
  on.exit(adataObj$close_all(), add = TRUE)

  uns <- adataObj[['uns']]
  if (!unsKey %in% names(uns)) {
    cli::cli_abort("uns key {.val {unsKey}} not found in {.code adata.uns.keys()}.")
  }
  spMatGroup <- uns[[unsKey]]
  encodingType <- hdf5r::h5attr(spMatGroup, "encoding-type")
  if (encodingType != "csr_matrix") {
    cli::cli_abort("This function only extracts {.val csr-matrix} encoded data.")
  }
  # +1 converts the 0-based indices to the 1-based `i` that
  # Matrix::sparseMatrix() expects; the pointer vector `p` stays 0-based.
  i <- spMatGroup[['indices']][] + 1
  p <- spMatGroup[['indptr']][]
  x <- spMatGroup[['data']][]
  # Returning csc-matrix from csr-matrix transposes the matrix
  dims <- rev(hdf5r::h5attr(spMatGroup, "shape"))
  spMat <- Matrix::sparseMatrix(i = i, p = p, x = x, dims = dims)
  return(spMat)
}

#' Extract `adata.obs_names` from an H5AD file
#' @description
#' It frequently happens that velocity analyses stored in H5AD files do not
#' contain the full raw count data suggested for CytoSimplex visualization.
#' Extracting the cell IDs (e.g. barcodes) helps matching the velocity data to
#' raw count data imported from other sources.
#' @inheritParams readH5ADObsVar
#' @return A character vector of cell IDs.
#' @export
#' @family H5AD-reader
#' @examples
#' \dontrun{
#' h5adFile <- "path/to/analysis.h5ad"
#' cellIDs <- readH5ADObsNames(h5adFile)
#' }
readH5ADObsNames <- function(
    filename
) {
  adataObj <- .openH5AD(filename, '.h5ad')
  # Release the HDF5 handle even if reading the attribute or dataset fails.
  on.exit(adataObj$close_all(), add = TRUE)
  obs <- adataObj[['obs']]
  # The "_index" attribute of `obs` holds the name of the dataset that stores
  # the cell IDs.
  idsCol <- hdf5r::h5attr(obs, "_index")
  ids <- obs[[idsCol]][]
  return(ids)
}

#' Extract the raw counts from a LOOM file
#' @description
#' This function is primarily designed for fetching the raw count data from a
#' LOOM file, output by \href{https://velocyto.org/}{Velocyto}. We by default
#' use the spliced counts.
#' @details
#' The velocyto output LOOM file is HDF5 based and is roughly organized as
#' follows:
#' \itemize{
#' \item{\code{"matrix"}: The whole raw counts, which is the sum of spliced, unspliced
#' and ambiguous counts.}
#' \item{layers: A group like a folder
#' \itemize{
#' \item{\code{"layers/spliced"}: The spliced counts.}
#' \item{\code{"layers/unspliced"}: The unspliced counts.}
#' \item{\code{"layers/ambiguous"}: The ambiguous counts.}
#' }
#' }
#' }
#'
#' An AnnData object created with Scanpy by default loads the data with a
#' different structure, so that all the four matrices are accessible in
#' \code{adata.layers} and set one of them (by default \code{"layers/spliced"})
#' to \code{adata.X}.
#'
#' @param filename File path to the LOOM file.
#' @param matrixPath A path in the LOOM file to the matrix to extract, following
#' the inner HDF5 structure. Default \code{"layers/spliced"}. See Details.
#' @param cellID The name of the cell ID column in the LOOM column-attributes.
#' The same thing as argument \code{obs_names} of \code{scanpy.read_loom}.
#' Default \code{"CellID"}.
#' @param featureID The name of the feature ID column in the LOOM
#' row-attributes. The same thing as argument \code{var_names} of
#' \code{scanpy.read_loom}. Default \code{"Gene"}.
#' @param chunkSize The maximum number of cells to load per read. When the
#' dataset reports a storage chunk size, this is rounded down to a multiple of
#' it, but never below one storage chunk. Default 1000.
#' @return A sparse matrix of class "dgCMatrix", with cells as columns and genes
#' as rows.
#' @export
#' @family H5AD-reader
#' @examples
#' \dontrun{
#' loomFile <- "velocyto/out/analysis.loom"
#' rawCounts <- readVelocytoLoom(loomFile)
#' }
readVelocytoLoom <- function(
    filename,
    matrixPath = "layers/spliced",
    cellID = "CellID",
    featureID = "Gene",
    chunkSize = 1000
) {
  if (!is.numeric(chunkSize) || length(chunkSize) != 1 ||
      is.na(chunkSize) || chunkSize < 1) {
    cli::cli_abort("{.field chunkSize} must be a single positive number.")
  }
  loom <- .openH5AD(filename, ".loom")
  # The file handle was previously never closed; release it on every exit
  # path.
  on.exit(loom$close_all(), add = TRUE)

  cellIDs <- loom[[file.path("col_attrs", cellID)]][]
  featureIDs <- loom[[file.path("row_attrs", featureID)]][]
  matH5D <- loom[[matrixPath]]
  nCells <- length(cellIDs)

  if (nCells == 0) {
    # Nothing to read: return an empty genes-by-cells matrix rather than
    # failing when assigning dimnames to NULL.
    spMat <- Matrix::sparseMatrix(
      i = integer(0), j = integer(0), x = numeric(0),
      dims = c(length(featureIDs), 0L)
    )
    dimnames(spMat) <- list(featureIDs, cellIDs)
    return(spMat)
  }

  # Align the read size with the storage chunking along the cell dimension
  # (the first dimension as seen from R).
  # NOTE(review): `chunk_dims` is presumably NA for non-chunked datasets;
  # in that case the requested chunkSize is used as is -- confirm.
  cellChunkDims <- matH5D$chunk_dims[1]
  chunkSize <- floor(chunkSize)
  if (!is.na(cellChunkDims) && cellChunkDims > 0) {
    # Never let the aligned size fall to zero when chunkSize is smaller than
    # one storage chunk; that used to yield an infinite number of chunks.
    chunkSize <- max(cellChunkDims, chunkSize - chunkSize %% cellChunkDims)
  }
  nChunks <- ceiling(nCells / chunkSize)

  # Collect the chunks in a preallocated list and bind once at the end instead
  # of growing the matrix with cbind() in every iteration.
  chunkList <- vector("list", nChunks)
  cli::cli_progress_bar(name = "Loading from LOOM", total = nChunks)
  for (i in seq_len(nChunks)) {
    start <- (i - 1) * chunkSize + 1
    end <- min(i * chunkSize, nCells)
    # drop = FALSE keeps a one-cell slice as a matrix so that the transpose
    # below still yields genes as rows.
    chunkMat <- matH5D[start:end, , drop = FALSE]
    chunkList[[i]] <- methods::as(t(chunkMat), "CsparseMatrix")
    cli::cli_progress_update(set = i)
  }
  cli::cli_progress_done()

  spMat <- do.call(cbind, chunkList)
  dimnames(spMat) <- list(featureIDs, cellIDs)
  return(spMat)
}


# Internal helper: open an HDF5-based file (H5AD or LOOM) in read-only mode.
#
# filename: path to the file; aborts if it does not exist.
# checkExt: expected file extension including the leading dot (e.g. ".h5ad").
#           A mismatch only produces a warning and the file is opened anyway.
#
# Returns an `hdf5r::H5File` object. The caller is responsible for closing it.
# Aborts if the optional dependency hdf5r is not installed.
.openH5AD <- function(filename, checkExt) {
  if (!file.exists(filename)) {
    cli::cli_abort("{checkExt} File not found: {.file {filename}}")
  }
  # Compare case-insensitively so that mixed-case extensions such as ".H5ad"
  # are accepted, not only the all-upper or all-lower spellings.
  if (!endsWith(tolower(filename), tolower(checkExt))) {
    cli::cli_alert_warning("File extension is not {.var {checkExt}}, opening anyway.")
  }
  if (!requireNamespace("hdf5r", quietly = TRUE)) {
    cli::cli_abort("Package {.pkg hdf5r} is required for extracting data from H5AD files.")
  }
  return(hdf5r::H5File$new(filename, mode = "r"))
}


Loading

0 comments on commit 7e89f55

Please sign in to comment.