Skip to content

Commit

Permalink
Add chromPeakData,XcmsExperimentHdf5 function
Browse files Browse the repository at this point in the history
  • Loading branch information
jorainer committed Nov 11, 2024
1 parent 9e1a1a6 commit 9255c63
Show file tree
Hide file tree
Showing 4 changed files with 281 additions and 61 deletions.
135 changes: 118 additions & 17 deletions R/XcmsExperimentHdf5-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -306,13 +306,6 @@ NULL
ms_level = msl, read_colnames = TRUE,
read_rownames = TRUE, j = idx_columns,
rt = rt, mz = mz, ppm = ppm, type = type)
## ## Might be better (memory wise) to pass this to the import function
## ## instead
## if (length(mz) | length(rt))
## res <- lapply(res, function(z, rt, mz, ppm, type) {
## z[.is_chrom_peaks_within_mz_rt(
## z, rt = rt, mz = mz, ppm = ppm, type = type), , drop = FALSE]
## }, rt = rt, mz = mz, ppm = ppm, type = type)
if (by_sample) {
names(res) <- ids
res
Expand All @@ -322,8 +315,40 @@ NULL
}
}

.h5_chrom_peak_data <- function(x, columns = character(), by_sample = TRUE) {
## LLLL implement; following .h5_chrom_peaks
#' Extract the `chromPeakData` data.frame. Using `peaks` allows to reduce memory
#' demand because only data from the specified chrom peaks is returned. This
#' assumes that `chromPeaks()` was called before to get the IDs of the peaks.
#'
#' @param x `XcmsExperimentHdf5`
#'
#' @param columns optional `character()` to define the columns to extract.
#'
#' @param peaks optional `character()` to define selected chromatographic peaks
#' for which the data should be returned. If not specified data for all
#' chrom peaks is returned.
#'
#' @param by_sample `logical(1)` whether results should be `rbind` or returned
#' as a `list` of `data.frame`.
#'
#' @noRd
.h5_chrom_peak_data <- function(x, msLevel = integer(), columns = character(),
peaks = character(), by_sample = TRUE) {
ids <- rep(x@sample_id, length(msLevel))
msl <- rep(msLevel, each = length(x@sample_id))
## Eventually pass chrom peak ids along to read only specicic data...
res <- .h5_read_data(x@hdf5_file, id = ids, name = "chrom_peak_data",
ms_level = msl, read_rownames = TRUE, peaks = peaks)
if (by_sample) {
names(res) <- ids
res <- mapply(FUN = function(a, b) {
a$ms_level <- b
a
}, res, msl, SIMPLIFY = FALSE)
res
} else {
l <- vapply(res, nrow, 1L)
cbind(do.call(rbind, res), ms_level = rep(msl, l))
}
}

.h5_chrom_peaks_colnames <- function(x, msLevel = 1L) {
Expand All @@ -333,6 +358,14 @@ NULL
drop = TRUE)
}

.h5_chrom_peak_data_colnames <- function(x, msLevel = 1L) {
h5 <- rhdf5::H5Fopen(x@hdf5_file)
on.exit(rhdf5::H5Fclose(h5))
c(.h5_dataset_names(
paste0("/", x@sample_id[1L], "/ms_", msLevel, "/chrom_peak_data"), h5),
"ms_level")
}

#' Replace the retention times of chrom peaks with new values, depending
#' on the provided rts. This function is used during retention time alignment
#'
Expand Down Expand Up @@ -520,6 +553,65 @@ NULL
################################################################################


################################################################################
##
## EIC/CHROMATOGRAMS FUNCTIONALITY
##
################################################################################
#' Read chromatograms for a set of samples (chunk) and adds chromatographic
#' peaks.
#'
#' @param x `XcmsExperimentHdf5` for one subset/chunk of data from which the
#' data should be extracted
#'
#' @param index `integer` with the index of the current subset `x` in the *full*
#' data set.
#'
#' @param ms_level `integer(1)` with the MS level.
#' @noRd
.h5_x_chromatogram <- function(x, index = seq_along(x), ms_level = 1L,
mz, rt, ppm = 0, chromPeaks = "any",
BPPARAM = bpparam()) {
## Get the chromatograms in parallel.
chr <- as(chromatogram(as(x, "MsExperiment"), mz = mz,
rt = rt, BPPARAM = BPPARAM), "XChromatograms")
js <- seq_len(nrow(chr))
message("Processing chromatographic peaks")
pb <- progress_bar$new(format = paste0("[:bar] :current/:",
"total (:percent) in ",
":elapsed"),
total = ncol(chr) + 1L, clear = FALSE)
for (i in seq_along(x)) {
cp <- .h5_read_data(x@hdf5_file, x@sample_id[i], "chrom_peaks",
ms_level = ms_level, read_colnames = TRUE,
read_rownames = TRUE)[[1L]]
for (j in js) {
idx <- which(.is_chrom_peaks_within_mz_rt(
cp, rt[j, ], mz[j, ], ppm, chromPeaks), useNames = FALSE)
a <- cbind(cp[idx, , drop = FALSE], sample = rep(i, length(idx)))
b <- .h5_read_data(
x@hdf5_file, x@sample_id[i], "chrom_peak_data",
ms_level = ms_level, read_colnames = TRUE, i = idx,
read_rownames = FALSE)[[1L]]
b$ms_level <- rep(ms_level, length(idx))
rownames(b) <- rownames(a)
tmp <- chr@.Data[j, i][[1L]]
slot(tmp, "chromPeaks", check = FALSE) <- a
slot(tmp, "chromPeakData", check = FALSE) <- as(b, "DataFrame")
chr@.Data[j, i][[1L]] <- tmp
}
pb$tick()
}
pb$tick()
if (hasFeatures(x, ms_level)) {
stop("Not yet implemented")
## Somehow add features.
}
chr@.processHistory <- x@processHistory
chr
}


################################################################################
##
## HDF5 FUNCTIONALITY
Expand Down Expand Up @@ -587,9 +679,9 @@ NULL
read_rownames = FALSE,
rownames = paste0(name, "_rownames")) {
d <- rhdf5::h5read(h5, name = name)
if (length(index[[1L]]))
if (!is.null(index[[1L]]))
d <- d[index[[1L]], , drop = FALSE]
if (length(index[[2L]]))
if (!is.null(index[[2L]]))
d <- d[, index[[2L]], drop = FALSE]
if (read_rownames)
rownames(d) <- rhdf5::h5read(h5, name = rownames, drop = TRUE,
Expand Down Expand Up @@ -646,15 +738,24 @@ NULL
d <- as.data.frame(d)
if (read_rownames)
rownames(d) <- rhdf5::h5read(h5, rownames, drop = TRUE)
if (length(index[[1L]]))
d[index[[1L]], , drop = FALSE]
else d
if (is.null(index[[1L]]))
d
else d[index[[1L]], , drop = FALSE]
}

.h5_read_chrom_peak_data <- function(name, h5, index = list(NULL, NULL),
read_rownames = FALSE, ...) {
.h5_read_data_frame(name, h5, read_rownames = read_rownames, index = index,
rownames = sub("_data", "s_rownames", name))
read_rownames = FALSE, peaks = character(),
...) {
cd <- .h5_read_data_frame(
name, h5, read_rownames = read_rownames || length(peaks) > 0,
index = index, rownames = sub("_data", "s_rownames", name))
if (length(peaks)) {
idx <- match(peaks, rownames(cd))
cd <- cd[idx[!is.na(idx)], , drop = FALSE]
if (!read_rownames)
rownames(cd) <- NULL
}
cd
}

#' Reads the names of the data sets of a group. This can for example be used
Expand Down
100 changes: 62 additions & 38 deletions R/XcmsExperimentHdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@
#' chromatographic peak in the chrom peak matrix **of that sample** and
#' MS level.
#'
#' @section Functionality related to chromatographic peaks:
#'
#' - `chromPeakData()` gains a new parameter `peaks` which allows to specify
#' from which chromatographic peaks data should be returned. For these
#' chromatographic peaks the ID (row name in `chromPeaks()`) should be
#' provided with the `peaks` parameter. This can reduce the memory
#' requirement for cases in which only data of some selected chromatographic
#' peaks needs to be extracted.
#'
#' @section Retention time alignment:
#'
#' - `adjustRtimePeakGroups()` and `adjustRtime()` with `PeakGroupsParam`:
Expand Down Expand Up @@ -245,7 +254,6 @@ setMethod(
msl <- object@chrom_peaks_ms_level
if (length(msLevel))
msl <- msl[msl %in% msLevel]
## Eventually run chunk-wise?
.h5_chrom_peaks(object, msLevel = msl, columns = columns,
by_sample = FALSE, mz = mz, rt = rt, ppm = ppm,
type = type)
Expand All @@ -258,16 +266,24 @@ setReplaceMethod(
stop("Not implemented for ", class(object)[1L])
})

#' @rdname hidden_aliases
#' @rdname XcmsExperimentHdf5
setMethod(
"chromPeakData", "XcmsExperimentHdf5",
function(object, msLevel = integer(), sample = integer(),
function(object, msLevel = integer(), peaks = character(),
return.type = c("DataFrame", "data.frame")) {
return.type <- match.arg(return.type)
stop("Not implemented for ", class(object)[1L])
## if (return.type == "DataFrame")
## as(.chromPeakData(object, msLevel = msLevel), "DataFrame")
## else .chromPeakData(object, msLevel = msLevel)
if (!length(object))
return(as(object@chromPeakData, return.type))
.h5_check_mod_count(object@hdf5_file, object@hdf5_mod_count)
if (!length(msLevel))
msLevel <- object@chrom_peaks_ms_level
if (!hasChromPeaks(object, msLevel = msLevel))
return(as(object@chromPeakData, return.type))
if (return.type == "DataFrame")
as(.h5_chrom_peak_data(object, msLevel, peaks = peaks,
by_sample = FALSE), "DataFrame")
else .h5_chrom_peak_data(object, msLevel, peaks = peaks,
by_sample = FALSE)
})

## #' @rdname refineChromPeaks
Expand Down Expand Up @@ -683,34 +699,42 @@ setMethod(
#' - get chrom peaks for each sample/chrom peak.
#'
#' @noRd
## #' @rdname hidden_aliases
## setMethod(
## "chromatogram", "XcmsExperimentHdf5",
## function(object, rt = matrix(nrow = 0, ncol = 2),
## mz = matrix(nrow = 0, ncol = 2), aggregationFun = "sum",
## msLevel = 1L, chunkSize = 2L, isolationWindowTargetMz = NULL,
## return.type = c("XChromatograms", "MChromatograms"),
## include = character(),
## chromPeaks = c("apex_within", "any", "none"),
## BPPARAM = bpparam()) {
## if (!is.matrix(rt)) rt <- matrix(rt, ncol = 2L)
## if (!is.matrix(mz)) mz <- matrix(mz, ncol = 2L)
## if (length(include)) {
## warning("Parameter 'include' is deprecated, please use ",
## "'chromPeaks' instead")
## chromPeaks <- include
## }
## if (nrow(mz) && !nrow(rt))
## rt <- cbind(rep(-Inf, nrow(mz)), rep(Inf, nrow(mz)))
## if (nrow(rt) && !nrow(mz))
## mz <- cbind(rep(-Inf, nrow(rt)), rep(Inf, nrow(rt)))
## return.type <- match.arg(return.type)
## chromPeaks <- match.arg(chromPeaks)
## if (hasAdjustedRtime(object))
## object <- applyAdjustedRtime(object)
## .xmse_extract_chromatograms_old(
## object, rt = rt, mz = mz, aggregationFun = aggregationFun,
## msLevel = msLevel, isolationWindow = isolationWindowTargetMz,
## chunkSize = chunkSize, chromPeaks = chromPeaks,
## return.type = return.type, BPPARAM = BPPARAM)
## })
#' @rdname hidden_aliases
setMethod(
"chromatogram", "XcmsExperimentHdf5",
function(object, rt = matrix(nrow = 0, ncol = 2),
mz = matrix(nrow = 0, ncol = 2), aggregationFun = "sum",
msLevel = 1L, chunkSize = 2L, isolationWindowTargetMz = NULL,
return.type = c("XChromatograms", "MChromatograms"),
include = character(),
chromPeaks = c("apex_within", "any", "none"),
BPPARAM = bpparam()) {
if (!is.matrix(rt)) rt <- matrix(rt, ncol = 2L)
if (!is.matrix(mz)) mz <- matrix(mz, ncol = 2L)
if (length(include)) {
warning("Parameter 'include' is deprecated, please use ",
"'chromPeaks' instead")
chromPeaks <- include
}
if (nrow(mz) && !nrow(rt))
rt <- cbind(rep(-Inf, nrow(mz)), rep(Inf, nrow(mz)))
if (nrow(rt) && !nrow(mz))
mz <- cbind(rep(-Inf, nrow(rt)), rep(Inf, nrow(rt)))
return.type <- match.arg(return.type)
chromPeaks <- match.arg(chromPeaks)
if (hasAdjustedRtime(object))
object <- applyAdjustedRtime(object)
## process the data in chunks.
## in each chunk: get chromatograms, load chrom peaks and process those.
## ? how to get/define the features too? get the feature indices?
## Implementation notes:
## XChromatogram has slots @chromPeaks (matrix) @chromPeakData (DataFrame)
## XChromatograms has slot @featureDefinitions (DataFrame)


.xmse_extract_chromatograms_old(
object, rt = rt, mz = mz, aggregationFun = aggregationFun,
msLevel = msLevel, isolationWindow = isolationWindowTargetMz,
chunkSize = chunkSize, chromPeaks = chromPeaks,
return.type = return.type, BPPARAM = BPPARAM)
})
Loading

0 comments on commit 9255c63

Please sign in to comment.