Skip to content

Commit

Permalink
feat: add featureValues,XcmsExperimentHdf5
Browse files Browse the repository at this point in the history
  • Loading branch information
jorainer committed Oct 29, 2024
1 parent f8fb0e0 commit 71129d6
Show file tree
Hide file tree
Showing 8 changed files with 385 additions and 11 deletions.
105 changes: 103 additions & 2 deletions R/XcmsExperimentHdf5-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,108 @@ NULL
}


################################################################################
##
## FEATURES THINGS
##
################################################################################

#' Extracts feature values for one sample summing intensities for features
#' with multiple peaks assigned.
#'
#' @param hdf5_file `character(1)` with the HDF5 file name.
#'
#' @param sample_id `character(1)` with the sample ID.
#'
#' @param ms_level `integer(1)` with the MS level.
#'
#' @param n_features `integer(1)` with the total number of features for that
#' MS level.
#'
#' @param method `character(1)` defining the method to be used to tackle
#' features with multiple peaks.
#'
#' @param col_idx `integer` with the index of the peak columns that should be
#' loaded and processed by the functions. The first index **must** be the
#' index of the `value` column (i.e. the column that should be reported).
#' For `method = "maxint"`, the second column should be the column defined
#' with parameter `intensity`, i.e. the column with the intensity values
#' to select the *larger* peak. For `method = "rtmed"` it should be the
#' index of the column `"rt"`.
#'
#' @param filled `logical(1)` whether gap-filled values should be reported or
#' removed.
#'
#' @param rtmed `numeric` with the `"rtmed"` column of the feature definitions.
#' Only used (but required) for `method = "medret"`.
#'
#' @noRd
.h5_feature_values_sample <- function(sample_id, hdf5_file, ms_level,
n_features, method,
col_idx = integer(),
filled = TRUE, rtmed, ...) {
res <- rep(NA_real_, n_features)
sid <- paste0("/", sample_id, "/ms_", ms_level)
vals <- .h5_read_data(hdf5_file, sample_id, name = "chrom_peaks",
ms_level = ms_level, column = col_idx)[[1L]]
fidx <- .h5_read_data(hdf5_file, sample_id, name = "feature_to_chrom_peaks",
ms_level = ms_level)[[1L]]
## remove gap-filled values
if (!filled) {
is_filled <- rhdf5::h5read(hdf5_file,
paste0(sid, "/chrom_peak_data/is_filled"),
drop = TRUE)
vals[is_filled, 1L] <- NA_real_
}
## set/assign single and multiple values.
res[fidx[, 1L]] <- vals[fidx[, 2L], 1L]
if (method == "medret") {
## calculate difference between feature and peak rt
vals[fidx[, 2L], 2L] <- vals[fidx[, 2L], 2L] - rtmed[fidx[, 1L]]
}
## handle duplicates
f <- factor(fidx[, 1L], levels = seq_len(n_features))
pk_idx <- split(fidx[, 2L], f)
idx_multi <- which(lengths(pk_idx) > 1L)
FUN <- switch(
method,
sum = function(z) sum(vals[z, 1L]),
maxint = function(z) vals[z, 1L][which.max(vals[z, 2L])],
medret = function(z) vals[z, 1L][which.min(abs(vals[z, 2L]))])
res[idx_multi] <- vapply(pk_idx[idx_multi], FUN, 1.1)
res
}

#' Get feature values for a specific MS level.
#'
#' @noRd
.h5_feature_values_ms_level <- function(ms_level, x, method, value, intensity,
filled = TRUE) {
cn <- h5read(x@hdf5_file, paste0("/", x@sample_id[1L], "/ms_", ms_level,
"/chrom_peaks_colnames"), drop = TRUE)
col <- switch(method,
sum = value,
medret = c(value, "rt"),
maxint = c(value, intensity))
if (!all(col %in% cn))
stop("Not all requested columns available. Please make sure 'value' ",
"and 'intensity' (if defined) are available columns in the ",
"chrom peak matrix.", call. = FALSE)
col_idx <- match(col, cn)
rtmed <- rhdf5::h5read(x@hdf5_file,
paste0("/features/ms_", ms_level,
"/feature_definitions/rtmed"), drop = TRUE)
rn <- rhdf5::h5read(x@hdf5_file,
paste0("/features/ms_", ms_level,
"/feature_definitions_rownames"), drop = TRUE)
res <- do.call(
cbind, lapply(x@sample_id, .h5_feature_values_sample,
hdf5_file = x@hdf5_file, ms_level = ms_level,
n_features = length(rtmed), method = method,
col_idx = col_idx, filled = filled, rtmed = rtmed))
rownames(res) <- rn
res
}

################################################################################
##
Expand Down Expand Up @@ -709,7 +810,7 @@ NULL
h5 <- rhdf5::H5Fopen(h5_file)
on.exit(invisible(rhdf5::H5Fclose(h5)))
FUN <- .h5_write_matrix
if (name %in% c("chrom_peak_data", "feature_definition"))
if (name %in% c("chrom_peak_data", "feature_definitions"))
FUN <- .h5_write_data_frame
comp_level <- .h5_compression_level()
for (i in seq_along(data_list)) {
Expand All @@ -725,4 +826,4 @@ NULL
replace = replace)
}
.h5_increment_mod_count(h5)
}
}
85 changes: 82 additions & 3 deletions R/XcmsExperimentHdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#' @title xcms result object for very large data sets
#'
#' @aliases XcmsExperimentHdf5-class
#'
#' @name XcmsExperimentHdf5
#'
#' @description
Expand Down Expand Up @@ -40,6 +42,18 @@
#' chromatographic peak in the chrom peak matrix **of that sample** and
#' MS level.
#'
#' @section Correspondence analysis results:
#'
#' - `featureDefinitions()`: similarly to `featureDefinitions()` for
#' [XcmsExperiment] objects, this method returns a `data.frame` with the
#' characteristics for the defined LC-MS features. The function for
#' `XcmsExperimentHdf5` does however **not** return the `"peakidx"` column
#' with the indices of the chromatographic peaks per feature. Also, the
#' columns are usually returned in alphabetic order.
#'
#' - `featureValues()`: parameter `value = "index"` (i.e. returning the index
#' of the chromatographic peaks per feature) is not supported.
#'
#' @author Johannes Rainerr, Philippine Louail
NULL

Expand Down Expand Up @@ -483,7 +497,8 @@ setMethod(
res <- .xmse_group_cpeaks(cps, param = param)
if (!nrow(res))
return(object)
object@features_ms_level <- unique(c(object@features_ms_level, msLevel))
object@features_ms_level <- as.integer(
unique(c(object@features_ms_level, msLevel)))
cpk_idx <- res$peakidx
res$peakidx <- NULL
rownames(res) <- .featureIDs(
Expand Down Expand Up @@ -524,5 +539,69 @@ setMethod(
object
})

## featureDefinitions
## featureValues; of only one MS level?
#' @rdname hidden_aliases
setReplaceMethod("featureDefinitions", "XcmsExperimentHdf5",
function(object, value) {
stop("Not implemented for ", class(object)[1L])
})

#' @rdname hidden_aliases
setMethod(
"featureDefinitions", "XcmsExperimentHdf5",
function(object, mz = numeric(), rt = numeric(), ppm = 0,
type = c("any", "within", "apex_within"), msLevel = integer()) {
if (!hasFeatures(object))
return(object@featureDefinitions)
type <- match.arg(type)
if (length(msLevel))
msLevel <- intersect(msLevel, object@features_ms_level)
else msLevel <- object@features_ms_level
if (!length(msLevel))
return(object@featureDefinitions)
fd <- .h5_read_data(object@hdf5_file, rep("features", length(msLevel)),
name = "feature_definitions", ms_level = msLevel,
read_rownames = TRUE)
msl <- rep(msLevel, vapply(fd, nrow, 1L))
fd <- do.call(rbind, fd)
fd$ms_level <- msl
.subset_feature_definitions(fd, mz = mz, rt = rt,
ppm = ppm, type = type)
})

#' @rdname hidden_aliases
setMethod(
"featureValues", "XcmsExperimentHdf5",
function(object, method = c("medret", "maxint", "sum"), value = "into",
intensity = "into", filled = TRUE, missing = NA_real_,
msLevel = integer()) {
if (!length(msLevel)) msLevel <- object@features_ms_level
if (!hasFeatures(object, msLevel = msLevel))
stop("No feature definitions for MS level(s) ", msLevel," present.")
method <- match.arg(method)
if (value == "index")
stop("'featureValues' for 'XcmsExperimentHdf5' does not support ",
"'value = \"index\"'.", call. = FALSE)
if (is.character(missing) && !(missing %in% c("rowmin_half")))
stop("if 'missing' is not 'NA' or a numeric it should",
" be one of: \"rowmin_half\".")
msLevel <- intersect(msLevel, object@features_ms_level)
vals <- do.call(
rbindFill,
lapply(msLevel, .h5_feature_values_ms_level, x = object,
method = method, value = value, intensity = intensity,
filled = filled)
)
colnames(vals) <- basename(fileNames(object))
missing <- missing[1L]
if (!is.na(missing)) {
if (is.numeric(missing))
vals[is.na(vals)] <- missing
if (missing == "rowmin_half")
for (i in seq_len(nrow(vals))) {
nas <- is.na(vals[i, ])
if (any(nas))
vals[i, nas] <- min(vals[i, ], na.rm = TRUE) / 2
}
}
vals
})
5 changes: 4 additions & 1 deletion man/XcmsExperiment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 26 additions & 3 deletions man/XcmsExperimentHdf5.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/findChromPeaks.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 71129d6

Please sign in to comment.