Skip to content

Commit

Permalink
Add functionality related to retention time alignment
Browse files Browse the repository at this point in the history
  • Loading branch information
jorainer committed Nov 6, 2024
1 parent eafadff commit 3dc7b28
Show file tree
Hide file tree
Showing 15 changed files with 310 additions and 169 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,6 @@ export(
"do_groupChromPeaks_nearest",
"do_adjustRtime_peakGroups",
"processHistoryTypes",
"adjustRtimePeakGroups",
"highlightChromPeaks",
"plotChromPeaks",
"plotChromPeakImage",
Expand Down Expand Up @@ -479,6 +478,7 @@ exportMethods("hasChromPeaks",
"hasAdjustedRtime",
"adjustedRtime",
"adjustedRtime<-",
"adjustRtimePeakGroups",
"featureDefinitions",
"featureDefinitions<-",
"featureValues",
Expand Down
8 changes: 6 additions & 2 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,11 @@ setGeneric("addProcessHistory", function(object, ...)
setGeneric("adjustRtime", function(object, param, ...)
standardGeneric("adjustRtime"))

setGeneric("adjustedRtime", function(object, ...) standardGeneric("adjustedRtime"))
#' @rdname adjustRtime
setGeneric("adjustRtimePeakGroups", function(object, param, ...)
standardGeneric("adjustRtimePeakGroups"))
setGeneric("adjustedRtime", function(object, ...)
standardGeneric("adjustedRtime"))
setGeneric("adjustedRtime<-", function(object, value)
standardGeneric("adjustedRtime<-"))
setGeneric("ampTh", function(object, ...) standardGeneric("ampTh"))
Expand Down Expand Up @@ -2140,4 +2144,4 @@ setGeneric("write.mzdata", function(object, ...) standardGeneric("write.mzdata")
setGeneric("write.mzQuantML", function(object, ...) standardGeneric("write.mzQuantML"))

## X
setGeneric("xcmsSource", function(object, ...) standardGeneric("xcmsSource"))
setGeneric("xcmsSource", function(object, ...) standardGeneric("xcmsSource"))
13 changes: 5 additions & 8 deletions R/DataClasses.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
## All class definitions should go in here.
#' @include AllGenerics.R functions-XChromatogram.R functions-XChromatograms.R

############################################################
## Class unions
setClassUnion("characterOrNULL", c("character", "NULL"))
setClassUnion("logicalOrNumeric", c("logical", "numeric"))
##setClassUnion("ANYorNULL", c("ANY", "NULL"))


############################################################
## xcmsSet
##
Expand Down Expand Up @@ -2186,4 +2179,8 @@ setClass("FilterIntensityParam",
setClass("BetaDistributionParam",
contains = "Param"
)


############################################################
## Class unions
setClassUnion("characterOrNULL", c("character", "NULL"))
setClassUnion("logicalOrNumeric", c("logical", "numeric"))
2 changes: 1 addition & 1 deletion R/XcmsExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -2072,4 +2072,4 @@ setMethod(
res <- do.call(rbind, res)
pb$tick()
res
})
})
142 changes: 80 additions & 62 deletions R/XcmsExperimentHdf5-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,39 +337,40 @@ NULL
##
################################################################################

#' Get chrom peaks for features from one sample. Allows to define/subset by
#' features (`i`) and select column(s) from the chrom peaks matrix to return.
#'
#' @param sample_id `character(1)` with the ID of the sample from which to
#' return the data.
#'
#' @param hdf5_file `character(1)` with the HDF5 file name
#'
#' @param ms_level `integer(1)` with the MS level of the features/chrom peaks
#'
#' @param i optional `integer` to select the features for which to return the
#' data.
#'
#' @param j optional `integer` defining the index of the column(s) to return.
#'
#' @return `matrix` with the chrom peak data, first column being the feature
#' index.
#'
#' @importFrom S4Vectors findMatches to
#'
#' @noRd
.h5_feature_chrom_peaks_sample <- function(sample_id, hdf5_file, ms_level,
i = integer(), j = NULL) {
fidx <- xcms:::.h5_read_data(hdf5_file, sample_id, name = "feature_to_chrom_peaks",
ms_level = ms_level)[[1L]]
if (length(i)) {
hits <- findMatches(i, fidx[, 1L])
fidx <- fidx[to(hits), , drop = FALSE]
}
vals <- xcms:::.h5_read_data(hdf5_file, sample_id, name = "chrom_peaks",
ms_level = ms_level, i = fidx[, 2L], j = j)[[1L]]
cbind(fidx[, 1L], vals)
}
## ## WE MIGHT ACTUALLY NOT NEED THIS!
## #' Get chrom peaks for features from one sample. Allows to define/subset by
## #' features (`i`) and select column(s) from the chrom peaks matrix to return.
## #'
## #' @param sample_id `character(1)` with the ID of the sample from which to
## #' return the data.
## #'
## #' @param hdf5_file `character(1)` with the HDF5 file name
## #'
## #' @param ms_level `integer(1)` with the MS level of the features/chrom peaks
## #'
## #' @param i optional `integer` to select the features for which to return the
## #' data.
## #'
## #' @param j optional `integer` defining the index of the column(s) to return.
## #'
## #' @return `matrix` with the chrom peak data, first column being the feature
## #' index.
## #'
## #' @importFrom S4Vectors findMatches to
## #'
## #' @noRd
## .h5_feature_chrom_peaks_sample <- function(sample_id, hdf5_file, ms_level,
## i = integer(), j = NULL) {
## fidx <- .h5_read_data(hdf5_file, sample_id, name = "feature_to_chrom_peaks",
## ms_level = ms_level)[[1L]]
## if (length(i)) {
## hits <- findMatches(i, fidx[, 1L])
## fidx <- fidx[to(hits), , drop = FALSE]
## }
## vals <- .h5_read_data(hdf5_file, sample_id, name = "chrom_peaks",
## ms_level = ms_level, i = fidx[, 2L], j = j)[[1L]]
## cbind(fidx[, 1L], vals)
## }

#' Extracts feature values for one sample summing intensities for features
#' with multiple peaks assigned.
Expand Down Expand Up @@ -428,12 +429,14 @@ NULL
f <- factor(fidx[, 1L], levels = seq_len(n_features))
pk_idx <- split(fidx[, 2L], f)
idx_multi <- which(lengths(pk_idx) > 1L)
FUN <- switch(
method,
sum = function(z) sum(vals[z, 1L]),
maxint = function(z) vals[z, 1L][which.max(vals[z, 2L])],
medret = function(z) vals[z, 1L][which.min(abs(vals[z, 2L]))])
res[idx_multi] <- vapply(pk_idx[idx_multi], FUN, 1.1)
if (length(idx_multi)) {
FUN <- switch(
method,
sum = function(z) sum(vals[z, 1L]),
maxint = function(z) vals[z, 1L][which.max(vals[z, 2L])],
medret = function(z) vals[z, 1L][which.min(abs(vals[z, 2L]))])
res[idx_multi] <- vapply(pk_idx[idx_multi], FUN, 1.1)
}
res
}

Expand Down Expand Up @@ -473,23 +476,6 @@ NULL
##
################################################################################

#' .getPeakGroupsRtMatrix in do_adjustRtime-functions.R
#' Need a function that gets a matrix with columns being samples and rows the
#' retention time of a (defined) peak group. Peak groups get defined based
#' on the number of available peaks per feature.
#'
#' define the features based on the features_to_chrom_peaks data set.
#' for those we need to get then their retention times.
#'
#' @noRd
.get_peak_groups_rt_matrix <- function(x, ms_level = 1L) {
## LLLLLL continue.
i <- 1L
idx <- rhdf5::h5read(x@hdf5_file, paste0("/S", i, "/ms_", ms_level,
"/feature_to_chrom_peaks"))
rts <- xcms:::.h5_read_data(, index = LLLL)
}


################################################################################
##
Expand All @@ -513,6 +499,11 @@ NULL
#' `read_colnames` and `read_rownames` also the rownames and colnames are read.
#' Not reading them has performance advantages.
#'
#' This function supports reading only subsets of the data from the HDF5 file,
#' which, surprisingly, has a negative impact on performance. Maybe chunking
#' might improve that behaviour. Thus, for now, the `.h5_read_matrix2()`
#' function should be used instead.
#'
#' @param name `character(1)` with the name of the data set to read.
#'
#' @param h5 HDF5 file handle
Expand Down Expand Up @@ -542,6 +533,30 @@ NULL
d
}

#' Same functionality as `.h5_read_matrix`, but this one does the subsetting
#' in R, i.e. reads first the **full** matrix into R and does the subsetting
#' in R. For data matrices up to 100,000 rows and 3 columns this function
#' is faster.
#'
#' @noRd
.h5_read_matrix2 <- function(name, h5, index = list(NULL, NULL),
read_colnames = FALSE,
read_rownames = FALSE,
rownames = paste0(name, "_rownames")) {
d <- rhdf5::h5read(h5, name = name)
if (length(index[[1L]]))
d <- d[index[[1L]], , drop = FALSE]
if (length(index[[2L]]))
d <- d[, index[[2L]], drop = FALSE]
if (read_rownames)
rownames(d) <- rhdf5::h5read(h5, name = rownames, drop = TRUE,
index = index[1L])
if (read_colnames)
colnames(d) <- rhdf5::h5read(h5, name = paste0(name, "_colnames"),
drop = TRUE, index = index[2L])
d
}

#' Read a single `data.frame` from the HDF5 file. With
#' `read_rownames = TRUE` also the row names are read and set, which requires
#' an additional reading step. Note that for a `data.frame` each column
Expand Down Expand Up @@ -665,10 +680,7 @@ NULL
FUN <- switch(name,
chrom_peak_data = .h5_read_chrom_peak_data,
feature_definitions = .h5_read_data_frame,
.h5_read_matrix)
## FUN <- .h5_read_matrix
## if (name %in% c("chrom_peak_data", "feature_definitions"))
## FUN <- .h5_read_data_frame
.h5_read_matrix2)
h5 <- rhdf5::H5Fopen(h5_file)
on.exit(invisible(rhdf5::H5Fclose(h5)))
d <- paste0("/", index, "/ms_", ms_level, "/", name)
Expand Down Expand Up @@ -736,6 +748,8 @@ NULL

.h5_compression_level <- function() 0L

.h5_filter <- function() "NONE"

#' Initializes the HDF5 file
#'
#' @noRd
Expand All @@ -746,9 +760,12 @@ NULL
h5 <- rhdf5::H5Fcreate(x)
on.exit(invisible(rhdf5::H5Fclose(h5)))
comp_level <- .h5_compression_level()
flt <- .h5_filter()
rhdf5::h5createGroup(h5, "header")
rhdf5::h5write("package:xcms", h5, "/header/package", level = comp_level)
rhdf5::h5write(mod_count, h5, "/header/modcount", level = comp_level)
rhdf5::h5write("package:xcms", h5, "/header/package",
level = comp_level)
rhdf5::h5write(mod_count, h5, "/header/modcount",
level = comp_level)
}

#' Every writing operation should increate the "mod count", i.e. the
Expand Down Expand Up @@ -886,6 +903,7 @@ NULL
if (name %in% c("chrom_peak_data", "feature_definitions"))
FUN <- .h5_write_data_frame
comp_level <- .h5_compression_level()
flt <- .h5_filter()
for (i in seq_along(data_list)) {
group_sample <- paste0("/", names(data_list)[i])
if (!rhdf5::H5Lexists(h5, group_sample))
Expand Down
30 changes: 28 additions & 2 deletions R/XcmsExperimentHdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,21 @@
#' chromatographic peak in the chrom peak matrix **of that sample** and
#' MS level.
#'
#' @section Retention time alignment:
#'
#' - `adjustRtimePeakGroups()` and `adjustRtime()` with `PeakGroupsParam`:
#' parameter `extraPeaks` of `PeakGroupsParam` is ignored. Anchor peaks are
#' only defined using the `minFraction` parameter (and eventually, if
#' provided, the `subset` parameter).
#'
#' @section Correspondence analysis results:
#'
#' - `featureDefinitions()`: similarly to `featureDefinitions()` for
#' [XcmsExperiment] objects, this method returns a `data.frame` with the
#' characteristics for the defined LC-MS features. The function for
#' `XcmsExperimentHdf5` does however **not** return the `"peakidx"` column
#' with the indices of the chromatographic peaks per feature. Also, the
#' columns are usually returned in alphabetic order.
#' columns are returned in alphabetic order.
#'
#' - `featureValues()`: parameter `value = "index"` (i.e. returning the index
#' of the chromatographic peaks per feature) is not supported.
Expand Down Expand Up @@ -373,7 +380,6 @@ setMethod(
#' - add chunkSize to chromPeaks?
#' - chromPeakData,XcmsExperimentHdf5
#' - adjustRtime,XcmsExperimentHdf5
#' - findChromPeaks,XcmsExperimentHdf5
#'
#' @noRd
NULL
Expand All @@ -385,6 +391,25 @@ NULL
##
################################################################################

#' @rdname XcmsExperimentHdf5
setMethod(
"adjustRtimePeakGroups", c("XcmsExperimentHdf5", "PeakGroupsParam"),
function(object, param = PeakGroupsParam(), msLevel = 1L) {
if (!hasFeatures(object, msLevel = msLevel))
stop("No features present. Please run 'groupChromPeaks' first.")
subs <- param@subset
if (!length(subs)) subs <- seq_along(object)
if (!all(subs %in% seq_along(object)))
stop("Parameter 'subset' is out of bounds.")
object@sample_id <- object@sample_id[subs] # quick subset hack.
rts <- .h5_feature_values_ms_level(msLevel, object, method = "maxint",
intensity = "into", value = "rt")
pres <- apply(rts, 1, function(z) sum(!is.na(z)))
rts <- rts[pres >= param@minFraction * length(subs), , drop = FALSE]
colnames(rts) <- basename(fileNames(object))[subs]
rts[order(rowMedians(rts, na.rm = TRUE)), , drop = FALSE]
})

setMethod(
"adjustRtime", signature(object = "XcmsExperimentHdf5",
param = "PeakGroupsParam"),
Expand All @@ -407,6 +432,7 @@ setMethod(
## Need to implement an `adjustRtimePeakGroups,XcmsExperimentHdf5`.

}
## LLLLL continue here
fidx <- as.factor(fromFile(object))
rt_raw <- split(rtime(object), fidx)
rt_adj <- .adjustRtime_peakGroupsMatrix(
Expand Down
31 changes: 1 addition & 30 deletions R/functions-XCMSnExp.R
Original file line number Diff line number Diff line change
Expand Up @@ -577,35 +577,6 @@ dropGenericProcessHistory <- function(x, fun) {
idxs
}

#' @rdname adjustRtime
adjustRtimePeakGroups <- function(object, param = PeakGroupsParam(),
msLevel = 1L) {
if (!(inherits(object, "XCMSnExp") | inherits(object, "XcmsExperiment")))
stop("'object' has to be an 'XCMSnExp' or 'XcmsExperiment' object.")
if (!hasFeatures(object))
stop("No features present. Please run 'groupChromPeaks' first.")
if (hasAdjustedRtime(object))
warning("Alignment/retention time correction was already performed, ",
"returning a matrix with adjusted retention times.")
subs <- subset(param)
if (!length(subs))
subs <- seq_along(fileNames(object))
nSamples <- length(subs)
missingSample <- nSamples - (nSamples * minFraction(param))
pkGrp <- .getPeakGroupsRtMatrix(
peaks = chromPeaks(object, msLevel = msLevel),
peakIndex = .peakIndex(
.update_feature_definitions(
featureDefinitions(object), rownames(chromPeaks(object)),
rownames(chromPeaks(object, msLevel = msLevel)))),
sampleIndex = subs,
missingSample = missingSample,
extraPeaks = extraPeaks(param)
)
colnames(pkGrp) <- basename(fileNames(object))[subs]
pkGrp
}

.plotChromPeakDensity <- function(object, mz, rt, param, simulate = TRUE,
col = "#00000080", xlab = "retention time",
ylab = "sample", xlim = range(rt),
Expand Down Expand Up @@ -2079,4 +2050,4 @@ setMethod("hasFilledChromPeaks", "XCMSnExp", function(object) {
x <- x[keep, , drop = FALSE]
}
x
}
}
Loading

0 comments on commit 3dc7b28

Please sign in to comment.