From 7bd035dbd904a008aedc75af5579789810357d47 Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Thu, 14 Nov 2024 09:57:37 +0100 Subject: [PATCH] feat: add chromatogram,XcmsExperimentHdf5 --- R/XcmsExperiment-functions.R | 6 +- R/XcmsExperimentHdf5-functions.R | 101 ++++++++++++----- R/XcmsExperimentHdf5.R | 40 +++---- R/functions-utils.R | 2 +- R/methods-XChromatograms.R | 10 +- man/XcmsExperiment.Rd | 5 +- man/XcmsExperimentHdf5.Rd | 34 ++++++ man/hidden_aliases.Rd | 29 +++-- .../test_XcmsExperimentHdf5-functions.R | 51 ++++++++- tests/testthat/test_XcmsExperimentHdf5.R | 103 ++++++++++++++++-- tests/testthat/test_functions-utils.R | 2 +- 11 files changed, 294 insertions(+), 89 deletions(-) diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 9eb7b089f..1ff360860 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -921,6 +921,7 @@ msLevel, isolationWindow = NULL, chunkSize, chromPeaks, return.type, BPPARAM) { + message("Extracting chromatographic data") chrs <- as(.mse_chromatogram( as(object, "MsExperiment"), rt = rt, mz = mz, aggregationFun = aggregationFun, msLevel = msLevel, @@ -956,7 +957,8 @@ } } pb$tick() - ## Process features - that is not perfect. + ## Process features - that is not perfect: features are selected based on + ## mz and rt, not based on the selected chrom peaks. if (hasFeatures(object)) { message("Processing features") pb <- progress_bar$new(format = paste0("[:bar] :current/:", @@ -1264,4 +1266,4 @@ XcmsExperiment <- function() { n@.processHistory <- from@processHistory validObject(n) n -} +} \ No newline at end of file diff --git a/R/XcmsExperimentHdf5-functions.R b/R/XcmsExperimentHdf5-functions.R index fcd7ed8b7..82fd14810 100644 --- a/R/XcmsExperimentHdf5-functions.R +++ b/R/XcmsExperimentHdf5-functions.R @@ -578,7 +578,10 @@ NULL ## ################################################################################ #' Read chromatograms for a set of samples (chunk) and adds chromatographic -#' peaks. +#' peaks. Chromatographic peaks are added/processed by sample reading the +#' respective information from the hdf5 file. Features are added by processing +#' the feature and chrom peak IDs of the selected chrom peaks. +#' #' #' @param x `XcmsExperimentHdf5` for one subset/chunk of data from which the #' data should be extracted @@ -588,49 +591,90 @@ NULL #' #' @param ms_level `integer(1)` with the MS level. #' @noRd -.h5_x_chromatogram <- function(x, index = seq_along(x), ms_level = 1L, - mz, rt, ppm = 0, chromPeaks = "any", +.h5_x_chromatograms <- function(x, index = seq_along(x), aggregationFun = "sum", + ms_level = 1L, mz, rt, isolationWindow = NULL, + chromPeaks = "any", chunkSize = 2L, + return.type = "XChromatograms", BPPARAM = bpparam()) { - ## Get the chromatograms in parallel. - chr <- as(chromatogram(as(x, "MsExperiment"), mz = mz, - rt = rt, BPPARAM = BPPARAM), "XChromatograms") - js <- seq_len(nrow(chr)) + message("Extracting chromatographic data") + chr <- as(.mse_chromatogram( + as(x, "MsExperiment"), rt = rt, mz = mz, msLevel = ms_level, + aggregationFun = aggregationFun, isolationWindow = isolationWindow, + chunkSize = chunkSize, BPPARAM = BPPARAM), return.type) + if (return.type == "MChromatograms" || chromPeaks == "none") + return(chr) message("Processing chromatographic peaks") + js <- seq_len(nrow(chr)) pb <- progress_bar$new(format = paste0("[:bar] :current/:", "total (:percent) in ", ":elapsed"), - total = ncol(chr) + 1L, clear = FALSE) - for (i in seq_along(x)) { + total = length(x), clear = FALSE) + has_features <- hasFeatures(x, ms_level) + if (has_features) { + fd <- .h5_read_data(x@hdf5_file, "features", "feature_definitions", + read_rownames = TRUE, ms_level = ms_level)[[1L]] + f2p <- vector("list", 1000) # initialize with an educated guess; + cnt <- 1L + } + for (i in seq_along(x)) { # iterate over samples cp <- .h5_read_data(x@hdf5_file, x@sample_id[i], "chrom_peaks", ms_level = ms_level, read_colnames = TRUE, read_rownames = TRUE)[[1L]] - for (j in js) { + cd <- .h5_read_data(x@hdf5_file, x@sample_id[i], "chrom_peak_data", + ms_level = ms_level, read_colnames = TRUE, + read_rownames = FALSE)[[1L]] + if (has_features) + fidx <- .h5_read_data(x@hdf5_file, x@sample_id[i], + "feature_to_chrom_peaks", + ms_level = ms_level)[[1L]] + for (j in js) { # iterate over ranges/EICs idx <- which(.is_chrom_peaks_within_mz_rt( - cp, rt[j, ], mz[j, ], ppm, chromPeaks), useNames = FALSE) - a <- cbind(cp[idx, , drop = FALSE], sample = rep(i, length(idx))) - b <- .h5_read_data( - x@hdf5_file, x@sample_id[i], "chrom_peak_data", - ms_level = ms_level, read_colnames = TRUE, i = idx, - read_rownames = FALSE)[[1L]] - b$ms_level <- rep(ms_level, length(idx)) + cp, rt[j, ], mz[j, ], type = chromPeaks), useNames = FALSE) + li <- length(idx) + a <- cbind(cp[idx, , drop = FALSE], sample = rep(i, li)) + b <- extractROWS(cd, idx) + b$ms_level <- rep(ms_level, li) attr(b, "row.names") <- rownames(a) tmp <- chr@.Data[j, i][[1L]] slot(tmp, "chromPeaks", check = FALSE) <- a slot(tmp, "chromPeakData", check = FALSE) <- as(b, "DataFrame") - chr@.Data[j, i][[1L]] <- tmp + chr@.Data[j, i][[1L]] <- tmp # this does not seem to cause copying + ## Add mapping of features and chrom peaks for that sample/EIC + if (li && has_features) { + is_feature <- fidx[, 2L] %in% idx + if (any(is_feature)) { + f2p[[cnt]] <- cbind(rownames(fd)[fidx[is_feature, 1L]], + rownames(cp)[fidx[is_feature, 2L]], + rep(as.character(j), sum(is_feature))) + cnt <- cnt + 1L + } + } } pb$tick() } - pb$tick() - if (hasFeatures(x, ms_level)) { - stop("Not yet implemented") - ## Somehow add features. + if (has_features) { + message("Processing features") + ## Define the featureDefinitions and assign chrom peaks to each, + ## matching the same `"row"` (!). + f2p <- do.call(base::rbind, f2p) # mapping of features and chrom peaks + cp <- chromPeaks(chr) # chrom peaks to calculate index + cp_row_id <- paste0(rownames(cp), ".", cp[, "row"]) + ft_cp_row_id <- paste0(f2p[, 2], ".", f2p[, 3]) + ft_row_id <- paste0(f2p[, 1], ".", f2p[, 3]) + peakidx <- lapply(split(ft_cp_row_id, ft_row_id), + base::match, table = cp_row_id) + id <- strsplit(names(peakidx), ".", fixed = TRUE) + fts <- fd[vapply(id, `[`, i = 1L, FUN.VALUE = NA_character_), ] + fts$peakidx <- unname(peakidx) + fts$row <- vapply(id, function(z) as.integer(z[2L]), NA_integer_) + fts$ms_level <- ms_level + slot(chr, "featureDefinitions", check = FALSE) <- + DataFrame(extractROWS(fts, order(fts$row))) } - chr@.processHistory <- x@processHistory + slot(chr, ".processHistory", check = FALSE) <- x@processHistory chr } - ################################################################################ ## ## HDF5 FUNCTIONALITY @@ -805,7 +849,6 @@ NULL rhdf5::h5ls(g, recursive = recursive, datasetinfo = FALSE)$name } - .h5_ms_levels <- function(h5, sample_id) { nms <- .h5_dataset_names(paste0("/", sample_id), h5) as.integer(unique(sub("ms_", "", grep("^ms", nms, value = TRUE)))) @@ -909,7 +952,7 @@ NULL #' #' @noRd .h5_check_mod_count <- function(h5, mod_count = 0L) { - mc <- rhdf5::h5read(h5, "/header/modcount")[1L] + mc <- .h5_mod_count(h5) if (mc != mod_count) stop("The HDF5 file was changed by a different process. This xcms ", "result object/variable is no longer valid.") @@ -965,13 +1008,17 @@ NULL level = comp_level) } +.h5_mod_count <- function(h5) { + rhdf5::h5read(h5, "/header/modcount")[1L] +} + #' Every writing operation should increate the "mod count", i.e. the #' count of data modifications. This function increases the mod count by +1 #' and returns this value. #' #' @noRd .h5_increment_mod_count <- function(h5) { - mc <- rhdf5::h5read(h5, "/header/modcount")[1L] + 1L + mc <- .h5_mod_count(h5) + 1L rhdf5::h5write(mc, h5, "/header/modcount", level = .h5_compression_level()) mc diff --git a/R/XcmsExperimentHdf5.R b/R/XcmsExperimentHdf5.R index fba822fcb..6aa92df3e 100644 --- a/R/XcmsExperimentHdf5.R +++ b/R/XcmsExperimentHdf5.R @@ -410,14 +410,14 @@ setMethod( ## }) #' TODO: -#' - add chunkSize to chromPeaks? -#' - chromPeakData,XcmsExperimentHdf5 -#' - adjustRtime,XcmsExperimentHdf5 +#' - fillChromPeaks,XcmsExperimentHdf5 +#' - filterMsLevel +#' - filterRt +#' - filterMz #' #' @noRd NULL - ################################################################################ ## ## RETENTION TIME ALIGNMENT @@ -594,7 +594,7 @@ setMethod( unique(c(object@features_ms_level, msLevel))) cpk_idx <- res$peakidx res$peakidx <- NULL - rownames(res) <- .featureIDs( + attr(res, "row.names") <- .featureIDs( nrow(res), prefix = paste0("FT", msLevel), min_len = 6) ## Save features to "/features/ms_/feature_definitions" .h5_write_data(object@hdf5_file, list(features = res), @@ -705,15 +705,6 @@ setMethod( ## ################################################################################ -#' While previously we were first extracting the chromatograms and then adding -#' the chrom peaks later for `XcmsExperimentHdf5` it might be more efficient to -#' also extract the chrom peaks in the loop/chunk processing. So, essentially: -#' - process `object` chunk-wise -#' - for each chunk: -#' - extract chromatograms (in parallel?) -#' - get chrom peaks for each sample/chrom peak. -#' -#' @noRd #' @rdname hidden_aliases setMethod( "chromatogram", "XcmsExperimentHdf5", @@ -737,19 +728,14 @@ setMethod( mz <- cbind(rep(-Inf, nrow(rt)), rep(Inf, nrow(rt))) return.type <- match.arg(return.type) chromPeaks <- match.arg(chromPeaks) + if (!hasChromPeaks(object, msLevel)) + chromPeaks <- "none" if (hasAdjustedRtime(object)) object <- applyAdjustedRtime(object) - ## process the data in chunks. - ## in each chunk: get chromatograms, load chrom peaks and process those. - ## ? how to get/define the features too? get the feature indices? - ## Implementation notes: - ## XChromatogram has slots @chromPeaks (matrix) @chromPeakData (DataFrame) - ## XChromatograms has slot @featureDefinitions (DataFrame) - - - .xmse_extract_chromatograms_old( - object, rt = rt, mz = mz, aggregationFun = aggregationFun, - msLevel = msLevel, isolationWindow = isolationWindowTargetMz, - chunkSize = chunkSize, chromPeaks = chromPeaks, - return.type = return.type, BPPARAM = BPPARAM) + .h5_x_chromatograms( + object, ms_level = msLevel, chromPeaks = chromPeaks, + mz = mz, rt = rt, aggregationFun = aggregationFun, + chunkSize = chunkSize, return.type = return.type, + isolationWindow = isolationWindowTargetMz, + BPPARAM = BPPARAM) }) \ No newline at end of file diff --git a/R/functions-utils.R b/R/functions-utils.R index 9c92c8043..b80ad97cc 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -922,4 +922,4 @@ groupOverlaps <- function(xmin, xmax) { PACKAGE = "xcms") cbind(rtime = scantime[scns], intensity = res$intensity) -} +} \ No newline at end of file diff --git a/R/methods-XChromatograms.R b/R/methods-XChromatograms.R index 29f9b80ec..ce0a64075 100644 --- a/R/methods-XChromatograms.R +++ b/R/methods-XChromatograms.R @@ -3,14 +3,14 @@ setAs("MChromatograms", "XChromatograms", function(from) { res <- new("XChromatograms") - res@.Data <- matrix(lapply(from, function(z) { + slot(res, ".Data", check = FALSE) <- matrix(lapply(from@.Data, function(z) { if (is(z, "Chromatogram")) as(z, "XChromatogram") else z }), nrow = nrow(from), ncol = ncol(from), dimnames = dimnames(from)) - res@phenoData <- from@phenoData - res@featureData <- from@featureData - if (validObject(resetClass)) res + slot(res, "phenoData", check = FALSE) <- from@phenoData + slot(res, "featureData", check = FALSE) <- from@featureData + res }) #' @rdname XChromatogram @@ -676,4 +676,4 @@ setMethod("transformIntensity", "XChromatograms", function(object, nrow = nrow(object), dimnames = dimnames(object)) object -}) +}) \ No newline at end of file diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd index 9d8129783..5cfd036d3 100644 --- a/man/XcmsExperiment.Rd +++ b/man/XcmsExperiment.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/MsExperiment.R, % R/XcmsExperiment-functions.R, R/XcmsExperiment-plotting.R, -% R/XcmsExperiment.R, R/XcmsExperimentHdf5.R, R/methods-XCMSnExp.R +% R/XcmsExperiment.R, R/methods-XCMSnExp.R \name{filterFeatureDefinitions} \alias{filterFeatureDefinitions} \alias{filterRt,MsExperiment-method} @@ -50,7 +50,6 @@ \alias{chromatogram,XcmsExperiment-method} \alias{processHistory,XcmsExperiment-method} \alias{filterFile,XcmsExperiment-method} -\alias{[,XcmsExperimentHdf5,ANY,ANY,ANY-method} \title{Next Generation \code{xcms} Result Object} \usage{ filterFeatureDefinitions(object, ...) @@ -206,8 +205,6 @@ featureArea( keepFeatures = FALSE, ... ) - -\S4method{[}{XcmsExperimentHdf5,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) } \arguments{ \item{object}{An \code{XcmsExperiment} object.} diff --git a/man/XcmsExperimentHdf5.Rd b/man/XcmsExperimentHdf5.Rd index 96456ea0a..ae0f1e9ba 100644 --- a/man/XcmsExperimentHdf5.Rd +++ b/man/XcmsExperimentHdf5.Rd @@ -3,9 +3,18 @@ \name{XcmsExperimentHdf5} \alias{XcmsExperimentHdf5} \alias{XcmsExperimentHdf5-class} +\alias{chromPeakData,XcmsExperimentHdf5-method} \alias{adjustRtimePeakGroups,XcmsExperimentHdf5,PeakGroupsParam-method} \title{xcms result object for very large data sets} \usage{ +\S4method{chromPeakData}{XcmsExperimentHdf5}( + object, + msLevel = integer(), + peaks = character(), + return.type = c("DataFrame", "data.frame"), + bySample = FALSE +) + \S4method{adjustRtimePeakGroups}{XcmsExperimentHdf5,PeakGroupsParam}(object, param = PeakGroupsParam(), msLevel = 1L) } \description{ @@ -34,6 +43,12 @@ the ID of the sample (usually related to the index in the original `MsExperiment` object) and the the index of the chromatographic peak in the chrom peak matrix **of that sample** and MS level. + +HDF5 does not support parallel processing, thus preprocessing results need +to be loaded sequentially. + +All functionality for `XcmsExperimentHdf5` objects is optimized to reduce +memory demand at the cost of eventually lower performance. } \section{Using the HDF5 file-based on-disk data storage}{ @@ -44,6 +59,25 @@ hence use the on-disk data storage mode described on this page. All data is stored in a file with the name specifyied with parameter `hdf5File`. } +\section{Functionality related to chromatographic peaks}{ + + +- `chromPeaks()` gains parameter `bySample = FALSE` that, if set to `TRUE` + returns a `list` of `chromPeaks` matrices, one for each sample. Due to + the way data is organized in `XcmsExperimentHdf5` objects this is more + efficient than `bySample = FALSE`. Thus, in cases where chrom peak data + is subsequently evaluated or processed by sample, it is suggested to + use `bySample = TRUE`. + +- `chromPeakData()` gains a new parameter `peaks = character()` which allows + to specify from which chromatographic peaks data should be returned. + For these chromatographic peaks the ID (row name in `chromPeaks()`) + should be provided with the `peaks` parameter. This can reduce the memory + requirement for cases in which only data of some selected chromatographic + peaks needs to be extracted. Also, `chromPeakData()` supports the + `bySample` parameter described for `chromPeaks()` above. +} + \section{Retention time alignment}{ diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd index f5821c8d0..6cea1d3d7 100644 --- a/man/hidden_aliases.Rd +++ b/man/hidden_aliases.Rd @@ -5,13 +5,13 @@ \name{hidden_aliases} \alias{hidden_aliases} \alias{show,XcmsExperimentHdf5-method} +\alias{[,XcmsExperimentHdf5,ANY,ANY,ANY-method} \alias{findChromPeaks,XcmsExperimentHdf5,Param-method} \alias{dropChromPeaks,XcmsExperimentHdf5-method} \alias{hasChromPeaks,XcmsExperimentHdf5-method} \alias{chromPeaks<-,XcmsExperimentHdf5-method} \alias{chromPeaks,XcmsExperimentHdf5-method} \alias{chromPeakData<-,XcmsExperimentHdf5-method} -\alias{chromPeakData,XcmsExperimentHdf5-method} \alias{refineChromPeaks,XcmsExperimentHdf5,MergeNeighboringPeaksParam-method} \alias{adjustRtime,XcmsExperimentHdf5,PeakGroupsParam-method} \alias{dropAdjustedRtime,XcmsExperimentHdf5-method} @@ -21,11 +21,14 @@ \alias{featureDefinitions<-,XcmsExperimentHdf5-method} \alias{featureDefinitions,XcmsExperimentHdf5-method} \alias{featureValues,XcmsExperimentHdf5-method} +\alias{chromatogram,XcmsExperimentHdf5-method} \alias{adjustRtimePeakGroups,XcmsResult,PeakGroupsParam-method} \title{Internal page for hidden aliases} \usage{ \S4method{show}{XcmsExperimentHdf5}(object) +\S4method{[}{XcmsExperimentHdf5,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + \S4method{findChromPeaks}{XcmsExperimentHdf5,Param}( object, param, @@ -50,18 +53,12 @@ msLevel = integer(), type = c("any", "within", "apex_within"), isFilledColumn = FALSE, - columns = character() + columns = character(), + bySample = FALSE ) \S4method{chromPeakData}{XcmsExperimentHdf5}(object) <- value -\S4method{chromPeakData}{XcmsExperimentHdf5}( - object, - msLevel = integer(), - sample = integer(), - return.type = c("DataFrame", "data.frame") -) - \S4method{refineChromPeaks}{XcmsExperimentHdf5,MergeNeighboringPeaksParam}( object, param, @@ -101,6 +98,20 @@ msLevel = integer() ) +\S4method{chromatogram}{XcmsExperimentHdf5}( + object, + rt = matrix(nrow = 0, ncol = 2), + mz = matrix(nrow = 0, ncol = 2), + aggregationFun = "sum", + msLevel = 1L, + chunkSize = 2L, + isolationWindowTargetMz = NULL, + return.type = c("XChromatograms", "MChromatograms"), + include = character(), + chromPeaks = c("apex_within", "any", "none"), + BPPARAM = bpparam() +) + \S4method{adjustRtimePeakGroups}{XcmsResult,PeakGroupsParam}(object, param = PeakGroupsParam(), msLevel = 1L) } \value{ diff --git a/tests/testthat/test_XcmsExperimentHdf5-functions.R b/tests/testthat/test_XcmsExperimentHdf5-functions.R index 5bf6da74b..2aaca46dd 100644 --- a/tests/testthat/test_XcmsExperimentHdf5-functions.R +++ b/tests/testthat/test_XcmsExperimentHdf5-functions.R @@ -233,7 +233,7 @@ test_that(".h5_xmse_merge_neighboring_peaks works", { ms_level = rep(1L, length(x)), read_colnames = TRUE, read_rownames = TRUE) .h5_xmse_merge_neighboring_peaks(x) - mod_count <- as.vector(rhdf5::h5read(h5f, "/header/modcount")) + mod_count <- .h5_mod_count(h5f) expect_true(mod_count > x@hdf5_mod_count) ## Check that content was changed. res <- .h5_read_data(x@hdf5_file, id = x@sample_id, @@ -542,6 +542,13 @@ test_that(".h5_initialize_file", { file.remove(h5f) }) +test_that(".h5_mod_count works", { + h5f <- tempfile() + .h5_initialize_file(h5f, mod_count = 7L) + expect_identical(.h5_mod_count(h5f), 7L) + file.remove(h5f) +}) + test_that(".h5_increment_mod_count works", { h5f <- tempfile() .h5_initialize_file(h5f) @@ -693,11 +700,11 @@ test_that(".h5_update_rt_chrom_peaks_sample works", { expect_true(TRUE) }) -test_that(".h5_x_chromatogram wokrs", { +test_that(".h5_x_chromatogram works", { rt <- matrix(c(2600, 2700), nrow = 1) mz <- matrix(c(340, 400), nrow = 1) - res <- .h5_x_chromatogram(xmse_h5, mz = mz, rt = rt, chromPeaks = "any") + res <- .h5_x_chromatograms(xmse_h5, mz = mz, rt = rt, chromPeaks = "any") expect_s4_class(res, "XChromatograms") expect_true(nrow(res) == 1L) expect_equal(ncol(res), length(xmse_h5)) @@ -712,12 +719,48 @@ test_that(".h5_x_chromatogram wokrs", { expect_equal(unname(chromPeaks(res)), unname(chromPeaks(ref))) ## Multiple rows. - res <- .h5_x_chromatogram( + res <- .h5_x_chromatograms( xmse_h5, mz = chromPeaks(xmse_h5)[1:10, c("mzmin", "mzmax")], rt = chromPeaks(xmse_h5)[1:10, c("rtmin", "rtmax")], ms_level = 1L, chromPeaks = "apex_within", BPPARAM = bpparam()) expect_true(nrow(res) == 10L) + ## With features. + rt <- rbind(rt, c(3000, 3500)) + mz <- rbind(mz, c(450, 500)) + tmp <- loadXcmsData("xmse") |> + dropFeatureDefinitions() |> + groupChromPeaks(pdp, msLevel = 1L) + + ref <- chromatogram(tmp, mz = mz, rt = rt, chromPeaks = "any") + res <- .h5_x_chromatograms(xmseg_full_h5, mz = mz, rt = rt, + chromPeaks = "any") + a <- chromPeaks(ref) + b <- chromPeaks(res) + expect_equal(colnames(a), colnames(b)) + rownames(a) <- NULL + rownames(b) <- NULL + expect_equal(a, b) + + a <- featureDefinitions(ref) + b <- featureDefinitions(res) + expect_true(all(colnames(a) %in% colnames(b))) + ## We don't have exactly the same number of features, because XcmsExperiment + ## selects features based on mz and rt range, not based on included chrom + ## peaks. + expect_true(all(a$mzmed %in% b$mzmed)) + b <- b[b$mzmed %in% a$mzmed, ] + expect_equal(a$peakidx, b$peakidx) + + ## With overlapping ranges -> duplicated features and chrom peaks. + mz <- rbind(mz, mz[1, ]) + rt <- rbind(rt, rt[1, ]) + res <- .h5_x_chromatograms(xmseg_full_h5, mz = mz, rt = rt, + chromPeaks = "apex_within") + cp <- chromPeaks(res) + expect_equal(cp[cp[, "row"] == 1L, colnames(cp) != "row"], + cp[cp[, "row"] == 3L, colnames(cp) != "row"]) + fts <- featureDefinitions(res) }) rm(h5f_full_g) \ No newline at end of file diff --git a/tests/testthat/test_XcmsExperimentHdf5.R b/tests/testthat/test_XcmsExperimentHdf5.R index fcd0ed4a7..8813708e9 100644 --- a/tests/testthat/test_XcmsExperimentHdf5.R +++ b/tests/testthat/test_XcmsExperimentHdf5.R @@ -11,6 +11,9 @@ xmseg_full_h5 <- xcms:::.xcms_experiment_to_hdf5(a, h5f_full_g) pdp <- PeakDensityParam(sampleGroups = sampleData(xmseg_full_h5)$sample_group, minFraction = 0.4, bw = 30) xmseg_full_h5 <- groupChromPeaks(xmseg_full_h5, pdp, msLevel = 1L) +## reference +xmseg_full_ref <- dropFeatureDefinitions(loadXcmsData("xmse")) +xmseg_full_ref <- groupChromPeaks(xmseg_full_ref, pdp, msLevel = 1L) test_that("XcmsExperimentHdf5 validation works", { a <- new("XcmsExperimentHdf5") @@ -88,7 +91,7 @@ test_that("findChromPeaks,XcmsExperimentHdf5 works", { a <- as(xmse_h5, "MsExperiment") a <- as(a, "XcmsExperimentHdf5") h5_file <- tempfile() - xcms:::.h5_initialize_file(h5_file) + .h5_initialize_file(h5_file) a@hdf5_file <- h5_file a@sample_id <- c("S1", "S2", "S3") p <- xmse_h5@processHistory[[1L]]@param @@ -236,19 +239,19 @@ test_that("featureValues,XcmsExperimentHdf5 etc works", { nf <- nrow(b) rtmed <- b$rtmed ## .h5_feature_values_sample - a <- xcms:::.h5_feature_values_sample( + a <- .h5_feature_values_sample( xmseg_full_h5@hdf5_file, sample_id = "S1", ms_level = 1L, n_features = nf, method = "sum", filled = FALSE, col_idx = 9L) b <- unname(featureValues(ref, method = "sum", value = "maxo", filled = FALSE)[, 1L]) expect_equal(a, b) - a <- xcms:::.h5_feature_values_sample( + a <- .h5_feature_values_sample( xmseg_full_h5@hdf5_file, sample_id = "S4", ms_level = 1L, n_features = nf, filled = FALSE, method = "maxint", col_idx = c(7L, 9L)) b <- unname(featureValues(ref, method = "maxint", value = "into", filled = FALSE, intensity = "maxo")[, 4L]) expect_equal(a, b) - a <- xcms:::.h5_feature_values_sample( + a <- .h5_feature_values_sample( xmseg_full_h5@hdf5_file, sample_id = "S4", ms_level = 1L, n_features = nf, filled = FALSE, method = "medret", col_idx = c(8L, 4L), rtmed = rtmed) @@ -257,7 +260,7 @@ test_that("featureValues,XcmsExperimentHdf5 etc works", { expect_equal(a, b) ## .h5_feature_values_ms_level - a <- xcms:::.h5_feature_values_ms_level(1L, xmseg_full_h5, method = "medret", + a <- .h5_feature_values_ms_level(1L, xmseg_full_h5, method = "medret", value = "into", filled = FALSE) b <- featureValues(ref, method = "medret", value = "into", filled = FALSE) expect_equal(unname(a), unname(b)) @@ -301,8 +304,7 @@ test_that("featureValues,XcmsExperimentHdf5 etc works", { }) test_that("adjustRtimePeakGroups works", { - ref <- dropFeatureDefinitions(loadXcmsData("xmse")) - ref <- groupChromPeaks(ref, pdp, msLevel = 1L) + ref <- xmseg_full_ref a <- featureValues( ref, method = "maxint", intensity = "into", value = "rt") @@ -343,11 +345,11 @@ test_that("adjustRtime,XcmsExperimentHdf5 and related function work", { dropFeatureDefinitions() |> applyAdjustedRtime() res_h5 <- tempfile() - res <- xcms:::.xcms_experiment_to_hdf5(ref, res_h5) + res <- .xcms_experiment_to_hdf5(ref, res_h5) ## Create a single sample XcmsExperimentHdf5 a <- ref[3L] a_h5 <- tempfile() - a <- xcms:::.xcms_experiment_to_hdf5(a, a_h5) + a <- .xcms_experiment_to_hdf5(a, a_h5) ## Perform retention time alignment on reference data ref <- ref |> groupChromPeaks(pdp, msLevel = 1L) |> @@ -411,6 +413,89 @@ test_that(".hasFilledPeaks works with XcmsExperimentHdf5", { expect_false(.hasFilledPeaks(xmse_h5)) }) +test_that("chromatogram,XcmsExperimentHdf5 works", { + expect_error(chromatogram(xmse_h5, adjustedRtime = FALSE), "unused") + expect_warning(res <- chromatogram(xmse_h5, include = "apex_within", + return.type = "MChromatograms"), + "deprecated") + expect_s4_class(res, "MChromatograms") + expect_true(nrow(res) == 1L) + ref <- chromatogram(faahko_od) + expect_equal(intensity(res[1, 1]), unname(intensity(ref[1, 1]))) + + rtr <- c(2600, 2700) + mzr <- c(340, 400) + res <- chromatogram(xmse_h5, mz = mzr, rt = rtr) + expect_s4_class(res, "XChromatograms") + expect_true(nrow(res) == 1L) + expect_true(nrow(chromPeaks(res)) > 0) + expect_true(all(chromPeaks(res)[, "mz"] >= 340 & + chromPeaks(res)[, "mz"] <= 400)) + expect_true(all(chromPeaks(res[1, 1])[, "sample"] == 1L)) + expect_true(all(chromPeaks(res[1, 2])[, "sample"] == 2L)) + expect_true(all(chromPeaks(res[1, 3])[, "sample"] == 3L)) + ref <- chromatogram(xod_x, mz = mzr, rt = rtr) + expect_equal(unname(chromPeaks(res)), unname(chromPeaks(ref))) + + ## with features + res <- chromatogram( + xmseg_full_h5, mz = chromPeaks(xmseg_full_h5)[1:5, c("mzmin", "mzmax")], + rt = chromPeaks(xmseg_full_h5)[1:5, c("rtmin", "rtmax")], + chunkSize = 2L, BPPARAM = bpparam(), msLevel = 1L, + aggregationFun = "sum", isolationWindow = NULL, + chromPeaks = "apex_within", return.type = "XChromatograms") + expect_true(nrow(featureDefinitions(res)) == 2) + expect_true(all(unlist(featureDefinitions(res)$peakidx) %in% + seq_len(nrow(chromPeaks(res))))) + ref <- chromatogram( + xmseg_full_ref, + mz = chromPeaks(xmseg_full_h5)[1:5, c("mzmin", "mzmax")], + rt = chromPeaks(xmse_full_h5)[1:5, c("rtmin", "rtmax")]) + a <- featureDefinitions(res) + b <- featureDefinitions(res) + expect_true(all(colnames(a) %in% colnames(b))) + expect_equal(unname(a[, colnames(b)]), unname(b[, colnames(b)])) + + ## MS2 data. + res <- chromatogram(xmseg_full_h5, msLevel = 2L, + mz = chromPeaks(xmse_full_h5)[1:5, c("mzmin", "mzmax")], + rt = chromPeaks(xmse_full_h5)[1:5, c("rtmin", "rtmax")]) + expect_true(validObject(res)) + expect_true(length(intensity(res[[1L]])) == 0) + expect_true(length(intensity(res[[2L]])) == 0) + expect_s4_class(res, "XChromatograms") + expect_true(nrow(chromPeaks(res)) == 0) + + ## Defining only mz or rt. + rtr <- c(2600, 2700) + mzr <- c(340, 400) + res <- chromatogram(xmse_h5, mz = mzr) + expect_s4_class(res, "XChromatograms") + expect_true(nrow(res) == 1L) + expect_true(nrow(chromPeaks(res)) > 0) + expect_true(all(chromPeaks(res)[, "mz"] >= 340 & + chromPeaks(res)[, "mz"] <= 400)) + expect_true(all(chromPeaks(res[1, 1])[, "sample"] == 1L)) + expect_true(all(chromPeaks(res[1, 2])[, "sample"] == 2L)) + expect_true(all(chromPeaks(res[1, 3])[, "sample"] == 3L)) + rrt <- range(lapply(res, rtime)) + expect_true(rrt[1] < 2600) + expect_true(rrt[2] > 4400) + + res <- chromatogram(xmse_h5, rt = rtr) + expect_s4_class(res, "XChromatograms") + expect_true(nrow(res) == 1L) + expect_true(nrow(chromPeaks(res)) > 0) + expect_true(any(chromPeaks(res)[, "mz"] < 340 | + chromPeaks(res)[, "mz"] > 400)) + expect_true(all(chromPeaks(res[1, 1])[, "sample"] == 1L)) + expect_true(all(chromPeaks(res[1, 2])[, "sample"] == 2L)) + expect_true(all(chromPeaks(res[1, 3])[, "sample"] == 3L)) + rrt <- range(lapply(res, rtime)) + expect_true(rrt[1] >= 2600) + expect_true(rrt[2] <= 2700) +}) + ## test_that(".h5_feature_chrom_peaks_sample works", { ## cn <- .h5_chrom_peaks_colnames(xmseg_full_h5, 1L) ## res <- .h5_feature_chrom_peaks_sample("S3", xmseg_full_h5@hdf5_file, diff --git a/tests/testthat/test_functions-utils.R b/tests/testthat/test_functions-utils.R index d0c18dc64..a58454ff9 100644 --- a/tests/testthat/test_functions-utils.R +++ b/tests/testthat/test_functions-utils.R @@ -504,4 +504,4 @@ test_that(".match_last works", { res <- .match_last(c("c", "a", "d"), a) expect_equal(res, c(3L, 4L, NA_integer_)) -}) +}) \ No newline at end of file