diff --git a/DESCRIPTION b/DESCRIPTION index d3f7cca9..af30f5c3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: xcms -Version: 4.5.2 +Version: 4.5.3 Title: LC-MS and GC-MS Data Analysis Description: Framework for processing and visualization of chromatographically separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF, @@ -159,4 +159,3 @@ Collate: 'writemztab.R' 'xcmsSource.R' 'zzz.R' - diff --git a/NAMESPACE b/NAMESPACE index 24fb6605..7c186a54 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -95,7 +95,7 @@ S3method(plot, xcmsEIC) S3method(split, xcmsSet) S3method(c, xcmsSet) S3method(c, XCMSnExp) - +S3method(c, XcmsExperiment) S3method(split, xcmsRaw) exportClasses( @@ -461,7 +461,8 @@ export("CentWaveParam", "CleanPeaksParam", "MergeNeighboringPeaksParam", "FilterIntensityParam", - "ChromPeakAreaParam") + "ChromPeakAreaParam", + "BetaDistributionParam") ## Param class methods. ## New Classes @@ -530,7 +531,8 @@ exportMethods("hasChromPeaks", "featureSpectra", "chromPeakSpectra", "chromPeakChromatograms", - "featureChromatograms" + "featureChromatograms", + "chromPeakSummary" ) ## feature grouping functions and methods. diff --git a/NEWS.md b/NEWS.md index 6d38767e..a45421b3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,13 +1,26 @@ -# xcms 4.5.2 +# xcms 4.5 + +## Changes in version 4.5.3 + +- Address issue #765: peak detection on chromatographic data: report a + chromatogram's `"mz"`, `"mzmin"` and `"mzmax"` as the mean m/z and lower and + upper m/z in the `chromPeaks()` matrix. +- Fix calculation of the correlation coefficient for peak shape similarity with + an idealized bell shape (*beta*) during gap filling for centWave-based + chromatographic peak detection with parameter `verboseBetaColumns = TRUE`. +- Add `chromPeakSummary` generic (issue #705). +- Add `chromPeakSummary()` method to calculate the *beta* quality metrics. +- Add `c()` method to combine multiple `XcmsExperiment` objects into one. 
+- Add a method to coerce from `XCMSnExp` to `XcmsExperiment` objects. ## Changes in version 4.5.2 - Small update to `featureSpectra()` and `chromPeakSpectra()` to allow addition of `chromPeaks()` and `featuresDefinitions()` columns to be added to the - `Spectra` output. -- Tidied the `xcms` vignette, to order the filtering of features and remove - the outdated normalisation paragraph.In depth discussion on this subject can - be found on `metabonaut`. + `Spectra` output. +- Tidied the `xcms` vignette, to order the filtering of features and remove + the outdated normalisation paragraph.In depth discussion on this subject can + be found on `metabonaut`. ## Changes in version 4.5.1 @@ -18,8 +31,8 @@ ## Changes in version 4.3.4 - Small update to the `matchLamaChromPeaks()` function to get the chromPeaksId - of the chromPeaks matched with Lamas. -- Small fix to the .yml file for the github actions, so they do not crash on + of the chromPeaks matched with Lamas. +- Small fix to the .yml file for the github actions, so they do not crash on warnings. ## Changes in version 4.3.3 diff --git a/R/AllGenerics.R b/R/AllGenerics.R index a53f91bc..5bf38df8 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -98,7 +98,7 @@ setGeneric("addProcessHistory", function(object, ...) #' parameter in \code{\link{profile-matrix}} documentation for more details. #' #' @param BPPARAM parallel processing setup. Defaults to `BPPARAM = bpparam()`. -#' See [bpparam()] for details. +#' See [BiocParallel::bpparam()] for details. #' #' @param centerSample \code{integer(1)} defining the index of the center sample #' in the experiment. It defaults to @@ -143,7 +143,7 @@ setGeneric("addProcessHistory", function(object, ...) #' #' @param family For `PeakGroupsParam`: `character(1)` defining the method for #' loess smoothing. Allowed values are `"gaussian"` and `"symmetric"`. See -#' [loess()] for more information. +#' [stats::loess()] for more information. 
#' #' @param gapExtend For `ObiwarpParam`: `numeric(1)` defining the penalty for #' gap enlargement. The default value for `gapExtend` depends on the value @@ -177,8 +177,8 @@ setGeneric("addProcessHistory", function(object, ...) #' @param msLevel For `adjustRtime`: `integer(1)` defining the MS level on #' which the alignment should be performed. #' -#' @param object For `adjustRtime`: an [OnDiskMSnExp()], [XCMSnExp()], -#' [MsExperiment()] or [XcmsExperiment()] object. +#' @param object For `adjustRtime`: an [MSnbase::OnDiskMSnExp()], [XCMSnExp()], +#' [MsExperiment::MsExperiment()] or [XcmsExperiment()] object. #' #' @param param The parameter object defining the alignment method (and its #' setting). @@ -212,7 +212,7 @@ setGeneric("addProcessHistory", function(object, ...) #' #' @param span For `PeakGroupsParam`: `numeric(1)` defining #' the degree of smoothing (if `smooth = "loess"`). This parameter is -#' passed to the internal call to [loess()]. +#' passed to the internal call to [stats::loess()]. #' #' @param subset For `ObiwarpParam` and `PeakGroupsParam`: `integer` with the #' indices of samples within the experiment on which the alignment models @@ -463,7 +463,8 @@ setGeneric("chromPeakData<-", function(object, value) #' The columns will be named as they are written in the `chromPeaks` object #' with a prefix `"chrom_peak_"`. Defaults to `c("mz", "rt")`. #' -#' @param BPPARAM parallel processing setup. Defaults to [bpparam()]. +#' @param BPPARAM parallel processing setup. Defaults to +#' [BiocParallel::bpparam()]. #' #' @param ... ignored. #' @@ -545,6 +546,66 @@ setGeneric("chromPeakData<-", function(object, value) setGeneric("chromPeakSpectra", function(object, ...) 
standardGeneric("chromPeakSpectra")) +#' @title Chromatographic peak summaries +#' +#' @name chromPeakSummary +#' +#' @description +#' +#' The `chromPeakSummary()` method calculates summary statistics or other +#' metrics for each of the identified chromatographic peaks in an *xcms* result +#' object, such as the [XcmsExperiment()]. Different metrics can be calculated, +#' depending upon (and configured by) using dedicated *parameter* classes. As a +#' result, the method returns a `matrix` or `data.frame` with one row per +#' chromatographic peak. Each column contains calculated values, depending on +#' the used method/parameter class. +#' +#' Currently implemented methods/parameter classes are: +#' +#' - `BetaDistributionParam`: calculates the *beta_cor* and *beta_snr* quality +#' metrics as described in Kumler 2023 representing the result from a +#' (correlation) test of similarity (using Pearson's correlation coefficient) +#' to a bell curve and the signal-to-noise ratio calculated on the residuals +#' of this test. +#' +#' @param BPPARAM Parallel processing setup. See +#' [BiocParallel::bpparam()] for details. +#' +#' @param chunkSize `integer(1)` defining the number of samples from which data +#' should be loaded and processed at a time. +#' +#' @param msLevel `integer(1)` with the MS level of the chromatographic peaks +#' on which the metric should be calculated. +#' +#' @param object an *xcms* result object containing information on +#' identified chromatographic peaks. +#' +#' @param param a parameter object defining the method/summaries that should +#' be calculated (see description above for supported parameter classes). +#' +#' @param ... additional arguments passed to the method implementation. +#' +#' @return +#' +#' A `matrix` or `data.frame` with the same number of rows as there are +#' chromatographic peaks. Columns contain the calculated values. The number of +#' columns, their names and content depend on the used parameter object. 
See +#' the respective documentation above for more details. +#' +#' @author Pablo Vangeenderhuysen, Johannes Rainer, William Kumler +#' +#' @md +#' +#' @references +#' +#' Kumler W, Hazelton B J and Ingalls A E (2023) "Picky with peakpicking: +#' assessing chromatographic peak quality with simple metrics in metabolomics" +#' *BMC Bioinformatics* 24(1):404. doi: 10.1186/s12859-023-05533-4 +#' +#' @export +setGeneric("chromPeakSummary", function(object, param, ...) + standardGeneric("chromPeakSummary")) + setGeneric("collect", function(object, ...) standardGeneric("collect")) setGeneric("consecMissedLimit", function(object, ...) standardGeneric("consecMissedLimit")) @@ -642,8 +703,8 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' chromatogram. #' #' @param BPPARAM For `object` being an `XcmsExperiment`: parallel processing -#' setup. Defaults to `BPPARAM = bpparam()`. See [bpparam()] for more -#' information. +#' setup. Defaults to `BPPARAM = bpparam()`. See [BiocParallel::bpparam()] +#' for more information. #' #' @param chunkSize For `object` being an `XcmsExperiment`: `integer(1)` #' defining the number of files from which the data should be loaded at @@ -810,7 +871,8 @@ setGeneric("featureDefinitions<-", function(object, value) #' spectra per feature). #' #' The information from `featureDefinitions` for each feature can be included -#' in the returned [Spectra()] object using the `featureColumns` parameter. +#' in the returned [Spectra::Spectra()] object using the `featureColumns` +#' parameter. #' This is useful for keeping details such as the median retention time (`rtmed`) #' or median m/z (`mzmed`). The columns will retain their names as specified #' in the `featureDefinitions` object, prefixed by `"feature_"` @@ -819,9 +881,11 @@ setGeneric("featureDefinitions<-", function(object, value) #' as a metadata column named `"feature_id"`. 
#' #' See also [chromPeakSpectra()], as it supports a similar parameter for -#' including columns from the chromatographic peaks in the returned spectra object. +#' including columns from the chromatographic peaks in the returned spectra +#' object. #' These parameters can be used in combination to include information from both -#' the chromatographic peaks and the features in the returned [Spectra()]. +#' the chromatographic peaks and the features in the returned +#' [Spectra::Spectra()]. #' The *peak ID* (i.e., the row name of the peak in the `chromPeaks` matrix) #' is added as a metadata column named `"chrom_peak_id"`. #' @@ -847,7 +911,8 @@ setGeneric("featureDefinitions<-", function(object, value) #' #' @return #' -#' The function returns either a [Spectra()] (for `return.type = "Spectra"`) +#' The function returns either a [Spectra::Spectra()] (for +#' `return.type = "Spectra"`) #' or a `List` of `Spectra` (for `return.type = "List"`). For the latter, #' the order and the length matches parameter `features` (or if no `features` #' is defined the order of the features in `featureDefinitions(object)`). @@ -1146,7 +1211,7 @@ setGeneric("filterFeatureDefinitions", function(object, ...) #' object will remove previous results. #' #' @param BPPARAM Parallel processing setup. Uses by default the system-wide -#' default setup. See [bpparam()] for more details. +#' default setup. See [BiocParallel::bpparam()] for more details. #' #' @param chunkSize `integer(1)` for `object` being an `MsExperiment` or #' [XcmsExperiment()]: defines the number of files (samples) for which the @@ -1165,14 +1230,15 @@ setGeneric("filterFeatureDefinitions", function(object, ...) #' will thus in most settings cause an out-of-memory error. #' By setting `chunkSize = -1` the peak detection will be performed #' separately, and in parallel, for each sample. This will however not work -#' for all `Spectra` *backends* (see eventually [Spectra()] for details). 
+#' for all `Spectra` *backends* (see eventually [Spectra::Spectra()] for +#' details). #' #' @param msLevel `integer(1)` defining the MS level on which the #' chromatographic peak detection should be performed. #' #' @param object The data object on which to perform the peak detection. Can be -#' an [OnDiskMSnExp()], [XCMSnExp()], [MChromatograms()] or [MsExperiment()] -#' object. +#' an [MSnbase::OnDiskMSnExp()], [XCMSnExp()], [MSnbase::MChromatograms()] +#' or [MsExperiment::MsExperiment()] object. #' #' @param param The parameter object selecting and configuring the algorithm. #' @@ -1242,7 +1308,8 @@ setGeneric("findChromPeaks", function(object, param, ...) #' more information. #' #' @param BPPARAM if `object` is an `MsExperiment` or `XcmsExperiment`: -#' parallel processing setup. See [bpparam()] for more information. +#' parallel processing setup. See [BiocParallel::bpparam()] for more +#' information. #' #' @param ... currently not used. #' @@ -1537,7 +1604,8 @@ setGeneric("loadRaw", function(object, ...) standardGeneric("loadRaw")) #' chromatographic peaks into features by providing their index in the #' object's `chromPeaks` matrix. #' -#' @param BPPARAM parallel processing settings (see [bpparam()] for details). +#' @param BPPARAM parallel processing settings (see [BiocParallel::bpparam()] +#' for details). #' #' @param chromPeaks For `manualChromPeaks`: `matrix` defining the boundaries #' of the chromatographic peaks with one row per chromatographic peak and @@ -1745,9 +1813,9 @@ setGeneric("rawMZ", function(object, ...) standardGeneric("rawMZ")) #' Each MS2 chromatographic peak selected for an MS1 peak will thus represent #' one **mass peak** in the reconstructed spectrum. #' -#' The resulting [Spectra()] object provides also the peak IDs of the MS2 -#' chromatographic peaks for each spectrum as well as their correlation value -#' with spectra variables *ms2_peak_id* and *ms2_peak_cor*. 
+#' The resulting [Spectra::Spectra()] object provides also the peak IDs of +#' the MS2 chromatographic peaks for each spectrum as well as their +#' correlation value with spectra variables *ms2_peak_id* and *ms2_peak_cor*. #' #' @param object `XCMSnExp` with identified chromatographic peaks. #' @@ -1774,8 +1842,8 @@ setGeneric("rawMZ", function(object, ...) standardGeneric("rawMZ")) #' `chromPeaks`) of MS1 peaks for which MS2 spectra should be reconstructed. #' By default they are reconstructed for all MS1 chromatographic peaks. #' -#' @param BPPARAM parallel processing setup. See [bpparam()] for more -#' information. +#' @param BPPARAM parallel processing setup. See [BiocParallel::bpparam()] +#' for more information. #' #' @param return.type `character(1)` defining the type of the returned object. #' Only `return.type = "Spectra"` is supported, `return.type = "MSpectra"` @@ -1785,14 +1853,14 @@ setGeneric("rawMZ", function(object, ...) standardGeneric("rawMZ")) #' #' @return #' -#' - [Spectra()] object (defined in the `Spectra` package) with the +#' - [Spectra::Spectra()] object (defined in the `Spectra` package) with the #' reconstructed MS2 spectra for all MS1 peaks in `object`. Contains #' empty spectra (i.e. without m/z and intensity values) for MS1 peaks for #' which reconstruction was not possible (either no MS2 signal was recorded #' or the correlation of the MS2 chromatographic peaks with the MS1 #' chromatographic peak was below threshold `minCor`. Spectra variables -#' `"ms2_peak_id"` and `"ms2_peak_cor"` (of type [CharacterList()] -#' and [NumericList()] with length equal to the number of peaks per +#' `"ms2_peak_id"` and `"ms2_peak_cor"` (of type [IRanges::CharacterList()] +#' and [IRanges::NumericList()] with length equal to the number of peaks per #' reconstructed MS2 spectrum) providing the IDs and the correlation of the #' MS2 chromatographic peaks from which the MS2 spectrum was reconstructed. 
#' As retention time the median retention times of all MS2 chromatographic @@ -1888,7 +1956,7 @@ setGeneric("reconstructChromPeakSpectra", function(object, ...) #' #' @param BPPARAM parameter object to set up parallel processing. Uses the #' default parallel processing setup returned by `bpparam()`. See -#' [bpparam()] for details and examples. +#' [BiocParallel::bpparam()] for details and examples. #' #' @param chunkSize For `refineChromPeaks` if `object` is either an #' `XcmsExperiment`: `integer(1)` defining the number of files (samples) diff --git a/R/DataClasses.R b/R/DataClasses.R index d32073e3..c3fd0198 100644 --- a/R/DataClasses.R +++ b/R/DataClasses.R @@ -2182,3 +2182,8 @@ setClass("FilterIntensityParam", msg else TRUE }) + +setClass("BetaDistributionParam", + contains = "Param" + ) + \ No newline at end of file diff --git a/R/MsExperiment-functions.R b/R/MsExperiment-functions.R index cb73888c..ef98d3d8 100644 --- a/R/MsExperiment-functions.R +++ b/R/MsExperiment-functions.R @@ -546,3 +546,34 @@ x@sampleDataLinks[["spectra"]] <- sdl x } + +#' WARNING: this only joins @sampleData, @spectra and +#' `@sampleDataLinks[["spectra"]]`! All other slots are ignored. +#' +#' @noRd +.mse_combine <- function(x) { + if (!all(vapply(x, inherits, NA, "MsExperiment"))) + stop("Only objects extending 'MsExperiment' accepted as input.") + ## check other slots + lapply(x, function(z) { + if (length(z@experimentFiles) || length(z@qdata) || length(z@otherData)) + stop("Slots 'experimentFiles', 'qdata' or 'otherData' are not ", + "empty! Can only combine objects for which these data slots ", + "are empty.", call. 
= FALSE) + }) + res <- x[[1L]] + res@sampleData <- do.call(MsCoreUtils::rbindFill, lapply(x, sampleData)) + res@spectra <- do.call(c, lapply(x, spectra)) + sl <- lapply(x, function(z) z@sampleDataLinks[["spectra"]]) + nsamp <- lengths(x) + nsamp <- c(0, cumsum(nsamp)[-length(nsamp)]) + nspec <- vapply(sl, nrow, NA_integer_) + nspec <- c(0, cumsum(nspec)[-length(nspec)]) + res@sampleDataLinks[["spectra"]] <- do.call( + rbind, mapply(function(z, i, j) { + z[, 1L] <- z[, 1L] + i + z[, 2L] <- z[, 2L] + j + z + }, sl, nsamp, nspec, SIMPLIFY = FALSE, USE.NAMES = FALSE)) + res +} diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index b0906bb8..bd605c7a 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -520,8 +520,9 @@ ## consider adding 0 or NA intensity for those. mat <- do.call(rbind, xsub) if (nrow(mat)) { + nr <- vapply(xsub, nrow, NA_integer_) ## can have 0, 1 or x values per rt; repeat rt accordingly - rts <- rep(rt[keep], vapply(xsub, nrow, integer(1L))) + rts <- rep(rt[keep], nr) maxi <- which.max(mat[, 2L])[1L] mmz <- do.call(mzCenterFun, list(mat[, 1L], mat[, 2L])) if (is.na(mmz)) mmz <- mat[maxi, 1L] @@ -530,15 +531,54 @@ sum(mat[, 2L], na.rm = TRUE) * ((rtr[2L] - rtr[1L]) / max(1L, (length(keep) - 1L))) ) - if ("beta_cor" %in% cn) + if ("beta_cor" %in% cn) { res[i, c("beta_cor", "beta_snr")] <- .get_beta_values( - mat[, 2L], rts) + vapply(xsub[nr > 0], function(z) sum(z[, "intensity"]), + NA_real_), + rt[keep][nr > 0]) + } } } } res[!is.na(res[, "maxo"]), , drop = FALSE] } + +#' Calculates quality metrics for a chromatographic peak. +#' +#' @param x `list` of peak matrices (from a single MS level and from a single +#' file/sample). +#' +#' @param rt retention time for each peak matrix. +#' +#' @param peakArea `matrix` defining the chrom peak area. +#' +#' @author Pablo Vangeenderhuysen +#' +#' @noRd +.chrom_peak_beta_metrics <- function(x, rt, peakArea, ...) 
{ + res <- matrix(NA_real_, ncol = 2L, nrow = nrow(peakArea)) + rownames(res) <- rownames(peakArea) + colnames(res) <- c("beta_cor","beta_snr") + for (i in seq_len(nrow(res))) { + rtr <- peakArea[i, c("rtmin", "rtmax")] + keep <- which(between(rt, rtr)) + if (length(keep)) { + xsub <- lapply(x[keep], .pmat_filter_mz, + mzr = peakArea[i, c("mzmin", "mzmax")]) + nr <- vapply(xsub, nrow, NA_integer_) + res[i, c("beta_cor", "beta_snr")] <- .get_beta_values( + vapply(xsub[nr > 0], function(z) sum(z[, "intensity"]), NA_real_), + rt[keep][nr > 0]) + } + } + res +} + + + + + #' Difference to the original code is that the weighted mean is also calculated #' if some of the peak intensities in the profile matrix are 0 #' @@ -1169,6 +1209,70 @@ XcmsExperiment <- function() { as(MsExperiment(), "XcmsExperiment") } +#' Convert a XCMSnExp to a XcmsExperiment. +#' +#' @noRd +.xcms_n_exp_to_xcms_experiment <- function(from) { + requireNamespace("MsExperiment", quietly = TRUE) + ## Check requirements: + ## - an empty processing queue + if (length(from@spectraProcessingQueue)) + stop("Processing queue is not empty. Can only convert objects with ", + "an empty spectra processing queue.") + res <- readMsExperiment(spectraFiles = fileNames(from), + sampleData = MSnbase::pData(from)) + res <- as(res, "XcmsExperiment") + res@processHistory <- from@.processHistory + res <- filterSpectra( + res, filterRt, rt = range(rtime(from, adjusted = FALSE))) + if (hasAdjustedRtime(from)) { + rts <- rtime(from) + if (length(rts) != length(res@spectra)) + stop("Number of spectra don't match. Was the XCMSnExp subset?") + res@spectra$rtime_adjusted <- unname(rts) + } + if (hasChromPeaks(from)) { + res@chromPeaks <- chromPeaks(from) + res@chromPeakData <- as.data.frame(chromPeakData(from)) + } + if (hasFeatures(from)) + res@featureDefinitions <- as.data.frame(featureDefinitions(from)) + res +} + +#' Combine `XcmsExperiment` objects. Only combining of chrom peaks is supported. 
+#' Any alignment or correspondence results are removed. +#' +#' @param x `list` of `XcmsExperiment` objects. +#' +#' @noRd +.xmse_combine <- function(x) { + x <- lapply(x, function(z) { + if (!is(z, "XcmsExperiment")) + stop("Only 'XcmsExperiment' objects accepted.") + if (hasFeatures(z)) + z <- dropFeatureDefinitions(z) + if (hasAdjustedRtime(z)) + z <- dropAdjustedRtime(z) + z + }) + res <- .mse_combine(x) + nsamp <- lengths(x) + nsamp <- c(0, cumsum(nsamp)[-length(nsamp)]) + res@chromPeaks <- do.call(rbindFill, mapply(function(z, i) { + z <- chromPeaks(z) + z[, "sample"] <- z[, "sample"] + i + z + }, x, nsamp, SIMPLIFY = FALSE, USE.NAMES = FALSE)) + rownames(res@chromPeaks) <- .featureIDs(nrow(res@chromPeaks), "CP") + res@chromPeakData <- do.call( + rbindFill, + lapply(x, chromPeakData, return.type = "data.frame")) + rownames(res@chromPeakData) <- rownames(res@chromPeaks) + res@processHistory <- do.call(c, lapply(x, processHistory)) + res +} + #' function to convert an XcmsExperiment into an XCMSnExp. #' #' @author Johannes Rainer @@ -1191,7 +1295,7 @@ XcmsExperiment <- function() { " of the Spectra object is not empty.") ## -> OnDiskMSnExp n@processingData <- new("MSnProcess", - processing = paste0("Data converted [", date(), "]"), + processing = paste0("Data converted [", date(),"]"), files = fileNames(from), smoothed = NA) n@phenoData <- new("NAnnotatedDataFrame", as.data.frame(sampleData(from))) diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 487b868c..daffabec 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -36,7 +36,7 @@ #' be reported. This can be performed using the [fillChromPeaks()] function. #' #' -#' @section Subsetting and filtering: +#' @section Subset, filter and combine: #' #' - `[`: subset an `XcmsExperiment` by **sample** (parameter `i`). 
Subsetting #' will by default drop correspondence results (as subsetting by samples will @@ -47,6 +47,18 @@ #' `keepChromPeaks` (by default `TRUE`), `keepAdjustedRtime` (by default #' `FALSE`) and `keepFeatures` (by default `FALSE`). #' +#' - `c`: multiple `XcmsExperiment` objects can be combined into one using the +#' `c()` function. This requires however that all the `XcmsExperiments`' +#' `Spectra` objects use the same type of `MsBackend` and that their +#' processing queues are empty. Also, only combining of peak detection +#' results is supported. Any eventually present alignment or correspondence +#' results will be dropped before combining the `XcmsExperiment` objects. +#' Finally, at present, only the MS data of the individual `XcmsExperiment` +#' objects is combined; `c()` fails with an error if any of the `@qdata`, +#' `@otherData` or `@experimentFiles` slots is not empty. +#' The function returns an `XcmsExperiment` object with the combined MS data +#' (`Spectra` objects) and chromatographic peak detection results. #' #' - `filterChromPeaks`: filter chromatographic peaks of an `XcmsExperiment` #' keeping only those specified with parameter `keep`. Returns the #' `XcmsExperiment` with the filtered data. Chromatographic peaks to @@ -112,9 +124,9 @@ #' Both parameters are expected to be numerical two-column matrices with #' the first column defining the lower and the second the upper margin. #' Each row can define a separate m/z - retention time region. 
Currently -#' the function returns a [MChromatograms()] object for `object` being a -#' `MsExperiment` or, for `object` being an `XcmsExperiment`, either a -#' `MChromatograms` or [XChromatograms()] depending on parameter +#' the function returns a [MSnbase::MChromatograms()] object for `object` +#' being a `MsExperiment` or, for `object` being an `XcmsExperiment`, +#' either a `MChromatograms` or [XChromatograms()] depending on parameter #' `return.type` (can be either `"MChromatograms"` or `"XChromatograms"`). #' For the latter also chromatographic peaks detected within the provided #' m/z and retention times are returned. Parameter `chromPeaks` allows @@ -266,9 +278,9 @@ #' defining how these parameters should be used to subset the returned #' `data.frame`. See parameter descriptions for details. #' -#' - `featureSpectra`: returns a [Spectra()] or `List` of `Spectra` with -#' (MS1 or MS2) spectra associated to each feature. See [featureSpectra()] -#' for more details and available parameters. +#' - `featureSpectra`: returns a [Spectra::Spectra()] or `List` of `Spectra` +#' with (MS1 or MS2) spectra associated to each feature. See +#' [featureSpectra()] for more details and available parameters. #' #' - `featuresSummary`: calculate a simple summary on features. See #' [featureSummary()] for details. @@ -314,12 +326,12 @@ #' chromatographic peaks assigned to the feature in the same sample. #' #' - `quantify`: extract the correspondence analysis results as a -#' [SummarizedExperiment()]. The feature *values* are used as `assay` in -#' the returned `SummarizedExperiment`, `rowData` contains the -#' `featureDefinitions` (without column `"peakidx"`) and `colData` the -#' `sampleData` of `object`. Additional parameters to the `featureValues` -#' function (that is used to extract the feature value matrix) can be -#' passed *via* `...`. +#' [SummarizedExperiment::SummarizedExperiment()]. 
The feature *values* +#' are used as `assay` in the returned `SummarizedExperiment`, `rowData` +#' contains the `featureDefinitions` (without column `"peakidx"`) and +#' `colData` the `sampleData` of `object`. Additional parameters to the +#' `featureValues` function (that is used to extract the feature value +#' matrix) can be passed *via* `...`. #' #' @section Visualization: #' @@ -348,7 +360,10 @@ #' - `uniqueMsLevels`: returns the unique MS levels of the spectra in `object`. #' #' The functions listed below ensure compatibility with the *older* -#' [XCMSnExp()] xcms result object. +#' [XCMSnExp()] xcms result object. Also, an `XcmsExperiment` can be coerced +#' to the *older* `XCMSnExp` class using `as(object, "XCMSnExp")` same as a +#' `XCMSnExp` class can be coerced to `XcmsExperiment` using +#' `as(object, "XcmsExperiment")`. #' #' - `fileNames`: returns the original data file names for the spectra data. #' Ideally, the `dataOrigin` or `dataStorage` spectra variables from the @@ -392,7 +407,8 @@ #' chromatogram (BPC). #' #' @param BPPARAM For `chromatogram`: parallel processing setup. Defaults -#' to `BPPARAM = bpparam()`. See [bpparam()] for more information. +#' to `BPPARAM = bpparam()`. See [BiocParallel::bpparam()] for more +#' information. #' #' @param chromPeaks For `chromatogram`: `character(1)` defining which #' chromatographic peaks should be returned. Can be either @@ -765,6 +781,14 @@ setMethod("show", "XcmsExperiment", function(object) { "\n") }) +#' @rdname XcmsExperiment +c.XcmsExperiment <- function(...) { + l <- list(...) + if (length(l) == 1L) + return(l[[1L]]) + .xmse_combine(l) +} + ################################################################################ ## Filtering and subsetting ################################################################################ @@ -2024,3 +2048,39 @@ setMethod( object[i = sort(unique(file)), keepAdjustedRtime = keepAdjustedRtime, keepFeatures = keepFeatures, ...] 
}) + +#' @rdname chromPeakSummary +setMethod( + "chromPeakSummary", + signature(object = "XcmsExperiment", param = "BetaDistributionParam"), + function(object, param, msLevel = 1L, chunkSize = 2L, BPPARAM = bpparam()) { + if (length(msLevel) != 1) + stop("Can only perform peak metrics for one MS level at a time.") + if (!hasChromPeaks(object, msLevel = msLevel)) + stop("No ChromPeaks definitions for MS level ", msLevel, " present.") + ## Define region to calculate metrics from for each file + cp <- chromPeaks(object, msLevel = msLevel) + f <- factor(cp[,"sample"], seq_along(object)) + pal <- split.data.frame(cp[, c("mzmin", "mzmax", "rtmin", "rtmax")], f) + names(pal) <- seq_along(pal) + ## Manual chunk processi ng because we have to split `object` and `pal` + idx <- seq_along(object) + chunks <- split(idx, ceiling(idx / chunkSize)) + pb <- progress_bar$new(format = paste0("[:bar] :current/:", + "total (:percent) in ", + ":elapsed"), + total = length(chunks) + 1L, clear = FALSE) + pb$tick(0) + # mzf <- "wMean" + res <- lapply(chunks, function(z, ...) { + pb$tick() + .xmse_integrate_chrom_peaks( + .subset_xcms_experiment(object, i = z, keepAdjustedRtime = TRUE, + ignoreHistory = TRUE), + pal = pal[z], intFun = .chrom_peak_beta_metrics, + msLevel = msLevel, BPPARAM = BPPARAM) + }) + res <- do.call(rbind, res) + pb$tick() + res + }) diff --git a/R/do_findChromPeaks-functions.R b/R/do_findChromPeaks-functions.R index 9767c8f0..11b59e9b 100644 --- a/R/do_findChromPeaks-functions.R +++ b/R/do_findChromPeaks-functions.R @@ -3720,14 +3720,20 @@ peaksWithCentWave <- function(int, rt, #' requires at least 5 scans or it will return NA for both parameters. #' #' @param intensity A numeric vector corresponding to the peak intensities +#' #' @param rtime A numeric vector corresponding to the retention times of each -#' intensity. If not provided, intensities will be assumed to be equally spaced. +#' intensity. 
Retention times are expected to be in increasing order +#' without duplicates. If not provided, intensities will be assumed to be +#' equally spaced. +#' #' @param skews A numeric vector of the skews to try, corresponding to the -#' shape1 of dbeta with a shape2 of 5. Values less than 5 will be increasingly -#' right-skewed, while values greater than 5 will be left-skewed. +#' shape1 of dbeta with a shape2 of 5. Values less than 5 will be +#' increasingly right-skewed, while values greater than 5 will be +#' left-skewed. +#' #' @param zero.rm Boolean value controlling whether "missing" scans are dropped -#' prior to curve fitting. The default, TRUE, will remove intensities of zero -#' or NA +#' prior to curve fitting. The default, TRUE, will remove intensities of +#' zero or NA #' #' @author William Kumler #' @@ -3740,21 +3746,22 @@ peaksWithCentWave <- function(int, rt, rtime <- rtime[keep] intensity <- intensity[keep] } - if(length(intensity)<5){ + if (length(intensity) < 5) { best_cor <- NA beta_snr <- NA } else { - beta_sequence <- rep(.scale_zero_one(rtime), each=length(skews)) + beta_sequence <- rep(.scale_zero_one(rtime), each = length(skews)) beta_vals <- t(matrix(dbeta(beta_sequence, shape1 = skews, shape2 = 5), nrow = length(skews))) # matplot(beta_vals) beta_cors <- cor(intensity, beta_vals) best_cor <- max(beta_cors) best_curve <- beta_vals[, which.max(beta_cors)] - noise_level <- sd(diff(.scale_zero_one(best_curve)-.scale_zero_one(intensity))) - beta_snr <- log10(max(intensity)/noise_level) + noise_level <- sd(diff(.scale_zero_one(best_curve) - + .scale_zero_one(intensity))) + beta_snr <- log10(max(intensity) / noise_level) } - c(best_cor=best_cor, beta_snr=beta_snr) + c(best_cor = best_cor, beta_snr = beta_snr) } @@ -3769,5 +3776,5 @@ peaksWithCentWave <- function(int, rt, #' #' @noRd .scale_zero_one <- function(num_vec){ - (num_vec-min(num_vec))/(max(num_vec)-min(num_vec)) + (num_vec-min(num_vec)) / (max(num_vec) - min(num_vec)) } diff --git 
a/R/functions-Params.R b/R/functions-Params.R index d790162c..b79a57b3 100644 --- a/R/functions-Params.R +++ b/R/functions-Params.R @@ -397,3 +397,8 @@ FilterIntensityParam <- function(threshold = 0, nValues = 1L, value = "maxo") { new("FilterIntensityParam", threshold = as.numeric(threshold), nValues = as.integer(nValues), value = value) } + +#' @rdname chromPeakSummary +BetaDistributionParam <- function() { + new("BetaDistributionParam") +} diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index 2bd44572..e2baa6d8 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -279,6 +279,7 @@ dropGenericProcessHistory <- function(x, fun) { valsPerSpect = valsPerSpect, rtrange = rtr, mzrange = mzr) if (length(mtx)) { + ## mtx: time, mz, intensity if (any(!is.na(mtx[, 3]))) { ## How to calculate the area: (1)sum of all intensities / (2)by ## the number of data points (REAL ones, considering also NAs) @@ -290,21 +291,20 @@ dropGenericProcessHistory <- function(x, fun) { ## as e.g. centWave. Using max(1, ... to avoid getting Inf in ## case the signal is based on a single data point. 
## (3) rtr[2] - rtr[1] - res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) * + res[i, "into"] <- sum(mtx[, 3L], na.rm = TRUE) * ((rtr[2] - rtr[1]) / max(1, (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1))) - maxi <- which.max(mtx[, 3]) + maxi <- which.max(mtx[, 3L]) res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)] res[i, c("rtmin", "rtmax")] <- rtr ## Calculate the intensity weighted mean mz meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3])) if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2] res[i, "mz"] <- meanMz - - if ("beta_cor" %in% cn) { + if ("beta_cor" %in% cn) res[i, c("beta_cor", "beta_snr")] <- .get_beta_values( - mtx[, 3L], mtx[, 1L]) - } + vapply(split(mtx[, 3L], mtx[, 1L]), sum, NA_real_), + unique(mtx[, 1L])) } else { res[i, ] <- rep(NA_real_, ncols) } diff --git a/R/functions-XChromatogram.R b/R/functions-XChromatogram.R index b17f591d..bc5a4bc0 100644 --- a/R/functions-XChromatogram.R +++ b/R/functions-XChromatogram.R @@ -44,12 +44,12 @@ #' #' The `XChromatogram` object allows to store chromatographic data (e.g. #' an extracted ion chromatogram) along with identified chromatographic peaks -#' within that data. The object inherits all functions from the [Chromatogram()] -#' object in the `MSnbase` package. +#' within that data. The object inherits all functions from the +#' [MSnbase::Chromatogram()] object in the `MSnbase` package. #' #' Multiple `XChromatogram` objects can be stored in a `XChromatograms` object. -#' This class extends [MChromatograms()] from the `MSnbase` package and allows -#' thus to arrange chromatograms in a matrix-like structure, columns +#' This class extends [MSnbase::MChromatograms()] from the `MSnbase` package +#' and allows thus to arrange chromatograms in a matrix-like structure, columns #' representing samples and rows m/z-retention time ranges. 
#' #' All functions are described (grouped into topic-related sections) after the @@ -59,13 +59,13 @@ #' #' Objects can be created with the contructor function `XChromatogram` and #' `XChromatograms`, respectively. Also, they can be coerced from -#' [Chromatogram] or [MChromatograms()] objects using +#' [Chromatogram] or [MSnbase::MChromatograms()] objects using #' `as(object, "XChromatogram")` or `as(object, "XChromatograms")`. #' #' @section Filtering and subsetting: #' #' Besides classical subsetting with `[` specific filter operations on -#' [MChromatograms()] and `XChromatograms` objects are available. See +#' [MSnbase::MChromatograms()] and `XChromatograms` objects are available. See #' [filterColumnsIntensityAbove()] for more details. #' #' @param rtime For `XChromatogram`: `numeric` with the retention times diff --git a/R/functions-xcmsSet.R b/R/functions-xcmsSet.R index 373ee20f..09159555 100644 --- a/R/functions-xcmsSet.R +++ b/R/functions-xcmsSet.R @@ -217,7 +217,6 @@ xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, } ############################################################ -## c c.xcmsSet <- function(...) { lcsets <- list(...) object <- new("xcmsSet") diff --git a/R/methods-Chromatogram.R b/R/methods-Chromatogram.R index cdff7f10..07f7ff62 100644 --- a/R/methods-Chromatogram.R +++ b/R/methods-Chromatogram.R @@ -24,15 +24,17 @@ #' #' @param BPPARAM a parameter class specifying if and how parallel processing #' should be performed (only for `XChromatograms` objects). It defaults to -#' `bpparam()`. See [bpparam()] for more information. +#' `bpparam()`. See [BiocParallel::bpparam()] for more information. #' #' @param ... currently ignored. #' #' @return #' #' If called on a `Chromatogram` object, the method returns an [XChromatogram] -#' object with the identified peaks. See [peaksWithCentWave()] for details on -#' the peak matrix content. +#' object with the identified peaks. 
Columns `"mz"`, `"mzmin"` and `"mzmax"` in +#' the `chromPeaks()` peak matrix provide the mean m/z and the minimum and +#' maximum m/z value of the `Chromatogram` object. See [peaksWithCentWave()] +#' for details on the remaining columns. #' #' @seealso [peaksWithCentWave()] for the downstream function and [centWave] #' for details on the method. @@ -70,10 +72,18 @@ setMethod("findChromPeaks", signature(object = "Chromatogram", rt = rtime(object)), as(param, "list"))) object <- as(object, "XChromatogram") - chromPeaks(object) <- res + chromPeaks(object) <- .add_mz(res, object@mz) object }) +.add_mz <- function(x, mz = c(NA_real_, NA_real_)) { + nx <- nrow(x) + tmp <- cbind(mz = rep(mean(mz), nx), + mzmin = rep(mz[1L], nx), + mzmax = rep(mz[2L], nx)) + cbind(tmp, x) +} + #' @title matchedFilter-based peak detection in purely chromatographic data #' #' @description @@ -97,8 +107,10 @@ setMethod("findChromPeaks", signature(object = "Chromatogram", #' @return #' #' If called on a `Chromatogram` object, the method returns a `matrix` with -#' the identified peaks. See [peaksWithMatchedFilter()] for details on the -#' matrix content. +#' the identified peaks. Columns `"mz"`, `"mzmin"` and `"mzmax"` in +#' the `chromPeaks()` peak matrix provide the mean m/z and the minimum and +#' maximum m/z value of the `Chromatogram` object. See +#' [peaksWithMatchedFilter()] for details on the remaining columns. #' #' @seealso [peaksWithMatchedFilter()] for the downstream function and #' [matchedFilter] for details on the method. @@ -134,7 +146,7 @@ setMethod("findChromPeaks", signature(object = "Chromatogram", rt = rtime(object)), as(param, "list"))) object <- as(object, "XChromatogram") - chromPeaks(object) <- res + chromPeaks(object) <- .add_mz(res, object@mz) object }) @@ -155,24 +167,24 @@ setMethod("findChromPeaks", signature(object = "Chromatogram", #' chromatogram. See help on `alignRt` in [MSnbase::Chromatogram()] for more #' details.
#' -#' If `correlate` is called on a single [MChromatograms()] object a pairwise -#' correlation of each chromatogram with each other is performed and a `matrix` -#' with the correlation coefficients is returned. +#' If `correlate` is called on a single [MSnbase::MChromatograms()] object a +#' pairwise correlation of each chromatogram with each other is performed and +#' a `matrix` with the correlation coefficients is returned. #' #' Note that the correlation of two chromatograms depends also on their order, #' e.g. `correlate(chr1, chr2)` might not be identical to #' `correlate(chr2, chr1)`. The lower and upper triangular part of the #' correlation matrix might thus be different. #' -#' @param x [Chromatogram()] or [MChromatograms()] object. +#' @param x [MSnbase::Chromatogram()] or [MSnbase::MChromatograms()] object. #' -#' @param y [Chromatogram()] or [MChromatograms()] object. +#' @param y [MSnbase::Chromatogram()] or [MSnbase::MChromatograms()] object. #' #' @param use `character(1)` passed to the `cor` function. See [cor()] for #' details. #' -#' @param method `character(1)` passed to the `cor` function. See [cor()] for -#' details. +#' @param method `character(1)` passed to the `cor` function. See +#' [stats::cor()] for details. #' #' @param align `character(1)` defining the alignment method to be used. See #' help on `alignRt` in [MSnbase::Chromatogram()] for details. The value of @@ -232,7 +244,7 @@ setMethod("correlate", signature = c(x = "Chromatogram", y = "Chromatogram"), #' matching certain conditions (depending on parameter `which`). The #' intensities are actually not *removed* but replaced with `NA_real_`. To #' actually **remove** the intensities (and the associated retention times) -#' use [clean()] afterwards. +#' use [MSnbase::clean()] afterwards. #' #' Parameter `which` allows to specify which intensities should be replaced by #' `NA_real_`. 
By default (`which = "below_threshod"` intensities below @@ -246,8 +258,8 @@ setMethod("correlate", signature = c(x = "Chromatogram", y = "Chromatogram"), #' chromatographic data. #' #' @param object an object representing chromatographic data. Can be a -#' [Chromatogram()], [MChromatograms()], [XChromatogram()] or -#' [XChromatograms()] object. +#' [MSnbase::Chromatogram()], [MSnbase::MChromatograms()], +#' [XChromatogram()] or [XChromatograms()] object. #' #' @param which `character(1)` defining the condition to remove intensities. #' See description for details and options. diff --git a/R/methods-MChromatograms.R b/R/methods-MChromatograms.R index 78bd598f..8599deb1 100644 --- a/R/methods-MChromatograms.R +++ b/R/methods-MChromatograms.R @@ -114,10 +114,10 @@ setMethod("removeIntensity", "MChromatograms", #' #' @description #' -#' These functions allow to filter (subset) [MChromatograms()] or +#' These functions allow to filter (subset) [MSnbase::MChromatograms()] or #' [XChromatograms()] objects, i.e. sets of chromatographic data, without #' changing the data (intensity and retention times) within the individual -#' chromatograms ([Chromatogram()] objects). +#' chromatograms ([MSnbase::Chromatogram()] objects). #' #' - `filterColumnsIntensityAbove`: subsets a `MChromatograms` objects keeping #' only columns (samples) for which `value` is larger than the provided @@ -156,7 +156,7 @@ setMethod("removeIntensity", "MChromatograms", #' columns that should be returned. `n` will be rounded to the closest #' (larger) integer value. #' -#' @param object [MChromatograms()] or [XChromatograms()] object. +#' @param object [MSnbase::MChromatograms()] or [XChromatograms()] object. #' #' @param sortBy for `filterColumnsKeepTop`: the value by which columns should #' be ordered to determine the top n columns. 
Can be either `sortBy = "bpi"` @@ -291,9 +291,10 @@ setMethod("filterColumnsKeepTop", "MChromatograms", #' `plotOverlay` draws chromatographic peak data from multiple (different) #' extracted ion chromatograms (EICs) into the same plot. This allows to #' directly compare the peak shape of these EICs in the same sample. In -#' contrast to the `plot` function for [MChromatograms()] object, which draws -#' the data from the same EIC across multiple samples in the same plot, this -#' function draws the different EICs from the same sample into the same plot. +#' contrast to the `plot` function for [MSnbase::MChromatograms()] object, +#' which draws the data from the same EIC across multiple samples in the +#' same plot, this function draws the different EICs from the same sample +#' into the same plot. #' #' If `plotChromatogramsOverlay` is called on a `XChromatograms` object any #' present chromatographic peaks will also be highlighted/drawn depending on the @@ -307,7 +308,7 @@ setMethod("filterColumnsKeepTop", "MChromatograms", #' @param main optional title of the plot. If not defined, the range of m/z #' values is used. #' -#' @param object [MChromatograms()] or [XChromatograms()] object. +#' @param object [MSnbase::MChromatograms()] or [XChromatograms()] object. #' #' @param peakBg if `object` is a `XChromatograms` object: definition of #' background color(s) for each chromatographic peak. 
Has to be either of diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index ae8a0a71..a276e4fb 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -1445,6 +1445,12 @@ setAs(from = "XcmsExperiment", to = "xcmsSet", def = .XCMSnExp2xcmsSet) setAs(from = "XcmsExperiment", to = "XCMSnExp", def = .xcms_experiment_to_xcms_n_exp) +#' @rdname XcmsExperiment +#' +#' @name XcmsExperiment +setAs(from = "XCMSnExp", to = "XcmsExperiment", + def = .xcms_n_exp_to_xcms_experiment) + #' @rdname XCMSnExp-peak-grouping-results setMethod("quantify", "XCMSnExp", function(object, ...) { .XCMSnExp2SummarizedExperiment(object, ...) @@ -3059,7 +3065,7 @@ setMethod("groupnames", "XCMSnExp", function(object, mzdec = 0, rtdec = 0, #' See documentation of the `software_processing` parameter of #' [mzR::writeMSData()]. #' -#' @param ... Additional parameters to pass down to the [writeMSData()] +#' @param ... Additional parameters to pass down to the [MSnbase::writeMSData()] #' function in the `MSnbase` package, such as `outformat` to specify the #' output format (`"mzml"` or `"mzxml"`) or `copy` to specify whether #' general information from the original MS data files (such as data @@ -3069,7 +3075,7 @@ setMethod("groupnames", "XCMSnExp", function(object, mzdec = 0, rtdec = 0, #' #' @md #' -#' @seealso [writeMSData()] function in the `MSnbase` package. +#' @seealso [MSnbase::writeMSData()] function in the `MSnbase` package. setMethod("writeMSData", signature(object = "XCMSnExp", file = "character"), function(object, file, outformat = c("mzml", "mzxml"), copy = FALSE, software_processing = NULL, ...) 
{ diff --git a/R/methods-XChromatogram.R b/R/methods-XChromatogram.R index c7a24c1c..d0b24589 100644 --- a/R/methods-XChromatogram.R +++ b/R/methods-XChromatogram.R @@ -28,18 +28,21 @@ setMethod("show", "XChromatogram", function(object) { #' retention time range for which peaks should be returned along with #' parameter `type` that defines how *overlapping* is defined (parameter #' description for details). For `XChromatogram` objects the function returns -#' a `matrix` with columns `"rt"` (retention time of the peak apex), -#' `"rtmin"` (the lower peak boundary), `"rtmax"` (the upper peak boundary), -#' `"into"` (the ingegrated peak signal/area of the peak), `"maxo"` (the -#' maximum instensity of the peak and `"sn"` (the signal to noise ratio). +#' a `matrix` with columns `"mz"` (mean m/z value), `"mzmin"` (minimal m/z +#' value) and `"mzmax"` (maximal m/z value), `"rt"` (retention time of the +#' peak apex), `"rtmin"` (the lower peak boundary in retention time +#' dimension), `"rtmax"` (the upper peak boundary in retention time +#' dimension), `"into"` (the integrated peak signal/area of the peak), +#' `"maxo"` (the maximum intensity of the peak) and `"sn"` (the signal to +#' noise ratio). #' Note that, depending on the peak detection algorithm, the matrix may #' contain additional columns. #' For `XChromatograms` objects the `matrix` contains also columns `"row"` #' and `"column"` specifying in which chromatogram of `object` the peak was #' identified. Chromatographic peaks are ordered by row. #' -#' - `chromPeakData`, `chromPeakData<-`: extract or set the [DataFrame()] with -#' optional chromatographic peak annotations. +#' - `chromPeakData`, `chromPeakData<-`: extract or set the +#' [S4Vectors::DataFrame()] with optional chromatographic peak annotations. #' #' - `hasChromPeaks`: infer whether a `XChromatogram` (or `XChromatograms`) #' has chromatographic peaks.
For `XChromatogram`: returns a `logical(1)`, @@ -174,7 +177,7 @@ setMethod("show", "XChromatogram", function(object) { #' @seealso #' #' [findChromPeaks-centWave][findChromPeaks-Chromatogram-CentWaveParam] for peak -#' detection on [MChromatograms()] objects. +#' detection on [MSnbase::MChromatograms()] objects. #' #' @examples #' diff --git a/man/XChromatogram.Rd b/man/XChromatogram.Rd index 8b233081..ff5e3d36 100644 --- a/man/XChromatogram.Rd +++ b/man/XChromatogram.Rd @@ -382,12 +382,12 @@ See help of the individual functions. \description{ The \code{XChromatogram} object allows to store chromatographic data (e.g. an extracted ion chromatogram) along with identified chromatographic peaks -within that data. The object inherits all functions from the \code{\link[=Chromatogram]{Chromatogram()}} -object in the \code{MSnbase} package. +within that data. The object inherits all functions from the +\code{\link[MSnbase:Chromatogram-class]{MSnbase::Chromatogram()}} object in the \code{MSnbase} package. Multiple \code{XChromatogram} objects can be stored in a \code{XChromatograms} object. -This class extends \code{\link[=MChromatograms]{MChromatograms()}} from the \code{MSnbase} package and allows -thus to arrange chromatograms in a matrix-like structure, columns +This class extends \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} from the \code{MSnbase} package +and allows thus to arrange chromatograms in a matrix-like structure, columns representing samples and rows m/z-retention time ranges. All functions are described (grouped into topic-related sections) after the @@ -407,7 +407,7 @@ used to extract the chromatogram was larger than the peak's m/z. Objects can be created with the contructor function \code{XChromatogram} and \code{XChromatograms}, respectively. 
Also, they can be coerced from -\link{Chromatogram} or \code{\link[=MChromatograms]{MChromatograms()}} objects using +\link{Chromatogram} or \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} objects using \code{as(object, "XChromatogram")} or \code{as(object, "XChromatograms")}. } @@ -415,7 +415,7 @@ Objects can be created with the contructor function \code{XChromatogram} and Besides classical subsetting with \code{[} specific filter operations on -\code{\link[=MChromatograms]{MChromatograms()}} and \code{XChromatograms} objects are available. See +\code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} and \code{XChromatograms} objects are available. See \code{\link[=filterColumnsIntensityAbove]{filterColumnsIntensityAbove()}} for more details. @@ -451,17 +451,20 @@ chromatographic peak definitions. Parameter \code{rt} allows to specify a retention time range for which peaks should be returned along with parameter \code{type} that defines how \emph{overlapping} is defined (parameter description for details). For \code{XChromatogram} objects the function returns -a \code{matrix} with columns \code{"rt"} (retention time of the peak apex), -\code{"rtmin"} (the lower peak boundary), \code{"rtmax"} (the upper peak boundary), -\code{"into"} (the ingegrated peak signal/area of the peak), \code{"maxo"} (the -maximum instensity of the peak and \code{"sn"} (the signal to noise ratio). +a \code{matrix} with columns \code{"mz"} (mean m/z value), \code{"mzmin"} (minimal m/z +value) and \code{"mzmax"} (maximal m/z value), \code{"rt"} (retention time of the +peak apex), \code{"rtmin"} (the lower peak boundary in retention time +dimension), \code{"rtmax"} (the upper peak boundary in retention time +dimension), \code{"into"} (the integrated peak signal/area of the peak), +\code{"maxo"} (the maximum intensity of the peak) and \code{"sn"} (the signal to +noise ratio).
Note that, depending on the peak detection algorithm, the matrix may contain additional columns. For \code{XChromatograms} objects the \code{matrix} contains also columns \code{"row"} and \code{"column"} specifying in which chromatogram of \code{object} the peak was identified. Chromatographic peaks are ordered by row. -\item \code{chromPeakData}, \verb{chromPeakData<-}: extract or set the \code{\link[=DataFrame]{DataFrame()}} with -optional chromatographic peak annotations. +\item \code{chromPeakData}, \verb{chromPeakData<-}: extract or set the +\code{\link[S4Vectors:DataFrame-class]{S4Vectors::DataFrame()}} with optional chromatographic peak annotations. \item \code{hasChromPeaks}: infer whether a \code{XChromatogram} (or \code{XChromatograms}) has chromatographic peaks. For \code{XChromatogram}: returns a \code{logical(1)}, for \code{XChromatograms}: returns a \code{matrix}, same dimensions than \code{object} @@ -742,7 +745,7 @@ plot(xchr_sub) } \seealso{ \link[=findChromPeaks-Chromatogram-CentWaveParam]{findChromPeaks-centWave} for peak -detection on \code{\link[=MChromatograms]{MChromatograms()}} objects. +detection on \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} objects. } \author{ Johannes Rainer diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd index 9bfc0437..910eaef5 100644 --- a/man/XcmsExperiment.Rd +++ b/man/XcmsExperiment.Rd @@ -22,6 +22,7 @@ \alias{XcmsExperiment-class} \alias{show,XcmsExperiment-method} \alias{filterChromPeaks} +\alias{c.XcmsExperiment} \alias{[,XcmsExperiment,ANY,ANY,ANY-method} \alias{filterIsolationWindow,XcmsExperiment-method} \alias{filterRt,XcmsExperiment-method} @@ -99,6 +100,8 @@ featureArea( \S4method{plot}{MsExperiment,missing}(x, y, msLevel = 1L, peakCol = "#ff000060", ...) +\method{c}{XcmsExperiment}(...) 
+ \S4method{[}{XcmsExperiment,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) \S4method{filterIsolationWindow}{XcmsExperiment}(object, mz = numeric()) @@ -270,7 +273,8 @@ type of the returned object. Currently only \code{return.type = "MChromatograms"} is supported.} \item{BPPARAM}{For \code{chromatogram}: parallel processing setup. Defaults -to \code{BPPARAM = bpparam()}. See \code{\link[=bpparam]{bpparam()}} for more information.} +to \code{BPPARAM = bpparam()}. See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}} for more +information.} \item{mzmin}{For \code{featureArea}: function to calculate the \code{"mzmin"} of a feature based on the \code{"mzmin"} values of the individual @@ -426,7 +430,7 @@ no chromatographic peak was identified and hence a missing value would be reported. This can be performed using the \code{\link[=fillChromPeaks]{fillChromPeaks()}} function. } } -\section{Subsetting and filtering}{ +\section{Subset, filter and combine}{ \itemize{ \item \code{[}: subset an \code{XcmsExperiment} by \strong{sample} (parameter \code{i}). Subsetting @@ -437,6 +441,17 @@ samples) will be retained. Which preprocessing results should be kept or dropped can also be configured with optional parameters \code{keepChromPeaks} (by default \code{TRUE}), \code{keepAdjustedRtime} (by default \code{FALSE}) and \code{keepFeatures} (by default \code{FALSE}). +\item \code{c}: multiple \code{XcmsExperiment} objects can be combined into one using the +\code{c()} function. This requires however that all the \code{XcmsExperiments}' +\code{Spectra} objects use the same type of \code{MsBackend} and that their +processing queues are empty. Also, only combining of peak detection +results is supported. Any eventually present alignment or correspondence +results will be dropped before combining the \code{XcmsExperiment} objects. 
+Finally, at present, only the MS data of the individual \code{XcmsExperiment} +objects is combined and any data eventually present in the \verb{@qdata}, +\verb{@otherData} and \verb{@experimentFiles} slots is ignored. +The function returns an \code{XcmsExperiment} object with the combined MS data +(\code{Spectra} objects) and chromatographic peak detection results. \item \code{filterChromPeaks}: filter chromatographic peaks of an \code{XcmsExperiment} keeping only those specified with parameter \code{keep}. Returns the \code{XcmsExperiment} with the filtered data. Chromatographic peaks to @@ -499,9 +514,9 @@ extract the data from (to e.g. for extracted ion chromatograms EICs). Both parameters are expected to be numerical two-column matrices with the first column defining the lower and the second the upper margin. Each row can define a separate m/z - retention time region. Currently -the function returns a \code{\link[=MChromatograms]{MChromatograms()}} object for \code{object} being a -\code{MsExperiment} or, for \code{object} being an \code{XcmsExperiment}, either a -\code{MChromatograms} or \code{\link[=XChromatograms]{XChromatograms()}} depending on parameter +the function returns a \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} object for \code{object} +being a \code{MsExperiment} or, for \code{object} being an \code{XcmsExperiment}, +either a \code{MChromatograms} or \code{\link[=XChromatograms]{XChromatograms()}} depending on parameter \code{return.type} (can be either \code{"MChromatograms"} or \code{"XChromatograms"}). For the latter also chromatographic peaks detected within the provided m/z and retention times are returned. Parameter \code{chromPeaks} allows @@ -636,9 +651,9 @@ Parameters \code{msLevel}, \code{mz}, \code{ppm} and \code{rt} allow to define s feature definitions that should be returned with the parameter \code{type} defining how these parameters should be used to subset the returned \code{data.frame}.
See parameter descriptions for details. -\item \code{featureSpectra}: returns a \code{\link[=Spectra]{Spectra()}} or \code{List} of \code{Spectra} with -(MS1 or MS2) spectra associated to each feature. See \code{\link[=featureSpectra]{featureSpectra()}} -for more details and available parameters. +\item \code{featureSpectra}: returns a \code{\link[Spectra:Spectra]{Spectra::Spectra()}} or \code{List} of \code{Spectra} +with (MS1 or MS2) spectra associated to each feature. See +\code{\link[=featureSpectra]{featureSpectra()}} for more details and available parameters. \item \code{featuresSummary}: calculate a simple summary on features. See \code{\link[=featureSummary]{featureSummary()}} for details. \item \code{groupChromPeaks}: performs the correspondence analysis (i.e., grouping @@ -680,12 +695,12 @@ to define the column in \code{chromPeaks} that should be selected; defaults to \verb{intensity = "into"). }method = "sum"`: sum the values for all chromatographic peaks assigned to the feature in the same sample. \item \code{quantify}: extract the correspondence analysis results as a -\code{\link[=SummarizedExperiment]{SummarizedExperiment()}}. The feature \emph{values} are used as \code{assay} in -the returned \code{SummarizedExperiment}, \code{rowData} contains the -\code{featureDefinitions} (without column \code{"peakidx"}) and \code{colData} the -\code{sampleData} of \code{object}. Additional parameters to the \code{featureValues} -function (that is used to extract the feature value matrix) can be -passed \emph{via} \code{...}. +\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment::SummarizedExperiment()}}. The feature \emph{values} +are used as \code{assay} in the returned \code{SummarizedExperiment}, \code{rowData} +contains the \code{featureDefinitions} (without column \code{"peakidx"}) and +\code{colData} the \code{sampleData} of \code{object}. 
Additional parameters to the +\code{featureValues} function (that is used to extract the feature value +matrix) can be passed \emph{via} \code{...}. } } @@ -718,7 +733,10 @@ sample in the RT-m/z space. See \code{\link[=plotChromPeaks]{plotChromPeaks()}} } The functions listed below ensure compatibility with the \emph{older} -\code{\link[=XCMSnExp]{XCMSnExp()}} xcms result object. +\code{\link[=XCMSnExp]{XCMSnExp()}} xcms result object. Also, an \code{XcmsExperiment} can be coerced +to the \emph{older} \code{XCMSnExp} class using \code{as(object, "XCMSnExp")} same as a +\code{XCMSnExp} class can be coerced to \code{XcmsExperiment} using +\code{as(object, "XcmsExperiment")}. \itemize{ \item \code{fileNames}: returns the original data file names for the spectra data. Ideally, the \code{dataOrigin} or \code{dataStorage} spectra variables from the diff --git a/man/adjustRtime.Rd b/man/adjustRtime.Rd index f618bfe2..584efe64 100644 --- a/man/adjustRtime.Rd +++ b/man/adjustRtime.Rd @@ -210,8 +210,8 @@ adjustRtimePeakGroups(object, param = PeakGroupsParam(), msLevel = 1L) \S4method{adjustRtime}{XCMSnExp,ObiwarpParam}(object, param, msLevel = 1L) } \arguments{ -\item{object}{For \code{adjustRtime}: an \code{\link[=OnDiskMSnExp]{OnDiskMSnExp()}}, \code{\link[=XCMSnExp]{XCMSnExp()}}, -\code{\link[=MsExperiment]{MsExperiment()}} or \code{\link[=XcmsExperiment]{XcmsExperiment()}} object.} +\item{object}{For \code{adjustRtime}: an \code{\link[MSnbase:OnDiskMSnExp-class]{MSnbase::OnDiskMSnExp()}}, \code{\link[=XCMSnExp]{XCMSnExp()}}, +\code{\link[MsExperiment:MsExperiment]{MsExperiment::MsExperiment()}} or \code{\link[=XcmsExperiment]{XcmsExperiment()}} object.} \item{param}{The parameter object defining the alignment method (and its setting).} @@ -232,7 +232,7 @@ the total number of samples in an experiment will load the full MS data into memory and will thus in most settings cause an out-of-memory error.} \item{BPPARAM}{parallel processing setup. 
Defaults to \code{BPPARAM = bpparam()}. -See \code{\link[=bpparam]{bpparam()}} for details.} +See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}} for details.} \item{msLevel}{For \code{adjustRtime}: \code{integer(1)} defining the MS level on which the alignment should be performed.} @@ -262,11 +262,11 @@ Can be either \code{"loess"} or \code{"linear"}.} \item{span}{For \code{PeakGroupsParam}: \code{numeric(1)} defining the degree of smoothing (if \code{smooth = "loess"}). This parameter is -passed to the internal call to \code{\link[=loess]{loess()}}.} +passed to the internal call to \code{\link[stats:loess]{stats::loess()}}.} \item{family}{For \code{PeakGroupsParam}: \code{character(1)} defining the method for loess smoothing. Allowed values are \code{"gaussian"} and \code{"symmetric"}. See -\code{\link[=loess]{loess()}} for more information.} +\code{\link[stats:loess]{stats::loess()}} for more information.} \item{peakGroupsMatrix}{For \code{PeakGroupsParam}: optional \code{matrix} of (raw) retention times for the (marker) peak groups on which the alignment diff --git a/man/chromPeakSpectra.Rd b/man/chromPeakSpectra.Rd index 8899c15a..f60757f1 100644 --- a/man/chromPeakSpectra.Rd +++ b/man/chromPeakSpectra.Rd @@ -74,7 +74,8 @@ with a prefix \code{"chrom_peak_"}. Defaults to \code{c("mz", "rt")}.} \item{return.type}{\code{character(1)} defining the type of result object that should be returned.} -\item{BPPARAM}{parallel processing setup. Defaults to \code{\link[=bpparam]{bpparam()}}.} +\item{BPPARAM}{parallel processing setup. 
Defaults to +\code{\link[BiocParallel:register]{BiocParallel::bpparam()}}.} } \value{ parameter \code{return.type} allow to specify the type of the returned object: diff --git a/man/chromPeakSummary.Rd b/man/chromPeakSummary.Rd new file mode 100644 index 00000000..0aed9655 --- /dev/null +++ b/man/chromPeakSummary.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/XcmsExperiment.R, +% R/functions-Params.R +\name{chromPeakSummary} +\alias{chromPeakSummary} +\alias{chromPeakSummary,XcmsExperiment,BetaDistributionParam-method} +\alias{BetaDistributionParam} +\title{Chromatographic peak summaries} +\usage{ +chromPeakSummary(object, param, ...) + +\S4method{chromPeakSummary}{XcmsExperiment,BetaDistributionParam}( + object, + param, + msLevel = 1L, + chunkSize = 2L, + BPPARAM = bpparam() +) + +BetaDistributionParam() +} +\arguments{ +\item{object}{an \emph{xcms} result object containing information on +identified chromatographic peaks.} + +\item{param}{a parameter object defining the method/summaries that should +be calculated (see description above for supported parameter classes).} + +\item{...}{additional arguments passed to the method implementation.} + +\item{msLevel}{\code{integer(1)} with the MS level of the chromatographic peaks +on which the metric should be calculated.} + +\item{chunkSize}{\code{integer(1)} defining the number of samples from which data +should be loaded and processed at a time.} + +\item{BPPARAM}{Parallel processing setup. See +\code{\link[BiocParallel:register]{BiocParallel::bpparam()}} for details.} +} +\value{ +A \code{matrix} or \code{data.frame} with the same number of rows as there are +chromatographic peaks. Columns contain the calculated values. The number of +columns, their names and content depend on the used parameter object. See +the respective documentation above for more details. 
+} +\description{ +The \code{chromPeakSummary()} method calculates summary statistics or other +metrics for each of the identified chromatographic peaks in an \emph{xcms} result +object, such as the \code{\link[=XcmsExperiment]{XcmsExperiment()}}. Different metrics can be calculated, +depending upon (and configured by) using dedicated \emph{parameter} classes. As a +result, the method returns a \code{matrix} or \code{data.frame} with one row per +chromatographic peak. Each column contains calculated values, depending on +the used method/parameter class. + +Currently implemented methods/parameter classes are: +\itemize{ +\item \code{BetaDistributionParam}: calculates the \emph{beta_cor} and \emph{beta_snr} quality +metrics as described in Kumler 2023 representing the result from a +(correlation) test of similarity (using Pearson's correlation coefficient) +to a bell curve and the signal-to-noise ratio calculated on the residuals +of this test. +} +} +\references{ +Kumler W, Hazelton B J and Ingalls A E (2023) "Picky with peakpicking: +assessing chromatographic peak quality with simple metrics in metabolomics" +\emph{BMC Bioinformatics} 24(1):404. 
doi: 10.1186/s12859-023-05533-4 +} +\author{ +Pablo Vangeenderhuysen, Johannes Rainer, William Kumler +} diff --git a/man/correlate-Chromatogram.Rd b/man/correlate-Chromatogram.Rd index d5eb143a..844f2e8f 100644 --- a/man/correlate-Chromatogram.Rd +++ b/man/correlate-Chromatogram.Rd @@ -36,15 +36,15 @@ ) } \arguments{ -\item{x}{\code{\link[=Chromatogram]{Chromatogram()}} or \code{\link[=MChromatograms]{MChromatograms()}} object.} +\item{x}{\code{\link[MSnbase:Chromatogram-class]{MSnbase::Chromatogram()}} or \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} object.} -\item{y}{\code{\link[=Chromatogram]{Chromatogram()}} or \code{\link[=MChromatograms]{MChromatograms()}} object.} +\item{y}{\code{\link[MSnbase:Chromatogram-class]{MSnbase::Chromatogram()}} or \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} object.} \item{use}{\code{character(1)} passed to the \code{cor} function. See \code{\link[=cor]{cor()}} for details.} -\item{method}{\code{character(1)} passed to the \code{cor} function. See \code{\link[=cor]{cor()}} for -details.} +\item{method}{\code{character(1)} passed to the \code{cor} function. See +\code{\link[stats:cor]{stats::cor()}} for details.} \item{align}{\code{character(1)} defining the alignment method to be used. See help on \code{alignRt} in \code{\link[MSnbase:Chromatogram-class]{MSnbase::Chromatogram()}} for details. The value of @@ -70,9 +70,9 @@ Correlate intensities of two chromatograms with each other. If the two chromatogram. See help on \code{alignRt} in \code{\link[MSnbase:Chromatogram-class]{MSnbase::Chromatogram()}} for more details. -If \code{correlate} is called on a single \code{\link[=MChromatograms]{MChromatograms()}} object a pairwise -correlation of each chromatogram with each other is performed and a \code{matrix} -with the correlation coefficients is returned. 
+If \code{correlate} is called on a single \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} object a +pairwise correlation of each chromatogram with each other is performed and +a \code{matrix} with the correlation coefficients is returned. Note that the correlation of two chromatograms depends also on their order, e.g. \code{correlate(chr1, chr2)} might not be identical to diff --git a/man/do_adjustRtime_peakGroups.Rd b/man/do_adjustRtime_peakGroups.Rd index 4d0d898e..7f6c870d 100644 --- a/man/do_adjustRtime_peakGroups.Rd +++ b/man/do_adjustRtime_peakGroups.Rd @@ -54,11 +54,11 @@ Can be either \code{"loess"} or \code{"linear"}.} \item{span}{For \code{PeakGroupsParam}: \code{numeric(1)} defining the degree of smoothing (if \code{smooth = "loess"}). This parameter is -passed to the internal call to \code{\link[=loess]{loess()}}.} +passed to the internal call to \code{\link[stats:loess]{stats::loess()}}.} \item{family}{For \code{PeakGroupsParam}: \code{character(1)} defining the method for loess smoothing. Allowed values are \code{"gaussian"} and \code{"symmetric"}. See -\code{\link[=loess]{loess()}} for more information.} +\code{\link[stats:loess]{stats::loess()}} for more information.} \item{peakGroupsMatrix}{optional \code{matrix} of (raw) retention times for peak groups on which the alignment should be performed. Each column diff --git a/man/featureChromatograms.Rd b/man/featureChromatograms.Rd index 72586588..7f4a7a54 100644 --- a/man/featureChromatograms.Rd +++ b/man/featureChromatograms.Rd @@ -98,8 +98,8 @@ peaks of a feature will be used.} \item{progressbar}{\code{logical(1)} defining whether a progress bar is shown.} \item{BPPARAM}{For \code{object} being an \code{XcmsExperiment}: parallel processing -setup. Defaults to \code{BPPARAM = bpparam()}. See \code{\link[=bpparam]{bpparam()}} for more -information.} +setup. Defaults to \code{BPPARAM = bpparam()}. 
See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}} +for more information.} \item{include}{Only for \code{object} being an \code{XCMSnExp}: \code{character(1)} defining which chromatographic peaks (and related feature definitions) diff --git a/man/featureSpectra.Rd b/man/featureSpectra.Rd index c669649a..663ec44b 100644 --- a/man/featureSpectra.Rd +++ b/man/featureSpectra.Rd @@ -72,7 +72,8 @@ object. The columns will be named as they are written in the Defaults to \code{c("mzmed", "rtmed")}.} } \value{ -The function returns either a \code{\link[=Spectra]{Spectra()}} (for \code{return.type = "Spectra"}) +The function returns either a \code{\link[Spectra:Spectra]{Spectra::Spectra()}} (for +\code{return.type = "Spectra"}) or a \code{List} of \code{Spectra} (for \code{return.type = "List"}). For the latter, the order and the length matches parameter \code{features} (or if no \code{features} is defined the order of the features in \code{featureDefinitions(object)}). @@ -104,7 +105,8 @@ spectrum \strong{per chromatographic peak} will be returned (hence multiple spectra per feature). The information from \code{featureDefinitions} for each feature can be included -in the returned \code{\link[=Spectra]{Spectra()}} object using the \code{featureColumns} parameter. +in the returned \code{\link[Spectra:Spectra]{Spectra::Spectra()}} object using the \code{featureColumns} +parameter. This is useful for keeping details such as the median retention time (\code{rtmed}) or median m/z (\code{mzmed}). The columns will retain their names as specified in the \code{featureDefinitions} object, prefixed by \code{"feature_"} @@ -113,9 +115,11 @@ name of the feature in the \code{featureDefinitions} data.frame) is always added as a metadata column named \code{"feature_id"}. See also \code{\link[=chromPeakSpectra]{chromPeakSpectra()}}, as it supports a similar parameter for -including columns from the chromatographic peaks in the returned spectra object. 
+including columns from the chromatographic peaks in the returned spectra +object. These parameters can be used in combination to include information from both -the chromatographic peaks and the features in the returned \code{\link[=Spectra]{Spectra()}}. +the chromatographic peaks and the features in the returned +\code{\link[Spectra:Spectra]{Spectra::Spectra()}}. The \emph{peak ID} (i.e., the row name of the peak in the \code{chromPeaks} matrix) is added as a metadata column named \code{"chrom_peak_id"}. } diff --git a/man/filter-MChromatograms.Rd b/man/filter-MChromatograms.Rd index 2a8fea2d..75c35eb3 100644 --- a/man/filter-MChromatograms.Rd +++ b/man/filter-MChromatograms.Rd @@ -39,7 +39,7 @@ ) } \arguments{ -\item{object}{\code{\link[=MChromatograms]{MChromatograms()}} or \code{\link[=XChromatograms]{XChromatograms()}} object.} +\item{object}{\code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} or \code{\link[=XChromatograms]{XChromatograms()}} object.} \item{threshold}{for \code{filterColumnsIntensityAbove}: \code{numeric(1)} with the threshold value to compare against.} @@ -82,10 +82,10 @@ same number of rows (EICs) but eventually a lower number of columns (samples). } \description{ -These functions allow to filter (subset) \code{\link[=MChromatograms]{MChromatograms()}} or +These functions allow to filter (subset) \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} or \code{\link[=XChromatograms]{XChromatograms()}} objects, i.e. sets of chromatographic data, without changing the data (intensity and retention times) within the individual -chromatograms (\code{\link[=Chromatogram]{Chromatogram()}} objects). +chromatograms (\code{\link[MSnbase:Chromatogram-class]{MSnbase::Chromatogram()}} objects). 
\itemize{ \item \code{filterColumnsIntensityAbove}: subsets a \code{MChromatograms} objects keeping only columns (samples) for which \code{value} is larger than the provided diff --git a/man/findChromPeaks-Chromatogram-CentWaveParam.Rd b/man/findChromPeaks-Chromatogram-CentWaveParam.Rd index 9f342440..5639fcb4 100644 --- a/man/findChromPeaks-Chromatogram-CentWaveParam.Rd +++ b/man/findChromPeaks-Chromatogram-CentWaveParam.Rd @@ -25,12 +25,14 @@ arguments used for peak detection.} \item{BPPARAM}{a parameter class specifying if and how parallel processing should be performed (only for \code{XChromatograms} objects). It defaults to -\code{bpparam()}. See \code{\link[=bpparam]{bpparam()}} for more information.} +\code{bpparam()}. See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}} for more information.} } \value{ If called on a \code{Chromatogram} object, the method returns an \link{XChromatogram} -object with the identified peaks. See \code{\link[=peaksWithCentWave]{peaksWithCentWave()}} for details on -the peak matrix content. +object with the identified peaks. Columns \code{"mz"}, \code{"mzmin"} and \code{"mzmax"} in +the \code{chromPeaks()} peak matrix provide the mean m/z and the minimum and +maximum m/z value of the \code{Chromatogram} object. See \code{\link[=peaksWithCentWave]{peaksWithCentWave()}} +for details on the remaining columns. } \description{ \code{findChromPeaks} on a \link{Chromatogram} or \link{MChromatograms} object with a diff --git a/man/findChromPeaks-Chromatogram-MatchedFilter.Rd b/man/findChromPeaks-Chromatogram-MatchedFilter.Rd index 15444da3..31956c8c 100644 --- a/man/findChromPeaks-Chromatogram-MatchedFilter.Rd +++ b/man/findChromPeaks-Chromatogram-MatchedFilter.Rd @@ -17,8 +17,10 @@ arguments used for peak detection.} } \value{ If called on a \code{Chromatogram} object, the method returns a \code{matrix} with -the identified peaks.
See \code{\link[=peaksWithMatchedFilter]{peaksWithMatchedFilter()}} for details on the -matrix content. +the identified peaks. Columns \code{"mz"}, \code{"mzmin"} and \code{"mzmax"} in +the \code{chromPeaks()} peak matrix provide the mean m/z and the minimum and +maximum m/z value of the \code{Chromatogram} object. See +\code{\link[=peaksWithMatchedFilter]{peaksWithMatchedFilter()}} for details on the remaining columns. } \description{ \code{findChromPeaks} on a \link{Chromatogram} or \link{MChromatograms} object with a diff --git a/man/findChromPeaks.Rd b/man/findChromPeaks.Rd index a564d689..8a09be28 100644 --- a/man/findChromPeaks.Rd +++ b/man/findChromPeaks.Rd @@ -29,8 +29,8 @@ findChromPeaks(object, param, ...) } \arguments{ \item{object}{The data object on which to perform the peak detection. Can be -an \code{\link[=OnDiskMSnExp]{OnDiskMSnExp()}}, \code{\link[=XCMSnExp]{XCMSnExp()}}, \code{\link[=MChromatograms]{MChromatograms()}} or \code{\link[=MsExperiment]{MsExperiment()}} -object.} +an \code{\link[MSnbase:OnDiskMSnExp-class]{MSnbase::OnDiskMSnExp()}}, \code{\link[=XCMSnExp]{XCMSnExp()}}, \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} +or \code{\link[MsExperiment:MsExperiment]{MsExperiment::MsExperiment()}} object.} \item{param}{The parameter object selecting and configuring the algorithm.} @@ -49,10 +49,11 @@ the peak detection. Because parallel processing can only performed on the subset of data loaded currently into memory (in each iteration), the value for \code{chunkSize} should be match the defined parallel setting setup. Using a parallel processing setup using 4 CPUs (separate -processes) but using \verb{chunkSize = }1\verb{will not perform any parallel processing, as only the data from one sample is loaded in memory at a time. On the other hand, setting}chunkSize\verb{to the total number of samples in an experiment will load the full MS data into memory and will thus in most settings cause an out-of-memory error.
By setting}chunkSize = -1\verb{the peak detection will be performed separately, and in parallel, for each sample. This will however not work for all}Spectra` \emph{backends} (see eventually \code{\link[=Spectra]{Spectra()}} for details).} +processes) but using \verb{chunkSize = }1\verb{will not perform any parallel processing, as only the data from one sample is loaded in memory at a time. On the other hand, setting}chunkSize\verb{to the total number of samples in an experiment will load the full MS data into memory and will thus in most settings cause an out-of-memory error. By setting}chunkSize = -1\verb{the peak detection will be performed separately, and in parallel, for each sample. This will however not work for all}Spectra` \emph{backends} (see eventually \code{\link[Spectra:Spectra]{Spectra::Spectra()}} for +details).} \item{BPPARAM}{Parallel processing setup. Uses by default the system-wide -default setup. See \code{\link[=bpparam]{bpparam()}} for more details.} +default setup. See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}} for more details.} \item{add}{\code{logical(1)} (if \code{object} contains already chromatographic peaks, i.e. is either an \code{XCMSnExp} or \code{XcmsExperiment}) whether chromatographic diff --git a/man/findChromPeaksIsolationWindow.Rd b/man/findChromPeaksIsolationWindow.Rd index ba1dc514..5334640b 100644 --- a/man/findChromPeaksIsolationWindow.Rd +++ b/man/findChromPeaksIsolationWindow.Rd @@ -51,7 +51,8 @@ loaded into memory and processed at a time. See \code{\link[=findChromPeaks]{fin more information.} \item{BPPARAM}{if \code{object} is an \code{MsExperiment} or \code{XcmsExperiment}: -parallel processing setup. See \code{\link[=bpparam]{bpparam()}} for more information.} +parallel processing setup. 
See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}} for more +information.} } \value{ An \code{XcmsExperiment} or \code{XCMSnExp} object with the chromatographic peaks diff --git a/man/manualChromPeaks.Rd b/man/manualChromPeaks.Rd index c838ffdc..d8fa1476 100644 --- a/man/manualChromPeaks.Rd +++ b/man/manualChromPeaks.Rd @@ -84,7 +84,8 @@ using 4 CPUs (separate processes) but using \verb{chunkSize = }1\verb{will not p the total number of samples in an experiment will load the full MS data into memory and will thus in most settings cause an out-of-memory error.} -\item{BPPARAM}{parallel processing settings (see \code{\link[=bpparam]{bpparam()}} for details).} +\item{BPPARAM}{parallel processing settings (see \code{\link[BiocParallel:register]{BiocParallel::bpparam()}} +for details).} \item{peakIdx}{For \code{manualFeatures}: \code{list} of \code{integer} vectors with the indices of chromatographic peaks in the object's \code{chromPeaks} matrix diff --git a/man/plotChromatogramsOverlay.Rd b/man/plotChromatogramsOverlay.Rd index 1030ad61..d2677170 100644 --- a/man/plotChromatogramsOverlay.Rd +++ b/man/plotChromatogramsOverlay.Rd @@ -39,7 +39,7 @@ ) } \arguments{ -\item{object}{\code{\link[=MChromatograms]{MChromatograms()}} or \code{\link[=XChromatograms]{XChromatograms()}} object.} +\item{object}{\code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} or \code{\link[=XChromatograms]{XChromatograms()}} object.} \item{col}{definition of the color in which the chromatograms should be drawn. Can be of length 1 or equal to \code{nrow(object)} to plot each @@ -103,9 +103,10 @@ each EIC. \code{plotOverlay} draws chromatographic peak data from multiple (different) extracted ion chromatograms (EICs) into the same plot. This allows to directly compare the peak shape of these EICs in the same sample. 
In -contrast to the \code{plot} function for \code{\link[=MChromatograms]{MChromatograms()}} object, which draws -the data from the same EIC across multiple samples in the same plot, this -function draws the different EICs from the same sample into the same plot. +contrast to the \code{plot} function for \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}} object, +which draws the data from the same EIC across multiple samples in the +same plot, this function draws the different EICs from the same sample +into the same plot. If \code{plotChromatogramsOverlay} is called on a \code{XChromatograms} object any present chromatographic peaks will also be highlighted/drawn depending on the diff --git a/man/reconstructChromPeakSpectra.Rd b/man/reconstructChromPeakSpectra.Rd index 473ad1ea..8e0d7ce4 100644 --- a/man/reconstructChromPeakSpectra.Rd +++ b/man/reconstructChromPeakSpectra.Rd @@ -58,8 +58,8 @@ be used as \code{precursorIntensity} of the resulting spectra.} \code{chromPeaks}) of MS1 peaks for which MS2 spectra should be reconstructed. By default they are reconstructed for all MS1 chromatographic peaks.} -\item{BPPARAM}{parallel processing setup. See \code{\link[=bpparam]{bpparam()}} for more -information.} +\item{BPPARAM}{parallel processing setup. See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}} +for more information.} \item{return.type}{\code{character(1)} defining the type of the returned object. Only \code{return.type = "Spectra"} is supported, \code{return.type = "MSpectra"} @@ -67,14 +67,14 @@ is deprecated.} } \value{ \itemize{ -\item \code{\link[=Spectra]{Spectra()}} object (defined in the \code{Spectra} package) with the +\item \code{\link[Spectra:Spectra]{Spectra::Spectra()}} object (defined in the \code{Spectra} package) with the reconstructed MS2 spectra for all MS1 peaks in \code{object}. Contains empty spectra (i.e. 
without m/z and intensity values) for MS1 peaks for which reconstruction was not possible (either no MS2 signal was recorded or the correlation of the MS2 chromatographic peaks with the MS1 chromatographic peak was below threshold \code{minCor}. Spectra variables -\code{"ms2_peak_id"} and \code{"ms2_peak_cor"} (of type \code{\link[=CharacterList]{CharacterList()}} -and \code{\link[=NumericList]{NumericList()}} with length equal to the number of peaks per +\code{"ms2_peak_id"} and \code{"ms2_peak_cor"} (of type \code{\link[IRanges:AtomicList-class]{IRanges::CharacterList()}} +and \code{\link[IRanges:AtomicList-class]{IRanges::NumericList()}} with length equal to the number of peaks per reconstructed MS2 spectrum) providing the IDs and the correlation of the MS2 chromatographic peaks from which the MS2 spectrum was reconstructed. As retention time the median retention times of all MS2 chromatographic @@ -104,9 +104,9 @@ Each MS2 chromatographic peak selected for an MS1 peak will thus represent one \strong{mass peak} in the reconstructed spectrum. } -The resulting \code{\link[=Spectra]{Spectra()}} object provides also the peak IDs of the MS2 -chromatographic peaks for each spectrum as well as their correlation value -with spectra variables \emph{ms2_peak_id} and \emph{ms2_peak_cor}. +The resulting \code{\link[Spectra:Spectra]{Spectra::Spectra()}} object provides also the peak IDs of +the MS2 chromatographic peaks for each spectrum as well as their +correlation value with spectra variables \emph{ms2_peak_id} and \emph{ms2_peak_cor}. } \seealso{ \code{\link[=findChromPeaksIsolationWindow]{findChromPeaksIsolationWindow()}} for the function to perform MS2 diff --git a/man/refineChromPeaks.Rd b/man/refineChromPeaks.Rd index b1472702..dfd38b2d 100644 --- a/man/refineChromPeaks.Rd +++ b/man/refineChromPeaks.Rd @@ -93,7 +93,7 @@ into memory and will thus in most settings cause an out-of-memory error.} \item{BPPARAM}{parameter object to set up parallel processing. 
Uses the default parallel processing setup returned by \code{bpparam()}. See -\code{\link[=bpparam]{bpparam()}} for details and examples.} +\code{\link[BiocParallel:register]{BiocParallel::bpparam()}} for details and examples.} \item{maxPeakwidth}{For \code{CleanPeaksParam}: \code{numeric(1)} defining the maximal allowed peak width (in retention time).} diff --git a/man/removeIntensity-Chromatogram.Rd b/man/removeIntensity-Chromatogram.Rd index 5343689a..16d23f4d 100644 --- a/man/removeIntensity-Chromatogram.Rd +++ b/man/removeIntensity-Chromatogram.Rd @@ -20,8 +20,8 @@ } \arguments{ \item{object}{an object representing chromatographic data. Can be a -\code{\link[=Chromatogram]{Chromatogram()}}, \code{\link[=MChromatograms]{MChromatograms()}}, \code{\link[=XChromatogram]{XChromatogram()}} or -\code{\link[=XChromatograms]{XChromatograms()}} object.} +\code{\link[MSnbase:Chromatogram-class]{MSnbase::Chromatogram()}}, \code{\link[MSnbase:MChromatograms-class]{MSnbase::MChromatograms()}}, +\code{\link[=XChromatogram]{XChromatogram()}} or \code{\link[=XChromatograms]{XChromatograms()}} object.} \item{which}{\code{character(1)} defining the condition to remove intensities. See description for details and options.} @@ -37,7 +37,7 @@ the input object with matching intensities being replaced by \code{NA}. matching certain conditions (depending on parameter \code{which}). The intensities are actually not \emph{removed} but replaced with \code{NA_real_}. To actually \strong{remove} the intensities (and the associated retention times) -use \code{\link[=clean]{clean()}} afterwards. +use \code{\link[MSnbase:clean-methods]{MSnbase::clean()}} afterwards. Parameter \code{which} allows to specify which intensities should be replaced by \code{NA_real_}. 
By default (\code{which = "below_threshod"} intensities below diff --git a/man/writeMSData-XCMSnExp-character-method.Rd b/man/writeMSData-XCMSnExp-character-method.Rd index 29edc500..c9a72210 100644 --- a/man/writeMSData-XCMSnExp-character-method.Rd +++ b/man/writeMSData-XCMSnExp-character-method.Rd @@ -29,7 +29,7 @@ original file names etc) should be copied from the original files.} See documentation of the \code{software_processing} parameter of \code{\link[mzR:writeMSData]{mzR::writeMSData()}}.} -\item{...}{Additional parameters to pass down to the \code{\link[=writeMSData]{writeMSData()}} +\item{...}{Additional parameters to pass down to the \code{\link[MSnbase:writeMSData]{MSnbase::writeMSData()}} function in the \code{MSnbase} package, such as \code{outformat} to specify the output format (\code{"mzml"} or \code{"mzxml"}) or \code{copy} to specify whether general information from the original MS data files (such as data @@ -41,7 +41,7 @@ If adjusted retention times are present, these are used as retention time of the exported spectra. } \seealso{ -\code{\link[=writeMSData]{writeMSData()}} function in the \code{MSnbase} package. +\code{\link[MSnbase:writeMSData]{MSnbase::writeMSData()}} function in the \code{MSnbase} package. 
} \author{ Johannes Rainer diff --git a/tests/testthat/test_MsExperiment-functions.R b/tests/testthat/test_MsExperiment-functions.R index 773ef380..c69f66cf 100644 --- a/tests/testthat/test_MsExperiment-functions.R +++ b/tests/testthat/test_MsExperiment-functions.R @@ -436,3 +436,29 @@ test_that(".update_sample_data_links_spectra works", { expect_true(length(spectra(res[2L])) == 0) expect_true(length(spectra(res[3L])) == 1) }) + +test_that(".mse_combine works", { + a <- as(loadXcmsData("xmse"), "MsExperiment") + b <- as(loadXcmsData("faahko_sub2"), "MsExperiment") + + res <- .mse_combine(list(a, b, a)) + expect_s4_class(res, "MsExperiment") + expect_true(validObject(res)) + expect_equal(length(res), 19) + expect_equal(rtime(res), c(rtime(a), rtime(b), rtime(a))) + + sd_test <- sampleData(res[12:19]) + sd_ref <- sampleData(a) + expect_equal(sampleData(res[12:19])[, colnames(sampleData(a))], + sampleData(a)) + expect_equal(colnames(sampleData(res)), + union(colnames(sampleData(a)), colnames(sampleData(b)))) + + s <- spectra(res[9:11]) + expect_equal(rtime(spectra(b)), rtime(s)) + expect_equal(mz(spectra(b)), mz(s)) + + expect_error(.mse_combine(list(a, 3)), "objects extending") + b@otherData[[1L]] <- 3 + expect_error(.mse_combine(list(a, b)), "not empty") +}) diff --git a/tests/testthat/test_Param_classes.R b/tests/testthat/test_Param_classes.R index 4281d56f..4f1754cb 100644 --- a/tests/testthat/test_Param_classes.R +++ b/tests/testthat/test_Param_classes.R @@ -1002,3 +1002,9 @@ test_that("FilterIntensityParam works", { res@threshold <- c(10, 20) expect_error(validObject(res), "length 1") }) + + +test_that("BetaDistributionParam works", { + res <- BetaDistributionParam() + expect_true(is(res, "BetaDistributionParam")) +}) diff --git a/tests/testthat/test_XcmsExperiment-functions.R b/tests/testthat/test_XcmsExperiment-functions.R index dd2128f2..bbcd9bed 100644 --- a/tests/testthat/test_XcmsExperiment-functions.R +++ b/tests/testthat/test_XcmsExperiment-functions.R 
@@ -231,3 +231,59 @@ test_that(".xcms_experiment_to_xcms_n_exp works", { expect_equal(mz(res[1:3]), mz(ref[1:3])) }) + +test_that(".xcms_n_exp_to_xcms_experiment works", { + from <- loadXcmsData("xdata") + res <- .xcms_n_exp_to_xcms_experiment(from) + expect_s4_class(res, "XcmsExperiment") + expect_equal(unname(rtime(res)), unname(rtime(from))) + expect_true(hasChromPeaks(res)) + expect_true(hasFeatures(res)) + expect_true(hasAdjustedRtime(res)) + + from@featureData <- from@featureData[-(10:20), ] + expect_error(.xcms_n_exp_to_xcms_experiment(from), "don't match") + + from <- loadXcmsData("faahko_sub") + res <- as(from, "XcmsExperiment") + expect_s4_class(res, "XcmsExperiment") + expect_equal(unname(rtime(res)), unname(rtime(from))) + expect_true(hasChromPeaks(res)) + expect_false(hasFeatures(res)) + expect_false(hasAdjustedRtime(res)) + + from@spectraProcessingQueue <- list("a") + expect_error(.xcms_n_exp_to_xcms_experiment(from), "not empty") + from@spectraProcessingQueue <- list() +}) + +test_that(".xmse_combine works", { + a <- loadXcmsData("xmse") + a_2 <- dropFeatureDefinitions(a) + a_2 <- dropAdjustedRtime(a_2) + b <- loadXcmsData("faahko_sub2") + + res <- .xmse_combine(list(a, b)) + expect_s4_class(res, "XcmsExperiment") + expect_true(hasChromPeaks(res)) + expect_false(hasAdjustedRtime(res)) + expect_false(hasFeatures(res)) + expect_equal(nrow(chromPeaks(res)), + nrow(chromPeaks(a_2)) + nrow(chromPeaks(b))) + + cp_b <- chromPeaks(b) + cp_res <- chromPeaks(res[9:11]) + rownames(cp_b) <- NULL + rownames(cp_res) <- NULL + expect_equal(cp_b, cp_res) + + cpd_res <- chromPeakData(res) + cpd_a <- chromPeakData(a_2) + cpd_b <- chromPeakData(b) + rownames(cpd_res) <- NULL + rownames(cpd_a) <- NULL + rownames(cpd_b) <- NULL + expect_equal(cpd_res, MsCoreUtils::rbindFill(cpd_a, cpd_b)) + + expect_error(.xmse_combine(list(a, 3)), "objects accepted") +}) diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index 10d0699e..d5e508fb 
100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -825,6 +825,19 @@ test_that(".chrom_peak_intensity_centWave works", { ## pks[11, ]. }) + +## That's from XcmsExperiment-functions.R +test_that(".chrom_peak_beta_metrics works", { + x <- Spectra::peaksData(spectra(xmse[2L])) + rt <- rtime(spectra(xmse[2L])) + pks <- chromPeaks(xmse)[chromPeaks(xmse)[, "sample"] == 2L, ] + + res <- .chrom_peak_beta_metrics(x, rt, pks, sampleIndex = 2L, + cn = colnames(pks)) + expect_equal(nrow(res), nrow(pks)) + +}) + ## That's from XcmsExperiment-functions.R test_that(".chrom_peak_intensity_matchedFilter works", { x <- Spectra::peaksData(spectra(xmse[2L])) @@ -1449,3 +1462,23 @@ test_that("fillChromPeaks,XcmsExperiment works with verboseBetaColumns", { pks_fil <- chromPeaks(res)[chromPeakData(res)$is_filled, ] expect_true(sum(is.na(pks_fil[, "beta_cor"])) < 4) }) + +test_that("chromPeakSummary,XcmsExperiment works", { + p <- CentWaveParam(noise = 10000, snthresh = 40, prefilter = c(3, 10000), + verboseBetaColumns = FALSE) + xmse <- findChromPeaks(mse, param = p) + mat <- chromPeakSummary(xmse,BetaDistributionParam()) + expect_true(all(c("beta_cor", "beta_snr") %in% colnames(mat))) + expect_true(is.numeric(mat)) +}) + +test_that("c,XcmsExperiment works", { + a <- loadXcmsData("faahko_sub2") + res <- c(a) + expect_equal(res, a) + + res <- c(a, a) + expect_s4_class(res, "XcmsExperiment") + expect_true(length(res) == length(a) * 2) + expect_true(nrow(chromPeaks(res)) == nrow(chromPeaks(a)) * 2) +}) diff --git a/tests/testthat/test_do_findChromPeaks-functions.R b/tests/testthat/test_do_findChromPeaks-functions.R index 32592fdc..d855dca3 100644 --- a/tests/testthat/test_do_findChromPeaks-functions.R +++ b/tests/testthat/test_do_findChromPeaks-functions.R @@ -46,29 +46,29 @@ test_that("beta calculation returns expected values", { expect_lt(.get_beta_values(1:10, zero.rm = FALSE)["beta_snr"], 2) expect_lt(.get_beta_values(1:10)["best_cor"], 
0.0001) expect_lt(.get_beta_values(1:10)["beta_snr"], 2) - + ideal_beta <- dbeta(seq(0, 1, length.out=10), 5, 5) expect_gte(.get_beta_values(ideal_beta, zero.rm = FALSE)["best_cor"], 1) expect_gte(.get_beta_values(ideal_beta, zero.rm = FALSE)["beta_snr"], 16) expect_gte(.get_beta_values(ideal_beta)["best_cor"], 0.97) expect_gte(.get_beta_values(ideal_beta)["beta_snr"], 1) - + skew_beta <- dbeta(seq(0, 1, length.out=10), 3, 5) expect_gte(.get_beta_values(ideal_beta, zero.rm = FALSE)["best_cor"], 1) expect_gte(.get_beta_values(ideal_beta, zero.rm = FALSE)["beta_snr"], 16) expect_gte(.get_beta_values(ideal_beta)["best_cor"], 0.97) expect_gte(.get_beta_values(ideal_beta)["beta_snr"], 1) - + rightskew_beta <- dbeta(seq(0, 1, length.out=10), 7, 5) expect_gt(.get_beta_values(rightskew_beta, skews = c(3,5,7))["best_cor"], 0.95) - + noise_beta <- dbeta(seq(0, 1, length.out=21), 5, 5)*10+runif(21) expect_gt(.get_beta_values(noise_beta)["best_cor"], 0.9) - + expect_no_error(.get_beta_values(runif(1))) expect_no_error(.get_beta_values(runif(10))) expect_no_error(.get_beta_values(runif(100))) - + expect_length(.get_beta_values(1), 2) expect_true(is.na(.get_beta_values(1)["best_cor"])) expect_true(is.na(.get_beta_values(1)["beta_snr"])) @@ -78,31 +78,31 @@ test_that("New beta columns perform as expected", { # faahko_xod comes from testthat.R # faahko_xod <- findChromPeaks( # faahko_od, param = CentWaveParam(noise = 10000, snthresh = 40, - # prefilter = c(3, 10000))) + # prefilter = c(3, 10000))) # Same params as before but with verboseBetaColumns = TRUE - beta_cwp <- CentWaveParam(noise = 10000, snthresh = 40, - prefilter = c(3, 10000), + beta_cwp <- CentWaveParam(noise = 10000, snthresh = 40, + prefilter = c(3, 10000), verboseBetaColumns = TRUE) faahko_xod_beta <- findChromPeaks(faahko_od, beta_cwp) - + # Unit test - check that the new object contains expected columns - expect_contains(colnames(chromPeaks(faahko_xod_beta)), + 
expect_contains(colnames(chromPeaks(faahko_xod_beta)), c("beta_cor", "beta_snr")) - + # Unit test - check that everything else in the object is the same orig_chrompeaks <- chromPeaks(faahko_xod) beta_chrompeaks <- chromPeaks(faahko_xod_beta) expect_identical(orig_chrompeaks, beta_chrompeaks[,colnames(orig_chrompeaks)]) - + # Object will contain NAs because there are peaks <5 scans wide expect_true(any(is.na(beta_chrompeaks[,"beta_snr"]))) beta_chrompeaks <- beta_chrompeaks[!is.na(beta_chrompeaks[,"beta_cor"]),] - + # Unit test - check that beta values make sense expect_true(all(beta_chrompeaks[,"beta_cor"]<=1)) expect_true(all(beta_chrompeaks[,"beta_cor"]>=-1)) expect_true(all(beta_chrompeaks[,"beta_snr"]>=0)) - + # Unit test - finds a single good peak (beta_cor>0.8, beta_snr>7) # Skinny peak copied from below peaksWithCentWave tests skinny_peak <- c(9107, 3326, 9523, 3245, 3429, 9394, 1123, 935, 5128, 8576, @@ -113,33 +113,33 @@ test_that("New beta columns perform as expected", { 8283, 3410, 5935, 3332, 7041, 3284, 7478, 76, 3739, 2158, 5507) skinny_peak_rt <- seq_along(skinny_peak)+100 cw_output_beta <- .centWave_orig(int = skinny_peak, scantime = skinny_peak_rt, - mz=sort(rnorm(60)/1000+100), + mz=sort(rnorm(60)/1000+100), valsPerSpect = rep(1, length(skinny_peak)), - peakwidth = c(20, 80), extendLengthMSW = TRUE, + peakwidth = c(20, 80), extendLengthMSW = TRUE, verboseBetaColumns = TRUE, snthresh = 0) expect_equal(nrow(cw_output_beta), 1) # Known values to ensure performance doesn't degrade unexpectedly expect_gt(cw_output_beta[,"beta_cor"], 0.8) expect_gt(cw_output_beta[,"beta_snr"], 7) - + # Unit test - finds a single noise peak (beta_cor < 0.5, beta_snr < 6) # set.seed(123) # noise_peak <- round(runif(100), 3) - noise_peak <- c(0.288, 0.788, 0.409, 0.883, 0.94, 0.046, 0.528, 0.892, 0.551, - 0.457, 0.957, 0.453, 0.678, 0.573, 0.103, 0.9, 0.246, 0.042, - 0.328, 0.955, 0.89, 0.693, 0.641, 0.994, 0.656, 0.709, 0.544, - 0.594, 0.289, 0.147, 0.963, 0.902, 0.691, 
0.795, 0.025, 0.478, - 0.758, 0.216, 0.318, 0.232, 0.143, 0.415, 0.414, 0.369, 0.152, - 0.139, 0.233, 0.466, 0.266, 0.858, 0.046, 0.442, 0.799, 0.122, - 0.561, 0.207, 0.128, 0.753, 0.895, 0.374, 0.665, 0.095, 0.384, - 0.274, 0.815, 0.449, 0.81, 0.812, 0.794, 0.44, 0.754, 0.629, - 0.71, 0.001, 0.475, 0.22, 0.38, 0.613, 0.352, 0.111, 0.244, - 0.668, - 0.418, 0.788, 0.103, 0.435, 0.985, 0.893, 0.886, 0.175, 0.131, + noise_peak <- c(0.288, 0.788, 0.409, 0.883, 0.94, 0.046, 0.528, 0.892, 0.551, + 0.457, 0.957, 0.453, 0.678, 0.573, 0.103, 0.9, 0.246, 0.042, + 0.328, 0.955, 0.89, 0.693, 0.641, 0.994, 0.656, 0.709, 0.544, + 0.594, 0.289, 0.147, 0.963, 0.902, 0.691, 0.795, 0.025, 0.478, + 0.758, 0.216, 0.318, 0.232, 0.143, 0.415, 0.414, 0.369, 0.152, + 0.139, 0.233, 0.466, 0.266, 0.858, 0.046, 0.442, 0.799, 0.122, + 0.561, 0.207, 0.128, 0.753, 0.895, 0.374, 0.665, 0.095, 0.384, + 0.274, 0.815, 0.449, 0.81, 0.812, 0.794, 0.44, 0.754, 0.629, + 0.71, 0.001, 0.475, 0.22, 0.38, 0.613, 0.352, 0.111, 0.244, + 0.668, + 0.418, 0.788, 0.103, 0.435, 0.985, 0.893, 0.886, 0.175, 0.131, 0.653, 0.344, 0.657, 0.32, 0.188, 0.782, 0.094, 0.467, 0.512) - cw_output_beta <- .centWave_orig(int = noise_peak*100000, + cw_output_beta <- .centWave_orig(int = noise_peak*100000, scantime = seq_along(noise_peak), - mz=rep(530.1, length(noise_peak)), + mz=rep(530.1, length(noise_peak)), valsPerSpect = rep(1, length(noise_peak)), peakwidth = c(20, 80), extendLengthMSW = TRUE, verboseBetaColumns = TRUE, snthresh = 0) @@ -149,6 +149,21 @@ test_that("New beta columns perform as expected", { expect_lt(cw_output_beta[,"beta_snr"], 6) }) +test_that(".get_beta_values works with chromatographic and MS data", { + vals <- c(2, 3, 4, 6, 8, 10, 7, 6, 4, 3, 2) + res <- .get_beta_values(vals) + ## intensity values with duplicated retention times + vals <- c(2, 3, 4, 6, 2, 8, 10, 7, 6, 4, 3, 2) + rts <- c(1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10) + res_a <- .get_beta_values(vals) + expect_true(res_a[1L] < res[1L]) + ## 
WARNING: does not work if we have duplicated retention times! + expect_warning( + res_b <- .get_beta_values(vals, rts) + ) + expect_true(is.na(res_b[1L])) +}) + test_that("do_findChromPeaks_centWaveWithPredIsoROIs works", { skip_on_os(os = "windows", arch = "i386") diff --git a/tests/testthat/test_functions-Chromatogram.R b/tests/testthat/test_functions-Chromatogram.R index 6616d20b..10e78ac4 100644 --- a/tests/testthat/test_functions-Chromatogram.R +++ b/tests/testthat/test_functions-Chromatogram.R @@ -52,13 +52,15 @@ test_that(".chrom_merge_neighboring_peaks works", { expect_equal(res$chromPeakData$index, 3L) ## Check "into" calculation. - pks <- rbind(pks[-c(1, 2), ], - c(18, pks[2, "rtmin"], pks[2, "rtmin"] + 4, NA_real_, - NA_real_, 3, 3), - c(20, pks[2, "rtmax"] - 5, pks[2, "rtmax"], NA_real_, - NA_real_, 9, 8)) + pks2 <- rbind(pks[-c(1, 2), ], + c(NA_real_, NA_real_, NA_real_, 18, pks[2, "rtmin"], + pks[2, "rtmin"] + 4, NA_real_, + NA_real_, 3, 3), + c(NA_real_, NA_real_, NA_real_, 20, pks[2, "rtmax"] - 5, + pks[2, "rtmax"], NA_real_, + NA_real_, 9, 8)) res <- .chrom_merge_neighboring_peaks( - chr, pks, pkd, diffRt = 5, minProp = 0.75) + chr, pks2, pkd, diffRt = 5, minProp = 0.75) expect_equal(unname(res$chromPeaks[1, "into"]), unname(chromPeaks(xchr)[2, "into"])) }) diff --git a/tests/testthat/test_methods-Chromatogram.R b/tests/testthat/test_methods-Chromatogram.R index 2746b6ef..a2884fae 100644 --- a/tests/testthat/test_methods-Chromatogram.R +++ b/tests/testthat/test_methods-Chromatogram.R @@ -55,3 +55,22 @@ test_that("removeIntensity,Chromatogram works", { expect_equal(intensity(res), c(NA_real_, NA_real_, NA_real_, 22, 34, NA_real_, NA_real_)) }) + +test_that(".add_mz works", { + a <- matrix(nrow = 0, ncol = 3) + colnames(a) <- c("rt", "rtmin", "rtmax") + res <- .add_mz(a, c(1, 2)) + expect_equal(ncol(res), 6) + expect_equal(colnames(res), c("mz", "mzmin", "mzmax", + "rt", "rtmin", "rtmax")) + expect_true(nrow(res) == 0) + a <- matrix(nrow = 2, ncol =
3) + colnames(a) <- c("rt", "rtmin", "rtmax") + res <- .add_mz(a, c(1, 2)) + expect_equal(ncol(res), 6) + expect_equal(colnames(res), c("mz", "mzmin", "mzmax", + "rt", "rtmin", "rtmax")) + expect_equal(res[, "mzmin"], c(1, 1)) + expect_equal(res[, "mzmax"], c(2, 2)) + expect_equal(res[, "mz"], c(1.5, 1.5)) +}) diff --git a/vignettes/references.bib b/vignettes/references.bib index 78d8c894..bf6a2aed 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -122,3 +122,58 @@ @article{gattoMSnbaseEfficientElegant2020a language = {eng}, pmid = {32902283} } + +@article{Kumler2023, + title = "Picky with peakpicking: assessing chromatographic peak quality + with simple metrics in metabolomics", + author = "Kumler, William and Hazelton, Bryna J and Ingalls, Anitra E", + abstract = "BACKGROUND: Chromatographic peakpicking continues to represent a + significant bottleneck in automated LC-MS workflows. + Uncontrolled false discovery rates and the lack of + manually-calibrated quality metrics require researchers to + visually evaluate individual peaks, requiring large amounts of + time and breaking replicability. This problem is exacerbated in + noisy environmental datasets and for novel separation methods + such as hydrophilic interaction columns in metabolomics, + creating a demand for a simple, intuitive, and robust metric of + peak quality. RESULTS: Here, we manually labeled four HILIC + oceanographic particulate metabolite datasets to assess the + performance of individual peak quality metrics. We used these + datasets to construct a predictive model calibrated to the + likelihood that visual inspection by an MS expert would include + a given mass feature in the downstream analysis. We implemented + two novel peak quality metrics, a custom signal-to-noise metric + and a test of similarity to a bell curve, both calculated from + the raw data in the extracted ion chromatogram, and found that + these outperformed existing measurements of peak quality. 
A + simple logistic regression model built on two metrics reduced + the fraction of false positives in the analysis from 70-80\% + down to 1-5\% and showed minimal overfitting when applied to + novel datasets. We then explored the implications of this + quality thresholding on the conclusions obtained by the + downstream analysis and found that while only 10\% of the + variance in the dataset could be explained by depth in the + default output from the peakpicker, approximately 40\% of the + variance was explained when restricted to high-quality peaks + alone. CONCLUSIONS: We conclude that the poor performance of + peakpicking algorithms significantly reduces the power of both + univariate and multivariate statistical analyses to detect + environmental differences. We demonstrate that simple models + built on intuitive metrics and derived from the raw data are + more robust and can outperform more complex models when applied + to new data. Finally, we show that in properly curated datasets, + depth is a major driver of variability in the marine microbial + metabolome and identify several interesting metabolite trends + for future investigation.", + journal = "BMC Bioinformatics", + publisher = "Springer Science and Business Media LLC", + volume = 24, + number = 1, + pages = "404", + month = oct, + year = 2023, + keywords = "Ground Truth Dataset; Marine environment; Mass-spectrometry; + Metabolomics; Peakpicking; XCMS", + copyright = "https://creativecommons.org/licenses/by/4.0", + language = "en" +} diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index 02abbe13..1c4de245 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -434,6 +434,56 @@ numeric). chromPeakData(faahko) ``` +### Chromatographic peak quality + +Based on the publication by Kumler et al. published in 2023 [@Kumler2023], new +quality metrics (*beta_cor* and *beta_snr*) were added to *xcms*. 
*beta_cor* +indicates how bell-shaped the chromatographic peak is and *beta_snr* +estimates the signal-to-noise ratio using the residuals from this fit. These +metrics can be calculated during peak picking by setting `verboseBetaColumns = +TRUE` in the `CentWaveParam` object, or they can be calculated afterwards by +using the `chromPeakSummary()` function with the `XcmsExperiment` object and +the `BetaDistributionParam` parameter object as input: + +```{r peak-detection-chromPeakSummary} +beta_metrics <- chromPeakSummary(faahko, BetaDistributionParam()) +head(beta_metrics) + +``` + +The result returned by `chromPeakSummary()` is thus a numeric matrix with the +values for these quality estimates, one row for each chromatographic peak. +Using summary statistics, one can explore the distribution of these metrics in +the data. + +```{r beta-metrics} +summary(beta_metrics) +``` + +Visual inspection gives a better idea of what these metrics represent in terms +of peak quality in the data at hand. This information can be used, e.g., to +filter out peaks that don't meet a chosen quality metric threshold. In order to +plot the detected peaks, their EIC can be extracted with the function +`chromPeakChromatograms()`. Examples of a peak with a high *beta_cor* and of +a peak with a low *beta_cor* score are given below. + +```{r chromPeakChromatograms, message=FALSE} +beta_metrics[c(4, 6), ] +eics <- chromPeakChromatograms( + faahko, peaks = rownames(chromPeaks(faahko))[c(4, 6)]) +``` + +```{r peak-quality-metrics, fig.width = 10, fig.height = 5, fig.cap = "Plots of high and low quality peaks.
Left: peak CP0004 with a beta_cor = 0.98, right: peak CP0006 with a beta_cor = 0.13."} +peak_1 <- eics[1] +peak_2 <- eics[2] +par(mfrow = c(1, 2)) +plot(peak_1) +plot(peak_2) +``` + + +### Refining peak detection + Peak detection will not always work perfectly for all types of peak shapes present in the data set leading to peak detection artifacts, such as (partially or completely) overlapping peaks or artificially split peaks (common issues @@ -462,7 +512,7 @@ faahko_pp <- refineChromPeaks(faahko, mpp) An example for a merged peak is given below. -```{r peak-postprocessing-merged, fig.widht = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The splitted peak was merged into one."} +```{r peak-postprocessing-merged, fig.width = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The split peak was merged into one."} mzr_1 <- 305.1 + c(-0.01, 0.01) chr_1 <- chromatogram(faahko[1], mz = mzr_1) chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) @@ -477,7 +527,7 @@ plot(chr_2) peaks into a single one (right panel in the figure above). Other close peaks, with a lower intensity between them, were however not merged (see below). -```{r peak-postprocessing-not-merged, fig.widht = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The peaks were not merged."} +```{r peak-postprocessing-not-merged, fig.width = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The peaks were not merged."} mzr_1 <- 496.2 + c(-0.01, 0.01) chr_1 <- chromatogram(faahko[1], mz = mzr_1) chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1)