From 156788a1f6b775ee072a04202d67346a476ba8db Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Tue, 24 Sep 2024 16:01:08 +0200 Subject: [PATCH] refactor: little fixes --- NEWS.md | 2 ++ R/AllGenerics.R | 20 +++++++++++ R/XcmsExperiment.R | 11 +++--- man/chromPeakSummary.Rd | 50 +++++++++++++++++++++++----- tests/testthat/test_Param_classes.R | 5 +-- tests/testthat/test_XcmsExperiment.R | 8 ++--- vignettes/xcms.Rmd | 42 ++++++++++++----------- 7 files changed, 96 insertions(+), 42 deletions(-) diff --git a/NEWS.md b/NEWS.md index 48cff211..f3f1dec3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,8 @@ an idealized bell shape (*beta*) during gap filling for centWave-based chromatographic peak detection with parameter `verboseBetaColumns = TRUE`. - Add `chromPeakSummary` generic (issue #705). +- Add `chromPeakSummary()` method to calculate the *beta* quality metrics. + ## Changes in version 4.3.3 diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 00bfeebf..c1e3ec2b 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -551,6 +551,18 @@ setGeneric("chromPeakSpectra", function(object, ...) #' #' Currently implemented methods/parameter classes are: #' +#' - `BetaDistributionParam`: calculates the *beta_cor* and *beta_snr* quality +#' metrics as described in (Kumler 2023) representing the result from a +#' (correlation) test of similarity to a bell curve and the signal-to-noise +#' ratio calculated on the residuals of this test. +#' +#' @param BPPARAM Parallel processing setup. See [bpparam()] for details. +#' +#' @param chunkSize `integer(1)` defining the number of samples from which data +#' should be loaded and processed at a time. +#' +#' @param msLevel `integer(1)` with the MS level of the chromatographic peaks +#' on which the metric should be calculated. #' #' @param object an *xcms* result object containing information on #' identified chromatographic peaks. @@ -569,7 +581,15 @@ setGeneric("chromPeakSpectra", function(object, ...) 
#' #' @author Pablo Vangeenderhuysen, Johannes Rainer #' +#' @md +#' +#' @references +#' +#' Kumler W, Hazelton B J and Ingalls A E (2023) "Picky with peakpicking: +#' assessing chromatographic peak quality with simple metrics in metabolomics" +#' *BMC Bioinformatics* 24(1):404. doi: 10.1186/s12859-023-05533-4 #' +#' @export setGeneric("chromPeakSummary", function(object, param, ...) standardGeneric("chromPeakSummary")) diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 59f24388..5b76de69 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -2024,11 +2024,11 @@ setMethod( if (!hasChromPeaks(object, msLevel = msLevel)) stop("No ChromPeaks definitions for MS level ", msLevel, " present.") ## Define region to calculate metrics from for each file - cp <- chromPeaks(object) - f <- factor(cp[,"sample"],seq_along(object)) - pal <- split.data.frame(cp[,c("mzmin","mzmax","rtmin","rtmax")],f) + cp <- chromPeaks(object, msLevel = msLevel) + f <- factor(cp[,"sample"], seq_along(object)) + pal <- split.data.frame(cp[, c("mzmin", "mzmax", "rtmin", "rtmax")], f) names(pal) <- seq_along(pal) - ## Manual chunk processing because we have to split `object` and `pal` + ## Manual chunk processing because we have to split `object` and `pal` idx <- seq_along(object) chunks <- split(idx, ceiling(idx / chunkSize)) pb <- progress_bar$new(format = paste0("[:bar] :current/:", @@ -2042,7 +2042,8 @@ setMethod( .xmse_integrate_chrom_peaks( .subset_xcms_experiment(object, i = z, keepAdjustedRtime = TRUE, ignoreHistory = TRUE), - pal = pal[z], intFun = .chrom_peak_beta_metrics, BPPARAM = BPPARAM) + pal = pal[z], intFun = .chrom_peak_beta_metrics, + msLevel = msLevel, BPPARAM = BPPARAM) }) res <- do.call(rbind, res) pb$tick() diff --git a/man/chromPeakSummary.Rd b/man/chromPeakSummary.Rd index 016fd858..a7f6edd3 100644 --- a/man/chromPeakSummary.Rd +++ b/man/chromPeakSummary.Rd @@ -1,37 +1,69 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in 
R/AllGenerics.R +% Please edit documentation in R/AllGenerics.R, R/XcmsExperiment.R, +% R/functions-Params.R \name{chromPeakSummary} \alias{chromPeakSummary} +\alias{chromPeakSummary,XcmsExperiment,BetaDistributionParam-method} +\alias{BetaDistributionParam} \title{Chromatographic peak summaries} \usage{ chromPeakSummary(object, param, ...) + +\S4method{chromPeakSummary}{XcmsExperiment,BetaDistributionParam}( + object, + param, + msLevel = 1L, + chunkSize = 2L, + BPPARAM = bpparam() +) + +BetaDistributionParam() } \arguments{ -\item{object}{an *xcms* result object containing information on +\item{object}{an \emph{xcms} result object containing information on identified chromatographic peaks.} \item{param}{a parameter object defining the method/summaries that should be calculated (see description above for supported parameter classes).} \item{...}{additional arguments passed to the method implementation.} + +\item{msLevel}{\code{integer(1)} with the MS level of the chromatographic peaks +on which the metric should be calculated.} + +\item{chunkSize}{\code{integer(1)} defining the number of samples from which data +should be loaded and processed at a time.} + +\item{BPPARAM}{Parallel processing setup. See \code{\link[=bpparam]{bpparam()}} for details.} } \value{ -A `matrix` or `data.frame` with the same number of rows as there are +A \code{matrix} or \code{data.frame} with the same number of rows as there are chromatographic peaks. Columns contain the calculated values. The number of columns, their names and content depend on the used parameter object. See the respective documentation above for more details. } \description{ -The `chromPeakSummary()` method calculates summary statistic or other -metrics for each of the identified chromatographic peaks in an *xcms* -result object, such as the [XcmsExperiment()]. Different metrics can be -calculated, depending (and configured) using dedicated *parameter* classes. 
-As a result, the method returns a `matrix` or `data.frame` with one row +The \code{chromPeakSummary()} method calculates summary statistic or other +metrics for each of the identified chromatographic peaks in an \emph{xcms} +result object, such as the \code{\link[=XcmsExperiment]{XcmsExperiment()}}. Different metrics can be +calculated, depending (and configured) using dedicated \emph{parameter} classes. +As a result, the method returns a \code{matrix} or \code{data.frame} with one row per chromatographic peak. Each column contains calculated values, depending on the used method/parameter class. Currently implemented methods/parameter classes are: +\itemize{ +\item \code{BetaDistributionParam}: calculates the \emph{beta_cor} and \emph{beta_snr} quality +metrics as described in (Kumler 2023) representing the result from a +(correlation) test of similarity to a bell curve and the signal-to-noise +ratio calculated on the residuals of this test. +} +} +\references{ +Kumler W, Hazelton B J and Ingalls A E (2023) "Picky with peakpicking: +assessing chromatographic peak quality with simple metrics in metabolomics" +\emph{BMC Bioinformatics} 24(1):404. 
doi: 10.1186/s12859-023-05533-4 } \author{ -Pable Vangeenderhuysen, Johannes Rainer +Pablo Vangeenderhuysen, Johannes Rainer } diff --git a/tests/testthat/test_Param_classes.R b/tests/testthat/test_Param_classes.R index 2b43e7af..4f1754cb 100644 --- a/tests/testthat/test_Param_classes.R +++ b/tests/testthat/test_Param_classes.R @@ -1005,9 +1005,6 @@ test_that("FilterIntensityParam works", { test_that("BetaDistributionParam works", { - skip_on_os(os = "windows", arch = "i386") - res <- BetaDistributionParam() - expect_true(is(res, "BetaDistributionParam ")) - + expect_true(is(res, "BetaDistributionParam")) }) diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index 7e543866..e6ddb8a6 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -831,11 +831,11 @@ test_that(".chrom_peak_beta_metrics works", { x <- Spectra::peaksData(spectra(xmse[2L])) rt <- rtime(spectra(xmse[2L])) pks <- chromPeaks(xmse)[chromPeaks(xmse)[, "sample"] == 2L, ] - + res <- .chrom_peak_beta_metrics(x, rt, pks, sampleIndex = 2L, cn = colnames(pks)) expect_equal(nrow(res), nrow(pks)) - + }) ## That's from XcmsExperiment-functions.R @@ -1458,8 +1458,6 @@ test_that("chromPeakSummary,XcmsExperiment works", { verboseBetaColumns = FALSE) xmse <- findChromPeaks(mse, param = p) mat <- chromPeakSummary(xmse,BetaDistributionParam()) - expect_true(all(c("beta_cor", "beta_snr") %in% colnames(res))). + expect_true(all(c("beta_cor", "beta_snr") %in% colnames(mat))) expect_true(is.numeric(mat)) - }) - diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index a38458b7..41a031c8 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -433,42 +433,46 @@ chromPeakData(faahko) ### Chromatographic peak quality -Based on the publication by Kumler et al. published in 2023 [@Kumler2023], new -quality metrics (`beta_cor` and `beta_snr`) were added to *xcms*. 
`beta_cor` -indicates how bell-shaped the peak is and `beta_snr` is estimating the -signal-to-noise ratio using the residuals. These metrics can be calculated -during peak picking by setting `verboseBetaColumns = TRUE` in the -`CentWaveParam` object, or they can becalculated afterwards by using the -`chromPeakSummary` function with the `XcmsExperiment` object and -`BetaDistributionParam` parameter object as input. +Based on the publication by Kumler et al. published in 2023 [@Kumler2023], new +quality metrics (*beta_cor* and *beta_snr*) were added to *xcms*. *beta_cor* +indicates how bell-shaped the chromatographic peak is and *beta_snr* is +estimating the signal-to-noise ratio using the residuals from this fit. These +metrics can be calculated during peak picking by setting `verboseBetaColumns = +TRUE` in the `CentWaveParam` object, or they can be calculated afterwards by +using the `chromPeakSummary()` function with the `XcmsExperiment` object and +the `BetaDistributionParam` parameter object as input: ```{r peak-detection-chromPeakSummary} -beta_metrics <- chromPeakSummary(faahko,BetaDistributionParam()) +beta_metrics <- chromPeakSummary(faahko, BetaDistributionParam()) head(beta_metrics) ``` -Using summary statistics, one can explore the distribution of these metrics in +The result returned by `chromPeakSummary()` is thus a numeric matrix with the +values for these quality estimates, one row for each chromatographic peak. +Using summary statistics, one can explore the distribution of these metrics in the data. ```{r beta-metrics} summary(beta_metrics) ``` -Visual inspection gives a better idea of what these metrics represent in terms -of peak quality in the data at hand. This information can be used to e.g. -filter out peaks that don't meet a chosen quality metric threshold. In order to -plot the detected peaks, their EIC can be extracted with the function -`chromPeakChromatograms`. 
An example of a peak with a high `beta_cor` a peak -with a low `beta_cor` score is given below. +Visual inspection gives a better idea of what these metrics represent in terms +of peak quality in the data at hand. This information can be used to e.g. +filter out peaks that don't meet a chosen quality metric threshold. In order to +plot the detected peaks, their EIC can be extracted with the function +`chromPeakChromatograms`. An example of a peak with a high *beta_cor* and of +a peak with a low *beta_cor* score is given below. ```{r chromPeakChromatograms, message=FALSE} -eics <- chromPeakChromatograms(faahko) +beta_metrics[c(4, 6), ] +eics <- chromPeakChromatograms( + faahko, peaks = rownames(chromPeaks(faahko))[c(4, 6)]) ``` ```{r peak-quality-metrics, fig.widht = 10, fig.height = 5, fig.cap = "Plots of high and low quality peaks. Left: peak CP0004 with a beta_cor = 0.98, right: peak CP0006 with a beta_cor = 0.13."} -peak_1 <- eics[4] -peak_2 <- eics[6] +peak_1 <- eics[1] +peak_2 <- eics[2] par(mfrow = c(1, 2)) plot(peak_1) plot(peak_2)