Skip to content

Commit

Permalink
Merge pull request #685 from wkumler/devel
Browse files Browse the repository at this point in the history
Adding new peak quality metrics to findChromPeaks for CentWave
  • Loading branch information
sneumann authored Nov 28, 2023
2 parents f1cbe52 + 6eb7e5b commit fe214c9
Show file tree
Hide file tree
Showing 12 changed files with 338 additions and 51 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ importFrom("stats", "aov", "approx", "convolve", "cor", "deriv3",
"dist", "fft", "fitted", "lm", "loess", "lsfit", "median",
"na.omit", "nextn", "nls", "predict", "pt", "quantile",
"runmed", "sd", "stepfun", "weighted.mean", "density", "approxfun",
"rnorm", "runif")
"rnorm", "runif", "dbeta")
importFrom("utils", "flush.console", "head", "object.size",
"packageVersion", "read.csv", "tail", "write.csv",
"write.table")
Expand Down
18 changes: 14 additions & 4 deletions R/DataClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,14 @@ setClass("XProcessHistory",
#' method to extend the EIC to a integer base-2 length prior to being passed to
#' \code{convolve} rather than the default "reflect" method. See
#' https://github.com/sneumann/xcms/issues/445 for more information.
#'
#' @param verboseBetaColumns Option to calculate two additional metrics of peak
#' quality via comparison to an idealized bell curve. Adds \code{beta_cor} and
#' \code{beta_snr} to the \code{chromPeaks} output, corresponding to a Pearson
#' correlation coefficient to a bell curve with several degrees of skew as well
#' as an estimate of signal-to-noise using the residuals from the best-fitting
#' bell curve. See https://github.com/sneumann/xcms/pull/685 and
#' https://doi.org/10.1186/s12859-023-05533-4 for more information.
#'
#' @details
#'
Expand Down Expand Up @@ -491,7 +499,7 @@ NULL
#' for a chromatographic peak detection using the centWave method. Instances
#' should be created with the \code{CentWaveParam} constructor.
#'
#' @slot ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,extendLengthMSW See corresponding parameter above. Slots values should exclusively be accessed
#' @slot ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,extendLengthMSW,verboseBetaColumns See corresponding parameter above. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname findChromPeaks-centWave
Expand Down Expand Up @@ -533,7 +541,8 @@ setClass("CentWaveParam",
roiList = "list",
firstBaselineCheck = "logical",
roiScales = "numeric",
extendLengthMSW = "logical"
extendLengthMSW = "logical",
verboseBetaColumns = "logical"
),
contains = c("Param"),
prototype = prototype(
Expand All @@ -550,7 +559,8 @@ setClass("CentWaveParam",
roiList = list(),
firstBaselineCheck = TRUE,
roiScales = numeric(),
extendLengthMSW = FALSE
extendLengthMSW = FALSE,
verboseBetaColumns = FALSE
),
validity = function(object) {
msg <- character()
Expand Down Expand Up @@ -1242,7 +1252,7 @@ NULL
#' \code{\link{CentWaveParam}} for all methods and arguments this class
#' inherits.
#'
#' @slot ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity
#' @slot ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,extendLengthMSW,verboseBetaColumns,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity
#' See corresponding parameter above.
#'
#' @rdname findChromPeaks-centWaveWithPredIsoROIs
Expand Down
180 changes: 141 additions & 39 deletions R/do_findChromPeaks-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@
#' \item{scmin}{Left peak limit found by wavelet analysis (scan number).}
#' \item{scmax}{Right peak limit found by wavelet analysis (scan numer).}
#' }
#' Additional columns for \code{verboseBetaColumns = TRUE}:
#' \describe{
#'
#' \item{beta_cor}{Correlation between an "ideal" bell curve and the raw data}
#' \item{beta_snr}{Signal-to-noise residuals calculated from the beta_cor fit}
#' }
#'
#' @author Ralf Tautenhahn, Johannes Rainer
#'
Expand Down Expand Up @@ -145,7 +151,8 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,
firstBaselineCheck = TRUE,
roiScales = NULL,
sleep = 0,
extendLengthMSW = FALSE) {
extendLengthMSW = FALSE,
verboseBetaColumns = FALSE) {
if (getOption("originalCentWave", default = TRUE)) {
## message("DEBUG: using original centWave.")
.centWave_orig(mz = mz, int = int, scantime = scantime,
Expand All @@ -156,7 +163,8 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,
verboseColumns = verboseColumns, roiList = roiList,
firstBaselineCheck = firstBaselineCheck,
roiScales = roiScales, sleep = sleep,
extendLengthMSW = extendLengthMSW)
extendLengthMSW = extendLengthMSW,
verboseBetaColumns = verboseBetaColumns)
} else {
## message("DEBUG: using modified centWave.")
.centWave_new(mz = mz, int = int, scantime = scantime,
Expand All @@ -178,7 +186,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,
noise = 0, ## noise.local=TRUE,
sleep = 0, verboseColumns = FALSE, roiList = list(),
firstBaselineCheck = TRUE, roiScales = NULL,
extendLengthMSW = FALSE) {
extendLengthMSW = FALSE, verboseBetaColumns = FALSE) {
## Input argument checking.
if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect))
stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'",
Expand Down Expand Up @@ -218,6 +226,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,
"into", "intb", "maxo", "sn")
verbosenames <- c("egauss", "mu", "sigma", "h", "f", "dppm", "scale",
"scpos", "scmin", "scmax", "lmin", "lmax")
betanames <- c("beta_cor", "beta_snr")

## Peak width: seconds to scales
scalerange <- round((peakwidth / mean(diff(scantime))) / 2)
Expand All @@ -226,15 +235,19 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,
scalerange <- scalerange[-z]
if (length(scalerange) < 1) {
warning("No scales? Please check peak width!")
if (verboseColumns) {
nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
length(verbosenames))
colnames(nopeaks) <- c(basenames, verbosenames)
} else {
nopeaks <- matrix(nrow = 0, ncol = length(basenames))
colnames(nopeaks) <- c(basenames)
}
return(invisible(nopeaks))
matrix_length <- length(basenames)
matrix_names <- basenames
if (verboseColumns) {
matrix_length <- matrix_length + length(verbosenames)
matrix_names <- c(matrix_names, verbosenames)
}
if (verboseBetaColumns) {
matrix_length <- matrix_length + length(betanames)
matrix_names <- c(matrix_names, betanames)
}
nopeaks <- matrix(nrow = 0, ncol = matrix_length)
colnames(nopeaks) <- matrix_names
return(invisible(nopeaks))
}

if (length(scalerange) > 1)
Expand Down Expand Up @@ -319,15 +332,19 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,
## ROI.list <- findmzROI(object,scanrange=scanrange,dev=ppm * 1e-6,minCentroids=minCentroids, prefilter=prefilter, noise=noise)
if (length(roiList) == 0) {
warning("No ROIs found! \n")
if (verboseColumns) {
nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
length(verbosenames))
colnames(nopeaks) <- c(basenames, verbosenames)
} else {
nopeaks <- matrix(nrow = 0, ncol = length(basenames))
colnames(nopeaks) <- c(basenames)
}
return(invisible(nopeaks))
matrix_length <- length(basenames)
matrix_names <- basenames
if (verboseColumns) {
matrix_length <- matrix_length + length(verbosenames)
matrix_names <- c(matrix_names, verbosenames)
}
if (verboseBetaColumns) {
matrix_length <- matrix_length + length(betanames)
matrix_names <- c(matrix_names, betanames)
}
nopeaks <- matrix(nrow = 0, ncol = matrix_length)
colnames(nopeaks) <- matrix_names
return(invisible(nopeaks))
}
}

Expand Down Expand Up @@ -525,7 +542,8 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,
td[best.scale.pos],
td[lwpos],
td[rwpos], ## Peak positions guessed from the wavelet's (scan nr)
NA, NA)) ## Peak limits (scan nr)
NA, NA, ## Peak limits (scan nr)
NA, NA)) ## Beta fitting values
peakinfo <- rbind(peakinfo,
c(best.scale, best.scale.nr,
best.scale.pos, lwpos, rwpos))
Expand All @@ -538,7 +556,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,

## postprocessing
if (!is.null(peaks)) {
colnames(peaks) <- c(basenames, verbosenames)
colnames(peaks) <- c(basenames, verbosenames, betanames)
colnames(peakinfo) <- c("scale", "scaleNr", "scpos",
"scmin", "scmax")
for (p in 1:dim(peaks)[1]) {
Expand All @@ -561,6 +579,14 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,
lm <- .narrow_rt_boundaries(lm, d)
lm_seq <- lm[1]:lm[2]
pd <- d[lm_seq]

# Implement a fit of a skewed gaussian (beta distribution)
# for peak shape and within-peak signal-to-noise ratio
# See https://doi.org/10.1186/s12859-023-05533-4 and
# https://github.com/sneumann/xcms/pull/685
if(verboseBetaColumns){
peaks[p, c("beta_cor", "beta_snr")] <- .get_beta_values(pd)
}

peakrange <- td[lm]
peaks[p, "rtmin"] <- scantime[peakrange[1]]
Expand Down Expand Up @@ -675,22 +701,30 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect,

if (length(peaklist) == 0) {
warning("No peaks found!")

if (verboseColumns) {
nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
length(verbosenames))
colnames(nopeaks) <- c(basenames, verbosenames)
} else {
nopeaks <- matrix(nrow = 0, ncol = length(basenames))
colnames(nopeaks) <- c(basenames)
}
message(" FAIL: none found!")
return(nopeaks)
matrix_length <- length(basenames)
matrix_names <- basenames
if (verboseColumns) {
matrix_length <- matrix_length + length(verbosenames)
matrix_names <- c(matrix_names, verbosenames)
}
if (verboseBetaColumns) {
matrix_length <- matrix_length + length(betanames)
matrix_names <- c(matrix_names, betanames)
}
nopeaks <- matrix(nrow = 0, ncol = matrix_length)
colnames(nopeaks) <- matrix_names
message(" FAIL: none found!")
return(nopeaks)
}
p <- do.call(rbind, peaklist)
if (!verboseColumns)
p <- p[, basenames, drop = FALSE]

keepcols <- basenames
if (verboseColumns){
keepcols <- c(keepcols, verbosenames)
}
if(verboseBetaColumns){
keepcols <- c(keepcols, betanames)
}
p <- p[, keepcols, drop = FALSE]
uorder <- order(p[, "into"], decreasing = TRUE)
pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE])
uindex <- rectUnique(pm, uorder, mzdiff, ydiff = -0.00001) ## allow adjacent peaks
Expand Down Expand Up @@ -2623,6 +2657,11 @@ do_findKalmanROI <- function(mz, int, scantime, valsPerSpect,
#' \item{scmin}{Left peak limit found by wavelet analysis (scan number).}
#' \item{scmax}{Right peak limit found by wavelet analysis (scan numer).}
#' }
#' Additional columns for \code{verboseBetaColumns = TRUE}:
#' \describe{
#' \item{beta_cor}{Correlation between an "ideal" bell curve and the raw data}
#' \item{beta_snr}{Signal-to-noise residuals calculated from the beta_cor fit}
#' }
#'
#' @rdname do_findChromPeaks_centWaveWithPredIsoROIs
#'
Expand All @@ -2634,7 +2673,8 @@ do_findChromPeaks_centWaveWithPredIsoROIs <-
verboseColumns = FALSE, roiList = list(),
firstBaselineCheck = TRUE, roiScales = NULL, snthreshIsoROIs = 6.25,
maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE,
polarity = "unknown", extendLengthMSW = FALSE) {
polarity = "unknown", extendLengthMSW = FALSE,
verboseBetaColumns = FALSE) {
## Input argument checking: most of it will be done in
## do_findChromPeaks_centWave
polarity <- match.arg(polarity, c("positive", "negative", "unknown"))
Expand All @@ -2655,7 +2695,8 @@ do_findChromPeaks_centWaveWithPredIsoROIs <-
roiList = roiList,
firstBaselineCheck = firstBaselineCheck,
roiScales = roiScales,
extendLengthMSW = extendLengthMSW)
extendLengthMSW = extendLengthMSW,
verboseBetaColumns = verboseBetaColumns)
return(do_findChromPeaks_addPredIsoROIs(mz = mz, int = int,
scantime = scantime,
valsPerSpect = valsPerSpect,
Expand Down Expand Up @@ -3668,3 +3709,64 @@ peaksWithCentWave <- function(int, rt,
}
lm
}


#' @description
#'
#' Calculate beta parameters for a chromatographic peak, both its similarity
#' to a bell curve of varying degrees of skew and the standard deviation of the
#' residuals after the best-fit bell is normalized and subtracted. This function
#' requires at least 5 scans or it will return NA for both parameters.
#'
#' @param intensity A numeric vector corresponding to the peak intensities
#' @param rtime A numeric vector corresponding to the retention times of each
#' intensity. If not provided, intensities will be assumed to be equally spaced.
#' @param skews A numeric vector of the skews to try, corresponding to the
#' shape1 of dbeta with a shape2 of 5. Values less than 5 will be increasingly
#' right-skewed, while values greater than 5 will be left-skewed.
#' @param zero.rm Boolean value controlling whether "missing" scans are dropped
#' prior to curve fitting. The default, TRUE, will remove intensities of zero
#' or NA
#'
#' @author William Kumler
#'
#' @noRd
.get_beta_values <- function(intensity, rtime = seq_along(intensity),
skews=c(3, 3.5, 4, 4.5, 5), zero.rm = TRUE){
if (zero.rm) {
## remove 0 or NA intensities
keep <- which(intensity > 0)
rtime <- rtime[keep]
intensity <- intensity[keep]
}
if(length(intensity)<5){
best_cor <- NA
beta_snr <- NA
} else {
beta_sequence <- rep(.scale_zero_one(rtime), each=length(skews))
beta_vals <- t(matrix(dbeta(beta_sequence, shape1 = skews, shape2 = 5),
nrow = length(skews)))
# matplot(beta_vals)
beta_cors <- cor(intensity, beta_vals)
best_cor <- max(beta_cors)
best_curve <- beta_vals[, which.max(beta_cors)]
noise_level <- sd(diff(.scale_zero_one(best_curve)-.scale_zero_one(intensity)))
beta_snr <- log10(max(intensity)/noise_level)
}
c(best_cor=best_cor, beta_snr=beta_snr)
}


#' @description
#'
#' Simple helper function to scale a numeric vector between zero and one by
#' subtracting the lowest value and dividing by the maximum.
#'
#' @param num_vec `numeric` vector, typically chromatographic intensities.
#'
#' @author William Kumler
#'
#' @noRd
.scale_zero_one <- function(num_vec){
(num_vec-min(num_vec))/(max(num_vec)-min(num_vec))
}
8 changes: 6 additions & 2 deletions R/functions-Params.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,15 @@ CentWaveParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10,
integrate = 1L, mzdiff = -0.001, fitgauss = FALSE,
noise = 0, verboseColumns = FALSE, roiList = list(),
firstBaselineCheck = TRUE, roiScales = numeric(),
extendLengthMSW = FALSE) {
extendLengthMSW = FALSE, verboseBetaColumns = FALSE) {
return(new("CentWaveParam", ppm = ppm, peakwidth = peakwidth,
snthresh = snthresh, prefilter = prefilter,
mzCenterFun = mzCenterFun, integrate = as.integer(integrate),
mzdiff = mzdiff, fitgauss = fitgauss, noise = noise,
verboseColumns = verboseColumns, roiList = roiList,
firstBaselineCheck = firstBaselineCheck, roiScales = roiScales,
extendLengthMSW = extendLengthMSW))
extendLengthMSW = extendLengthMSW,
verboseBetaColumns=verboseBetaColumns))
}

#' @return The \code{MatchedFilterParam} function returns a
Expand Down Expand Up @@ -213,6 +214,7 @@ CentWavePredIsoParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10,
integrate = 1L, mzdiff = -0.001, fitgauss = FALSE,
noise = 0, verboseColumns = FALSE, roiList = list(),
firstBaselineCheck = TRUE, roiScales = numeric(),
extendLengthMSW = FALSE, verboseBetaColumns = FALSE,
snthreshIsoROIs = 6.25, maxCharge = 3, maxIso = 5,
mzIntervalExtension = TRUE, polarity = "unknown") {
return(new("CentWavePredIsoParam", ppm = ppm, peakwidth = peakwidth,
Expand All @@ -221,6 +223,8 @@ CentWavePredIsoParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10,
mzdiff = mzdiff, fitgauss = fitgauss, noise = noise,
verboseColumns = verboseColumns, roiList = roiList,
firstBaselineCheck = firstBaselineCheck, roiScales = roiScales,
extendLengthMSW = extendLengthMSW,
verboseBetaColumns = verboseBetaColumns,
snthreshIsoROIs = snthreshIsoROIs, maxIso = as.integer(maxIso),
maxCharge = as.integer(maxCharge),
mzIntervalExtension = mzIntervalExtension, polarity = polarity))
Expand Down
4 changes: 4 additions & 0 deletions R/functions-XCMSnExp.R
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,10 @@ dropGenericProcessHistory <- function(x, fun) {
meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3]))
if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2]
res[i, "mz"] <- meanMz

if("beta_cor"%in%cn){
res[i, c("beta_cor", "beta_snr")] <- .get_beta_values(mtx[,3])
}
} else {
res[i, ] <- rep(NA_real_, ncols)
}
Expand Down
Binary file modified data/faahko_sub.RData
Binary file not shown.
Loading

0 comments on commit fe214c9

Please sign in to comment.