Small fixes and improvements

sneumann · Nov 8, 2024 · 9e1a1a6 · 9e1a1a6
1 parent 6834d1c
commit 9e1a1a6
Show file tree

Hide file tree

Showing 3 changed files with 80 additions and 41 deletions.
diff --git a/R/XcmsExperimentHdf5-functions.R b/R/XcmsExperimentHdf5-functions.R
@@ -175,15 +175,15 @@ NULL
                           is_filled = rep(FALSE, nr))
         if (add) {
             ## Need to load previous results and append to that.
-            pks <- .h5_read_data(h5_file, index = sid, name = "chrom_peaks",
+            pks <- .h5_read_data(h5_file, id = sid, name = "chrom_peaks",
                                  ms_level = msLevel, read_colnames = TRUE,
                                  read_rownames = TRUE)[[1L]]
             rnames <- rownames(pks)
             max_index <- max(
                 as.integer(sub(paste0("CP", msLevel, sid), "", rnames)))
             res[[i]] <- rbindFill(pks, res[[i]])
             pkd <- rbindFill(.h5_read_data(
-                h5_file, index = sid, name = "chrom_peak_data",
+                h5_file, id = sid, name = "chrom_peak_data",
                 ms_level = msLevel, read_rownames = FALSE)[[1L]], pkd)
         }
         pkdl[[i]] <- pkd
@@ -217,7 +217,7 @@ NULL
     else rt <- rtime(spectra(x))[keep]
     ## Get the list of chromPeak data for x.
     pksl <- .h5_read_data(
-        x@hdf5_file, index = x@sample_id, name = "chrom_peaks",
+        x@hdf5_file, id = x@sample_id, name = "chrom_peaks",
         ms_level = rep(msLevel, length(x@sample_id)),
         read_colnames = TRUE, read_rownames = TRUE)
     ## Get the max index of a chrom peak per sample
@@ -228,7 +228,7 @@ NULL
                                  rownames(pksl[[i]])))))
     ## Get the list of chromPeakData for x.
     pkdl <- .h5_read_data(
-        x@hdf5_file, index = x@sample_id, name = "chrom_peak_data",
+        x@hdf5_file, id = x@sample_id, name = "chrom_peak_data",
         ms_level = rep(msLevel, length(x@sample_id)), read_rownames = TRUE)
     ## Do refinement (in parallel)
     res <- bpmapply(
@@ -302,22 +302,24 @@ NULL
     } else idx_columns <- NULL
     ids <- rep(x@sample_id, length(msLevel))
     msl <- rep(msLevel, each = length(x@sample_id))
-    res <- .h5_read_data(x@hdf5_file, index = ids, name = "chrom_peaks",
+    res <- .h5_read_data(x@hdf5_file, id = ids, name = "chrom_peaks",
                          ms_level = msl, read_colnames = TRUE,
-                         read_rownames = TRUE, j = idx_columns)
-    if (length(mz) | length(rt))
-        res <- lapply(res, function(z, rt, mz, ppm, type) {
-            z[.is_chrom_peaks_within_mz_rt(
-                z, rt = rt, mz = mz, ppm = ppm, type = type), , drop = FALSE]
-        }, rt = rt, mz = mz, ppm = ppm, type = type)
+                         read_rownames = TRUE, j = idx_columns,
+                         rt = rt, mz = mz, ppm = ppm, type = type)
+    ## ## Might be better (memory wise) to pass this to the import function
+    ## ## instead
+    ## if (length(mz) | length(rt))
+    ##     res <- lapply(res, function(z, rt, mz, ppm, type) {
+    ##         z[.is_chrom_peaks_within_mz_rt(
+    ##             z, rt = rt, mz = mz, ppm = ppm, type = type), , drop = FALSE]
+    ##     }, rt = rt, mz = mz, ppm = ppm, type = type)
     if (by_sample) {
         names(res) <- ids
+        res
     } else {
         l <- vapply(res, nrow, 1L)
-        res <- cbind(do.call(rbind, res),
-                     sample = rep(match(ids, x@sample_id), l))
+        cbind(do.call(rbind, res), sample = rep(match(ids, x@sample_id), l))
     }
-    res
 }
 
 .h5_chrom_peak_data <- function(x, columns = character(), by_sample = TRUE) {
@@ -354,7 +356,7 @@ NULL
     cnt <- 0L
     for (msl in ms_level) {
         ## read chrom peaks
-        cp <- .h5_read_data(hdf5_file, index = id, name = "chrom_peaks",
+        cp <- .h5_read_data(hdf5_file, id = id, name = "chrom_peaks",
                             ms_level = msl, read_colnames = TRUE,
                             read_rownames = FALSE)[[1L]]
         ## adjust chrom peak rt - use .applyRtAdjToChromPeaks for that.
@@ -487,7 +489,7 @@ NULL
 .h5_feature_values_ms_level <- function(ms_level, x, method, value, intensity,
                                         filled = TRUE) {
     cn <- .h5_chrom_peaks_colnames(x, ms_level)
-    col <- switch(method,
+  col <- switch(method,
                   sum = value,
                   medret = c(value, "rt"),
                   maxint = c(value, intensity))
@@ -598,6 +600,21 @@ NULL
     d
 }
 
+.h5_read_chrom_peaks_matrix <- function(name, h5, index = list(NULL, NULL),
+                                        read_colnames = FALSE,
+                                        read_rownames = FALSE,
+                                        rownames = paste0(name, "_rownames"),
+                                        rt = numeric(), mz = numeric(),
+                                        ppm = 0, type = "any") {
+    read_colnames <- read_colnames || length(rt) > 0 || length(mz) > 0
+    d <- .h5_read_matrix2(name, h5, index, read_colnames, read_rownames,
+                          rownames)
+    if (length(rt) | length(mz))
+        d[.is_chrom_peaks_within_mz_rt(d, rt = rt, mz = mz,
+                                       ppm = ppm, type = type), , drop = FALSE]
+    else d
+}
+
 #' Read a single `data.frame` from the HDF5 file. With
 #' `read_rownames = TRUE` also the row names are read and set, which requires
 #' an additional reading step. Note that for a `data.frame` each column
@@ -679,12 +696,12 @@ NULL
 #'
 #' @param h5_file `character(1)` with the HDF5 file name
 #'
-#' @param index `integer` with the indices/IDs of the data sets to read.
+#' @param id `character` with the ID(s) of the data sets to read.
 #'
 #' @param name `character(1)` specifying which data should be read.
 #'
 #' @param ms_level `integer` with the MS level of each sample/data set that
-#'     should be read. Has to have the same length than `index`.
+#'     should be read. Has to have the same length than `id`.
 #'
 #' @param read_colnames `logical(1)` whether column names should be read and
 #'     set for each `matrix`.
@@ -701,40 +718,42 @@ NULL
 #'     select a **single** column to read. For `name = "chrom_peaks"`: `integer`
 #'     with the indices of the column(s) that should be imported.
 #'
+#' @param ... additional parameters passed to `FUN`
+#'
 #' @return `list()` with the read datasets. Will be a `list` of `numeric`
 #'     matrices for `name = "chrom_peaks"` or a `list` with `data.frame`s for
 #'     `name = "chrom_peak_data"`.
 #'
 #' @noRd
 .h5_read_data <- function(h5_file = character(),
-                          index = integer(),
+                          id = character(),
                           name = c("chrom_peaks", "chrom_peak_data",
                                    "feature_definitions",
                                    "feature_to_chrom_peaks"),
                           ms_level = integer(),
                           read_colnames = FALSE,
                           read_rownames = FALSE,
-                          i = NULL, j = NULL) {
-    if (!length(index)) return(list())
-    stopifnot(length(ms_level) == length(index))
+                          i = NULL, j = NULL, ...) {
+    if (!length(id)) return(list())
+    stopifnot(length(ms_level) == length(id))
     name <- match.arg(name)
     FUN <- switch(name,
                   chrom_peak_data = .h5_read_chrom_peak_data,
                   feature_definitions = .h5_read_data_frame,
+                  chrom_peaks = .h5_read_chrom_peaks_matrix,
                   .h5_read_matrix2)
     h5 <- rhdf5::H5Fopen(h5_file)
     on.exit(invisible(rhdf5::H5Fclose(h5)))
-    d <- paste0("/", index, "/ms_", ms_level, "/", name)
+    d <- paste0("/", id, "/ms_", ms_level, "/", name)
     index <- list(i, j)
     if (is.character(j) && length(j) == 1L) {
         d <- paste0(d, "/", j)
         index <- list(i, NULL)
     }
     lapply(d, FUN = FUN, read_colnames = read_colnames,
-           read_rownames = read_rownames, index = index, h5 = h5)
+           read_rownames = read_rownames, index = index, h5 = h5, ...)
 }
 
-
 ##  --------  VALIDITY  --------
 
 #' Compares the "mod_count" attribute from an h5 file with the expected

diff --git a/tests/testthat/test_XcmsExperimentHdf5-functions.R b/tests/testthat/test_XcmsExperimentHdf5-functions.R
@@ -72,15 +72,15 @@ test_that(".h5_chrom_peaks_chunk works", {
                  c("chrom_peak_data", "chrom_peaks",
                    "chrom_peaks_colnames", "chrom_peaks_rownames"))
     H5Fclose(h5)
-    a <- .h5_read_data(h5_file, index = "S2", name = "chrom_peaks",
+    a <- .h5_read_data(h5_file, id = "S2", name = "chrom_peaks",
                        ms_level = 1L, read_colnames = TRUE,
                        read_rownames = TRUE)[[1L]]
     ## add = TRUE
     res <- .h5_find_chrom_peaks_chunk(
         sps, msLevel = 1L, param = p, h5_file = h5_file, add = TRUE,
         sample_id = xmse_h5@sample_id)
     expect_equal(res, 4L)
-    b <- .h5_read_data(h5_file, index = "S2", name = "chrom_peaks",
+    b <- .h5_read_data(h5_file, id = "S2", name = "chrom_peaks",
                        ms_level = 1L, read_colnames = TRUE,
                        read_rownames = TRUE)[[1L]]
     expect_equal(nrow(b), 2 * nrow(a))
@@ -205,14 +205,14 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {
     h5f <- tempfile()
     ref <- loadXcmsData("faahko_sub2")
     x <- .xcms_experiment_to_hdf5(ref, h5f)
-    ref <- .h5_read_data(x@hdf5_file, index = x@sample_id,
+    ref <- .h5_read_data(x@hdf5_file, id = x@sample_id,
                          ms_level = rep(1L, length(x)),
                          read_colnames = TRUE, read_rownames = TRUE)
     .h5_xmse_merge_neighboring_peaks(x)
     mod_count <- as.vector(rhdf5::h5read(h5f, "/header/modcount"))
     expect_true(mod_count > x@hdf5_mod_count)
     ## Check that content was changed.
-    res <- .h5_read_data(x@hdf5_file, index = x@sample_id,
+    res <- .h5_read_data(x@hdf5_file, id = x@sample_id,
                          ms_level = rep(1L, length(x)),
                          read_colnames = TRUE, read_rownames = TRUE)
     expect_true(nrow(ref[[1L]]) > nrow(res[[1L]]))
@@ -235,7 +235,7 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {
     res <- .xcms_experiment_to_hdf5(ref, h5f)
 
     .h5_xmse_merge_neighboring_peaks(res)
-    res <- .h5_read_data(res@hdf5_file, index = res@sample_id,
+    res <- .h5_read_data(res@hdf5_file, id = res@sample_id,
                          ms_level = rep(1L, length(res)),
                          read_colnames = TRUE, read_rownames = TRUE)
     ref <- .xmse_merge_neighboring_peaks(ref)
@@ -250,7 +250,7 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {
 
 test_that(".h5_read_matrix works", {
     h5f <- tempfile()
-    xcms:::.h5_initialize_file(h5f)
+    .h5_initialize_file(h5f)
 
     a <- cbind(a = c(1.2, 1.4), b = c(3.5, 3.6), c = c(5.3, 5.1))
     rownames(a) <- c("CP1", "CP2")
@@ -319,6 +319,26 @@ test_that(".h5_read_matrix works", {
     file.remove(h5f)
 })
 
+test_that(".h5_read_chrom_peaks_matrix works", {
+    res <- .h5_read_chrom_peaks_matrix(
+        "/S2/ms_1/chrom_peaks", xmse_h5@hdf5_file,
+        read_colnames = FALSE, read_rownames = FALSE)
+    expect_true(is.matrix(res))
+    expect_true(is.numeric(res))
+    res <- .h5_read_chrom_peaks_matrix(
+        "/S2/ms_1/chrom_peaks", xmse_h5@hdf5_file,
+        read_colnames = TRUE, read_rownames = FALSE,
+        mz = c(300, 350), rt = c(3000, 3500), type = "within")
+    expect_true(all(res[, "mz"] > 300 & res[, "mz"] < 350))
+    expect_true(all(res[, "rt"] > 3000 & res[, "rt"] < 3500))
+
+    res <- .h5_read_chrom_peaks_matrix(
+        "/S2/ms_1/chrom_peaks", xmse_h5@hdf5_file,
+        read_colnames = TRUE, read_rownames = FALSE,
+        mz = c(300, 350), type = "within")
+    expect_true(all(res[, "mz"] > 300 & res[, "mz"] < 350))
+})
+
 test_that(".h5_read_data_frame works", {
     h5f <- tempfile()
     .h5_initialize_file(h5f)
@@ -401,32 +421,32 @@ test_that(".h5_read_data works", {
     ## chrom peaks
     res <- .h5_read_data(h5f)
     expect_equal(res, list())
-    res <- .h5_read_data(h5f, index = 2, name = "chrom_peaks", ms_level = 2L)
+    res <- .h5_read_data(h5f, id = 2, name = "chrom_peaks", ms_level = 2L)
     expect_equal(length(res), 1L)
     expect_equal(res[[1L]], unname(b2))
-    res <- .h5_read_data(h5f, index = 1, name = "chrom_peaks", ms_level = 2L,
+    res <- .h5_read_data(h5f, id = 1, name = "chrom_peaks", ms_level = 2L,
                          read_colnames = TRUE)
     expect_equal(unname(res[[1L]]), unname(a2))
     expect_equal(colnames(res[[1L]]), colnames(a2))
     expect_true(is.null(rownames(res[[1L]])))
-    res <- .h5_read_data(h5f, index = 1, name = "chrom_peaks", ms_level = 2L,
+    res <- .h5_read_data(h5f, id = 1, name = "chrom_peaks", ms_level = 2L,
                          read_rownames = TRUE)
     expect_equal(unname(res[[1L]]), unname(a2))
     expect_equal(rownames(res[[1L]]), rownames(a2))
     expect_true(is.null(colnames(res[[1L]])))
     ## single column
-    res <- .h5_read_data(h5f, index = c(2, 1), name = "chrom_peaks",
+    res <- .h5_read_data(h5f, id = c(2, 1), name = "chrom_peaks",
                          ms_level = c(2L, 2L), j = 2)
     expect_equal(length(res), 2L)
     expect_true(ncol(res[[1L]]) == 1L)
     expect_equal(res[[1L]][, 1], unname(b2[, 2]))
-    res <- .h5_read_data(h5f, index = c(2, 1), name = "chrom_peaks",
+    res <- .h5_read_data(h5f, id = c(2, 1), name = "chrom_peaks",
                          ms_level = c(2L, 2L), j = 2, read_colnames = TRUE,
                          read_rownames = TRUE)
     expect_equal(length(res), 2L)
     expect_true(ncol(res[[1L]]) == 1L)
     expect_equal(res[[1L]][, 1, drop = FALSE], b2[, 2, drop = FALSE])
-    res <- .h5_read_data(h5f, index = c(1, 2, 1), name = "chrom_peaks",
+    res <- .h5_read_data(h5f, id = c(1, 2, 1), name = "chrom_peaks",
                          ms_level = c(2, 2, 2), j = 1L,
                          read_colnames = TRUE,
                          read_rownames = TRUE)
@@ -435,7 +455,7 @@ test_that(".h5_read_data works", {
     expect_equal(res[[2]], b2[, 1, drop = FALSE])
 
     ## selected rows.
-    res <- .h5_read_data(h5f, index = c(1, 2, 1), name = "chrom_peaks",
+    res <- .h5_read_data(h5f, id = c(1, 2, 1), name = "chrom_peaks",
                          ms_level = c(2, 2, 2), j = 1L, i = c(2, 1, 2),
                          read_colnames = TRUE,
                          read_rownames = TRUE)
@@ -445,13 +465,13 @@ test_that(".h5_read_data works", {
     expect_equal(res[[1]], a2[c(2, 1, 2), 1, drop = FALSE])
 
     ## chrom peak data
-    res <- .h5_read_data(h5f, index = c(2, 1), name = "chrom_peak_data",
+    res <- .h5_read_data(h5f, id = c(2, 1), name = "chrom_peak_data",
                          ms_level = c(2L, 2L), read_colnames = TRUE,
                          read_rownames = TRUE)
     expect_equal(length(res), 2)
     rownames(b) <- c("CP3", "CP4", "CP5")
     expect_equal(unname(res[[1L]]), unname(b))
-    res <- .h5_read_data(h5f, index = 1, name = "chrom_peak_data",
+    res <- .h5_read_data(h5f, id = 1, name = "chrom_peak_data",
                          ms_level = 2L, j = "is_filled")
     expect_equal(length(res), 1L)
     expect_equal(res[[1L]][, 1], a$is_filled)

diff --git a/tests/testthat/test_XcmsExperimentHdf5.R b/tests/testthat/test_XcmsExperimentHdf5.R
@@ -124,7 +124,7 @@ test_that("refineChromPeaks,XcmsExperimentHdf5,MergeNeighboringPeaksParam", {
     ## Compare results from both. Need chromPeaks() function first.
     ref <- refineChromPeaks(ref, MergeNeighboringPeaksParam())
     ref_pks <- chromPeaks(ref)
-    res_pks <- .h5_read_data(res@hdf5_file, index = res@sample_id,
+    res_pks <- .h5_read_data(res@hdf5_file, id = res@sample_id,
                              ms_level = rep(1L, length(res)),
                              read_colnames = TRUE, read_rownames = TRUE)
     res_pks <- do.call(