From 7bd035dbd904a008aedc75af5579789810357d47 Mon Sep 17 00:00:00 2001
From: Johannes Rainer <johannes.rainer@gmail.com>
Date: Thu, 14 Nov 2024 09:57:37 +0100
Subject: [PATCH] feat: add chromatogram,XcmsExperimentHdf5

---
 R/XcmsExperiment-functions.R                  |   6 +-
 R/XcmsExperimentHdf5-functions.R              | 101 ++++++++++++-----
 R/XcmsExperimentHdf5.R                        |  40 +++----
 R/functions-utils.R                           |   2 +-
 R/methods-XChromatograms.R                    |  10 +-
 man/XcmsExperiment.Rd                         |   5 +-
 man/XcmsExperimentHdf5.Rd                     |  34 ++++++
 man/hidden_aliases.Rd                         |  29 +++--
 .../test_XcmsExperimentHdf5-functions.R       |  51 ++++++++-
 tests/testthat/test_XcmsExperimentHdf5.R      | 103 ++++++++++++++++--
 tests/testthat/test_functions-utils.R         |   2 +-
 11 files changed, 294 insertions(+), 89 deletions(-)

diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R
index 9eb7b089f..1ff360860 100644
--- a/R/XcmsExperiment-functions.R
+++ b/R/XcmsExperiment-functions.R
@@ -921,6 +921,7 @@
                                             msLevel, isolationWindow = NULL,
                                             chunkSize, chromPeaks,
                                             return.type, BPPARAM) {
+    message("Extracting chromatographic data")
     chrs <- as(.mse_chromatogram(
         as(object, "MsExperiment"), rt = rt, mz = mz,
         aggregationFun = aggregationFun, msLevel = msLevel,
@@ -956,7 +957,8 @@
         }
     }
     pb$tick()
-    ## Process features - that is not perfect.
+    ## Process features - that is not perfect: features are selected based on
+    ## mz and rt, not based on the selected chrom peaks.
     if (hasFeatures(object)) {
         message("Processing features")
         pb <- progress_bar$new(format = paste0("[:bar] :current/:",
@@ -1264,4 +1266,4 @@ XcmsExperiment <- function() {
     n@.processHistory <- from@processHistory
     validObject(n)
     n
-}
+}
\ No newline at end of file
diff --git a/R/XcmsExperimentHdf5-functions.R b/R/XcmsExperimentHdf5-functions.R
index fcd7ed8b7..82fd14810 100644
--- a/R/XcmsExperimentHdf5-functions.R
+++ b/R/XcmsExperimentHdf5-functions.R
@@ -578,7 +578,10 @@ NULL
 ##
 ################################################################################
 #' Read chromatograms for a set of samples (chunk) and adds chromatographic
-#' peaks.
+#' peaks. Chromatographic peaks are added/processed by sample reading the
+#' respective information from the hdf5 file. Features are added by processing
+#' the feature and chrom peak IDs of the selected chrom peaks.
+#'
 #'
 #' @param x `XcmsExperimentHdf5` for one subset/chunk of data from which the
 #'     data should be extracted
@@ -588,49 +591,90 @@ NULL
 #'
 #' @param ms_level `integer(1)` with the MS level.
 #' @noRd
-.h5_x_chromatogram <- function(x, index = seq_along(x), ms_level = 1L,
-                               mz, rt, ppm = 0, chromPeaks = "any",
+.h5_x_chromatograms <- function(x, index = seq_along(x), aggregationFun = "sum",
+                               ms_level = 1L, mz, rt, isolationWindow = NULL,
+                               chromPeaks = "any", chunkSize = 2L,
+                               return.type = "XChromatograms",
                                BPPARAM = bpparam()) {
-    ## Get the chromatograms in parallel.
-    chr <- as(chromatogram(as(x, "MsExperiment"), mz = mz,
-                        rt = rt, BPPARAM = BPPARAM), "XChromatograms")
-    js <- seq_len(nrow(chr))
+    message("Extracting chromatographic data")
+    chr <- as(.mse_chromatogram(
+        as(x, "MsExperiment"), rt = rt, mz = mz, msLevel = ms_level,
+        aggregationFun = aggregationFun, isolationWindow = isolationWindow,
+        chunkSize = chunkSize, BPPARAM = BPPARAM), return.type)
+    if (return.type == "MChromatograms" || chromPeaks == "none")
+        return(chr)
     message("Processing chromatographic peaks")
+    js <- seq_len(nrow(chr))
     pb <- progress_bar$new(format = paste0("[:bar] :current/:",
                                            "total (:percent) in ",
                                            ":elapsed"),
-                           total = ncol(chr) + 1L, clear = FALSE)
-    for (i in seq_along(x)) {
+                           total = length(x), clear = FALSE)
+    has_features <- hasFeatures(x, ms_level)
+    if (has_features) {
+        fd <- .h5_read_data(x@hdf5_file, "features", "feature_definitions",
+                            read_rownames = TRUE, ms_level = ms_level)[[1L]]
+        f2p <- vector("list", 1000) # initialize with an educated guess;
+        cnt <- 1L
+    }
+    for (i in seq_along(x)) { # iterate over samples
         cp <- .h5_read_data(x@hdf5_file, x@sample_id[i], "chrom_peaks",
                             ms_level = ms_level, read_colnames = TRUE,
                             read_rownames = TRUE)[[1L]]
-        for (j in js) {
+        cd <- .h5_read_data(x@hdf5_file, x@sample_id[i], "chrom_peak_data",
+                            ms_level = ms_level, read_colnames = TRUE,
+                            read_rownames = FALSE)[[1L]]
+        if (has_features)
+            fidx <- .h5_read_data(x@hdf5_file, x@sample_id[i],
+                                  "feature_to_chrom_peaks",
+                                  ms_level = ms_level)[[1L]]
+        for (j in js) { # iterate over ranges/EICs
             idx <- which(.is_chrom_peaks_within_mz_rt(
-                cp, rt[j, ], mz[j, ], ppm, chromPeaks), useNames = FALSE)
-            a <- cbind(cp[idx, , drop = FALSE], sample = rep(i, length(idx)))
-            b <- .h5_read_data(
-                x@hdf5_file, x@sample_id[i], "chrom_peak_data",
-                ms_level = ms_level, read_colnames = TRUE, i = idx,
-                read_rownames = FALSE)[[1L]]
-            b$ms_level <- rep(ms_level, length(idx))
+                cp, rt[j, ], mz[j, ], type = chromPeaks), useNames = FALSE)
+            li <- length(idx)
+            a <- cbind(cp[idx, , drop = FALSE], sample = rep(i, li))
+            b <- extractROWS(cd, idx)
+            b$ms_level <- rep(ms_level, li)
             attr(b, "row.names") <- rownames(a)
             tmp <- chr@.Data[j, i][[1L]]
             slot(tmp, "chromPeaks", check = FALSE) <- a
             slot(tmp, "chromPeakData", check = FALSE) <- as(b, "DataFrame")
-            chr@.Data[j, i][[1L]] <- tmp
+            chr@.Data[j, i][[1L]] <- tmp # this does not seem to cause copying
+            ## Add mapping of features and chrom peaks for that sample/EIC
+            if (li && has_features) {
+                is_feature <- fidx[, 2L] %in% idx
+                if (any(is_feature)) {
+                    f2p[[cnt]] <- cbind(rownames(fd)[fidx[is_feature, 1L]],
+                                        rownames(cp)[fidx[is_feature, 2L]],
+                                        rep(as.character(j), sum(is_feature)))
+                    cnt <- cnt + 1L
+                }
+            }
         }
         pb$tick()
     }
-    pb$tick()
-    if (hasFeatures(x, ms_level)) {
-        stop("Not yet implemented")
-        ## Somehow add features.
+    if (has_features) {
+        message("Processing features")
+        ## Define the featureDefinitions and assign chrom peaks to each,
+        ## matching the same `"row"` (!).
+        f2p <- do.call(base::rbind, f2p) # mapping of features and chrom peaks
+        cp <- chromPeaks(chr)            # chrom peaks to calculate index
+        cp_row_id <- paste0(rownames(cp), ".", cp[, "row"])
+        ft_cp_row_id <- paste0(f2p[, 2], ".", f2p[, 3])
+        ft_row_id <- paste0(f2p[, 1], ".", f2p[, 3])
+        peakidx <- lapply(split(ft_cp_row_id, ft_row_id),
+                          base::match, table = cp_row_id)
+        id <- strsplit(names(peakidx), ".", fixed = TRUE)
+        fts <- fd[vapply(id, `[`, i = 1L, FUN.VALUE = NA_character_), ]
+        fts$peakidx <- unname(peakidx)
+        fts$row <- vapply(id, function(z) as.integer(z[2L]), NA_integer_)
+        fts$ms_level <- ms_level
+        slot(chr, "featureDefinitions", check = FALSE) <-
+            DataFrame(extractROWS(fts, order(fts$row)))
     }
-    chr@.processHistory <- x@processHistory
+    slot(chr, ".processHistory", check = FALSE) <- x@processHistory
     chr
 }
 
-
 ################################################################################
 ##
 ##        HDF5 FUNCTIONALITY
@@ -805,7 +849,6 @@ NULL
     rhdf5::h5ls(g, recursive = recursive, datasetinfo = FALSE)$name
 }
 
-
 .h5_ms_levels <- function(h5, sample_id) {
     nms <- .h5_dataset_names(paste0("/", sample_id), h5)
     as.integer(unique(sub("ms_", "", grep("^ms", nms, value = TRUE))))
@@ -909,7 +952,7 @@ NULL
 #'
 #' @noRd
 .h5_check_mod_count <- function(h5, mod_count = 0L) {
-    mc <- rhdf5::h5read(h5, "/header/modcount")[1L]
+    mc <- .h5_mod_count(h5)
     if (mc != mod_count)
         stop("The HDF5 file was changed by a different process. This xcms ",
              "result object/variable is no longer valid.")
@@ -965,13 +1008,17 @@ NULL
                    level = comp_level)
 }
 
+.h5_mod_count <- function(h5) {
+    rhdf5::h5read(h5, "/header/modcount")[1L]
+}
+
 #' Every writing operation should increate the "mod count", i.e. the
 #' count of data modifications. This function increases the mod count by +1
 #' and returns this value.
 #'
 #' @noRd
 .h5_increment_mod_count <- function(h5) {
-    mc <- rhdf5::h5read(h5, "/header/modcount")[1L] + 1L
+    mc <- .h5_mod_count(h5) + 1L
     rhdf5::h5write(mc, h5, "/header/modcount",
                    level = .h5_compression_level())
     mc
diff --git a/R/XcmsExperimentHdf5.R b/R/XcmsExperimentHdf5.R
index fba822fcb..6aa92df3e 100644
--- a/R/XcmsExperimentHdf5.R
+++ b/R/XcmsExperimentHdf5.R
@@ -410,14 +410,14 @@ setMethod(
 ##     })
 
 #' TODO:
-#' - add chunkSize to chromPeaks?
-#' - chromPeakData,XcmsExperimentHdf5
-#' - adjustRtime,XcmsExperimentHdf5
+#' - fillChromPeaks,XcmsExperimentHdf5
+#' - filterMsLevel
+#' - filterRt
+#' - filterMz
 #'
 #' @noRd
 NULL
 
-
 ################################################################################
 ##
 ##        RETENTION TIME ALIGNMENT
@@ -594,7 +594,7 @@ setMethod(
             unique(c(object@features_ms_level, msLevel)))
         cpk_idx <- res$peakidx
         res$peakidx <- NULL
-        rownames(res) <- .featureIDs(
+        attr(res, "row.names") <- .featureIDs(
             nrow(res), prefix = paste0("FT", msLevel), min_len = 6)
         ## Save features to "/features/ms_<msLevel>/feature_definitions"
         .h5_write_data(object@hdf5_file, list(features = res),
@@ -705,15 +705,6 @@ setMethod(
 ##
 ################################################################################
 
-#' While previously we were first extracting the chromatograms and then adding
-#' the chrom peaks later for `XcmsExperimentHdf5` it might be more efficient to
-#' also extract the chrom peaks in the loop/chunk processing. So, essentially:
-#' - process `object` chunk-wise
-#' - for each chunk:
-#'   - extract chromatograms (in parallel?)
-#'   - get chrom peaks for each sample/chrom peak.
-#'
-#' @noRd
 #' @rdname hidden_aliases
 setMethod(
     "chromatogram", "XcmsExperimentHdf5",
@@ -737,19 +728,14 @@ setMethod(
             mz <- cbind(rep(-Inf, nrow(rt)), rep(Inf, nrow(rt)))
         return.type <- match.arg(return.type)
         chromPeaks <- match.arg(chromPeaks)
+        if (!hasChromPeaks(object, msLevel))
+            chromPeaks <- "none"
         if (hasAdjustedRtime(object))
             object <- applyAdjustedRtime(object)
-        ## process the data in chunks.
-        ## in each chunk: get chromatograms, load chrom peaks and process those.
-        ## ? how to get/define the features too? get the feature indices?
-        ## Implementation notes:
-        ## XChromatogram has slots @chromPeaks (matrix) @chromPeakData (DataFrame)
-        ## XChromatograms has slot @featureDefinitions (DataFrame)
-
-
-        .xmse_extract_chromatograms_old(
-            object, rt = rt, mz = mz, aggregationFun = aggregationFun,
-            msLevel = msLevel, isolationWindow = isolationWindowTargetMz,
-            chunkSize = chunkSize, chromPeaks = chromPeaks,
-            return.type = return.type, BPPARAM = BPPARAM)
+        .h5_x_chromatograms(
+            object, ms_level = msLevel, chromPeaks = chromPeaks,
+            mz = mz, rt = rt, aggregationFun = aggregationFun,
+            chunkSize = chunkSize, return.type = return.type,
+            isolationWindow = isolationWindowTargetMz,
+            BPPARAM = BPPARAM)
     })
\ No newline at end of file
diff --git a/R/functions-utils.R b/R/functions-utils.R
index 9c92c8043..b80ad97cc 100644
--- a/R/functions-utils.R
+++ b/R/functions-utils.R
@@ -922,4 +922,4 @@ groupOverlaps <- function(xmin, xmax) {
                  PACKAGE = "xcms")
     cbind(rtime = scantime[scns],
           intensity = res$intensity)
-}
+}
\ No newline at end of file
diff --git a/R/methods-XChromatograms.R b/R/methods-XChromatograms.R
index 29f9b80ec..ce0a64075 100644
--- a/R/methods-XChromatograms.R
+++ b/R/methods-XChromatograms.R
@@ -3,14 +3,14 @@
 
 setAs("MChromatograms", "XChromatograms", function(from) {
     res <- new("XChromatograms")
-    res@.Data <- matrix(lapply(from, function(z) {
+    slot(res, ".Data", check = FALSE) <- matrix(lapply(from@.Data, function(z) {
         if (is(z, "Chromatogram"))
             as(z, "XChromatogram")
         else z
     }), nrow = nrow(from), ncol = ncol(from), dimnames = dimnames(from))
-    res@phenoData <- from@phenoData
-    res@featureData <- from@featureData
-    if (validObject(resetClass)) res
+    slot(res, "phenoData", check = FALSE) <- from@phenoData
+    slot(res, "featureData", check = FALSE) <- from@featureData
+    res
 })
 
 #' @rdname XChromatogram
@@ -676,4 +676,4 @@ setMethod("transformIntensity", "XChromatograms", function(object,
                            nrow = nrow(object),
                            dimnames = dimnames(object))
     object
-})
+})
\ No newline at end of file
diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd
index 9d8129783..5cfd036d3 100644
--- a/man/XcmsExperiment.Rd
+++ b/man/XcmsExperiment.Rd
@@ -1,7 +1,7 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/AllGenerics.R, R/MsExperiment.R,
 %   R/XcmsExperiment-functions.R, R/XcmsExperiment-plotting.R,
-%   R/XcmsExperiment.R, R/XcmsExperimentHdf5.R, R/methods-XCMSnExp.R
+%   R/XcmsExperiment.R, R/methods-XCMSnExp.R
 \name{filterFeatureDefinitions}
 \alias{filterFeatureDefinitions}
 \alias{filterRt,MsExperiment-method}
@@ -50,7 +50,6 @@
 \alias{chromatogram,XcmsExperiment-method}
 \alias{processHistory,XcmsExperiment-method}
 \alias{filterFile,XcmsExperiment-method}
-\alias{[,XcmsExperimentHdf5,ANY,ANY,ANY-method}
 \title{Next Generation \code{xcms} Result Object}
 \usage{
 filterFeatureDefinitions(object, ...)
@@ -206,8 +205,6 @@ featureArea(
   keepFeatures = FALSE,
   ...
 )
-
-\S4method{[}{XcmsExperimentHdf5,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE)
 }
 \arguments{
 \item{object}{An \code{XcmsExperiment} object.}
diff --git a/man/XcmsExperimentHdf5.Rd b/man/XcmsExperimentHdf5.Rd
index 96456ea0a..ae0f1e9ba 100644
--- a/man/XcmsExperimentHdf5.Rd
+++ b/man/XcmsExperimentHdf5.Rd
@@ -3,9 +3,18 @@
 \name{XcmsExperimentHdf5}
 \alias{XcmsExperimentHdf5}
 \alias{XcmsExperimentHdf5-class}
+\alias{chromPeakData,XcmsExperimentHdf5-method}
 \alias{adjustRtimePeakGroups,XcmsExperimentHdf5,PeakGroupsParam-method}
 \title{xcms result object for very large data sets}
 \usage{
+\S4method{chromPeakData}{XcmsExperimentHdf5}(
+  object,
+  msLevel = integer(),
+  peaks = character(),
+  return.type = c("DataFrame", "data.frame"),
+  bySample = FALSE
+)
+
 \S4method{adjustRtimePeakGroups}{XcmsExperimentHdf5,PeakGroupsParam}(object, param = PeakGroupsParam(), msLevel = 1L)
 }
 \description{
@@ -34,6 +43,12 @@ the ID of the sample (usually related to the index in the original
 `MsExperiment` object) and the <chrom peak index> the index of the
 chromatographic peak in the chrom peak matrix **of that sample** and
 MS level.
+
+HDF5 does not support parallel processing, thus preprocessing results need
+to be loaded sequentially.
+
+All functionality for `XcmsExperimentHdf5` objects is optimized to reduce
+memory demand at the cost of eventually lower performance.
 }
 \section{Using the HDF5 file-based on-disk data storage}{
 
@@ -44,6 +59,25 @@ hence use the on-disk data storage mode described on this page. All data
 is stored in a file with the name specifyied with parameter `hdf5File`.
 }
 
+\section{Functionality related to chromatographic peaks}{
+
+
+- `chromPeaks()` gains parameter `bySample = FALSE` that, if set to `TRUE`
+  returns a `list` of `chromPeaks` matrices, one for each sample. Due to
+  the way data is organized in `XcmsExperimentHdf5` objects this is more
+  efficient than `bySample = FALSE`. Thus, in cases where chrom peak data
+  is subsequently evaluated or processed by sample, it is suggested to
+  use `bySample = TRUE`.
+
+- `chromPeakData()` gains a new parameter `peaks = character()` which allows
+  to specify from which chromatographic peaks data should be returned.
+  For these chromatographic peaks the ID (row name in `chromPeaks()`)
+  should be provided with the `peaks` parameter. This can reduce the memory
+  requirement for cases in which only data of some selected chromatographic
+  peaks needs to be extracted. Also, `chromPeakData()` supports the
+  `bySample` parameter described for `chromPeaks()` above.
+}
+
 \section{Retention time alignment}{
 
 
diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd
index f5821c8d0..6cea1d3d7 100644
--- a/man/hidden_aliases.Rd
+++ b/man/hidden_aliases.Rd
@@ -5,13 +5,13 @@
 \name{hidden_aliases}
 \alias{hidden_aliases}
 \alias{show,XcmsExperimentHdf5-method}
+\alias{[,XcmsExperimentHdf5,ANY,ANY,ANY-method}
 \alias{findChromPeaks,XcmsExperimentHdf5,Param-method}
 \alias{dropChromPeaks,XcmsExperimentHdf5-method}
 \alias{hasChromPeaks,XcmsExperimentHdf5-method}
 \alias{chromPeaks<-,XcmsExperimentHdf5-method}
 \alias{chromPeaks,XcmsExperimentHdf5-method}
 \alias{chromPeakData<-,XcmsExperimentHdf5-method}
-\alias{chromPeakData,XcmsExperimentHdf5-method}
 \alias{refineChromPeaks,XcmsExperimentHdf5,MergeNeighboringPeaksParam-method}
 \alias{adjustRtime,XcmsExperimentHdf5,PeakGroupsParam-method}
 \alias{dropAdjustedRtime,XcmsExperimentHdf5-method}
@@ -21,11 +21,14 @@
 \alias{featureDefinitions<-,XcmsExperimentHdf5-method}
 \alias{featureDefinitions,XcmsExperimentHdf5-method}
 \alias{featureValues,XcmsExperimentHdf5-method}
+\alias{chromatogram,XcmsExperimentHdf5-method}
 \alias{adjustRtimePeakGroups,XcmsResult,PeakGroupsParam-method}
 \title{Internal page for hidden aliases}
 \usage{
 \S4method{show}{XcmsExperimentHdf5}(object)
 
+\S4method{[}{XcmsExperimentHdf5,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE)
+
 \S4method{findChromPeaks}{XcmsExperimentHdf5,Param}(
   object,
   param,
@@ -50,18 +53,12 @@
   msLevel = integer(),
   type = c("any", "within", "apex_within"),
   isFilledColumn = FALSE,
-  columns = character()
+  columns = character(),
+  bySample = FALSE
 )
 
 \S4method{chromPeakData}{XcmsExperimentHdf5}(object) <- value
 
-\S4method{chromPeakData}{XcmsExperimentHdf5}(
-  object,
-  msLevel = integer(),
-  sample = integer(),
-  return.type = c("DataFrame", "data.frame")
-)
-
 \S4method{refineChromPeaks}{XcmsExperimentHdf5,MergeNeighboringPeaksParam}(
   object,
   param,
@@ -101,6 +98,20 @@
   msLevel = integer()
 )
 
+\S4method{chromatogram}{XcmsExperimentHdf5}(
+  object,
+  rt = matrix(nrow = 0, ncol = 2),
+  mz = matrix(nrow = 0, ncol = 2),
+  aggregationFun = "sum",
+  msLevel = 1L,
+  chunkSize = 2L,
+  isolationWindowTargetMz = NULL,
+  return.type = c("XChromatograms", "MChromatograms"),
+  include = character(),
+  chromPeaks = c("apex_within", "any", "none"),
+  BPPARAM = bpparam()
+)
+
 \S4method{adjustRtimePeakGroups}{XcmsResult,PeakGroupsParam}(object, param = PeakGroupsParam(), msLevel = 1L)
 }
 \value{
diff --git a/tests/testthat/test_XcmsExperimentHdf5-functions.R b/tests/testthat/test_XcmsExperimentHdf5-functions.R
index 5bf6da74b..2aaca46dd 100644
--- a/tests/testthat/test_XcmsExperimentHdf5-functions.R
+++ b/tests/testthat/test_XcmsExperimentHdf5-functions.R
@@ -233,7 +233,7 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {
                          ms_level = rep(1L, length(x)),
                          read_colnames = TRUE, read_rownames = TRUE)
     .h5_xmse_merge_neighboring_peaks(x)
-    mod_count <- as.vector(rhdf5::h5read(h5f, "/header/modcount"))
+    mod_count <- .h5_mod_count(h5f)
     expect_true(mod_count > x@hdf5_mod_count)
     ## Check that content was changed.
     res <- .h5_read_data(x@hdf5_file, id = x@sample_id,
@@ -542,6 +542,13 @@ test_that(".h5_initialize_file", {
     file.remove(h5f)
 })
 
+test_that(".h5_mod_count works", {
+    h5f <- tempfile()
+    .h5_initialize_file(h5f, mod_count = 7L)
+    expect_identical(.h5_mod_count(h5f), 7L)
+    file.remove(h5f)
+})
+
 test_that(".h5_increment_mod_count works", {
     h5f <- tempfile()
     .h5_initialize_file(h5f)
@@ -693,11 +700,11 @@ test_that(".h5_update_rt_chrom_peaks_sample works", {
     expect_true(TRUE)
 })
 
-test_that(".h5_x_chromatogram wokrs", {
+test_that(".h5_x_chromatogram works", {
     rt <- matrix(c(2600, 2700), nrow = 1)
     mz <- matrix(c(340, 400), nrow = 1)
 
-    res <- .h5_x_chromatogram(xmse_h5, mz = mz, rt = rt, chromPeaks = "any")
+    res <- .h5_x_chromatograms(xmse_h5, mz = mz, rt = rt, chromPeaks = "any")
     expect_s4_class(res, "XChromatograms")
     expect_true(nrow(res) == 1L)
     expect_equal(ncol(res), length(xmse_h5))
@@ -712,12 +719,48 @@ test_that(".h5_x_chromatogram wokrs", {
     expect_equal(unname(chromPeaks(res)), unname(chromPeaks(ref)))
 
     ## Multiple rows.
-    res <- .h5_x_chromatogram(
+    res <- .h5_x_chromatograms(
         xmse_h5, mz = chromPeaks(xmse_h5)[1:10, c("mzmin", "mzmax")],
         rt = chromPeaks(xmse_h5)[1:10, c("rtmin", "rtmax")],
         ms_level = 1L,  chromPeaks = "apex_within", BPPARAM = bpparam())
     expect_true(nrow(res) == 10L)
 
+    ## With features.
+    rt <- rbind(rt, c(3000, 3500))
+    mz <- rbind(mz, c(450, 500))
+    tmp <- loadXcmsData("xmse") |>
+        dropFeatureDefinitions() |>
+        groupChromPeaks(pdp, msLevel = 1L)
+
+    ref <- chromatogram(tmp, mz = mz, rt = rt, chromPeaks = "any")
+    res <- .h5_x_chromatograms(xmseg_full_h5, mz = mz, rt = rt,
+                               chromPeaks = "any")
+    a <- chromPeaks(ref)
+    b <- chromPeaks(res)
+    expect_equal(colnames(a), colnames(b))
+    rownames(a) <- NULL
+    rownames(b) <- NULL
+    expect_equal(a, b)
+
+    a <- featureDefinitions(ref)
+    b <- featureDefinitions(res)
+    expect_true(all(colnames(a) %in% colnames(b)))
+    ## We don't have exactly the same number of features, because XcmsExperiment
+    ## selects features based on mz and rt range, not based on included chrom
+    ## peaks.
+    expect_true(all(a$mzmed %in% b$mzmed))
+    b <- b[b$mzmed %in% a$mzmed, ]
+    expect_equal(a$peakidx, b$peakidx)
+
+    ## With overlapping ranges -> duplicated features and chrom peaks.
+    mz <- rbind(mz, mz[1, ])
+    rt <- rbind(rt, rt[1, ])
+    res <- .h5_x_chromatograms(xmseg_full_h5, mz = mz, rt = rt,
+                               chromPeaks = "apex_within")
+    cp <- chromPeaks(res)
+    expect_equal(cp[cp[, "row"] == 1L, colnames(cp) != "row"],
+                 cp[cp[, "row"] == 3L, colnames(cp) != "row"])
+    fts <- featureDefinitions(res)
 })
 
 rm(h5f_full_g)
\ No newline at end of file
diff --git a/tests/testthat/test_XcmsExperimentHdf5.R b/tests/testthat/test_XcmsExperimentHdf5.R
index fcd0ed4a7..8813708e9 100644
--- a/tests/testthat/test_XcmsExperimentHdf5.R
+++ b/tests/testthat/test_XcmsExperimentHdf5.R
@@ -11,6 +11,9 @@ xmseg_full_h5 <- xcms:::.xcms_experiment_to_hdf5(a, h5f_full_g)
 pdp <- PeakDensityParam(sampleGroups = sampleData(xmseg_full_h5)$sample_group,
                         minFraction = 0.4, bw = 30)
 xmseg_full_h5 <- groupChromPeaks(xmseg_full_h5, pdp, msLevel = 1L)
+## reference
+xmseg_full_ref <- dropFeatureDefinitions(loadXcmsData("xmse"))
+xmseg_full_ref <- groupChromPeaks(xmseg_full_ref, pdp, msLevel = 1L)
 
 test_that("XcmsExperimentHdf5 validation works", {
     a <- new("XcmsExperimentHdf5")
@@ -88,7 +91,7 @@ test_that("findChromPeaks,XcmsExperimentHdf5 works", {
     a <- as(xmse_h5, "MsExperiment")
     a <- as(a, "XcmsExperimentHdf5")
     h5_file <- tempfile()
-    xcms:::.h5_initialize_file(h5_file)
+    .h5_initialize_file(h5_file)
     a@hdf5_file <- h5_file
     a@sample_id <- c("S1", "S2", "S3")
     p <- xmse_h5@processHistory[[1L]]@param
@@ -236,19 +239,19 @@ test_that("featureValues,XcmsExperimentHdf5 etc works", {
     nf <- nrow(b)
     rtmed <- b$rtmed
     ## .h5_feature_values_sample
-    a <- xcms:::.h5_feature_values_sample(
+    a <- .h5_feature_values_sample(
         xmseg_full_h5@hdf5_file, sample_id = "S1", ms_level = 1L,
         n_features = nf, method = "sum", filled = FALSE, col_idx = 9L)
     b <- unname(featureValues(ref, method = "sum", value = "maxo",
                               filled = FALSE)[, 1L])
     expect_equal(a, b)
-    a <- xcms:::.h5_feature_values_sample(
+    a <- .h5_feature_values_sample(
         xmseg_full_h5@hdf5_file, sample_id = "S4", ms_level = 1L,
         n_features = nf, filled = FALSE, method = "maxint", col_idx = c(7L, 9L))
     b <- unname(featureValues(ref, method = "maxint", value = "into",
                               filled = FALSE, intensity = "maxo")[, 4L])
     expect_equal(a, b)
-    a <- xcms:::.h5_feature_values_sample(
+    a <- .h5_feature_values_sample(
         xmseg_full_h5@hdf5_file, sample_id = "S4", ms_level = 1L,
         n_features = nf, filled = FALSE, method = "medret", col_idx = c(8L, 4L),
         rtmed = rtmed)
@@ -257,7 +260,7 @@ test_that("featureValues,XcmsExperimentHdf5 etc works", {
     expect_equal(a, b)
 
     ## .h5_feature_values_ms_level
-    a <- xcms:::.h5_feature_values_ms_level(1L, xmseg_full_h5, method = "medret",
+    a <- .h5_feature_values_ms_level(1L, xmseg_full_h5, method = "medret",
                                      value = "into", filled = FALSE)
     b <- featureValues(ref, method = "medret", value = "into", filled = FALSE)
     expect_equal(unname(a), unname(b))
@@ -301,8 +304,7 @@ test_that("featureValues,XcmsExperimentHdf5 etc works", {
 })
 
 test_that("adjustRtimePeakGroups works", {
-    ref <- dropFeatureDefinitions(loadXcmsData("xmse"))
-    ref <- groupChromPeaks(ref, pdp, msLevel = 1L)
+    ref <- xmseg_full_ref
 
     a <- featureValues(
         ref, method = "maxint", intensity = "into", value = "rt")
@@ -343,11 +345,11 @@ test_that("adjustRtime,XcmsExperimentHdf5 and related function work", {
         dropFeatureDefinitions() |>
         applyAdjustedRtime()
     res_h5 <- tempfile()
-    res <- xcms:::.xcms_experiment_to_hdf5(ref, res_h5)
+    res <- .xcms_experiment_to_hdf5(ref, res_h5)
     ## Create a single sample XcmsExperimentHdf5
     a <- ref[3L]
     a_h5 <- tempfile()
-    a <- xcms:::.xcms_experiment_to_hdf5(a, a_h5)
+    a <- .xcms_experiment_to_hdf5(a, a_h5)
     ## Perform retention time alignment on reference data
     ref <- ref |>
         groupChromPeaks(pdp, msLevel = 1L) |>
@@ -411,6 +413,89 @@ test_that(".hasFilledPeaks works with XcmsExperimentHdf5", {
     expect_false(.hasFilledPeaks(xmse_h5))
 })
 
+test_that("chromatogram,XcmsExperimentHdf5 works", {
+    expect_error(chromatogram(xmse_h5, adjustedRtime = FALSE), "unused")
+    expect_warning(res <- chromatogram(xmse_h5, include = "apex_within",
+                                       return.type = "MChromatograms"),
+                   "deprecated")
+    expect_s4_class(res, "MChromatograms")
+    expect_true(nrow(res) == 1L)
+    ref <- chromatogram(faahko_od)
+    expect_equal(intensity(res[1, 1]), unname(intensity(ref[1, 1])))
+
+    rtr <- c(2600, 2700)
+    mzr <- c(340, 400)
+    res <- chromatogram(xmse_h5, mz = mzr, rt = rtr)
+    expect_s4_class(res, "XChromatograms")
+    expect_true(nrow(res) == 1L)
+    expect_true(nrow(chromPeaks(res)) > 0)
+    expect_true(all(chromPeaks(res)[, "mz"] >= 340 &
+                    chromPeaks(res)[, "mz"] <= 400))
+    expect_true(all(chromPeaks(res[1, 1])[, "sample"] == 1L))
+    expect_true(all(chromPeaks(res[1, 2])[, "sample"] == 2L))
+    expect_true(all(chromPeaks(res[1, 3])[, "sample"] == 3L))
+    ref <- chromatogram(xod_x, mz = mzr, rt = rtr)
+    expect_equal(unname(chromPeaks(res)), unname(chromPeaks(ref)))
+
+    ## with features
+    res <- chromatogram(
+        xmseg_full_h5, mz = chromPeaks(xmseg_full_h5)[1:5, c("mzmin", "mzmax")],
+        rt = chromPeaks(xmseg_full_h5)[1:5, c("rtmin", "rtmax")],
+        chunkSize = 2L, BPPARAM = bpparam(), msLevel = 1L,
+        aggregationFun = "sum", isolationWindow = NULL,
+        chromPeaks = "apex_within", return.type = "XChromatograms")
+    expect_true(nrow(featureDefinitions(res)) == 2)
+    expect_true(all(unlist(featureDefinitions(res)$peakidx) %in%
+                    seq_len(nrow(chromPeaks(res)))))
+    ref <- chromatogram(
+        xmseg_full_ref,
+        mz = chromPeaks(xmseg_full_h5)[1:5, c("mzmin", "mzmax")],
+        rt = chromPeaks(xmse_full_h5)[1:5, c("rtmin", "rtmax")])
+    a <- featureDefinitions(res)
+    b <- featureDefinitions(res)
+    expect_true(all(colnames(a) %in% colnames(b)))
+    expect_equal(unname(a[, colnames(b)]), unname(b[, colnames(b)]))
+
+    ## MS2 data.
+    res <- chromatogram(xmseg_full_h5, msLevel = 2L,
+                        mz = chromPeaks(xmse_full_h5)[1:5, c("mzmin", "mzmax")],
+                        rt = chromPeaks(xmse_full_h5)[1:5, c("rtmin", "rtmax")])
+    expect_true(validObject(res))
+    expect_true(length(intensity(res[[1L]])) == 0)
+    expect_true(length(intensity(res[[2L]])) == 0)
+    expect_s4_class(res, "XChromatograms")
+    expect_true(nrow(chromPeaks(res)) == 0)
+
+    ## Defining only mz or rt.
+    rtr <- c(2600, 2700)
+    mzr <- c(340, 400)
+    res <- chromatogram(xmse_h5, mz = mzr)
+    expect_s4_class(res, "XChromatograms")
+    expect_true(nrow(res) == 1L)
+    expect_true(nrow(chromPeaks(res)) > 0)
+    expect_true(all(chromPeaks(res)[, "mz"] >= 340 &
+                    chromPeaks(res)[, "mz"] <= 400))
+    expect_true(all(chromPeaks(res[1, 1])[, "sample"] == 1L))
+    expect_true(all(chromPeaks(res[1, 2])[, "sample"] == 2L))
+    expect_true(all(chromPeaks(res[1, 3])[, "sample"] == 3L))
+    rrt <- range(lapply(res, rtime))
+    expect_true(rrt[1] < 2600)
+    expect_true(rrt[2] > 4400)
+
+    res <- chromatogram(xmse_h5, rt = rtr)
+    expect_s4_class(res, "XChromatograms")
+    expect_true(nrow(res) == 1L)
+    expect_true(nrow(chromPeaks(res)) > 0)
+    expect_true(any(chromPeaks(res)[, "mz"] < 340 |
+                    chromPeaks(res)[, "mz"] > 400))
+    expect_true(all(chromPeaks(res[1, 1])[, "sample"] == 1L))
+    expect_true(all(chromPeaks(res[1, 2])[, "sample"] == 2L))
+    expect_true(all(chromPeaks(res[1, 3])[, "sample"] == 3L))
+    rrt <- range(lapply(res, rtime))
+    expect_true(rrt[1] >= 2600)
+    expect_true(rrt[2] <= 2700)
+})
+
 ## test_that(".h5_feature_chrom_peaks_sample works", {
 ##     cn <- .h5_chrom_peaks_colnames(xmseg_full_h5, 1L)
 ##     res <- .h5_feature_chrom_peaks_sample("S3", xmseg_full_h5@hdf5_file,
diff --git a/tests/testthat/test_functions-utils.R b/tests/testthat/test_functions-utils.R
index d0c18dc64..a58454ff9 100644
--- a/tests/testthat/test_functions-utils.R
+++ b/tests/testthat/test_functions-utils.R
@@ -504,4 +504,4 @@ test_that(".match_last works", {
 
     res <- .match_last(c("c", "a", "d"), a)
     expect_equal(res, c(3L, 4L, NA_integer_))
-})
+})
\ No newline at end of file