Skip to content

Commit

Permalink
Small fixes and improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
jorainer committed Nov 8, 2024
1 parent 6834d1c commit 9e1a1a6
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 41 deletions.
69 changes: 44 additions & 25 deletions R/XcmsExperimentHdf5-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,15 +175,15 @@ NULL
is_filled = rep(FALSE, nr))
if (add) {
## Need to load previous results and append to that.
pks <- .h5_read_data(h5_file, index = sid, name = "chrom_peaks",
pks <- .h5_read_data(h5_file, id = sid, name = "chrom_peaks",
ms_level = msLevel, read_colnames = TRUE,
read_rownames = TRUE)[[1L]]
rnames <- rownames(pks)
max_index <- max(
as.integer(sub(paste0("CP", msLevel, sid), "", rnames)))
res[[i]] <- rbindFill(pks, res[[i]])
pkd <- rbindFill(.h5_read_data(
h5_file, index = sid, name = "chrom_peak_data",
h5_file, id = sid, name = "chrom_peak_data",
ms_level = msLevel, read_rownames = FALSE)[[1L]], pkd)
}
pkdl[[i]] <- pkd
Expand Down Expand Up @@ -217,7 +217,7 @@ NULL
else rt <- rtime(spectra(x))[keep]
## Get the list of chromPeak data for x.
pksl <- .h5_read_data(
x@hdf5_file, index = x@sample_id, name = "chrom_peaks",
x@hdf5_file, id = x@sample_id, name = "chrom_peaks",
ms_level = rep(msLevel, length(x@sample_id)),
read_colnames = TRUE, read_rownames = TRUE)
## Get the max index of a chrom peak per sample
Expand All @@ -228,7 +228,7 @@ NULL
rownames(pksl[[i]])))))
## Get the list of chromPeakData for x.
pkdl <- .h5_read_data(
x@hdf5_file, index = x@sample_id, name = "chrom_peak_data",
x@hdf5_file, id = x@sample_id, name = "chrom_peak_data",
ms_level = rep(msLevel, length(x@sample_id)), read_rownames = TRUE)
## Do refinement (in parallel)
res <- bpmapply(
Expand Down Expand Up @@ -302,22 +302,24 @@ NULL
} else idx_columns <- NULL
ids <- rep(x@sample_id, length(msLevel))
msl <- rep(msLevel, each = length(x@sample_id))
res <- .h5_read_data(x@hdf5_file, index = ids, name = "chrom_peaks",
res <- .h5_read_data(x@hdf5_file, id = ids, name = "chrom_peaks",
ms_level = msl, read_colnames = TRUE,
read_rownames = TRUE, j = idx_columns)
if (length(mz) | length(rt))
res <- lapply(res, function(z, rt, mz, ppm, type) {
z[.is_chrom_peaks_within_mz_rt(
z, rt = rt, mz = mz, ppm = ppm, type = type), , drop = FALSE]
}, rt = rt, mz = mz, ppm = ppm, type = type)
read_rownames = TRUE, j = idx_columns,
rt = rt, mz = mz, ppm = ppm, type = type)
## ## Might be better (memory wise) to pass this to the import function
## ## instead
## if (length(mz) | length(rt))
## res <- lapply(res, function(z, rt, mz, ppm, type) {
## z[.is_chrom_peaks_within_mz_rt(
## z, rt = rt, mz = mz, ppm = ppm, type = type), , drop = FALSE]
## }, rt = rt, mz = mz, ppm = ppm, type = type)
if (by_sample) {
names(res) <- ids
res
} else {
l <- vapply(res, nrow, 1L)
res <- cbind(do.call(rbind, res),
sample = rep(match(ids, x@sample_id), l))
cbind(do.call(rbind, res), sample = rep(match(ids, x@sample_id), l))
}
res
}

.h5_chrom_peak_data <- function(x, columns = character(), by_sample = TRUE) {
Expand Down Expand Up @@ -354,7 +356,7 @@ NULL
cnt <- 0L
for (msl in ms_level) {
## read chrom peaks
cp <- .h5_read_data(hdf5_file, index = id, name = "chrom_peaks",
cp <- .h5_read_data(hdf5_file, id = id, name = "chrom_peaks",
ms_level = msl, read_colnames = TRUE,
read_rownames = FALSE)[[1L]]
## adjust chrom peak rt - use .applyRtAdjToChromPeaks for that.
Expand Down Expand Up @@ -487,7 +489,7 @@ NULL
.h5_feature_values_ms_level <- function(ms_level, x, method, value, intensity,
filled = TRUE) {
cn <- .h5_chrom_peaks_colnames(x, ms_level)
col <- switch(method,
col <- switch(method,
sum = value,
medret = c(value, "rt"),
maxint = c(value, intensity))
Expand Down Expand Up @@ -598,6 +600,21 @@ NULL
d
}

.h5_read_chrom_peaks_matrix <- function(name, h5, index = list(NULL, NULL),
read_colnames = FALSE,
read_rownames = FALSE,
rownames = paste0(name, "_rownames"),
rt = numeric(), mz = numeric(),
ppm = 0, type = "any") {
read_colnames <- read_colnames || length(rt) > 0 || length(mz) > 0
d <- .h5_read_matrix2(name, h5, index, read_colnames, read_rownames,
rownames)
if (length(rt) | length(mz))
d[.is_chrom_peaks_within_mz_rt(d, rt = rt, mz = mz,
ppm = ppm, type = type), , drop = FALSE]
else d
}

#' Read a single `data.frame` from the HDF5 file. With
#' `read_rownames = TRUE` also the row names are read and set, which requires
#' an additional reading step. Note that for a `data.frame` each column
Expand Down Expand Up @@ -679,12 +696,12 @@ NULL
#'
#' @param h5_file `character(1)` with the HDF5 file name
#'
#' @param index `integer` with the indices/IDs of the data sets to read.
#' @param id `character` with the ID(s) of the data sets to read.
#'
#' @param name `character(1)` specifying which data should be read.
#'
#' @param ms_level `integer` with the MS level of each sample/data set that
#' should be read. Has to have the same length than `index`.
#' should be read. Has to have the same length than `id`.
#'
#' @param read_colnames `logical(1)` whether column names should be read and
#' set for each `matrix`.
Expand All @@ -701,40 +718,42 @@ NULL
#' select a **single** column to read. For `name = "chrom_peaks"`: `integer`
#' with the indices of the column(s) that should be imported.
#'
#' @param ... additional parameters passed to `FUN`
#'
#' @return `list()` with the read datasets. Will be a `list` of `numeric`
#' matrices for `name = "chrom_peaks"` or a `list` with `data.frame`s for
#' `name = "chrom_peak_data"`.
#'
#' @noRd
.h5_read_data <- function(h5_file = character(),
index = integer(),
id = character(),
name = c("chrom_peaks", "chrom_peak_data",
"feature_definitions",
"feature_to_chrom_peaks"),
ms_level = integer(),
read_colnames = FALSE,
read_rownames = FALSE,
i = NULL, j = NULL) {
if (!length(index)) return(list())
stopifnot(length(ms_level) == length(index))
i = NULL, j = NULL, ...) {
if (!length(id)) return(list())
stopifnot(length(ms_level) == length(id))
name <- match.arg(name)
FUN <- switch(name,
chrom_peak_data = .h5_read_chrom_peak_data,
feature_definitions = .h5_read_data_frame,
chrom_peaks = .h5_read_chrom_peaks_matrix,
.h5_read_matrix2)
h5 <- rhdf5::H5Fopen(h5_file)
on.exit(invisible(rhdf5::H5Fclose(h5)))
d <- paste0("/", index, "/ms_", ms_level, "/", name)
d <- paste0("/", id, "/ms_", ms_level, "/", name)
index <- list(i, j)
if (is.character(j) && length(j) == 1L) {
d <- paste0(d, "/", j)
index <- list(i, NULL)
}
lapply(d, FUN = FUN, read_colnames = read_colnames,
read_rownames = read_rownames, index = index, h5 = h5)
read_rownames = read_rownames, index = index, h5 = h5, ...)
}


## -------- VALIDITY --------

#' Compares the "mod_count" attribute from an h5 file with the expected
Expand Down
50 changes: 35 additions & 15 deletions tests/testthat/test_XcmsExperimentHdf5-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@ test_that(".h5_chrom_peaks_chunk works", {
c("chrom_peak_data", "chrom_peaks",
"chrom_peaks_colnames", "chrom_peaks_rownames"))
H5Fclose(h5)
a <- .h5_read_data(h5_file, index = "S2", name = "chrom_peaks",
a <- .h5_read_data(h5_file, id = "S2", name = "chrom_peaks",
ms_level = 1L, read_colnames = TRUE,
read_rownames = TRUE)[[1L]]
## add = TRUE
res <- .h5_find_chrom_peaks_chunk(
sps, msLevel = 1L, param = p, h5_file = h5_file, add = TRUE,
sample_id = xmse_h5@sample_id)
expect_equal(res, 4L)
b <- .h5_read_data(h5_file, index = "S2", name = "chrom_peaks",
b <- .h5_read_data(h5_file, id = "S2", name = "chrom_peaks",
ms_level = 1L, read_colnames = TRUE,
read_rownames = TRUE)[[1L]]
expect_equal(nrow(b), 2 * nrow(a))
Expand Down Expand Up @@ -205,14 +205,14 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {
h5f <- tempfile()
ref <- loadXcmsData("faahko_sub2")
x <- .xcms_experiment_to_hdf5(ref, h5f)
ref <- .h5_read_data(x@hdf5_file, index = x@sample_id,
ref <- .h5_read_data(x@hdf5_file, id = x@sample_id,
ms_level = rep(1L, length(x)),
read_colnames = TRUE, read_rownames = TRUE)
.h5_xmse_merge_neighboring_peaks(x)
mod_count <- as.vector(rhdf5::h5read(h5f, "/header/modcount"))
expect_true(mod_count > x@hdf5_mod_count)
## Check that content was changed.
res <- .h5_read_data(x@hdf5_file, index = x@sample_id,
res <- .h5_read_data(x@hdf5_file, id = x@sample_id,
ms_level = rep(1L, length(x)),
read_colnames = TRUE, read_rownames = TRUE)
expect_true(nrow(ref[[1L]]) > nrow(res[[1L]]))
Expand All @@ -235,7 +235,7 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {
res <- .xcms_experiment_to_hdf5(ref, h5f)

.h5_xmse_merge_neighboring_peaks(res)
res <- .h5_read_data(res@hdf5_file, index = res@sample_id,
res <- .h5_read_data(res@hdf5_file, id = res@sample_id,
ms_level = rep(1L, length(res)),
read_colnames = TRUE, read_rownames = TRUE)
ref <- .xmse_merge_neighboring_peaks(ref)
Expand All @@ -250,7 +250,7 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {

test_that(".h5_read_matrix works", {
h5f <- tempfile()
xcms:::.h5_initialize_file(h5f)
.h5_initialize_file(h5f)

a <- cbind(a = c(1.2, 1.4), b = c(3.5, 3.6), c = c(5.3, 5.1))
rownames(a) <- c("CP1", "CP2")
Expand Down Expand Up @@ -319,6 +319,26 @@ test_that(".h5_read_matrix works", {
file.remove(h5f)
})

test_that(".h5_read_chrom_peaks_matrix works", {
res <- .h5_read_chrom_peaks_matrix(
"/S2/ms_1/chrom_peaks", xmse_h5@hdf5_file,
read_colnames = FALSE, read_rownames = FALSE)
expect_true(is.matrix(res))
expect_true(is.numeric(res))
res <- .h5_read_chrom_peaks_matrix(
"/S2/ms_1/chrom_peaks", xmse_h5@hdf5_file,
read_colnames = TRUE, read_rownames = FALSE,
mz = c(300, 350), rt = c(3000, 3500), type = "within")
expect_true(all(res[, "mz"] > 300 & res[, "mz"] < 350))
expect_true(all(res[, "rt"] > 3000 & res[, "rt"] < 3500))

res <- .h5_read_chrom_peaks_matrix(
"/S2/ms_1/chrom_peaks", xmse_h5@hdf5_file,
read_colnames = TRUE, read_rownames = FALSE,
mz = c(300, 350), type = "within")
expect_true(all(res[, "mz"] > 300 & res[, "mz"] < 350))
})

test_that(".h5_read_data_frame works", {
h5f <- tempfile()
.h5_initialize_file(h5f)
Expand Down Expand Up @@ -401,32 +421,32 @@ test_that(".h5_read_data works", {
## chrom peaks
res <- .h5_read_data(h5f)
expect_equal(res, list())
res <- .h5_read_data(h5f, index = 2, name = "chrom_peaks", ms_level = 2L)
res <- .h5_read_data(h5f, id = 2, name = "chrom_peaks", ms_level = 2L)
expect_equal(length(res), 1L)
expect_equal(res[[1L]], unname(b2))
res <- .h5_read_data(h5f, index = 1, name = "chrom_peaks", ms_level = 2L,
res <- .h5_read_data(h5f, id = 1, name = "chrom_peaks", ms_level = 2L,
read_colnames = TRUE)
expect_equal(unname(res[[1L]]), unname(a2))
expect_equal(colnames(res[[1L]]), colnames(a2))
expect_true(is.null(rownames(res[[1L]])))
res <- .h5_read_data(h5f, index = 1, name = "chrom_peaks", ms_level = 2L,
res <- .h5_read_data(h5f, id = 1, name = "chrom_peaks", ms_level = 2L,
read_rownames = TRUE)
expect_equal(unname(res[[1L]]), unname(a2))
expect_equal(rownames(res[[1L]]), rownames(a2))
expect_true(is.null(colnames(res[[1L]])))
## single column
res <- .h5_read_data(h5f, index = c(2, 1), name = "chrom_peaks",
res <- .h5_read_data(h5f, id = c(2, 1), name = "chrom_peaks",
ms_level = c(2L, 2L), j = 2)
expect_equal(length(res), 2L)
expect_true(ncol(res[[1L]]) == 1L)
expect_equal(res[[1L]][, 1], unname(b2[, 2]))
res <- .h5_read_data(h5f, index = c(2, 1), name = "chrom_peaks",
res <- .h5_read_data(h5f, id = c(2, 1), name = "chrom_peaks",
ms_level = c(2L, 2L), j = 2, read_colnames = TRUE,
read_rownames = TRUE)
expect_equal(length(res), 2L)
expect_true(ncol(res[[1L]]) == 1L)
expect_equal(res[[1L]][, 1, drop = FALSE], b2[, 2, drop = FALSE])
res <- .h5_read_data(h5f, index = c(1, 2, 1), name = "chrom_peaks",
res <- .h5_read_data(h5f, id = c(1, 2, 1), name = "chrom_peaks",
ms_level = c(2, 2, 2), j = 1L,
read_colnames = TRUE,
read_rownames = TRUE)
Expand All @@ -435,7 +455,7 @@ test_that(".h5_read_data works", {
expect_equal(res[[2]], b2[, 1, drop = FALSE])

## selected rows.
res <- .h5_read_data(h5f, index = c(1, 2, 1), name = "chrom_peaks",
res <- .h5_read_data(h5f, id = c(1, 2, 1), name = "chrom_peaks",
ms_level = c(2, 2, 2), j = 1L, i = c(2, 1, 2),
read_colnames = TRUE,
read_rownames = TRUE)
Expand All @@ -445,13 +465,13 @@ test_that(".h5_read_data works", {
expect_equal(res[[1]], a2[c(2, 1, 2), 1, drop = FALSE])

## chrom peak data
res <- .h5_read_data(h5f, index = c(2, 1), name = "chrom_peak_data",
res <- .h5_read_data(h5f, id = c(2, 1), name = "chrom_peak_data",
ms_level = c(2L, 2L), read_colnames = TRUE,
read_rownames = TRUE)
expect_equal(length(res), 2)
rownames(b) <- c("CP3", "CP4", "CP5")
expect_equal(unname(res[[1L]]), unname(b))
res <- .h5_read_data(h5f, index = 1, name = "chrom_peak_data",
res <- .h5_read_data(h5f, id = 1, name = "chrom_peak_data",
ms_level = 2L, j = "is_filled")
expect_equal(length(res), 1L)
expect_equal(res[[1L]][, 1], a$is_filled)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_XcmsExperimentHdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ test_that("refineChromPeaks,XcmsExperimentHdf5,MergeNeighboringPeaksParam", {
## Compare results from both. Need chromPeaks() function first.
ref <- refineChromPeaks(ref, MergeNeighboringPeaksParam())
ref_pks <- chromPeaks(ref)
res_pks <- .h5_read_data(res@hdf5_file, index = res@sample_id,
res_pks <- .h5_read_data(res@hdf5_file, id = res@sample_id,
ms_level = rep(1L, length(res)),
read_colnames = TRUE, read_rownames = TRUE)
res_pks <- do.call(
Expand Down

0 comments on commit 9e1a1a6

Please sign in to comment.